Publications
2019
Riesco, Adrián; Fidalgo, Eduardo; Al-Nabki, Wesam; Jáñez-Martino, Francisco; Alegre, Enrique
Classifying Pastebin content through the generation of PasteCC labeled dataset Proceedings Article
En: Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4–6, 2019, Proceedings 14, pp. 456–467, Springer International Publishing, 2019.
Resumen | Enlaces | BibTeX | Etiquetas: Cybercrime Detection, Logistic Regression, machine learning, Pastebin, Text classification, TF-IDF
% Conference paper in the HAIS 2019 proceedings. Names use the unambiguous
% "Last, First" form; the doi is the identifier embedded in the Springer url;
% braces in the title protect proper nouns from style-driven downcasing.
@inproceedings{riesco_classifying_2019,
title = {Classifying {Pastebin} content through the generation of {PasteCC} labeled dataset},
author = {Riesco, Adrián and Fidalgo, Eduardo and Al-Nabki, Wesam and Jáñez-Martino, Francisco and Alegre, Enrique},
doi = {10.1007/978-3-030-29859-3_39},
url = {https://link.springer.com/chapter/10.1007/978-3-030-29859-3_39},
year = {2019},
date = {2019-01-01},
booktitle = {Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4--6, 2019, Proceedings 14},
pages = {456--467},
publisher = {Springer International Publishing},
abstract = {This paper presents the PasteCC_17K dataset, containing 17,640 text samples from Pastebin, classified into 15 categories, including 6 potentially illegal ones. The study evaluates different text representation techniques and classifiers, finding that TF-IDF with Logistic Regression offers the best performance, helping authorities detect suspicious content on Pastebin.},
keywords = {Cybercrime Detection, Logistic Regression, machine learning, Pastebin, Text classification, TF-IDF},
pubstate = {published},
tppubtype = {inproceedings}
}
2017
Al-Nabki, Wesam; Fidalgo, Eduardo; Alegre, Enrique; De Paz-Centeno, Iván
Classifying illegal activities on Tor network based on web textual contents Proceedings Article
En: Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, pp. 35–43, 2017.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Darknet Analysis, Logistic Regression, machine learning, Text classification, TF-IDF
% Paper published in conference proceedings, so it is typed as inproceedings
% with the proceedings title in booktitle (not journal). Names use the
% "Last, First" form so that "De" stays part of the surname "De Paz-Centeno"
% instead of being parsed as a given name.
@inproceedings{al_nabki_classifying_2017,
title = {Classifying illegal activities on {Tor} network based on web textual contents},
author = {Al-Nabki, Wesam and Fidalgo, Eduardo and Alegre, Enrique and De Paz-Centeno, Iván},
url = {https://aclanthology.org/E17-1004/},
year = {2017},
date = {2017-01-01},
urldate = {2017-01-01},
booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
pages = {35--43},
abstract = {This paper introduces DUTA, a publicly available dataset of Darknet domains labeled into 26 classes. Using DUTA, a classification study was conducted with TF-IDF and supervised classifiers. Logistic Regression with TF-IDF achieved 96.6% accuracy and a 93.7% F1-score in detecting illegal activities, aiding potential law enforcement tools.},
keywords = {Cybersecurity, Darknet Analysis, Logistic Regression, machine learning, Text classification, TF-IDF},
pubstate = {published},
tppubtype = {inproceedings}
}