Publications
2024
Jáñez-Martino, Francisco; Carofilis-Vasco, Andrés; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Spam hierarchical clustering for campaigns spotting and topic-based classification [Póster] Artículo de revista
En: 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Logistic Regression, Multi-classification, Spam detection
% Poster (the title carries the "[Póster]" marker). No journal is recorded for
% it, so it is typed as misc rather than article, which requires a journal.
% Percent signs in the abstract are escaped so the field survives LaTeX.
@misc{janez-martino_spam_2024-1,
  title        = {Spam hierarchical clustering for campaigns spotting and topic-based classification [Póster]},
  author       = {Jáñez-Martino, Francisco and Carofilis-Vasco, Andrés and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
  howpublished = {Poster},
  year         = {2024},
  url          = {https://idus.us.es/items/9828eae7-9cec-4574-8863-99e9020e1770},
  abstract     = {This article develops spam email multiclassification systems for cybersecurity, using two datasets: SPEMC-15K-E (English) and SPEMC-15K-S (Spanish). The datasets are classified into eleven categories. The best results for English (F1-score: 0.953, 94.6\% accuracy) were achieved with TF-IDF and Logistic Regression, while for Spanish, TF-IDF and Naïve Bayes achieved an F1-score of 0.945 and 98.5\% accuracy. TF-IDF with Logistic Regression also had the fastest processing time (2ms per email for English and 2.2ms for Spanish).},
  note         = {Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  keywords     = {Cybersecurity, Logistic Regression, Multi-classification, Spam detection},
  pubstate     = {published},
  tppubtype    = {misc}
}
2019
Riesco, Adrián; Fidalgo, Eduardo; Al-Nabki, Wesam; Jáñez-Martino, Francisco; Alegre, Enrique
Classifying Pastebin content through the generation of PasteCC labeled dataset Proceedings Article
En: Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4–6, 2019, Proceedings, pp. 456–467, Springer International Publishing, 2019.
Resumen | Enlaces | BibTeX | Etiquetas: Cybercrime Detection, Logistic Regression, machine learning, Pastebin, Text classification, TF-IDF
% Conference paper in the HAIS 2019 proceedings (Springer).
% The DOI is the one embedded in the Springer chapter URL. The page range uses
% the double-hyphen form, and proper nouns in the title are brace-protected so
% sentence-casing styles keep their capitals.
@inproceedings{riesco_classifying_2019,
  title     = {Classifying {Pastebin} content through the generation of {PasteCC} labeled dataset},
  author    = {Riesco, Adrián and Fidalgo, Eduardo and Al-Nabki, Wesam and Jáñez-Martino, Francisco and Alegre, Enrique},
  booktitle = {Hybrid Artificial Intelligent Systems: 14th International Conference, {HAIS} 2019, León, Spain, September 4--6, 2019, Proceedings},
  pages     = {456--467},
  publisher = {Springer International Publishing},
  year      = {2019},
  doi       = {10.1007/978-3-030-29859-3_39},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-29859-3_39},
  abstract  = {This paper presents the PasteCC\_17K dataset, containing 17,640 text samples from Pastebin, classified into 15 categories, including 6 potentially illegal ones. The study evaluates different text representation techniques and classifiers, finding that TF-IDF with Logistic Regression offers the best performance, helping authorities detect suspicious content on Pastebin.},
  keywords  = {Cybercrime Detection, Logistic Regression, machine learning, Pastebin, Text classification, TF-IDF},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2017
Al-Nabki, Wesam; Fidalgo, Eduardo; Alegre, Enrique; De Paz-Centeno, Iván
Classifying illegal activities on Tor network based on web textual contents Proceedings Article
En: Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, pp. 35–43, 2017.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Darknet Analysis, Logistic Regression, machine learning, Text classification, TF-IDF
% Conference paper: the venue is a proceedings volume, so it is recorded as
% inproceedings with booktitle rather than as a journal article.
% The last author is written in comma form so that "De Paz-Centeno" is kept
% together as the surname instead of "De" being read as a given name.
@inproceedings{al_nabki_classifying_2017,
  title     = {Classifying illegal activities on {Tor} network based on web textual contents},
  author    = {Al-Nabki, Wesam and Fidalgo, Eduardo and Alegre, Enrique and De Paz-Centeno, Iván},
  booktitle = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  pages     = {35--43},
  year      = {2017},
  url       = {https://aclanthology.org/E17-1004/},
  urldate   = {2017-01-01},
  abstract  = {This paper introduces DUTA, a publicly available dataset of Darknet domains labeled into 26 classes. Using DUTA, a classification study was conducted with TF-IDF and supervised classifiers. Logistic Regression with TF-IDF achieved 96.6\% accuracy and a 93.7\% F1-score in detecting illegal activities, aiding potential law enforcement tools.},
  keywords  = {Cybersecurity, Darknet Analysis, Logistic Regression, machine learning, Text classification, TF-IDF},
  pubstate  = {published},
  tppubtype = {inproceedings}
}