Publications
2024
Jáñez-Martino, Francisco; Carofilis-Vasco, Andrés; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Spam hierarchical clustering for campaigns spotting and topic-based classification [Póster] Artículo de revista
En: 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Logistic Regression, Multi-classification, Spam detection
@comment{Poster (see the title suffix). No journal is recorded for this item,
  so it is typed as misc rather than article, and the issuing institution is
  given in howpublished. tppubtype is a non-standard field kept verbatim for
  the tool that exported it. NOTE(review): confirm whether that tool also
  needs its own type value changed.}
@misc{janez-martino_spam_2024-1,
  author       = {Jáñez-Martino, Francisco and Carofilis-Vasco, Andrés and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
  title        = {Spam hierarchical clustering for campaigns spotting and topic-based classification [{Póster}]},
  howpublished = {Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  year         = {2024},
  url          = {https://idus.us.es/items/9828eae7-9cec-4574-8863-99e9020e1770},
  abstract     = {This article develops spam email multiclassification systems for cybersecurity, using two datasets: SPEMC-15K-E (English) and SPEMC-15K-S (Spanish). The datasets are classified into eleven categories. The best results for English (F1-score: 0.953, 94.6\% accuracy) were achieved with TF-IDF and Logistic Regression, while for Spanish, TF-IDF and Naïve Bayes achieved an F1-score of 0.945 and 98.5\% accuracy. TF-IDF with Logistic Regression also had the fastest processing time (2ms per email for English and 2.2ms for Spanish).},
  keywords     = {Cybersecurity, Logistic Regression, Multi-classification, Spam detection},
  pubstate     = {published},
  tppubtype    = {article}
}
2023
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Classifying spam emails using agglomerative hierarchical clustering and a topic-based approach Artículo de revista
En: Applied Soft Computing, vol. 139, pp. 110226, 2023, (Publisher: Elsevier).
Resumen | Enlaces | BibTeX | Etiquetas: Hidden text, Image-based spam, Multi-classification, Spam detection, Term frequency, Text classification, Word embedding
@comment{Journal article in Applied Soft Computing, volume 139. The pages
  field holds the single identifier 110226 rather than a page range
  (presumably an article number; verify against the publisher page).
  tppubtype is a non-standard field kept verbatim for the tool that
  exported this entry.}
@article{janez-martino_classifying_2023,
  author    = {Jáñez-Martino, Francisco and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
  title     = {Classifying spam emails using agglomerative hierarchical clustering and a topic-based approach},
  journal   = {Applied Soft Computing},
  volume    = {139},
  pages     = {110226},
  year      = {2023},
  publisher = {Elsevier},
  url       = {https://www.sciencedirect.com/science/article/pii/S1568494623002442},
  abstract  = {This paper introduces two novel datasets, SPEMC-15K-E and SPEMC-15K-S, containing 15K spam emails each in English and Spanish. The emails are categorized into 11 classes using hierarchical clustering. Evaluation of 16 classification pipelines reveals that TF-IDF with Logistic Regression achieves the highest performance for the English dataset (F1 score of 0.953, accuracy of 94.6\%), while TF-IDF with Naïve Bayes performs best for Spanish (F1 score of 0.945, accuracy of 98.5\%). TF-IDF with LR is also the fastest for both languages.},
  keywords  = {Hidden text, Image-based spam, Multi-classification, Spam detection, Term frequency, Text classification, Word embedding},
  pubstate  = {published},
  tppubtype = {article}
}