Publications
2025
Jáñez-Martino, Francisco; Barrón-Cedeño, Alberto; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Muti, Arianna
On persuasion in spam email: A multi-granularity text analysis Artículo de revista
En: Expert Systems with Applications, vol. 265, pp. 125767, 2025, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, Natural Language Processing, Spam detection
@article{janez-martino_persuasion_2025,
  author    = {Jáñez-Martino, Francisco and Barrón-Cedeño, Alberto and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Muti, Arianna},
  title     = {On persuasion in spam email: A multi-granularity text analysis},
  journal   = {Expert Systems with Applications},
  volume    = {265},
  pages     = {125767},
  publisher = {Pergamon},
  year      = {2025},
  date      = {2025-01-01},
  url       = {https://www.sciencedirect.com/science/article/pii/S0957417424026344},
  abstract  = {This paper explores the use of supervised machine learning models to detect persuasion techniques in spam emails, addressing both binary classification (presence/absence of persuasion) and multilabel classification (identifying specific persuasion techniques). The research utilizes natural language processing and adapts propaganda detection methods from news articles, analyzing emails at full-text, sentence, and snippet levels. The study includes the development of a custom spam dataset and fine-tuning of RoBERTa-based models, ultimately aiming to enhance cybersecurity through better understanding of persuasion tactics in malicious emails.},
  keywords  = {Cybersecurity, machine learning, Natural Language Processing, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Spam email classification based on cybersecurity potential risk using natural language processing Artículo de revista
En: Knowledge-Based Systems, vol. 310, pp. 112939, 2025, (Publisher: Elsevier).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Email Classification, machine learning, Natural Language Processing, Spam detection
@article{janez-martino_spam_2025,
  author    = {Jáñez-Martino, Francisco and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
  title     = {Spam email classification based on cybersecurity potential risk using natural language processing},
  journal   = {Knowledge-Based Systems},
  volume    = {310},
  pages     = {112939},
  publisher = {Elsevier},
  year      = {2025},
  date      = {2025-01-01},
  url       = {https://www.sciencedirect.com/science/article/pii/S0950705124015739},
  abstract  = {This study focuses on detecting spam emails, a key vector for cyberattacks. It introduces 56 features based on NLP techniques, grouped into five categories: Headers, Text, Attachments, URLs, and Protocols. A new dataset, SERC, was created for spam risk classification. Using binary classification and regression, the Random Forest classifier achieved the best performance (F1-Score of 0.914), and Random Forest Regressor had the lowest Mean Square Error (0.781). Features from the Headers and Text groups were found to be the most important.},
  keywords  = {Cybersecurity, Email Classification, machine learning, Natural Language Processing, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Jáñez-Martino, Francisco; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Carofilis-Vasco, Andrés; Martínez-Mendoza, Alicia
Comparative Analysis of Natural Language Processing Models for Malware Spam Email Identification Artículo en actas de congreso
En: International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024), 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Artificial Intelligence, Cybersecurity, Natural Language Processing
@inproceedings{janez-martino_comparative_2024,
  author    = {Jáñez-Martino, Francisco and Fidalgo, Eduardo and Alaiz-Rodríguez, Rocío and Carofilis-Vasco, Andrés and Martínez-Mendoza, Alicia},
  title     = {Comparative Analysis of Natural Language Processing Models for Malware Spam Email Identification},
  booktitle = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security ({NLPAICS} 2024)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=yATJZvcAAAAJ:z_wVstp3MssC},
  keywords  = {Artificial Intelligence, Cybersecurity, Natural Language Processing},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2020
Molpeceres-Barrientos, Gonzalo; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Parnell, Andrew
Machine learning techniques for the detection of inappropriate erotic content in text Artículo de revista
En: International Journal of Computational Intelligence Systems, vol. 13, no 1, pp. 591–603, 2020, (Publisher: Springer Netherlands Dordrecht).
Resumen | Enlaces | BibTeX | Etiquetas: machine learning, Natural Language Processing, NLP, Text classification
@article{molpeceres-barrientos_machine_2020,
  author    = {Molpeceres-Barrientos, Gonzalo and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Parnell, Andrew},
  title     = {Machine learning techniques for the detection of inappropriate erotic content in text},
  journal   = {International Journal of Computational Intelligence Systems},
  volume    = {13},
  number    = {1},
  pages     = {591--603},
  publisher = {Springer Netherlands},
  address   = {Dordrecht},
  year      = {2020},
  date      = {2020-01-01},
  doi       = {10.2991/ijcis.d.200519.003},
  url       = {https://link.springer.com/article/10.2991/ijcis.d.200519.003},
  urldate   = {2020-01-01},
  abstract  = {This study addresses the problem of detecting erotic or sexual content in text documents, specifically for protecting children online. Using Natural Language Processing (NLP) techniques, the authors evaluated twelve models combining different text encoders (Bag of Words, TF-IDF, and Word2vec) with various classifiers (SVM, Logistic Regression, k-NN, and Random Forest). The evaluation was conducted on a dataset created from Reddit. The best result was achieved using TF-IDF with an SVM classifier, which achieved an accuracy of 0.97 and an F-score of 0.96 (precision 0.96/recall 0.95). This demonstrates the feasibility of detecting erotic content and creating filters for minors or user preferences.},
  keywords  = {machine learning, Natural Language Processing, NLP, Text classification},
  pubstate  = {published},
  tppubtype = {article}
}
2019
Merayo-Alba, Sergio; Fidalgo, Eduardo; González-Castro, Víctor; Alaiz-Rodríguez, Rocío; Velasco-Mata, Javier
Use of natural language processing to identify inappropriate content in text Artículo en actas de congreso
En: Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4–6, 2019, Proceedings 14, pp. 254–263, 2019, (Publisher: Springer International Publishing).
Resumen | Enlaces | BibTeX | Etiquetas: deep learning, machine learning, Natural Language Processing, Text Encoders, Violent Content Detection
@inproceedings{merayo-alba_use_2019,
  author    = {Merayo-Alba, Sergio and Fidalgo, Eduardo and González-Castro, Víctor and Alaiz-Rodríguez, Rocío and Velasco-Mata, Javier},
  title     = {Use of natural language processing to identify inappropriate content in text},
  booktitle = {Hybrid Artificial Intelligent Systems: 14th International Conference, {HAIS} 2019, León, Spain, September 4--6, 2019, Proceedings},
  pages     = {254--263},
  publisher = {Springer International Publishing},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1007/978-3-030-29859-3_22},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-29859-3_22},
  abstract  = {The quick development of communication through new technology media such as social networks and mobile phones has improved our lives. However, this also produces collateral problems such as the presence of insults and abusive comments. In this work, we address the problem of detecting violent content on text documents using Natural Language Processing techniques. Following an approach based on Machine Learning techniques, we have trained six models resulting from the combinations of two text encoders, Term Frequency-Inverse Document Frequency and Bag of Words, together with three classifiers: Logistic Regression, Support Vector Machines and Naïve Bayes. We have also assessed StarSpace, a Deep Learning approach proposed by Facebook and configured to use a Hit@1 accuracy. We evaluated these seven alternatives in two publicly available datasets from the Wikipedia Detox Project: Attack and Aggression. StarSpace achieved an accuracy of 0.938 and 0.937 in these datasets, respectively, being the algorithm recommended to detect violent content on text documents among the alternatives evaluated.},
  keywords  = {deep learning, machine learning, Natural Language Processing, Text Encoders, Violent Content Detection},
  pubstate  = {published},
  tppubtype = {inproceedings}
}