Publications
2025
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Spam email classification based on cybersecurity potential risk using natural language processing Artículo de revista
En: Knowledge-Based Systems, vol. 310, pp. 112939, 2025, (Publisher: Elsevier).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Email Classification, machine learning, Natural Language Processing, Spam detection
@article{janez-martino_spam_2025,
  author    = {Francisco Jáñez-Martino and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
  title     = {Spam email classification based on cybersecurity potential risk using natural language processing},
  journal   = {Knowledge-Based Systems},
  volume    = {310},
  pages     = {112939},
  year      = {2025},
  date      = {2025-01-01},
  url       = {https://www.sciencedirect.com/science/article/pii/S0950705124015739},
  abstract  = {This study focuses on detecting spam emails, a key vector for cyberattacks. It introduces 56 features based on NLP techniques, grouped into five categories: Headers, Text, Attachments, URLs, and Protocols. A new dataset, SERC, was created for spam risk classification. Using binary classification and regression, the Random Forest classifier achieved the best performance (F1-Score of 0.914), and Random Forest Regressor had the lowest Mean Square Error (0.781). Features from the Headers and Text groups were found to be the most important.},
  note      = {Publisher: Elsevier},
  keywords  = {Cybersecurity, Email Classification, machine learning, Natural Language Processing, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
Jáñez-Martino, Francisco; Barrón-Cedeño, Alberto; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Muti, Arianna
On persuasion in spam email: A multi-granularity text analysis Artículo de revista
En: Expert Systems with Applications, vol. 265, pp. 125767, 2025, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, Natural Language Processing, Spam detection
@article{janez-martino_persuasion_2025,
  author    = {Francisco Jáñez-Martino and Alberto Barrón-Cedeño and Rocío Alaiz-Rodríguez and Víctor González-Castro and Arianna Muti},
  title     = {On persuasion in spam email: A multi-granularity text analysis},
  journal   = {Expert Systems with Applications},
  volume    = {265},
  pages     = {125767},
  year      = {2025},
  date      = {2025-01-01},
  url       = {https://www.sciencedirect.com/science/article/pii/S0957417424026344},
  abstract  = {This paper explores the use of supervised machine learning models to detect persuasion techniques in spam emails, addressing both binary classification (presence/absence of persuasion) and multilabel classification (identifying specific persuasion techniques). The research utilizes natural language processing and adapts propaganda detection methods from news articles, analyzing emails at full-text, sentence, and snippet levels. The study includes the development of a custom spam dataset and fine-tuning of RoBERTa-based models, ultimately aiming to enhance cybersecurity through better understanding of persuasion tactics in malicious emails.},
  note      = {Publisher: Pergamon},
  keywords  = {Cybersecurity, machine learning, Natural Language Processing, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Al-Nabki, Wesam; Jáñez-Martino, Francisco; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodríguez, Rocío
A review of Spotting Child Sexual Exploitation Material using File Names and their Path Artículo de revista
En: IX Jornadas Nacionales de Investigación En Ciberseguridad, pp. 502–503, 2024, (Publisher: Antonia M. Reina Quintero).
Resumen | Enlaces | BibTeX | Etiquetas: certifications, Cybersecurity, higher education, training
@inproceedings{al_nabki_review_2024,
  author        = {Wesam Al-Nabki and Francisco Jáñez-Martino and Eduardo Fidalgo and Enrique Alegre and Rocío Alaiz-Rodríguez},
  title         = {A review of Spotting Child Sexual Exploitation Material using File Names and their Path},
  booktitle     = {IX Jornadas Nacionales de Investigación En Ciberseguridad},
  pages         = {502--503},
  year          = {2024},
  date          = {2024-01-01},
  urldate       = {2024-01-01},
  url           = {https://dialnet.unirioja.es/servlet/articulo?codigo=9633501},
  abstract      = {Cybersecurity training is a global priority due to the current shortage of highly skilled professionals. This conference aims to address various training options available, such as courses, workshops, certifications, and undergraduate and postgraduate education programs, with a particular focus on the latter. The discussion will also cover the challenges and future perspectives needed to meet the growing demand for cybersecurity professionals.},
  note          = {Publisher: Antonia M. Reina Quintero},
  keywords      = {certifications, Cybersecurity, higher education, training},
  pubstate      = {published},
  tppubtype     = {inproceedings},
  internal-note = {NOTE(review): abstract and keywords describe cybersecurity training, not the topic in the title -- they appear to belong to a different record; verify against the source. The "Publisher" in note looks like a personal name (possibly the proceedings editor) -- confirm before moving it to publisher/editor.}
}
Jáñez-Martino, Francisco; Carofilis-Vasco, Andrés; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Spam hierarchical clustering for campaigns spotting and topic-based classification [Póster] Artículo de revista
En: 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Logistic Regression, Multi-classification, Spam detection
@misc{janez-martino_spam_2024-1,
  author    = {Francisco Jáñez-Martino and Andrés Carofilis-Vasco and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
  title     = {Spam hierarchical clustering for campaigns spotting and topic-based classification [Póster]},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://idus.us.es/items/9828eae7-9cec-4574-8863-99e9020e1770},
  abstract  = {This article develops spam email multiclassification systems for cybersecurity, using two datasets: SPEMC-15K-E (English) and SPEMC-15K-S (Spanish). The datasets are classified into eleven categories. The best results for English (F1-score: 0.953, 94.6% accuracy) were achieved with TF-IDF and Logistic Regression, while for Spanish, TF-IDF and Naïve Bayes achieved an F1-score of 0.945 and 98.5% accuracy. TF-IDF with Logistic Regression also had the fastest processing time (2ms per email for English and 2.2ms for Spanish).},
  note      = {Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  keywords  = {Cybersecurity, Logistic Regression, Multi-classification, Spam detection},
  pubstate  = {published},
  tppubtype = {misc}
}
Castaño, Felipe; Martínez-Mendoza, Alicia; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Alegre, Enrique
Familiarity Analysis and Phishing Website Detection using PhiKitA Dataset [Póster] Artículo de revista
En: 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, PhiKitA Dataset, phishing detection
@misc{castano_familiarity_2024,
  author    = {Felipe Castaño and Alicia Martínez-Mendoza and Eduardo Fidalgo and Rocío Alaiz-Rodríguez and Enrique Alegre},
  title     = {Familiarity Analysis and Phishing Website Detection using {PhiKitA} Dataset [Póster]},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://idus.us.es/items/04850276-e785-4039-977b-0c43806ac349},
  abstract  = {Phishing kits enable attackers to launch phishing campaigns more efficiently. This paper introduces PhiKitA, a dataset of phishing kits and the websites they generate. Three experiments were conducted: familiarity analysis, phishing website detection, and phishing kit classification, using MD5 hashes, fingerprints, and graph-based DOM representation. Results show that phishing website detection achieved 92.50% accuracy, while phishing kit classification proved less effective due to insufficient extracted information.},
  note      = {Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  keywords  = {Cybersecurity, machine learning, PhiKitA Dataset, phishing detection},
  pubstate  = {published},
  tppubtype = {misc}
}
Al-Nabki, Wesam; Fidalgo, Eduardo; Alegre, Enrique; Delany, Sarah Jane; Jáñez-Martino, Francisco
Classifying the content of online notepad services using active learning Artículo de revista
En: Journal of Intelligent Information Systems, pp. 1–27, 2024, (Publisher: Springer US).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Illegal Activities, machine learning, Pastebin, Text classification
@article{al-nabki_classifying_2024,
  author    = {Wesam Al-Nabki and Eduardo Fidalgo and Enrique Alegre and Sarah Jane Delany and Francisco Jáñez-Martino},
  title     = {Classifying the content of online notepad services using active learning},
  journal   = {Journal of Intelligent Information Systems},
  pages     = {1--27},
  year      = {2024},
  date      = {2024-01-01},
  urldate   = {2024-01-01},
  doi       = {10.1007/s10844-024-00902-8},
  url       = {https://link.springer.com/article/10.1007/s10844-024-00902-8},
  abstract  = {This paper proposes a cascading classification system with Active Learning to identify suspicious activities on Pastebin. The model classifies texts into code snippets, readability, and suspicious or illegal activities. It introduces the INSPECT-3.8M dataset, containing 3.8 million labeled samples. This approach helps law enforcement agencies detect and block illegal content on Pastebin before it spreads.},
  note      = {Publisher: Springer US},
  keywords  = {Cybersecurity, Illegal Activities, machine learning, Pastebin, Text classification},
  pubstate  = {published},
  tppubtype = {article}
}
Delgado, Juan José; Fidalgo, Eduardo; Alegre, Enrique; Carofilis-Vasco, Andrés; Martínez-Mendoza, Alicia
CECILIA: Enhancing CSIRT Effectiveness with Transformer-Based Cyber Incident Classification Artículo de revista
En: International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024), 2024.
Resumen | Enlaces | BibTeX | Etiquetas: CECILIA, Cyber Incident Classification, Cybersecurity
@inproceedings{delgado_cecilia_2024,
  author        = {Juan José Delgado and Eduardo Fidalgo and Enrique Alegre and Andrés Carofilis-Vasco and Alicia Martínez-Mendoza},
  title         = {{CECILIA}: Enhancing {CSIRT} Effectiveness with Transformer-Based Cyber Incident Classification},
  booktitle     = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  year          = {2024},
  date          = {2024-01-01},
  url           = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=yATJZvcAAAAJ:XD-gHx7UXLsC},
  abstract      = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  keywords      = {CECILIA, Cyber Incident Classification, Cybersecurity},
  pubstate      = {published},
  tppubtype     = {inproceedings},
  internal-note = {TODO(review): abstract only repeats the venue name; replace it with the paper's real abstract.}
}
Díaz, Daniel; Al-Nabki, Wesam; Fernández-Robles, Laura; Alegre, Enrique; Fidalgo, Eduardo; Martínez-Mendoza, Alicia
SpamClus: An Agglomerative Clustering Algorithm for Spam Email Campaigns Detection Artículo de revista
En: International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024), 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Agglomerative Clustering, Cybersecurity, Email Classification, machine learning, Spam detection, SpamClus
@inproceedings{diaz_spamclus_2024,
  author        = {Daniel Díaz and Wesam Al-Nabki and Laura Fernández-Robles and Enrique Alegre and Eduardo Fidalgo and Alicia Martínez-Mendoza},
  title         = {{SpamClus}: An Agglomerative Clustering Algorithm for Spam Email Campaigns Detection},
  booktitle     = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  year          = {2024},
  date          = {2024-01-01},
  url           = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=100&pagesize=100&sortby=title&citation_for_view=yATJZvcAAAAJ:t7zJ5fGR-2UC},
  abstract      = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  keywords      = {Agglomerative Clustering, Cybersecurity, Email Classification, machine learning, Spam detection, SpamClus},
  pubstate      = {published},
  tppubtype     = {inproceedings},
  internal-note = {TODO(review): abstract only repeats the venue name; replace it with the paper's real abstract.}
}
Jáñez-Martino, Francisco; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Carofilis-Vasco, Andrés; Martínez-Mendoza, Alicia
Comparative Analysis of Natural Language Processing Models for Malware Spam Email Identification Artículo de revista
En: International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024), 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Artificial Intelligence, Cybersecurity, Natural Language Processing
@inproceedings{janez-martino_comparative_2024,
  author        = {Francisco Jáñez-Martino and Eduardo Fidalgo and Rocío Alaiz-Rodríguez and Andrés Carofilis-Vasco and Alicia Martínez-Mendoza},
  title         = {Comparative Analysis of Natural Language Processing Models for Malware Spam Email Identification},
  booktitle     = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  year          = {2024},
  date          = {2024-01-01},
  url           = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=yATJZvcAAAAJ:z_wVstp3MssC},
  abstract      = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  keywords      = {Artificial Intelligence, Cybersecurity, Natural Language Processing},
  pubstate      = {published},
  tppubtype     = {inproceedings},
  internal-note = {TODO(review): abstract only repeats the venue name; replace it with the paper's real abstract.}
}
2023
Castaño, Felipe; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Alegre, Enrique
PhiKitA: Phishing Kit Attacks Dataset for Phishing Websites Identification Artículo de revista
En: IEEE Access, vol. 11, pp. 40779–40789, 2023, (Publisher: IEEE).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Dataset, phishing detection
@article{castano_phikita_2023,
  author    = {Felipe Castaño and Eduardo Fidalgo and Rocío Alaiz-Rodríguez and Enrique Alegre},
  title     = {{PhiKitA}: Phishing Kit Attacks Dataset for Phishing Websites Identification},
  journal   = {IEEE Access},
  volume    = {11},
  pages     = {40779--40789},
  year      = {2023},
  date      = {2023-01-01},
  url       = {https://ieeexplore.ieee.org/abstract/document/10103863},
  abstract  = {This paper introduces PhiKitA, a novel dataset containing phishing kits and phishing websites generated from these kits. The dataset is used to investigate phishing kit detection, phishing website identification, and the source of phishing websites. The study applied MD5 hashes, fingerprints, and graph representation DOM algorithms to analyze the dataset. The results show that the graph representation algorithm achieved an accuracy of 92.50% for phishing detection, while MD5 hash representation achieved a 39.54% F1 score, indicating its limited effectiveness in distinguishing phishing sources.},
  note      = {Publisher: IEEE},
  keywords  = {Cybersecurity, Dataset, phishing detection},
  pubstate  = {published},
  tppubtype = {article}
}
Martínez-Mendoza, Alicia; Sánchez-Paniagua, Manuel; Carofilis-Vasco, Andrés; Jáñez-Martino, Francisco; Fidalgo, Eduardo; Alegre, Enrique
Applying Machine Learning to login URLs for phishing detection Artículo de revista
En: Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023, pp. 487–488, 2023, (Publisher: Universidade de Vigo).
Resumen | Enlaces | BibTeX | Etiquetas: AI, Cybersecurity, machine learning, phishing detection, URL analysis
@inproceedings{martinez-mendoza_applying_2023,
  author    = {Alicia Martínez-Mendoza and Manuel Sánchez-Paniagua and Andrés Carofilis-Vasco and Francisco Jáñez-Martino and Eduardo Fidalgo and Enrique Alegre},
  title     = {Applying Machine Learning to login {URLs} for phishing detection},
  booktitle = {Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023},
  pages     = {487--488},
  publisher = {Universidade de Vigo},
  year      = {2023},
  date      = {2023-01-01},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9044941},
  abstract  = {This paper explores the application of machine learning for phishing detection using login URLs. By analyzing URL patterns and features, the study aims to differentiate between legitimate and phishing websites. Various machine learning models are evaluated to enhance detection accuracy, providing a proactive approach to cybersecurity threats.},
  keywords  = {AI, Cybersecurity, machine learning, phishing detection, URL analysis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
A review of spam email detection: analysis of spammer strategies and the dataset shift problem Artículo de revista
En: Artificial Intelligence Review, vol. 56, no 2, pp. 1145–1173, 2023, (Publisher: Springer Netherlands Dordrecht).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, dataset shift, machine learning, Spam detection
@article{janez-martino_review_2023,
  author    = {Francisco Jáñez-Martino and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
  title     = {A review of spam email detection: analysis of spammer strategies and the dataset shift problem},
  journal   = {Artificial Intelligence Review},
  volume    = {56},
  number    = {2},
  pages     = {1145--1173},
  year      = {2023},
  date      = {2023-01-01},
  doi       = {10.1007/s10462-022-10195-4},
  url       = {https://link.springer.com/article/10.1007/s10462-022-10195-4},
  abstract  = {Spam emails, which once were mainly an annoyance, now increasingly contain scams, malware, and phishing attempts. Despite high-performing spam filters based on machine learning, users continue to report rising incidents of fraud and attacks via spam. This paper highlights two key challenges in spam email detection: the dynamic nature of the environment, leading to dataset shift, and the presence of adversarial actors (spammers). The review focuses on the impact of these challenges and examines various spammer strategies and state-of-the-art techniques for developing robust filters. Experimental results show that ignoring dataset shift can severely degrade the performance of spam filters, leading to high error rates.},
  note      = {Publisher: Springer Netherlands Dordrecht},
  keywords  = {Cybersecurity, dataset shift, machine learning, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Castaño, Felipe; Sánchez-Paniagua, Manuel; Delgado, J; Velasco-Mata, Javier; Sepúlveda, A; Fidalgo, Eduardo; Alegre, Enrique
Evaluation of state-of-art phishing detection strategies based on machine learning Artículo de revista
En: Investigación en Ciberseguridad (Castilla-La Mancha). Ediciones de la Universidad De Castilla-La Mancha, 2021.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, phishing detection
@inproceedings{castano_evaluation_2021,
  author        = {Felipe Castaño and Manuel Sánchez-Paniagua and J Delgado and Javier Velasco-Mata and A Sepúlveda and Eduardo Fidalgo and Enrique Alegre},
  title         = {Evaluation of state-of-art phishing detection strategies based on machine learning},
  booktitle     = {Investigación en Ciberseguridad (Castilla-La Mancha)},
  publisher     = {Ediciones de la Universidad De Castilla-La Mancha},
  year          = {2021},
  date          = {2021-01-01},
  urldate       = {2021-01-01},
  url           = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=yATJZvcAAAAJ:Tiz5es2fbqcC},
  abstract      = {This paper reviews and evaluates current state-of-the-art phishing detection strategies that use machine learning.},
  keywords      = {Cybersecurity, machine learning, phishing detection},
  pubstate      = {published},
  tppubtype     = {inproceedings},
  internal-note = {NOTE(review): booktitle and publisher were split from the original combined journal string "Investigación en Ciberseguridad (Castilla-La Mancha). Ediciones de la Universidad De Castilla-La Mancha"; confirm the exact proceedings title. Author given names "J" and "A" are abbreviated -- expand if known.}
}
Velasco-Mata, Javier; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Efficient detection of botnet traffic by features selection and decision trees Artículo de revista
En: IEEE Access, vol. 9, pp. 120567–120579, 2021, (Publisher: IEEE).
Resumen | Enlaces | BibTeX | Etiquetas: Botnet Detection, Cybersecurity, feature selection, machine learning, Network Traffic Analysis
@article{velasco-mata_efficient_2021,
  author    = {Javier Velasco-Mata and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
  title     = {Efficient detection of botnet traffic by features selection and decision trees},
  journal   = {IEEE Access},
  volume    = {9},
  pages     = {120567--120579},
  year      = {2021},
  date      = {2021-01-01},
  url       = {https://ieeexplore.ieee.org/abstract/document/9523853},
  abstract  = {Botnets pose a major online threat, causing significant economic losses. With the rise of connected devices, analyzing large network traffic data is crucial. This study enhances botnet traffic classification by selecting the most relevant features using Information Gain and Gini Importance. Three feature subsets (5, 6, and 7 features) were tested with Decision Tree, Random Forest, and k-Nearest Neighbors on two datasets derived from CTU-13 (QB-CTU13 and EQB-CTU13). Results show that Decision Trees with a five-feature set achieved the best performance, with an 85% F1 score and an average classification time of 0.78 microseconds per sample.},
  note      = {Publisher: IEEE},
  keywords  = {Botnet Detection, Cybersecurity, feature selection, machine learning, Network Traffic Analysis},
  pubstate  = {published},
  tppubtype = {article}
}
Sánchez-Paniagua, Manuel; Fidalgo, Eduardo; Alegre, Enrique; Jáñez-Martino, Francisco
Fraudulent e-commerce websites detection through machine learning Artículo de revista
En: Hybrid Artificial Intelligent Systems: 16th International Conference, HAIS 2021, Bilbao, Spain, September 22–24, 2021, Proceedings 16, pp. 267–279, 2021, (Publisher: Springer International Publishing).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, E-commerce, Fraud Detection, machine learning
@inproceedings{sanchez-paniagua_fraudulent_2021,
  author        = {Manuel Sánchez-Paniagua and Eduardo Fidalgo and Enrique Alegre and Francisco Jáñez-Martino},
  title         = {Fraudulent e-commerce websites detection through machine learning},
  booktitle     = {Hybrid Artificial Intelligent Systems: 16th International Conference, HAIS 2021, Bilbao, Spain, September 22--24, 2021, Proceedings 16},
  pages         = {267--279},
  publisher     = {Springer International Publishing},
  year          = {2021},
  date          = {2021-01-01},
  doi           = {10.1007/978-3-030-86271-8_23},
  url           = {https://link.springer.com/chapter/10.1007/978-3-030-86271-8_23},
  abstract      = {With the rise of e-commerce, users are increasingly vulnerable to fraudulent websites that sell counterfeit products or steal personal information. Existing protection methods, such as blacklists and rules, are prone to high false-positive rates and require constant updating. This paper presents a publicly available dataset of potentially fraudulent websites, incorporating seven new features for better detection. The model, using Random Forest and 11 handcrafted features, achieved an F1-Score of X on a dataset of 282 samples.},
  keywords      = {Cybersecurity, E-commerce, Fraud Detection, machine learning},
  pubstate      = {published},
  tppubtype     = {inproceedings},
  internal-note = {TODO(review): the abstract contains an unfilled placeholder ("F1-Score of X"); insert the value reported in the paper.}
}
Castaño, Felipe; Fidalgo, Eduardo; Alegre, Enrique; Chaves, Deisy; Sánchez-Paniagua, Manuel
State of the Art: Content-based and Hybrid Phishing Detection Artículo de revista
En: 2021.
Resumen | Enlaces | BibTeX | Etiquetas: Content-based Features, Cybersecurity, deep learning, Hybrid Features, Hybrid Phishing, machine learning, phishing detection
@misc{fidalgo_state_2021,
  author        = {Felipe Castaño and Eduardo Fidalgo and Enrique Alegre and Deisy Chaves and Manuel Sánchez-Paniagua},
  title         = {State of the Art: Content-based and Hybrid Phishing Detection},
  year          = {2021},
  date          = {2021-01-01},
  urldate       = {2021-01-01},
  eprint        = {2101.12723},
  eprinttype    = {arXiv},
  url           = {https://arxiv.org/abs/2101.12723},
  abstract      = {Phishing attacks have evolved and increased over time and, for this reason, the task of distinguishing between a legitimate site and a phishing site is more and more difficult, fooling even the most expert users. The main proposals focused on addressing this problem can be divided into four approaches: List-based, URL based, content-based, and hybrid. In this state of the art, the most recent techniques using web content-based and hybrid approaches for Phishing Detection are reviewed and compared.},
  keywords      = {Content-based Features, Cybersecurity, deep learning, Hybrid Features, Hybrid Phishing, machine learning, phishing detection},
  pubstate      = {published},
  tppubtype     = {misc},
  internal-note = {NOTE(review): same work as entry castano_state_2021 (identical arXiv URL and abstract); keep one and alias the other key. The key prefix "fidalgo" does not match the first author.}
}
Castaño, Felipe; Fidalgo, Eduardo; Alegre, Enrique; Chaves, Deisy; Sánchez-Paniagua, Manuel
State of the art: content-based and hybrid phishing detection Artículo de revista
En: arXiv preprint arXiv:2101.12723, 2021.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Hybrid Phishing, phishing detection
@misc{castano_state_2021,
  author     = {Felipe Castaño and Eduardo Fidalgo and Enrique Alegre and Deisy Chaves and Manuel Sánchez-Paniagua},
  title      = {State of the art: content-based and hybrid phishing detection},
  year       = {2021},
  date       = {2021-01-01},
  eprint     = {2101.12723},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2101.12723},
  abstract   = {Phishing attacks have evolved and increased over time and, for this reason, the task of distinguishing between a legitimate site and a phishing site is more and more difficult, fooling even the most expert users. The main proposals focused on addressing this problem can be divided into four approaches: List-based, URL based, content-based, and hybrid. In this state of the art, the most recent techniques using web content-based and hybrid approaches for Phishing Detection are reviewed and compared.},
  keywords   = {Cybersecurity, Hybrid Phishing, phishing detection},
  pubstate   = {published},
  tppubtype  = {misc}
}
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo
Trustworthiness of spam email addresses using machine learning Artículo de revista
En: Proceedings of the 21st ACM Symposium on Document Engineering, pp. 1–4, 2021.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, Phishing, Spam Email Detection, Trustworthiness Analysis
@inproceedings{janez-martino_trustworthiness_2021,
  author    = {Francisco Jáñez-Martino and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo},
  title     = {Trustworthiness of spam email addresses using machine learning},
  booktitle = {Proceedings of the 21st ACM Symposium on Document Engineering},
  pages     = {1--4},
  year      = {2021},
  date      = {2021-01-01},
  doi       = {10.1145/3469096.3475060},
  url       = {https://dl.acm.org/doi/abs/10.1145/3469096.3475060},
  abstract  = {This paper addresses the growing issue of spam emails used by cybercriminals for scams, phishing, and malware attacks. It presents a proof-of-concept methodology to help users assess the trustworthiness of email addresses. The authors introduce a manually labeled dataset of email addresses, categorized as low and high quality, and extract 18 handcrafted features based on social engineering techniques and natural language properties. Four machine learning classifiers are tested, with Naive Bayes yielding the best performance (88.17% accuracy and 0.808 F1-Score). The study also utilizes the InterpretML framework to identify the most relevant features for building an automatic system to assess email address trustworthiness.},
  keywords  = {Cybersecurity, machine learning, Phishing, Spam Email Detection, Trustworthiness Analysis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2020
Biswas, Rubel; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Perceptual image hashing based on frequency dominant neighborhood structure applied to Tor domains recognition Artículo de revista
En: Neurocomputing, vol. 383, pp. 24–38, 2020, (Publisher: Elsevier).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Deep Web, perceptual hashing, TOR
@article{biswas_perceptual_2020,
  author    = {Rubel Biswas and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
  title     = {Perceptual image hashing based on frequency dominant neighborhood structure applied to {Tor} domains recognition},
  journal   = {Neurocomputing},
  volume    = {383},
  pages     = {24--38},
  year      = {2020},
  date      = {2020-01-01},
  url       = {https://www.sciencedirect.com/science/article/pii/S0925231219316674},
  abstract  = {This paper proposes an automatic method to recognize illicit domains on the Tor network using perceptual hashing through domain snapshots. The method introduces DUSI-2K, a dataset of Tor service domain snapshots, and F-DNS, a new hashing technique based on Dominant Neighborhood Structure (DNS) and Global Neighborhood Structure (GNS). F-DNS outperforms other state-of-the-art methods, achieving an accuracy of 98.75% in recognizing Tor domains, significantly surpassing methods like ResNet50 and Inception-ResNet-v2. Fine-tuning these models does not improve results, demonstrating the effectiveness of F-DNS for Tor domain classification.},
  note      = {Publisher: Elsevier},
  keywords  = {Cybersecurity, Deep Web, perceptual hashing, TOR},
  pubstate  = {published},
  tppubtype = {article}
}
Jáñez-Martino, Francisco; Fidalgo, Eduardo; González, Santiago; Velasco-Mata, Javier
Classification of spam emails through hierarchical clustering and supervised learning Artículo de revista
En: arXiv preprint arXiv:2005.08773, 2020.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, Spam Classification, Text Processing, TF-IDF & BOW
@misc{janez-martino_classification_2020,
  author     = {Francisco Jáñez-Martino and Eduardo Fidalgo and Santiago González and Javier Velasco-Mata},
  title      = {Classification of spam emails through hierarchical clustering and supervised learning},
  year       = {2020},
  date       = {2020-01-01},
  urldate    = {2020-01-01},
  eprint     = {2005.08773},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2005.08773},
  abstract   = {This work introduces SPEMC-11K, the first multi-class spam email dataset, categorizing spam into Health and Technology, Personal Scams, and Sexual Content. Using TF-IDF and BOW with Naïve Bayes, Decision Trees, and SVM, the best accuracy (95.39% F1-score) is achieved with TF-IDF and SVM, while TF-IDF and NB offer the fastest classification (2.13ms per email).},
  keywords   = {Cybersecurity, machine learning, Spam Classification, Text Processing, TF-IDF & BOW},
  pubstate   = {published},
  tppubtype  = {misc}
}
Biswas, Rubel; Carofilis-Vasco, Andrés; Fidalgo, Eduardo; Jáñez-Martino, Francisco; Blanco-Medina, Pablo
Perceptual Hashing applied to Tor domains recognition Artículo de revista
En: arXiv preprint arXiv:2005.10090, 2020.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, DCT, Deep Web, Image classification, TOR
@misc{biswas_perceptual_2020-1,
  author     = {Rubel Biswas and Andrés Carofilis-Vasco and Eduardo Fidalgo and Francisco Jáñez-Martino and Pablo Blanco-Medina},
  title      = {Perceptual Hashing applied to {Tor} domains recognition},
  year       = {2020},
  date       = {2020-01-01},
  urldate    = {2020-01-01},
  eprint     = {2005.10090},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2005.10090},
  abstract   = {This paper introduces Frequency-Dominant Neighborhood Structure (F-DNS), a perceptual hashing method for automatically classifying Tor domains by their screenshots. F-DNS outperforms other methods, achieving better correlation coefficients, especially for rotated images. The method was tested on the Darknet Usage Service Images-2K (DUSI-2K) dataset and achieved an accuracy of 98.75%, surpassing other classification and hashing techniques.},
  keywords   = {Cybersecurity, DCT, Deep Web, Image classification, TOR},
  pubstate   = {published},
  tppubtype  = {misc}
}
2019
Velasco-Mata, Javier; Fidalgo, Eduardo; González-Castro, Víctor; Alegre, Enrique; Blanco-Medina, Pablo
Botnet detection on TCP traffic using supervised machine learning Artículo de revista
En: Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4–6, 2019, Proceedings 14, pp. 444–455, 2019, (Publisher: Springer International Publishing).
Resumen | Enlaces | BibTeX | Etiquetas: Botnets, Classifiers, Cybersecurity, Datasets, machine learning
@inproceedings{velasco-mata_botnet_2019,
  author    = {Javier Velasco-Mata and Eduardo Fidalgo and Víctor González-Castro and Enrique Alegre and Pablo Blanco-Medina},
  title     = {Botnet detection on {TCP} traffic using supervised machine learning},
  booktitle = {Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4--6, 2019, Proceedings 14},
  pages     = {444--455},
  publisher = {Springer International Publishing},
  year      = {2019},
  date      = {2019-01-01},
  doi       = {10.1007/978-3-030-29859-3_38},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-29859-3_38},
  abstract  = {The rise of botnets on the Internet requires detecting their activity. Two datasets (TCP-Int and TCP-Sink) were created to evaluate traffic classifiers. Four Machine Learning models were tested, with Decision Tree achieving the best performance: 0.99 F1 score on TCP-Int and 0.99 AUC score on TCP-Sink.},
  keywords  = {Botnets, Classifiers, Cybersecurity, Datasets, machine learning},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2018
Gangwar, Abhishek; Fidalgo, Eduardo; Alegre, Enrique; González-Castro, Víctor
Phishfingerprint: A practical approach for phishing web page identity retrieval based on visual cues Artículo de revista
En: International Conference of Applications of Intelligent Systems, 2018.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Perceptual Hash, Phishing, Visual Similarity
@inproceedings{gangwar_phishfingerprint_2018,
  author    = {Abhishek Gangwar and Eduardo Fidalgo and Enrique Alegre and Víctor González-Castro},
  title     = {Phishfingerprint: A practical approach for phishing web page identity retrieval based on visual cues},
  booktitle = {International Conference of Applications of Intelligent Systems},
  year      = {2018},
  date      = {2018-01-01},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=en&user=opCbArQAAAAJ&cstart=100&pagesize=100&sortby=title&citation_for_view=opCbArQAAAAJ:XiSMed-E-HIC},
  abstract  = {This paper proposes a framework to maintain a repository of phishing web pages and retrieve the identity of newly reported phishing pages. The framework has two key contributions: first, it introduces a semi-automated method to create a non-redundant database of phishing websites. Second, it presents a robust two-stage approach to identify the reported phishing page by comparing its visual similarity to the registered pages using perceptual hash fingerprinting.},
  keywords  = {Cybersecurity, Perceptual Hash, Phishing, Visual Similarity},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2017
Al-Nabki, Wesam; Fidalgo, Eduardo; Alegre, Enrique; de Paz-Centeno, Iván
Classifying illegal activities on Tor network based on web textual contents Artículo de revista
En: Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers, pp. 35–43, 2017.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Darknet Analysis, Logistic Regression, machine learning, Text classification, TF-IDF
@inproceedings{al_nabki_classifying_2017,
  author        = {Wesam Al-Nabki and Eduardo Fidalgo and Enrique Alegre and Iván de Paz-Centeno},
  title         = {Classifying illegal activities on {Tor} network based on web textual contents},
  booktitle     = {Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 1, Long Papers},
  pages         = {35--43},
  year          = {2017},
  date          = {2017-01-01},
  urldate       = {2017-01-01},
  url           = {https://aclanthology.org/E17-1004/},
  abstract      = {This paper introduces DUTA, a publicly available dataset of Darknet domains labeled into 26 classes. Using DUTA, a classification study was conducted with TF-IDF and supervised classifiers. Logistic Regression with TF-IDF achieved 96.6% accuracy and a 93.7% F1-score in detecting illegal activities, aiding potential law enforcement tools.},
  keywords      = {Cybersecurity, Darknet Analysis, Logistic Regression, machine learning, Text classification, TF-IDF},
  pubstate      = {published},
  tppubtype     = {inproceedings},
  internal-note = {NOTE(review): the particle in the last author's name was lower-cased ("de") so BibTeX parses it as part of the surname instead of the given name; confirm the preferred spelling with the author.}
}