Publications
2024
Castaño, Felipe; Martínez-Mendoza, Alicia; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Alegre, Enrique
Familiarity Analysis and Phishing Website Detection using PhiKitA Dataset [Póster] Artículo de revista
En: 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, PhiKitA Dataset, phishing detection
@article{castano_familiarity_2024,
  title     = {Familiarity Analysis and Phishing Website Detection using {PhiKitA} Dataset [Póster]},
  author    = {Castaño, Felipe and Martínez-Mendoza, Alicia and Fidalgo, Eduardo and Alaiz-Rodríguez, Rocío and Alegre, Enrique},
  url       = {https://idus.us.es/items/04850276-e785-4039-977b-0c43806ac349},
  year      = {2024},
  date      = {2024-01-01},
  abstract  = {Phishing kits enable attackers to launch phishing campaigns more efficiently. This paper introduces PhiKitA, a dataset of phishing kits and the websites they generate. Three experiments were conducted: familiarity analysis, phishing website detection, and phishing kit classification, using MD5 hashes, fingerprints, and graph-based DOM representation. Results show that phishing website detection achieved 92.50% accuracy, while phishing kit classification proved less effective due to insufficient extracted information.},
  publisher = {Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  keywords  = {Cybersecurity, machine learning, PhiKitA Dataset, phishing detection},
  pubstate  = {published},
  tppubtype = {article}
}
2023
Castaño, Felipe; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Alegre, Enrique
PhiKitA: Phishing Kit Attacks Dataset for Phishing Websites Identification Artículo de revista
En: IEEE Access, vol. 11, pp. 40779–40789, 2023, (Publisher: IEEE).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Dataset, phishing detection
@article{castano_phikita_2023,
  title     = {{PhiKitA}: Phishing Kit Attacks Dataset for Phishing Websites Identification},
  author    = {Castaño, Felipe and Fidalgo, Eduardo and Alaiz-Rodríguez, Rocío and Alegre, Enrique},
  url       = {https://ieeexplore.ieee.org/abstract/document/10103863},
  year      = {2023},
  date      = {2023-01-01},
  journal   = {IEEE Access},
  volume    = {11},
  pages     = {40779--40789},
  abstract  = {This paper introduces PhiKitA, a novel dataset containing phishing kits and phishing websites generated from these kits. The dataset is used to investigate phishing kit detection, phishing website identification, and the source of phishing websites. The study applied MD5 hashes, fingerprints, and graph representation DOM algorithms to analyze the dataset. The results show that the graph representation algorithm achieved an accuracy of 92.50% for phishing detection, while MD5 hash representation achieved a 39.54% F1 score, indicating its limited effectiveness in distinguishing phishing sources.},
  publisher = {IEEE},
  keywords  = {Cybersecurity, Dataset, phishing detection},
  pubstate  = {published},
  tppubtype = {article}
}
Martínez-Mendoza, Alicia; Sánchez-Paniagua, Manuel; Carofilis-Vasco, Andrés; Jáñez-Martino, Francisco; Fidalgo, Eduardo; Alegre, Enrique
Applying Machine Learning to login URLs for phishing detection Artículo de revista
En: Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023, pp. 487–488, 2023, (Publisher: Universidade de Vigo).
Resumen | Enlaces | BibTeX | Etiquetas: AI, Cybersecurity, machine learning, phishing detection, URL analysis
@inproceedings{martinez-mendoza_applying_2023,
  title     = {Applying Machine Learning to login {URLs} for phishing detection},
  author    = {Martínez-Mendoza, Alicia and Sánchez-Paniagua, Manuel and Carofilis-Vasco, Andrés and Jáñez-Martino, Francisco and Fidalgo, Eduardo and Alegre, Enrique},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9044941},
  year      = {2023},
  date      = {2023-01-01},
  booktitle = {Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023},
  pages     = {487--488},
  abstract  = {This paper explores the application of machine learning for phishing detection using login URLs. By analyzing URL patterns and features, the study aims to differentiate between legitimate and phishing websites. Various machine learning models are evaluated to enhance detection accuracy, providing a proactive approach to cybersecurity threats.},
  publisher = {Universidade de Vigo},
  keywords  = {AI, Cybersecurity, machine learning, phishing detection, URL analysis},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2022
Sánchez-Paniagua, Manuel; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodríguez, Rocío
Phishing websites detection using a novel multipurpose dataset and web technologies features Artículo de revista
En: Expert Systems with Applications, vol. 207, pp. 118010, 2022, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Dataset Creation, LightGBM Classifier, phishing detection, Web Technology Features
@article{sanchez-paniagua_phishing_2022,
  title     = {Phishing websites detection using a novel multipurpose dataset and web technologies features},
  author    = {Sánchez-Paniagua, Manuel and Fidalgo, Eduardo and Alegre, Enrique and Alaiz-Rodríguez, Rocío},
  url       = {https://www.sciencedirect.com/science/article/pii/S0957417422012301},
  year      = {2022},
  date      = {2022-01-01},
  journal   = {Expert Systems with Applications},
  volume    = {207},
  pages     = {118010},
  abstract  = {Phishing attacks are a major challenge in cybersecurity, often involving the hijacking of sensitive data through fraudulent login forms. This paper proposes a new methodology for detecting phishing websites in real-world scenarios using URL, HTML, and web technology features. The authors introduce the Phishing Index Login Websites Dataset (PILWD), an offline dataset containing 134,000 verified samples, which enables researchers to test and compare detection approaches. Using the dataset, a LightGBM classifier with 54 features achieves a 97.95% accuracy in detecting phishing websites. This methodology is independent of third-party services and utilizes new features for improved detection.},
  publisher = {Pergamon},
  keywords  = {Dataset Creation, LightGBM Classifier, phishing detection, Web Technology Features},
  pubstate  = {published},
  tppubtype = {article}
}
Sánchez-Paniagua, Manuel; Fidalgo, Eduardo; Alegre, Enrique; Al-Nabki, Wesam; González-Castro, Víctor
Phishing URL detection: A real-case scenario through login URLs Artículo de revista
En: IEEE Access, vol. 10, pp. 42949–42960, 2022, (Publisher: IEEE).
Resumen | Enlaces | BibTeX | Etiquetas: Dataset Creation, machine learning, phishing detection, URL analysis
@article{sanchez-paniagua_phishing_2022-1,
  title     = {Phishing {URL} detection: A real-case scenario through login {URLs}},
  author    = {Sánchez-Paniagua, Manuel and Fidalgo, Eduardo and Alegre, Enrique and Al-Nabki, Wesam and González-Castro, Víctor},
  url       = {https://ieeexplore.ieee.org/abstract/document/9759382},
  year      = {2022},
  date      = {2022-01-01},
  journal   = {IEEE Access},
  volume    = {10},
  pages     = {42949--42960},
  abstract  = {This paper compares machine learning and deep learning techniques to detect phishing websites through URL analysis. Unlike most current methods, which use only homepages, this study includes URLs from login pages for both legitimate and phishing websites, providing a more realistic scenario. It also demonstrates that existing techniques have high false-positive rates when tested on URLs from legitimate login pages. The authors create a new dataset, Phishing Index Login URL (PILU-90K), and show how older models decrease in accuracy over time. A Logistic Regression model with TF-IDF feature extraction achieves 96.50% accuracy on the login URL dataset.},
  publisher = {IEEE},
  keywords  = {Dataset Creation, machine learning, phishing detection, URL analysis},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Castaño, Felipe; Fidalgo, Eduardo; Alegre, Enrique; Chaves, Deisy; Sánchez-Paniagua, Manuel
State of the art: content-based and hybrid phishing detection Artículo de revista
En: arXiv preprint arXiv:2101.12723, 2021.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Hybrid Phishing, phishing detection
@misc{castano_state_2021,
  title         = {State of the art: content-based and hybrid phishing detection},
  author        = {Castaño, Felipe and Fidalgo, Eduardo and Alegre, Enrique and Chaves, Deisy and Sánchez-Paniagua, Manuel},
  url           = {https://arxiv.org/abs/2101.12723},
  year          = {2021},
  date          = {2021-01-01},
  eprint        = {2101.12723},
  archiveprefix = {arXiv},
  abstract      = {Phishing attacks have evolved and increased over time and, for this reason, the task of distinguishing between a legitimate site and a phishing site is more and more difficult, fooling even the most expert users. The main proposals focused on addressing this problem can be divided into four approaches: List-based, URL based, content-based, and hybrid. In this state of the art, the most recent techniques using web content-based and hybrid approaches for Phishing Detection are reviewed and compared.},
  keywords      = {Cybersecurity, Hybrid Phishing, phishing detection},
  pubstate      = {published},
  tppubtype     = {misc}
}
Castaño, Felipe; Sánchez-Paniagua, Manuel; Delgado, J; Velasco-Mata, Javier; Sepúlveda, A; Fidalgo, Eduardo; Alegre, Enrique
Evaluation of state-of-art phishing detection strategies based on machine learning Artículo de revista
En: Investigación en Ciberseguridad (Castilla-La Mancha). Ediciones de la Universidad De Castilla-La Mancha, 2021.
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, phishing detection
@inproceedings{castano_evaluation_2021,
  title     = {Evaluation of state-of-art phishing detection strategies based on machine learning},
  author    = {Castaño, Felipe and Sánchez-Paniagua, Manuel and Delgado, J. and Velasco-Mata, Javier and Sepúlveda, A. and Fidalgo, Eduardo and Alegre, Enrique},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=yATJZvcAAAAJ:Tiz5es2fbqcC},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  booktitle = {Investigación en Ciberseguridad (Castilla-La Mancha)},
  publisher = {Ediciones de la Universidad de Castilla-La Mancha},
  abstract  = {This paper reviews and evaluates current state-of-the-art phishing detection strategies that use machine learning.},
  keywords  = {Cybersecurity, machine learning, phishing detection},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castaño, Felipe; Fidalgo, Eduardo; Alegre, Enrique; Chaves, Deisy; Sánchez-Paniagua, Manuel
State of the Art: Content-based and Hybrid Phishing Detection Artículo de revista
En: 2021.
Resumen | Enlaces | BibTeX | Etiquetas: Content-based Features, Cybersecurity, deep learning, Hybrid Features, Hybrid Phishing, machine learning, phishing detection
@misc{fidalgo_state_2021,
  title         = {State of the Art: Content-based and Hybrid Phishing Detection},
  author        = {Castaño, Felipe and Fidalgo, Eduardo and Alegre, Enrique and Chaves, Deisy and Sánchez-Paniagua, Manuel},
  url           = {https://arxiv.org/abs/2101.12723},
  year          = {2021},
  date          = {2021-01-01},
  urldate       = {2021-01-01},
  eprint        = {2101.12723},
  archiveprefix = {arXiv},
  abstract      = {Phishing attacks have evolved and increased over time and, for this reason, the task of distinguishing between a legitimate site and a phishing site is more and more difficult, fooling even the most expert users. The main proposals focused on addressing this problem can be divided into four approaches: List-based, URL based, content-based, and hybrid. In this state of the art, the most recent techniques using web content-based and hybrid approaches for Phishing Detection are reviewed and compared.},
  keywords      = {Content-based Features, Cybersecurity, deep learning, Hybrid Features, Hybrid Phishing, machine learning, phishing detection},
  pubstate      = {published},
  tppubtype     = {misc}
}
2020
Sánchez-Paniagua, Manuel; Fidalgo, Eduardo; González-Castro, Víctor; Alegre, Enrique
Impact of current phishing strategies in machine learning models for phishing detection Artículo de revista
En: 13th International Conference on Computational Intelligence in Security for Information Systems (CISIS), pp. 87–96, 2020.
Resumen | Enlaces | BibTeX | Etiquetas: machine learning, NLP, phishing detection, URL
@inproceedings{sanchez-paniagua_impact_2020,
  title     = {Impact of current phishing strategies in machine learning models for phishing detection},
  author    = {Sánchez-Paniagua, Manuel and Fidalgo, Eduardo and González-Castro, Víctor and Alegre, Enrique},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-57805-3_9},
  doi       = {10.1007/978-3-030-57805-3_9},
  year      = {2020},
  date      = {2020-01-01},
  booktitle = {13th International Conference on Computational Intelligence in Security for Information Systems (CISIS)},
  pages     = {87--96},
  abstract  = {Phishing is one of the most widespread attacks based on social engineering. The detection of Phishing using Machine Learning approaches is more robust than the blacklist-based ones, which need regular reports and updates. However, the datasets currently used for training the Supervised Learning approaches have some drawbacks. These datasets only have the landing page of legitimate domains and they do not include the login forms from the websites, which is the most common situation in a real case of Phishing. This makes the performance of Machine Learning-based models to drop, especially when they are tested using login pages.},
  keywords  = {machine learning, NLP, phishing detection, URL},
  pubstate  = {published},
  tppubtype = {inproceedings}
}