Publications
2022
Sánchez-Paniagua, Manuel; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodríguez, Rocío
Phishing websites detection using a novel multipurpose dataset and web technologies features — Artículo de revista
En: Expert Systems with Applications, vol. 207, pp. 118010, 2022, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Dataset Creation, LightGBM Classifier, phishing detection, Web Technology Features
@article{sanchez-paniagua_phishing_2022,
  title      = {Phishing websites detection using a novel multipurpose dataset and web technologies features},
  author     = {Manuel Sánchez-Paniagua and Eduardo Fidalgo and Enrique Alegre and Rocío Alaiz-Rodríguez},
  url        = {https://www.sciencedirect.com/science/article/pii/S0957417422012301},
  year       = {2022},
  date       = {2022-01-01},
  journal    = {Expert Systems with Applications},
  volume     = {207},
  pages      = {118010},
  abstract   = {Phishing attacks are a major challenge in cybersecurity, often involving the hijacking of sensitive data through fraudulent login forms. This paper proposes a new methodology for detecting phishing websites in real-world scenarios using URL, HTML, and web technology features. The authors introduce the Phishing Index Login Websites Dataset (PILWD), an offline dataset containing 134,000 verified samples, which enables researchers to test and compare detection approaches. Using the dataset, a LightGBM classifier with 54 features achieves a 97.95\% accuracy in detecting phishing websites. This methodology is independent of third-party services and utilizes new features for improved detection.},
  note       = {Publisher: Pergamon},
  keywords   = {Dataset Creation, LightGBM Classifier, phishing detection, Web Technology Features},
  pubstate   = {published},
  tppubtype  = {article}
}
Sánchez-Paniagua, Manuel; Fidalgo, Eduardo; Alegre, Enrique; Al-Nabki, Wesam; González-Castro, Víctor
Phishing URL detection: A real-case scenario through login URLs — Artículo de revista
En: IEEE Access, vol. 10, pp. 42949–42960, 2022, (Publisher: IEEE).
Resumen | Enlaces | BibTeX | Etiquetas: Dataset Creation, machine learning, phishing detection, URL analysis
@article{sanchez-paniagua_phishing_2022-1,
  title      = {Phishing {URL} detection: A real-case scenario through login {URLs}},
  author     = {Manuel Sánchez-Paniagua and Eduardo Fidalgo and Enrique Alegre and Wesam Al-Nabki and Víctor González-Castro},
  url        = {https://ieeexplore.ieee.org/abstract/document/9759382},
  year       = {2022},
  date       = {2022-01-01},
  journal    = {IEEE Access},
  volume     = {10},
  pages      = {42949--42960},
  abstract   = {This paper compares machine learning and deep learning techniques to detect phishing websites through URL analysis. Unlike most current methods, which use only homepages, this study includes URLs from login pages for both legitimate and phishing websites, providing a more realistic scenario. It also demonstrates that existing techniques have high false-positive rates when tested on URLs from legitimate login pages. The authors create a new dataset, Phishing Index Login URL (PILU-90K), and show how older models decrease in accuracy over time. A Logistic Regression model with TF-IDF feature extraction achieves 96.50\% accuracy on the login URL dataset.},
  note       = {Publisher: IEEE},
  keywords   = {Dataset Creation, machine learning, phishing detection, URL analysis},
  pubstate   = {published},
  tppubtype  = {article}
}