Publications
2025
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Spam email classification based on cybersecurity potential risk using natural language processing Artículo de revista
En: Knowledge-Based Systems, vol. 310, pp. 112939, 2025, (Publisher: Elsevier).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Email Classification, machine learning, Natural Language Processing, Spam detection
% Journal article, Knowledge-Based Systems vol. 310 (2025).
% pages holds a single number (presumably the article number; confirm).
@article{janez-martino_spam_2025,
  author    = {Francisco Jáñez-Martino and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
  title     = {Spam email classification based on cybersecurity potential risk using natural language processing},
  journal   = {Knowledge-Based Systems},
  volume    = {310},
  pages     = {112939},
  year      = {2025},
  date      = {2025-01-01},
  note      = {Publisher: Elsevier},
  url       = {https://www.sciencedirect.com/science/article/pii/S0950705124015739},
  abstract  = {This study focuses on detecting spam emails, a key vector for cyberattacks. It introduces 56 features based on NLP techniques, grouped into five categories: Headers, Text, Attachments, URLs, and Protocols. A new dataset, SERC, was created for spam risk classification. Using binary classification and regression, the Random Forest classifier achieved the best performance (F1-Score of 0.914), and Random Forest Regressor had the lowest Mean Square Error (0.781). Features from the Headers and Text groups were found to be the most important.},
  keywords  = {Cybersecurity, Email Classification, machine learning, Natural Language Processing, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
Jáñez-Martino, Francisco; Barrón-Cedeño, Alberto; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Muti, Arianna
On persuasion in spam email: A multi-granularity text analysis Artículo de revista
En: Expert Systems with Applications, vol. 265, pp. 125767, 2025, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, Natural Language Processing, Spam detection
% Journal article, Expert Systems with Applications vol. 265 (2025).
% pages holds a single number (presumably the article number; confirm).
@article{janez-martino_persuasion_2025,
  author    = {Francisco Jáñez-Martino and Alberto Barrón-Cedeño and Rocío Alaiz-Rodríguez and Víctor González-Castro and Arianna Muti},
  title     = {On persuasion in spam email: A multi-granularity text analysis},
  journal   = {Expert Systems with Applications},
  volume    = {265},
  pages     = {125767},
  year      = {2025},
  date      = {2025-01-01},
  note      = {Publisher: Pergamon},
  url       = {https://www.sciencedirect.com/science/article/pii/S0957417424026344},
  abstract  = {This paper explores the use of supervised machine learning models to detect persuasion techniques in spam emails, addressing both binary classification (presence/absence of persuasion) and multilabel classification (identifying specific persuasion techniques). The research utilizes natural language processing and adapts propaganda detection methods from news articles, analyzing emails at full-text, sentence, and snippet levels. The study includes the development of a custom spam dataset and fine-tuning of RoBERTa-based models, ultimately aiming to enhance cybersecurity through better understanding of persuasion tactics in malicious emails.},
  keywords  = {Cybersecurity, machine learning, Natural Language Processing, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
2024
Martínez-Mendoza, Alicia; Jáñez-Martino, Francisco; Carofilis, Andrés; Fernández-Robles, Laura; Alegre, Enrique; Fidalgo, Eduardo
Towards Multi-Class Smishing Detection: A Novel Feature Vector Approach and the Smishing-4C Dataset Artículo de revista
En: 2024.
Enlaces | BibTeX | Etiquetas: Multiclass Classification, Smishing Classification, Smishing-4C Dataset, SMS, Text classification
% Smishing-4C dataset paper (2024).
% NOTE(review): no journal or booktitle is recorded for this entry; the URL path
% suggests a SEPLN 2024 paper, but the venue must be confirmed before adding it.
@article{martinez-mendoza_towards_2024,
  author    = {Alicia Martínez-Mendoza and Francisco Jáñez-Martino and Andrés Carofilis and Laura Fernández-Robles and Enrique Alegre and Eduardo Fidalgo},
  title     = {Towards Multi-Class Smishing Detection: A Novel Feature Vector Approach and the Smishing-4C Dataset},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://besaya.infor.uva.es/sepln24/paper06.pdf},
  keywords  = {Multiclass Classification, Smishing Classification, Smishing-4C Dataset, SMS, Text classification},
  pubstate  = {published},
  tppubtype = {article}
}
Medina-Martínez, Gabriel; Fernández-Robles, Laura; Castejón-Limas, Manuel
Repositorio Git para aprendizaje basado en resolución de problemas. Asignatura de Dirección de Proyectos Artículo de revista
En: Innovación docente en la Universidad de León, pp. 317–324, 2024, (Publisher: Servicio de Publicaciones).
Resumen | Enlaces | BibTeX | Etiquetas: Aprendizaje Basado en Problemas, Dirección de Proyectos, Git, Simulación de Proyectos, Virtualización
% Article in "Innovación docente en la Universidad de León" (2024).
% The page range is written with the BibTeX double hyphen rather than a literal en-dash.
@article{medina-martinez_repositorio_2024,
  author    = {Gabriel Medina-Martínez and Laura Fernández-Robles and Manuel Castejón-Limas},
  title     = {Repositorio Git para aprendizaje basado en resolución de problemas. Asignatura de Dirección de Proyectos},
  journal   = {Innovación docente en la Universidad de León},
  pages     = {317--324},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Servicio de Publicaciones},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9318963},
  abstract  = {Esta experiencia propone la implementación de un sistema de aprendizaje autónomo y guiado basado en la resolución de problemas específicos de gestión de plazos, costes y recursos. A través de un entorno de simulación, los estudiantes exploran los efectos de sus decisiones organizativas en proyectos. El sistema emplea un servidor Git para almacenar casos de estudio y un servidor de máquinas virtuales para ejecutar simulaciones personalizadas. Aplicado en la asignatura de Dirección de Proyectos en ingenierías, ha demostrado mejorar significativamente la calidad del aprendizaje, aumentando el interés estudiantil y los resultados académicos. Se planea expandir el repositorio en el futuro.},
  keywords  = {Aprendizaje Basado en Problemas, Dirección de Proyectos, Git, Simulación de Proyectos, Virtualización},
  pubstate  = {published},
  tppubtype = {article}
}
Díaz, Daniel; Al-Nabki, Wesam; Fernández-Robles, Laura; Alegre, Enrique; Fidalgo, Eduardo; Martínez-Mendoza, Alicia
SpamClus: An Agglomerative Clustering Algorithm for Spam Email Campaigns Detection Artículo de revista
En: International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024), 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Agglomerative Clustering, Cybersecurity, Email Classification, machine learning, Spam detection, SpamClus
% Conference paper (NLPAICS 2024). The conference name belongs in booktitle of an
% inproceedings entry, not in journal. The former abstract only repeated the venue
% name, so it is omitted until the real abstract is available.
@inproceedings{diaz_spamclus_2024,
  author    = {Daniel Díaz and Wesam Al-Nabki and Laura Fernández-Robles and Enrique Alegre and Eduardo Fidalgo and Alicia Martínez-Mendoza},
  title     = {SpamClus: An Agglomerative Clustering Algorithm for Spam Email Campaigns Detection},
  booktitle = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=100&pagesize=100&sortby=title&citation_for_view=yATJZvcAAAAJ:t7zJ5fGR-2UC},
  keywords  = {Agglomerative Clustering, Cybersecurity, Email Classification, machine learning, Spam detection, SpamClus},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castejón-Limas, Manuel; Medina-Martínez, Gabriel; Riego del Castillo, Virginia; Fernández-Robles, Laura
The formula for the completion time of project networks Artículo de revista
En: arXiv preprint arXiv:2410.10252, 2024.
Resumen | Enlaces | BibTeX | Etiquetas: CMP, Duration, Matrix, Path, PERT, Project Networks, Project Scheduling
% arXiv preprint 2410.10252. The arXiv identifier is stored in eprint/archiveprefix
% instead of journal. The compound surname Riego del Castillo is braced so that
% "del" is not parsed as a von particle, which would leave only Castillo as the surname.
@misc{castejon-limas_formula_2024,
  author        = {Castejón-Limas, Manuel and Medina-Martínez, Gabriel and {Riego del Castillo}, Virginia and Fernández-Robles, Laura},
  title         = {The formula for the completion time of project networks},
  year          = {2024},
  date          = {2024-01-01},
  eprint        = {2410.10252},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2410.10252},
  abstract      = {This paper models project completion time using linear transformations and singular value decomposition (SVD) to analyze the topological relevance of paths and activities. It introduces spectral networks to capture the project's fundamental structure and proposes a project stress metric for comparing alternatives. The Moore-Penrose inverse is used to determine activity durations for a given path duration. A systematic review supports the novelty of these concepts.},
  keywords      = {CMP, Duration, Matrix, Path, PERT, Project Networks, Project Scheduling},
  pubstate      = {published},
  tppubtype     = {misc}
}
Gangwar, Abhishek; González-Castro, Víctor; Alegre, Enrique; Fidalgo, Eduardo; Martínez-Mendoza, Alicia
DeepHSAR: Semi-supervised fine-grained learning for multi-label human sexual activity recognition Artículo de revista
En: Information Processing & Management, vol. 61, no 5, pp. 103800, 2024, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Fine-grained Classification, Multi-label Classification, pornography detection, Semi-supervised Classification, Sexual Activity Detection
% Journal article, Information Processing and Management vol. 61 no. 5 (2024).
% The ampersand in the journal name is escaped so LaTeX does not treat it as an
% alignment character.
@article{gangwar_deephsar_2024,
  author    = {Abhishek Gangwar and Víctor González-Castro and Enrique Alegre and Eduardo Fidalgo and Alicia Martínez-Mendoza},
  title     = {DeepHSAR: Semi-supervised fine-grained learning for multi-label human sexual activity recognition},
  journal   = {Information Processing \& Management},
  volume    = {61},
  number    = {5},
  pages     = {103800},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Pergamon},
  url       = {https://www.sciencedirect.com/science/article/pii/S0306457324001596},
  abstract  = {This paper presents DeepHSAR, a deep learning framework for semi-supervised Human Sexual Activity Recognition (HSAR), using the SexualActs-150k dataset with 150k images. It employs two classification streams for global and fine-grained recognition, achieving an F1-score of 79.29%. The method outperforms previous approaches and achieves 99.85% accuracy on the NPDI Pornography-2k dataset.},
  keywords  = {Fine-grained Classification, Multi-label Classification, pornography detection, Semi-supervised Classification, Sexual Activity Detection},
  pubstate  = {published},
  tppubtype = {article}
}
Bennabhaktula, Guru Swaroop; Alegre, Enrique; Strisciuglio, Nicola; Azzopardi, George
PushPull-Net: Inhibition-driven ResNet robust to image corruptions Artículo de revista
En: International Conference on Pattern Recognition, pp. 391–408, 2024, (Publisher: Springer Nature Switzerland Cham).
Resumen | Enlaces | BibTeX | Etiquetas: Convolutional Neural Networks, Image Corruption, ResNet, Visual Cortex Simulation
% Conference paper (International Conference on Pattern Recognition, 2024).
% The venue is held in booktitle, the page range uses the double hyphen, and the
% publisher and its city are split into publisher/address instead of a free-form note.
@inproceedings{swaroop_bennabhaktula_pushpull-net_2024,
  author    = {Guru Swaroop Bennabhaktula and Enrique Alegre and Nicola Strisciuglio and George Azzopardi},
  title     = {PushPull-Net: Inhibition-driven ResNet robust to image corruptions},
  booktitle = {International Conference on Pattern Recognition},
  pages     = {391--408},
  year      = {2024},
  date      = {2024-01-01},
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=en&user=opCbArQAAAAJ&cstart=100&pagesize=100&sortby=title&citation_for_view=opCbArQAAAAJ:NXb4pA-qfm4C},
  abstract  = {This paper introduces a new computational unit called PushPull-Conv, applied in the first layer of a ResNet architecture. Inspired by anti-phase inhibition in the visual cortex, this unit uses a pair of complementary filters: a push kernel and a pull kernel. The push kernel learns to respond to specific stimuli, while the pull kernel reacts to opposite contrasts. This design enhances stimulus selectivity and improves robustness by inhibiting responses in regions without preferred stimuli. Integrating PushPull-Conv into ResNets improves their resilience to image corruption.},
  keywords  = {Convolutional Neural Networks, Image Corruption, ResNet, Visual Cortex Simulation},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Blanco-Medina, Pablo; Carofilis, Andrés; Fidalgo, Eduardo; Alegre, Enrique
Preprocesado de imagen y OCR para mejorar deteccion de smishing Artículo de revista
En: Jornadas de Automática, no 45, 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Apoyo a Operadores Humanos, Aprendizaje Profundo, Redes Sociales, Seguridad, Sistemas de Control y Automatización para la Ayuda Internacional
% Article in Jornadas de Automática, no. 45 (2024): URL extraction from smishing screenshots.
@article{blanco-medina_preprocesado_2024,
  author    = {Pablo Blanco-Medina and Andrés Carofilis and Eduardo Fidalgo and Enrique Alegre},
  title     = {Preprocesado de imagen y OCR para mejorar deteccion de smishing},
  journal   = {Jornadas de Automática},
  number    = {45},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://revistas.udc.gal/index.php/JA_CEA/article/view/10955},
  abstract  = {La globalización de las tecnologías de comunicación ha facilitado el aumento de las estafas por phishing, en particular a través de SMS fraudulentos conocidos como Smishing, que suplantan a compañías para robar datos o realizar acciones no autorizadas. Este trabajo propone una estrategia para extraer automáticamente URLs de capturas de pantalla de Smishing, combinando técnicas de visión artificial y mecanismos de detección de URL. Evaluado en 117 capturas de pantalla con 121 URLs, se obtuvo una precisión del 61.16% en la extracción de URLs sospechosas.},
  keywords  = {Apoyo a Operadores Humanos, Aprendizaje Profundo, Redes Sociales, Seguridad, Sistemas de Control y Automatización para la Ayuda Internacional},
  pubstate  = {published},
  tppubtype = {article}
}
Blanco-Medina, Pablo; Carofilis-Vasco, Andrés; Fidalgo, Eduardo; Alegre, Enrique
Clasificación de capturas de smishing con aprendizaje profundo e IRIS Artículo de revista
En: Jornadas de Automática, no 45, 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Apoyo a Operadores Humanos, Aprendizaje Profundo, Automatización para la Ayuda Internacional, Redes Sociales, Seguridad
% Article in Jornadas de Automática, no. 45 (2024): classification of smishing screenshots.
@article{blanco-medina_clasificacion_2024,
  author    = {Pablo Blanco-Medina and Andrés Carofilis-Vasco and Eduardo Fidalgo and Enrique Alegre},
  title     = {Clasificación de capturas de smishing con aprendizaje profundo e IRIS},
  journal   = {Jornadas de Automática},
  number    = {45},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://revistas.udc.gal/index.php/JA_CEA/article/view/10904},
  abstract  = {El Smishing es una variante del Phishing que usa mensajes de texto y smartphones para realizar actividades maliciosas. Los equipos de respuesta ante emergencias informáticas pueden beneficiarse de una herramienta para clasificar capturas de pantalla de smishing antes de extraer su contenido. Se compararon Redes Neuronales Convolucionales y Vision Transformers para clasificar estas capturas en dos categorías: texto dividido en líneas y texto unido. El conjunto de datos IRIS-244, con 244 capturas de smishing, se utilizó en el estudio, encontrando que la arquitectura Xception logró el mejor rendimiento con una precisión del 78.36%.},
  keywords  = {Apoyo a Operadores Humanos, Aprendizaje Profundo, Automatización para la Ayuda Internacional, Redes Sociales, Seguridad},
  pubstate  = {published},
  tppubtype = {article}
}
Al Nabki, Mhd Wesam; Jáñez Martino, Francisco; Fidalgo Fernández, Eduardo; Alegre, Enrique; Alaiz Rodríguez, Rocío
A review of Spotting Child Sexual Exploitation Material using File Names and their Path Artículo de revista
En: IX Jornadas Nacionales de Investigación En Ciberseguridad, pp. 502–503, 2024, (Publisher: Antonia M. Reina Quintero).
Resumen | Enlaces | BibTeX | Etiquetas: certifications, Cybersecurity, higher education, training
% Conference contribution (IX Jornadas Nacionales de Investigación En Ciberseguridad, 2024).
% Authors are given in "Last, First" form so the two-word surnames are not split,
% the venue is held in booktitle, and the page range uses the double hyphen.
% NOTE(review): the abstract and keywords describe cybersecurity training, not the
% topic in the title; they look like they belong to another record. Confirm against the source.
% NOTE(review): the note names a person as publisher; possibly the proceedings editor. Confirm.
@inproceedings{al_nabki_review_2024,
  author    = {Al Nabki, Mhd Wesam and Jáñez Martino, Francisco and Fidalgo Fernández, Eduardo and Alegre, Enrique and Alaiz Rodríguez, Rocío},
  title     = {A review of Spotting Child Sexual Exploitation Material using File Names and their Path},
  booktitle = {IX Jornadas Nacionales de Investigación En Ciberseguridad},
  pages     = {502--503},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Antonia M. Reina Quintero},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9633501},
  abstract  = {Cybersecurity training is a global priority due to the current shortage of highly skilled professionals. This conference aims to address various training options available, such as courses, workshops, certifications, and undergraduate and postgraduate education programs, with a particular focus on the latter. The discussion will also cover the challenges and future perspectives needed to meet the growing demand for cybersecurity professionals.},
  keywords  = {certifications, Cybersecurity, higher education, training},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jáñez Martino, Francisco; Carofilis, Andrés; Alaiz Rodríguez, Rocío; González Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Spam hierarchical clustering for campaigns spotting and topic-based classification [Póster] Artículo de revista
En: 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Logistic Regression, Multi-classification, Spam detection
% Poster (2024), Universidad de Sevilla repository record.
% Authors are given in "Last, First" form so the two-word surnames (Jáñez Martino,
% Alaiz Rodríguez, González Castro) are not reduced to their final word.
% NOTE(review): no journal or booktitle is recorded; confirm the venue and entry type.
@article{janez_martino_spam_2024,
  author    = {Jáñez Martino, Francisco and Carofilis, Andrés and Alaiz Rodríguez, Rocío and González Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
  title     = {Spam hierarchical clustering for campaigns spotting and topic-based classification [Póster]},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  url       = {https://idus.us.es/items/9828eae7-9cec-4574-8863-99e9020e1770},
  abstract  = {This article develops spam email multiclassification systems for cybersecurity, using two datasets: SPEMC-15K-E (English) and SPEMC-15K-S (Spanish). The datasets are classified into eleven categories. The best results for English (F1-score: 0.953, 94.6% accuracy) were achieved with TF-IDF and Logistic Regression, while for Spanish, TF-IDF and Naïve Bayes achieved an F1-score of 0.945 and 98.5% accuracy. TF-IDF with Logistic Regression also had the fastest processing time (2ms per email for English and 2.2ms for Spanish).},
  keywords  = {Cybersecurity, Logistic Regression, Multi-classification, Spam detection},
  pubstate  = {published},
  tppubtype = {article}
}
Jáñez-Martino, Francisco; Carofilis Vasco, Roberto Andrés; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre-Gutiérrez, Enrique
Spam hierarchical clustering for campaigns spotting and topic-based classification Artículo de revista
En: IX Jornadas Nacionales de Investigación En Ciberseguridad, pp. 490–491, 2024, (Publisher: Antonia M. Reina Quintero).
Resumen | Enlaces | BibTeX | Etiquetas: Campaigns Spotting, Ciberseguridad, Jornadas Nacionales, Spam Hierarchical Clustering, Topic-Based Classification
% Conference contribution (IX Jornadas Nacionales de Investigación En Ciberseguridad, 2024).
% Author spellings follow the rest of the file (Jáñez, Alaiz), the compound surname
% Carofilis Vasco is kept together via "Last, First" form, the venue is held in
% booktitle, and the page range uses the double hyphen. The former abstract only
% repeated the venue name and year, so it is omitted.
% NOTE(review): the note names a person as publisher; possibly the proceedings editor. Confirm.
@inproceedings{janez-martino_spam_2024,
  author    = {Jáñez-Martino, Francisco and Carofilis Vasco, Roberto Andrés and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Fidalgo, Eduardo and Alegre-Gutiérrez, Enrique},
  title     = {Spam hierarchical clustering for campaigns spotting and topic-based classification},
  booktitle = {IX Jornadas Nacionales de Investigación En Ciberseguridad},
  pages     = {490--491},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Antonia M. Reina Quintero},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9633481},
  keywords  = {Campaigns Spotting, Ciberseguridad, Jornadas Nacionales, Spam Hierarchical Clustering, Topic-Based Classification},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
García-Ordás, María Teresa; Alegre-Gutiérrez, Enrique; Alaiz-Rodríguez, Rocío; González-Castro, Víctor
Tool wear monitoring using an online, automatic and low cost system based on local texture Artículo de revista
En: arXiv preprint arXiv:2402.05977, 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Computer vision, machine learning, Milling, Tool wear
% arXiv preprint 2402.05977. The arXiv identifier is stored in eprint/archiveprefix
% instead of journal, and the surname Alaiz-Rodríguez is spelled as in the other entries.
@misc{garcia-ordas_tool_2024,
  author        = {García-Ordás, María Teresa and Alegre-Gutiérrez, Enrique and Alaiz-Rodríguez, Rocío and González-Castro, Víctor},
  title         = {Tool wear monitoring using an online, automatic and low cost system based on local texture},
  year          = {2024},
  date          = {2024-01-01},
  eprint        = {2402.05977},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2402.05977},
  abstract      = {This work presents a fast and cost-effective method using computer vision and machine learning to assess cutting tool wear in edge profile milling. A new dataset of 577 images was created, including functional and disposable cutting edges. The method divides the edges into regions (Wear Patches) and classifies them using texture descriptors (LBP). A Support Vector Machine (SVM) achieved 90.26% accuracy in detecting worn tools, demonstrating strong potential for automatic wear monitoring in milling.},
  keywords      = {Computer vision, machine learning, Milling, Tool wear},
  pubstate      = {published},
  tppubtype     = {misc}
}
Nejad, Amin Shoari; Alaiz-Rodríguez, Rocío; McCarthy, Gerard D; Kelleher, Brian; Grey, Anthony; Parnell, Andrew
SERT: A transformer based model for multivariate temporal sensor data with missing values for environmental monitoring Artículo de revista
En: Computers & Geosciences, vol. 188, pp. 105601, 2024, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Environmental Monitoring, Missing Data, Spatio-Temporal Forecasting, Transformer Models
% Journal article, Computers and Geosciences vol. 188 (2024).
% The ampersand in the journal name is escaped so LaTeX does not treat it as an
% alignment character.
% NOTE(review): the first author may use the compound surname Shoari Nejad; confirm
% before changing the name form.
@article{nejad_sert_2024,
  author    = {Amin Shoari Nejad and Rocío Alaiz-Rodríguez and Gerard D McCarthy and Brian Kelleher and Anthony Grey and Andrew Parnell},
  title     = {SERT: A transformer based model for multivariate temporal sensor data with missing values for environmental monitoring},
  journal   = {Computers \& Geosciences},
  volume    = {188},
  pages     = {105601},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Pergamon},
  url       = {https://www.sciencedirect.com/science/article/pii/S0098300424000840},
  abstract  = {This research focuses on environmental monitoring and introduces two models for spatio-temporal forecasting that can handle missing values in sensor data without the need for imputation. The first model, SERT (Spatio-temporal Encoder Representations from Transformers), utilizes a transformer-based approach. The second, SST-ANN (Sparse Spatio-Temporal Artificial Neural Network), is a simpler and more interpretable model. Extensive experiments show that these models perform competitively or better than existing state-of-the-art models for multivariate spatio-temporal forecasting.},
  keywords  = {Environmental Monitoring, Missing Data, Spatio-Temporal Forecasting, Transformer Models},
  pubstate  = {published},
  tppubtype = {article}
}
Castaño, Felipe; Gil Lerchundi, Amaia; Orduna Urrutia, Raúl; Fidalgo, Eduardo; Alaiz Rodríguez, Rocío
Automating cybersecurity TTP classification based on unstructured attack descriptions Artículo de revista
En: Jornadas Nacionales de Investigación en Ciberseguridad (JNIC) (9ª. 2024. Sevilla), pp. 46–50, 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: BERT, CTI, cyber threat intelligence, machine learning, SOC operations
% Conference paper (JNIC, 9th edition, Sevilla, 2024).
% Fixes: title typo (unstructured), keyword typo (machine learning), page range moved
% out of the venue string into pages, venue held in booktitle, publisher in its own
% field, and authors in "Last, First" form so two-word surnames stay together.
% The first author's surname is spelled Castaño as in the other entries of this file.
@inproceedings{castano_automating_2024,
  author    = {Castaño, Felipe and Gil Lerchundi, Amaia and Orduna Urrutia, Raúl and Fidalgo, Eduardo and Alaiz Rodríguez, Rocío},
  title     = {Automating cybersecurity TTP classification based on unstructured attack descriptions},
  booktitle = {Jornadas Nacionales de Investigación en Ciberseguridad (JNIC) (9ª. 2024. Sevilla)},
  pages     = {46--50},
  year      = {2024},
  date      = {2024-01-01},
  publisher = {Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  url       = {https://idus.us.es/items/1566b428-106f-4ace-8d17-0835566c60bf},
  abstract  = {This paper introduces WAVE-27K, a large dataset of unstructured CTI descriptions covering 27 MITRE techniques and 7 tactics. It contains 22,539 single-technique samples and 5,262 multi-technique samples, making it the largest dataset in its category. A BERT-based model trained on WAVE-27K achieved a 97% micro F1-score, demonstrating its quality for machine learning applications in cybersecurity.},
  keywords  = {BERT, CTI, cyber threat intelligence, machine learning, SOC operations},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Jáñez-Martino, Francisco; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Carofilis-Vasco, Andrés; Martínez-Mendoza, Alicia
Comparative Analysis of Natural Language Processing Models for Malware Spam Email Identification Artículo de revista
En: International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024), 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Artificial Intelligence, Cybersecurity, Natural Language Processing
% Conference paper (NLPAICS 2024). The conference name belongs in booktitle of an
% inproceedings entry, not in journal. The former abstract only repeated the venue
% name, so it is omitted until the real abstract is available.
@inproceedings{janez-martino_comparative_2024,
  author    = {Francisco Jáñez-Martino and Eduardo Fidalgo and Rocío Alaiz-Rodríguez and Andrés Carofilis-Vasco and Alicia Martínez-Mendoza},
  title     = {Comparative Analysis of Natural Language Processing Models for Malware Spam Email Identification},
  booktitle = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=yATJZvcAAAAJ:z_wVstp3MssC},
  keywords  = {Artificial Intelligence, Cybersecurity, Natural Language Processing},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Delgado, Juan José; Fidalgo, Eduardo; Alegre, Enrique; Carofilis-Vasco, Andrés; Martínez-Mendoza, Alicia
CECILIA: Enhancing CSIRT Effectiveness with Transformer-Based Cyber Incident Classification Artículo de revista
En: International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024), 2024.
Resumen | Enlaces | BibTeX | Etiquetas: CECILIA, Cyber Incident Classification, Cybersecurity
% Conference paper (NLPAICS 2024). The conference name belongs in booktitle of an
% inproceedings entry, not in journal. The former abstract only repeated the venue
% name, so it is omitted until the real abstract is available.
@inproceedings{delgado_cecilia_2024,
  author    = {Juan José Delgado and Eduardo Fidalgo and Enrique Alegre and Andrés Carofilis-Vasco and Alicia Martínez-Mendoza},
  title     = {CECILIA: Enhancing CSIRT Effectiveness with Transformer-Based Cyber Incident Classification},
  booktitle = {International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security (NLPAICS 2024)},
  year      = {2024},
  date      = {2024-01-01},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=yATJZvcAAAAJ:XD-gHx7UXLsC},
  keywords  = {CECILIA, Cyber Incident Classification, Cybersecurity},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Alegre, Enrique; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Castaño, Felipe; Martínez-Mendoza, Alicia
Familiarity Analysis and Phishing Website Detection using PhiKitA Dataset Artículo de revista
En: IX Jornadas Nacionales de Investigación En Ciberseguridad, pp. 442–443, 2024, (Publisher: Antonia M. Reina Quintero).
Resumen | Enlaces | BibTeX | Etiquetas: Certificaciones, Ciberseguridad, Jornadas Nacionales de Investigación
% Conference contribution (IX Jornadas Nacionales de Investigación En Ciberseguridad, 2024).
% The venue is held in booktitle, the page range uses the double hyphen, and the
% keyword typo "Jornads" is corrected.
% NOTE(review): the abstract and keywords describe cybersecurity training, not the
% PhiKitA topic in the title; they look like they belong to another record. Confirm.
% NOTE(review): the note names a person as publisher; possibly the proceedings editor. Confirm.
@inproceedings{alegre_familiarity_2024,
  author    = {Enrique Alegre and Eduardo Fidalgo and Rocío Alaiz-Rodríguez and Felipe Castaño and Alicia Martínez-Mendoza},
  title     = {Familiarity Analysis and Phishing Website Detection using PhiKitA Dataset},
  booktitle = {IX Jornadas Nacionales de Investigación En Ciberseguridad},
  pages     = {442--443},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Antonia M. Reina Quintero},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9633478},
  abstract  = {La formación es clave para el desarrollo de la ciberseguridad ante la escasez de profesionales altamente calificados. Esta conferencia analiza las distintas opciones de capacitación, incluyendo cursos, entrenamientos, certificaciones y programas de educación superior, con énfasis en pregrado y posgrado. También se examinan los retos y perspectivas para satisfacer la creciente demanda en el sector.},
  keywords  = {Certificaciones, Ciberseguridad, Jornadas Nacionales de Investigación},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castaño, Felipe; Martínez Mendoza, Alicia; Fidalgo, Eduardo; Alaiz Rodríguez, Rocío; Alegre, Enrique
Familiarity Analysis and Phishing Website Detection using PhiKitA Dataset [Póster] Artículo de revista
En: 2024, (Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, machine learning, PhiKitA Dataset, phishing detection
% Poster (2024), Universidad de Sevilla repository record.
% Authors are given in "Last, First" form so the two-word surnames (Martínez Mendoza,
% Alaiz Rodríguez) stay together, and the keyword is spelled PhiKitA as in the title.
% NOTE(review): no journal or booktitle is recorded; confirm the venue and entry type.
@article{castano_familiarity_2024,
  author    = {Castaño, Felipe and Martínez Mendoza, Alicia and Fidalgo, Eduardo and Alaiz Rodríguez, Rocío and Alegre, Enrique},
  title     = {Familiarity Analysis and Phishing Website Detection using PhiKitA Dataset [Póster]},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Universidad de Sevilla. Escuela Técnica Superior de Ingeniería Informática},
  url       = {https://idus.us.es/items/04850276-e785-4039-977b-0c43806ac349},
  abstract  = {Phishing kits enable attackers to launch phishing campaigns more efficiently. This paper introduces PhiKitA, a dataset of phishing kits and the websites they generate. Three experiments were conducted: familiarity analysis, phishing website detection, and phishing kit classification, using MD5 hashes, fingerprints, and graph-based DOM representation. Results show that phishing website detection achieved 92.50% accuracy, while phishing kit classification proved less effective due to insufficient extracted information.},
  keywords  = {Cybersecurity, machine learning, PhiKitA Dataset, phishing detection},
  pubstate  = {published},
  tppubtype = {article}
}
Al-Nabki, Mhd Wesam; Fidalgo, Eduardo; Alegre, Enrique; Delany, Sarah Jane; Jáñez-Martino, Francisco
Classifying the content of online notepad services using active learning Artículo de revista
En: Journal of Intelligent Information Systems, pp. 1–27, 2024, (Publisher: Springer US).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Illegal Activities, machine learning, Pastebin, Text classification
% Journal article, Journal of Intelligent Information Systems (2024).
% The page range uses the double hyphen, and the DOI embedded in the URL is also
% recorded in its own doi field.
@article{al-nabki_classifying_2024,
  author    = {Mhd Wesam Al-Nabki and Eduardo Fidalgo and Enrique Alegre and Sarah Jane Delany and Francisco Jáñez-Martino},
  title     = {Classifying the content of online notepad services using active learning},
  journal   = {Journal of Intelligent Information Systems},
  pages     = {1--27},
  year      = {2024},
  date      = {2024-01-01},
  note      = {Publisher: Springer US},
  doi       = {10.1007/s10844-024-00902-8},
  url       = {https://link.springer.com/article/10.1007/s10844-024-00902-8},
  abstract  = {This paper proposes a cascading classification system with Active Learning to identify suspicious activities on Pastebin. The model classifies texts into code snippets, readability, and suspicious or illegal activities. It introduces the INSPECT-3.8M dataset, containing 3.8 million labeled samples. This approach helps law enforcement agencies detect and block illegal content on Pastebin before it spreads.},
  keywords  = {Cybersecurity, Illegal Activities, machine learning, Pastebin, Text classification},
  pubstate  = {published},
  tppubtype = {article}
}
Jáñez Martino, Francisco
Analysis and classification of spam email using Artificial Intelligence to identify cyberthreats Artículo de revista
En: Procesamiento del Lenguaje Natural, vol. 72, pp. 155–158, 2024.
Resumen | Enlaces | BibTeX | Etiquetas: Academic Excellence, International Research, Ph.D. Thesis, Universidad de León
@article{janez_martino_analysis_2024,
title = {Analysis and classification of spam email using Artificial Intelligence to identify cyberthreats},
author = {Jáñez Martino, Francisco},
url = {http://journal.sepln.org/sepln/ojs/ojs/index.php/pln/article/view/6586},
year = {2024},
date = {2024-01-01},
journal = {Procesamiento del Lenguaje Natural},
volume = {72},
pages = {155--158},
abstract = {Summary of the Ph.D. thesis written by Francisco Jáñez Martino and supervised by Prof. Dra. Rocío Alaiz Rodríguez and Dr. Víctor González Castro at Universidad de León. The defense of the thesis was in León (Spain) in 21st of December 2023 by a committee formed by Dr. Arturo Montejo Ráez (Universidad de Jaén, Spain), Dr. Petr Motlicek (Idiap Research Institute, Switzerland), and Dra. Laura Fernández Robles (Universidad de León, Spain). An international mention was garnered following a six-month tenure at the Universitá di Bologna under the supervision of Dr. Alberto Barrón Cedeño. This Ph.D. thesis was awarded an outstanding Cum Laude grade.},
keywords = {Academic Excellence, International Research, Ph.D. Thesis, Universidad de León},
pubstate = {published},
tppubtype = {article}
}
2023
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
A review of spam email detection: analysis of spammer strategies and the dataset shift problem Artículo de revista
En: Artificial Intelligence Review, vol. 56, no 2, pp. 1145–1173, 2023.
Resumen | Enlaces | BibTeX | Etiquetas:
@article{Jáñez-Martino2023,
title = {A review of spam email detection: analysis of spammer strategies and the dataset shift problem},
author = {Francisco Jáñez-Martino and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
url = {https://doi.org/10.1007/s10462-022-10195-4},
doi = {10.1007/s10462-022-10195-4},
year = {2023},
date = {2023-01-01},
journal = {Artificial Intelligence Review},
volume = {56},
number = {2},
pages = {1145--1173},
abstract = {Spam emails have been traditionally seen as just annoying and unsolicited emails containing advertisements, but they increasingly include scams, malware or phishing. In order to ensure the security and integrity for the users, organisations and researchers aim to develop robust filters for spam email detection. Recently, most spam filters based on machine learning algorithms published in academic journals report very high performance, but users are still reporting a rising number of frauds and attacks via spam emails. Two main challenges can be found in this field: (a) it is a very dynamic environment prone to the dataset shift problem and (b) it suffers from the presence of an adversarial figure, i.e. the spammer. Unlike classical spam email reviews, this one is particularly focused on the problems that this constantly changing environment poses. Moreover, we analyse the different spammer strategies used for contaminating the emails, and we review the state-of-the-art techniques to develop filters based on machine learning. Finally, we empirically evaluate and present the consequences of ignoring the matter of dataset shift in this practical field. Experimental results show that this shift may lead to severe degradation in the estimated generalisation performance, with error rates reaching values up to 48.81%.},
pubstate = {published},
tppubtype = {article}
}
Joshi, Akanksha; Fidalgo, Eduardo; Alegre, Enrique; Fernández-Robles, Laura
DeepSumm: Exploiting topic models and sequence to sequence networks for extractive text summarization Artículo de revista
En: Expert Systems with Applications, vol. 211, pp. 118442, 2023, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: deep learning, Extractive Summarization, Topic Modeling, Word embedding
@article{joshi_deepsumm_2023,
title = {DeepSumm: Exploiting topic models and sequence to sequence networks for extractive text summarization},
author = {Akanksha Joshi and Eduardo Fidalgo and Enrique Alegre and Laura Fernández-Robles},
url = {https://www.sciencedirect.com/science/article/pii/S0957417422015391},
doi = {10.1016/j.eswa.2022.118442},
year = {2023},
date = {2023-01-01},
journal = {Expert Systems with Applications},
volume = {211},
pages = {118442},
abstract = {This paper introduces DeepSumm, a method for extractive text summarization that combines topic modeling and word embeddings to improve summary quality. DeepSumm uses topic vectors and sequence networks to capture both local and global semantics in a document. It calculates scores for each sentence using Sentence Topic Score (STS), Sentence Content Score (SCS), Sentence Novelty Score (SNS), and Sentence Position Score (SPS), and combines them into a Final Sentence Score (FSS). The method outperforms existing approaches on the DUC 2002 and CNN/DailyMail datasets with improved ROUGE scores.},
note = {Publisher: Pergamon},
keywords = {deep learning, Extractive Summarization, Topic Modeling, Word embedding},
pubstate = {published},
tppubtype = {article}
}
Al-Nabki, MHD Wesam; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodriguez, Rocio
Author Correction: Short text classification approach to identify child sexual exploitation material Artículo de revista
En: Scientific Reports, vol. 13, no 1, pp. 17840, 2023, (Publisher: Nature Publishing Group UK London).
Resumen | Enlaces | BibTeX | Etiquetas: CSEM detection, law enforcement, machine learning, text classification
@article{al-nabki_author_2023,
title = {Author Correction: Short text classification approach to identify child sexual exploitation material},
author = {MHD Wesam Al-Nabki and Eduardo Fidalgo and Enrique Alegre and Rocio Alaiz-Rodriguez},
url = {https://www.nature.com/articles/s41598-023-45265-2},
year = {2023},
date = {2023-01-01},
journal = {Scientific Reports},
volume = {13},
number = {1},
pages = {17840},
abstract = {This paper presents a method to identify Child Sexual Exploitation Material (CSEM) files by analyzing file names and paths instead of content, aiding law enforcement in time-sensitive investigations. The approach tackles obfuscation using character n-grams, binary, and orthographic features. Two classification strategies are proposed: one combining separate file name and path classifiers, and another iterating over the path. Six machine learning and deep learning models were tested, with the best achieving an F1 score of 0.988, making it a promising tool for law enforcement agencies.},
note = {Publisher: Nature Publishing Group UK London},
keywords = {CSEM detection, law enforcement, machine learning, text classification},
pubstate = {published},
tppubtype = {article}
}
Jáñez Martino, Francisco; Alaiz Rodríguez, Rocío; González Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
A review of spam email detection: analysis of spammer strategies and the dataset shift problem Artículo de revista
En: Artificial Intelligence Review, vol. 56, no 2, pp. 1145–1173, 2023, (Publisher: Springer Netherlands Dordrecht).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, dataset shift, machine learning, Spam detection
@article{janez_martino_review_2023,
title = {A review of spam email detection: analysis of spammer strategies and the dataset shift problem},
author = {Jáñez Martino, Francisco and Alaiz Rodríguez, Rocío and González Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
url = {https://link.springer.com/article/10.1007/s10462-022-10195-4},
doi = {10.1007/s10462-022-10195-4},
year = {2023},
date = {2023-01-01},
journal = {Artificial Intelligence Review},
volume = {56},
number = {2},
pages = {1145--1173},
abstract = {Spam emails, which once were mainly an annoyance, now increasingly contain scams, malware, and phishing attempts. Despite high-performing spam filters based on machine learning, users continue to report rising incidents of fraud and attacks via spam. This paper highlights two key challenges in spam email detection: the dynamic nature of the environment, leading to dataset shift, and the presence of adversarial actors (spammers). The review focuses on the impact of these challenges and examines various spammer strategies and state-of-the-art techniques for developing robust filters. Experimental results show that ignoring dataset shift can severely degrade the performance of spam filters, leading to high error rates.},
note = {Publisher: Springer Netherlands Dordrecht},
keywords = {Cybersecurity, dataset shift, machine learning, Spam detection},
pubstate = {published},
tppubtype = {article}
}
Porto-Álvarez, Jacobo; Cernadas, Eva; Aldaz Martínez, Rebeca; Fernández-Delgado, Manuel; Huelga Zapico, Emilio; González-Castro, Víctor; Baleato-González, Sandra; García-Figueiras, Roberto; Antúnez-López, J. Ramon; Souto-Bayarri, Miguel
CT-based radiomics to predict KRAS mutation in CRC patients using a machine learning algorithm: a retrospective study Artículo de revista
En: Biomedicines, vol. 11, no 8, pp. 2144, 2023, (Publisher: MDPI).
Resumen | Enlaces | BibTeX | Etiquetas: colorectal cancer, KRAS Mutation, Radiogenomics, Radiomics, texture analysis
@article{porto-alvarez_ct-based_2023,
title = {CT-based radiomics to predict KRAS mutation in CRC patients using a machine learning algorithm: a retrospective study},
author = {Porto-Álvarez, Jacobo and Cernadas, Eva and Aldaz Martínez, Rebeca and Fernández-Delgado, Manuel and Huelga Zapico, Emilio and González-Castro, Víctor and Baleato-González, Sandra and García-Figueiras, Roberto and Antúnez-López, J. Ramon and Souto-Bayarri, Miguel},
url = {https://www.mdpi.com/2227-9059/11/8/2144},
year = {2023},
date = {2023-01-01},
journal = {Biomedicines},
volume = {11},
number = {8},
pages = {2144},
abstract = {This study examines the use of CT-based radiomics to predict KRAS mutations in colorectal cancer (CRC) patients. Several classifiers were tested, with AdaBoost on clinical data achieving the highest accuracy (76.8%). Texture descriptors also showed a correlation with KRAS mutations. Radiomics could reduce the need for invasive diagnostic methods for CRC in the future.},
note = {Publisher: MDPI},
keywords = {colorectal cancer, KRAS Mutation, Radiogenomics, Radiomics, texture analysis},
pubstate = {published},
tppubtype = {article}
}
Chaves, D.; Agarwal, N.; Fidalgo, Eduardo; Alegre, Enrique
A Data Augmentation Strategy for Improving Age Estimation to Support CSEM Detection Artículo de revista
En: Proceedings of the 18th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications, vol. 5, pp. 692–699, 2023, ISBN: 978-989-758-634-7, ISSN: 2184-4321, (DOI: 10.5220/0011719700003417).
Resumen | Enlaces | BibTeX | Etiquetas: age estimation, CSEM, data augmentation, facial occlusion, prevention, synthetic datasets
@article{chaves_data_2023,
title = {A Data Augmentation Strategy for Improving Age Estimation to Support CSEM Detection},
author = {D. Chaves and N. Agarwal and Eduardo Fidalgo and Enrique Alegre},
url = {https://www.scitepress.org/PublishedPapers/2023/117197/117197.pdf},
doi = {10.5220/0011719700003417},
isbn = {978-989-758-634-7},
issn = {2184-4321},
year = {2023},
date = {2023-01-01},
journal = {Proceedings of the 18th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications},
volume = {5},
pages = {692--699},
abstract = {Leveraging image-based age estimation in preventing Child Sexual Exploitation Material (CSEM) content over the internet is not investigated thoroughly in the research community. While deep learning methods are considered state-of-the-art for general age estimation, they perform poorly in predicting the age group of minors and older adults due to the few examples of these age groups in the existing datasets. In this work, we present a data augmentation strategy to improve the performance of age estimators trained on imbalanced data based on synthetic image generation and artificial facial occlusion. Facial occlusion is focused on modelling as CSEM criminals tend to cover certain parts of the victim, such as the eyes, to hide their identity. The proposed strategy is evaluated using the Soft Stagewise Regression Network (SSR-Net), a compact size age estimator and three publicly available datasets composed mainly of non-occluded images. Therefore, we create the Synthetic Augmented with Occluded Faces (SAOF-15K) dataset to assess the performance of eye and mouth-occluded images. Results show that our strategy improves the performance of the evaluated age estimator.},
keywords = {age estimation, CSEM, data augmentation, facial occlusion, prevention, synthetic datasets},
pubstate = {published},
tppubtype = {article}
}
Martínez Mendoza, Alicia; Jáñez Martino, Francisco; Aláiz Rodríguez, Rocío; González Castro, Víctor; Fidalgo Fernández, Eduardo; Alegre, Enrique
A survey on spam detection, spammer strategies and the dataset shift problem Artículo de revista
En: Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023, pp. 485–486, 2023, (Publisher: Universidade de Vigo).
Resumen | Enlaces | BibTeX | Etiquetas: dataset shift, Spam detection, spammer strategies
@article{martinez_mendoza_survey_2023,
title = {A survey on spam detection, spammer strategies and the dataset shift problem},
author = {Martínez Mendoza, Alicia and Jáñez Martino, Francisco and Aláiz Rodríguez, Rocío and González Castro, Víctor and Fidalgo Fernández, Eduardo and Alegre, Enrique},
url = {https://dialnet.unirioja.es/servlet/articulo?codigo=9044942},
year = {2023},
date = {2023-01-01},
journal = {Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023},
pages = {485--486},
abstract = {Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023},
note = {Publisher: Universidade de Vigo},
keywords = {dataset shift, Spam detection, spammer strategies},
pubstate = {published},
tppubtype = {article}
}
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Classifying spam emails using agglomerative hierarchical clustering and a topic-based approach Artículo de revista
En: Applied Soft Computing, vol. 139, pp. 110226, 2023, (Publisher: Elsevier).
Resumen | Enlaces | BibTeX | Etiquetas: Hidden text, Image-based spam, Multi-classification, Spam detection, Term frequency, Text classification, Word embedding
@article{janez-martino_classifying_2023,
title = {Classifying spam emails using agglomerative hierarchical clustering and a topic-based approach},
author = {Francisco Jáñez-Martino and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
url = {https://www.sciencedirect.com/science/article/pii/S1568494623002442},
doi = {10.1016/j.asoc.2023.110226},
year = {2023},
date = {2023-01-01},
journal = {Applied Soft Computing},
volume = {139},
pages = {110226},
abstract = {This paper introduces two novel datasets, SPEMC-15K-E and SPEMC-15K-S, containing 15K spam emails each in English and Spanish. The emails are categorized into 11 classes using hierarchical clustering. Evaluation of 16 classification pipelines reveals that TF-IDF with Logistic Regression achieves the highest performance for the English dataset (F1 score of 0.953, accuracy of 94.6%), while TF-IDF with Naïve Bayes performs best for Spanish (F1 score of 0.945, accuracy of 98.5%). TF-IDF with LR is also the fastest for both languages.},
note = {Publisher: Elsevier},
keywords = {Hidden text, Image-based spam, Multi-classification, Spam detection, Term frequency, Text classification, Word embedding},
pubstate = {published},
tppubtype = {article}
}
Chaves, Deisy; Fidalgo, Eduardo; Rodriguez Gonzalez, Pablo; Fernández Abia, A. I.; Alegre, Enrique; Barreiro, Joaquín
Automatic classification of pores in aluminum castings using machine learning Artículo de revista
En: XLIV Jornadas de Automática, pp. 849–854, 2023, (Publisher: Universidade da Coruña. Servizo de Publicacións).
Resumen | Enlaces | BibTeX | Etiquetas: casting manufacturing, Image classification, porosity detection, SVM classifiers
@article{chaves_automatic_2023,
title = {Automatic classification of pores in aluminum castings using machine learning},
author = {Chaves, Deisy and Fidalgo, Eduardo and Rodriguez Gonzalez, Pablo and Fernández Abia, A. I. and Alegre, Enrique and Barreiro, Joaquín},
url = {https://ruc.udc.es/dspace/handle/2183/33692},
year = {2023},
date = {2023-01-01},
journal = {XLIV Jornadas de Automática},
pages = {849--854},
abstract = {This paper proposes automating the classification of porosity defects in aluminum parts manufactured by casting. Images of parts produced by traditional sand molding and the Binder Jetting (BJ) additive technique are analyzed. The method uses SIFT descriptors and BoVW features to train two SVM classifiers: one for detecting pores and another for classifying the type of porosity (gas-related or shrinkage-related). This automated approach improves inspection efficiency and accuracy compared to traditional manual methods.},
note = {Publisher: Universidade da Coruña. Servizo de Publicacións},
keywords = {casting manufacturing, Image classification, porosity detection, SVM classifiers},
pubstate = {published},
tppubtype = {article}
}
Martínez Mendoza, Alicia; Sánchez Paniagua, Manuel; Carofilis Vasco, Roberto Andrés; Jáñez Martino, Francisco; Fidalgo Fernández, Eduardo; Alegre, Enrique
Applying Machine Learning to login URLs for phishing detection Artículo de revista
En: Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023, pp. 487–488, 2023, (Publisher: Universidade de Vigo).
Resumen | Enlaces | BibTeX | Etiquetas: AI, Cybersecurity, machine learning, phishing detection, URL analysis
@article{martinez_mendoza_applying_2023,
title = {Applying Machine Learning to login URLs for phishing detection},
author = {Martínez Mendoza, Alicia and Sánchez Paniagua, Manuel and Carofilis Vasco, Roberto Andrés and Jáñez Martino, Francisco and Fidalgo Fernández, Eduardo and Alegre, Enrique},
url = {https://dialnet.unirioja.es/servlet/articulo?codigo=9044941},
year = {2023},
date = {2023-01-01},
journal = {Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023},
pages = {487--488},
abstract = {This paper explores the application of machine learning for phishing detection using login URLs. By analyzing URL patterns and features, the study aims to differentiate between legitimate and phishing websites. Various machine learning models are evaluated to enhance detection accuracy, providing a proactive approach to cybersecurity threats.},
note = {Publisher: Universidade de Vigo},
keywords = {AI, Cybersecurity, machine learning, phishing detection, URL analysis},
pubstate = {published},
tppubtype = {article}
}
Martinez, Alicia; Al-Nabki, Wesam; Díaz-Ocampo, Daniel; Fernández Robles, Laura; Fidalgo, Eduardo; Alegre, Enrique; Carofilis Vasco, Andres
Authorship identification in text documents using BERT and POS features Artículo de revista
En: 5th International Conference on Applications of Intelligent Systems (Las Palmas de Gran Canaria, España), 2023.
Resumen | Enlaces | BibTeX | Etiquetas: authorship identification, BERT, POS features, Text classification
@article{daniel_diaz-ocampo_authorship_2023,
title = {Authorship identification in text documents using BERT and POS features},
author = {Martinez, Alicia and Al-Nabki, Wesam and Díaz-Ocampo, Daniel and Fernández Robles, Laura and Fidalgo, Eduardo and Alegre, Enrique and Carofilis Vasco, Andres},
url = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&sortby=title&citation_for_view=yATJZvcAAAAJ:yB1At4FlUx8C},
year = {2023},
date = {2023-01-01},
journal = {5th International Conference on Applications of Intelligent Systems (Las Palmas de Gran Canaria, España)},
abstract = {This paper enhances authorship identification by combining BERT embeddings with POS features, improving classification accuracy.},
keywords = {authorship identification, BERT, POS features, Text classification},
pubstate = {published},
tppubtype = {article}
}
Castaño, Felipe; Fidalgo, Eduardo; Alaiz-Rodríguez, Rocío; Alegre, Enrique
PhiKitA: Phishing Kit Attacks Dataset for Phishing Websites Identification Artículo de revista
En: IEEE Access, vol. 11, pp. 40779–40789, 2023, (Publisher: IEEE).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, Dataset, phishing detection
@article{castano_phikita_2023,
title = {PhiKitA: Phishing Kit Attacks Dataset for Phishing Websites Identification},
author = {Felipe Castaño and Eduardo Fidalgo and Rocío Alaiz-Rodríguez and Enrique Alegre},
url = {https://ieeexplore.ieee.org/abstract/document/10103863},
year = {2023},
date = {2023-01-01},
journal = {IEEE Access},
volume = {11},
pages = {40779--40789},
abstract = {This paper introduces PhiKitA, a novel dataset containing phishing kits and phishing websites generated from these kits. The dataset is used to investigate phishing kit detection, phishing website identification, and the source of phishing websites. The study applied MD5 hashes, fingerprints, and graph representation DOM algorithms to analyze the dataset. The results show that the graph representation algorithm achieved an accuracy of 92.50% for phishing detection, while MD5 hash representation achieved a 39.54% F1 score, indicating its limited effectiveness in distinguishing phishing sources.},
note = {Publisher: IEEE},
keywords = {Cybersecurity, Dataset, phishing detection},
pubstate = {published},
tppubtype = {article}
}
Nejad, Amin Shoari; Alaiz-Rodríguez, Rocío; McCarthy, Gerard D; Kelleher, Brian; Grey, Anthony; Parnell, Andrew
SERT: A Transfomer Based Model for Spatio-Temporal Sensor Data with Missing Values for Environmental Monitoring Artículo de revista
En: arXiv preprint arXiv:2306.03042, 2023.
Resumen | Enlaces | BibTeX | Etiquetas: Artificial Neural Networks, Missing Data Handling, Spatio-Temporal Forecasting, Transformer Models
@article{nejad_sert_2023,
title = {SERT: A Transfomer Based Model for Spatio-Temporal Sensor Data with Missing Values for Environmental Monitoring},
author = {Amin Shoari Nejad and Rocío Alaiz-Rodríguez and Gerard D McCarthy and Brian Kelleher and Anthony Grey and Andrew Parnell},
url = {https://ui.adsabs.harvard.edu/abs/2024CG....18805601N/abstract},
eprint = {2306.03042},
eprinttype = {arXiv},
year = {2023},
date = {2023-01-01},
journal = {arXiv preprint},
abstract = {This work introduces two models for spatio-temporal forecasting that effectively handle missing values in multivariate time series data without imputation. The first model, SERT (Spatio-temporal Encoder Representations from Transformers), is transformer-based, while the second, SST-ANN (Sparse Spatio-Temporal Artificial Neural Network), is a simpler and interpretable approach. Extensive experiments on two datasets show that both models outperform or match state-of-the-art performance in handling missing data for multivariate spatio-temporal forecasting.},
keywords = {Artificial Neural Networks, Missing Data Handling, Spatio-Temporal Forecasting, Transformer Models},
pubstate = {published},
tppubtype = {article}
}
Nejad, Amin Shoari; Alaiz-Rodríguez, Rocío; McCarthy, Gerard D; Kelleher, Brian; Grey, Anthony; Parnell, Andrew
SERT: A Transfomer Based Model for Spatio-Temporal Sensor Data with Missing Values for Environmental Monitoring Artículo de revista
En: arXiv e-prints, arXiv:2306.03042, 2023.
Resumen | Enlaces | BibTeX | Etiquetas: Artificial Neural Networks, Missing Data Handling, Spatio-Temporal Forecasting, Transformer Models
@article{shoari_nejad_sert_2023,
title = {SERT: A Transfomer Based Model for Spatio-Temporal Sensor Data with Missing Values for Environmental Monitoring},
author = {Amin Shoari Nejad and Rocío Alaiz-Rodríguez and Gerard D McCarthy and Brian Kelleher and Anthony Grey and Andrew Parnell},
url = {https://arxiv.org/abs/2306.03042},
eprint = {2306.03042},
eprinttype = {arXiv},
year = {2023},
date = {2023-01-01},
journal = {arXiv e-prints},
abstract = {This research focuses on environmental monitoring and introduces two models for spatio-temporal forecasting that can handle missing values in sensor data without the need for imputation. The first model, SERT (Spatio-temporal Encoder Representations from Transformers), utilizes a transformer-based approach. The second, SST-ANN (Sparse Spatio-Temporal Artificial Neural Network), is a simpler and more interpretable model. Extensive experiments show that these models perform competitively or better than existing state-of-the-art models for multivariate spatio-temporal forecasting.},
keywords = {Artificial Neural Networks, Missing Data Handling, Spatio-Temporal Forecasting, Transformer Models},
pubstate = {published},
tppubtype = {article}
}
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Classifying spam emails using agglomerative hierarchical clustering and a topic-based approach Artículo de revista
En: Applied Soft Computing, vol. 139, pp. 110226, 2023, ISSN: 1568-4946.
Resumen | Enlaces | BibTeX | Etiquetas: Hidden text, Image-based spam, Multi-classification, Spam detection, Term frequency, Text classification, Word embedding
@article{JANEZMARTINO2023110226b,
title = {Classifying spam emails using agglomerative hierarchical clustering and a topic-based approach},
author = {Francisco Jáñez-Martino and Rocío Alaiz-Rodríguez and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
url = {https://www.sciencedirect.com/science/article/pii/S1568494623002442},
doi = {10.1016/j.asoc.2023.110226},
issn = {1568-4946},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Applied Soft Computing},
volume = {139},
pages = {110226},
abstract = {Spam emails are unsolicited, annoying and sometimes harmful messages which may contain malware, phishing or hoaxes. Unlike most studies that address the design of efficient anti-spam filters, we approach the spam email problem from a different and novel perspective. Focusing on the needs of cybersecurity units, we follow a topic-based approach for addressing the classification of spam email into multiple categories. We propose SPEMC-15K-E and SPEMC-15K-S, two novel datasets with approximately 15K emails each in English and Spanish, respectively, and we label them using agglomerative hierarchical clustering into 11 classes. We evaluate 16 pipelines, combining four text representation techniques -Term Frequency-Inverse Document Frequency (TF-IDF), Bag of Words, Word2Vec and BERT- and four classifiers: Support Vector Machine, Näive Bayes, Random Forest and Logistic Regression. Experimental results show that the highest performance is achieved with TF-IDF and LR for the English dataset, with a F1 score of 0.953 and an accuracy of 94.6%, and while for the Spanish dataset, TF-IDF with NB yields a F1 score of 0.945 and 98.5% accuracy. Regarding the processing time, TF-IDF with LR leads to the fastest classification, processing an English and Spanish spam email in 2ms and 2.2ms on average, respectively.},
keywords = {Hidden text, Image-based spam, Multi-classification, Spam detection, Term frequency, Text classification, Word embedding},
pubstate = {published},
tppubtype = {article}
}
Joshi, Akanksha; Fidalgo, Eduardo; Alegre, Enrique; Fernández-Robles, Laura
DeepSumm: Exploiting topic models and sequence to sequence networks for extractive text summarization Artículo de revista
En: Expert Systems with Applications, vol. 211, pp. 118442, 2023, ISSN: 0957-4174.
Resumen | Enlaces | BibTeX | Etiquetas: Attention networks, Extractive, Seq2seq, Text summarization, Topic models
@article{JOSHI2023118442,
title = {DeepSumm: Exploiting topic models and sequence to sequence networks for extractive text summarization},
author = {Akanksha Joshi and Eduardo Fidalgo and Enrique Alegre and Laura Fernández-Robles},
url = {https://www.sciencedirect.com/science/article/pii/S0957417422015391},
doi = {10.1016/j.eswa.2022.118442},
issn = {0957-4174},
year = {2023},
date = {2023-01-01},
journal = {Expert Systems with Applications},
volume = {211},
pages = {118442},
abstract = {In this paper, we propose DeepSumm, a novel method based on topic modeling and word embeddings for the extractive summarization of single documents. Recent summarization methods based on sequence networks fail to capture the long range semantics of the document which are encapsulated in the topic vectors of the document. In DeepSumm, our aim is to utilize the latent information in the document estimated via topic vectors and sequence networks to improve the quality and accuracy of the summarized text. Each sentence is encoded through two different recurrent neural networks based on probabilistic topic distributions and word embeddings, and then a sequence to sequence network is applied to each sentence encoding. The outputs of the encoder and the decoder in the sequence to sequence networks are combined after weighting using an attention mechanism and converted into a score through a multi-layer perceptron network. We refer to the score obtained through the topic model as Sentence Topic Score (STS) and to the score generated through word embeddings as Sentence Content Score (SCS). In addition, we propose Sentence Novelty Score (SNS) and Sentence Position Score (SPS) and perform a weighted fusion of the four scores for each sentence in the document to compute a Final Sentence Score (FSS). The proposed DeepSumm framework was evaluated on the standard DUC 2002 benchmark and CNN/DailyMail datasets. Experimentally, it was demonstrated that our method captures both the global and the local semantic information of the document and essentially outperforms existing state-of-the-art approaches for extractive text summarization with ROUGE-1, ROUGE-2, and ROUGE-L scores of 53.2, 28.7 and 49.2 on DUC 2002 and 43.3, 19.0 and 38.9 on CNN/DailyMail dataset.},
keywords = {Attention networks, Extractive, Seq2seq, Text summarization, Topic models},
pubstate = {published},
tppubtype = {article}
}
Al Nabki, MHD Wesam; Fidalgo, Eduardo; Alegre, Enrique; Chaves, Deisy
Supervised ranking approach to identify infLuential websites in the darknet Artículo de revista
En: Applied Intelligence, vol. 53, no 19, pp. 22952–22968, 2023, (Publisher: Springer US New York).
Resumen | Enlaces | BibTeX | Etiquetas: Criminal Detection, Domain Ranking, law enforcement, Learning-to Rank, TOR Network
@article{al_nabki_supervised_2023,
title = {Supervised ranking approach to identify infLuential websites in the darknet},
author = {Al Nabki, MHD Wesam and Fidalgo, Eduardo and Alegre, Enrique and Chaves, Deisy},
url = {https://link.springer.com/article/10.1007/s10489-023-04671-9},
doi = {10.1007/s10489-023-04671-9},
year = {2023},
date = {2023-01-01},
journal = {Applied Intelligence},
volume = {53},
number = {19},
pages = {22952--22968},
abstract = {This paper introduces a supervised ranking framework to identify the most influential domains in the Tor network, focusing on criminal activities. It uses 40 features from various sources to train a learning-to-rank model, achieving an NDCG of 0.93 for top-10 drug-related domains. The framework outperforms link-based methods and demonstrates that user-visible text is key for effective ranking, aiding law enforcement in detecting suspicious Tor domains.},
note = {Publisher: Springer US New York},
keywords = {Criminal Detection, Domain Ranking, law enforcement, Learning-to Rank, TOR Network},
pubstate = {published},
tppubtype = {article}
}
Velasco-Mata, Javier; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
Real-time botnet detection on large network bandwidths using machine learning Artículo de revista
En: Scientific Reports, vol. 13, no 1, pp. 4282, 2023, (Publisher: Nature Publishing Group UK London).
Resumen | Enlaces | BibTeX | Etiquetas: Botnet Detection, F1 score, Network Traffic, Performance Optimization
@article{velasco-mata_real-time_2023,
  author    = {Javier Velasco-Mata and Víctor González-Castro and Eduardo Fidalgo and Enrique Alegre},
  title     = {Real-time botnet detection on large network bandwidths using machine learning},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  pages     = {4282},
  year      = {2023},
  date      = {2023-01-01},
  url       = {https://www.nature.com/articles/s41598-023-31260-0},
  abstract  = {An ultra-fast approach for botnet detection is developed to process network traffic in one-second windows without significant loss in performance. The model outperforms existing methods, achieving an F1 score of 0.926 with a processing time of 0.007 ms per sample. It remains robust under network saturation and performs well on various bandwidths with minimal CPU core requirements.},
  note      = {Publisher: Nature Publishing Group UK London},
  keywords  = {Botnet Detection, F1 score, Network Traffic, Performance Optimization},
  pubstate  = {published},
  tppubtype = {article}
}
Alegre, Enrique; Fidalgo, Eduardo; Fernández-Robles, Laura; Martínez-Mendoza, Alicia; Biswas, Rubel
Smishing Detection: Recognition of URLs from SMS Screenshots Artículo de revista
En: 5th International Conference on Applications of Intelligent Systems (Las Palmas de Gran Canaria, España), 2023.
Resumen | Enlaces | BibTeX | Etiquetas: Intelligent Systems, Recognition of URLs, Smishing Detection, SMS Screenshots
@inproceedings{alegre_smishing_2023,
  author    = {Alegre, Enrique and Fidalgo, Eduardo and Fernández-Robles, Laura and Martínez-Mendoza, Alicia and Biswas, Rubel},
  title     = {Smishing Detection: Recognition of {URLs} from {SMS} Screenshots},
  booktitle = {5th International Conference on Applications of Intelligent Systems (Las Palmas de Gran Canaria, España)},
  year      = {2023},
  date      = {2023-01-01},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=100&pagesize=100&sortby=title&citation_for_view=yATJZvcAAAAJ:0KyAp5RtaNEC},
  abstract  = {5th International Conference on Applications of Intelligent Systems (Las Palmas de Gran Canaria, España)},
  keywords  = {Intelligent Systems, Recognition of URLs, Smishing Detection, SMS Screenshots},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Martínez-Mendoza, Alicia; Sánchez-Paniagua, Manuel; Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; Fidalgo, Eduardo; Alegre, Enrique
Novel benchmark dataset and features to detect phishing on web pages Artículo de revista
En: Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023, pp. 599–600, 2023, (Publisher: Universidade de Vigo).
Resumen | Enlaces | BibTeX | Etiquetas: Phishing, web pages
@inproceedings{martinez_mendoza_novel_2023,
  author    = {Martínez-Mendoza, Alicia and Sánchez-Paniagua, Manuel and Jáñez-Martino, Francisco and Alaiz-Rodríguez, Rocío and Fidalgo, Eduardo and Alegre, Enrique},
  title     = {Novel benchmark dataset and features to detect phishing on web pages},
  booktitle = {Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023},
  pages     = {599--600},
  year      = {2023},
  date      = {2023-01-01},
  publisher = {Universidade de Vigo},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9057251},
  abstract  = {Jornadas Nacionales de Investigación en Ciberseguridad: actas de las VIII Jornadas Nacionales de Investigación en ciberseguridad: Vigo, 21 a 23 de junio de 2023},
  keywords  = {Phishing, web pages},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2022
Díaz, Daniel; Velasco-Mata, Javier; Del Río, Aitor; Fidalgo, Eduardo
Optimal botnet detection on network data Artículo de revista
En: VII Jornadas Nacionales de Investigación en Ciberseguridad 2022, 2022.
Resumen | Enlaces | BibTeX | Etiquetas: JNIC, network data, optimal botnet detection
@inproceedings{diaz_optimal_2022,
  author    = {Díaz, Daniel and Velasco-Mata, Javier and Del Río, Aitor and Fidalgo, Eduardo},
  title     = {Optimal botnet detection on network data},
  booktitle = {VII Jornadas Nacionales de Investigación en Ciberseguridad 2022},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9206648},
  abstract  = {Investigación en Ciberseguridad. Actas de las VII Jornadas Nacionales (JNIC 2022)},
  keywords  = {JNIC, network data, optimal botnet detection},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Del Río, Aitor; Fidalgo, Eduardo; Blanco-Medina, Pablo; Chaves, Deisy; Prieto-Castro, Alexci; Alegre, Enrique
Semantic Attention Keypoint Filtering for Darknet Content Classification Artículo de revista
En: VII Jornadas Nacionales de Investigación en Ciberseguridad 2022, 2022.
Resumen | Enlaces | BibTeX | Etiquetas: Actas, Ciberseguridad, Classification, Darknet Content, VII Jornadas Nacionales
@inproceedings{del_rio_semantic_2022,
  author    = {Del Río, Aitor and Fidalgo, Eduardo and Blanco-Medina, Pablo and Chaves, Deisy and Prieto-Castro, Alexci and Alegre, Enrique},
  title     = {Semantic Attention Keypoint Filtering for Darknet Content Classification},
  booktitle = {VII Jornadas Nacionales de Investigación en Ciberseguridad 2022},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9206652},
  abstract  = {Investigación en Ciberseguridad Actas de las VII Jornadas Nacionales (7º.2022.Bilbao)},
  keywords  = {Actas, Ciberseguridad, Classification, Darknet Content, VII Jornadas Nacionales},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Blanco-Medina, Pablo; Fidalgo, Eduardo; Alegre, Enrique; González-Castro, Víctor
A survey on methods, datasets and implementations for scene text spotting Artículo de revista
En: IET Image Processing, vol. 16, no 13, pp. 3426–3445, 2022.
Resumen | Enlaces | BibTeX | Etiquetas: Computer vision, image text detection, OCR, text spotting
@article{blanco_medina_survey_2022,
  author    = {Blanco-Medina, Pablo and Fidalgo, Eduardo and Alegre, Enrique and González-Castro, Víctor},
  title     = {A survey on methods, datasets and implementations for scene text spotting},
  journal   = {IET Image Processing},
  volume    = {16},
  number    = {13},
  pages     = {3426--3445},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1049/ipr2.12574},
  url       = {https://ietresearch.onlinelibrary.wiley.com/doi/full/10.1049/ipr2.12574},
  abstract  = {Text Spotting combines the tasks of detecting and transcribing text present in images, addressing challenges like orientation, aspect ratio, vertical text, and multiple languages in a single image. This paper analyzes and compares the most recent methods and publications in the field, extending beyond traditional comparisons of architectures and performance. It also discusses aspects often overlooked, such as hardware, software, backbone architectures, main challenges, and programming languages used in algorithms. The review covers research from 2016 to 2022, highlighting current problems, future trends, and providing a baseline for the development and comparison of future Text Spotting methods.},
  keywords  = {Computer vision, image text detection, OCR, text spotting},
  pubstate  = {published},
  tppubtype = {article}
}
Jeuland, Elouan Derenee; Del Río Ferreras, Aitor; Chaves, Deisy; Fidalgo, Eduardo; González-Castro, Víctor; Alegre, Enrique
Assessment of age estimation methods for forensic applications using non-occluded and synthetic occluded facial images Artículo de revista
En: XLIII Jornadas de Automática, pp. 972–979, 2022, (Publisher: Universidade da Coruña. Servizo de Publicacións).
Resumen | Enlaces | BibTeX | Etiquetas: Age Estimation, CSEM, deep learning, facial occlusion
@inproceedings{jeuland_assessment_2022,
  author    = {Jeuland, Elouan Derenee and Del Río Ferreras, Aitor and Chaves, Deisy and Fidalgo, Eduardo and González-Castro, Víctor and Alegre, Enrique},
  title     = {Assessment of age estimation methods for forensic applications using non-occluded and synthetic occluded facial images},
  booktitle = {XLIII Jornadas de Automática},
  pages     = {972--979},
  year      = {2022},
  date      = {2022-01-01},
  publisher = {Universidade da Coruña. Servizo de Publicacións},
  url       = {https://ruc.udc.es/dspace/handle/2183/31412},
  abstract  = {This paper evaluates the performance of six deep-learning-based age estimators for forensic applications, particularly in identifying minors and offenders in Child Sexual Exploitation Materials (CSEM). While deep learning is the state-of-the-art for age estimation, it struggles with minors and older adults due to dataset imbalances. Additionally, offenders often use facial occlusion to obscure identities, further impacting estimator accuracy. The study assesses models on non-occluded and synthetically occluded datasets, revealing that eye occlusion has a greater effect than mouth occlusion. Minors and elderly individuals are the most affected by occlusion, making this research a valuable benchmark for forensic victim profiling.},
  keywords  = {Age Estimation, CSEM, deep learning, facial occlusion},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Sánchez-Paniagua, Manuel; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodríguez, Rocío
Phishing websites detection using a novel multipurpose dataset and web technologies features Artículo de revista
En: Expert Systems with Applications, vol. 207, pp. 118010, 2022, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Dataset Creation, LightGBM Classifier, phishing detection, Web Technology Features
@article{sanchez-paniagua_phishing_2022,
  author    = {Manuel Sánchez-Paniagua and Eduardo Fidalgo and Enrique Alegre and Rocío Alaiz-Rodríguez},
  title     = {Phishing websites detection using a novel multipurpose dataset and web technologies features},
  journal   = {Expert Systems with Applications},
  volume    = {207},
  pages     = {118010},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://www.sciencedirect.com/science/article/pii/S0957417422012301},
  abstract  = {Phishing attacks are a major challenge in cybersecurity, often involving the hijacking of sensitive data through fraudulent login forms. This paper proposes a new methodology for detecting phishing websites in real-world scenarios using URL, HTML, and web technology features. The authors introduce the Phishing Index Login Websites Dataset (PILWD), an offline dataset containing 134,000 verified samples, which enables researchers to test and compare detection approaches. Using the dataset, a LightGBM classifier with 54 features achieves a 97.95% accuracy in detecting phishing websites. This methodology is independent of third-party services and utilizes new features for improved detection.},
  note      = {Publisher: Pergamon},
  keywords  = {Dataset Creation, LightGBM Classifier, phishing detection, Web Technology Features},
  pubstate  = {published},
  tppubtype = {article}
}
Delgado-Sotés, Juan José; Sánchez-Paniagua, Manuel; Velasco-Mata, Javier; Fidalgo, Eduardo; Prieto-Carballal, Juan; Azzopardi, George
Dataset creation and feature extraction for the detection of fraudulent websites Artículo de revista
En: Investigación en Ciberseguridad Actas de las VII Jornadas Nacionales (7º. 2022. Bilbao), pp. 267–268, 2022, (Publisher: Fundación Tecnalia Research and Innovation).
Resumen | Enlaces | BibTeX | Etiquetas: Aprendizaje Automático, Direcciones IP, Selección de Variables, Severidad
@inproceedings{delgado-sotes_dataset_2022,
  author    = {Delgado-Sotés, Juan José and Sánchez-Paniagua, Manuel and Velasco-Mata, Javier and Fidalgo, Eduardo and Prieto-Carballal, Juan and Azzopardi, George},
  title     = {Dataset creation and feature extraction for the detection of fraudulent websites},
  booktitle = {Investigación en Ciberseguridad Actas de las VII Jornadas Nacionales (7º. 2022. Bilbao)},
  pages     = {267--268},
  year      = {2022},
  date      = {2022-01-01},
  publisher = {Fundación Tecnalia Research and Innovation},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9206641},
  abstract  = {Este trabajo se centra en la creación de un conjunto de variables (features) para clasificar la maliciosidad de una dirección IP en un contexto multi-clase. Se han utilizado 23 variables, de las cuales 18 provienen de series temporales y listas de reputación, y 5 están relacionadas con la geolocalización de la IP. Además, se realizó un análisis estadístico para optimizar y estudiar la adecuación de estas variables, teniendo en cuenta los cambios posibles en la geolocalización y los hiperparámetros de las series temporales.},
  keywords  = {Aprendizaje Automático, Direcciones IP, Selección de Variables, Severidad},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Castejón-Limas, Manuel; Fernández-Robles, Laura; Alaiz-Moretón, Héctor; Cifuentes-Rodriguez, Jaime; Fernández-Llamas, Camino
A framework for the optimization of complex cyber-physical systems via directed acyclic graph Artículo de revista
En: Sensors, vol. 22, no 4, pp. 1490, 2022, (Publisher: MDPI).
Resumen | Enlaces | BibTeX | Etiquetas: Cyber-Physical Systems, Directed Acyclic Graphs, Lean Manufacturing, machine learning models, pipegraph, scikit-learn
@article{castejon-limas_framework_2022,
  author    = {Manuel Castejón-Limas and Laura Fernández-Robles and Héctor Alaiz-Moretón and Jaime Cifuentes-Rodriguez and Camino Fernández-Llamas},
  title     = {A framework for the optimization of complex cyber-physical systems via directed acyclic graph},
  journal   = {Sensors},
  volume    = {22},
  number    = {4},
  pages     = {1490},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://www.mdpi.com/1424-8220/22/4/1490},
  abstract  = {Mathematical modeling and data-driven methodologies are frequently required to optimize industrial processes in the context of Cyber-Physical Systems (CPS). This paper introduces the PipeGraph software library, an open-source python toolbox for easing the creation of machine learning models by using Directed Acyclic Graph (DAG)-like implementations that can be used for CPS. scikit-learn’s Pipeline is a very useful tool to bind a sequence of transformers and a final estimator in a single unit capable of working itself as an estimator. It sequentially assembles several steps that can be cross-validated together while setting different parameters. Steps encapsulation secures the experiment from data leakage during the training phase. The scientific goal of PipeGraph is to extend the concept of Pipeline by using a graph structure that can handle scikit-learn’s objects in DAG layouts. It allows performing diverse operations, instead of only transformations, following the topological ordering of the steps in the graph; it provides access to all the data generated along the intermediate steps; and it is compatible with GridSearchCV function to tune the hyperparameters of the steps. It is also not limited to (𝑋,𝑦) entries. Moreover, it has been proposed as part of the scikit-learn-contrib supported project, and is fully compatible with scikit-learn. Documentation and unitary tests are publicly available together with the source code. Two case studies are analyzed in which PipeGraph proves to be essential in improving CPS modeling and optimization: the first is about the optimization of a heat exchange management system, and the second deals with the detection of anomalies in manufacturing processes.},
  note      = {Publisher: MDPI},
  keywords  = {Cyber-Physical Systems, Directed Acyclic Graphs, Lean Manufacturing, machine learning models, pipegraph, scikit-learn},
  pubstate  = {published},
  tppubtype = {article}
}
Joshi, Akanksha; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodríguez, Rocío
RankSum—An unsupervised extractive text summarization based on rank fusion Artículo de revista
En: Expert Systems with Applications, vol. 200, pp. 116846, 2022, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Extractive Summarization, Sentence Ranking, Text summarization, Unsupervised Learning
@article{joshi_ranksumunsupervised_2022,
  author    = {Joshi, Akanksha and Fidalgo, Eduardo and Alegre, Enrique and Alaiz-Rodríguez, Rocío},
  title     = {{RankSum}—An unsupervised extractive text summarization based on rank fusion},
  journal   = {Expert Systems with Applications},
  volume    = {200},
  pages     = {116846},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://www.sciencedirect.com/science/article/pii/S0957417422002998},
  abstract  = {This paper introduces RankSum, an approach for extractive text summarization that combines four multi-dimensional sentence features: topic information, semantic content, significant keywords, and position. It ranks sentences based on these features using an unsupervised method, followed by a weighted fusion to determine sentence significance. The method utilizes probabilistic topic models for topic ranking, sentence embeddings for semantic information, and graph-based strategies for identifying keywords. The approach also employs a novelty measure to avoid redundancy. Experimental results on datasets like CNN/DailyMail and DUC 2002 show that RankSum outperforms existing summarization methods.},
  note      = {Publisher: Pergamon},
  keywords  = {Extractive Summarization, Sentence Ranking, Text summarization, Unsupervised Learning},
  pubstate  = {published},
  tppubtype = {article}
}