Publications
2023
Martínez-Mendoza, Alicia; Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
A survey on spam detection, spammer strategies and the dataset shift problem Artículo de revista
En: Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023, pp. 485–486, 2023, (Publisher: Universidade de Vigo).
Resumen | Enlaces | BibTeX | Etiquetas: dataset shift, Spam detection, spammer strategies
@comment{Conference contribution (pp. 485--486) in the proceedings of the VIII Jornadas Nacionales de Investigación en Ciberseguridad, Vigo, 21-23 June 2023. No abstract is recorded: the exported value only repeated the proceedings title.}
@inproceedings{martinez-mendoza_survey_2023,
  author    = {Martínez-Mendoza, Alicia and Jáñez-Martino, Francisco and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
  title     = {A survey on spam detection, spammer strategies and the dataset shift problem},
  booktitle = {Actas de las VIII Jornadas Nacionales de Investigación en Ciberseguridad: Vigo, 21 a 23 de junio de 2023},
  pages     = {485--486},
  year      = {2023},
  month     = jun,
  publisher = {Universidade de Vigo},
  url       = {https://dialnet.unirioja.es/servlet/articulo?codigo=9044942},
  keywords  = {dataset shift, Spam detection, spammer strategies},
  pubstate  = {published},
  tppubtype = {inproceedings},
}
Jáñez-Martino, Francisco; Alaiz-Rodríguez, Rocío; González-Castro, Víctor; Fidalgo, Eduardo; Alegre, Enrique
A review of spam email detection: analysis of spammer strategies and the dataset shift problem Artículo de revista
En: Artificial Intelligence Review, vol. 56, no 2, pp. 1145–1173, 2023, (Publisher: Springer Netherlands Dordrecht).
Resumen | Enlaces | BibTeX | Etiquetas: Cybersecurity, dataset shift, machine learning, Spam detection
@comment{Journal review article in Artificial Intelligence Review 56(2). The doi field is the identifier embedded in the publisher URL.}
@article{janez-martino_review_2023,
  author    = {Jáñez-Martino, Francisco and Alaiz-Rodríguez, Rocío and González-Castro, Víctor and Fidalgo, Eduardo and Alegre, Enrique},
  title     = {A review of spam email detection: analysis of spammer strategies and the dataset shift problem},
  journal   = {Artificial Intelligence Review},
  volume    = {56},
  number    = {2},
  pages     = {1145--1173},
  year      = {2023},
  publisher = {Springer Netherlands},
  address   = {Dordrecht},
  doi       = {10.1007/s10462-022-10195-4},
  url       = {https://link.springer.com/article/10.1007/s10462-022-10195-4},
  abstract  = {Spam emails, which once were mainly an annoyance, now increasingly contain scams, malware, and phishing attempts. Despite high-performing spam filters based on machine learning, users continue to report rising incidents of fraud and attacks via spam. This paper highlights two key challenges in spam email detection: the dynamic nature of the environment, leading to dataset shift, and the presence of adversarial actors (spammers). The review focuses on the impact of these challenges and examines various spammer strategies and state-of-the-art techniques for developing robust filters. Experimental results show that ignoring dataset shift can severely degrade the performance of spam filters, leading to high error rates.},
  keywords  = {Cybersecurity, dataset shift, machine learning, Spam detection},
  pubstate  = {published},
  tppubtype = {article},
}
2012
Moreno-Torres, José G; Raeder, Troy; Alaiz-Rodríguez, Rocío; Chawla, Nitesh V; Herrera, Francisco
A unifying view on dataset shift in classification Artículo de revista
En: Pattern recognition, vol. 45, no 1, pp. 521–530, 2012, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: data distribution, data science, dataset shift, machine learning, research framework
@comment{Journal article in Pattern Recognition 45(1), pp. 521--530. No DOI is recorded here; the url field points at the publisher landing page.}
@article{moreno-torres_unifying_2012,
  author    = {Moreno-Torres, José G and Raeder, Troy and Alaiz-Rodríguez, Rocío and Chawla, Nitesh V and Herrera, Francisco},
  title     = {A unifying view on dataset shift in classification},
  journal   = {Pattern Recognition},
  volume    = {45},
  number    = {1},
  pages     = {521--530},
  year      = {2012},
  publisher = {Pergamon},
  url       = {https://www.sciencedirect.com/science/article/pii/S0031320311002901},
  abstract  = {The field of dataset shift has received a growing amount of interest in the last few years. The fact that most real-world applications have to cope with some form of shift makes its study highly relevant. The literature on the topic is mostly scattered, and different authors use different names to refer to the same concepts, or use the same name for different concepts. With this work, we attempt to present a unifying framework through the review and comparison of some of the most important works in the literature.},
  keywords  = {data distribution, data science, dataset shift, machine learning, research framework},
  pubstate  = {published},
  tppubtype = {article},
}
0000
Moreno-Torres, José G; Raeder, Troy; Alaiz-Rodríguez, Rocío; Chawla, Nitesh V; Herrera, Francisco
Tackling dataset shift in classification: Benchmarks and methods Miscelánea
0000.
Resumen | Enlaces | BibTeX | Etiquetas: Algorithm Comparison, Benchmark Dataset, Classification, dataset shift, Transfer Learning
@comment{Undated miscellaneous item: no year or date field is recorded, and the url is a Google Scholar citation-view link rather than a publisher page.}
@misc{moreno-torres_tackling_nodate,
  author    = {Moreno-Torres, José G and Raeder, Troy and Alaiz-Rodríguez, Rocío and Chawla, Nitesh V and Herrera, Francisco},
  title     = {Tackling dataset shift in classification: Benchmarks and methods},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=en&user=2gj1UNYAAAAJ&cstart=20&pagesize=80&sortby=title&citation_for_view=2gj1UNYAAAAJ:0EnyYjriUFMC},
  abstract  = {This paper addresses the issue of dataset shift, which occurs when the data used to train a classifier differs from the data distribution it encounters during deployment. This phenomenon can lead to poor performance of the classifier, similar to the impact of noisy data. The paper introduces a new benchmark set of datasets to facilitate fair comparisons of algorithms designed to handle dataset shift. The study also includes a comprehensive analysis of key algorithms in the field, evaluating their effectiveness across a range of datasets and shifts.},
  keywords  = {Algorithm Comparison, Benchmark Dataset, Classification, dataset shift, Transfer Learning},
  pubstate  = {published},
  tppubtype = {misc},
}