Publications
2019
Alegre, Enrique
Supervised Machine Learning for Classification, Mining, and Ranking of Illegal Web Contents Tesis doctoral
UNIVERSITY OF LEÓN, 2019.
Resumen | Enlaces | BibTeX | Etiquetas: Darknet, Illegal Activities, Pastebin, Text classification, TOR Network
@comment{PhD thesis record. NOTE(review): title, year and school match entry al-nabki_supervised_2019, whose author is Wesam Al-Nabki; possibly a duplicate record of the same thesis -- confirm the author before citing.}
@phdthesis{alegre_supervised_2019,
  author    = {Alegre, Enrique},
  title     = {Supervised Machine Learning for Classification, Mining, and Ranking of Illegal Web Contents},
  school    = {Universidad de León},
  year      = {2019},
  date      = {2019-01-01},
  url       = {https://scholar.google.es/citations?view_op=view_citation&hl=es&user=yATJZvcAAAAJ&cstart=100&pagesize=100&sortby=title&citation_for_view=yATJZvcAAAAJ:ldfaerwXgEUC},
  abstract  = {This thesis introduces algorithms, methods, and datasets aimed at classifying, mining information, and ranking web domains or similar resources containing text. The focus is on detecting web content that may indicate illegal activities, particularly in the Tor Darknet and Online Notepad Services (ONS), like Pastebin. Motivated by a collaboration with INCIBE, the research addresses the identification of criminal content in these areas, based on the assumption that the Tor network harbors a significant amount of illicit activity.},
  keywords  = {Darknet, Illegal Activities, Pastebin, Text classification, TOR Network},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
Al-Nabki, Wesam
Supervised machine learning for classification mining and ranking of illegal web contents Tesis doctoral
Universidad de León, 2019.
Resumen | Enlaces | BibTeX | Etiquetas: Darknet, machine learning, NER, Pastebin, Tor Darknet
@comment{PhD thesis record; the url field points to dialnet.unirioja.es.}
@phdthesis{al-nabki_supervised_2019,
  author    = {Al-Nabki, Wesam},
  title     = {Supervised machine learning for classification mining and ranking of illegal web contents},
  school    = {Universidad de León},
  year      = {2019},
  date      = {2019-01-01},
  url       = {https://dialnet.unirioja.es/servlet/dctes?codigo=261157},
  abstract  = {This thesis develops algorithms and datasets to classify and detect illegal activities in web domains, focusing on the Tor Darknet and services like Pastebin. Using machine learning, datasets like DUTA and DUTA-10K achieve high classification accuracy for Tor domains. Active Learning and Named Entity Recognition (NER) are used for classifying and identifying criminal content, while Graph Theory analyzes emerging products in Tor marketplaces. The thesis introduces ToRank for ranking influential onion domains, outperforming traditional ranking methods. It also compares content-based ranking techniques for detecting drug-related domains.},
  keywords  = {Darknet, machine learning, NER, Pastebin, Tor Darknet},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
Al-Nabki, Wesam; Fidalgo, Eduardo; Alegre, Enrique; Chaves, Deisy
Content-Based Features to Rank Influential Hidden Services of the Tor Darknet Artículo de revista
En: arXiv e-prints, arXiv:1910.02332, 2019.
Resumen | Enlaces | BibTeX | Etiquetas: Darknet, Feature extraction, Hidden Services, Influence Detection, Learning-to-Rank, TOR
@comment{arXiv preprint. The eprint identifier is taken from the url field; it replaces a garbled pages value that held a fragment of the arXiv identifier rather than a page range.}
@article{al-nabki_content-based_2019,
  author        = {Al-Nabki, Wesam and Fidalgo, Eduardo and Alegre, Enrique and Chaves, Deisy},
  title         = {Content-Based Features to Rank Influential Hidden Services of the {Tor} Darknet},
  journal       = {arXiv e-prints},
  year          = {2019},
  date          = {2019-01-01},
  eprint        = {1910.02332},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1910.02332},
  abstract      = {This paper introduces a content-based ranking framework to identify the most influential onion domains on the Tor Darknet. It models domains using 40 features from five sources (text, HTML, named entities, network topology, and visual content) and applies a Learning-to-Rank (LtR) approach for ranking. A case study on drug-related domains shows that (1) the listwise LtR method achieves an NDCG of 0.95 for the top-10, (2) the framework outperforms link-based ranking techniques, and (3) textual features (text, NER, HTML) offer the best balance of efficiency and accuracy. This system could aid law enforcement in detecting suspicious domains.},
  keywords      = {Darknet, Feature extraction, Hidden Services, Influence Detection, Learning-to-Rank, TOR},
  pubstate      = {published},
  tppubtype     = {article}
}
2017
Al-Nabki, Wesam; Fidalgo, Eduardo; Alegre, Enrique; González-Castro, Víctor
Detecting emerging products in Tor network based on k-shell graph decomposition Artículo de revista
En: 2017.
Resumen | Enlaces | BibTeX | Etiquetas: Ciberseguridad, Darknet, K-shell, Minería de Datos, Teoría de Grafos, TOR
@comment{Article record with an English title and a Spanish abstract. TODO(review): no journal field is present, although classic BibTeX requires one for an article entry; supply the venue once it is confirmed.}
@article{al_nabki_detecting_2017,
  author    = {Al-Nabki, Wesam and Fidalgo, Eduardo and Alegre, Enrique and González-Castro, Víctor},
  title     = {Detecting emerging products in {Tor} network based on k-shell graph decomposition},
  year      = {2017},
  date      = {2017-01-01},
  url       = {https://buleria.unileon.es/handle/10612/10718},
  urldate   = {2017-01-01},
  abstract  = {Este documento presenta un marco semiautomático para identificar productos populares y emergentes en la Darknet, utilizando un gráfico de correlaciones de productos (PCG) y el algoritmo k-Shell. Detectó MDMA y éxtasis como las drogas más relevantes, validando los resultados con informes internacionales. Esta herramienta ayuda a extraer información en mercados ilegales.},
  keywords  = {Ciberseguridad, Darknet, K-shell, Minería de Datos, Teoría de Grafos, TOR},
  pubstate  = {published},
  tppubtype = {article}
}