Publications
2020
1.
Al-Nabki, Wesam; Jáñez-Martino, Francisco; Carofilis-Vasco, Andrés; Fidalgo, Eduardo; Velasco-Mata, Javier
Improving named entity recognition in Tor Darknet with local distance neighbor feature Artículo de revista
En: arXiv preprint arXiv:2005.08746, 2020.
Resumen | Enlaces | BibTeX | Etiquetas: Gazetteer, NER, Tor Darknet
% arXiv preprint. The identifier is stored in eprint/archiveprefix instead of
% being embedded in the journal field. The month is taken from the YYMM prefix
% of the identifier (2005 = May 2020). The former date and urldate values
% (2020-01-01) predated the preprint itself and were removed.
% Percent signs in the abstract are escaped so the field is safe to typeset.
@article{al-nabki_improving_2020,
  title         = {Improving named entity recognition in {Tor} {Darknet} with local distance neighbor feature},
  author        = {Al-Nabki, Wesam and Jáñez-Martino, Francisco and Carofilis-Vasco, Andrés and Fidalgo, Eduardo and Velasco-Mata, Javier},
  journal       = {arXiv preprint},
  year          = {2020},
  month         = may,
  eprint        = {2005.08746},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2005.08746},
  abstract      = {This paper introduces a novel feature called Local Distance Neighbor (LDN) for named entity recognition in noisy user-generated texts, replacing the need for task-specific and costly gazetteers. The approach was tested on the W-NUT-2017 dataset, achieving state-of-the-art results for Group, Person, and Product categories. By adding 851 manually labeled samples, the method also demonstrated effectiveness in detecting named entities in the Tor Darknet, with F1 scores of 52.96\% and 50.57\%, aiding Law Enforcement Agencies in identifying entities related to weapons and drug selling.},
  keywords      = {Gazetteer, NER, Tor Darknet},
  pubstate      = {published},
  tppubtype     = {article}
}
This paper introduces a novel feature called Local Distance Neighbor (LDN) for named entity recognition in noisy user-generated texts, replacing the need for task-specific and costly gazetteers. The approach was tested on the W-NUT-2017 dataset, achieving state-of-the-art results for Group, Person, and Product categories. By adding 851 manually labeled samples, the method also demonstrated effectiveness in detecting named entities in the Tor Darknet, with F1 scores of 52.96% and 50.57%, aiding Law Enforcement Agencies in identifying entities related to weapons and drug selling.
2019
2.
Al-Nabki, Wesam
Supervised machine learning for classification mining and ranking of illegal web contents Tesis doctoral
Universidad de León, 2019.
Resumen | Enlaces | BibTeX | Etiquetas: Darknet, machine learning, NER, Pastebin, Tor Darknet
% Doctoral thesis. Only the year is recorded: the former date value
% (2019-01-01) duplicated the year field and looked like a placeholder day,
% so it was removed to avoid printing a false defence date.
% NOTE(review): the title may be missing commas ("classification, mining and
% ranking") - confirm against the Dialnet record before changing it.
@phdthesis{al-nabki_supervised_2019,
  title     = {Supervised machine learning for classification mining and ranking of illegal web contents},
  author    = {Al-Nabki, Wesam},
  school    = {Universidad de León},
  year      = {2019},
  url       = {https://dialnet.unirioja.es/servlet/dctes?codigo=261157},
  abstract  = {This thesis develops algorithms and datasets to classify and detect illegal activities in web domains, focusing on the Tor Darknet and services like Pastebin. Using machine learning, datasets like DUTA and DUTA-10K achieve high classification accuracy for Tor domains. Active Learning and Named Entity Recognition (NER) are used for classifying and identifying criminal content, while Graph Theory analyzes emerging products in Tor marketplaces. The thesis introduces ToRank for ranking influential onion domains, outperforming traditional ranking methods. It also compares content-based ranking techniques for detecting drug-related domains.},
  keywords  = {Darknet, machine learning, NER, Pastebin, Tor Darknet},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
This thesis develops algorithms and datasets to classify and detect illegal activities in web domains, focusing on the Tor Darknet and services like Pastebin. Using machine learning, datasets like DUTA and DUTA-10K achieve high classification accuracy for Tor domains. Active Learning and Named Entity Recognition (NER) are used for classifying and identifying criminal content, while Graph Theory analyzes emerging products in Tor marketplaces. The thesis introduces ToRank for ranking influential onion domains, outperforming traditional ranking methods. It also compares content-based ranking techniques for detecting drug-related domains.