Publications
2020
Blanco-Medina, Pablo; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodríguez, Rocío; Jáñez-Martino, Francisco; Bonnici, Alexandra
Rectification and super-resolution enhancements for forensic text recognition Artículo de revista
En: Sensors, vol. 20, no 20, pp. 5850, 2020, (Publisher: MDPI).
Resumen | Enlaces | BibTeX | Etiquetas: Computer forensics, Super-Resolution, Text Recognition, text spotting, Tor Darknet
@article{blanco-medina_rectification_2020,
  title = {Rectification and super-resolution enhancements for forensic text recognition},
  author = {Pablo Blanco-Medina and Eduardo Fidalgo and Enrique Alegre and Rocío Alaiz-Rodríguez and Francisco Jáñez-Martino and Alexandra Bonnici},
  url = {https://www.mdpi.com/1424-8220/20/20/5850},
  doi = {10.3390/s20205850},
  year = {2020},
  date = {2020-01-01},
  journal = {Sensors},
  volume = {20},
  number = {20},
  pages = {5850},
  abstract = {This paper focuses on improving text extraction from images, a challenge often encountered in environments like the Tor Darknet and Child Sexual Abuse (CSA) content, where accurate text retrieval is essential for identifying illegal activities. The authors evaluate eight text recognizers and enhance performance by integrating rectification networks and super-resolution algorithms. Testing on multiple datasets (TOICO-1K and CSA-text) showed improvements, with the highest performance increase on the ICDAR 2015 dataset. The combination of rectification and super-resolution yielded the best results, particularly when using deep learning models like CNNs.},
  note = {Publisher: MDPI},
  keywords = {Computer forensics, Super-Resolution, Text Recognition, text spotting, Tor Darknet},
  pubstate = {published},
  tppubtype = {article}
}
Al-Nabki, Wesam; Jáñez-Martino, Francisco; Carofilis-Vasco, Andrés; Fidalgo, Eduardo; Velasco-Mata, Javier
Improving named entity recognition in Tor darknet with local distance neighbor feature Artículo de revista
En: arXiv preprint arXiv:2005.08746, 2020.
Resumen | Enlaces | BibTeX | Etiquetas: Gazetteer, NER, Tor Darknet
@article{al-nabki_improving_2020,
  title = {Improving named entity recognition in {Tor} darknet with local distance neighbor feature},
  author = {Wesam Al-Nabki and Francisco Jáñez-Martino and Andrés Carofilis-Vasco and Eduardo Fidalgo and Javier Velasco-Mata},
  url = {https://arxiv.org/abs/2005.08746},
  year = {2020},
  date = {2020-01-01},
  urldate = {2020-01-01},
  journal = {arXiv preprint arXiv:2005.08746},
  eprint = {2005.08746},
  eprinttype = {arXiv},
  abstract = {This paper introduces a novel feature called Local Distance Neighbor (LDN) for named entity recognition in noisy user-generated texts, replacing the need for task-specific and costly gazetteers. The approach was tested on the W-NUT-2017 dataset, achieving state-of-the-art results for Group, Person, and Product categories. By adding 851 manually labeled samples, the method also demonstrated effectiveness in detecting named entities in the Tor Darknet, with F1 scores of 52.96% and 50.57%, aiding Law Enforcement Agencies in identifying entities related to weapons and drug selling.},
  keywords = {Gazetteer, NER, Tor Darknet},
  pubstate = {published},
  tppubtype = {article}
}
2019
Joshi, Akanksha; Fidalgo, Eduardo; Alegre, Enrique; Fernández-Robles, Laura
SummCoder: An unsupervised framework for extractive text summarization based on deep auto-encoders Artículo de revista
En: Expert Systems with Applications, vol. 129, pp. 200–215, 2019, (Publisher: Pergamon).
Resumen | Enlaces | BibTeX | Etiquetas: Auto-Encoder, deep learning, Extractive Summarization, Extractive Text Summarization, Sentence Embedding, Tor Darknet
@article{joshi_summcoder_2019,
  title = {{SummCoder}: An unsupervised framework for extractive text summarization based on deep auto-encoders},
  author = {Akanksha Joshi and Eduardo Fidalgo and Enrique Alegre and Laura Fernández-Robles},
  url = {https://www.sciencedirect.com/science/article/pii/S0957417419302192},
  year = {2019},
  date = {2019-01-01},
  journal = {Expert Systems with Applications},
  volume = {129},
  pages = {200--215},
  abstract = {This paper introduces SummCoder, a method for extractive text summarization using three metrics: content relevance, novelty, and position relevance. The model performs well on datasets like DUC 2002, Blog Summarization, and a new dataset, TIDSumm, focused on web documents from the Tor network. SummCoder outperforms or matches state-of-the-art methods based on ROUGE metrics, providing useful applications for Law Enforcement Agencies.},
  note = {Publisher: Pergamon},
  keywords = {Auto-Encoder, deep learning, Extractive Summarization, Extractive Text Summarization, Sentence Embedding, Tor Darknet},
  pubstate = {published},
  tppubtype = {article}
}
Al-Nabki, Wesam
Supervised machine learning for classification mining and ranking of illegal web contents Tesis doctoral
Universidad de León, 2019.
Resumen | Enlaces | BibTeX | Etiquetas: Darknet, machine learning, NER, Pastebin, Tor Darknet
@phdthesis{al-nabki_supervised_2019,
  author    = {Wesam Al-Nabki},
  title     = {Supervised machine learning for classification mining and ranking of illegal web contents},
  school    = {Universidad de León},
  year      = {2019},
  date      = {2019-01-01},
  url       = {https://dialnet.unirioja.es/servlet/dctes?codigo=261157},
  keywords  = {Darknet, machine learning, NER, Pastebin, Tor Darknet},
  abstract  = {This thesis develops algorithms and datasets to classify and detect illegal activities in web domains, focusing on the Tor Darknet and services like Pastebin. Using machine learning, datasets like DUTA and DUTA-10K achieve high classification accuracy for Tor domains. Active Learning and Named Entity Recognition (NER) are used for classifying and identifying criminal content, while Graph Theory analyzes emerging products in Tor marketplaces. The thesis introduces ToRank for ranking influential onion domains, outperforming traditional ranking methods. It also compares content-based ranking techniques for detecting drug-related domains.},
  pubstate  = {published},
  tppubtype = {phdthesis}
}
Blanco-Medina, Pablo; Fidalgo, Eduardo; Alegre, Enrique; Jáñez-Martino, Francisco
Improving text recognition in Tor darknet with rectification and super-resolution techniques Artículo de revista
En: 2019, (Publisher: IET Digital Library).
Resumen | Enlaces | BibTeX | Etiquetas: Super-Resolution, Text Recognition, text spotting, Tor Darknet
% NOTE(review): this article entry has no journal field, which the article type requires (the listing renders an empty "En:"); confirm the venue and add it.
@article{blanco-medina_improving_2019,
  title = {Improving text recognition in Tor darknet with rectification and super-resolution techniques},
  author = {Pablo Blanco-Medina and Eduardo Fidalgo and Enrique Alegre and Francisco Jáñez-Martino},
  url = {https://ieeexplore.ieee.org/abstract/document/9136610},
  year = {2019},
  date = {2019-01-01},
  abstract = {This paper investigates combining super-resolution algorithms with a rectification network to improve text recognition in low-resolution images, particularly in the Tor darknet. The results show that combining these methods yields the best performance, with improvements of 3.77% on the ICDAR 2015 dataset and 3.41% on the TOICO-1K Tor dataset. Rectification alone outperforms super-resolution, but the combination provides the best results.},
  note = {Publisher: IET Digital Library},
  keywords = {Super-Resolution, Text Recognition, text spotting, Tor Darknet},
  pubstate = {published},
  tppubtype = {article}
}