Publications
2023
Joshi, Akanksha; Fidalgo, Eduardo; Alegre, Enrique; Fernández-Robles, Laura
DeepSumm: Exploiting topic models and sequence to sequence networks for extractive text summarization Journal Article
In: Expert Systems with Applications, vol. 211, pp. 118442, 2023, ISSN: 0957-4174.
Abstract | Links | BibTeX | Tags: Attention networks, Extractive, Seq2seq, Text summarization, Topic models
@article{JOSHI2023118442,
title = {DeepSumm: Exploiting topic models and sequence to sequence networks for extractive text summarization},
author = {Akanksha Joshi and Eduardo Fidalgo and Enrique Alegre and Laura Fernández-Robles},
url = {https://www.sciencedirect.com/science/article/pii/S0957417422015391},
doi = {10.1016/j.eswa.2022.118442},
issn = {0957-4174},
year = {2023},
date = {2023-01-01},
journal = {Expert Systems with Applications},
volume = {211},
pages = {118442},
abstract = {In this paper, we propose DeepSumm, a novel method based on topic modeling and word embeddings for the extractive summarization of single documents. Recent summarization methods based on sequence networks fail to capture the long range semantics of the document which are encapsulated in the topic vectors of the document. In DeepSumm, our aim is to utilize the latent information in the document estimated via topic vectors and sequence networks to improve the quality and accuracy of the summarized text. Each sentence is encoded through two different recurrent neural networks based on probabilistic topic distributions and word embeddings, and then a sequence to sequence network is applied to each sentence encoding. The outputs of the encoder and the decoder in the sequence to sequence networks are combined after weighting using an attention mechanism and converted into a score through a multi-layer perceptron network. We refer to the score obtained through the topic model as Sentence Topic Score (STS) and to the score generated through word embeddings as Sentence Content Score (SCS). In addition, we propose Sentence Novelty Score (SNS) and Sentence Position Score (SPS) and perform a weighted fusion of the four scores for each sentence in the document to compute a Final Sentence Score (FSS). The proposed DeepSumm framework was evaluated on the standard DUC 2002 benchmark and CNN/DailyMail datasets. Experimentally, it was demonstrated that our method captures both the global and the local semantic information of the document and essentially outperforms existing state-of-the-art approaches for extractive text summarization with ROUGE-1, ROUGE-2, and ROUGE-L scores of 53.2, 28.7 and 49.2 on DUC 2002 and 43.3, 19.0 and 38.9 on CNN/DailyMail dataset.},
keywords = {Attention networks, Extractive, Seq2seq, Text summarization, Topic models},
pubstate = {published},
tppubtype = {article}
}
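
The abstract above describes a concrete fusion step: four per-sentence scores (STS, SCS, SNS, SPS) are combined by weighted fusion into a Final Sentence Score (FSS), and the top-scoring sentences form the summary. A minimal Python sketch of that step follows, assuming the four scores are precomputed and normalized to [0, 1]; the function names, the example weights, and the top-k selection are illustrative assumptions, not the paper's tuned configuration.

# Sketch of the weighted score fusion described in the DeepSumm abstract.
# Assumption: sts, scs, sns, sps are precomputed and normalized to [0, 1].
from typing import Sequence

def final_sentence_scores(
    sts: Sequence[float],  # Sentence Topic Score (topic-model branch)
    scs: Sequence[float],  # Sentence Content Score (word-embedding branch)
    sns: Sequence[float],  # Sentence Novelty Score (penalizes redundancy)
    sps: Sequence[float],  # Sentence Position Score (favors leading sentences)
    weights: Sequence[float] = (0.4, 0.4, 0.1, 0.1),  # illustrative, not the paper's values
) -> list[float]:
    """Weighted fusion of the four scores into a Final Sentence Score (FSS)."""
    w1, w2, w3, w4 = weights
    return [w1 * t + w2 * c + w3 * n + w4 * p
            for t, c, n, p in zip(sts, scs, sns, sps)]

def extract_summary(sentences: list[str], fss: list[float], k: int = 3) -> list[str]:
    """Keep the k highest-FSS sentences, restored to document order."""
    top = sorted(range(len(sentences)), key=lambda i: fss[i], reverse=True)[:k]
    return [sentences[i] for i in sorted(top)]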
2022
Joshi, Akanksha; Fidalgo, Eduardo; Alegre, Enrique; Alaiz-Rodriguez, Rocio
RankSum—An unsupervised extractive text summarization based on rank fusion Journal Article
In: Expert Systems with Applications, vol. 200, pp. 116846, 2022, (Publisher: Pergamon).
Abstract | Links | BibTeX | Tags: Extractive Summarization, Sentence Ranking, Text summarization, Unsupervised Learning
@article{joshi_ranksumunsupervised_2022,
title = {RankSum—An unsupervised extractive text summarization based on rank fusion},
author = {Akanksha Joshi and Eduardo Fidalgo and Enrique Alegre and Rocio Alaiz-Rodriguez},
url = {https://www.sciencedirect.com/science/article/pii/S0957417422002998},
year = {2022},
date = {2022-01-01},
journal = {Expert Systems with Applications},
volume = {200},
pages = {116846},
abstract = {This paper introduces Ranksum, an approach for extractive text summarization that combines four multi-dimensional sentence features: topic information, semantic content, significant keywords, and position. It ranks sentences based on these features using an unsupervised method, followed by a weighted fusion to determine sentence significance. The method utilizes probabilistic topic models for topic ranking, sentence embeddings for semantic information, and graph-based strategies for identifying keywords. The approach also employs a novelty measure to avoid redundancy. Experimental results on datasets like CNN/DailyMail and DUC 2002 show that Ranksum outperforms existing summarization methods.},
note = {Publisher: Pergamon},
keywords = {Extractive Summarization, Sentence Ranking, Text summarization, Unsupervised Learning},
pubstate = {published},
tppubtype = {article}
}
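
The rank-fusion idea in the abstract also admits a short sketch: each feature (topic, semantic content, keywords, position) independently ranks the sentences, and the per-feature ranks are fused by a weighted sum of rank positions. The position-sum fusion rule and the weights below are assumptions for illustration; the paper's exact fusion scheme may differ.

# Sketch of unsupervised rank fusion over per-feature sentence rankings.
def fuse_rankings(rankings: list[list[int]], weights: list[float]) -> list[int]:
    """rankings[f] lists sentence indices best-to-worst for feature f.
    Returns sentence indices ordered by fused rank score (lower is better)."""
    n = len(rankings[0])
    fused = [0.0] * n
    for ranking, w in zip(rankings, weights):
        for position, sent_idx in enumerate(ranking):
            fused[sent_idx] += w * position
    return sorted(range(n), key=lambda i: fused[i])

# Example: four sentences ranked by two features, weighted equally.
order = fuse_rankings([[2, 0, 1, 3], [0, 2, 3, 1]], [0.5, 0.5])
# order == [0, 2, 1, 3]; sentences 0 and 2 tie for the best combined rank.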
2019
Domínguez, Víctor; Fidalgo, Eduardo; Biswas, Rubel; Alegre, Enrique; Fernández-Robles, Laura
Application of extractive text summarization algorithms to speech-to-text media Proceedings Article
In: Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4–6, 2019, Proceedings 14, pp. 540–550, 2019, (Publisher: Springer International Publishing).
Abstract | Links | BibTeX | Tags: AI, machine learning, natural language processing, speech to text, Text summarization
@inproceedings{dominguez_application_2019,
title = {Application of extractive text summarization algorithms to speech-to-text media},
author = {Víctor Domínguez and Eduardo Fidalgo and Rubel Biswas and Enrique Alegre and Laura Fernández-Robles},
url = {https://link.springer.com/chapter/10.1007/978-3-030-29859-3_46},
year = {2019},
date = {2019-01-01},
booktitle = {Hybrid Artificial Intelligent Systems: 14th International Conference, HAIS 2019, León, Spain, September 4–6, 2019, Proceedings 14},
pages = {540–550},
abstract = {This paper evaluates six extractive text summarization algorithms for speech-to-text summarization. The study assesses Luhn, TextRank, LexRank, LSA, SumBasic, and KLSum using ROUGE metrics on two datasets (DUC2001 and OWIDSum). Additionally, five speech documents from the ICSI Corpus were transcribed using Google Cloud Speech API and summarized. Results indicate that Luhn and TextRank perform best for extractive speech-to-text summarization.},
note = {Publisher: Springer International Publishing},
keywords = {AI, machine learning, natural language processing, speech to text, Text summarization},
pubstate = {published},
tppubtype = {inproceedings}
}
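
The pipeline this paper evaluates (transcribe speech, summarize extractively, score with ROUGE against a reference) can be sketched with off-the-shelf tools. The snippet below uses the sumy implementations of Luhn and TextRank together with the rouge-score package; both library choices are assumptions, since the paper does not name the implementations it used.

# Sketch of the summarize-then-score evaluation loop.
# Assumption: sumy and rouge-score stand in for the paper's implementations.
# (sumy's English tokenizer needs the NLTK punkt data to be installed.)
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from rouge_score import rouge_scorer

def summarize_and_score(transcript: str, reference: str, sentence_count: int = 5) -> dict:
    """Summarize a transcript with Luhn and TextRank, score each against the reference."""
    parser = PlaintextParser.from_string(transcript, Tokenizer("english"))
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    results = {}
    for name, summarizer in (("Luhn", LuhnSummarizer()), ("TextRank", TextRankSummarizer())):
        summary = " ".join(str(s) for s in summarizer(parser.document, sentence_count))
        results[name] = scorer.score(reference, summary)  # reference first, prediction second
    return results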