@inproceedings{zhu2021leveraging,
  author    = {Zhu, Chenguang and Yang, Ziyi and Gmyr, Robert and Zeng, Michael and Huang, Xuedong},
  title     = {Leveraging Lead Bias for Zero-shot Abstractive News Summarization},
  booktitle = {Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '21)},
  year      = {2021},
  month     = {July},
  abstract  = {Lead bias is a common phenomenon in news summarization: the early parts of an article often contain the most salient information. While many algorithms exploit this fact in summary generation, it has a detrimental effect on teaching a model to discriminate and extract important information. We propose that lead bias can be leveraged in a simple and effective way in our favor to pre-train abstractive news summarization models on a large-scale unlabeled corpus: predicting the leading sentences from the rest of an article. We collect a massive news corpus and conduct careful data cleaning and filtering. We then apply the proposed self-supervised pre-training to the existing generation models BART and T5. Via extensive experiments on six benchmark datasets, we show that this approach can dramatically improve summary quality and achieve state-of-the-art results for zero-shot news summarization without any fine-tuning. For example, on the DUC-2003 dataset, the ROUGE-1 of BART increases by 13.7% after lead-bias pre-training.},
  publisher = {ACM},
  url       = {https://www.microsoft.com/en-us/research/publication/leveraging-lead-bias-for-zero-shot-abstractive-news-summarization/},
}
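The self-supervised objective the abstract describes can be sketched in a few lines: each unlabeled article yields a (source, target) pair where the lead sentences act as a pseudo-summary and the remainder of the article is the model input. The sketch below is not the authors' code; the lead size k=3 and the regex sentence splitter are illustrative assumptions.

```python
# Minimal sketch of the lead-bias pre-training pair construction described in
# the abstract (not the authors' implementation). Assumptions: lead size k=3
# and a naive regex sentence splitter.
import re

def make_lead_bias_pair(article: str, k: int = 3):
    """Turn one unlabeled article into a (source, target) pre-training pair.

    target: the first k sentences, treated as a pseudo-summary
    source: the rest of the article, from which the lead is "predicted"
    """
    # Naive splitter on sentence-final punctuation; a real pipeline would use
    # a proper sentence tokenizer and the paper's data cleaning and filtering.
    sentences = re.split(r"(?<=[.!?])\s+", article.strip())
    if len(sentences) <= k:
        return None  # too short to form a useful pair; filter it out
    target = " ".join(sentences[:k])
    source = " ".join(sentences[k:])
    return source, target

# Usage: the resulting pairs can be fed to any seq2seq model such as BART or
# T5, with `source` as the encoder input and `target` as the decoder label.
```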