@inproceedings{zhu2021leveraging,
  author    = {Zhu, Chenguang and Yang, Ziyi and Gmyr, Robert and Zeng, Michael and Huang, Xuedong},
  title     = {Leveraging Lead Bias for Zero-shot Abstractive News Summarization},
  booktitle = {Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR '21)},
  year      = {2021},
  month     = {July},
  abstract  = {Lead bias is a common phenomenon in news summarization: the early parts of an article often contain the most salient information. While many algorithms exploit this fact in summary generation, it has a detrimental effect on teaching a model to discriminate and extract important information. We propose that lead bias can be leveraged in a simple and effective way in our favor to pre-train abstractive news summarization models on a large-scale unlabeled corpus: predicting the leading sentences from the rest of an article. We collect a massive news corpus and conduct careful data cleaning and filtering. We then apply the proposed self-supervised pre-training to the existing generation models BART and T5. Via extensive experiments on six benchmark datasets, we show that this approach can dramatically improve summary quality and achieve state-of-the-art results for zero-shot news summarization without any fine-tuning. For example, on the DUC-2003 dataset, the ROUGE-1 of BART increases by 13.7% after lead-bias pre-training.},
  publisher = {ACM},
  url       = {https://www.microsoft.com/en-us/research/publication/leveraging-lead-bias-for-zero-shot-abstractive-news-summarization/},
}
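The self-supervised objective the abstract describes can be sketched in a few lines: each unlabeled article yields a (source, target) pair where the lead sentences act as a pseudo-summary and the remainder of the article is the model input. The sketch below is not the authors' code; the lead size k=3 and the regex sentence splitter are illustrative assumptions.

```python
# Minimal sketch of the lead-bias pre-training pair construction described in
# the abstract (not the authors' implementation). Assumptions: lead size k=3
# and a naive regex sentence splitter.
import re

def make_lead_bias_pair(article: str, k: int = 3):
    """Turn one unlabeled article into a (source, target) pre-training pair.

    target: the first k sentences, treated as a pseudo-summary
    source: the rest of the article, from which the lead is "predicted"
    """
    # Naive splitter on sentence-final punctuation; a real pipeline would use
    # a proper sentence tokenizer and the paper's data cleaning and filtering.
    sentences = re.split(r"(?<=[.!?])\s+", article.strip())
    if len(sentences) <= k:
        return None  # too short to form a useful pair; filter it out
    target = " ".join(sentences[:k])
    source = " ".join(sentences[k:])
    return source, target

# Usage: the resulting pairs can be fed to any seq2seq model such as BART or
# T5, with `source` as the encoder input and `target` as the decoder label.
```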