@inproceedings{zhu2021leveraging,
  author       = {Zhu, Chenguang and Yang, Ziyi and Gmyr, Robert and Zeng, Michael and Huang, Xuedong},
  title        = {Leveraging Lead Bias for Zero-shot Abstractive News Summarization},
  booktitle    = {The 44th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)},
  organization = {ACM},
  year         = {2021},
  month        = {July},
  abstract     = {Lead bias is a common phenomenon in news summarization: the early parts of an article often contain the most salient information. While many algorithms exploit this fact in summary generation, it has a detrimental effect on teaching a model to discriminate and extract important information. We propose that lead bias can be leveraged in a simple and effective way in our favor to pre-train abstractive news summarization models on a large-scale unlabeled corpus: predicting the leading sentences using the rest of an article. We collect a massive news corpus and conduct careful data cleaning and filtering. We then apply the proposed self-supervised pre-training to the existing generation models BART and T5. Via extensive experiments on six benchmark datasets, we show that this approach can dramatically improve summary quality and achieve state-of-the-art results for zero-shot news summarization without any fine-tuning. For example, on the DUC-2003 dataset, the ROUGE-1 score of BART increases by 13.7% after the lead-bias pre-training.},
  url          = {https://www.microsoft.com/en-us/research/publication/make-lead-bias-in-your-favor-zero-shot-abstractive-news-summarization/},
}