% Conference paper: Gao and Lee, distribution-based pruning of n-gram backoff
% language models (2000).
% NOTE(review): booktitle was missing from the exported record and has been
% filled in as the ACL 2000 annual meeting proceedings; confirm against the
% publisher record before relying on it.
% NOTE(review): the url host looks like a proxy/mirror rewrite, not the
% publisher's own domain; it is kept unchanged here. Confirm the canonical
% URL and prefer adding a doi.
@inproceedings{gao2000distribution-based,
  author    = {Gao, Jianfeng and Lee, Kai-Fu},
  title     = {Distribution-based Pruning of Backoff Language Models},
  booktitle = {Proceedings of the 38th Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  year      = {2000},
  month     = oct,
  url       = {http://approjects.co.za/?big=en-us/research/publication/distribution-based-pruning-of-backoff-language-models/},
  abstract  = {We propose a distribution-based pruning of n-gram backoff language models. Instead of the conventional approach of pruning n-grams that are infrequent in training data, we prune n-grams that are likely to be infrequent in a new document. Our method is based on the n-gram distribution i.e. the probability that an n-gram occurs in a new document. Experimental results show that our method performed 7-9\% (word perplexity reduction) better than conventional cutoff methods.},
}