@inproceedings{gao2000n-gram,
    author   = {Gao, Jianfeng and Li, Mingjing and Lee, Kai-Fu},
    title    = {N-gram Distribution Based Language Model Adaptation},
    year     = {2000},
    month    = {October},
    abstract = {This paper presents two techniques for language model (LM) adaptation. The first aims to build a more general LM. We propose a distribution-based pruning of n-gram LMs, in which we prune n-grams that are likely to be infrequent in a new document. Experimental results show that the distribution-based pruning method performed up to 9% better (in word perplexity reduction) than conventional cutoff methods. Moreover, the pruning method results in a more general n-gram backoff model, in spite of the domain, style, or temporal bias in the training data. The second aims to build a more task-specific LM. We propose an n-gram distribution adaptation method for LM training. Given a large set of out-of-task training data, called the training set, and a small set of task-specific training data, called the seed set, we adapt the LM towards the task by adjusting the n-gram distribution in the training set to match that in the seed set. Experimental results show non-trivial improvements over conventional methods.},
    url      = {https://www.microsoft.com/en-us/research/publication/n-gram-distribution-based-language-model-adaptation/},
}