@inproceedings{xiong2018session-level,
  author    = {Xiong, Wayne and Wu, Lingfeng and Zhang, Jun and Stolcke, Andreas},
  title     = {Session-Level Language Modeling for Conversational Speech},
  booktitle = {Proceedings of {EMNLP}},
  year      = {2018},
  month     = nov,
  pages     = {2764--2768},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.microsoft.com/en-us/research/publication/session-level-language-modeling-for-conversational-speech/},
  abstract  = {We propose to generalize language models for conversational speech recognition to allow them to operate across utterance boundaries and speaker changes, thereby capturing conversation-level phenomena such as adjacency pairs, lexical entrainment, and topical coherence. The model consists of a long-short-term memory (LSTM) recurrent network that reads the entire word-level history of a conversation, as well as information about turn taking and speaker overlap, in order to predict each next word. The model is applied in a rescoring framework, where the word history prior to the current utterance is approximated with preliminary recognition results. In experiments in the conversational telephone speech domain (Switchboard) we find that such a model gives substantial perplexity reductions over a standard LSTM-LM with utterance scope, as well as improvements in word error rate.},
}