@inproceedings{xiong2018session-level,
  author    = {Xiong, Wayne and Wu, Lingfeng and Zhang, Jun and Stolcke, Andreas},
  title     = {Session-Level Language Modeling for Conversational Speech},
  booktitle = {Proceedings of {EMNLP}},
  year      = {2018},
  month     = nov,
  pages     = {2764--2768},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.microsoft.com/en-us/research/publication/session-level-language-modeling-for-conversational-speech/},
  abstract  = {We propose to generalize language models for conversational speech recognition to allow them to operate across utterance boundaries and speaker changes, thereby capturing conversation-level phenomena such as adjacency pairs, lexical entrainment, and topical coherence. The model consists of a long-short-term memory (LSTM) recurrent network that reads the entire word-level history of a conversation, as well as information about turn taking and speaker overlap, in order to predict each next word. The model is applied in a rescoring framework, where the word history prior to the current utterance is approximated with preliminary recognition results. In experiments in the conversational telephone speech domain (Switchboard) we find that such a model gives substantial perplexity reductions over a standard LSTM-LM with utterance scope, as well as improvements in word error rate.},
}