% Conference paper describing the 2017 conversational speech recognition
% system evaluated on Switchboard / CallHome (ICASSP 2018).
% NOTE(review): the url host below looks like a proxy-rewritten mirror rather
% than the publisher's own page -- confirm the canonical address (or add a doi)
% before relying on it.
@inproceedings{xiong2018the,
  author    = {Xiong, Wayne and Wu, Lingfeng and Droppo, Jasha and Huang, Xuedong and Stolcke, Andreas},
  title     = {The {Microsoft} 2017 Conversational Speech Recognition System},
  booktitle = {Proc. {IEEE} {ICASSP}},
  publisher = {IEEE},
  year      = {2018},
  month     = apr,
  pages     = {5934--5938},
  url       = {http://approjects.co.za/?big=en-us/research/publication/conference-paper-microsoft-2017-conversational-speech-recognition-system/},
  abstract  = {We describe the latest version of Microsoft's conversational speech recognition system for the Switchboard and CallHome domains.  The system adds a CNN-BLSTM acoustic model to the set of model architectures we combined previously, and includes character-based and dialog session aware LSTM language models in rescoring.  For system combination we adopt a two-stage approach, whereby acoustic model posteriors are first combined at the senone/frame level, followed by a word-level voting via confusion networks.  We also added another language model rescoring step following the confusion network combination.  The resulting system yields a 5.1\% word error rate on the NIST 2000 Switchboard test set, and 9.8\% on the CallHome subset.},
}