% Conference paper: Microsoft 2017 conversational speech recognition system (ICASSP 2018).
% NOTE(review): the url host below does not look like the publisher's own site --
% confirm it, and prefer the canonical publication page or a doi field if available.
@inproceedings{xiong2018the,
  author    = {Xiong, Wayne and Wu, Lingfeng and Droppo, Jasha and Huang, Xuedong and Stolcke, Andreas},
  title     = {The {Microsoft} 2017 Conversational Speech Recognition System},
  booktitle = {Proc. IEEE ICASSP},
  year      = {2018},
  month     = apr,
  pages     = {5934--5938},
  publisher = {IEEE},
  url       = {http://approjects.co.za/?big=en-us/research/publication/conference-paper-microsoft-2017-conversational-speech-recognition-system/},
  abstract  = {We describe the latest version of Microsoft's conversational speech recognition system for the Switchboard and CallHome domains. The system adds a CNN-BLSTM acoustic model to the set of model architectures we combined previously, and includes character-based and dialog session aware LSTM language models in rescoring. For system combination we adopt a two-stage approach, whereby acoustic model posteriors are first combined at the senone/frame level, followed by a word-level voting via confusion networks. We also added another language model rescoring step following the confusion network combination. The resulting system yields a 5.1\% word error rate on the NIST 2000 Switchboard test set, and 9.8\% on the CallHome subset.},
}