% NOTE(review): the url host below looks like a rewritten/proxied address rather than
% the publisher's own site -- confirm the canonical URL (or add a doi) before relying on it.
@inproceedings{xiong2017the,
  author    = {Xiong, Wayne and Droppo, Jasha and Huang, Xuedong and Seide, Frank and Seltzer, Mike and Stolcke, Andreas and Yu, Dong and Zweig, Geoffrey},
  title     = {The {Microsoft} 2016 Conversational Speech Recognition System},
  booktitle = {Proc. IEEE ICASSP},
  year      = {2017},
  month     = mar,
  pages     = {5255--5259},
  publisher = {IEEE},
  url       = {http://approjects.co.za/?big=en-us/research/publication/microsoft-2016-conversational-speech-recognition-system/},
  abstract  = {We describe Microsoft's conversational speech recognition system, in which we combine recent developments in neural-network-based acoustic and language modeling to advance the state of the art on the Switchboard recognition task. Inspired by machine learning ensemble techniques, the system uses a range of convolutional and recurrent neural networks. I-vector modeling and lattice-free MMI training provide significant gains for all acoustic model architectures. Language model rescoring with multiple forward and backward running RNNLMs, and word posterior-based system combination provide a 20\% boost. The best single system uses a ResNet architecture acoustic model with RNNLM rescoring, and achieves a word error rate of 6.9\% on the NIST 2000 Switchboard task. The combined system has an error rate of 6.2\%, representing an improvement over previously reported results on this benchmark task.},
}