@inproceedings{mendis2016parallelizing,
author = {Mendis, Charith and Droppo, Jasha and Maleki, Saeed and Musuvathi, Madan and Mytkowicz, Todd and Zweig, Geoffrey},
title = {Parallelizing WFST Speech Decoders},
booktitle = {ICASSP 2016},
year = {2016},
month = {March},
abstract = {The performance-intensive part of a large-vocabulary continuous speech-recognition system is the Viterbi computation that determines the sequence of words that are most likely to generate the acoustic-state scores extracted from an input utterance. This paper presents an efficient parallel algorithm for Viterbi. The key idea is to partition the per-frame computation among threads to minimize inter-thread communication despite traversing a large irregular acoustic and language model graphs. Together with a per-thread beam search, load balancing language-model lookups, and memory optimizations, we achieve a 6:67 speedup over an highly-optimized production-quality WFST-based speech decoder. On a 200,000 word vocabulary and a 59 million ngram model, our decoder runs at 0:27 real time while achieving a word-error rate of 14.81% on 6214 labeled utterances from Voice Search data.},
publisher = {IEEE},
url = {http://approjects.co.za/?big=en-us/research/publication/parallelizing-wfst-speech-decoders/},
edition = {ICASSP 2016},
}