% Conference paper: Meng, Gaur, Li and Gong (2019), character-aware
% attention-based end-to-end speech recognition.
% - month uses the standard unquoted three-letter macro so that the
%   bibliography style controls abbreviation and localization.
% - Percent signs in the abstract are escaped so styles that print the
%   abstract do not treat them as TeX comment characters.
% NOTE(review): the url host does not look like the publisher's own domain;
%   confirm the canonical landing page (or add a doi) before relying on it.
@inproceedings{meng2019character-aware,
  author       = {Meng, Zhong and Gaur, Yashesh and Li, Jinyu and Gong, Yifan},
  title        = {Character-aware attention-based end-to-end speech recognition},
  booktitle    = {Automatic Speech Recognition and Understanding Workshop},
  organization = {IEEE},
  year         = {2019},
  month        = dec,
  abstract     = {Predicting words and subword units (WSUs) as the output has shown to be effective for the attention-based encoder-decoder (AED) model in end-to-end speech recognition. However, as one input to the decoder recurrent neural network (RNN), each WSU embedding is learned independently through context and acoustic information in a purely data-driven fashion. Little effort has been made to explicitly model the morphological relationships among WSUs. In this work, we propose a novel character-aware (CA) AED model in which each WSU embedding is computed by summarizing the embeddings of its constituent characters using a CA-RNN. This WSU-independent CA-RNN is jointly trained with the encoder, the decoder and the attention network of a conventional AED to predict WSUs. With CA-AED, the embeddings of morphologically similar WSUs are naturally and directly correlated through the CA-RNN in addition to the semantic and acoustic relations modeled by a traditional AED. Moreover, CA-AED significantly reduces the model parameters in a traditional AED by replacing the large pool of WSU embeddings with a much smaller set of character embeddings. On a 3400 hours Microsoft Cortana dataset, CA-AED achieves up to 11.9\% relative WER improvement over a strong AED baseline with 27.1\% fewer model parameters.},
  url          = {http://approjects.co.za/?big=en-us/research/publication/character-aware-attention-based-end-to-end-speech-recognition/},
}