@unpublished{liu2021very,
author = {Liu, Xiaodong and Duh, Kevin and Liu, Liyuan and Gao, Jianfeng},
title = {Very Deep Transformers for Neural Machine Translation},
year = {2021},
month = {October},
abstract = {We explore the application of very deep Transformer models for Neural Machine Translation (NMT). Using a simple yet effective initialization technique that stabilizes training, we show that it is feasible to build standard Transformer-based models with up to 60 encoder layers and 12 decoder layers. These deep models outperform their baseline 6-layer counterparts by as much as 2.5 BLEU, and achieve new state-of-the-art benchmark results on WMT14 English-French (43.8 BLEU and 46.4 BLEU with back-translation) and WMT14 English-German (30.1 BLEU). The code and trained models are publicly available on GitHub.},
url = {https://www.microsoft.com/en-us/research/publication/very-deep-transformers-for-neural-machine-translation/},
}