% TFNet paper (Jiang et al.), presented at ICASSP 2022.
% The month uses the standard BibTeX macro so styles can format/localise it.
% NOTE(review): url restored to the canonical microsoft.com host; the previous
% approjects.co.za/?big=... form looked like a mirror rewrite -- confirm it resolves.
@inproceedings{jiang2022end-to-end,
  author    = {Jiang, Xue and Peng, Xiulian and Zheng, Chengyu and Xue, Huaying and Zhang, Yuan and Lu, Yan},
  title     = {End-to-End Neural Speech Coding for Real-Time Communications},
  booktitle = {{IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP} 2022)},
  year      = {2022},
  month     = feb,
  abstract  = {Deep-learning based methods have shown their advantages in audio coding over traditional ones but limited attention has been paid on real-time communications (RTC). This paper proposes the TFNet, an end-to-end neural speech codec with low latency for RTC. It takes an encoder-temporal filtering-decoder paradigm that has seldom been investigated in audio coding. An interleaved structure is proposed for temporal filtering to capture both short-term and long-term temporal dependencies. Furthermore, with end-to-end optimization, the TFNet is jointly optimized with speech enhancement and packet loss concealment, yielding a one-for-all network for three tasks. Both subjective and objective results demonstrate the efficiency of the proposed TFNet.},
  url       = {https://www.microsoft.com/en-us/research/publication/end-to-end-neural-speech-coding-for-real-time-communications/},
}