{"id":748330,"date":"2021-05-28T10:21:26","date_gmt":"2021-05-28T17:21:26","guid":{"rendered":"https:\/\/www.microsoft.com\/en-us\/research\/?post_type=msr-event&p=748330"},"modified":"2025-08-06T11:51:22","modified_gmt":"2025-08-06T18:51:22","slug":"icassp-2021","status":"publish","type":"msr-event","link":"https:\/\/www.microsoft.com\/en-us\/research\/event\/icassp-2021\/","title":{"rendered":"Microsoft at ICASSP 2021"},"content":{"rendered":"\n\n
Website:<\/strong> ICASSP 2021 (opens in new tab)<\/span><\/a>Opens in a new tab<\/span><\/p>\n Microsoft is proud to be a Silver sponsor of the 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) event (opens in new tab)<\/span><\/a>. See more details on our contributions below.<\/p>\n <\/p>\n The following Microsoft researchers will chair sessions at the conference.<\/p>\n Zhuo Chen<\/a> All times are displayed in\u00a0Eastern Daylight Time (UTC -4)<\/p>\n 10:00 \u2013 13:30 | Tutorial<\/p>\n Distant conversational speech recognition and analysis: Recent advances, and trends towards end-to-end optimization<\/strong><\/p>\n Presenters: Keisuke Kinoshita, Yusuke Fujita, Naoyuki Kanda<\/a>, Shinji Watanabe<\/p>\n 18:00 \u2013 19:00<\/p>\n Young Professionals Panel Discussion<\/strong><\/p>\n Moderator: Subhro Das 13:00\u00a0\u2013\u00a013:45\u00a0|\u00a0Speech Synthesis 1: Architecture<\/p>\n Lightspeech: Lightweight and Fast Text to Speech with Neural Architecture Search<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Renqian\u00a0Luo,\u00a0Xu Tan<\/a>,\u00a0Rui Wang<\/a>,\u00a0Tao Qin<\/a>,\u00a0Jinzhu\u00a0Li (opens in new tab)<\/span><\/a>,\u00a0Sheng Zhao (opens in new tab)<\/span><\/a>,\u00a0Enhong\u00a0Chen,\u00a0Tie-Yan Liu<\/a><\/p>\n 13:00\u00a0\u2013\u00a013:45\u00a0|\u00a0Speech Synthesis 1: Architecture<\/p>\n A New\u00a0High Quality\u00a0Trajectory Tiling Based Hybrid TTS In Real Time<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Feng-Long Xie, Xin-Hui Li, Wen-Chao\u00a0Su, Li Lu,\u00a0Frank K. Soong<\/a><\/p>\n 13:00\u00a0\u2013\u00a013:45\u00a0|\u00a0Language Modeling 1: Fusion and Training for End-to-End ASR<\/p>\n Internal Language Model Training for Domain-Adaptive End-To-End Speech Recognition<\/strong><\/a><\/p>\n Zhong Meng<\/a>,\u00a0Naoyuki Kanda<\/a>,\u00a0Yashesh Gaur (opens in new tab)<\/span><\/a>,\u00a0Sarangarajan Parthasarathy<\/a>,\u00a0Eric Sun,\u00a0Liang Lu (opens in new tab)<\/span><\/a>,\u00a0Xie Chen<\/a>,\u00a0Jinyu Li<\/a>,\u00a0Yifan Gong<\/a><\/p>\n 13:00\u00a0\u2013\u00a013:45\u00a0|\u00a0Audio and Speech Source Separation 1: Speech Separation (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Zhuo Chen<\/a><\/p>\n Rethinking The Separation Layers\u00a0In\u00a0Speech Separation Networks<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Yi Luo,\u00a0Zhuo Chen<\/a>, Cong Han, Chenda Li,\u00a0Tianyan Zhou (opens in new tab)<\/span><\/a>, Nima\u00a0Mesgarani<\/p>\n 13:00 \u2013 13:45 | Deep Learning Training Methods 3 (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Jinyu Li<\/a><\/p>\n 13:00\u00a0\u2013\u00a013:45\u00a0|\u00a0Brain-Computer Interfaces<\/p>\n Decoding Music Attention from \u201cEEG Headphones\u201d: A User-Friendly Auditory Brain-Computer Interface<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Wenkang\u00a0An, Barbara Shinn-Cunningham,\u00a0Hannes Gamper<\/a>,\u00a0Dimitra Emmanouilidou<\/a>,\u00a0David Johnston<\/a>,\u00a0Mihai Jalobeanu<\/a>,\u00a0Edward Cutrell<\/a>,\u00a0Andrew Wilson<\/a>, Kuan-Jung Chiang,\u00a0Ivan Tashev<\/a><\/p>\n 14:00\u00a0\u2013\u00a014:45\u00a0|\u00a0Speech Enhancement 1: Speech Separation (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Takuya Yoshioka<\/a><\/p>\n Dual-Path Modeling for Long Recording Speech Separation in Meetings<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Chenda Li,\u00a0Zhuo Chen<\/a>, Yi Luo, Cong Han,\u00a0Tianyan Zhou (opens in new tab)<\/span><\/a>, Keisuke Kinoshita, Marc Delcroix, Shinji Watanabe, Yanmin Qian<\/p>\n 
14:00\u00a0\u2013\u00a014:45\u00a0|\u00a0Speech Enhancement 1: Speech Separation<\/p>\n Continuous Speech Separation with Conformer<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Sanyuan\u00a0Chen,\u00a0Yu Wu<\/a>,\u00a0Zhuo Chen<\/a>,\u00a0Jian Wu (opens in new tab)<\/span><\/a>,\u00a0Jinyu Li<\/a>,\u00a0Takuya Yoshioka<\/a>,\u00a0Chengyi Wang (opens in new tab)<\/span><\/a>,\u00a0Shujie Liu<\/a>,\u00a0Ming Zhou (opens in new tab)<\/span><\/a><\/p>\n 14:00 \u2013 14:45 | Speech Enhancement 2: Speech Separation and Dereverberation (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Takuya Yoshioka<\/a><\/p>\n 14:00\u00a0\u2013\u00a014:45\u00a0|\u00a0Speaker Recognition 1: Benchmark Evaluation<\/p>\n Microsoft Speaker\u00a0Diarization\u00a0System for the\u00a0Voxceleb\u00a0Speaker Recognition Challenge 2020<\/strong><\/a><\/p>\n Xiong Xiao (opens in new tab)<\/span><\/a>,\u00a0Naoyuki Kanda<\/a>,\u00a0Zhuo Chen<\/a>,\u00a0Tianyan Zhou (opens in new tab)<\/span><\/a>,\u00a0Takuya Yoshioka<\/a>,\u00a0Sanyuan Chen (opens in new tab)<\/span><\/a>,\u00a0Yong Zhao (opens in new tab)<\/span><\/a>,\u00a0Gang Liu (opens in new tab)<\/span><\/a>,\u00a0Yu Wu<\/a>,\u00a0Jian Wu (opens in new tab)<\/span><\/a>,\u00a0Shujie Liu<\/a>,\u00a0Jinyu Li<\/a>,\u00a0Yifan Gong<\/a><\/p>\n 14:00\u00a0\u2013\u00a014:45\u00a0|\u00a0Dialogue Systems 2: Response Generation<\/p>\n Topic-Aware Dialogue Generation with Two-Hop Based Graph Attention<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Shijie\u00a0Zhou, Wenge Rong,\u00a0Jianfei\u00a0Zhang,\u00a0Yanmeng\u00a0Wang,\u00a0Libin Shi (opens in new tab)<\/span><\/a>, Zhang Xiong<\/p>\n 16:30\u00a0\u2013\u00a017:15\u00a0|\u00a0Speech Recognition 4: Transformer Models 2<\/p>\n Developing Real-Time Streaming Transformer Transducer for Speech Recognition on Large-Scale Dataset<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Xie Chen<\/a>,\u00a0Yu Wu<\/a>,\u00a0Zhenghao Wang (opens in new tab)<\/span><\/a>,\u00a0Shujie Liu<\/a>,\u00a0Jinyu Li<\/a><\/p>\n 16:30\u00a0\u2013\u00a017:15\u00a0|\u00a0Active Noise Control, Echo Reduction, and Feedback Reduction 2: Active Noise Control and Echo Cancellation (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Hannes Gamper<\/a><\/p>\n ICASSP 2021 Acoustic Echo Cancellation Challenge: Datasets, Testing Framework, and Results<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Kusha Sridhar,\u00a0Ross Cutler (opens in new tab)<\/span><\/a>,\u00a0Ando Saabas (opens in new tab)<\/span><\/a>,\u00a0Tanel Parnamaa,\u00a0Markus Loide (opens in new tab)<\/span><\/a>,\u00a0Hannes Gamper<\/a>,\u00a0Sebastian Braun<\/a>,\u00a0Robert Aichner<\/a>,\u00a0Sriram Srinivasan (opens in new tab)<\/span><\/a><\/p>\n 16:30\u00a0\u2013\u00a017:15\u00a0|\u00a0Learning (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Zhong Meng<\/a><\/p>\n Sequence-Level Self-Teaching Regularization<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Eric Sun,\u00a0Liang Lu (opens in new tab)<\/span><\/a>,\u00a0Zhong Meng<\/a>,\u00a0Yifan Gong<\/a><\/p>\n 13:00\u00a0\u2013\u00a013:45\u00a0|\u00a0Language Understanding 1: End-to-end Speech Understanding 1<\/p>\n Speech-Language Pre-Training for End-to-End Spoken Language Understanding<\/strong><\/a><\/p>\n Yao Qian<\/a>, Ximo\u00a0Bian,\u00a0Yu Shi<\/a>,\u00a0Naoyuki Kanda<\/a>,\u00a0Leo Shen,\u00a0Zhen Xiao (opens in new tab)<\/span><\/a>,\u00a0Michael Zeng<\/a><\/p>\n 13:00\u00a0\u2013\u00a013:45\u00a0|\u00a0Audio and Speech Source Separation 4: Multi-Channel Source Separation<\/p>\n DBnet:\u00a0Doa-Driven 
Beamforming Network for end-to-end Reverberant Sound Source Separation<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Ali\u00a0Aroudi,\u00a0Sebastian Braun<\/a><\/p>\n 14:00\u00a0\u2013\u00a014:45\u00a0|\u00a0Speech Enhancement 4: Multi-channel Processing<\/p>\n Don\u2019t Shoot Butterfly with Rifles: Multi-Channel Continuous Speech Separation with Early Exit Transformer<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Sanyuan\u00a0Chen,\u00a0Yu Wu<\/a>,\u00a0Zhuo Chen<\/a>,\u00a0Takuya Yoshioka<\/a>,\u00a0Shujie Liu<\/a>,\u00a0Jinyu Li<\/a>,\u00a0Xiangzhan\u00a0Yu<\/p>\n 14:00\u00a0\u2013\u00a014:45\u00a0|\u00a0Matrix Factorization and Applications<\/p>\n Cold Start Revisited: A Deep Hybrid Recommender with Cold-Warm Item Harmonization<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Oren Barkan,\u00a0Roy Hirsch (opens in new tab)<\/span><\/a>,\u00a0Ori Katz,\u00a0Avi Caciularu (opens in new tab)<\/span><\/a>,\u00a0Yoni Weill,\u00a0Noam Koenigstein (opens in new tab)<\/span><\/a><\/p>\n 14:00\u00a0\u2013\u00a014:45\u00a0|\u00a0Biological Image Analysis<\/p>\n CMIM: Cross-Modal Information Maximization\u00a0For\u00a0Medical Imaging<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Tristan Sylvain, Francis Dutil, Tess Berthier, Lisa Di\u00a0Jorio, Margaux Luck,\u00a0Devon Hjelm<\/a>, Yoshua\u00a0Bengio<\/p>\n 15:30\u00a0\u2013\u00a016:15\u00a0|\u00a0Speech Recognition 8: Multilingual Speech Recognition<\/p>\n Multi-Dialect Speech Recognition in English Using Attention on Ensemble of Experts<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Amit Das (opens in new tab)<\/span><\/a>,\u00a0Kshitiz Kumar (opens in new tab)<\/span><\/a>,\u00a0Jian Wu (opens in new tab)<\/span><\/a><\/p>\n 15:30\u00a0\u2013\u00a016:15\u00a0|\u00a0Quality and Intelligibility Measures<\/p>\n MBNET: MOS Prediction for Synthesized Speech with Mean-Bias Network<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Yichong\u00a0Leng,\u00a0Xu Tan<\/a>,\u00a0Sheng Zhao (opens in new tab)<\/span><\/a>,\u00a0Frank K. 
Soong<\/a>, Xiang-Yang Li,\u00a0Tao Qin<\/a><\/p>\n 15:30\u00a0\u2013\u00a016:15\u00a0|\u00a0Quality and Intelligibility Measures<\/p>\n Crowdsourcing Approach for Subjective Evaluation of Echo Impairment<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Ross Cutler (opens in new tab)<\/span><\/a>, Babak\u00a0Nadari,\u00a0Markus Loide (opens in new tab)<\/span><\/a>,\u00a0Sten Sootla (opens in new tab)<\/span><\/a>,\u00a0Ando Saabas (opens in new tab)<\/span><\/a><\/p>\n 16:30 \u2013 17:15 | Speech Recognition 9: Confidence Measures (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Yifan Gong<\/a><\/p>\n 16:30 \u2013 17:15 | Speech Recognition 10: Robustness to Human Speech Variability (opens in new tab)<\/span><\/a><\/p>\n Session Chair: Yifan Gong<\/a><\/p>\n 16:30\u00a0\u2013\u00a017:15\u00a0|\u00a0Speech Processing 2: General Topics<\/p>\n Dnsmos: A Non-Intrusive Perceptual Objective Speech Quality Metric to Evaluate Noise Suppressors<\/strong> (opens in new tab)<\/span><\/a><\/p>\n Chandan K A Reddy<\/a>,\u00a0Vishak Gopal<\/a>,\u00a0Ross Cutler (opens in new tab)<\/span><\/a><\/p>\n 16:30\u00a0\u2013\u00a017:15\u00a0|\u00a0Style and Text Normalization<\/p>\n Generating Human Readable Transcript for Automatic Speech Recognition with Pre-Trained Language Model<\/strong><\/a><\/p>\n Junwei Liao,\u00a0Yu Shi<\/a>,\u00a0Ming Gong<\/a>,\u00a0Linjun Shou<\/a>,\u00a0Sefik Eskimez<\/a>,\u00a0Liyang Lu<\/a>, Hong Qu,\u00a0Michael Zeng<\/a><\/p>\n 16:30\u00a0\u2013\u00a017:15\u00a0|\u00a0Modeling, Analysis and Synthesis of Acoustic Environments 3: Acoustic Analysis<\/p>\nSession Chairs<\/h3>\n
\nHannes Gamper<\/a>
\nYifan Gong<\/a>
\nJinyu Li<\/a>
\nZhong Meng<\/a>
\nChandan K A Reddy<\/a>
\nIvan Tashev<\/a>
\nTakuya Yoshioka<\/a>Opens in a new tab<\/span><\/p>\nMonday, June 7<\/h2>\n
\nPanelists:\u00a0Sabrina Rashid, Vanessa Testoni,\u00a0Hamid\u00a0Palangi<\/a><\/p>\n
\nTuesday, June 8<\/h2>\n
\nWednesday, June 9<\/h2>\n