% Conference paper: summary of the MISP 2022 Challenge, published in the
% ICASSP 2023 proceedings (IEEE).
% Maintenance notes:
%   - Author names use the "Last, First" form. The exporter had wrapped
%     multi-word given names in escaped braces, which typeset as literal
%     brace characters; those escapes are removed.
%   - Braces in title/booktitle protect acronyms and the challenge name from
%     style-driven sentence casing.
%   - No series field: the exported value was an exact copy of booktitle.
%   - The note field is kept verbatim from the export (copyright + event info).
@inproceedings{3592cbd842914709ab247d6150bbee56,
  author    = {Chen, Hang and Wu, Shilong and Dai, Yusheng and Wang, Zhe and Du, Jun and Lee, Chin Hui and Chen, Jingdong and Watanabe, Shinji and Siniscalchi, Sabato Marco and Scharenborg, Odette and Liu, Di Yuan and Yin, Bao Cai and Pan, Jia and Gao, Jian Qing and Liu, Cong},
  title     = {Summary on the {Multimodal Information Based Speech Processing} ({MISP}) 2022 Challenge},
  booktitle = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2023},
  doi       = {10.1109/ICASSP49357.2023.10433931},
  language  = {english},
  keywords  = {audio-visual, MISP challenge, speaker diarization, speech enhancement, speech recognition},
  abstract  = {The Multimodal Information based Speech Processing (MISP) 2022 challenge aimed to enhance speech processing performance in harsh acoustic environments by leveraging additional modalities such as video or text. The challenge included two tracks: audio-visual speaker diarization (AVSD) and audio-visual diarization and recognition (AVDR). The training material was based on previous MISP 2021 recordings, but we have accurately synchronized audio and visual data. Additionally, a new evaluation set was provided. This paper gives an overview of the challenge setup, presents the results, and summarizes the effective techniques employed by the participants. We also analyze the current technical challenges and suggest directions for future research in AVSD and AVDR.},
  note      = {Publisher Copyright: {\textcopyright} 2023 IEEE.; 48th IEEE International Conference on Acoustics, Speech and Signal Processing, ICASSP 2023 ; Conference date: 04-06-2023 Through 10-06-2023},
}