% Conference paper in the NCMMSC 2023 proceedings (Communications in Computer
% and Information Science series). The publisher value is double-braced so that
% the literal " and " inside it is not read as a list separator.
@inproceedings{e833b90b74a34008993076e5581f9db1,
  author    = {Zhao, Huan and Zhang, Li and Li, Yue and Wang, Yannan and Wang, Hongji and Rao, Wei and Wang, Qing and Xie, Lei},
  title     = {Joint Training or Not: An Exploration of Pre-trained Speech Models in Audio-Visual Speaker Diarization},
  booktitle = {Man-Machine Speech Communication - 18th National Conference, {NCMMSC} 2023, Proceedings},
  editor    = {Jia, Jia and Ling, Zhenhua and Chen, Xie and Li, Ya and Zhang, Zixing},
  series    = {Communications in Computer and Information Science},
  publisher = {{Springer Science and Business Media Deutschland GmbH}},
  year      = {2024},
  pages     = {265--275},
  doi       = {10.1007/978-981-97-0601-3_23},
  isbn      = {9789819706006},
  language  = {english},
  keywords  = {audio-visual, joint training, pre-trained model, speaker diarization},
  abstract  = {The scarcity of labeled audio-visual datasets is a constraint for training superior audio-visual speaker diarization systems. To improve the performance of audio-visual speaker diarization, we leverage pre-trained supervised and self-supervised speech models for audio-visual speaker diarization. Specifically, we adopt supervised (ResNet and ECAPA-TDNN) and self-supervised pre-trained models (WavLM and HuBERT) as the speaker and audio embedding extractors in an end-to-end audio-visual speaker diarization (AVSD) system. Then we explore the effectiveness of different frameworks, including Transformer, Conformer, and cross-attention mechanism, in the audio-visual decoder. To mitigate the degradation of performance caused by separate training, we jointly train the audio encoder, speaker encoder, and audio-visual decoder in the AVSD system. Experiments on the MISP dataset demonstrate that the proposed method achieves superior performance and obtained third place in MISP Challenge 2022.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2024.; 18th National Conference on Man-Machine Speech Communication, NCMMSC 2023 ; Conference date: 08-12-2023 Through 11-12-2023},
}