@inproceedings{787e711f8dd84804876a5ac67938490c,
title = "CAMEL: Cross-Attention Enhanced Mixture-of-Experts and Language Bias for Code-Switching Speech Recognition",
abstract = "Code-switching automatic speech recognition (ASR) aims to transcribe speech that contains two or more languages accurately. To better capture language-specific speech representations and address language confusion in code-switching ASR, the mixture-of-experts (MoE) architecture and an additional language diarization (LD) decoder are commonly employed. However, most researches remain stagnant in simple operations like weighted summation or concatenation to fuse language-specific speech representations, leaving significant opportunities to explore the enhancement of integrating language bias information. In this paper, we introduce CAMEL, a cross-attention-based MoE and language bias approach for code-switching ASR. Specifically, after each MoE layer, we fuse language-specific speech representations with cross-attention, leveraging its strong contextual modeling abilities. Additionally, we design a source attention-based mechanism to incorporate the language information from the LD decoder output into text embeddings. Experimental results demonstrate that our approach achieves state-of-the-art performance on the SEAME, ASRU200, and ASRU700+LibriSpeech460 Mandarin-English code-switching ASR datasets.",
keywords = "code-switching, cross-attention, language bias, mixture-of-experts, speech recognition",
author = "He Wang and Xucheng Wan and Naijun Zheng and Kai Liu and Huan Zhou and Guojian Li and Lei Xie",
note = "Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2025 ; Conference date: 06-04-2025 Through 11-04-2025",
year = "2025",
doi = "10.1109/ICASSP49660.2025.10890869",
language = "英语",
series = "ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
editor = "Rao, {Bhaskar D} and Isabel Trancoso and Gaurav Sharma and Mehta, {Neelesh B.}",
booktitle = "2025 IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2025 - Proceedings",
}