@comment{
  Conference paper in the NCMMSC 2025 proceedings (Communications in Computer
  and Information Science series). The citation key is the exporter-generated
  identifier and is kept unchanged so existing citations continue to resolve.
  The event name/date and the publisher copyright statement, originally packed
  into a printed "note" field, are stored in eventtitle / eventdate / copyright
  so that they no longer appear verbatim in the rendered bibliography.
}
@inproceedings{cdb7bf22f8494f8ba9f52f1eebeb462c,
  author     = {Xue, Hongfei and Ren, Wei and Geng, Xuelong and Wei, Kun and Li, Longhao and Shao, Qijie and Yang, Linju and Diao, Kai and Xie, Lei},
  title      = {{Ideal-LLM}: Integrating Dual Encoders and Language-Adapted {LLM} for Multilingual Speech-to-Text},
  booktitle  = {Man-Machine Speech Communication - 20th National Conference, {NCMMSC} 2025, Proceedings},
  editor     = {Jia, Jia and Wu, Zhiyong and Gao, Lijian and Huang, Gongping and Li, Ya},
  series     = {Communications in Computer and Information Science},
  publisher  = {Springer Science and Business Media Deutschland GmbH},
  year       = {2026},
  pages      = {47--58},
  doi        = {10.1007/978-981-95-5382-2_5},
  isbn       = {9789819553815},
  language   = {English},
  eventtitle = {20th National Conference on Man-Machine Speech Communication, {NCMMSC} 2025},
  eventdate  = {2025-10-16/2025-10-19},
  copyright  = {{\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2026.},
  keywords   = {Dual Encoders, Large Language Models, Multilingual Speech-to-Text},
  abstract   = {Integrating audio encoders with LLMs has enabled models to process audio, enhancing speech-to-text tasks including automatic speech recognition (ASR) and automatic speech translation (AST). However, these methods often overlook language adaptation in multilingual settings, relying on multilingual data without adequately addressing language differences. To address this gap, we propose the Ideal-LLM model, which employs dual multilingual encoders to enrich language features and uses a language-adapted connector to target each language. By leveraging the complementary strengths of Whisper and MMS encoders, our approach ensures richer multilingual representations. Additionally, the connector enhances modal transformation via a weight selector tailored for each language. Experimental results demonstrate that Ideal-LLM improves ASR performance, achieving a 32.6\% relative reduction in word error rates compared to the standard speech encoder integrated with LLMs and yields an average BLEU score of 36.78 for AST.},
}