% Conference paper in the NCMMSC 2025 proceedings (Springer, Communications in
% Computer and Information Science). The citation key is the exporter-generated
% identifier and is kept as-is so existing citations keep resolving.
% The publisher value is double-braced so that list-aware processors do not
% split it at the literal " and ". The DOI is stored bare (no LaTeX escaping).
@inproceedings{42de19f96ef1482cabbf57d37f5c5b92,
  author    = {Li, Guojian and Shao, Qijie and Zhao, Zhixian and Wang, Shuiyuan and Fu, Zhonghua and Xie, Lei},
  title     = {Serial-Parallel Dual-Path Architecture for Speaking Style Recognition},
  booktitle = {Man-Machine Speech Communication - 20th National Conference, {NCMMSC} 2025, Proceedings},
  editor    = {Jia, Jia and Wu, Zhiyong and Gao, Lijian and Huang, Gongping and Li, Ya},
  series    = {Communications in Computer and Information Science},
  publisher = {{Springer Science and Business Media Deutschland GmbH}},
  year      = {2026},
  pages     = {241--254},
  doi       = {10.1007/978-981-95-5382-2_19},
  isbn      = {9789819553815},
  language  = {English},
  keywords  = {ASR + STYLE, Acoustic-Linguistic Similarity, Cross-Modal, Serial-Parallel, Speaking Style Recognition},
  abstract  = {Speaking Style Recognition (SSR) identifies a speaker{\textquoteright}s speaking style characteristics from speech. Existing style recognition approaches primarily rely on linguistic information, with limited integration of acoustic information, which restricts recognition accuracy improvements. The fusion of acoustic and linguistic modalities offers significant potential to enhance recognition performance. In this paper, we propose a novel serial-parallel dual-path architecture for SSR that leverages acoustic-linguistic bimodal information. The serial path follows the ASR+STYLE serial paradigm, reflecting a sequential temporal dependency, while the parallel path integrates our designed Acoustic-Linguistic Similarity Module (ALSM) to facilitate cross-modal interaction with temporal simultaneity. Compared to the existing SSR baseline---the OSUM model, our approach reduces parameter size by 88.4\% and achieves a 30.3\% improvement in SSR accuracy for eight styles on the test set.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2026.; 20th National Conference on Man-Machine Speech Communication, NCMMSC 2025 ; Conference date: 16-10-2025 Through 19-10-2025},
}