% Workshop paper in the ASMMC-MMAC 2018 proceedings (ACM).
% The proceedings title is stored in booktitle only; it is not a series name.
@inproceedings{b5adcc09aa994e67b0911b068ad85f24,
  author    = {Xue, Liumeng and Zhu, Xiaolian and An, Xiaochun and Xie, Lei},
  title     = {A comparison of expressive speech synthesis approaches based on neural network},
  booktitle = {{ASMMC-MMAC} 2018 - Proceedings of the Joint Workshop of the 4th Workshop on Affective Social Multimedia Computing and 1st Multi-Modal Affective Computing of Large-Scale Multimedia Data, Co-located with {MM} 2018},
  publisher = {Association for Computing Machinery, Inc},
  pages     = {15--20},
  year      = {2018},
  month     = oct,
  day       = {19},
  doi       = {10.1145/3267935.3267947},
  language  = {english},
  keywords  = {Code, Expressive speech synthesis, Multi-head network, Neural networks, Retrain, Statistical parametric speech synthesis, Text-to-speech},
  abstract  = {Adaptability and controllability in changing speaking styles and speaker characteristics are the advantages of deep neural networks (DNNs) based statistical parametric speech synthesis (SPSS). This paper presents a comprehensive study on the use of DNNs for expressive speech synthesis with a small set of emotional speech data. Specifically, we study three typical model adaptation approaches: (1) retraining a neural model by emotion-specific data (retrain), (2) augmenting the network input using emotion-specific codes (code) and (3) using emotion-dependent output layers with shared hidden layers (multi-head). Long-short term memory (LSTM) networks are used as the acoustic models. Objective and subjective evaluations have demonstrated that the multi-head approach consistently outperforms the other two approaches with more natural emotion delivered in the synthesized speech.},
  note      = {Publisher Copyright: {\textcopyright} 2018 Association for Computing Machinery.; Joint Workshop of the 4th Workshop on Affective Social Multimedia Computing and 1st Multi-Modal Affective Computing of Large-Scale Multimedia Data Workshop, ASMMC-MMAC 2018 ; Conference date: 26-10-2018},
}