% Conference paper in the ASRU 2015 proceedings (pages 98--102).
% Cleaned from an automatic export:
%   - the "series" field was dropped because it only repeated "booktitle";
%   - "language" was the Chinese word for English and is now plain ASCII;
%   - case-sensitive words in the title are brace-protected;
%   - author names use the unambiguous "Last, First" form.
% The "day" field is not a standard BibTeX field and is kept only as metadata.
@inproceedings{b5ccc77c13ee412fbf8246ba0dee8836,
  author    = {Ding, Chuang and Xie, Lei and Yan, Jie and Zhang, Weini and Liu, Yang},
  title     = {Automatic prosody prediction for {Chinese} speech synthesis using {BLSTM-RNN} and embedding features},
  booktitle = {2015 {IEEE} Workshop on Automatic Speech Recognition and Understanding, {ASRU} 2015 - Proceedings},
  pages     = {98--102},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  year      = {2016},
  month     = feb,
  day       = {10},
  doi       = {10.1109/ASRU.2015.7404780},
  language  = {English},
  keywords  = {automatic prosody prediction, BLSTM, embedding features, neural network, speech synthesis},
  abstract  = {Prosody affects the naturalness and intelligibility of speech. However, automatic prosody prediction from text for Chinese speech synthesis is still a great challenge and the traditional conditional random fields (CRF) based method always heavily relies on feature engineering. In this paper, we propose to use neural networks to predict prosodic boundary labels directly from Chinese characters without any feature engineering. Experimental results show that stacking feed-forward and bidirectional long short-term memory (BLSTM) recurrent network layers achieves superior performance over the CRF-based method. The embedding features learned from raw text further enhance the performance.},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; IEEE Workshop on Automatic Speech Recognition and Understanding, ASRU 2015 ; Conference date: 13-12-2015 Through 17-12-2015},
}