% Conference paper published in the ISCSLP 2006 proceedings.
% NOTE(review): the `volume` (series number) and `publisher` fields are absent from this record; confirm and add them.
@inproceedings{3a76a393b78b4da09e9c851f1ad0abea,
  author    = {Xie, Lei and Meng, Helen and Liu, {Zhi Qiang}},
  title     = {A {Cantonese} speech-driven talking face using translingual audio-to-visual conversion},
  booktitle = {Chinese Spoken Language Processing - 5th International Symposium, {ISCSLP} 2006, Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {627--639},
  year      = {2006},
  doi       = {10.1007/11939993_64},
  isbn      = {3540496653},
  language  = {English},
  note      = {5th International Symposium on Chinese Spoken Language Processing, ISCSLP 2006 ; Conference date: 13-12-2006 Through 16-12-2006},
  abstract  = {This paper proposes a novel approach towards a video-realistic, speech-driven talking face for Cantonese. We present a technique that realizes a talking face for a target language (Cantonese) using only audio-visual facial recordings for a base language (English). Given a Cantonese speech input, we first use a Cantonese speech recognizer to generate a Cantonese syllable transcription. Then we map it to an English phoneme transcription via a translingual mapping scheme that involves symbol mapping and time alignment from Cantonese syllables to English phonemes. With the phoneme transcription, the input speech, and the audio-visual models for English, an EM-based conversion algorithm is adopted to generate mouth animation parameters associated with the input Cantonese audio. We have carried out audio-visual syllable recognition experiments to objectively evaluate the proposed talking face. Results show that the visual speech synthesized by the Cantonese talking face can effectively increase the accuracy of Cantonese syllable recognition under noisy acoustic conditions.},
}