@inproceedings{8e92a2ea7c3f4bb499984f630d5e745a,
title = "Context dependent viseme models for voice driven animation",
abstract = "This paper addresses the problem of animating a talking figure, such as an avatar, using speech input only. The system that was developed is based on hidden Markov models for the acoustic observation vectors of the speech sounds that correspond to each of 16 visually distinct mouth shapes (visemes). The acoustic variability with context was taken into account by building acoustic viseme models that are dependent on the left and right viseme contexts. Our experimental results show that it is indeed possible to obtain visually relevant speech segmentation data directly from the purely acoustic speech signal.",
keywords = "Animation, Automatic speech recognition, Avatars, Context modeling, Hidden Markov models, Mouth, Robustness, Shape, Speech processing, Speech recognition",
author = "Xie Lei and Jiang Dongmei and I. Ravyse and W. Verhelst and H. Sahli and V. Slavova and Z. Rongchun",
note = "Publisher Copyright: {\textcopyright} 2003 Faculty of Electrical Engineering and Co.; 4th EURASIP Conference Focused on Video / Image Processing and Multimedia Communications, EC-VIP-MC 2003 ; Conference date: 02-07-2003 Through 05-07-2003",
year = "2003",
doi = "10.1109/VIPMC.2003.1220537",
language = "English",
series = "Proceedings EC-VIP-MC 2003 - 4th EURASIP Conference Focused on Video / Image Processing and Multimedia Communications",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "649--654",
editor = "Sonja Grgic and Mislav Grgic",
booktitle = "Proceedings EC-VIP-MC 2003 - 4th EURASIP Conference Focused on Video / Image Processing and Multimedia Communications",
}