@inproceedings{b276aa2c31794af591d71b15966c5a09,
title = "Lip assistant: Visualize speech for hearing impaired people in multimedia services",
abstract = "This paper presents a very low bit rate speech-to-video synthesizer, named lip assistant, to help hearing impaired people to better access multimedia services via lipreading. Lip assistant can automatically convert acoustic speech to lip parameters with a bit rate of 2.2kbps, and decode them to video-realistic mouth animation on the fly. We use multi-stream HMMs (MSHMMs) and the principal component analysis (PCA) to model the audio-visual speech and the visual articulations, which are learned from AV facial recordings. Speech is converted to lip parameters with natural dynamics by an expectation maximization (EM)-based audio-to-lip converter. The video synthesizer generates video-realistic mouth animations from the encoded lip parameters via PCA expansion. Finally, mouth animation is superimposed on the original video as an assistant for hearing impaired viewers to make a better understanding on the audio-visual contents. Experimental results shows that lip assistant can significantly improve the speech intelligibility of both machines and humans.",
author = "Lei Xie and Yi Wang and Liu, {Zhi Qiang}",
year = "2006",
doi = "10.1109/ICSMC.2006.384815",
language = "英语",
isbn = "1424401003",
series = "Conference Proceedings - IEEE International Conference on Systems, Man and Cybernetics",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "4331--4336",
booktitle = "2006 IEEE International Conference on Systems, Man and Cybernetics",
note = "2006 IEEE International Conference on Systems, Man and Cybernetics ; Conference date: 08-10-2006 Through 11-10-2006",
}