% INTERSPEECH 2010 conference paper on phoneme-lattice-based TextTiling for
% multilingual story segmentation. {TextTiling} is brace-protected in the title
% so that sentence-casing bibliography styles keep its capitalisation.
@inproceedings{14a057891c7f432a962371a2212eb422,
  author    = {Wang, Xiaoxuan and Xie, Lei and Ma, Bin and Chng, {Eng Siong} and Li, Haizhou},
  title     = {Phoneme lattice based {TextTiling} towards multilingual story segmentation},
  booktitle = {Proceedings of the 11th Annual Conference of the International Speech Communication Association, INTERSPEECH 2010},
  publisher = {International Speech Communication Association},
  year      = {2010},
  pages     = {1305--1308},
  language  = {English},
  keywords  = {Phoneme lattice, Speech processing, Spoken document retrieval, Story segmentation, Topic detection and tracking},
  abstract  = {This paper proposes a phoneme lattice based TextTiling approach towards multilingual story segmentation. The phoneme is the smallest segmental unit in a language and the number of phonemes in a language is usually far smaller than the number of words. Furthermore, many phonemes are shared by different languages. These properties make phonemes particularly appropriate for representing multilingual speech. As phoneme recognition is far from perfect, phoneme lattices, which carry much richer statistics than the 1-best hypotheses, are adopted in this paper as the input to the TextTiling approach. The term frequencies used in traditional TextTiling are replaced by the expected counts of phoneme n-gram units calculated from phoneme lattices. Experiments on TDT2 English and Mandarin corpora show that the phoneme lattice based TextTiling outperforms the phoneme 1-best based TextTiling and word based TextTiling in broadcast news story segmentation.},
}