% Conference paper in the BICS 2018 proceedings (LNCS series).
% The citation key is kept unchanged so existing \cite commands keep resolving.
@inproceedings{72820a2fc88f4afe9d7d8a845cf4b7d6,
  author    = {Feng, Wei and Xie, Lei and Zhang, Jin and Zhang, Yujun and Zhang, Yanning},
  title     = {Self-validated Story Segmentation of {Chinese} Broadcast News},
  booktitle = {Advances in Brain Inspired Cognitive Systems - 9th International Conference, BICS 2018, Proceedings},
  editor    = {Hussain, Amir and Luo, Bin and Zheng, Jiangbin and Zhao, Xinbo and Liu, Cheng-Lin and Ren, Jinchang and Zhao, Huimin},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  year      = {2018},
  pages     = {568--578},
  doi       = {10.1007/978-3-030-00563-4_55},
  isbn      = {9783030005627},
  language  = {english},
  keywords  = {Chinese broadcast news, Normalized cuts, Self-validation, Story segmentation, Subwords, Topic detection},
  abstract  = {Automatic story segmentation is an important prerequisite for semantic-level applications. The normalized cuts (NCuts) method has recently shown great promise for segmenting English spoken lectures. However, the availability assumption of the exact story number per file significantly limits its capability to handle a large number of transcripts. Besides, how to apply such method to Chinese language in the presence of speech recognition errors is unclear yet. Addressing these two problems, we propose a self-validated NCuts (SNCuts) algorithm for segmenting Chinese broadcast news via inaccurate lexical cues, generated by the Chinese large vocabulary continuous speech recognizer (LVCSR). Due to the specialty of Chinese language, we present a subword-level graph embedding for the erroneous LVCSR transcripts. We regularize the NCuts criterion by a general exponential prior of story numbers, respecting the principle of Occam{\textquoteright}s razor. Given the maximum story number as a general parameter, we can automatically obtain reasonable segmentations for a large number of news transcripts, with the story numbers automatically determined for each file, and with comparable complexity to alternative non-self-validated methods. Extensive experiments on benchmark corpus show that: (i) the proposed SNCuts algorithm can efficiently produce comparable or even better segmentation quality, as compared to other state-of-the-art methods with true story number as an input parameter; and (ii) the subword-level embedding always helps to recover lexical cohesion in Chinese erroneous transcripts, thus improving both segmentation accuracy and robustness to LVCSR errors.},
  note      = {Publisher Copyright: {\textcopyright} 2018, Springer Nature Switzerland AG.; 9th International Conference on Brain-Inspired Cognitive Systems, BICS 2018 ; Conference date: 07-07-2018 Through 08-07-2018},
}