% Interspeech 2007 conference paper by Chan, Xie and Meng on lexical-chain
% based story segmentation. The conference dates are recorded in `note`.
% No `series` field: the proceedings title is carried by `booktitle` only.
@inproceedings{237b0332c50f491aaa8a4fbcca2b4fd1,
  author    = {Chan, {Shing Kai} and Xie, Lei and Meng, {Helen Mei Ling}},
  title     = {Modeling the statistical behavior of lexical chains to capture word cohesiveness for automatic story segmentation},
  booktitle = {International Speech Communication Association - 8th Annual Conference of the International Speech Communication Association, Interspeech 2007},
  year      = {2007},
  pages     = {2408--2411},
  isbn      = {9781605603162},
  language  = {English},
  keywords  = {Chinese, Spoken document retrieval, Story segmentation},
  abstract  = {We present a mathematically rigorous framework for modeling the statistical behavior of lexical chains for automatic story segmentation of broadcast news audio. Lexical chains were first proposed in [1] to connect related terms within a story, as an embodiment of lexical cohesion. The vocabulary within a story tends to be cohesive, while a change in the vocabulary distribution tends to signify a topic shift that occurs across a story boundary. Previous work focused on the concept and nature of lexical chains but performed story segmentation based on arbitrary thresholding. This work proposes the use of the log-normal distribution to capture the statistical behavior of lexical chains, together with data-driven parameter selection for lexical chain formation. Experimentation based on the TDT-2 Mandarin Corpus shows that the proposed statistical model leads to better story segmentation, where the F1-measure increased from 0.468 to 0.641.},
  note      = {8th Annual Conference of the International Speech Communication Association, Interspeech 2007 ; Conference date: 27-08-2007 Through 31-08-2007},
}