% Conference paper from the ASRU 2017 proceedings (record originally exported from a research portal).
% Review notes:
%   - month follows the conference dates recorded in the note field (16-20 December 2017);
%     the exported value (2 July) contradicted them -- TODO confirm against the publisher record.
%   - the exported series field only repeated booktitle and was dropped to avoid printing the
%     proceedings title twice.
@inproceedings{f3cafddcbb54485c87dcc55e240fb9f2,
  author    = {Yuan, Yougen and Leung, Cheung Chi and Xie, Lei and Chen, Hongjie and Ma, Bin and Li, Haizhou},
  title     = {Extracting bottleneck features and word-like pairs from untranscribed speech for feature representation},
  booktitle = {2017 IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2017 - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {734--739},
  year      = {2017},
  month     = dec,
  doi       = {10.1109/ASRU.2017.8269010},
  language  = {English},
  keywords  = {bottleneck features, deep neural network (DNN), feature representation, pairwise learning, word-like speech pairs},
  abstract  = {We propose a framework to learn a frame-level speech representation in a scenario where no manual transcription is available. Our framework is based on pairwise learning using bottleneck features (BNFs). Initial frame-level features are extracted from a bottleneck-shaped multilingual deep neural network (DNN) which is trained with unsupervised phoneme-like labels. Word-like pairs are discovered in the untranscribed speech using the initial features, and frame alignment is performed on each word-like speech pair. The matching frame pairs are used as input-output to train another DNN with the mean square error (MSE) loss function. The final frame-level features are extracted from an internal hidden layer of MSE-based DNN. Our pairwise learned feature representation is evaluated on the ZeroSpeech 2017 challenge. The experiments show that pairwise learning improves phoneme discrimination in 10s and 120s test conditions. We find that it is important to use BNFs as initial features when pairwise learning is performed. With more word pairs obtained from the Switchboard corpus and its manual transcription, the phoneme discrimination of three languages in the evaluation data can further be improved despite data mismatch.},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE. 2017 IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2017; Conference date: 16-12-2017 through 20-12-2017},
}