% Conference paper in the Proceedings of Interspeech 2018 (DOI 10.21437/Interspeech.2018-1376).
% The citation key is left unchanged so that existing \cite commands keep resolving.
% Braces inside title/booktitle protect acronyms and proper nouns from style recasing.
@inproceedings{065430a266f149fdb50990d920ea11bd,
  author    = {Qian, Mengjie and Bai, Linxue and Jancovic, Peter and Russell, Martin},
  title     = {Phone recognition using a non-linear manifold with broad phone class dependent {DNNs}},
  booktitle = {Proceedings of {Interspeech} 2018},
  series    = {Interspeech},
  publisher = {ISCA},
  pages     = {3753--3757},
  year      = {2018},
  month     = sep,
  day       = {3},
  doi       = {10.21437/Interspeech.2018-1376},
  language  = {English},
  keywords  = {manifold learning, phone classification, speech recognition, neural network, broad phone classes},
  note      = {Interspeech 2018; conference dates: 2--6 September 2018},
  abstract  = {Although it is generally accepted that different broad phone classes (BPCs) have different production mechanisms and are better described by different types of features, most automatic speech recognition (ASR) systems use the same features and decision criteria for all phones. Motivated by this observation, this paper proposes a two-level DNN structure, referred to as a BPC-DNN, inspired by the notion of a topological manifold. In the first level, several small separate BPC-dependent DNNs are applied to different broad phonetic classes and in the second level the outputs of these DNNs are fused to obtain senone-dependent posterior probabilities, which can be used for frame level classification or integrated into Viterbi decoding for phone recognition. In a previous paper using this approach we reported improved frame classification accuracy on the TIMIT corpus compared with a conventional DNN. The contribution of the present paper is to demonstrate that this advantage extends to full phone recognition. Our most recent results show that the BPC-DNN achieves reductions in error rate relative to a conventional DNN of 16% and 8% for frame classification and phone recognition, respectively.},
}