Citation

BibTeX format

@article{Moore:2016:10.1016/j.csl.2016.11.003,
  author  = {Moore, A. H. and Peso, P. and Naylor, P. A.},
  title   = {Speech enhancement for robust automatic speech recognition: Evaluation using a baseline system and instrumental measures},
  journal = {Computer Speech and Language},
  volume  = {46},
  pages   = {574--584},
  year    = {2016},
  issn    = {1095-8363},
  doi     = {10.1016/j.csl.2016.11.003},
  url     = {http://dx.doi.org/10.1016/j.csl.2016.11.003},
}

RIS format (EndNote, RefMan)

TY  - JOUR
AB  - Automatic speech recognition in everyday environments must be robust to significant levels of reverberation and noise. One strategy to achieve such robustness is multi-microphone speech enhancement. In this study, we present results of an evaluation of different speech enhancement pipelines using a state-of-the-art ASR system for a wide range of reverberation and noise conditions. The evaluation exploits the recently released ACE Challenge database which includes measured multichannel acoustic impulse responses from 7 different rooms with reverberation times ranging from 0.33 s to 1.34 s. The reverberant speech is mixed with ambient, fan and babble noise recordings made with the same microphone setups in each of the rooms. In the first experiment performance of the ASR without speech processing is evaluated. Results clearly indicate the deleterious effect of both noise and reverberation. In the second experiment, different speech enhancement pipelines are evaluated with relative word error rate reductions of up to 82%. Finally, the ability of selected instrumental metrics to predict ASR performance improvement is assessed. The best performing metric, Short-Time Objective Intelligibility Measure, is shown to have a Pearson correlation coefficient of 0.79, suggesting that it is a useful predictor of algorithm performance in these tests.
AU  - Moore,AH
AU  - Peso,P
AU  - Naylor,PA
DO  - 10.1016/j.csl.2016.11.003
EP  - 584
PY  - 2016///
SN  - 1095-8363
SP  - 574
TI  - Speech enhancement for robust automatic speech recognition: Evaluation using a baseline system and instrumental measures
T2  - Computer Speech and Language
UR  - http://dx.doi.org/10.1016/j.csl.2016.11.003
UR  - http://hdl.handle.net/10044/1/43057
VL  - 46
ER  - 

Contact us

Address

Speech and Audio Processing Lab
CSP Group, EEE Department
Imperial College London

Exhibition Road, London, SW7 2AZ, United Kingdom

Email

p.naylor@imperial.ac.uk