Citation

BibTeX format

% Journal article record. `doi` stores the bare identifier; `url` is its resolver link.
% Initials are spaced so each is parsed separately; {Kalman} is braced to survive sentence-casing styles.
@article{Xue:2018:10.1109/TASLP.2018.2845665,
  author  = {Xue, W. and Moore, A. and Brookes, D. M. and Naylor, P.},
  title   = {Modulation-domain multichannel {Kalman} filtering for speech enhancement},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume  = {26},
  pages   = {1833--1847},
  year    = {2018},
  issn    = {2329-9290},
  doi     = {10.1109/TASLP.2018.2845665},
  url     = {https://doi.org/10.1109/TASLP.2018.2845665},
}

RIS format (EndNote, RefMan)

TY  - JOUR
AB  - Compared with single-channel speech enhancement methods, multichannel methods can utilize spatial information to design optimal filters. Although some filters adaptively consider second-order signal statistics, the temporal evolution of the speech spectrum is usually neglected. By using linear prediction (LP) to model the inter-frame temporal evolution of speech, single-channel Kalman filtering (KF) based methods have been developed for speech enhancement. In this paper, we derive a multichannel KF (MKF) that jointly uses both interchannel spatial correlation and interframe temporal correlation for speech enhancement. We perform LP in the modulation domain, and by incorporating the spatial information, derive an optimal MKF gain in the short-time Fourier transform domain. We show that the proposed MKF reduces to the conventional multichannel Wiener filter if the LP information is discarded. Furthermore, we show that, under an appropriate assumption, the MKF is equivalent to a concatenation of the minimum variance distortionless response beamformer and a single-channel modulation-domain KF and therefore present an alternative implementation of the MKF. Experiments conducted on a public head-related impulse response database demonstrate the effectiveness of the proposed method.
AU  - Xue,W
AU  - Moore,A
AU  - Brookes,DM
AU  - Naylor,P
DO  - 10.1109/TASLP.2018.2845665
EP  - 1847
PY  - 2018///
SN  - 2329-9290
SP  - 1833
TI  - Modulation-domain multichannel Kalman filtering for speech enhancement
T2  - IEEE/ACM Transactions on Audio, Speech, and Language Processing
UR  - https://doi.org/10.1109/TASLP.2018.2845665
UR  - https://ieeexplore.ieee.org/document/8375666
UR  - http://hdl.handle.net/10044/1/60712
VL  - 26
ER  - 

Contact us

Address

Speech and Audio Processing Lab
CSP Group, EEE Department
Imperial College London

Exhibition Road, London, SW7 2AZ, United Kingdom

Email

p.naylor@imperial.ac.uk