@inproceedings{c95d9fb1d57a4dc28d3f0f8f48e11ba8,
  author    = {Abel, Andrew and Marxer, Ricard and Barker, Jon and Watt, Roger and Whitmer, Bill and Derleth, Peter and Hussain, Amir},
  title     = {A Data Driven Approach to Audiovisual Speech Mapping},
  booktitle = {Advances in Brain Inspired Cognitive Systems},
  editor    = {Liu, Cheng-Lin and Hussain, Amir and Luo, Bin and Tan, Kay Chen and Zeng, Yi and Zhang, Zhaoxiang},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer-Verlag},
  pages     = {331--342},
  year      = {2016},
  month     = nov,
  day       = {13},
  doi       = {10.1007/978-3-319-49685-6_30},
  isbn      = {9783319496849},
  language  = {English},
  keywords  = {ANNs, audiovisual, speech mapping, speech processing},
  abstract  = {The concept of using visual information as part of audio speech processing has been of significant recent interest. This paper presents a data driven approach that considers estimating audio speech acoustics using only temporal visual information without considering linguistic features such as phonemes and visemes. Audio (log filterbank) and visual (2D-DCT) features are extracted, and various configurations of MLP and datasets are used to identify optimal results, showing that given a sequence of prior visual frames an equivalent reasonably accurate audio frame estimation can be mapped.},
  note      = {8th International Conference on Brain Inspired Cognitive Systems, BICS 2016 ; Conference date: 28-11-2016 Through 30-11-2016},
}