Interesting links, 24/02/2023

Probing phoneme, language and speaker information in unsupervised speech representations, pdf

% Interspeech 2022 paper. Authors are in "von Last, First" form so the particle "de" is parsed as such.
@inproceedings{deseyssel22_interspeech,
  author    = {de Seyssel, Maureen and Lavechin, Marvin and Adi, Yossi and Dupoux, Emmanuel and Wisniewski, Guillaume},
  title     = {Probing phoneme, language and speaker information in unsupervised speech representations},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1402--1406},
  doi       = {10.21437/Interspeech.2022-373}
}

VQ-T: RNN Transducers using Vector-Quantized Prediction Network States, pdf

% Interspeech 2022 paper. Only the acronyms in the title are braced, so styles may recase the rest.
@inproceedings{shi22b_interspeech,
  author    = {Shi, Jiatong and Saon, George and Haws, David and Watanabe, Shinji and Kingsbury, Brian},
  title     = {{VQ-T}: {RNN} Transducers using Vector-Quantized Prediction Network States},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1656--1660},
  doi       = {10.21437/Interspeech.2022-414}
}

CTC Variations Through New WFST Topologies, pdf

% Interspeech 2022 paper. Only the acronyms in the title are braced, so styles may recase the rest.
@inproceedings{laptev22_interspeech,
  author    = {Laptev, Aleksandr and Majumdar, Somshubra and Ginsburg, Boris},
  title     = {{CTC} Variations Through New {WFST} Topologies},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1041--1045},
  doi       = {10.21437/Interspeech.2022-10854}
}

nvidia-riva/riva-asrlib-decoder – Standalone implementation of the CUDA-accelerated WFST Decoder available in Riva

Dealing with Unknowns in Continual Learning for End-to-end Automatic Speech Recognition, pdf

% Interspeech 2022 paper. The title is stored in title case without whole-title braces; casing is left to the style.
@inproceedings{sustek22_interspeech,
  author    = {Sustek, Martin and Sadhu, Samik and Hermansky, Hynek},
  title     = {Dealing with Unknowns in Continual Learning for End-to-end Automatic Speech Recognition},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1046--1050},
  doi       = {10.21437/Interspeech.2022-11139}
}

From Undercomplete to Sparse Overcomplete Autoencoders to Improve LF-MMI based Speech Recognition, pdf

% Interspeech 2022 paper. Only the acronym in the title is braced.
% NOTE(review): the compound surname is kept braced exactly as exported; confirm whether Hande is a given name.
@inproceedings{handekabil22_interspeech,
  author    = {{Hande Kabil}, Selen and Bourlard, Herve},
  title     = {From Undercomplete to Sparse Overcomplete Autoencoders to Improve {LF-MMI} based Speech Recognition},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1061--1065},
  doi       = {10.21437/Interspeech.2022-11390}
}

A Temporal Extension of Latent Dirichlet Allocation for Unsupervised Acoustic Unit Discovery, pdf

% Interspeech 2022 paper. Authors are in "von Last, First" form so the particles are parsed as such.
% NOTE(review): the export braced "Adam du Preez" as one surname; Adam is treated as a given name here - confirm.
@inproceedings{vandermerwe22_interspeech,
  author    = {van der Merwe, Werner and Kamper, Herman and du Preez, Johan Adam},
  title     = {A Temporal Extension of Latent {Dirichlet} Allocation for Unsupervised Acoustic Unit Discovery},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1426--1430},
  doi       = {10.21437/Interspeech.2022-11369}
}

DEFORMER: Coupling Deformed Localized Patterns with Global Context for Robust End-to-end Speech Recognition, pdf

% Interspeech 2022 paper. The model name is braced to keep its capitals; initials are separated by a space.
@inproceedings{xie22b_interspeech,
  author    = {Xie, Jiamin and Hansen, John H. L.},
  title     = {{DEFORMER}: Coupling Deformed Localized Patterns with Global Context for Robust End-to-end Speech Recognition},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1392--1396},
  doi       = {10.21437/Interspeech.2022-11172}
}

Automatic Pronunciation Assessment using Self-Supervised Speech Representation Learning, pdf

% Interspeech 2022 paper. The title is stored in title case without whole-title braces; casing is left to the style.
@inproceedings{kim22k_interspeech,
  author    = {Kim, Eesung and Jeon, Jae-Jin and Seo, Hyeji and Kim, Hoon},
  title     = {Automatic Pronunciation Assessment using Self-Supervised Speech Representation Learning},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1411--1415},
  doi       = {10.21437/Interspeech.2022-10245}
}

Knowledge of accent differences can be used to predict speech recognition, pdf

% Interspeech 2022 paper. The title is kept in the sentence case of the export, without whole-title braces.
@inproceedings{szalay22_interspeech,
  author    = {Szalay, Tuende and Shahin, Mostafa and Ahmed, Beena and Ballard, Kirrie},
  title     = {Knowledge of accent differences can be used to predict speech recognition},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1372--1376},
  doi       = {10.21437/Interspeech.2022-10162}
}

Improving Phonetic Transcriptions of Children’s Speech by Pronunciation Modelling with Constrained CTC-Decoding, pdf

% Interspeech 2022 paper. Umlauts are written as LaTeX escapes and the apostrophe as ASCII so that
% classic 8-bit BibTeX sorts and labels the entry correctly; only the acronym in the title is braced.
@inproceedings{rumberg22b_interspeech,
  author    = {Rumberg, Lars and Gebauer, Christopher and Ehlert, Hanna and L{\"u}dtke, Ulrike and Ostermann, J{\"o}rn},
  title     = {Improving Phonetic Transcriptions of Children's Speech by Pronunciation Modelling with Constrained {CTC}-Decoding},
  year      = {2022},
  booktitle = {Proc. Interspeech 2022},
  pages     = {1357--1361},
  doi       = {10.21437/Interspeech.2022-332}
}