Scoring LibriSpeech with Kaldi on Kaggle
Reports WER results for the test-clean and test-other sets
# Unpack a prebuilt Kaldi tree (extracted from a Docker image) into /opt/kaldi.
%cd /opt
%%capture
!tar xvf /kaggle/input/extract-prebuilt-kaldi-from-docker/kaldi.tar
import os
# Make the shared libraries Kaldi binaries link against (conda libs, OpenFst,
# libkaldi-*) resolvable at runtime.
os.environ['LD_LIBRARY_PATH'] = '/opt/conda/lib:/opt/kaldi/tools/openfst-1.6.7/lib:/opt/kaldi/src/lib:'
# Snapshot the notebook's original PATH; it is reused later when the
# Kaldi-aware PATH is assembled.
EXISTING_PATH = os.environ['PATH']
%cd /
%%capture
!tar xvf /kaggle/input/extract-cuda-from-kaldi-docker/cuda.tar
import os
# Append the CUDA 10.0 runtime libraries unpacked above (needed by
# CUDA-enabled Kaldi binaries).
os.environ['LD_LIBRARY_PATH'] = f'{os.environ["LD_LIBRARY_PATH"]}:/usr/local/cuda-10.0/targets/x86_64-linux/lib/'
%cd /opt/kaldi/egs
%%capture
# flac is needed to decompress the LibriSpeech .flac audio referenced by wav.scp.
!apt install -y flac
# Create a scratch recipe directory ("usels") alongside the stock egs recipes.
!mkdir -p usels/s5
%cd usels/s5
# Keep data/ and exp/ on the writable Kaggle working disk, symlinked into the
# recipe directory (the rest of /opt/kaldi stays where it was unpacked).
!mkdir /kaggle/working/data
!mkdir /kaggle/working/exp
!ln -s /kaggle/working/data
!ln -s /kaggle/working/exp
# Borrow the generic steps/ and utils/ tooling from the wsj recipe and the
# LibriSpeech-specific local/ scripts from the librispeech recipe.
!ln -s ../../wsj/s5/steps
!ln -s ../../wsj/s5/utils
!ln -s ../../librispeech/s5/local
!mkdir conf
%%writefile conf/mfcc_hires.conf
# config for high-resolution MFCC features, intended for neural network training
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false # use average of log energy, not energy.
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so
# there might be some information at the low end.
--high-freq=-400 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
%env KALDI_ROOT=/opt/kaldi
!cat ../../wsj/s5/path.sh
%env LC_ALL=C
#PWD = !pwd
PWD = '/opt/kaldi/egs/usels/s5'
KALDI_ROOT = '/opt/kaldi'
WSJ_PATH = f'{PWD}/utils/:{KALDI_ROOT}/tools/openfst/bin:{PWD}:{EXISTING_PATH}'
!cat $KALDI_ROOT/tools/config/common_path.sh
raw_kaldi_paths=!cat $KALDI_ROOT/tools/config/common_path.sh|grep '/src/'|awk -F':' '{print $1}'|awk -F'/' '{print "/opt/kaldi/src/"$NF}'
KALDI_PATHS=raw_kaldi_paths.nlstr.replace('\n',':')
!cat $KALDI_ROOT/tools/env.sh
PHONETISAURUS = "/tmp/output/opt/kaldi/tools/phonetisaurus-g2p"
TOOLS_PATH = f'/opt/kaldi/tools/python:{PHONETISAURUS}:{PHONETISAURUS}/src/scripts'
%env PATH = f"{WSJ_PATH}:{KALDI_PATHS}:{TOOLS_PATH}"
!cat ../../wsj/s5/cmd.sh
# Run everything locally with run.pl (no grid engine available on Kaggle).
%env train_cmd=run.pl
%env decode_cmd=run.pl
!ln -s ../../wsj/s5/cmd.sh
!ln -s ../../wsj/s5/path.sh
!ln -s utils/queue.pl
!ln -s utils/run.pl
# NOTE(review): this deletes the queue.pl/run.pl symlinks created just above,
# so the scripts must find run.pl via utils/ on PATH instead — confirm the two
# `ln -s utils/*.pl` lines are actually needed at all.
!rm *.pl
# Build Kaldi data directories (wav.scp, text, utt2spk, ...) for both test sets.
!local/data_prep.sh /kaggle/input/librispeech-test-clean-and-other/LibriSpeech/test-other data/test-other
!local/data_prep.sh /kaggle/input/librispeech-test-clean-and-other/LibriSpeech/test-clean data/test-clean
# Separate *_hires copies for the high-resolution MFCC front-end.
!utils/copy_data_dir.sh data/test-clean data/test-clean_hires
!utils/copy_data_dir.sh data/test-other data/test-other_hires
!ln -s utils/parse_options.sh
# Extract 40-dim hires MFCCs (conf/mfcc_hires.conf) + CMVN stats;
# fix_data_dir.sh drops any utterances whose features failed to extract.
!steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf --cmd "$train_cmd" data/test-clean_hires
!steps/compute_cmvn_stats.sh data/test-clean_hires
!utils/fix_data_dir.sh data/test-clean_hires
!steps/make_mfcc.sh --nj 20 --mfcc-config conf/mfcc_hires.conf --cmd "$train_cmd" data/test-other_hires
!steps/compute_cmvn_stats.sh data/test-other_hires
!utils/fix_data_dir.sh data/test-other_hires
# Pretrained LibriSpeech models (nnet3 i-vector extractor + chain TDNN) from a
# read-only Kaggle input dataset.
!ln -s /kaggle/input/kaldi-librispeech-model/exp/nnet3_cleaned/ exp/nnet3_cleaned
!ln -s /kaggle/input/kaldi-librispeech-model/exp/chain_cleaned/ exp/chain_cleaned
%env nspk=$(wc -l <data/test-clean_hires/spk2utt)
!steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj "${nspk}" data/test-clean_hires exp/nnet3_cleaned/extractor exp/nnet3_cleaned_out/ivectors_test-clean_hires
%env nspk=$(wc -l <data/test-other_hires/spk2utt)
!steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj "${nspk}" data/test-other_hires exp/nnet3_cleaned/extractor exp/nnet3_cleaned_out/ivectors_test-other_hires
Simply symlinking this directory will not work: the Kaldi scripts expect to be able to write into it, so it must be copied to a writable location instead.
# The lang dir must be writable, so copy it (not symlink) out of the
# read-only input dataset.
!cp -r /kaggle/input/kaldi-librispeech-model/data/lang_test_tgsmall data
%env tdnndir=exp/chain_cleaned/tdnn_1d_sp
%env graph_dir=exp/chain_cleaned_out/graph_tgsmall
# Compile the HCLG decoding graph for the tgsmall LM against the chain model.
!utils/mkgraph.sh --self-loop-scale 1.0 --remove-oov data/lang_test_tgsmall $tdnndir $graph_dir
# decode.sh writes into the model directory, so mirror the read-only
# pretrained model with per-file symlinks in a writable directory.
!mkdir exp/tdnn_1d_sp
%pushd exp/tdnn_1d_sp
!for i in /kaggle/input/kaldi-librispeech-model/exp/chain_cleaned/tdnn_1d_sp/*;do ln -s $i;done
%popd
%env tdnndir=exp/tdnn_1d_sp
# First-pass decoding with the tgsmall graph; acwt 1.0 / post-decode-acwt 10.0
# are the usual settings for chain models.
!steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj 8 --cmd "$decode_cmd" --online-ivector-dir exp/nnet3_cleaned_out/ivectors_test-clean_hires $graph_dir data/test-clean_hires $tdnndir/decode_test-clean_tgsmall
!steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 --nj 8 --cmd "$decode_cmd" --online-ivector-dir exp/nnet3_cleaned_out/ivectors_test-other_hires $graph_dir data/test-other_hires $tdnndir/decode_test-other_tgsmall
!steps/score_kaldi.sh --cmd "run.pl" data/test-clean_hires $graph_dir $tdnndir/decode_test-clean_tgsmall
!steps/score_kaldi.sh --cmd "run.pl" data/test-other_hires $graph_dir $tdnndir/decode_test-other_tgsmall
# First-pass (tgsmall) WER for each test set.
!cat exp/tdnn_1d_sp/decode_test-clean_tgsmall/scoring_kaldi/best_wer
!cat exp/tdnn_1d_sp/decode_test-other_tgsmall/scoring_kaldi/best_wer
# RNNLM lattice rescoring: copy the LSTM LM out of the read-only dataset
# (the rescoring script writes into its directory) and link Kaldi's rnnlm/
# scripts into the recipe.
!cp -r /kaggle/input/kaldi-librispeech-model/exp/rnnlm_lstm_1a/ exp
!ln -s /opt/kaldi/scripts/rnnlm
%env decode_dir=exp/tdnn_1d_sp/decode_test-clean_tgsmall
# Pruned rescoring of the first-pass lattices with the RNNLM
# (interpolation weight 0.45, 4-gram approximation during expansion).
!rnnlm/lmrescore_pruned.sh --cmd "$decode_cmd" --weight 0.45 --max-ngram-order 4 data/lang_test_tgsmall exp/rnnlm_lstm_1a data/test-clean_hires ${decode_dir} $tdnndir/decode_test-clean_rescore
%env decode_dir=exp/tdnn_1d_sp/decode_test-other_tgsmall
!rnnlm/lmrescore_pruned.sh --cmd "$decode_cmd" --weight 0.45 --max-ngram-order 4 data/lang_test_tgsmall exp/rnnlm_lstm_1a data/test-other_hires ${decode_dir} $tdnndir/decode_test-other_rescore
!steps/score_kaldi.sh --cmd "run.pl" data/test-clean_hires $graph_dir $tdnndir/decode_test-clean_rescore
!steps/score_kaldi.sh --cmd "run.pl" data/test-other_hires $graph_dir $tdnndir/decode_test-other_rescore
# Final (RNNLM-rescored) WER for each test set.
!cat $tdnndir/decode_test-clean_rescore/scoring_kaldi/best_wer
!cat $tdnndir/decode_test-other_rescore/scoring_kaldi/best_wer