wav2vec-u CV-sv - GAN
GAN training for wav2vec-u on Common Voice Swedish
The original attempt on Kaggle won't run because of an issue with cuDNN, but this notebook runs fine on Colab.
# Bootstrap conda inside the Colab runtime; the pykaldi install further down
# is done with `conda install`.
# NOTE(review): condacolab.install() is expected to restart the kernel when it
# finishes - confirm against the condacolab README and run this step first.
!pip install condacolab
import condacolab
condacolab.install()
%%capture
# (%%capture hides the output of this cell.)
# Install pykaldi from the `pykaldi` conda channel; -y skips the confirmation prompt.
!conda install -c pykaldi pykaldi -y
# fairseq is taken from the jimregan fork, branch `issue3581`, not from upstream.
!git clone https://github.com/jimregan/fairseq/ --branch issue3581
# KenLM sources; the Python package is installed from this checkout further down.
!git clone https://github.com/kpu/kenlm
%%capture
# System development libraries (Eigen, LZMA, zlib, bzip2).
# NOTE(review): presumably build dependencies of KenLM - confirm.
!apt-get -y install libeigen3-dev liblzma-dev zlib1g-dev libbz2-dev
%%capture
# Install the KenLM Python package from the cloned checkout.
%cd /content/kenlm
!python setup.py install
# Step out of the source tree once the install is done.
%cd /tmp
import os

# Environment for the later shell steps: append the KenLM build directory to
# PATH and record where the fairseq checkout lives.
os.environ.update({
    'PATH': os.environ['PATH'] + ':/content/kenlm/build/bin/',
    'FAIRSEQ_ROOT': '/content/fairseq',
})
# Install fairseq from the cloned checkout; setup.py is run from inside it.
%cd /content/fairseq/
%%capture
!python setup.py install
# Ask Hydra for complete stack traces instead of its shortened error output.
os.environ['HYDRA_FULL_ERROR'] = '1'
%%capture
# Extra Python dependency.
# NOTE(review): presumably required by the wav2vec-U code - confirm.
!pip install editdistance
%%capture
# Kaggle command-line client, used further down to download the prepared datasets.
!pip install kaggle
from google.colab import files

# Open the browser upload prompt. The Kaggle API token (kaggle.json) is the
# file expected here: the next step moves it into ~/.kaggle/.
uploaded = files.upload()

# Report every uploaded file together with its size in bytes.
for fn, payload in uploaded.items():
    print(f'User uploaded file "{fn}" with length {len(payload)} bytes')
# Then move kaggle.json into the folder where the API expects to find it.
# chmod 600 restricts the token file to its owner.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json
# Both datasets are downloaded into, and unpacked in, /content.
%cd /content
# Prepared text data.
# NOTE(review): rungan.sh reads /content/preppedtext/phones/ - confirm that this
# archive unpacks to that layout.
!kaggle datasets download "jimregan/w2vu-cvsv-prepared-text"
%%capture
!unzip /content/w2vu-cvsv-prepared-text.zip
# Precomputed features (per the dataset name: PCA-512, cls128, mean-pooled).
!kaggle datasets download -d jimregan/w2vu-cvsv-precompute-pca512-cls128-mean-pooled
%%capture
!unzip w2vu-cvsv-precompute-pca512-cls128-mean-pooled.zip
# The archives are not needed once they have been extracted.
!rm *.zip
import torch
# Display the CUDA and cuDNN versions reported by PyTorch.
# NOTE(review): a notebook cell only displays its last bare expression, so if
# these two lines share a cell only the cuDNN version is shown - confirm.
torch.version.cuda
torch.backends.cudnn.version()
# The training script is written to and run from the fairseq checkout.
%cd /content/fairseq
from google.colab import drive
# Mount Google Drive; rungan.sh saves its checkpoints under /content/drive/MyDrive/w2vu.
drive.mount('/content/drive')
%%writefile rungan.sh
# Launch wav2vec-U GAN training through fairseq-hydra-train.
# (The body of this cell is saved as rungan.sh by %%writefile.)

# Environment handed to the trainer. Both variables are exported so that the
# child process sees them.
export PREFIX=w2v_unsup_gan_xp
export CUDA_LAUNCH_BLOCKING=1

# Inputs: precomputed features, prepared phone text, and the KenLM model.
features_dir=/content/precompute_pca512_cls128_mean_pooled
phones_dir=/content/preppedtext/phones/
phone_lm=/content/preppedtext/phones/lm.phones.filtered.04.bin

# Hydra overrides, in the order they are passed on the command line.
# The seed range is a sweep expression used together with multirun (-m).
overrides=(
    "task.data=${features_dir}"
    "task.text_data=${phones_dir}"
    "task.kenlm_path=${phone_lm}"
    checkpoint.no_epoch_checkpoints=true
    checkpoint.save_dir=/content/drive/MyDrive/w2vu
    'common.seed=range(0,5)'
)

fairseq-hydra-train \
    -m --config-dir fairseq/config/model/wav2vecu/gan \
    --config-name w2vu \
    "${overrides[@]}"
# Run the generated script.
!bash rungan.sh