Original version on Kaggle

%%capture
# lynx is the text-mode browser used below to dump the LibriVox pages;
# %%capture keeps this cell's output out of the notebook.
!apt install -y lynx

# Dump each LibriVox page as text, take the last field of every line that
# mentions "mp3", and keep only the 64kb entries that end in "mp3".
# The result is one download URL per line in dubliners1.txt / dubliners2.txt.
!lynx -dump https://librivox.org/dubliners-by-james-joyce/|grep mp3|awk '{print $NF}'|grep 64kb|grep 'mp3$' > dubliners1.txt
!lynx -dump https://librivox.org/dubliners-by-james-joyce-2/|grep mp3|awk '{print $NF}'|grep 64kb|grep 'mp3$' > dubliners2.txt
# Fetch every URL listed in the two files into the current directory.
!wget -i dubliners1.txt
!wget -i dubliners2.txt

# wget saves a download whose name is already taken as NAME.mp3.1; rename
# those to NAME_2.mp3 so they end in .mp3 like the other recordings.
# Expansions are quoted so unusual file names are not word-split, and the
# loop body is skipped when the glob matches nothing.
!for i in *.mp3.1;do [ -e "$i" ] || continue; mv -- "$i" "$(basename "$i" .mp3.1)_2.mp3";done

The actual ASR (automatic speech recognition) work starts here.

# Install the Hugging Face transformers package, which provides the
# `pipeline` helper imported below.
!pip install transformers

# Hugging Face Hub checkpoint ids. Judging by their names, the first is an
# English wav2vec2 model and the second a TIMIT phoneme model -- confirm on
# the model cards.
EN_MODEL = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
PHONE_MODEL = "vitouphy/wav2vec2-xls-r-300m-timit-phoneme"

import torch
from transformers import pipeline

# Pick devices from what is actually available instead of assuming two GPUs:
#   2+ GPUs -> one model per GPU (devices 0 and 1, as before)
#   1 GPU   -> both models share device 0
#   no GPU  -> both models run on the CPU (device -1)
n_gpus = torch.cuda.device_count()
en_device = 0 if n_gpus > 0 else -1
phone_device = 1 if n_gpus > 1 else en_device

# No task is given, so `pipeline` resolves it from each checkpoint.
pipe_en = pipeline(model=EN_MODEL, device=en_device)
pipe_phone = pipeline(model=PHONE_MODEL, device=phone_device)

from pathlib import Path
import json

# Run both pipelines over every MP3 in the working directory and store each
# raw pipeline result as JSON: <stem>_en.json and <stem>_phone.json.
cur = Path(".")
for audio_path in cur.glob("*.mp3"):
    audio_name = str(audio_path)
    # Both models see the same file, processed in 10-second chunks with
    # word-level timestamps; both finish before anything is written.
    results = {
        "en": pipe_en(audio_name, chunk_length_s=10, return_timestamps="word"),
        "phone": pipe_phone(audio_name, chunk_length_s=10, return_timestamps="word"),
    }
    # Dict order is insertion order, so the "en" file is written first.
    for tag, result in results.items():
        with open(f"{audio_path.stem}_{tag}.json", "w") as out_file:
            json.dump(result, out_file)