WhisperX JSON to TSV
For the HSI project
import json
from pathlib import Path
# Presumably an example input filename ("EG" = "e.g."); it is not referenced
# by any of the code visible in this file.
EG = "hsi_3_0715_227_001-mic2-002.json"
def write_tsv(jsonfile: Path, outpath):
    """Convert one JSON transcript file into a tab-separated file.

    The input must be a JSON object with a ``"segments"`` list whose items
    carry ``"start"``, ``"end"`` and ``"text"`` keys. One line is written per
    segment, in the form ``start<TAB>end<TAB>text``, to
    ``<outpath>/<stem of jsonfile>.tsv``.

    Args:
        jsonfile: Path to the JSON file to read (``str`` is accepted too).
        outpath: Existing directory to write into (``Path`` or ``str``).

    Raises:
        KeyError: If ``"segments"`` or one of the per-segment keys is missing.
        json.JSONDecodeError: If the input is not valid JSON.
        OSError: If the input cannot be read or the output cannot be written.
    """
    jsonfile = Path(jsonfile)
    outfile = Path(outpath) / f"{jsonfile.stem}.tsv"

    # Read with an explicit encoding: transcripts routinely contain non-ASCII
    # text, and the locale default is not UTF-8 on every platform.
    with open(jsonfile, encoding="utf-8") as inf:
        data = json.load(inf)

    # A tab or line break inside the text would corrupt the row structure.
    unsafe = str.maketrans({"\t": " ", "\n": " ", "\r": " "})

    # Build every row before opening the output, so malformed input raises
    # without leaving an empty or truncated .tsv file behind.
    rows = []
    for seg in data["segments"]:
        text = seg["text"].strip().translate(unsafe)
        rows.append(f"{seg['start']}\t{seg['end']}\t{text}\n")

    with open(outfile, "w", encoding="utf-8") as outf:
        outf.writelines(rows)
# Two input/output configurations are listed; the second pair is assigned
# last and is therefore the one that actually takes effect.
INPATH = Path("/Users/joregan/Playing/hsi/audio")
OUTPATH = Path("/tmp/hsi-rec")
INPATH = Path("/Users/joregan/Playing/hsi/audio/whisperx")
OUTPATH = Path("/tmp/hsi-recx")

# Create the output directory (and any missing parents) without a separate
# existence check; still raises if the path exists but is not a directory.
OUTPATH.mkdir(parents=True, exist_ok=True)

# Convert every JSON file directly inside INPATH (non-recursive).
for jsonfile in INPATH.glob("*.json"):
    write_tsv(jsonfile, OUTPATH)