%pip install praatio
Requirement already satisfied: praatio in /Users/joregan/opt/anaconda3/envs/hf/lib/python3.9/site-packages (6.2.0)
Requirement already satisfied: typing-extensions in /Users/joregan/opt/anaconda3/envs/hf/lib/python3.9/site-packages (from praatio) (4.3.0)
Note: you may need to restart the kernel to use updated packages.
from praatio import textgrid
from praatio.utilities.constants import Interval
# Source to convert: either a single JSON file or a directory whose *.json
# files are all converted (presumably WhisperX output, going by the path).
# NOTE(review): this name shadows the builtin input(); it is left as-is
# because the rest of the script reads it under this name.
input = "/Users/joregan/Playing/hsi/test2/whisperx-json"
# Destination: the TextGrid file to write for a single-file input, or a
# directory (created by the script, must not exist yet) for a directory input.
output = '/Users/joregan/Playing/hsi/test2/whisperx-json/textgrids'
from pathlib import Path
import json

# Resolve the configured locations and decide between single-file mode and
# directory (batch) mode based on what the input path points at.
input_path = Path(input)
output_path = Path(output)
IS_DIR = input_path.is_dir()

if IS_DIR:
    # Batch mode writes into a freshly created directory; an existing one is
    # never reused so earlier results cannot be overwritten.
    # (The message previously had the "f" inside the quotes, so the path was
    # never interpolated.)
    assert not output_path.exists(), f"{output} exists, refusing to overwrite"
    output_path.mkdir()
def pad_silences(segments):
    """Fill the gaps between consecutive segments with blank intervals.

    The timeline is assumed to start at 0.0. For every segment that begins
    later than the point reached so far, a ``(gap_start, gap_end, " ")`` tuple
    is inserted in front of it. The segments themselves are passed through
    unchanged and in their original order.

    No padding is emitted when a segment starts exactly where the previous
    one ended (or at 0.0 for the first segment), nor when it starts earlier
    than that: the padding would be a zero-length or negative-length
    interval, which is not a meaningful interval on a tier.

    Args:
        segments: Iterable of ``(start, end, label)`` sequences in
            chronological order.

    Returns:
        A new list with the original segments interleaved with the padding
        tuples.
    """
    cursor = 0.0  # end time of the material emitted so far
    padded = []
    for segment in segments:
        seg_start, seg_end = segment[0], segment[1]
        # Only a real gap gets a silence interval.
        if seg_start > cursor:
            padded.append((cursor, seg_start, " "))
        padded.append(segment)
        # Never move backwards, so a segment nested inside an earlier one
        # cannot cause padding that overlaps already-covered time.
        cursor = max(cursor, seg_end)
    return padded
def write_converted(infile, outfile, verbose=True):
    """Convert one JSON transcript file into a Praat TextGrid file.

    The JSON document must have a top-level ``"segments"`` list whose items
    carry ``"start"``, ``"end"`` and ``"text"`` keys. Each segment becomes an
    interval on a single tier named ``words``; gaps between segments are
    filled by ``pad_silences``. The result is saved to ``outfile`` in
    praatio's ``long_textgrid`` format.

    Nothing is written, and the function simply returns, when the document
    has no ``"segments"`` key, when the segment list is empty, or when any
    segment starts after it ends.

    Args:
        infile: Path (``str`` or ``pathlib.Path``) of the JSON file to read.
        outfile: Path (``str`` or ``pathlib.Path``) of the TextGrid to write.
        verbose: When true, print the reason a file was skipped.

    Returns:
        None.
    """
    # Normalise path objects to plain strings. isinstance is required here:
    # Path(...) instances are PosixPath/WindowsPath objects, so a
    # ``type(x) == Path`` comparison never matches them.
    if isinstance(infile, Path):
        infile = str(infile)
    if isinstance(outfile, Path):
        outfile = str(outfile)

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(infile, encoding="utf-8") as inf:
        data = json.load(inf)

    if "segments" not in data:
        if verbose:
            print("File", infile, "possibly incorrect JSON")
        return

    segments = data["segments"]
    if not segments:
        if verbose:
            print("File", infile, "contains no segments")
        return

    out = []
    for segment in segments:
        # NOTE(review): a segment with start == end passes this check and is
        # handed on as a zero-length interval - confirm how praatio treats it.
        if segment["start"] > segment["end"]:
            if verbose:
                print("File", infile, "has start time after end time")
                print(segment)
            return
        out.append(Interval(segment["start"], segment["end"], segment["text"]))

    # Tier bounds are taken from the first and last transcript segments.
    tier_start = out[0][0]
    tier_end = out[-1][1]

    tg = textgrid.Textgrid()
    word_tier = textgrid.IntervalTier('words', pad_silences(out), tier_start, tier_end)
    tg.addTier(word_tier)
    tg.save(outfile, format="long_textgrid", includeBlankSpaces=False)
# Entry point of the script: run the conversion in whichever mode was
# selected when the input path was inspected.
if IS_DIR:
    # Directory mode: every *.json file yields a TextGrid named after the
    # file's stem inside the output directory.
    for file in input_path.glob("*.json"):
        tgpath = output_path / f"{file.stem}.TextGrid"
        write_converted(file, tgpath)
else:
    # Single-file mode: the configured input and output are used as given.
    write_converted(input, output)