# Example Vosk log file, used for the single-file sanity check in this script.
EG = "/Users/joregan/hsi-vosk/hsi_1_0515_209_001_inter.wav.vosk"
# Directory that is searched for *.vosk files to convert.
DIR = "/Users/joregan/hsi-vosk/"
# Directory the generated .ctm files are written to.
CTMDIR = "/Users/joregan/hsi-vosk/ctm"
import ast
from pathlib import Path
def get_recognition(filename):
    """Collect the recognition results logged in a Vosk output file.

    Only lines that start with ``INFO:root:{'result':`` are used. The
    ``INFO:root:`` logging prefix is dropped and the remainder of the line
    is parsed as a Python literal.

    Args:
        filename: Path of the log file to read.

    Returns:
        A list with one parsed object per matching line, in file order.

    Raises:
        ValueError, SyntaxError: If a matching line is not a valid Python
            literal.
    """
    prefix = "INFO:root:"
    marker = prefix + "{'result':"
    segments = []
    with open(filename) as inf:
        for line in inf:
            if line.startswith(marker):
                payload = line.strip()[len(prefix):]
                # literal_eval only accepts literals, so text in the log
                # file can never be executed as code (unlike eval).
                segments.append(ast.literal_eval(payload))
    return segments
# Sanity check: parse the example file and print the word list of its
# first recognised segment.
recs = get_recognition(EG)
print(recs[0]['result'])
# Presumably the output of the print above, left over from a notebook
# export; kept as a comment instead of a no-op expression statement:
# [{'conf': 1.0, 'end': 1.11, 'start': 0.9, 'word': 'for'}, {'conf': 1.0, 'end': 1.41, 'start': 1.17, 'word': 'sure'}]
def clean_filename(filename):
    """Reduce a file name or path to its bare base name.

    The directory part and the last extension are removed, and a remaining
    trailing ``.wav`` is removed as well, so ``/x/foo.wav.vosk`` becomes
    ``foo``.

    Args:
        filename: A ``str`` or ``pathlib.Path`` naming the file.

    Returns:
        The cleaned base name as a ``str``.
    """
    # isinstance is required here: Path(...) creates a PosixPath or
    # WindowsPath, so an exact type comparison against Path never matches.
    if isinstance(filename, (str, Path)):
        filename = Path(filename).stem
    if filename.endswith(".wav"):
        filename = filename[:-len(".wav")]
    return filename
def kaldi_word_to_ctm_line(word, filename):
    """Format one recognised word as a space-separated CTM-style line.

    Example of the target format:
    AJJacobs_2007P-0001605-0003029 1 0 0.09 <eps> 1.0

    Args:
        word: Mapping with "start", "end", "word" and "conf" entries.
        filename: Name or path of the source file; it is passed through
            clean_filename to produce the first field.

    Returns:
        The line "<file id> 1 <start> <end> <word> <conf>".

    Raises:
        KeyError: If any of the required entries is missing from ``word``.
    """
    # Validate explicitly: assert statements are removed under ``python -O``.
    required = ("start", "end", "word", "conf")
    missing = [key for key in required if key not in word]
    if missing:
        raise KeyError(
            "word entry is missing {}: {!r}".format(", ".join(missing), word)
        )

    # NOTE(review): the example line above appears to carry a duration in
    # the fourth column, whereas the end time is written here -- confirm
    # which one the consumers of these files expect.
    parts = [
        clean_filename(filename),
        "1",  # channel is always 1
        str(word["start"]),
        str(word["end"]),
        word["word"],
        str(word["conf"]),
    ]
    return " ".join(parts)
# Convert every *.vosk log in DIR into a .ctm file of the same base name
# inside CTMDIR.
CTMPATH = Path(CTMDIR)
# parents/exist_ok make the script re-runnable and tolerate a missing parent.
CTMPATH.mkdir(parents=True, exist_ok=True)

for file in Path(DIR).glob("*.vosk"):
    recs = get_recognition(str(file))

    ctmlines = []
    for rec in recs:
        res = rec["result"]
        # Label each line with the file being converted (not the EG sample),
        # so every output file carries its own id.
        ctmlines += [kaldi_word_to_ctm_line(x, str(file)) for x in res]
    outfile = clean_filename(str(file)) + ".ctm"
    outpath = CTMPATH / outfile
    with open(str(outpath), "w") as of:
        of.write("\n".join(ctmlines))