Convert CTM to TextGrid
Goal: inspect the output of Reverb ASR, which is written in CTM (time-marked conversation) format. BTW, Reverb is not the Whisper-killer they claim.
from praatio import textgrid
from praatio.utilities.constants import Interval
# Input CTM transcript and the Praat TextGrid file that will be written.
ctmfile = "/tmp/hsi_7_0719_209_001_main.ctm"
praatfile = "/tmp/hsi_7_0719_209_001_main.TextGrid"

# Parse the CTM file into a list of records, each a list of string fields.
ctmlines = []
with open(ctmfile, encoding="utf-8") as ctmf:
    for line in ctmf:
        line = line.strip()
        # Blank lines and ";;" comment lines carry no word record; keeping
        # them would yield records without the fields read further down.
        if not line or line.startswith(";;"):
            continue
        # split() with no argument tolerates tabs and runs of spaces, which
        # split(" ") would turn into spurious empty fields.
        ctmlines.append(line.split())

# Notebook-style preview of the first parsed record; has no visible effect
# when run as a script, and raises IndexError if nothing was parsed.
ctmlines[0]
import math
# Build one labelled interval per CTM record.
# Fields read from each record: [2] start time, [3] duration, [4] word.
intervals = []
for ctmline in ctmlines:
    # Skip records too short to contain a word field (e.g. a blank input
    # line) instead of failing with a bare IndexError.
    if len(ctmline) < 5:
        continue
    start = float(ctmline[2])
    dur = float(ctmline[3])
    # Round the end time to hundredths of a second; presumably this keeps
    # float noise from the addition out of the written TextGrid.
    end = round(start + dur, 2)
    intervals.append(Interval(start, end, ctmline[4]))

if not intervals:
    raise ValueError("no usable word records found in CTM input")

# The tier spans from the first word's start to the last word's end.
# NOTE(review): this means the tier does not start at 0 unless the first
# word does -- confirm that is intended.
tier_start = intervals[0][0]
tier_end = intervals[-1][1]

# Wrap the intervals in a single "words" tier and write the TextGrid.
tg = textgrid.Textgrid()
word_tier = textgrid.IntervalTier("words", intervals, tier_start, tier_end)
tg.addTier(word_tier)
tg.save(praatfile, format="long_textgrid", includeBlankSpaces=False)