TextGrid to .lab, take 2
Second pass
from praatio import textgrid
from pathlib import Path
def _interval_to_dict(interval):
    """Copy an interval's ``start``, ``end`` and ``label`` into a plain dict."""
    return {
        'start': interval.start,
        'end': interval.end,
        'label': interval.label,
    }


def _group_phones_by_word(word_entries, phone_entries):
    """Attach to each word the unconsumed phones that end at or before its end.

    Both arguments are sequences of objects exposing ``start``, ``end`` and
    ``label``. Phones are consumed strictly in order, so a phone that ends
    after the current word's end is carried over to the next word.
    """
    out = []
    j = 0
    for word_entry in word_entries:
        # Once the phones run out, the remaining words are not emitted.
        if j >= len(phone_entries):
            break
        cur_word = _interval_to_dict(word_entry)
        cur_word['phones'] = []
        while j < len(phone_entries) and phone_entries[j].end <= cur_word['end']:
            cur_word['phones'].append(_interval_to_dict(phone_entries[j]))
            j += 1
        # Always move on to the next word. Advancing only when a phone ended
        # exactly on the word boundary would never terminate for a word that
        # has no such phone.
        out.append(cur_word)
    return out


def get_combined_words_and_phones(filename):
    """Read a TextGrid and return its words, each with its phones attached.

    Args:
        filename: Path of the TextGrid file to open.

    Returns:
        A list of dicts with keys ``'start'``, ``'end'``, ``'label'`` and
        ``'phones'``; ``'phones'`` is a list of dicts with the first three
        keys. An empty list is returned when the TextGrid has no tiers.

    Raises:
        ValueError: If the TextGrid has tiers but lacks a ``'phones'`` tier
            or a word tier named ``'words'`` or ``'Word'``.
    """
    # Local import keeps the function self-contained (praatio is also
    # imported at file level).
    from praatio import textgrid

    # NOTE(review): the second positional argument is presumably praatio's
    # includeEmptyIntervals flag (5.x API) -- confirm against the installed
    # version.
    tg = textgrid.openTextgrid(filename, False)
    if not tg.tierNameList:
        return []

    # Look tiers up by name instead of requiring an exact two-element list,
    # so extra tiers or a different tier order do not break the lookup.
    word_tier = next(
        (name for name in ('words', 'Word') if name in tg.tierNameList),
        None,
    )
    if word_tier is None or 'phones' not in tg.tierNameList:
        raise ValueError(
            f"{filename}: expected a 'words' or 'Word' tier and a 'phones' "
            f"tier, found {list(tg.tierNameList)}"
        )

    return _group_phones_by_word(
        tg.tierDict[word_tier].entryList,
        tg.tierDict['phones'].entryList,
    )
This phone merging is only intended to merge a silence or spoken-noise 'phone' into the phone to its left. For the most part, though, it doesn't do what I wanted, because such a 'phone' often means that a silence 'word' has been inserted.
def merge_phones(word):
    """Fold silence/noise phones into the phone that precedes them.

    A phone whose label is ``""``, ``"sil"`` or ``"spn"`` and that is not the
    first phone of the word is dropped, and the ``'end'`` of the previously
    kept phone is moved to the dropped phone's ``'end'``. A filler in first
    position has nothing to its left and is kept unchanged.

    Args:
        word: Dict with a ``'phones'`` key holding a list of phone dicts,
            each with at least ``'label'`` and ``'end'`` keys.

    Returns:
        A new list of phone dicts. The dicts are copies, so ``word`` and its
        phone dicts are never modified.
    """
    fillers = ("", "sil", "spn")
    outphones = []
    for i, phone in enumerate(word['phones']):
        if i > 0 and phone['label'] in fillers:
            # i > 0 guarantees outphones is non-empty: the first phone is
            # always kept.
            outphones[-1]['end'] = phone['end']
        else:
            # Copy so that extending 'end' later never alters the caller's
            # dict.
            outphones.append(dict(phone))
    return outphones
def tg_to_lab(filename, target="phones"):
    """Convert a TextGrid into a list of ``"<start> <end> <label>"`` lines.

    Words and phones are read with ``get_combined_words_and_phones``, each
    word's phones are passed through ``merge_phones``, and every resulting
    phone becomes one output line.

    Args:
        filename: Path of the TextGrid file to convert.
        target: Currently unused; kept so existing callers keep working.

    Returns:
        A list of strings, one per phone, with start and end expressed as
        integers: the TextGrid time multiplied by 10,000,000 (100 ns units,
        assuming the TextGrid times are in seconds).
    """
    ticks_per_second = 10000000
    combined = get_combined_words_and_phones(filename)

    out = []
    for word in combined:
        for tmp_phone in merge_phones(word):
            # Round to the nearest tick. Plain int() truncates, so a product
            # that float error leaves a hair below the exact integer would
            # come out one tick too low.
            start = int(round(tmp_phone['start'] * ticks_per_second))
            end = int(round(tmp_phone['end'] * ticks_per_second))
            label = tmp_phone['label']
            out.append(f"{start} {end} {label}")
    return out
# Batch conversion: write one .lab file per TextGrid found in ``inpath``.
inpath = Path("/home/jim/Playing/mfa_alignments/snc-out")
outpath = Path("/home/jim/Playing/mfa_alignments/snc-lab")

# Make sure the destination exists; otherwise the first open() below fails.
outpath.mkdir(parents=True, exist_ok=True)

for filename in inpath.glob("*.TextGrid"):
    out = outpath / f"{filename.stem}.lab"
    lab = tg_to_lab(filename)
    # Explicit UTF-8 so labels with non-ASCII characters are written the same
    # way regardless of the locale's default encoding.
    with open(out, "w", encoding="utf-8") as outf:
        outf.writelines(f"{line}\n" for line in lab)