%%capture
!pip install praatio
def irish_lc(word):
    """Lowercase an Irish word, hyphenating a prefixed n/t before a capital vowel.

    When the word starts with a lowercase "n" or "t" immediately followed by
    an uppercase vowel (plain or with an acute accent), the first letter is
    kept, a hyphen is inserted, and the remainder is lowercased
    ("nAthair" -> "n-athair").  Every other word is simply lowercased.

    Args:
        word: The word to normalise.

    Returns:
        The normalised, lowercased word.
    """
    # The explicit length check matters: with slices, an empty slice is "in"
    # any string, so "" and one-letter words such as "n" would wrongly take
    # the hyphenating branch and come back as "-" or "n-".
    if len(word) >= 2 and word[0] in "nt" and word[1] in "AEIOUÁÉÍÓÚ":
        return word[0] + "-" + word[1:].lower()
    return word.lower()


# Inline sanity checks, run whenever this code is executed.
assert irish_lc("nAthair") == "n-athair"
assert irish_lc("nDeas") == "ndeas"
assert irish_lc("") == ""
assert irish_lc("n") == "n"
def get_combined_words_and_phones(filename):
    """Pair each word interval of a TextGrid with the phone intervals inside it.

    The TextGrid must contain exactly the tiers 'Word' and 'phones', in that
    order; any other layout yields an empty list.

    A phone is assigned to the current word purely by its end time: every
    not-yet-consumed phone whose end is at or before the word's end is
    attached, including phones that lie before the word's start.  A word is
    only returned when one of its phones ends exactly at the word's end.
    Words for which no such phone exists are skipped, and processing stops
    once the phones are exhausted.

    NOTE(review): this uses the `tierNameList` / `tierDict` / `entryList`
    attribute names; confirm they exist in the installed praatio version.
    The `False` passed to `openTextgrid` is presumably its
    "include empty intervals" flag -- confirm against the praatio docs.

    Args:
        filename: Path of the TextGrid file to read.

    Returns:
        A list of dicts with the keys 'start', 'end', 'label' and 'phones',
        where 'phones' is a list of dicts with 'start', 'end' and 'label'.
    """
    from praatio import textgrid

    tg = textgrid.openTextgrid(filename, False)
    if tg.tierNameList != ['Word', 'phones']:
        return []

    word_entries = tg.tierDict['Word'].entryList
    phone_entries = tg.tierDict['phones'].entryList

    def it_to_dict(it):
        """Copy an interval's start, end and label into a plain dict."""
        return {'start': it.start, 'end': it.end, 'label': it.label}

    out = []
    j = 0  # index of the next phone that has not been assigned to a word
    for word_entry in word_entries:
        if j >= len(phone_entries):
            break  # no phones left: the remaining words cannot be completed

        cur_word = it_to_dict(word_entry)
        cur_word['phones'] = []
        closed = False
        while j < len(phone_entries) and phone_entries[j].end <= cur_word['end']:
            phone = phone_entries[j]
            cur_word['phones'].append(it_to_dict(phone))
            j += 1
            if phone.end == cur_word['end']:
                closed = True
                break

        # Always move on to the next word.  Previously the word index only
        # advanced when a phone ended exactly on the word boundary, so a
        # word without such a phone made the loop spin forever.
        if closed:
            out.append(cur_word)
    return out
def get_wordlist_from_combined(items, wordnorm=None):
    """Turn combined word/phone records into (word, pronunciation) tuples.

    Args:
        items: Iterable of dicts, each with a 'label' and a 'phones' entry;
            'phones' is an iterable of dicts that each carry a 'label'.
        wordnorm: Optional callable applied to every word label.  When it is
            None the label's own ``lower()`` method is used instead.

    Returns:
        A list of ``(normalised_word, phones)`` tuples, where ``phones`` is
        the phone labels joined by single spaces.  Records whose joined
        phones are exactly "sil" are left out.
    """
    def normalise(text):
        return text.lower() if wordnorm is None else wordnorm(text)

    pairs = []
    for entry in items:
        # Normalise before looking at the phones so the normaliser is
        # invoked for every record, including ones that are filtered out.
        headword = normalise(entry['label'])
        pronunciation = " ".join(seg['label'] for seg in entry['phones'])
        if pronunciation != "sil":
            pairs.append((headword, pronunciation))
    return pairs
from pathlib import Path
# Directory that is searched for *.TextGrid files; replace the placeholder
# with the real location before running.
wd = Path("PATH TO FILES")

# Map each TextGrid's file stem to its list of (word, phones) pairs.
tg_data = {}
for tg in wd.glob("*.TextGrid"):
    tg_data[tg.stem] = get_wordlist_from_combined(get_combined_words_and_phones(tg), wordnorm=irish_lc)

# Pool the pairs from every file; the set drops duplicate entries.
dictionary = set()
for tg_words in tg_data.values():
    dictionary.update(tg_words)

# One "word phone phone ..." line per distinct pair, written in sorted order.
joined = [" ".join(a) for a in dictionary]
# Write as UTF-8 explicitly: the words may contain accented vowels, and the
# platform's default encoding is not guaranteed to represent them.
with open("output.dict", "w", encoding="utf-8") as outf:
    outf.writelines(line + "\n" for line in sorted(joined))