from pathlib import Path

# Directory that holds hunspell_results.tsv and the braxen-<lang>.txt files,
# and that receives the braxen-<lang>-filtered.txt output.
LANG_LOC = "/tmp"
LANG_PATH = Path(LANG_LOC)

# Per-language accumulators, keyed by the first column of hunspell_results.tsv.
norms = {}     # lang -> list of (original word, replacement spelling) tuples
ok_words = {}  # lang -> list of words whose status column is "OK"

# Substitutions tried by check_nobdan: ASCII digraphs and the ä/ö letters
# mapped onto æ/ø/å.  Built once instead of on every call; the order is the
# order in which single substitutions are attempted.
_NOBDAN_PAIRS = {
    "ae": "æ",
    "Ae": "Æ",
    "AE": "Æ",
    "oe": "ø",
    "Oe": "Ø",
    "OE": "Ø",
    "aa": "å",
    "Aa": "Å",
    "AA": "Å",
    "ä": "æ",
    "Ä": "Æ",
    "ö": "ø",
    "Ö": "Ø",
}


def check_nobdan(word, suggestions):
    """Look for an æ/ø/å respelling of ``word`` among the suggestions.

    Args:
        word: The word as it was spelled in the input.
        suggestions: Suggested spellings joined by ``", "``.

    Returns:
        ``(word, respelled)`` for the first respelling that appears in
        ``suggestions``, otherwise ``None``.

    Each substitution is first tried on its own, in table order.  If none of
    those matches, all substitutions are applied together so that words
    needing more than one kind of rewrite (e.g. ``blaabaer`` -> ``blåbær``)
    are also recognised.  A respelling is only ever returned when it is
    literally one of the suggestions.
    """
    candidates = set(suggestions.split(", "))

    # Pass 1: a single substitution at a time.
    for plain, special in _NOBDAN_PAIRS.items():
        respelled = word.replace(plain, special)
        if respelled in candidates:
            return (word, respelled)

    # Pass 2: every substitution applied cumulatively, in table order.
    combined = word
    for plain, special in _NOBDAN_PAIRS.items():
        combined = combined.replace(plain, special)
    if combined in candidates:
        return (word, combined)

    return None

# Read hunspell_results.tsv and sort every row into ok_words / norms.
# Columns used: 0 = language code, 1 = word, 2 = status, 3 = suggestions
# (optional, joined by ", ").
# The encoding is given explicitly so that non-ASCII letters decode the same
# way regardless of the platform's default locale.
with open(LANG_PATH / "hunspell_results.tsv", "r", encoding="utf-8") as f:
    for line in f:
        # Skip the header row, whose first column is named "file_code".
        if line.startswith("file_code"):
            continue
        parts = line.strip().split("\t")
        if len(parts) < 3:
            # Blank or truncated row: nothing usable, and indexing would fail.
            continue
        lang, word, status = parts[:3]
        suggestions = parts[3] if len(parts) > 3 else ""
        # Register the language in both tables even if nothing gets appended,
        # because the output stage iterates ok_words and indexes norms.
        lang_ok = ok_words.setdefault(lang, [])
        lang_norms = norms.setdefault(lang, [])
        if status == "OK":
            lang_ok.append(word)
        elif lang in ("nob", "dan"):
            # These two languages get the æ/ø/å respelling check.
            ck = check_nobdan(word, suggestions)
            if ck:
                lang_norms.append(ck)
        else:
            # Other languages: accept a suggestion that differs from the word
            # only in letter case.  For suggestions that collide after
            # lower-casing, the last one wins.
            by_lower = {s.lower(): s for s in suggestions.split(", ")}
            key = word.lower()
            if key in by_lower:
                lang_norms.append((word, by_lower[key]))

# For every language seen in the results, copy braxen-<lang>.txt to
# braxen-<lang>-filtered.txt, keeping only rows whose word was accepted
# (written unchanged) or has a known replacement spelling (written with the
# replacement).  All other rows are dropped.
for lang in ok_words:
    # Build the lookups once per language; testing membership against the
    # raw list would rescan it for every input line.
    accepted = set(ok_words[lang])
    norm_dict = dict(norms[lang])
    # Explicit UTF-8 on both sides so non-ASCII letters round-trip the same
    # way regardless of the platform's default locale.
    with open(LANG_PATH / f"braxen-{lang}.txt", encoding="utf-8") as f, \
            open(LANG_PATH / f"braxen-{lang}-filtered.txt", "w",
                 encoding="utf-8") as out_f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no entry and would fail the unpack below.
                continue
            # Each row is expected to be exactly "<word>\t<second column>";
            # the second column is copied through untouched.
            word, pron = line.split("\t")
            if word in accepted:
                out_f.write(f"{word}\t{pron}\n")
            elif word in norm_dict:
                out_f.write(f"{norm_dict[word]}\t{pron}\n")