PER calculations
Local, incomplete
!pip install jiwer
!unzip upload.zip
!mv /content/output/swe.merged.out /content/output/swe.mwl.out
from jiwer import wer
# Per-language lookup tables. Each maps a language code to a dict built from
# one two-column, tab-separated file (column 0 -> column 1):
#   test - splits-orig/{lang}.test.retsv
#   mwl  - output/{lang}.mwl.out
#   wl   - output/{lang}.wl.out
#   wlm  - output/{lang}.wl-merged.out (left empty for "swe")
test = {}
mwl = {}
wl = {}
wlm = {}


def _read_pairs(path, strip_superscript=False):
    """Read a two-column, tab-separated file into a dict.

    Args:
        path: File to read. It is decoded as UTF-8 explicitly so the result
            does not depend on the platform's default encoding.
        strip_superscript: When true, a line whose first character is one of
            "¹²³⁴⁵⁶" has that single character removed before splitting.

    Returns:
        A dict mapping column 0 to column 1. If a key occurs more than once,
        the last occurrence wins.

    Raises:
        IndexError: A non-blank line has no second column.
    """
    pairs = {}
    with open(path, encoding="utf-8") as handle:
        for line in handle:
            if strip_superscript and line[0] in "¹²³⁴⁵⁶":
                line = line[1:]
            fields = line.strip().split("\t")
            if fields == [""]:
                # Blank (or whitespace-only) line, e.g. a stray trailing
                # newline: nothing to record.
                continue
            pairs[fields[0]] = fields[1]
    return pairs


for lang in ["swe", "dan", "eng", "spa", "lat", "fre", "ita"]:
    test[lang] = _read_pairs(f"splits-orig/{lang}.test.retsv")
    mwl[lang] = _read_pairs(f"output/{lang}.mwl.out", strip_superscript=True)
    wl[lang] = _read_pairs(f"output/{lang}.wl.out")
    # No wl-merged file is read for "swe"; its entry stays an empty dict.
    if lang == "swe":
        wlm[lang] = {}
    else:
        wlm[lang] = _read_pairs(f"output/{lang}.wl-merged.out")
# Print one "WL" score per language: jiwer's wer() over the test references
# and the wl hypotheses, paired by the keys of test[lang].
for lang in ["swe", "dan", "eng", "spa", "lat", "fre", "ita"]:
    gold = test[lang]
    predicted = wl[lang]
    refs = list(gold.values())
    hyps = [predicted[key] for key in gold]
    print("WL", lang, wer(refs, hyps))
# Print one "MWL" score per language: jiwer's wer() over the test references
# and the mwl hypotheses, paired by the keys of test[lang].
for lang in ["swe", "dan", "eng", "spa", "lat", "fre", "ita"]:
    gold = test[lang]
    predicted = mwl[lang]
    refs = list(gold.values())
    hyps = [predicted[key] for key in gold]
    print("MWL", lang, wer(refs, hyps))
# Print one "WLM" score per language: jiwer's wer() over the test references
# and the wlm hypotheses, paired by the keys of test[lang].
# "swe" is not in this list.
for lang in ["dan", "eng", "spa", "lat", "fre", "ita"]:
    gold = test[lang]
    predicted = wlm[lang]
    refs = list(gold.values())
    hyps = [predicted[key] for key in gold]
    print("WLM", lang, wer(refs, hyps))
!unzip raw.zip
# raw[lang] maps column 0 to column 1 of the two-column, tab-separated file
# {lang}.wl-raw.out in the current directory.
raw = {}
for lang in ["swe", "dan", "eng", "spa", "lat", "fre", "ita"]:
    raw[lang] = {}
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(f"{lang}.wl-raw.out", encoding="utf-8") as rtf:
        for line in rtf:
            fields = line.strip().split("\t")
            if fields == [""]:
                # Blank (or whitespace-only) line: nothing to record.
                continue
            raw[lang][fields[0]] = fields[1]
# Print one "RAW" score per language: jiwer's wer() over the test references
# and the raw hypotheses, paired by the keys of test[lang].
for lang in ["swe", "dan", "eng", "spa", "lat", "fre", "ita"]:
    gold = test[lang]
    predicted = raw[lang]
    refs = list(gold.values())
    hyps = [predicted[key] for key in gold]
    print("RAW", lang, wer(refs, hyps))