WER plots for fine-tuning on the AWB dataset
From a Fonetik paper in 2022
import matplotlib.pyplot as plt
import pandas as pd
# Raw results table: one row per line, two whitespace-separated fields.
# The first field is parsed as an integer (collected in `mins`), the second
# as a float (collected in `wer`).
MINS_DATA = """
5 0.057692307692307696
10 0.057692307692307696
15 0.05576923076923077
20 0.046153846153846156
25 0.038461538461538464
30 0.046153846153846156
35 0.03653846153846154
40 0.03653846153846154
45 0.025
50 0.03653846153846154
55 0.026923076923076925
60 0.032692307692307694
"""

# Parallel lists filled from MINS_DATA, one entry per data row.
mins = []
wer = []
for row in MINS_DATA.splitlines():
    # Split on any run of whitespace so that tab-separated and
    # space-separated rows both parse; requiring a literal tab would
    # silently drop every row whose separator is a space.
    fields = row.split()
    if len(fields) < 2:
        # Blank lines (such as the ones framing the literal) and rows
        # without a second field carry no data point and are skipped.
        continue
    mins.append(int(fields[0]))
    # Scale the stored value by 100 (e.g. 0.025 becomes 2.5) --
    # presumably to express the WER as a percentage.
    wer.append(float(fields[1]) * 100)
# Render floats in pandas output with a thousands separator and two decimals.
pd.set_option("display.float_format", '{:,.2f}'.format)

# Build the results table from the two parallel lists: one row per data point.
df = pd.DataFrame({"Minutes": mins, "WER": wer})

# Bare expression -- presumably kept so the table is displayed when this is
# the last statement of a notebook cell; it has no effect in a plain script.
df
import numpy as np
# Draw on the current axes so the tick setup and the pandas plot share them.
axes = plt.gca()

# Major tick positions every 5 units, from 5 up to (not including) 125.
# NOTE(review): the table above only covers 5-60 -- confirm that a tick
# range reaching 120 is intended.
tick_positions = np.arange(5, 125, 5)
axes.set_xticks(tick_positions)

# The labels are applied to the *minor* ticks (minor=True).
# NOTE(review): no minor tick positions are set in this snippet -- confirm
# that these labels have the intended effect.
axes.set_xticklabels(mins, minor=True)

# Line plot of the WER column against the Minutes column on the same axes.
df.plot(x='Minutes', y='WER', kind='line', ax=axes)