# Espeak to/from CMUdict phone mapping.
# An imperfect mapping between two imperfect phonesets.
# Phone table, one "<CMUdict phone> <espeak IPA>" pair per line, separated by
# whitespace. CMUdict vowels carry a trailing stress digit; in this table the
# digit-1 rows are written with a leading "ˈ" and the digit-2 rows with a
# leading "ˌ" on the espeak side.
#
# AH0 is listed twice (ə and ɐ): both espeak symbols map back to AH0, while
# AH0 itself maps forward to the first one listed (ə).
MAPPING = """
AA0 ɑː
AA1 ˈɑː
AA2 ˌɑː
AE0 æ
AE1 ˈæ
AE2 ˌæ
AH0 ə
AH0 ɐ
AH1 ˈʌ
AH2 ˌʌ
AO0 ɔː
AO1 ˈɔː
AO2 ˌɔː
AW0 aʊ
AW1 ˈaʊ
AW2 ˌaʊ
AY0 aɪ
AY1 ˈaɪ
AY2 ˌaɪ
B b
CH tʃ
D d
DH ð
EH0 ɛ
EH1 ˈɛ
EH2 ˌɛ
ER0 ɚ
ER1 ˈɜː
ER2 ˌɜː
EY0 eɪ
EY1 ˈeɪ
EY2 ˌeɪ
F f
G ɡ
HH h
IH0 ɪ
IH1 ˈɪ
IH2 ˌɪ
IY0 i
IY1 ˈiː
IY2 ˌiː
JH dʒ
K k
L l
M m
N n
NG ŋ
OW0 oʊ
OW1 ˈoʊ
OW2 ˌoʊ
OY0 ɔɪ
OY1 ˈɔɪ
OY2 ˌɔɪ
P p
R ɹ
S s
SH ʃ
T t
TH θ
UH0 ʊ
UH1 ˈʊ
UH2 ˌʊ
UW0 uː
UW1 ˈuː
UW2 ˌuː
V v
W w
Y j
Z z
ZH ʒ
"""


def _parse_mapping(text):
    """Return the (cmudict, espeak) pairs listed in *text*, in table order.

    Blank and whitespace-only lines are skipped. A line that does not
    consist of exactly two whitespace-separated fields is printed and
    skipped, so a bad row is visible without aborting the import.
    """
    pairs = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        # Strip before the emptiness test so whitespace-only lines are
        # treated as blank instead of being reported as malformed.
        if not line:
            continue
        fields = line.split()
        if len(fields) != 2:
            print(line)
            continue
        pairs.append((fields[0], fields[1]))
    return pairs


# Parse the table once; both lookup dicts are built from the same pairs.
_PAIRS = _parse_mapping(MAPPING)

# espeak IPA symbol -> CMUdict phone. If an espeak symbol were listed more
# than once, the last row would win.
espeak_to_cmudict = {espeak: cmu for cmu, espeak in _PAIRS}

# CMUdict phone -> espeak IPA symbol. When a CMUdict phone is listed more
# than once (AH0), the first row wins; setdefault keeps the earliest entry
# and preserves table order for the keys.
cmudict_to_espeak = {}
for _cmu, _espeak in _PAIRS:
    cmudict_to_espeak.setdefault(_cmu, _espeak)