!wget https://www.openslr.org/resources/29/lexicon-sv.tgz
--2023-09-13 16:17:26--  https://www.openslr.org/resources/29/lexicon-sv.tgz
Resolving www.openslr.org (www.openslr.org)... 46.101.158.64
Connecting to www.openslr.org (www.openslr.org)|46.101.158.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5374148 (5,1M) [application/x-gzip]
Saving to: ‘lexicon-sv.tgz’

lexicon-sv.tgz      100%[===================>]   5,12M  9,79MB/s    in 0,5s    

2023-09-13 16:17:27 (9,79 MB/s) - ‘lexicon-sv.tgz’ saved [5374148/5374148]

!tar zxvf lexicon-sv.tgz
x lexicon.txt
# Pronunciation lexicon read from lexicon.txt: maps each word to a list of
# pronunciations, where a pronunciation is the list of whitespace-separated
# tokens that follow the word on its line.
lexicon = {}
with open("lexicon.txt") as lexf:
    # Iterate the file lazily rather than loading every line with readlines().
    for line in lexf:
        parts = line.split()
        if not parts:
            # Blank or whitespace-only line: there is no word to index
            # (indexing parts[0] here used to raise IndexError).
            continue
        word, *phones = parts
        # Entries whose word starts with one of ! < & - are left out.
        if word[0] in "!<&-":
            continue
        # A word may occur on several lines; keep every pronunciation.
        lexicon.setdefault(word, []).append(phones)
# Phone symbol inventories. move_stress() consults VOWELS to decide where a
# stress mark is allowed to land.
VOWELS = [
    "2:", "9", "A:", "E", "E*U", "E:", "I", "O", "U", "Y",
    "a", "a*U", "e", "e:", "i:", "o:", "u0", "u:", "y:", "}:",
]
CONS = [
    "N", "S", "b", "d", "d`", "f", "g", "h", "j", "k", "l", "l`",
    "m", "n", "n`", "p", "r", "s", "s'", "s`", "t", "t`", "v",
]


def move_stress(phones):
    """Move each stress mark from the phone carrying it onto the next vowel.

    A phone may start with a stress mark: ``%``, ``"`` or the two-character
    combination ``%`` followed by ``"``. The mark is stripped from that phone
    and re-attached to the first phone found in VOWELS at or after that
    position. For the two-character combination only the ``"`` is carried
    forward. A mark that is still waiting when a new marked phone arrives is
    replaced by the new one.

    Args:
        phones: sequence of phone strings.

    Returns:
        A new list of phones with the stress marks repositioned. If a mark
        is never placed, a message is printed and the list is still returned.
    """
    result = []
    pending = ''  # stress mark waiting for a vowel; '' when there is none
    for symbol in phones:
        # Step 1: peel a leading stress mark off the phone, if it has one.
        if symbol.startswith('%"'):
            pending, symbol = '"', symbol[2:]
        elif symbol[0:1] in '%"':
            pending, symbol = symbol[0:1], symbol[1:]
        # Step 2: hand the waiting mark to this phone if it is a vowel.
        if pending != '' and symbol in VOWELS:
            result.append(pending + symbol)
            pending = ''
        else:
            result.append(symbol)
    if pending != '':
        print("Error: unplaced stress", phones)
    return result


# Self-check: both marks end up on the vowel that follows them.
test = '%"j Y t r a %s E j'.split()
assert move_stress(test) == ['j', '"Y', 't', 'r', 'a', 's', '%E', 'j']
import itertools
class BaseRule():
    """Common state and helpers shared by pronunciation rules.

    Args:
        rule: label for the rule; ``str(rule_obj)`` shows it in brackets.
        phone: the phone symbol the rule is about.
        keep_stress: when True, clean_phones() removes stress marks from
            the phones it is given; when False it returns them untouched.
            NOTE(review): the name reads as the opposite of what the flag
            does in clean_phones() -- confirm the intended polarity.
    """

    def __init__(self, rule, phone, keep_stress=False):
        self.rule = rule
        self.phone = phone
        self.keep_stress = keep_stress

    def clean_phones(self, phones):
        """Return phones prepared for matching.

        With keep_stress set, a new list is returned in which every ``"``
        and ``%`` character has been removed from each phone; otherwise the
        input object itself is returned.
        """
        if self.keep_stress:
            return [x.replace('"', '').replace('%', '') for x in phones]
        else:
            return phones

    def applies(self, phones):
        """Placeholder for subclasses; returns None here."""
        pass

    def expand(self, phones, positions=None):
        """Placeholder for subclasses; returns None here."""
        pass

    def __str__(self):
        return f"[{self.rule}]"


class PhonologicalRule(BaseRule):
    """Optional rewrite of one phone in a given left/right context.

    Args:
        rule: label for the rule (see BaseRule).
        phone: the phone the rule targets.
        transform: phones that may replace the target; an empty list means
            the target may be deleted.
        left_context: phones that must immediately precede the target.
        right_context: phones that must immediately follow the target.
        keep_stress: passed to BaseRule; controls clean_phones().

    The list arguments default to None and are turned into a fresh empty
    list per instance, so rules built with defaults never share (and cannot
    accidentally mutate) one another's lists.
    """

    def __init__(self, rule, phone, transform=None, left_context=None, right_context=None, keep_stress=False):
        super().__init__(rule, phone, keep_stress)
        self.lctx = [] if left_context is None else left_context
        self.rctx = [] if right_context is None else right_context
        self.transform = [] if transform is None else transform

    def lctx_ok(self, phones, pos):
        """Return True if the left context ends right before index pos."""
        if not self.lctx:
            return True
        start = pos - len(self.lctx)
        if start < 0:
            # Not enough phones to the left. Stated explicitly so that a
            # negative start is never used as a from-the-end slice index.
            return False
        return phones[start:pos] == self.lctx

    def rctx_ok(self, phones, pos):
        """Return True if the right context starts right after index pos."""
        if not self.rctx:
            return True
        start = pos + 1
        # A slice running past the end is simply shorter and cannot match.
        return phones[start:start + len(self.rctx)] == self.rctx

    def ctx_ok(self, phones, pos):
        """Return True if both contexts match around index pos."""
        return self.rctx_ok(phones, pos) and self.lctx_ok(phones, pos)

    def applies(self, phones):
        """Return the indices at which the rule can fire.

        Matching is done on clean_phones(phones), so both the target and
        the contexts are compared against the cleaned symbols.
        """
        phones = self.clean_phones(phones)
        return [
            i for i, phone in enumerate(phones)
            if phone == self.phone and self.ctx_ok(phones, i)
        ]

    def expand(self, phones, positions=None):
        """Return every variant obtained by optionally applying the rule.

        Args:
            phones: the original phone list; untouched phones are copied
                from it as they are.
            positions: indices where the rule may fire. None or an empty
                sequence means "compute them with applies()".

        Returns:
            A list of phone lists without duplicates, in a reproducible
            order: the unchanged sequence first, then variants in
            itertools.product order (rightmost position varying fastest).
            Replacement phones are spliced in as separate items, and empty
            strings are dropped.
        """
        if not positions:
            positions = self.applies(phones)
        targets = set(positions)  # O(1) membership inside the loop
        replacement = tuple(self.transform)

        # One tuple of alternatives per input phone; each alternative is
        # itself a tuple of phones so a replacement can be 0..n phones long.
        options = []
        for i, phone in enumerate(phones):
            if i in targets:
                options.append(((phone,), replacement))
            else:
                options.append(((phone,),))

        # A dict keeps first-seen order, unlike a set whose order for
        # strings changes from run to run.
        variants = {}
        for combo in itertools.product(*options):
            flat = tuple(p for part in combo for p in part if p != '')
            variants[flat] = None
        return [list(variant) for variant in variants]
# Example rule: "k" may be dropped when the phone right after it is "t".
rule = PhonologicalRule(
    "k → ∅ / _ t",
    "k",
    transform=[],
    left_context=[],
    right_context=["t"],
)
print(rule)
[k → ∅ / _ t]
# Every variant of the sequence with each "k" before "t" kept or dropped.
rule.expand(["v", "I", "k", "t", "I", "k", "t"])
[['v', 'I', 't', 'I', 't'],
 ['v', 'I', 't', 'I', 'k', 't'],
 ['v', 'I', 'k', 't', 'I', 'k', 't'],
 ['v', 'I', 'k', 't', 'I', 't']]
2D  ɖ
2L  ɭ
2N  ɳ
2S  ʂ
2T  ʈ
A   a
A:  ɑː
B   b
D   d
E   e
E0  ə
E:  eː
F   f
G   ɡ
H   h
I   ɪ
I:  iː
J   j
K   k
L   l
M   m
N   n
NG  ŋ
O   ʊ
O:  uː
P   p
R   r
S   s
SJ  ɧ
T   t
TJ  ɕ
U   ɵ
U:  ʉː
V   v
Y   ʏ
Y:  yː
[   ɛ
[3  æː
[4  æ
[:  ɛː
\   œ
\3  œ̞ː
\4  œ̞
\:  øː
]   ɔ
]:  oː
gcl <gcl>
ha  <ha>
hes <hes>
kl  <kl>
pa  <pa>
sm  <sm>
v   <v>
~H  ~h
~L  ~l
~N  ~n
# Hand-written extra entries, one per line with tab-separated fields: a
# word, its phones and, on some lines, a third field with a rule-style note.
# The text starts with an empty first line and ends with a newline.
ADDITIONS = (
    "\n"
    "INTE\tn t e\tI -> 0 / VOWEL # _\n"
    "EN\tE N\tn -> N / _ # [+velar]\n"
    "EN\tE m\tn -> m / _ # [+labial]\n"
    'JA\t"A:\n'
    'JA\t"j a\n'
    'NEJ\t"n E\n'
)