def is_schwa(phone, is_timit=True):
  """Return True if *phone* is a schwa in the selected phone set.

  With ``is_timit`` true the phone is matched against "ax", "axr" and "ix";
  otherwise only the symbol "AH0" counts.
  """
  if not is_timit:
    return phone == "AH0"
  return phone in ("ax", "axr", "ix")

# CMUdict doesn't have syllabic consonants, so only TIMIT phones can match.
def is_syllabic_consonant(phone, is_timit=True):
  """Return True if *phone* is one of the syllabic consonant symbols.

  Always False when ``is_timit`` is false.
  """
  if not is_timit:
    return False
  return phone in ("el", "em", "en", "er", "er1", "er2")

def is_vowel(phone):
  """Return True if *phone* is a vowel symbol.

  The comparison is case-insensitive and ignores a single trailing stress
  digit ("0", "1" or "2"), so "aw", "aw1" and "AW1" are all vowels.
  An empty string is not a vowel.
  """
  VOWELS = ("aa", "ae", "ah", "ao", "aw", "ax", "axr", "ay", "eh", "ey",
            "ih", "ix", "iy", "ow", "oy", "uh", "uw")
  # Check for emptiness first: indexing phone[-1] on "" would raise IndexError.
  if phone and phone[-1] in "012":
    phone = phone[:-1]
  return phone.lower() in VOWELS

def is_vocalic(phone):
  """Return True if *phone* is a vowel or a syllabic consonant.

  The syllabic-consonant check uses that helper's default phone set.
  """
  if is_vowel(phone):
    return True
  return is_syllabic_consonant(phone)

# http://web.archive.org/web/20100614180508/http://semarch.linguistics.fas.nyu.edu/barker/Syllables/syllabify.pl
# (rank, phones) pairs, checked in order; the first class containing the phone wins.
_SONORITY_CLASSES = (
  (1, ("s", "p", "b", "t", "d", "k", "g")),         # 's' is special: ranked with the stops
  (2, ("ch", "jh")),                                # affricates
  (3, ("th", "dh", "f", "v", "z", "sh", "zh")),     # remaining fricatives
  (4, ("m", "n", "ng")),                            # nasals
  (5, ("l", "r")),                                  # liquids
  (6, ("hh", "w", "y")),                            # 'hh' and the glides
)

def sonority(phone):
  """Return the sonority rank (1-7) of *phone*.

  Phones are matched exactly (lower-case, no stress digit). Anything not
  listed in the table, vowels included, gets the top rank 7.
  """
  for rank, members in _SONORITY_CLASSES:
    if phone in members:
      return rank
  return 7

def last_phoneme(graphone):
  """Return the final phoneme of a 'graphemes}phonemes' graphone.

  Phonemes are separated by '|'. Raises ValueError unless *graphone*
  contains exactly one '}'.
  """
  _graphemes, phonemes = graphone.split('}')
  return phonemes.rpartition('|')[2]
def first_phoneme(graphone):
  """Return the initial phoneme of a 'graphemes}phonemes' graphone.

  Phonemes are separated by '|'. Raises ValueError unless *graphone*
  contains exactly one '}'.
  """
  _graphemes, phonemes = graphone.split('}')
  return phonemes.partition('|')[0]

# Import-time sanity checks: both accessors read the phoneme side of 'x}e|k|s'.
assert first_phoneme('x}e|k|s') == 'e'
assert last_phoneme('x}e|k|s') == 's'

def voicing_mismatch(phone1, phone2):
  """Return True if one phone is in the voiced list and the other in the devoiced list.

  Phones that appear in neither list never produce a mismatch.
  """
  voiced = ("b", "d", "g", "jh", "dh", "v", "z", "zh")
  devoiced = ("p", "t", "k", "ch", "th", "f", "s", "sh")
  first_voiced_second_not = phone1 in voiced and phone2 in devoiced
  second_voiced_first_not = phone2 in voiced and phone1 in devoiced
  return first_voiced_second_not or second_voiced_first_not

def merge_graphones(graphones):
  """Concatenate several 'graphemes}phonemes' graphones into a single one.

  Each side is a '|'-separated list. The two sides are concatenated
  independently across all inputs, then '_' placeholders are dropped from a
  side unless that would leave it empty, in which case a single '_' stays.
  Raises ValueError if any graphone does not contain exactly one '}'.
  """
  def without_placeholders(symbols):
    kept = [symbol for symbol in symbols if symbol != '_']
    # Only placeholders (or nothing at all) on this side: keep at most one.
    return kept or symbols[:1]

  all_graphemes, all_phonemes = [], []
  for graphone in graphones:
    grapheme_part, phoneme_part = graphone.split('}')
    all_graphemes.extend(grapheme_part.split('|'))
    all_phonemes.extend(phoneme_part.split('|'))

  merged_graphemes = '|'.join(without_placeholders(all_graphemes))
  merged_phonemes = '|'.join(without_placeholders(all_phonemes))
  return merged_graphemes + '}' + merged_phonemes

# Import-time checks: '_' placeholders are dropped from a side unless nothing else is left on it.
assert merge_graphones(['a}a', 't|h}th', 'x}k|s']) == 'a|t|h|x}a|th|k|s'
assert merge_graphones(['a}a', 't|h}th', 'x}k|s', 'e}_']) == 'a|t|h|x|e}a|th|k|s'
assert merge_graphones(['a}_', 't|h}_', 'x}_', 'e}_']) == 'a|t|h|x|e}_'
assert merge_graphones(['_}a', '_}th', '_}k|s']) == '_}a|th|k|s'

def syllabify(graphones):
    """Group a sequence of graphones into syllables.

    The graphones are scanned from last to first. Each one is classified by
    the sonority of its first phoneme, and the current syllable is closed
    once it already contains a vocalic phoneme and the incoming phoneme is
    vocalic, disagrees in voicing with the phoneme to its right, or starts a
    sonority rise after a non-rise. A set of phoneme-specific overrides
    adjusts the rise flags before that decision is made.

    Args:
        graphones: list of 'graphemes}phonemes' strings in word order.

    Returns:
        A list of merged graphone strings (see merge_graphones), one per
        syllable, in word order.
    """
    labials = ("p", "b", "m", "f", "v")
    s_sh = ("s", "sh")
    sibilants = ("s", "sh", "z", "zh")

    output = []
    stack = []                # graphones of the syllable being built, right to left
    saw_vowel = False         # the current syllable already has a vocalic phoneme
    prev_phoneme = ""         # phoneme handled on the previous step, i.e. the one to the right
    last_sonority = 0
    last_sonority_up = True

    for graphone in reversed(graphones):
        phoneme = first_phoneme(graphone)
        phone_sonority = sonority(phoneme)
        isvowel = is_vocalic(phoneme)

        sonority_up = last_sonority < phone_sonority

        # For timit
        # NOTE(review): a bare '_' cannot reach this branch as written,
        # because first_phoneme() above raises ValueError on a graphone
        # without '}'. Left untouched until the intended input format is confirmed.
        if graphone == '_':
            stack.append(graphone)
            continue

        # Rank 1 (stop or 's') to the left of a rank-3 fricative counts as a rise.
        if last_sonority == 3 and phone_sonority == 1:
            sonority_up = True

        # The overrides below all force "rise after a non-rise", which is one
        # of the conditions that closes the current syllable.
        if prev_phoneme == 'w' and phoneme in labials:
            last_sonority_up = False
            sonority_up = True

        if prev_phoneme == "m" and not sonority_up and phoneme not in s_sh:
            last_sonority_up = False
            sonority_up = True

        if phoneme == "m" and not sonority_up and last_sonority < 7:
            last_sonority_up = False
            sonority_up = True

        if phoneme == "n" and not sonority_up and last_sonority < 6:
            last_sonority_up = False
            sonority_up = True

        if not sonority_up and phoneme == "ng":
            last_sonority_up = False
            sonority_up = True

        # Two adjacent rank-7 phones: mark both steps as rises.
        if last_sonority == 7 and phone_sonority == 7:
            last_sonority_up = True
            sonority_up = True

        # avoid bs/ps onsets
        if prev_phoneme in sibilants and phoneme in ("b", "p"):
            last_sonority_up = False
            sonority_up = True

        if prev_phoneme == 'l' and phoneme in ('d', 't', 'dh', 'th'):
            last_sonority_up = False
            sonority_up = True

        # A syllable can only be closed once it holds a vocalic phoneme.
        if saw_vowel and (
            isvowel
            or voicing_mismatch(phoneme, prev_phoneme)
            or (sonority_up and not last_sonority_up)
        ):
            output.append(merge_graphones(stack[::-1]))
            stack = []
            saw_vowel = False

        stack.append(graphone)
        last_sonority_up = sonority_up
        prev_phoneme = phoneme
        last_sonority = phone_sonority
        saw_vowel = saw_vowel or isvowel

    # Flush whatever is left as the first syllable of the word.
    output.append(merge_graphones(stack[::-1]))
    return output[::-1]

# Import-time regression check: the vowel-initial word splits before 'b'.
assert syllabify(['a}ax', 'b}b', 'o|u}aw1', 't}t']) == ['a}ax', 'b|o|u|t}b|aw1|t']

# Syllabify the cleaned corpus line by line: each input line is a
# space-separated list of graphones, each output line the space-separated
# syllables for that line.
with open('TIMIT.clean.corpus', 'r') as f, open('TIMIT.syllable.corpus', 'w') as of:
  for line in f:
    # split() with no separator discards the trailing newline, which would
    # otherwise stay glued to the last graphone and corrupt its phoneme
    # (and add a stray blank line to the output).
    graphones = line.split()
    # A blank input line yields a blank output line, keeping the two files aligned.
    syll = syllabify(graphones) if graphones else []
    print(' '.join(syll), file=of)