import pocketsphinx
# Hard-coded absolute input directories; adjust these before running elsewhere.
# Directory the example .TextGrid file is read from.
textgrids="/Users/joregan/Playing/hsi/audio/whisperx-textgrids/"
# Directory wrapped as `ctmpath` below (not otherwise used in this part of the file).
ctmdir="/Users/joregan/Playing/hsi/ctc_prefix_beam_search/"
# Directory the example .ctmedit file is read from.
ctmeditdir="/Users/joregan/Playing/hsi/whisper_reverb_ctmedit/"
from pathlib import Path

def read_ctm(filename):
    """Read a CTM-style text file into a list of per-line field lists.

    Each non-blank line is split on runs of whitespace; every field is kept
    as a string.

    Args:
        filename: Location of the file, as a ``str`` or any path-like object
            (``open`` accepts both, so no conversion is needed).

    Returns:
        A list with one list of string fields per non-blank line, in file
        order.
    """
    ctmlines = []
    with open(filename) as inf:
        for line in inf:
            fields = line.split()
            # Skip blank lines: they would otherwise yield a row with no
            # usable fields and break numeric indexing such as row[2].
            if fields:
                ctmlines.append(fields)
    return ctmlines
from praatio import textgrid

def read_textgrid(filename, tiername="words"):
    """Load one tier of a TextGrid as ``(start, end, label)`` tuples.

    Args:
        filename: Location of the TextGrid file, as a ``str`` or a
            ``pathlib.Path``.
        tiername: Name of the tier to read; defaults to ``"words"``.

    Returns:
        A list of ``(entry.start, entry.end, label)`` tuples, one per entry
        of the tier, with surrounding whitespace stripped from the label.
    """
    # isinstance rather than an exact type comparison: Path(...) creates
    # PosixPath/WindowsPath instances, which `type(x) is Path` never matches.
    if isinstance(filename, Path):
        filename = str(filename)
    # Second positional argument is passed as False; presumably praatio's
    # includeEmptyIntervals flag -- confirm against the installed version.
    tg = textgrid.openTextgrid(filename, False)

    tier = tg.getTier(tiername)
    return [
        (entry.start, entry.end, entry.label.strip())
        for entry in tier.entries
    ]
# Wrap the configured directory strings in Path objects so file names can be
# appended with the "/" operator.
tgpath = Path(textgrids)
ctmpath = Path(ctmdir)
ctmeditpath = Path(ctmeditdir)
# Base name (without extension) of the example file loaded below.
EG = "hsi_5_0718_210_001_main"
# Load the example's ctm-edit rows and TextGrid entries when the module runs.
CTMEDATA = read_ctm(ctmeditpath / f"{EG}.ctmedit")
TGDATA = read_textgrid(tgpath / f"{EG}.TextGrid")
def clean_word(word):
    """Return *word* lower-cased, with leading/trailing ``.,;:!?`` removed.

    Punctuation inside the word (apostrophes, hyphens, inner dots) is kept.
    """
    lowered = word.lower()
    return lowered.strip(".,;:!?")

def map_words_to_tg(tgdata):
    """Flatten TextGrid entries into per-word records.

    Args:
        tgdata: Sequence of entries whose third element (index 2) is the
            entry text.

    Returns:
        A list of ``[cleaned_word, original_word, entry_index]`` lists, one
        per single-space-separated token, in entry and token order.
    """
    return [
        [clean_word(token), token, entry_no]
        for entry_no, entry in enumerate(tgdata)
        for token in entry[2].split(" ")
    ]
def ctm_excerpt(ctmdata, start, end, fluff=0.6):
    """Collect the rows of *ctmdata* that fall inside a padded time window.

    Args:
        ctmdata: Rows whose fields 2 and 3 hold the start time and duration
            (anything ``float()`` accepts).
        start: Window start; a ``str`` is converted with ``float()``.
        end: Window end; a ``str`` is converted with ``float()``.
        fluff: Padding applied on both sides of the window.

    Returns:
        The rows, in input order, that start no earlier than
        ``start - fluff``. Scanning stops at the first such row that ends
        after ``end + fluff``, so later rows are never considered.
    """
    start = float(start) if type(start) is str else start
    end = float(end) if type(end) is str else end

    selected = []
    for row in ctmdata:
        begin = float(row[2])
        finish = begin + float(row[3])
        if begin < (start - fluff):
            # Row starts before the padded window opens.
            continue
        if finish > (end + fluff):
            # First row running past the padded window ends the scan.
            break
        selected.append(row)
    return selected
def index_ctmdata_by_start(ctmdata):
    """Map each row's start time (field 2, as ``float``) to its position.

    When several rows share a start time, the position of the last one wins.
    """
    positions = {}
    for position, row in enumerate(ctmdata):
        positions[float(row[2])] = position
    return positions
# Notebook-style checks on the example data: build the start-time index, then
# pull the rows around the span 57.734-60.576 (one of the TGDATA entries).
# As bare expressions, their values are discarded when run as a script.
index_ctmdata_by_start(CTMEDATA)
ctm_excerpt(CTMEDATA, 57.734, 60.576)
[['hsi_5_0718_210_001_main.wav', '1', '57.33', '0.1', 'i', '0.00', '-', 'ins'],
 ['hsi_5_0718_210_001_main.wav', '1', '57.57', '0.1', 'i', '0.00', 'i', 'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '57.85',
  '0.1',
  'really',
  '0.00',
  'really',
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '58.01',
  '0.1',
  'think',
  '0.00',
  'think',
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '58.17',
  '0.26',
  "i've",
  '0.00',
  "i've",
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '58.53',
  '0.1',
  'made',
  '0.00',
  'made',
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '58.77',
  '0.1',
  'the',
  '0.00',
  'the',
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '58.93',
  '0.1',
  'best',
  '0.00',
  'best',
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '59.03',
  '0.0',
  '-',
  '1.0',
  'out',
  'del'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '59.37',
  '0.1',
  'outta',
  '0.00',
  'of',
  'sub'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '59.65',
  '0.1',
  'this',
  '0.00',
  'this',
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '59.85',
  '0.1',
  'place',
  '0.00',
  'place',
  'cor'],
 ['hsi_5_0718_210_001_main.wav',
  '1',
  '60.13',
  '0.18',
  'actually',
  '0.00',
  'actually',
  'cor']]
def namethis(ctmedit, start, end, consumed_from=0):
    """Print the row that immediately precedes the excerpt for a time span.

    The excerpt is taken with ``ctm_excerpt(ctmedit, start, end)``; the
    position of its first row is looked up by start time, and the row just
    before it in *ctmedit* is printed as
    ``<previous start> <excerpt start> <previous row>``.

    Args:
        ctmedit: The ctm-edit rows to search; all lookups use this argument.
        start: Span start, passed through to ``ctm_excerpt``.
        end: Span end, passed through to ``ctm_excerpt``.
        consumed_from: Currently unused.

    Returns:
        None. Nothing is printed when the excerpt is empty or when it starts
        at the first row (there is no preceding row).
    """
    excerpt = ctm_excerpt(ctmedit, start, end)
    if not excerpt:
        return

    index = index_ctmdata_by_start(ctmedit)
    ex_start = float(excerpt[0][2])
    ex_start_ptr = index[ex_start]
    # TODO: consumed_from is not used yet; an earlier draft considered
    # extending the excerpt when (ex_start_ptr - 1) > consumed_from.
    if ex_start_ptr == 0:
        # Index -1 would silently wrap around to the last row.
        return

    previous = ctmedit[ex_start_ptr - 1]
    prev_start = float(previous[2])
    print(prev_start, ex_start, previous)
# Try namethis on the example data for the span 57.734-60.576.
namethis(CTMEDATA, 57.734, 60.576)
# Output: 56.91 57.33 ['hsi_5_0718_210_001_main.wav', '1', '56.91', '0.04', 'think', '0.00', 'think', 'cor']
# Notebook-style display of the loaded TextGrid entries; as a bare expression
# it has no effect when run as a script.
TGDATA
[(7.489, 7.709, 'Yeah.'),
 (7.729, 8.19, 'Yeah.'),
 (8.21, 8.49, 'Yes.'),
 (40.339, 41.12, 'Oh, that was nice.'),
 (41.461, 42.622, 'Nice idea to have that.'),
 (44.164, 45.025, 'Oh, I like this place.'),
 (55.393, 57.114, 'Yeah, so what do you think?'),
 (57.734,
  60.576,
  "I really think I've made the best out of this place actually."),
 (62.837, 63.038, 'Sorry?'),
 (63.918,
  72.263,
  "Yeah, it does and I really like what I did with the flowers up there because it's like the hanging gardens of Babylon."),
 (73.604, 77.026, 'I would like to have more of course, but I think in time'),
 (78.106, 82.527, 'They will hang down like a waterfall of green leaves.'),
 (83.368,
  88.609,
  "That's my intention anyway because I'm really into plants."),
 (90.149,
  98.892,
  "What's nice with these plants too, that's called the tongue of mother-in-law, the one by the window in Swedish."),
 (100.792, 103.194, "Because it's called Dragon Tongue."),
 (104.174,
  110.498,
  'And your mother-in-law, in the old way, everyone thought their mother-in-law was a dragon.'),
 (112.239, 113.62, 'It was called Mother-in-law Tongue.'),
 (114.14, 116.081, 'And that, of course, is Monstera.'),
 (117.022, 117.642, 'A new one.'),
 (118.622, 123.145, 'I actually bought it from the Amazons.'),
 (124.266,
  129.769,
  "So it's brought here by a friend as just a little, little cuddling."),
 (130.922, 133.284, 'And now it looks like that.'),
 (133.845, 136.848, "And that's only in three months."),
 (137.068, 140.272, 'So I have to say I have green fingers.'),
 (140.572, 142.434, 'And yes.'),
 (143.595, 145.898, 'And do you wonder about the kettle I have there?'),
 (148.234, 149.635, "Yes, but it's not actually."),
 (150.915, 153.917, 'Because when you boil water, you clean it.'),
 (155.498,
  161.941,
  'And if you want the plants to grow properly, you want the water to be as clean as possible.'),
 (162.321,
  166.883,
  'But of course, you also take out the nourishment in the water.'),
 (167.023, 171.906, 'So you have to add nourishment, fertilizers and so.'),
 (173.246, 176.648, "But it's really important to have clean water."),
 (177.488,
  184.871,
  'And then I let it stand there for two or three weeks and then I put it in regular water.'),
 (186.172,
  192.815,
  'Just one deciliter of pure water and then not dirty water, but regular tap water.'),
 (197.077, 197.517, 'The pillows?'),
 (198.817, 200.398, 'Oh, I thought you pointed that way.'),
 (201.859, 202.339, 'Those pillows?'),
 (202.879, 203.219, 'Yes.'),
 (204.22, 204.32, 'Oh!'),
 (206.042,
  212.584,
  "I'm glad you like them because those are really, really meaning a lot to me."),
 (213.805, 215.806, 'They used to belong to my grandmother.'),
 (216.946,
  223.108,
  'And I have so many memories of them, with them, together with them.'),
 (223.148,
  229.731,
  "Because, you know, every time I was with my... We didn't watch much TV in those days because I'm 60 years old."),
 (229.811, 231.471, 'So we, after...'),
 (234.876,
  238.538,
  "It wasn't like today when you can watch TV all day or Netflix or anything."),
 (239.119,
  242.762,
  'So we played a lot of games and we also listened to the radio.'),
 (244.263,
  251.949,
  'And whenever, or my grandmother would tell me stories from her childhood, and then I always sat on the floor.'),
 (253.03,
  260.176,
  'So I sat on one of the pillows and then I, the white one, and then I had the green one.'),
 (260.896,
  282.635,
  'like this and I would listen to her because they lived up north and it was so exciting to listen to her stories about reindeers and stuff like that and they also she she also believed in she was very spiritual so she believed in maybe not spirits but that energies remained'),
 (285.618, 286.118, 'around us.'),
 (286.819, 292.123, 'So I actually feel that my grandmother is with me.'),
 (293.544, 296.927, 'Oh, my hair stands when I talk about her.'),
 (297.167, 299.529, "So maybe she's here now and likes this."),
 (301.971, 304.033, "So I'm glad you like them."),
 (308.977, 309.257, 'The what?'),
 (315.906,
  328.944,
  "uh yeah maybe it's just as you can see i very often i just come home throw off my shoes and i i just want to go in and relax and"),
 (331.687,
  346.876,
  "So you don't need my watch so I put it over there and I okay it's like this I have ADHD so sometimes I just put something down and then I forget that I've put it down so"),
 (355.356, 356.216, 'What would be?'),
 (357.958, 358.599, 'Yes, of course.'),
 (360.321,
  366.887,
  "And I always look for it because sometimes it's going beneath the cushion."),
 (371.231, 371.632, 'This one?'),
 (377.597,
  382.982,
  'Yes, because I understand what you mean, because you would like the chair to be this way.'),
 (383.943,
  391.91,
  "But there's a very good explanation for it, because have you heard of an artist called Benjamin Kokomovic?"),
 (392.871, 393.291, "You haven't?"),
 (393.912, 397.815, 'Oh, he was very much into spirituality.'),
 (398.876,
  414.833,
  'And what he said was that if you place a chair like that, in that direction, right into a wall, you can actually attract spirits.'),
 (416.214, 418.557, "And so that's why it's like that."),
 (422.273,
  431.618,
  "Yes, because, okay, I don't know if you're into this, but actually, I am very much in contact with the other world."),
 (432.318,
  450.908,
  'So what happens is, because this chair is really special, it once belonged to a great psychic, a Swedish psychic, and whenever someone comes to visit me, the light goes tick, tick, tick, tick.'),
 (452.249,
  461.817,
  "And so I know they're there and sometimes I ask them, is it right if I join you?"),
 (462.658, 463.659, "Sometimes it's not."),
 (463.819, 467.923, 'So one blink is yes, two blinks is no.'),
 (468.303, 470.565, 'But if I, what do you think I do then?'),
 (473.855, 476.897, "No, if it's okay, what do you think I do?"),
 (478.478, 480.519, 'Yes, when I join them, what do you think I do then?'),
 (481.979, 483.18, 'What I told you in the beginning?'),
 (485.321, 486.462, 'I take the pillows.'),
 (487.502,
  493.726,
  'So I sit like when I sat with my grandmother, so I sit on the white one and have the green one like this.'),
 (502.306,
  524.157,
  "I know what you... I know it might look a bit strange but actually it's... I used to have the plant over there so it's a UV light but then of course when I bought that chair I had to ask the chair actually where it wanted to be"),
 (524.837,
  553.0,
  "and it wanted to be there so and I haven't moved the lamp yet but it should be and as you see it's I am now taking away the UV light because I have two similar lights up there my god I forgot about those I never use them actually well and now when you tell me when you're noticing it it looks quite off in this room"),
 (559.825, 560.646, 'To the lamps?'),
 (561.927,
  567.013,
  'No, but now that you mention it, it actually looked like the belt of Orion.'),
 (570.056,
  573.659,
  "Maybe there is something I've done without thinking about it."),
 (574.0, 576.442, 'I really have to check that before I take them down.'),
 (576.562, 579.926, 'But they were very expensive, those lamps.'),
 (580.146, 581.948, "They're from Germany, actually."),
 (586.606,
  594.651,
  "Yes, because I'm not, as you can see, I'm not very much into art."),
 (595.672, 596.652, 'The other walls are empty.'),
 (598.253, 601.255, 'So I just have the one with the flowers.'),
 (604.357, 609.401, 'So if I feel something for a painting, then I buy it.'),
 (609.861, 611.662, "So that's what I really felt for."),
 (612.323, 615.245, "And I love the tree over there because it's so"),
 (617.748, 620.232, "It's like, have you seen the film Matrix?"),
 (620.894,
  626.202,
  'Yeah, you know, in the room where they study martial arts and stuff like that.'),
 (626.723, 627.364, "I think that's"),
 (629.72,
  635.925,
  'I could imagine when we die that we come to a place like that at first.'),
 (637.126, 637.366, 'Sorry?'),
 (638.427, 640.368, 'Yeah, or tree of death.'),
 (641.109,
  649.956,
  "And maybe it's, it looks really peaceful, but could also be a tree of wisdom and stuff like that."),
 (652.597, 653.518, 'And that one,'),
 (654.959, 658.224, 'is also one of my grandmothers.'),
 (658.284,
  672.402,
  "It's not what I like, like in art, but I like that one because when I was a kid, I used to sit in front of it and imagine that I was living there."),
 (673.343,
  693.133,
  "and that it was my dad, because my parents were divorced and I never met my dad, and that my dad was waiting for me, because you can see that there's one horse without anyone on it, and that he was waiting for me to come there, and that my mother was driving that carriage."),
 (697.6, 699.2, 'Yeah, yeah it is.'),
 (700.26,
  706.502,
  'And I like an open space because I can do my morning yoga here and yeah.'),
 (707.922,
  725.165,
  "And it's also from, it's from Cairo in Egypt and it was, it actually, the symbols here means that that's the afterlife and the life we're in now."),
 (726.306,
  734.345,
  "and all the various... My God, there's something wrong with my hand, I have to... Yeah.")]
def partition_ctm(ctmdata, amount=1.0):
    """Split CTM rows into segments wherever a long enough gap occurs.

    A new segment starts whenever a row begins more than *amount* after the
    end of the row before it.

    Args:
        ctmdata: Rows whose fields 2 and 3 hold the start time and duration
            (anything ``float()`` accepts), in time order.
        amount: Minimum gap between the previous row's end and the current
            row's start that triggers a split.

    Returns:
        A list of segments, each a non-empty list of rows in input order.
        Empty input yields an empty list.
    """
    segments = []
    current = []

    # None (not 0.0) marks "no previous row", so a first row that ends at
    # time 0.0 is still treated as a valid predecessor.
    prev_end = None
    for ctmline in ctmdata:
        start = float(ctmline[2])
        end = start + float(ctmline[3])
        if prev_end is not None and (start - prev_end) > amount:
            segments.append(current)
            current = []
        current.append(ctmline)
        # Remember where this row ends so the next gap can be measured.
        prev_end = end
    if current:
        segments.append(current)
    return segments
# Split the example ctm-edit rows into segments using the default threshold.
partitioned = partition_ctm(CTMEDATA)
# Print only the segments whose rows all share the same last field.
for partition in partitioned:
    edit_type = set()
    for ctmline in partition:
        # Last field of the row ('cor', 'ins', 'del', 'sub' in the sample
        # rows shown in this file).
        edit_type.add(ctmline[-1])
    if len(edit_type) == 1:
        print(partition)
# Notebook-style inspection of the last row of the first segment; as a bare
# expression it has no effect when run as a script.
partitioned[0][-1]
['hsi_5_0718_210_001_main.wav',
 '1',
 '734.04',
 '0.1',
 'yeah',
 '0.00',
 'yeah',
 'cor']