# Sample alignment used as input to accept_paraphrase(): one token per line,
# fields separated by single spaces. The helpers in this file read field 0 as
# the file id, fields 2 and 3 as start time and duration, fields 4 and 6 as
# the two aligned word columns ("<eps>" marks an empty slot) and the last
# field as the edit label (cor / ins / del in this sample).
# NOTE(review): layout looks like Kaldi ctm-edits output — confirm.
LINES = """
2442207080018759021 1 112.18 0.059 Det 1.0 Det cor
2442207080018759021 1 112.32 0.36 återstår 1.0 återstår cor
2442207080018759021 1 112.76 0.339 nämligen 1.0 nämligen cor
2442207080018759021 1 113.24 0.219 två 1.0 två cor
2442207080018759021 1 114.4 0.399 viktiga 1.0 <eps> ins
2442207080018759021 1 115.86 0.42 brister 1.0 brister cor
2442207080018759021 1 116.28 0.0 <eps> 1.0 som del
2442207080018759021 1 116.28 0.0 <eps> 1.0 är del
2442207080018759021 1 116.28 0.0 <eps> 1.0 viktiga del
2442207080018759021 1 116.62 0.099 att 1.0 att cor
2442207080018759021 1 116.82 0.48 åtgärda. 1.0 åtgärda. cor
"""
def accept_paraphrase(lines):
    """Rewrite an alignment so every kept line is labelled ``cor``.

    ``lines`` is a single string holding one alignment entry per line, with
    fields separated by single spaces. Field 4 and field 6 are the two
    aligned word columns and the last field is the edit label. Blank lines
    are ignored. Each entry is handled as follows:

    * label ``cor``: the line is kept unchanged and field 6 is added to
      both texts.
    * label ``sub``: field 4 goes to the left text, field 6 to the right
      text; the line is kept with field 6 overwritten by field 4 and
      field 7 set to ``cor``.
    * field 6 is ``<eps>``: field 4 goes to the left text only; the line is
      kept, rewritten the same way as for ``sub``.
    * field 4 is ``<eps>``: field 6 goes to the right text only and the
      line is dropped from the output.
    * anything else, including a line with fewer than eight fields, is
      reported on stdout with a ``Huh?`` prefix and skipped.

    Returns:
        ``(outlines, (left_text, right_text))`` where ``outlines`` is the
        list of kept lines and the two texts are the collected words joined
        by single spaces.
    """
    outlines = []
    ltext = []
    rtext = []
    for line in lines.split("\n"):
        line = line.strip()
        if not line:
            continue
        parts = line.split(" ")
        if len(parts) < 8:
            # The branches below index up to field 7; without this guard a
            # short line raised IndexError, possibly after one of the text
            # lists had already been appended to.
            print("Huh?", line)
            continue
        label = parts[-1]
        if label == "cor":
            outlines.append(line)
            ltext.append(parts[6])
            rtext.append(parts[6])
        elif label == "sub":
            ltext.append(parts[4])
            rtext.append(parts[6])
            parts[6] = parts[4]
            parts[7] = "cor"
            outlines.append(" ".join(parts))
        elif parts[6] == "<eps>":
            # Word present only in the field-4 column: keep it and mark the
            # line as correct.
            ltext.append(parts[4])
            parts[6] = parts[4]
            parts[7] = "cor"
            outlines.append(" ".join(parts))
        elif parts[4] == "<eps>":
            # Word present only in the field-6 column: it belongs to the
            # right-hand text but produces no output line.
            rtext.append(parts[6])
        else:
            print("Huh?", line)
    return outlines, (" ".join(ltext), " ".join(rtext))
def generate_filename(lines):
    """Return the ``.ctmedit`` file name for a list of alignment lines.

    The name is ``<id>_<start>_<duration>.ctmedit``. ``<id>`` and ``<start>``
    are copied verbatim from fields 0 and 2 of the first line. ``<duration>``
    is the time from that start to the end of the last line (its field 2
    plus its field 3), formatted with two decimals.
    """
    head_fields = lines[0].split(" ")
    tail_fields = lines[-1].split(" ")
    segment_id, segment_start = head_fields[0], head_fields[2]
    # End of the segment = start of the last token + its duration.
    segment_end = float(tail_fields[2]) + float(tail_fields[3])
    span = segment_end - float(segment_start)
    return f"{segment_id}_{segment_start}_{span:.2f}.ctmedit"
from pathlib import Path
def write_ctm_segment(outdir, lines):
    """Write alignment lines to a file inside ``outdir``.

    The file name comes from ``generate_filename(lines)``. ``outdir`` is
    created (including missing parents) when it does not exist yet. Each
    element of ``lines`` is written followed by a newline.

    Args:
        outdir: Target directory, as a string or path-like object.
        lines: List of alignment lines without trailing newlines.
    """
    # Computed first so that a bad ``lines`` argument fails before anything
    # is created on disk.
    filename = generate_filename(lines)
    dirpath = Path(outdir)
    # exist_ok avoids the check-then-create race; parents lets a nested
    # output directory be created in one go.
    dirpath.mkdir(parents=True, exist_ok=True)
    outfile = dirpath / filename
    # Explicit UTF-8: the words are non-ASCII text, so the output must not
    # depend on the platform's default encoding.
    with open(outfile, "w", encoding="utf-8") as f:
        for line in lines:
            f.write(line + "\n")
def write_text(outdir, filename, text):
    """Write ``text`` to ``outdir/filename``, replacing any existing file.

    The directory is not created here; it must already exist.

    Args:
        outdir: Directory to write into, as a string or path-like object.
        filename: Name of the file inside ``outdir``.
        text: String content, written as-is with no newline appended.
    """
    outfile = Path(outdir) / filename
    # Explicit UTF-8 so non-ASCII text is written identically regardless of
    # the platform's default encoding.
    with open(outfile, "w", encoding="utf-8") as f:
        f.write(text)
# Process the LINES value assigned earlier in the file: `a` receives the list
# of rewritten alignment lines, `b` the (left text, right text) pair.
a, b = accept_paraphrase(LINES)
# Hard-coded output directory for all three files written below.
OUTDIR = "/Users/joregan/paraphrases"
write_ctm_segment(OUTDIR, a)
# Reuse the segment's file name for the two text files, swapping only the
# extension: ".txt" gets the left text, ".paraphrase" the right text.
filename = generate_filename(a)
write_text(OUTDIR, filename.replace(".ctmedit", ".txt"), b[0])
write_text(OUTDIR, filename.replace(".ctmedit", ".paraphrase"), b[1])
# NOTE(review): the bare `b` and the tuple literal after it have no effect
# when this file runs as a script; they look like a notebook cell expression
# and its captured output — confirm. The tuple's text matches the
# "Vi har lämnat in ..." sample assigned to LINES further down, not the
# sample processed above.
b
('Vi har lämnat in en motivreservation, det vill säga att den texten som kommer före vill vi ha lite förändrad.',
 'Vi har lämnat in en motivreservation, det vill säga att vi vill ha texten lite förändrad.')
# Second sample alignment; rebinding LINES replaces the earlier sample. Same
# space-separated layout, and this one also contains a "sub" line in addition
# to cor / ins / del.
LINES = """
2442203250006958021 1 39.58 0.06 Vi 1.0 Vi cor
2442203250006958021 1 39.82 0.1 har 1.0 har cor
2442203250006958021 1 40.0 0.34 lämnat 1.0 lämnat cor
2442203250006958021 1 40.44 0.1 in 1.0 in cor
2442203250006958021 1 40.78 0.119 en 1.0 en cor
2442203250006958021 1 42.1 1.479 motivreservation, 1.0 motivreservation, cor
2442203250006958021 1 43.64 0.079 det 1.0 det cor
2442203250006958021 1 43.74 0.119 vill 1.0 vill cor
2442203250006958021 1 43.92 0.179 säga 1.0 säga cor
2442203250006958021 1 44.16 0.1 att 1.0 att cor
2442203250006958021 1 44.3 0.08 den 1.0 <eps> ins
2442203250006958021 1 44.5 0.38 texten 1.0 <eps> ins
2442203250006958021 1 44.94 0.1 som 1.0 <eps> ins
2442203250006958021 1 45.12 0.2 kommer 1.0 <eps> ins
2442203250006958021 1 45.4 0.34 före 1.0 vi sub
2442203250006958021 1 46.38 0.159 vill 1.0 vill cor
2442203250006958021 1 46.56 0.039 vi 1.0 <eps> ins
2442203250006958021 1 46.7 0.039 ha 1.0 ha cor
2442203250006958021 1 46.739 0.0 <eps> 1.0 texten del
2442203250006958021 1 47.4 0.179 lite 1.0 lite cor
2442203250006958021 1 47.64 0.38 förändrad 1.0 förändrad. cor
"""
# Process the second sample. Nothing is written to disk for it here.
# NOTE(review): the same call appears twice in a row; the second one simply
# recomputes and rebinds the same `a` and `b` — probably a leftover.
a, b = accept_paraphrase(LINES)
a, b = accept_paraphrase(LINES)
# NOTE(review): the result of this call is discarded when run as a script,
# and the string literal after it is a no-op; both look like a notebook cell
# expression and its captured output — confirm.
generate_filename(a)
'2442203250006958021_39.58_8.44.ctmedit'