import json

# Hand-written example of an annotation document: a JSON object keyed by an
# entry id ("1"). The entry holds the transcript "snippet", its "start"/"end"
# values, a list of resolved "references" and a "high_level" topic record.
# NOTE(review): presumably mirrors the *_with_timings.json files - confirm.
SAMPLE = """
{
  "1": {
    "snippet": "Yes, I will try. Let's see here. So we have couch.",
    "references": [
      {
        "ref_id": 0,
        "phrase": "couch",
        "resolved_ref": "Sofa_b5c02446",
        "plurality": "single",
        "original_label": "exact",
        "object_id": "Sofa_b5c02446",
        "phrase_start": 15.892,
        "phrase_end": 17.032
      }
    ],
    "start": 10.192,
    "end": 17.092000000000002,
    "high_level": {
      "current_topic": "Sofa_b5c02446",
      "topic_duration_id": 0
    }
  }
}
"""

# Parsed form of SAMPLE (a plain dict).
sample = json.loads(SAMPLE)
from pathlib import Path

JSONDIR = Path("/tmp/resolved_references_with_timings")
WAVDIR = Path("/tmp/audio_prompt_clips")

# For every clip in WAVDIR, write a same-named .txt next to it containing the
# "snippet" text of the annotation entry that the clip's file name points at.
for wavfile in WAVDIR.glob("*.wav"):
    wavstem = wavfile.stem
    # The stem is "_"-separated: fields 0-4 rebuild the annotation file name,
    # field 5 is the entry key, fields 6 and 7 are parsed as floats.
    parts = wavstem.split("_")
    orig_stem = "_".join(parts[:5])
    jsonfile = JSONDIR / (orig_stem + "_with_timings.json")
    with jsonfile.open("r") as f:
        data = json.load(f)
    phrase_id = parts[5]
    # Not used further in this loop; the conversions still reject stems whose
    # fields 6/7 are missing or not numeric.
    orig_start = float(parts[6])
    orig_end = float(parts[7])
    text = data[phrase_id]["snippet"]
    with wavfile.with_suffix(".txt").open("w") as f:
        f.write(text)
from string import punctuation

punct = set(punctuation)


def _rewrite_bold_token(token):
    """Rewrite the markdown ``**`` markers on one space-separated token.

    * leading ``**``: the marker is dropped and the rest is wrapped in ``*``
    * trailing ``**``: the marker is dropped
    * ``**`` followed by exactly one final character: the marker is dropped
      and that character is kept
    * anything else is returned unchanged

    NOTE(review): a token that both starts and ends with ``**`` (for example
    ``**word**``) takes the first rule only and comes out as ``*word***`` -
    confirm that this is intended.
    """
    if token.startswith("**"):
        return "*" + token[2:] + "*"
    if token.endswith("**"):
        return token[:-2]
    if token[:-1].endswith("**"):
        return token[:-3] + token[-1:]
    return token


# Each non-blank line of the markdown file becomes its own numbered text file.
with open("/Users/joregan/Downloads/object_utterances_batch1_to_4.md") as inf:
    count = 1
    for line in map(str.strip, inf.readlines()):
        if not line:
            continue
        new_parts = [_rewrite_bold_token(part) for part in line.split(" ")]
        with open(f"/tmp/test_batch/test_batch_{count}.txt", "w") as of:
            of.write(" ".join(new_parts))
        # Blank lines are skipped above, so numbering has no gaps.
        count += 1
def read_reference_dir(ref_dir):
    """Map each ``.wav`` file in *ref_dir* to the text of its ``.txt`` sibling.

    Args:
        ref_dir: Directory to scan, given as a ``str`` or a ``Path``.

    Returns:
        A dict keyed by the ``Path`` of every ``*.wav`` file directly inside
        *ref_dir*. Each value is the full content of the same-named ``.txt``
        file with leading and trailing whitespace removed.

    Raises:
        FileNotFoundError: If a ``.wav`` file has no matching ``.txt`` file.
    """
    # Normalise first so that str and Path arguments are both scanned.
    ref_dir = Path(ref_dir)
    pairs = {}
    for wavfile in ref_dir.glob("*.wav"):
        # with_suffix only swaps the file extension; a plain string replace
        # would also rewrite ".wav" occurring inside a directory name.
        with open(wavfile.with_suffix(".txt"), "r") as f:
            pairs[wavfile] = f.read().strip()
    return pairs
def get_mmconv_speaker_room_pairs(ref_dir):
    """Group the ``.wav``/``.txt`` pairs in *ref_dir* by speaker and room.

    The speaker and room are read from the underscore-separated file name
    (without directory or extension): field 1 is used as the speaker and
    field 3 as the room.

    Args:
        ref_dir: Directory to scan, given as a ``str`` or a ``Path``.

    Returns:
        A nested dict ``{speaker: {room: {wav_path: text}}}`` where
        ``wav_path`` is the ``Path`` of a ``*.wav`` file directly inside
        *ref_dir* and ``text`` is the content of the same-named ``.txt`` file
        with leading and trailing whitespace removed.

    Raises:
        IndexError: If a file name has fewer than four ``_``-separated fields.
        FileNotFoundError: If a ``.wav`` file has no matching ``.txt`` file.
    """
    ref_dir = Path(ref_dir)
    pairs = {}
    for wavfile in ref_dir.glob("*.wav"):
        # Split the bare stem, not the whole path: underscores in a directory
        # name (e.g. "audio_prompt_clips") would otherwise shift the indices.
        parts = wavfile.stem.split("_")
        speaker = parts[1]
        room = parts[3]
        with open(wavfile.with_suffix(".txt"), "r") as f:
            data = f.read().strip()
        pairs.setdefault(speaker, {}).setdefault(room, {})[wavfile] = data
    return pairs