# Root directory that holds the source recordings and receives the outputs.
BASE_PATH = "/home/jim/Playing/unlabelled"

# Base names (relative to BASE_PATH, without extension) read from the
# "ina/no-music" list. Each line may carry a leading "./" and a trailing
# ".csv"; both are removed.
files = []
with open(f"{BASE_PATH}/ina/no-music") as inf:
    # Iterate the handle directly; there is no need to load every line first.
    for line in inf:
        stripped = line.strip()
        if stripped.startswith("./"):
            stripped = stripped[2:]
        if stripped.endswith(".csv"):
            stripped = stripped[:-4]
        if not stripped:
            # Blank lines (or lines that were only "./" / ".csv") would
            # otherwise yield an empty base name, which is later probed as
            # "<BASE_PATH>/.<ext>" and could match a stray hidden file.
            continue
        files.append(stripped)

# File extensions tried, in this order, for every listed base name. The
# extension is pasted into the path verbatim, hence both "mp3" and "MP3".
exts = [
    "m4a",
    "mkv",
    "mp3",
    "MP3",
    "mp4",
    "wav",
]

from pathlib import Path

# Map each listed base name to the path of its existing media file.
# Base names with no matching file under BASE_PATH are left out.
data = {}
for stem in files:
    for suffix in exts:
        candidate = f"{BASE_PATH}/{stem}.{suffix}"
        if not Path(candidate).is_file():
            continue
        # No early exit: when several extensions exist for the same base
        # name, the one appearing last in `exts` is the one that is kept.
        data[stem] = candidate

from pydub import AudioSegment

# Convert every located recording to a FLAC file under BASE_PATH/flac.
# The extra parameters are handed to the encoder: "-ac 1" requests a single
# channel and "-ar 16000" a 16 kHz sample rate.
for basename, fname in data.items():
    outstr = f"{BASE_PATH}/flac/{basename}.flac"
    # The export needs the destination directory to exist. Create it on
    # demand; this also covers base names that contain sub-directories.
    Path(outstr).parent.mkdir(parents=True, exist_ok=True)
    audio = AudioSegment.from_file(fname)
    audio.export(outstr, format="flac", parameters=["-ac", "1", "-ar", "16000"])

# Write the manifest "vad_input.txt": one line per converted recording in the
# form "train<NNNN> <flac path> <duration in seconds>", numbered from 0001 in
# the iteration order of `data`.
with open(f"{BASE_PATH}/vad_input.txt", "w") as outf:
    # enumerate() supplies the running number; only the keys of `data` are
    # needed here because the FLAC path is rebuilt from the base name.
    for count, basename in enumerate(data, start=1):
        outstr = f"{BASE_PATH}/flac/{basename}.flac"
        # The converted file is loaded again to read its duration.
        audio = AudioSegment.from_file(outstr)
        outf.write(f"train{count:04d} {outstr} {audio.duration_seconds}\n")