Convert to flac for wav2vec
Converting from a list
# Root directory holding the source recordings, the input listing and the
# generated output.
BASE_PATH = "/home/jim/Playing/unlabelled"

# Collect the extension-less names listed in ina/no-music, one per line.
files = []
with open(f"{BASE_PATH}/ina/no-music") as listing:
    for raw_entry in listing:
        entry = raw_entry.strip()
        # Drop a leading "./" so the entry is relative to BASE_PATH.
        entry = entry[2:] if entry.startswith("./") else entry
        # Drop a trailing ".csv" so only the bare name remains.
        entry = entry[:-4] if entry.endswith(".csv") else entry
        files.append(entry)
# File extensions probed for every listed name, in this order.
exts = ["m4a", "mkv", "mp3", "MP3", "mp4", "wav"]
from pathlib import Path

# Map each listed name to the path of a matching file that exists on disk.
# When several extensions match the same name, the one latest in `exts`
# wins; names with no matching file are left out of the mapping.
candidates = (
    (stem, f"{BASE_PATH}/{stem}.{suffix}") for stem in files for suffix in exts
)
data = {
    stem: location for stem, location in candidates if Path(location).is_file()
}
from pydub import AudioSegment
# Re-encode every located source file as FLAC under BASE_PATH/flac.
for basename, fname in data.items():
    outstr = f"{BASE_PATH}/flac/{basename}.flac"
    # The export below opens `outstr` for writing and fails if its directory
    # is missing (no flac/ folder yet, or a listed name that contains
    # sub-directories), so create it first. This is a no-op when it exists.
    Path(outstr).parent.mkdir(parents=True, exist_ok=True)
    audio = AudioSegment.from_file(fname)
    # Extra encoder arguments: "-ac 1" requests a single channel and
    # "-ar 16000" a 16 kHz sample rate.
    audio.export(outstr, format="flac", parameters=["-ac", "1", "-ar", "16000"])
# Write vad_input.txt: one line per converted file holding a sequential
# "trainNNNN" id (numbered from 1), the FLAC path and its duration in seconds.
with open(f"{BASE_PATH}/vad_input.txt", "w") as manifest:
    for index, stem in enumerate(data, start=1):
        flac_path = f"{BASE_PATH}/flac/{stem}.flac"
        # Re-open the converted file to read the duration of the FLAC itself.
        seconds = AudioSegment.from_file(flac_path).duration_seconds
        manifest.write(f"train{index:04d} {flac_path} {seconds}\n")