Extract data for the ARS project.
Not much use to anyone else, but I may need to repeat it.
from pathlib import Path

from pydub import AudioSegment

# Directory that is searched for TERROR_* transcript files; the extracted
# clips are written into the same directory.
LINESFILE = Path("/home/joregan/third-shuffle")

# A video is looked up as VIDBASE/<video id><VIDSFX>.
VIDBASE = "/sbtal/riksdag-video/"
VIDSFX = "_480p.mp4"
VIDBASEPATH = Path(VIDBASE)

# Extra command-line arguments given to pydub's export(): one channel,
# 16-bit little-endian PCM, 16 kHz sample rate.
parameters = [
    "-ac", "1",
    "-acodec", "pcm_s16le",
    "-ar", "16000",
]
# For every TERROR_* transcript in LINESFILE, cut the span covered by the
# transcript out of /tmp/<vidid>.wav and save it as LINESFILE/<vidid>.wav.
for tscr in LINESFILE.glob("TERROR_*"):
    # Keep only the non-blank lines, stripped of surrounding whitespace.
    with open(tscr) as tscf:
        lines = [text for text in (raw.strip() for raw in tscf) if text]
    if not lines:
        # An empty transcript has no first/last line to take timings from.
        print("Error", tscr)
        continue

    parts_s = lines[0].split(" ")
    parts_e = lines[-1].split(" ")

    # The video id has to be read before the video path is built from it;
    # previously the path was built first, which raised NameError on the
    # first transcript and checked the previous transcript's video afterwards.
    vidid = parts_s[0]
    vidfile = VIDBASEPATH / f"{vidid}{VIDSFX}"
    if not vidfile.exists():
        # Report only: the audio below is read from /tmp, not from the video.
        print("Error", vidfile)

    # Field 2 of the first line is used as the clip start; field 2 plus
    # field 3 of the last line as the clip end.
    # NOTE(review): presumably start time and duration in seconds - confirm
    # against the transcript format.
    fstart = float(parts_s[2])
    fend = float(parts_e[2]) + float(parts_e[3])
    print(fstart, fend)

    # pydub slices AudioSegment objects by milliseconds.
    start = int(fstart * 1000)
    end = int(fend * 1000)

    # NOTE(review): /tmp/<vidid>.wav is presumably produced by the conversion
    # step below, which is disabled here - confirm it has been run beforehand.
    # audio = AudioSegment.from_file(str(vidfile), "mp4")
    # tmpwav = audio.export(f"/tmp/{vidid}.wav", format="wav", parameters=parameters)
    wavaudio = AudioSegment.from_wav(f"/tmp/{vidid}.wav")
    sect = wavaudio[start:end]
    outname = str(LINESFILE / f"{vidid}.wav")
    sect.export(outname, format="wav", parameters=parameters)