!pip install ffmpeg-python
_BASE = "/Users/joregan/asr/slt/audio"
import json
from pathlib import Path

import ffmpeg
# Load the existing records from a JSON Lines file (one JSON object per
# line) into `slt`, keyed by the stem of each record's "path" value.
slt = {}
# JSON text is UTF-8; be explicit rather than relying on the platform default.
with open("/Users/joregan/asr/slt.json", encoding="utf-8") as slt_file:
    # Iterate the file lazily instead of materialising it with readlines().
    for line in slt_file:
        # A blank line (e.g. a stray newline at the end of the file) is not
        # valid JSON and would otherwise abort the whole load.
        if not line.strip():
            continue
        linedata = json.loads(line)
        slt_id = Path(linedata["path"]).stem
        slt[slt_id] = linedata
        
from pathlib import Path
# Probe every `.shn` file below _BASE and merge selected format tags into
# `slt`, keyed by the file stem (the same key used for the records already
# loaded into `slt`).

# Maps the tag name reported by ffprobe to the key used in the output record.
_TAG_TO_FIELD = {
    "Gender": "gender",
    "UserID": "user_id",
    "Dialect": "dialect",
    "recording_date": "recording_date",
    "recording_time": "recording_time",
}

base = Path(_BASE)
for shn in base.glob("**/*.shn"):
    probe = ffmpeg.probe(str(shn))
    # Skip the file when either the "format" section or its "tags" are
    # missing. The previous guard joined the two tests with `and`, so a
    # missing "format" key raised KeyError inside the guard itself and a
    # missing "tags" key raised KeyError on the lookup that followed.
    tags = probe.get("format", {}).get("tags")
    if tags is None:
        continue
    meta = {"id": shn.stem}
    # Copy over only the tags that are actually present on this file.
    meta.update(
        (field, tags[tag]) for tag, field in _TAG_TO_FIELD.items() if tag in tags
    )
    # Extend the existing record if there is one, otherwise start a new one.
    slt.setdefault(meta["id"], {}).update(meta)
# Dump the merged records as JSON Lines: one JSON object per line, in the
# insertion order of `slt`.
with open("slt-meta.json", "w") as slt_out:
    slt_out.writelines(json.dumps(record) + "\n" for record in slt.values())