%%capture
!sudo apt-get install git-lfs
!git lfs install
Error: Failed to call git rev-parse --git-dir: exit status 128 
Git LFS initialized.
!git clone https://huggingface.co/datasets/KTH/waxholm
Cloning into 'waxholm'...
remote: Enumerating objects: 7501, done.
remote: Counting objects: 100% (7501/7501), done.
remote: Compressing objects: 100% (7422/7422), done.
remote: Total 7501 (delta 84), reused 7487 (delta 77), pack-reused 0
Receiving objects: 100% (7501/7501), 207.52 MiB | 24.12 MiB/s, done.
Resolving deltas: 100% (84/84), done.
Updating files: 100% (7334/7334), done.
Filtering content: 100% (2522/2522), 282.66 MiB | 1.39 MiB/s, done.
!mkdir wav
import soundfile as sf

def smp_headers(filename: str):
    """Parse the ``key=value`` text header at the start of an ``.smp`` file.

    The first 1024 bytes of the file are read, trailing NUL padding is
    stripped, and the remainder is decoded as ASCII and split into
    CRLF-separated lines. A line consisting of a lone ``=`` is treated as the
    end-of-header marker: that line and everything after it are ignored.

    Args:
        filename: Path of the ``.smp`` file to inspect.

    Returns:
        A dict mapping each header key to its (string) value. Only the first
        ``=`` on a line separates key from value, so values may themselves
        contain ``=``. Lines without any ``=`` (e.g. blank lines) are skipped.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the header bytes are not valid ASCII.
    """
    with open(filename, "rb") as f:
        raw_headers = f.read(1024)

    asc_headers = raw_headers.rstrip(b"\x00").decode("ascii")
    lines = asc_headers.split("\r\n")

    # Walk backwards to find the lone "=" terminator line. If there is none,
    # every line is considered part of the header.
    end = len(lines)
    for idx in range(len(lines) - 1, -1, -1):
        if lines[idx] == "=":
            end = idx
            break

    headers = {}
    for line in lines[:end]:
        key, sep, value = line.partition("=")
        if not sep:
            # Not a key=value pair (blank or stray text); nothing to record.
            continue
        headers[key] = value
    return headers


def smp_read_sf(filename: str):
    """Read the 16-bit PCM samples of an ``.smp`` file using ``soundfile``.

    The byte order and channel count come from the file's text header (see
    ``smp_headers``): ``msb=last`` selects little-endian data, any other value
    selects big-endian. The sample rate is fixed at 16000 Hz.

    Args:
        filename: Path of the ``.smp`` file to read.

    Returns:
        A ``(data, samplerate)`` tuple as returned by ``soundfile.read``,
        with ``data`` requested as ``int16``.

    Raises:
        KeyError: If the header lacks the ``msb`` or ``nchans`` entry.
    """
    headers = smp_headers(filename)
    endian = "LITTLE" if headers["msb"] == "last" else "BIG"
    nchans = int(headers["nchans"])

    # soundfile counts ``start`` in frames (one 16-bit sample per channel),
    # not bytes, so convert the 1024-byte header into a frame offset. For
    # mono data this is 512 frames; with more channels a fixed 512 would skip
    # past the header into the sample data.
    header_bytes = 1024
    bytes_per_frame = 2 * nchans
    start_frame = header_bytes // bytes_per_frame

    data, sr = sf.read(filename, channels=nchans,
                       samplerate=16000, endian=endian, start=start_frame,
                       dtype="int16", format="RAW", subtype="PCM_16")
    return (data, sr)


def write_wav(filename, arr, nchannels=1, sampwidth=2, framerate=16000):
    """Write raw PCM sample data to a WAV file.

    Args:
        filename: Destination path (or writable file object) for the WAV.
        arr: Bytes-like object holding the sample frames; it is handed to
            ``wave``'s ``writeframes`` unchanged, so it must already be in
            the byte layout the WAV should contain.
        nchannels: Number of audio channels recorded in the WAV header.
        sampwidth: Sample width in bytes.
        framerate: Sampling rate in Hz.

    The defaults (mono, 16-bit, 16 kHz) match the values that were previously
    hard-coded, so existing two-argument calls behave as before.
    """
    import wave

    with wave.open(filename, "w") as f:
        f.setnchannels(nchannels)
        f.setsampwidth(sampwidth)
        f.setframerate(framerate)
        f.writeframes(arr)
from pathlib import Path

# Convert every .smp recording found under the dataset's scenes_formatted
# directory (searched recursively) into a WAV file inside wav/, named after
# the source file's stem.
scenes_root = Path("./waxholm/scenes_formatted")
for smp in scenes_root.glob("**/*.smp"):
    arr, sr = smp_read_sf(str(smp))
    wav_name = f"wav/{smp.stem}.wav"
    write_wav(wav_name, arr)
import IPython.display as ipd
# Build an audio display object for one of the converted files; presumably
# left as the last expression of a notebook cell so the player is rendered
# and the conversion can be checked by ear.
ipd.Audio('wav/fp2001.1.00.wav')