Running speaker diarisation with pyannote audio
Installing the development version of pyannote.audio and its dependencies
conda install pytorch torchaudio -c pytorch
conda install numpy cffi
conda install libsndfile=1.0.28 -c conda-forge
pip install https://github.com/pyannote/pyannote-audio/archive/develop.zip
pip install speechbrain
pip install pydub
pip install librosa
pip install ipykernel
import librosa
import torch
!youtube-dl --write-sub --sub-lang 'sv' -o '%(id)s.%(ext)s' j8AH29Ad-zU
from pyannote.audio import Pipeline
# Run speaker diarisation on the downloaded video and save the result as RTTM.
VIDEO_ID = "j8AH29Ad-zU"  # single source for the media name, URI and RTTM name
SAMPLE = f"{VIDEO_ID}.mp4"

pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")

# mono=False keeps the channels separate instead of down-mixing them; librosa
# then yields a 1-D array for single-channel audio and a 2-D one otherwise.
audio, sr = librosa.load(SAMPLE, mono=False)
audiot = torch.from_numpy(audio)
if audiot.ndim == 1:
    # The pipeline expects a 2-D (channel, time) waveform, so give
    # single-channel audio an explicit leading channel axis.
    audiot = audiot.unsqueeze(0)

# Pass the audio in memory rather than as a path; the sample rate must
# accompany the waveform.
diarization = pipeline({"waveform": audiot, "sample_rate": sr})

# The URI is assigned before writing so the RTTM output is tagged with the
# video ID.
diarization.uri = VIDEO_ID
with open(f"{VIDEO_ID}.rttm", "w") as f:
    diarization.write_rttm(f)