Language IDs with MMS
Using North Sámi
%%capture
%pip install accelerate torchaudio datasets
%pip install --upgrade git+https://github.com/huggingface/transformers.git
import requests
import json
def audio_from_json(jsonurl, timeout=30):
    """Return the ``audioUrl`` value from the JSON document served at *jsonurl*.

    Args:
        jsonurl: URL of an endpoint expected to respond with a JSON object.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The value stored under the ``"audioUrl"`` key, or ``None`` when the
        response status is not 200, the body is not valid JSON, or the
        decoded payload is not an object containing that key.

    Raises:
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...). Network errors are deliberately not
            swallowed so the caller can see why nothing was fetched.
    """
    req = requests.get(jsonurl, timeout=timeout)
    if req.status_code != 200:
        return None
    try:
        data = json.loads(req.text)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError; a 200 response with a
        # non-JSON body (e.g. an HTML error page) is treated as "no audio".
        return None
    if not isinstance(data, dict):
        # A JSON list/number/string cannot carry an "audioUrl" key.
        return None
    return data.get("audioUrl")
Audio from this page
audio_url = audio_from_json('https://sverigesradio.se/playerajax/audio?id=8695609&type=publication&publicationid=8695609&quality=medium')
audio_file = audio_url.split('/')[-1].split('?')[0]
!wget {audio_url} -O {audio_file}
from transformers import pipeline
# Run the downloaded clip through each facebook/mms-lid-* checkpoint in turn.
# The checkpoints differ only in the numeric suffix of the model name, so a
# loop replaces six copy-pasted load/classify pairs.
MMS_LID_SUFFIXES = (126, 256, 512, 1024, 2048, 4017)

# Results are kept per model name so they can be compared after the loop.
lid_results = {}
for suffix in MMS_LID_SUFFIXES:
    model_name = f"facebook/mms-lid-{suffix}"
    # Rebinding `classifier` each iteration means only one pipeline is
    # referenced at a time; after the loop it refers to the last checkpoint.
    classifier = pipeline("audio-classification", model=model_name)
    lid_results[model_name] = classifier(audio_file)
    # Print explicitly: a bare expression is only displayed when it is the
    # last statement of a notebook cell, and never when run as a script.
    print(model_name, lid_results[model_name])