Azure speech recognition for Irish, part 2
The JSON output contains the n-best list, word-level timestamps, and the text without replacements.
%%capture
!pip install azure-cognitiveservices-speech
!pip install youtube-dl
%%capture
!youtube-dl https://www.youtube.com/watch?v=cfjdfaqWY3Y
import azure.cognitiveservices.speech as speechsdk
Use either Key1 or Key2 (in the Azure Portal, under "Keys and Endpoint" in the menu on the left-hand side of the screen).
# Read the subscription key with getpass rather than input() so the secret is
# not echoed into the (saved) cell output; strip stray whitespace from pasting.
from getpass import getpass

_SUBS = getpass('put your subscription key here: ').strip()
# Region passed to SpeechConfig together with the subscription key.
_LOC = 'westeurope'
speech_config = speechsdk.SpeechConfig(region=_LOC, subscription=_SUBS)
!wget https://upload.wikimedia.org/wikipedia/commons/6/60/MSF_chapter_3.ogg https://upload.wikimedia.org/wikipedia/commons/e/ee/MSF_chapter_4.ogg https://upload.wikimedia.org/wikipedia/commons/b/b3/MSF_chapter_5.ogg https://upload.wikimedia.org/wikipedia/commons/2/21/MSF_chapter_6.ogg https://upload.wikimedia.org/wikipedia/commons/7/71/MSF_chapter_7.ogg https://upload.wikimedia.org/wikipedia/commons/d/d5/MSF_chapter_8.ogg
!ffmpeg -i MSF_chapter_5.ogg -acodec pcm_s16le -ac 1 -ar 16000 MSF_chapter_5.wav
# Recognise Irish and request the detailed result format with word-level
# timestamps.
speech_config.speech_recognition_language = 'ga-IE'
speech_config.request_word_level_timestamps()
# Named enum member instead of the magic number 1 (same value).
speech_config.output_format = speechsdk.OutputFormat.Detailed
# NOTE: `endpoint_id` is intentionally left unset. It identifies a custom
# speech model deployment; it is not a URL, and the token-issuing URL that
# used to be assigned here is not a valid value for it. Authentication is
# already covered by the subscription key and region given to SpeechConfig.
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
import time
import json
def speech_recognize_continuous_from_file(speech_config, filename):
    """Run continuous speech recognition on an audio file and log the results.

    Each final recognition result is appended, as one line of raw JSON, to a
    file named after the input with its extension replaced by ``.json``.
    The call blocks until the recognizer signals that the session stopped or
    was canceled.

    Args:
        speech_config: The ``speechsdk.SpeechConfig`` used to build the
            recognizer.
        filename: Path of the audio file to transcribe.
    """
    import os

    audio_config = speechsdk.audio.AudioConfig(filename=filename)
    # splitext replaces only the real extension, so a name without a lowercase
    # '.wav' can no longer resolve to the audio file itself (which the old
    # str.replace allowed, appending JSON to the audio).
    outfilename = os.path.splitext(filename)[0] + '.json'
    done = False

    def stop_cb(evt):
        """Signal the wait loop to finish when the session stops or is canceled."""
        nonlocal done
        print('CLOSING on {}'.format(evt))
        done = True

    def cancelled(evt):
        """Report why recognition was canceled, with error details if any."""
        cancellation_details = evt.result.cancellation_details
        print("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))

    # Explicit UTF-8 so the Irish text is written identically on every
    # platform; the context manager closes the file even if the SDK raises.
    with open(outfilename, 'a', encoding='utf-8') as outfile:

        def recognised(evt):
            """Append the raw JSON of a final recognition result as one line."""
            outfile.write('{}\n'.format(evt.result.json))

        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config,
            language='ga-IE',
            audio_config=audio_config,
        )

        # Connect callbacks to the events fired by the speech recognizer
        speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
        speech_recognizer.recognized.connect(recognised)
        speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
        speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
        speech_recognizer.canceled.connect(cancelled)
        # stop continuous recognition on either session stopped or canceled events
        speech_recognizer.session_stopped.connect(stop_cb)
        speech_recognizer.canceled.connect(stop_cb)

        # Start continuous speech recognition
        speech_recognizer.start_continuous_recognition()
        try:
            while not done:
                time.sleep(.5)
        finally:
            # Always stop the recognizer (e.g. on a notebook interrupt) before
            # the output file is closed, so no callback writes to a closed file.
            speech_recognizer.stop_continuous_recognition()
# Transcribe chapters 3 through 8, one WAV file per call.
for chapter in range(3, 9):
    wav_name = f'MSF_chapter_{chapter}.wav'
    speech_recognize_continuous_from_file(speech_config, wav_name)