def speech_recognize_keyword_from_microphone():
    """Listen on the default microphone and recognize speech after a keyword.

    Starts keyword recognition with a keyword model file, prints every
    recognizing/recognized/session event the recognizer fires, and blocks
    (polling every 0.5 s) until a session-stopped or canceled event arrives.
    Keyword recognition is then stopped.
    """
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key, region=service_region)

    # Keyword model file: point this at your own keyword recognition model.
    model = speechsdk.KeywordRecognitionModel(
        "YourKeywordRecognitionModelFile.table")

    # Phrase the keyword model above triggers on.
    keyword = "YourKeyword"

    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)

    finished = False

    # Message templates per result reason; reasons not listed print nothing.
    recognizing_templates = {
        speechsdk.ResultReason.RecognizingKeyword: 'RECOGNIZING KEYWORD: {}',
        speechsdk.ResultReason.RecognizingSpeech: 'RECOGNIZING: {}',
    }
    recognized_templates = {
        speechsdk.ResultReason.RecognizedKeyword: 'RECOGNIZED KEYWORD: {}',
        speechsdk.ResultReason.RecognizedSpeech: 'RECOGNIZED: {}',
        speechsdk.ResultReason.NoMatch: 'NOMATCH: {}',
    }

    def reason_reporter(templates):
        """Build a handler that prints the template matching the result reason."""
        def handler(evt):
            template = templates.get(evt.result.reason)
            if template is not None:
                print(template.format(evt))
        return handler

    def announcer(template):
        """Build a handler that prints the event using a fixed template."""
        return lambda evt: print(template.format(evt))

    def stop_cb(evt):
        """Flag the wait loop to end when the event `evt` is received."""
        nonlocal finished
        print('CLOSING on {}'.format(evt))
        finished = True

    # Hook the reporting handlers up to the recognizer's events.
    speech_recognizer.recognizing.connect(
        reason_reporter(recognizing_templates))
    speech_recognizer.recognized.connect(
        reason_reporter(recognized_templates))
    speech_recognizer.session_started.connect(
        announcer('SESSION STARTED: {}'))
    speech_recognizer.session_stopped.connect(
        announcer('SESSION STOPPED {}'))
    speech_recognizer.canceled.connect(announcer('CANCELED {}'))

    # Either a stopped session or a cancellation ends the wait below.
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Begin listening for the keyword.
    speech_recognizer.start_keyword_recognition(model)
    print('Say something starting with "{}" followed by whatever you want...'.
          format(keyword))

    while not finished:
        time.sleep(.5)

    speech_recognizer.stop_keyword_recognition()
def speech_recognize_keyword_locally_from_microphone():
    """Run keyword spotting locally, with direct access to the result audio.

    Creates a ``KeywordRecognizer`` on the default microphone, waits for a
    single keyword recognition result and, when the keyword was recognized,
    writes the captured audio (including the keyword) to
    ``AudioFromRecognizedKeyword.wav`` in the current working directory.
    """
    # Creates an instance of a keyword recognition model. Update this to
    # point to the location of your keyword recognition model.
    model = speechsdk.KeywordRecognitionModel(
        "YourKeywordRecognitionModelFile.table")

    # The phrase your keyword recognition model triggers on.
    keyword = "YourKeyword"

    # Create a local keyword recognizer with the default microphone device
    # for input.
    keyword_recognizer = speechsdk.KeywordRecognizer()

    def recognized_cb(evt):
        """Print the recognized keyword phrase."""
        # Only a keyword phrase is recognized. The result cannot be 'NoMatch'
        # and there is no timeout. The recognizer runs until a keyword phrase
        # is detected or recognition is canceled (by stop_recognition_async()
        # or due to the end of an input file or stream).
        result = evt.result
        if result.reason == speechsdk.ResultReason.RecognizedKeyword:
            print("RECOGNIZED KEYWORD: {}".format(result.text))

    def canceled_cb(evt):
        """Print why recognition was canceled."""
        result = evt.result
        if result.reason == speechsdk.ResultReason.Canceled:
            print('CANCELED: {}'.format(result.cancellation_details.reason))

    # Connect callbacks to the events fired by the keyword recognizer.
    keyword_recognizer.recognized.connect(recognized_cb)
    keyword_recognizer.canceled.connect(canceled_cb)

    # Start keyword recognition. No separate completion flag is needed:
    # the .get() on the returned future below blocks until a result exists.
    result_future = keyword_recognizer.recognize_once_async(model)
    print('Say something starting with "{}" followed by whatever you want...'.
          format(keyword))
    result = result_future.get()

    # Read result audio (incl. the keyword).
    if result.reason == speechsdk.ResultReason.RecognizedKeyword:
        time.sleep(2)  # give some time so the stream is filled
        result_stream = speechsdk.AudioDataStream(result)
        # Stop any more data from input getting to the stream.
        result_stream.detach_input()

        save_future = result_stream.save_to_wav_file_async(
            "AudioFromRecognizedKeyword.wav")
        print('Saving file...')
        # Wait for the save to finish; its result is not used.
        save_future.get()
def speech_recognize_keyword_from_microphone(mictuning):
    """Wait for the wake word and return what was said after it.

    Reads the subscription key, region and recognition language from the
    ``speech_config`` section of ``inifile``, starts keyword recognition
    with the model in ``./kws.table`` and blocks until the recognizer
    reports that the session stopped or was canceled.

    Args:
        mictuning: Object exposing a ``direction`` attribute. It is sampled
            once before waiting and then every 0.5 s while waiting.
            (Presumably a microphone-array tuning handle; confirm against
            the caller.)

    Returns:
        tuple: ``(text, direction)``. ``text`` is the last recognized
        utterance with every occurrence of 'アシスタント' removed, or ``''``
        when the session ended without any recognized speech. ``direction``
        is the most recently sampled ``mictuning.direction``.
    """
    WAKE_WORD = "assistant"
    WAKE_WORD_MODEL = "./kws.table"

    # Load credentials and recognition language from the settings file.
    speech_key = inifile.get('speech_config', 'speech_key1')
    service_region = inifile.get('speech_config', 'service_region')
    language = inifile.get('speech_config', 'language')

    # Create the speech config instance.
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key,
        region=service_region,
        speech_recognition_language=language)

    # Create the speech recognizer with that configuration.
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)

    # Create the keyword recognition model instance.
    model = speechsdk.KeywordRecognitionModel(WAKE_WORD_MODEL)

    # Set by stop_cb to end the wait loop below.
    done = False
    # Per-call transcript. Starting from '' means a session that ends
    # without recognized speech neither raises NameError nor returns the
    # text left over from an earlier call.
    recognized_text = ''

    def stop_cb(evt):
        """Signal the wait loop to finish when event `evt` is received."""
        nonlocal done
        done = True

    def recognized_cb(evt):
        """Remember the transcript of a recognized utterance."""
        nonlocal recognized_text
        global text
        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            recognized_text = evt.result.text
            # Also keep the module-level `text` updated, as the previous
            # implementation did, in case other code still reads it.
            text = recognized_text

    # Connect the callbacks to the events fired by the speech recognizer.
    speech_recognizer.recognized.connect(recognized_cb)

    # End the wait on either a session-stopped or a canceled event.
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start keyword recognition.
    speech_recognizer.start_keyword_recognition(model)
    print('"{}" と呼びかけ、続けて何かおっしゃってください'.format(WAKE_WORD))

    # Sample once up front so a direction is available even if the session
    # ends before the first loop iteration.
    dire = mictuning.direction
    try:
        while not done:
            dire = mictuning.direction
            time.sleep(.5)
    finally:
        # Stop once, from this thread, rather than from inside the
        # recognizer's event callbacks.
        speech_recognizer.stop_keyword_recognition()

    return recognized_text.replace('アシスタント', ''), dire