def __init__(self):
    """Initialize the handler with per-subsystem lists of exception classes.

    The lists are presumably consulted elsewhere (e.g. ``type(err) in
    self._request_exceptions``) to classify caught exceptions by origin:
    HTTP requests, general runtime errors, filesystem, sqlite3, and the
    speech recognizer.
    """
    self.__logger = Logger()
    # Reference the exception classes directly instead of instantiating a
    # throwaway object of each just to call type() on it.  Duplicates from
    # the original lists (TooManyRedirects, sr.RequestError appeared twice)
    # are removed; membership checks are unaffected.
    self._request_exceptions = [
        requests.ConnectionError,
        requests.HTTPError,
        requests.TooManyRedirects,
        requests.Timeout,
        requests.RequestException,
        requests.ConnectTimeout,
        requests.ReadTimeout,
    ]
    self._system_errors = [
        KeyError,
        AttributeError,
        IndexError,
        ZeroDivisionError,
        SystemError,
        ValueError,
        AssertionError,
    ]
    self._file_errors = [FileExistsError, FileNotFoundError]
    self._database_errors = [
        sqlite3.Error,
        sqlite3.DataError,
        sqlite3.ProgrammingError,
        sqlite3.DatabaseError,
        sqlite3.NotSupportedError,
        sqlite3.IntegrityError,
        sqlite3.InterfaceError,
        sqlite3.InternalError,
        sqlite3.OperationalError,
    ]
    self._speech_recognizer_errors = [
        sr.RequestError,
        sr.UnknownValueError,
        sr.WaitTimeoutError,
    ]
    self.__logger.info('ExceptionsHandler was successfully initialized.', __name__)
def recognize_ibm(self, audio_data, username, password, language="en-US", show_all=False,
                  url="https://stream.watsonplatform.net/speech-to-text/api"):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the IBM
    Speech to Text API.

    The IBM Speech to Text username and password are specified by ``username`` and
    ``password``, respectively. Unfortunately, these are not available without
    `signing up for an account <https://console.ng.bluemix.net/registration/>`__. Once logged
    into the Bluemix console, follow the instructions for `creating an IBM Watson service
    instance <https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml>`__,
    where the Watson service is "Speech To Text". IBM Speech to Text usernames are strings of
    the form XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX, while passwords are mixed-case
    alphanumeric strings.

    The recognition language is determined by ``language``, an RFC5646 language tag with a
    dialect like ``"en-US"`` (US English) or ``"zh-CN"`` (Mandarin Chinese), defaulting to US
    English. The supported language values are listed under the ``model`` parameter of the
    `audio recognition API documentation
    <https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/#sessionless_methods>`__,
    in the form ``LANGUAGE_BroadbandModel``, where ``LANGUAGE`` is the language value.

    Returns the most likely transcription if ``show_all`` is false (the default). Otherwise,
    returns the `raw API response
    <https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/#sessionless_methods>`__
    as a JSON dictionary.

    Raises a ``speech_recognition.UnknownValueError`` exception if the speech is
    unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech
    recognition operation failed, if the key isn't valid, or if there is no internet
    connection.
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(username, str), "``username`` must be a string"
    assert isinstance(password, str), "``password`` must be a string"
    flac_data = audio_data.get_flac_data(
        convert_rate=None if audio_data.sample_rate >= 16000 else 16000,  # audio samples should be at least 16 kHz
        convert_width=None if audio_data.sample_width >= 2 else 2  # audio samples should be at least 16-bit
    )
    # Build the sessionless-recognition endpoint URL from the service base URL.
    url = "{}/v1/recognize?{}".format(url, urlencode({
        "profanity_filter": "false",
        "continuous": "true",
        "model": "{}_BroadbandModel".format(language),
    }))
    request = Request(url, data=flac_data, headers={
        "Content-Type": "audio/x-flac",
        "X-Watson-Learning-Opt-Out": "true",  # prevent requests from being logged, for improved privacy
    })
    # HTTP Basic auth: base64("username:password").
    authorization_value = base64.standard_b64encode("{}:{}".format(username, password).encode("utf-8")).decode(
        "utf-8")
    request.add_header("Authorization", "Basic {}".format(authorization_value))
    try:
        response = urlopen(request, timeout=self.operation_timeout)
    except HTTPError as e:
        raise sr.RequestError("recognition request failed: {}".format(e.reason))
    except URLError as e:
        raise sr.RequestError("recognition connection failed: {}".format(e.reason))
    response_text = response.read().decode("utf-8")
    result = json.loads(response_text)
    # return results
    if show_all:
        return result
    # No usable hypothesis at all -> the speech was unintelligible.
    if "results" not in result or len(result["results"]) < 1 or "alternatives" not in result["results"][0]:
        raise sr.UnknownValueError()
    # Collect the transcript of every alternative of every utterance, one per line.
    transcription = []
    for utterance in result["results"]:
        if "alternatives" not in utterance:
            raise sr.UnknownValueError()
        for hypothesis in utterance["alternatives"]:
            if "transcript" in hypothesis:
                transcription.append(hypothesis["transcript"])
    return "\n".join(transcription)
def recognize_deepspeech(audio_data, cmdline):
    """Transcribe ``audio_data`` by shelling out to a DeepSpeech command line.

    Author: Misha Jiline (https://github.com/mjiline)

    ``cmdline`` is the DeepSpeech invocation (binary plus any fixed
    arguments); the audio is handed over as ``--audio <tempfile>``.

    Returns a ``(transcript, {})`` tuple; the empty dict mirrors the shape
    of the other ``recognize_*`` helpers.

    Raises ``sr.RequestError`` if required stdlib modules are unavailable.
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(cmdline, str), "``cmdline`` must be a string"
    try:
        import tempfile
        import subprocess
        import shlex
        from subprocess import PIPE
    except ImportError:
        # Name every required module — the original message blamed only
        # ``tempfile`` regardless of which import actually failed.
        raise sr.RequestError("missing tempfile/subprocess/shlex module")
    # DeepSpeech expects 16 kHz, 16-bit audio.
    raw_data = audio_data.get_wav_data(convert_rate=16000, convert_width=2)
    with tempfile.NamedTemporaryFile(suffix='.wav') as fp:
        fp.write(raw_data)
        fp.seek(0)
        # shell=True is needed because ``cmdline`` may carry its own
        # arguments; quote the temp-file path so the shell cannot interpret
        # any special characters in it.
        transcript = subprocess.run(
            "exec %s --audio %s" % (cmdline, shlex.quote(fp.name)),
            shell=True, stdout=PIPE, stderr=PIPE).stdout
    transcript = transcript.decode('utf-8')
    return transcript, {}
def test_listen_raises_connection_error(self):
    """listen() must surface a transcription failure as an API connection error."""
    with patch.object(self.listener, 'transcribe') as mock_transcribe:
        mock_transcribe.side_effect = sr.RequestError()
        with self.assertRaises(Exception) as raised:
            self.listener.listen()
        self.assertIn('Error connecting to API', str(raised.exception))
def recognize_amazon(audio_data, bot_name, bot_alias, user_id,
                     content_type="audio/l16; rate=16000; channels=1",
                     access_key_id=None, secret_access_key=None, region=None):
    """Send ``audio_data`` (an ``AudioData`` instance) to an Amazon Lex bot
    and return ``(transcript, raw_response)``.

    If access_key_id or secret_access_key is not set it will go through the list in the link below
    http://boto3.readthedocs.io/en/latest/guide/configuration.html#configuring-credentials

    Author: Patrick Artounian (https://github.com/partounian)
    Source: https://github.com/Uberi/speech_recognition/pull/331
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(bot_name, str), "``bot_name`` must be a string"
    assert isinstance(bot_alias, str), "``bot_alias`` must be a string"
    assert isinstance(user_id, str), "``user_id`` must be a string"
    assert isinstance(content_type, str), "``content_type`` must be a string"
    assert access_key_id is None or isinstance(
        access_key_id, str), "``access_key_id`` must be a string"
    assert secret_access_key is None or isinstance(
        secret_access_key, str), "``secret_access_key`` must be a string"
    assert region is None or isinstance(region, str), "``region`` must be a string"

    try:
        import boto3
    except ImportError:
        raise sr.RequestError(
            "missing boto3 module: ensure that boto3 is set up correctly.")

    lex_client = boto3.client(
        'lex-runtime',
        aws_access_key_id=access_key_id,
        aws_secret_access_key=secret_access_key,
        region_name=region,
    )

    # Lex's default content type expects 16 kHz, 16-bit mono PCM.
    pcm_data = audio_data.get_raw_data(convert_rate=16000, convert_width=2)

    lex_response = lex_client.post_content(
        botName=bot_name,
        botAlias=bot_alias,
        userId=user_id,
        contentType=content_type,
        accept="text/plain; charset=utf-8",
        inputStream=pcm_data,
    )

    if not lex_response["inputTranscript"]:
        raise sr.UnknownValueError()
    return lex_response["inputTranscript"], lex_response
def say_answer(recog):
    """Retrieve the text for a spoken answer.

    Listens on the default microphone (up to 20 seconds of speech) and
    returns the Google Speech Recognition transcription.

    Raises ``sr.UnknownValueError`` when the audio is unintelligible and
    ``sr.RequestError`` when the recognition service cannot be reached;
    both are logged to stdout before propagating.
    """
    try:
        with sr.Microphone() as source:
            print("Say your answer\n")
            audio = recog.listen(source, phrase_time_limit=20)
            return recog.recognize_google(audio)
    except sr.UnknownValueError:
        print('Speech recognition could not understand audio')
        # Re-raise the original exception instead of constructing a fresh
        # one, preserving the traceback for callers.
        raise
    except sr.RequestError as err:
        print('Could not request results from the speech recognition '
              'service; {0}'.format(err))
        # Bare raise keeps the original error's args and traceback; the old
        # ``raise sr.RequestError(err)`` wrapped and obscured them.
        raise
def recognize(self, connected):
    """Capture one utterance and return the recognized text.

    When ``connected`` is true, listens on the microphone and transcribes
    with Google Speech Recognition; otherwise falls back to ``input()``.
    Returns the text, or ``None`` when nothing usable was captured.

    Raises ``sr.RequestError`` when the online recognizer is unreachable.
    """
    if connected:
        r = sr.Recognizer()
        with sr.Microphone() as source:
            print("I'm listening...")
            audio = r.listen(source)
            try:
                self.data = r.recognize_google(audio)
                # Announce only on success — the original printed
                # unconditionally after the try/except and crashed with an
                # AttributeError when recognition failed and ``self.data``
                # had never been assigned.
                print("You said: " + self.data)
            except sr.UnknownValueError:
                self.data = ""  # guarantee the attribute exists for the check below
                print("I cannot understand you clearly.")
            except sr.RequestError as e:
                raise sr.RequestError("Error: No connection", str(e))
    else:
        self.data = input("Input: ")
    if self.data:
        return self.data
    else:
        print("Test")
def setup_decoder(audio_file, keyword_entries):
    """Build a PocketSphinx decoder and return ``(audio_data_path, decoder)``.

    ``audio_file`` must be a ``.wav`` file name, resolved against
    ``../data/`` relative to the current working directory.
    ``keyword_entries`` is ``None`` or a list of ``(keyword, sensitivity)``
    pairs with sensitivities in [0, 1].

    Raises ``speech_recognition.RequestError`` for a non-wav file, a broken
    PocketSphinx installation, or missing language data.
    """
    language = "en-US"
    # Use splitext so names with extra dots ("take.2.wav") resolve to the
    # real extension; the old ``audio_file.split(".")[1]`` grabbed the first
    # dotted segment (wrongly rejecting such files) and raised IndexError on
    # names with no dot at all.
    audio_file_type = os.path.splitext(audio_file)[1]
    if audio_file_type == '.wav':
        curr_dir = os.getcwd()
        data_dir = os.path.join(curr_dir, '../data/')
        # NOTE(review): hard-coded Python 2.7 site-packages path — presumably
        # where the speech_recognition package's data lives on this machine;
        # verify on the deployment host.
        speech_recognition_directory = '/Library/Python/2.7/site-packages/speech_recognition/'
        audio_data_path = os.path.join(data_dir, audio_file)
    else:
        raise speech_recognition.RequestError("file type must be .wav")
    assert isinstance(language, str), "``language`` must be a string"
    assert keyword_entries is None or all(
        isinstance(keyword, (type(""), type(u""))) and 0 <= sensitivity <= 1
        for keyword, sensitivity in keyword_entries), \
        "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers " \
        "between 0 and 1"
    # import the PocketSphinx speech recognition module
    try:
        from pocketsphinx import pocketsphinx
    except ImportError:
        raise speech_recognition.RequestError(
            "missing PocketSphinx module: ensure that PocketSphinx is set up correctly."
        )
    except ValueError:
        raise speech_recognition.RequestError(
            "bad PocketSphinx installation detected; make sure you have PocketSphinx version 0.0.9 or better."
        )
    language_directory = os.path.join(
        os.path.dirname(speech_recognition_directory), "pocketsphinx-data",
        language)
    if not os.path.isdir(language_directory):
        raise speech_recognition.RequestError(
            "missing PocketSphinx language data directory: \"{}\"".format(
                language_directory))
    acoustic_parameters_directory = os.path.join(language_directory,
                                                 "acoustic-model")
    if not os.path.isdir(acoustic_parameters_directory):
        raise speech_recognition.RequestError(
            "missing PocketSphinx language model parameters directory: \"{}\""
            .format(acoustic_parameters_directory))
    language_model_file = os.path.join(language_directory,
                                       "language-model.lm.bin")
    if not os.path.isfile(language_model_file):
        raise speech_recognition.RequestError(
            "missing PocketSphinx language model file: \"{}\"".format(
                language_model_file))
    phoneme_dictionary_file = os.path.join(
        language_directory, "pronounciation-dictionary.dict")
    if not os.path.isfile(phoneme_dictionary_file):
        raise speech_recognition.RequestError(
            "missing PocketSphinx phoneme dictionary file: \"{}\"".format(
                phoneme_dictionary_file))
    # create decoder object
    config = pocketsphinx.Decoder.default_config()
    # set the path of the hidden Markov model (HMM) parameter files
    config.set_string("-hmm", acoustic_parameters_directory)
    config.set_string("-lm", language_model_file)
    config.set_string("-dict", phoneme_dictionary_file)
    # disable logging (logging causes unwanted output in terminal)
    config.set_string("-logfn", os.devnull)
    decoder = pocketsphinx.Decoder(config)
    return audio_data_path, decoder
def prepare_sphinx2(self, language="en-US", keyword_entries=None):
    """Initialize ``self.decoder`` (PocketSphinx), optionally in keyword-spotting mode.

    ``language`` is either a language tag like ``"en-US"`` (resolved against
    the bundled ``pocketsphinx-data`` directory) or a 3-tuple of Sphinx data
    file paths ``(acoustic_parameters, language_model, phoneme_dictionary)``.
    ``keyword_entries`` is ``None`` or a list of ``(keyword, sensitivity)``
    pairs; when given, a keywords file is written and the decoder is switched
    to keyword search.

    Raises ``sr.RequestError`` when language data files are missing.
    """
    assert isinstance(language, str) or (
        isinstance(language, tuple) and len(language) == 3
    ), "``language`` must be a string or 3-tuple of Sphinx data file paths of the form ``(acoustic_parameters, language_model, phoneme_dictionary)``"
    assert keyword_entries is None or all(
        isinstance(keyword, (type(""), type(u""))) and 0 <= sensitivity <= 1
        for keyword, sensitivity in keyword_entries
    ), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers between 0 and 1"
    if isinstance(language, str):  # directory containing language data
        language_directory = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data",
            language)
        if not os.path.isdir(language_directory):
            raise sr.RequestError(
                "missing PocketSphinx language data directory: \"{}\"".
                format(language_directory))
        acoustic_parameters_directory = os.path.join(
            language_directory, "acoustic-model")
        language_model_file = os.path.join(language_directory,
                                           "language-model.lm.bin")
        phoneme_dictionary_file = os.path.join(
            language_directory, "pronounciation-dictionary.dict")
    else:  # 3-tuple of Sphinx data file paths
        acoustic_parameters_directory, language_model_file, phoneme_dictionary_file = language
    if not os.path.isdir(acoustic_parameters_directory):
        raise sr.RequestError(
            "missing PocketSphinx language model parameters directory: \"{}\""
            .format(acoustic_parameters_directory))
    if not os.path.isfile(language_model_file):
        raise sr.RequestError(
            "missing PocketSphinx language model file: \"{}\"".format(
                language_model_file))
    if not os.path.isfile(phoneme_dictionary_file):
        raise sr.RequestError(
            "missing PocketSphinx phoneme dictionary file: \"{}\"".format(
                phoneme_dictionary_file))
    # create decoder object
    config = pocketsphinx.Decoder.default_config()
    config.set_string(
        "-hmm", acoustic_parameters_directory
    )  # set the path of the hidden Markov model (HMM) parameter files
    config.set_string("-lm", language_model_file)
    config.set_string("-dict", phoneme_dictionary_file)
    config.set_string(
        "-logfn", os.devnull
    )  # disable logging (logging causes unwanted output in terminal)
    self.decoder = pocketsphinx.Decoder(config)
    # BUG FIX: the original unconditionally iterated ``keyword_entries``,
    # which raised TypeError for the documented default of ``None``.  Only
    # configure keyword search when entries were actually supplied.
    if keyword_entries:
        with open("sphinx.txt", "w") as f:
            # generate a keywords file - Sphinx documentation recommends sensitivities between 1e-50 and 1e-5
            f.writelines("{} /{}/\n".format(keyword, sensitivity)
                         for keyword, sensitivity in keyword_entries)
        # file is closed (and flushed) before the decoder reads it
        self.decoder.set_kws("keywords", "sphinx.txt")
        self.decoder.set_search("keywords")
    return
import os try: import speech_recognition as sr #@UnusedImport #check if package is installed except: print("No speech_recognition installed on system. Try to use fallback...") import resources.lib.speech_recognition as sr #@Reimport #if not, use the provides ones # import the PocketSphinx speech recognition module try: from pocketsphinx import pocketsphinx except ImportError: raise sr.RequestError( "missing PocketSphinx module: ensure that PocketSphinx is set up correctly." ) except ValueError: raise sr.RequestError( "bad PocketSphinx installation; try reinstalling PocketSphinx version 0.0.9 or better." ) if not hasattr(pocketsphinx, "Decoder") or not hasattr(pocketsphinx.Decoder, "default_config"): raise sr.RequestError( "outdated PocketSphinx installation; ensure you have PocketSphinx version 0.0.9 or better." ) class MyRecognizer(sr.Recognizer): def prepare_sphinx2(self, language="en-US", keyword_entries=None): assert isinstance(language, str) or ( isinstance(language, tuple) and len(language) == 3
def recognize_ibm(audio_data, username, password, language="en-US", show_all=False):
    """Transcribe ``audio_data`` with the IBM Speech to Text API.

    ``username``/``password`` are the IBM service credentials; ``language``
    selects the ``{language}_BroadbandModel`` recognition model.  Returns the
    raw JSON response when ``show_all`` is true, otherwise the best
    transcripts joined with newlines.

    Raises ``sr.UnknownValueError`` for unintelligible speech and
    ``sr.RequestError`` for request/connection failures.
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(username, str), "``username`` must be a string"
    assert isinstance(password, str), "``password`` must be a string"

    # Upsample / widen only when below the API minimums (16 kHz, 16-bit).
    rate_ok = audio_data.sample_rate >= 16000
    width_ok = audio_data.sample_width >= 2
    flac_data = audio_data.get_flac_data(
        convert_rate=None if rate_ok else 16000,
        convert_width=None if width_ok else 2,
    )

    query = urlencode({
        "profanity_filter": "false",
        "model": "{}_BroadbandModel".format(language),
        "inactivity_timeout": -1,  # don't stop recognizing when the audio stream activity stops
        "timestamps": "true"
    })
    request = Request(
        "https://stream.watsonplatform.net/speech-to-text/api/v1/recognize?{}".format(query),
        data=flac_data,
        headers={
            "Content-Type": "audio/x-flac",
            "X-Watson-Learning-Opt-Out": "true",  # prevent requests from being logged, for improved privacy
        })

    # HTTP Basic auth: base64("username:password").
    credentials = "{}:{}".format(username, password).encode("utf-8")
    authorization_value = base64.standard_b64encode(credentials).decode("utf-8")
    request.add_header("Authorization", "Basic {}".format(authorization_value))

    try:
        response = urlopen(request, timeout=None)
    except HTTPError as e:
        raise sr.RequestError("recognition request failed: {}".format(e.reason))
    except URLError as e:
        raise sr.RequestError("recognition connection failed: {}".format(e.reason))

    result = json.loads(response.read().decode("utf-8"))

    if show_all:
        return result

    utterances = result.get("results", [])
    if len(utterances) < 1 or "alternatives" not in utterances[0]:
        raise sr.UnknownValueError()

    transcription = []
    for utterance in utterances:
        if "alternatives" not in utterance:
            raise sr.UnknownValueError()
        transcription.extend(
            hypothesis["transcript"]
            for hypothesis in utterance["alternatives"]
            if "transcript" in hypothesis)
    return "\n".join(transcription)