def start(self, callback):
    """Begin streaming speech recognition from the microphone.

    Args:
        callback (function): Invoked with ``(transcript, is_final)`` each
            time text is transcribed from speech.
    """
    try:
        with MicrophoneInput() as mic:
            print("Starting SpeechToTextClient")
            self._mic = mic
            audio_chunks = self._mic.generator()

            recognition_config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=self._mic.RATE,
                language_code=self.language_code,
                use_enhanced=True,
                speech_contexts=self.speech_context)
            stream_config = types.StreamingRecognitionConfig(
                config=recognition_config, interim_results=True)

            request_stream = (
                types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in audio_chunks)
            responses = self.client.streaming_recognize(
                stream_config, request_stream)

            for response in responses:
                # Skip empty responses; the first result is the best one.
                if not response.results:
                    continue
                best = response.results[0]
                if not best.alternatives:
                    continue
                text = best.alternatives[0].transcript.strip().lower()
                callback((text, best.is_final))
    except OutOfRange:
        # The streaming API eventually cuts the stream off; reconnect.
        self.restart(callback)
def main():
    """Stream microphone audio to Cloud Speech and print transcripts.

    Returns:
        The module-level ``user_phrase_result`` after the transcription
        loop finishes.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient.from_service_account_json(
        "./MyProject-90749589d270.json")
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        chunks = stream.generator()
        request_iter = (types.StreamingRecognizeRequest(audio_content=chunk)
                        for chunk in chunks)
        responses = client.streaming_recognize(streaming_config, request_iter)

        # Consume the transcription responses.
        listen_print_loop(responses)

    # NOTE(review): `user_phrase_result` is not defined in this function —
    # presumably a module-level name populated by listen_print_loop; verify.
    return user_phrase_result
def test_streaming_recognize():
    """The helper must forward a generator of (config-dict, audio) requests."""
    client = make_speech_client()
    config = types.StreamingRecognitionConfig()
    requests = [types.StreamingRecognizeRequest(audio_content=b"...")]

    super_patch = mock.patch(
        "google.cloud.speech_v1.services.speech.SpeechClient.streaming_recognize",
        autospec=True,
    )

    with super_patch as streaming_recognize:
        client.streaming_recognize(config, requests)

    # The forwarded iterable must be a lazy generator whose first item
    # carries the streaming config and whose remaining items are the
    # caller's audio requests, with retry/timeout passed through.
    _, args, kwargs = streaming_recognize.mock_calls[0]
    forwarded = kwargs["requests"]
    assert isinstance(forwarded, GeneratorType)
    assert list(forwarded) == [{"streaming_config": config}, requests[0]]
    assert "retry" in kwargs
    assert "timeout" in kwargs
def main():
    """Run streaming recognition forever, reopening the mic on any error.

    Each pass opens a fresh MicrophoneStream, sends it to the Speech API,
    and hands the responses to ``listen_print_loop``; failures are printed
    and the loop retries.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # The language code you speak.
    language_code = 'th-TH'  # a BCP-47 language tag

    client = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    rounds = 1  # initial loop value
    while True:
        try:
            print('streaming loop :' + str(rounds))
            with MicrophoneStream(RATE, CHUNK) as stream:
                chunks = stream.generator()
                # Create request data.
                request_iter = (
                    types.StreamingRecognizeRequest(audio_content=chunk)
                    for chunk in chunks)
                # POST data to google cloud speech.
                responses = client.streaming_recognize(streaming_config,
                                                       request_iter)
                # Now, put the transcription responses to use.
                listen_print_loop(responses)
        except Exception as err:
            # Log and fall through so the loop reconnects.
            print(err)
        rounds += 1
def test_streaming_recognize(self):
    """The wrapper must prepend a config-bearing request to the stream."""
    from google.cloud.speech_v1 import types

    client = self._make_one()
    config = types.StreamingRecognitionConfig()
    requests = [types.StreamingRecognizeRequest(audio_content=b'...')]

    with mock.patch.object(client, '_streaming_recognize') as sr:
        client.streaming_recognize(config, requests)

    # _streaming_recognize must receive a lazy generator that yields the
    # config-bearing request first, then the caller's audio requests.
    _, args, _ = sr.mock_calls[0]
    forwarded = args[0]
    assert isinstance(forwarded, GeneratorType)
    assert list(forwarded) == [
        types.StreamingRecognizeRequest(streaming_config=config),
        requests[0],
    ]
def work1():
    """Stream Japanese speech recognition and mirror interim text into a Tk label.

    Interim transcripts are wrapped to the label width and the label shows
    at most ``num_comment`` lines (the most recent ones). A final transcript
    containing "exit" or "quit" shuts the window down via ``on_closing``.
    """
    global var, tflag
    var.set("ここに音声認識結果が表示されます")
    language_code = 'ja-JP'  # a BCP-47 language tag

    client = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        chunks = stream.generator()
        request_iter = (types.StreamingRecognizeRequest(audio_content=chunk)
                        for chunk in chunks)
        responses = client.streaming_recognize(streaming_config, request_iter)

        for response in responses:
            if not response.results:
                continue
            # For streaming only the first result matters: once it is
            # `is_final` the API moves on to the next utterance.
            result = response.results[0]
            if not result.alternatives:
                continue

            transcript = result.alternatives[0].transcript
            if not result.is_final:
                # Wrap to the label width; keep only the newest lines when
                # there are more than fit (equivalent to a tail slice).
                txtlist = textwrap.wrap(transcript, int(ww / w))
                print(txtlist)
                shown = (txtlist if len(txtlist) <= num_comment
                         else txtlist[-num_comment:])
                var.set("".join(shown))
            else:
                # Exit recognition if any of the transcribed phrases could
                # be one of our keywords.
                if re.search(r'\b(exit|quit)\b', transcript, re.I):
                    on_closing()
def test_streaming_recognize():
    """The helper must forward a generator of properly-ordered requests."""
    client = make_speech_client()
    config = types.StreamingRecognitionConfig()
    requests = [types.StreamingRecognizeRequest(audio_content=b'...')]

    super_patch = mock.patch(
        'google.cloud.speech_v1.speech_client.SpeechClient.'
        'streaming_recognize',
        autospec=True)

    with super_patch as streaming_recognize:
        client.streaming_recognize(config, requests)

    # The second positional arg must be a lazy generator yielding the
    # config-bearing request first, then the caller's audio requests,
    # with retry/timeout passed through as keywords.
    _, args, kwargs = streaming_recognize.mock_calls[0]
    forwarded = args[1]
    assert isinstance(forwarded, GeneratorType)
    assert list(forwarded) == [
        types.StreamingRecognizeRequest(streaming_config=config),
        requests[0],
    ]
    assert 'retry' in kwargs
    assert 'timeout' in kwargs
def transcribe_streaming(stream_file):
    """Streams transcription of the given audio file.

    Args:
        stream_file (str): Path to a LINEAR16-encoded audio file.

    Returns:
        tuple: ``(return_text, confidence)`` where ``return_text`` is the
        list of all transcript alternatives (most likely first) and
        ``confidence`` is their mean confidence (NaN when no alternatives
        were returned).
    """
    client = speech.SpeechClient()

    with io.open(stream_file, 'rb') as audio_file:
        content = audio_file.read()

    # In practice, stream should be a generator yielding chunks of audio data.
    stream = [content]
    requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in stream)

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='en-US',
        model="command_and_search")
    streaming_config = types.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    responses = client.streaming_recognize(streaming_config, requests)

    return_text = []
    confidence = []
    for response in responses:
        # Once the transcription has settled, the first result will contain
        # the is_final result. The other results will be for subsequent
        # portions of the audio.
        for result in response.results:
            # The alternatives are ordered from most likely to least.
            for alternative in result.alternatives:
                return_text.append(alternative.transcript)
                confidence.append(alternative.confidence)

    # Bug fix: the original ended with two identical `return` statements,
    # the second unreachable. Also guard the empty case so np.mean([]) does
    # not emit a RuntimeWarning (the value, NaN, is unchanged).
    confidence = np.mean(confidence) if confidence else float('nan')
    return return_text, confidence
def listenToMic(self, recordDuration=99.0, silenceTimeout=0.0):
    """Record from the microphone and return the transcription result.

    Args:
        recordDuration (float): Seconds to record when no silence timeout
            is given.
        silenceTimeout (float): When > 0, stop as soon as the transcript
            has been stable for this many seconds instead of recording for
            a fixed duration.

    Returns:
        The ``result`` attribute of the finished TranscribeThread.
    """
    MIC_SAMPLE_RATE = 16000
    MIC_CHUNK_SIZE = int(MIC_SAMPLE_RATE / 10)  # 100ms

    streaming_config = types.StreamingRecognitionConfig(
        config=self.makeConfig(MIC_SAMPLE_RATE), interim_results=True)

    with MicrophoneStream(MIC_SAMPLE_RATE, MIC_CHUNK_SIZE) as mic:
        chunks = mic.generator()
        request_iter = (types.StreamingRecognizeRequest(audio_content=chunk)
                        for chunk in chunks)
        responses = self.client.streaming_recognize(
            streaming_config, request_iter)

        stop_on_silence = silenceTimeout > 0.0
        if stop_on_silence:
            logging.info(
                "started speech detection - listening for input. stop after {} of silence."
                .format(silenceTimeout))
        else:
            logging.info("started speech detection for {} seconds.".format(
                recordDuration))

        worker = TranscribeThread(responses)
        worker.start()

        if stop_on_silence:
            # Poll until the transcript has stopped changing long enough.
            while not worker.checkTranscript(silenceTimeout):
                time.sleep(0.01)
        else:
            # Record for a fixed number of seconds.
            time.sleep(recordDuration)
        worker.stop()

        # Wait for the thread to end & read its result.
        logging.info("stopping recording thread")
        worker.join()
        result = worker.result
        logging.info("finished speech detection")
        return result
def listen(self, single_utterance=True):
    """Listen on the microphone and return the processed transcription.

    Args:
        single_utterance (bool): Passed through to the streaming config.
            Note: with ``single_utterance=True``, a "go" command uttered by
            the subject after a long pause for self-admin is not registered.

    Returns:
        Whatever ``self.listen_print_loop`` produces for the responses.
    """
    speech_contexts = types.SpeechContext(phrases=['male'])
    language_code = 'en-US'

    client = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        speech_contexts=[speech_contexts],
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=recognition_config, single_utterance=single_utterance)

    with MicrophoneStream(RATE, CHUNK) as stream:
        chunks = stream.generator()
        request_iter = (types.StreamingRecognizeRequest(audio_content=chunk)
                        for chunk in chunks)
        responses = client.streaming_recognize(streaming_config, request_iter)
        # Now, put the transcription responses to use.
        return self.listen_print_loop(responses)
def main():
    """Capture speech from the microphone and return the first transcript.

    Returns:
        str: The first transcript produced by the streaming API (padded
        with spaces to overwrite a previous longer interim line), or
        ``"no result"`` when the request fails, or ``''`` when handling a
        response fails.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-GB'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        try:
            responses = client.streaming_recognize(streaming_config,
                                                   requests,
                                                   timeout=21)
        # Bug fix: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        except Exception:
            noresult = ("no result")
            return noresult

        num_chars_printed = 0
        for response in responses:
            try:
                if not response.results:
                    print("no result ")
                    continue

                # The `results` list is consecutive. For streaming, we only
                # care about the first result being considered, since once
                # it's `is_final`, it moves on to the next utterance.
                result = response.results[0]
                if not result.alternatives:
                    print("no alternatives")
                    continue

                # Display the transcription of the top alternative.
                transcript = result.alternatives[0].transcript

                # If the previous result was longer than this one, print
                # extra spaces to overwrite the previous result.
                overwrite_chars = ' ' * (num_chars_printed - len(transcript))

                if not result.is_final:
                    # Interim result: carriage return so the next line
                    # overwrites it.
                    sys.stdout.write(transcript + overwrite_chars + '\r')
                    sys.stdout.flush()

                print("loop no result")
                # NOTE(review): returns on the first usable response,
                # interim or final — preserved from the original. The
                # unreachable `num_chars_printed = 0` that followed this
                # return was removed (dead code).
                return transcript + overwrite_chars
            # Bug fix: second bare `except:` narrowed to Exception as well.
            except Exception:
                noresult = ''
                return noresult
# Top-level streaming + translation script fragment.
# NOTE(review): relies on names not visible here (RATE, CHUNK, lan_code,
# translator_code, start, StreamRecognition, strem_recognition_module, lt);
# assumed to be defined earlier in the file — verify.
credentials = service_account.Credentials.from_service_account_file(
    'Location of API Key File')
# Accumulates every text fragment fetched from the response stream.
data = []
client = speech.SpeechClient(credentials=credentials)
config = types.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=lan_code)
streaming_config = types.StreamingRecognitionConfig(config=config,
                                                    interim_results=True)
with StreamRecognition(RATE, CHUNK) as stream:
    audio_generator = stream.speech_generator()
    requests = (types.StreamingRecognizeRequest(audio_content=content)
                for content in audio_generator)
    responses = client.streaming_recognize(streaming_config, requests)
    # Elapsed seconds since `start`, minus 10 — presumably an offset for
    # setup time; TODO confirm the meaning of the constant.
    end = (time.time() - start) - 10
    formatted_time = "{:.2f}".format(end)
    # NOTE(review): infinite loop — repeatedly pulls text from the response
    # stream, accumulates it, and translates the running total each pass.
    while True:
        fetched_text = strem_recognition_module.print_speech_loop(
            responses)  # does not append text on screen
        data.append(fetched_text)
        recognized_text = ''.join(data)  # does append text on screen
        finaldata = lt.translator(recognized_text, translator_code)
        universal_data = lt.universal(recognized_text + "++ " + finaldata)