def request_stream(data_stream, rate, interim_results=True):
    """Yield `StreamingRecognizeRequest`s built from a recording audio stream.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # The very first request carries only configuration metadata so the
    # server knows how to interpret the audio that follows.
    config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',   # raw 16-bit signed LE samples
        sample_rate=rate,      # the rate in hertz
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=cloud_speech.StreamingRecognitionConfig(
            interim_results=interim_results,
            config=config,
        ))

    # Every subsequent request carries only audio bytes.
    for chunk in data_stream:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=chunk)
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    """Run one synchronous recognition request against `input_uri` and print
    every alternative together with its confidence score."""
    service = cloud_speech.beta_create_Speech_stub(
        make_channel('speech.googleapis.com', 443))
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding=encoding,            # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
        sample_rate=sample_rate,      # the rate in hertz
        # See https://g.co/cloud/speech/docs/languages for a list of
        # supported languages.
        language_code=language_code,  # a BCP-47 language tag
    )
    audio = cloud_speech.RecognitionAudio(uri=input_uri)
    response = service.SyncRecognize(
        cloud_speech.SyncRecognizeRequest(config=config, audio=audio),
        DEADLINE_SECS)

    # Print the recognition result alternatives and confidence scores.
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u' ({}): {}'.format(alternative.confidence,
                                      alternative.transcript))
def request_stream(channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s for a live recognition stream.

    Sends the streaming configuration first, then repeatedly drains the
    module-level `frames` buffer, writing each chunk to the wave file `wf`
    and forwarding it to the API until `flag_RecogEnd` is set.
    """
    global flag_RecogEnd
    global LANG_CODE
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',      # raw 16-bit signed LE samples
        sample_rate=rate,         # the rate in hertz
        language_code=LANG_CODE,  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=True)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)
    while True:
        time.sleep(SLEEP_SEC)
        if flag_RecogEnd:
            return
        # Once data has accumulated in the buffer, send it.
        if len(frames) > 0:
            data_1frame = frames.pop(0)
            # NOTE(review): b''.join(map(str, ...)) only yields raw audio
            # bytes under Python 2 string semantics — confirm before porting
            # this module to Python 3.
            data_l2s = b''.join(map(str, data_1frame))
            wf.writeframes(data_l2s)  # also record the audio to the wave file
            yield cloud_speech.StreamingRecognizeRequest(
                audio_content=data_l2s)  # forward to Google ASR
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(encoding='LINEAR16',
                                                        sample_rate=rate)
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However, this
        # will give us quick results without having to tell the server when to
        # finalize a piece of audio.
        interim_results=True,
        single_utterance=False)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # Fix: `raise StopIteration()` inside a generator becomes a
                # RuntimeError under PEP 479 (Python 3.7+). A plain return
                # ends the generator cleanly on all versions.
                return
            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
def _process(self, input_file, language_code):
    """Kick off an asynchronous recognition of a FLAC file.

    Args:
        input_file: Path to a `.flac` audio file.
        language_code: A BCP-47 language tag for the transcription.

    Returns:
        The long-running operation handle returned by `AsyncRecognize`.

    Raises:
        RuntimeError: If `input_file` is not a `.flac` file.
    """
    if not input_file.endswith('.flac'):
        raise RuntimeError('Only flac encoding file is supported.')
    # Fix: use a context manager so the file handle is closed promptly
    # instead of leaking until garbage collection.
    with open(input_file, 'rb') as audio_file:
        audio_content = cloud_speech.RecognitionAudio(
            content=audio_file.read())
    sample_rate = self._sample_rate(input_file)
    operation = self.service.AsyncRecognize(
        cloud_speech.AsyncRecognizeRequest(
            config=cloud_speech.RecognitionConfig(
                encoding=AUDIO_ENCODING,
                sample_rate=sample_rate,
                language_code=language_code,
            ),
            audio=audio_content),
        DEADLINE_SECS)
    return operation
def main(input_uri, encoding, sample_rate):
    """Start an asynchronous recognition of `input_uri`, poll the
    long-running operation until it completes, and print each transcript
    with its confidence as a CSV-style line."""
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    response = service.AsyncRecognize(cloud_speech_pb2.AsyncRecognizeRequest(
        config=cloud_speech_pb2.RecognitionConfig(
            # There are a bunch of config options you can specify. See
            # https://goo.gl/KPZn97 for the full list.
            encoding=encoding,        # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
            sample_rate=sample_rate,  # the rate in hertz
            # See
            # https://g.co/cloud/speech/docs/best-practices#language_support
            # for a list of supported languages.
            language_code='fi-FI',    # a BCP-47 language tag
        ),
        audio=cloud_speech_pb2.RecognitionAudio(
            uri=input_uri,
        )
    ), DEADLINE_SECS)

    # Print the longrunning operation handle.
    # Fix: the original `print >> sys.stderr, ...` statements are Python-2-only
    # syntax errors under Python 3; use the print function instead.
    # (Running under Python 2 would additionally require
    # `from __future__ import print_function` at the top of the file.)
    print(response, file=sys.stderr)

    # Construct a long running operation endpoint.
    service = operations_grpc_pb2.beta_create_Operations_stub(channel)
    name = response.name
    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...', file=sys.stderr)
        time.sleep(1)
        # Get the long running operation with response.
        response = service.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name),
            DEADLINE_SECS)
        if response.done:
            break

    # Print the recognition results.
    results = cloud_speech_pb2.AsyncRecognizeResponse()
    response.response.Unpack(results)
    for result in results.results:
        for alternative in result.alternatives:
            # NOTE(review): .encode('utf-8') prints a bytes repr on Python 3;
            # kept as-is to preserve the original output format — confirm
            # whether plain `alternative.transcript` is preferred.
            print(('"{}",{}').format(alternative.transcript.encode('utf-8'),
                                     alternative.confidence))
def main(input_uri, encoding, sample_rate):
    """Transcribe the audio at `input_uri` synchronously and print the raw
    recognition results."""
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech.RecognitionConfig(
        encoding=encoding,
        sample_rate=sample_rate,
    )
    audio = cloud_speech.RecognitionAudio(uri=input_uri)
    response = service.SyncRecognize(
        cloud_speech.SyncRecognizeRequest(config=config, audio=audio),
        DEADLINE_SECS)
    # Print the recognition results.
    print(response.results)
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    """Start an asynchronous recognition job, poll it to completion, and
    print every alternative with its confidence score."""
    channel = make_channel('speech.googleapis.com', 443)
    speech_service = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech_pb2.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding=encoding,            # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
        sample_rate=sample_rate,      # the rate in hertz
        # See https://g.co/cloud/speech/docs/languages for a list of
        # supported languages.
        language_code=language_code,  # a BCP-47 language tag
    )
    audio = cloud_speech_pb2.RecognitionAudio(uri=input_uri)
    operation = speech_service.AsyncRecognize(
        cloud_speech_pb2.AsyncRecognizeRequest(config=config, audio=audio),
        DEADLINE_SECS)

    # Print the longrunning operation handle.
    print(operation)

    # Poll the long-running operations endpoint until the job reports done.
    operations_service = operations_grpc_pb2.beta_create_Operations_stub(
        channel)
    name = operation.name
    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        operation = operations_service.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name),
            DEADLINE_SECS)
        if operation.done:
            break

    response = cloud_speech_pb2.AsyncRecognizeResponse()
    operation.response.Unpack(response)
    # Print the recognition result alternatives and confidence scores.
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u' ({}): {}'.format(alternative.confidence,
                                      alternative.transcript))
def request_stream(self):
    """Yields `StreamingRecognizeRequest`s: the streaming config first, then
    audio frames popped from `self.frames`.

    The generator ends when `self.should_finish_stream` is set, or after the
    head frame has stayed below `self.silent_decibel` for more than
    `self.max_silent_cnt` consecutive polls.
    """
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=self.audio_encoding,
        sample_rate=self.sampling_rate,
        language_code=self.lang_code,
        max_alternatives=1,
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=True)
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)
    silent_cnt = 0
    while True:
        time.sleep(self.frame_seconds / 4)
        if self.should_finish_stream:
            return
        if len(self.frames) > 0:
            # Volume check: stop streaming once silence has persisted for
            # more than `max_silent_cnt` consecutive frames.
            data = self.frames[0]
            rms = audioop.rms(data, 2)
            # Guard against log10(0) when the frame is pure digital silence.
            decibel = 20 * math.log10(rms) if rms > 0 else 0
            if decibel < self.silent_decibel:
                silent_cnt = silent_cnt + 1
            else:
                silent_cnt = 0
            if silent_cnt > self.max_silent_cnt:
                print(sys._getframe().f_code.co_name,
                      "find silent frames return")
                return
        if len(self.frames) > 0:
            yield cloud_speech_pb2.StreamingRecognizeRequest(
                audio_content=self.frames.pop(0))
def request_stream(self, stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s for a Korean (ko-KR) live
    transcription, reading audio from `self.record_audio` until ROS shuts
    down or the stream runs dry.

    Args:
        stop_audio: Unused here; kept for interface compatibility.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending.
    """
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',
        sample_rate=rate,
        language_code='ko-KR',
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=False
    )
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with self.record_audio(channels, rate, chunk) as audio_stream:
        while not rospy.is_shutdown():
            data = audio_stream.read(chunk)
            if not data:
                # Fix: `raise StopIteration()` inside a generator becomes a
                # RuntimeError under PEP 479 (Python 3.7+); return ends the
                # stream cleanly.
                return
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/A6xv5G for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,     # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However, this
        # will give us quick results without having to tell the server when to
        # finalize a piece of audio.
        interim_results=True,
        single_utterance=True)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # Fix: `raise StopIteration()` inside a generator becomes a
                # RuntimeError under PEP 479 (Python 3.7+). A plain return
                # ends the generator cleanly on all versions.
                return
            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
def request_stream():
    """Yield the streaming configuration request, then audio frames popped
    from the global `frames` buffer until `should_finish_stream` is set."""
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=args.audio_encoding,
        sample_rate=args.sampling_rate,
        language_code=args.lang_code,
        max_alternatives=1,
    )
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=cloud_speech_pb2.StreamingRecognitionConfig(
            config=config,
            interim_results=True,
            single_utterance=True))

    while True:
        # Poll at a quarter of the frame period.
        time.sleep(args.frame_seconds / 4)
        if should_finish_stream:
            return
        if frames:
            yield cloud_speech_pb2.StreamingRecognizeRequest(
                audio_content=frames.pop(0))
def main(input_uri, encoding, sample_rate):
    """Synchronously transcribe the audio at `input_uri` and print the raw
    recognition results."""
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/A6xv5G for the full list.
        encoding=encoding,        # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
        sample_rate=sample_rate,  # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',    # a BCP-47 language tag
    )
    request = cloud_speech.SyncRecognizeRequest(
        config=config,
        audio=cloud_speech.RecognitionAudio(uri=input_uri))
    response = service.SyncRecognize(request, DEADLINE_SECS)
    # Print the recognition results.
    print(response.results)
def request_stream():
    """Yields `StreamingRecognizeRequest`s: the streaming config first, then
    audio accumulated from the global `queue` in frame-sized buffers.

    Stops when `should_finish_stream` is set, or — marking the recognition
    as unsuccessful — when a full frame falls below `args.silent_decibel`.
    """
    global queue
    global recognition_result
    global should_finish_stream
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=args.audio_encoding,
        sample_rate=args.sampling_rate,
        language_code=args.lang_code,
        max_alternatives=1,
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=True
    )
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)
    # Number of bytes to accumulate before flushing a frame to the API.
    frame_length = int(args.sampling_rate * args.frame_seconds)
    frame = b""
    while True:
        if should_finish_stream:
            return
        try:
            # Non-blocking get: raises when the queue is momentarily empty.
            data = queue.get(False)
            frame += data
        except Exception as e:
            # Queue drained: flush the buffered frame if it is long enough.
            if len(frame) > frame_length:
                rms = audioop.rms(frame, 2)
                # Guard against log10(0) on pure digital silence.
                decibel = 20 * math.log10(rms) if rms > 0 else 0
                if decibel < args.silent_decibel:
                    recognition_result.success = False
                    return
                yield cloud_speech_pb2.StreamingRecognizeRequest(
                    audio_content=frame)
                frame = b""
            # NOTE(review): original indentation was lost; this sleep is
            # placed at the except-handler level (back off while the queue
            # is empty) — confirm against the original source.
            time.sleep(args.frame_seconds / 4)
def main(input_uri, encoding, sample_rate):
    """Start an asynchronous recognition job for `input_uri`, poll the
    long-running operation until it finishes, and print the results."""
    channel = make_channel('speech.googleapis.com', 443)
    speech_service = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=encoding,
        sample_rate=sample_rate,
    )
    audio = cloud_speech_pb2.RecognitionAudio(uri=input_uri)
    response = speech_service.AsyncRecognize(
        cloud_speech_pb2.AsyncRecognizeRequest(config=config, audio=audio),
        DEADLINE_SECS)

    # Print the longrunning operation handle.
    print(response)

    # Poll the long-running operations endpoint until the job reports done.
    operations_service = operations_grpc_pb2.beta_create_Operations_stub(
        channel)
    name = response.name
    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        response = operations_service.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name),
            DEADLINE_SECS)
        if response.done:
            break

    # Unpack and print the recognition results.
    results = cloud_speech_pb2.AsyncRecognizeResponse()
    response.response.Unpack(results)
    print(results)
def g_request_steam(self, data_stream, rate, init_buff=None):
    """Yield streaming recognition requests: the configuration first, then
    any buffered audio in `init_buff`, then each chunk of `data_stream`.

    Args:
        data_stream: Iterable of raw LINEAR16 audio chunks to send.
        rate: The sampling rate in hertz.
        init_buff: Optional audio captured before the stream was opened.
    """
    # Bias recognition toward the application's command vocabulary.
    command_phrases = ["mirror", "add", "item", "help", "close", "clothes",
                       "tag", "tags", "find", "number 1", "wear", "start",
                       "stop", "stylist", "wardrobe", "exit",
                       "1", "2", "3", "4", "5", "6", "7", "8"]
    r_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',
        sample_rate=rate,
        language_code='en-US',
        speech_context=cloud_speech.SpeechContext(phrases=command_phrases),
    )
    r_stream_config = cloud_speech.StreamingRecognitionConfig(
        config=r_config,
        single_utterance=False,
        interim_results=False)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=r_stream_config)

    if init_buff:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=init_buff)
    for data in data_stream:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,     # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
    )
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # Fix: `raise StopIteration()` inside a generator becomes a
                # RuntimeError under PEP 479 (Python 3.7+). A plain return
                # ends the generator cleanly on all versions.
                return
            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)