def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16', sample_rate=rate)
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However,
        # this will give us quick results without having to tell the server
        # when to finalize a piece of audio.
        interim_results=True,
        single_utterance=False)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # BUG FIX (PEP 479): raising StopIteration inside a generator
                # is converted to RuntimeError on Python 3.7+. A plain
                # `return` is the correct way to end the generator.
                return
            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
def request_stream(data_stream, rate, interim_results=True):
    """Generate `StreamingRecognizeRequest` messages for a streaming session.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # The first request carries only configuration; the server needs it to
    # know how to interpret all of the audio chunks that follow.
    session_config = cloud_speech.StreamingRecognitionConfig(
        interim_results=interim_results,
        config=cloud_speech.RecognitionConfig(
            # There are a bunch of config options you can specify. See
            # https://goo.gl/KPZn97 for the full list.
            encoding='LINEAR16',   # raw 16-bit signed LE samples
            sample_rate=rate,      # the rate in hertz
            # See http://g.co/cloud/speech/docs/languages
            # for a list of supported languages.
            language_code='en-US',  # a BCP-47 language tag
        ),
    )
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=session_config)

    # Every subsequent request carries nothing but audio content.
    for audio_chunk in data_stream:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=audio_chunk)
def request_stream(channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yield `StreamingRecognizeRequest`s built from the module-level buffer.

    Sends the stream configuration first, then polls the shared `frames`
    list, writing each popped chunk to the open wave file `wf` and
    forwarding it to the Speech API, until the module-level flag
    `flag_RecogEnd` is set.
    """
    global flag_RecogEnd
    global LANG_CODE
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        language_code=LANG_CODE,  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=True)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)
    while True:
        time.sleep(SLEEP_SEC)
        if flag_RecogEnd:
            return
        # Once data has accumulated in the buffer, send it.
        if len(frames) > 0:
            data_1frame = frames.pop(0)
            # NOTE(review): b''.join(map(str, ...)) only works on Python 2,
            # where str(chunk) is the identity for byte strings; on Python 3
            # this raises TypeError (joining str into bytes) — verify the
            # target interpreter.
            data_l2s = b''.join(map(str, data_1frame))
            wf.writeframes(data_l2s)  # also persist the chunk to the wave file
            yield cloud_speech.StreamingRecognizeRequest(
                audio_content=data_l2s)  # google ASR
def request_stream(self):
    """Yield `StreamingRecognizeRequest`s from the buffered `self.frames`.

    Sends the stream configuration first, then polls `self.frames` and
    forwards each buffered chunk to the Speech API. The generator ends
    (closing the request stream) when `self.should_finish_stream` is set,
    or after more than `self.max_silent_cnt` consecutive frames quieter
    than `self.silent_decibel`.
    """
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=self.audio_encoding,
        sample_rate=self.sampling_rate,
        language_code=self.lang_code,
        max_alternatives=1,
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=True)
    yield cloud_speech_pb2.StreamingRecognizeRequest(streaming_config=streaming_config)
    silent_cnt=0
    while True:
        #print(sys._getframe().f_code.co_name,"1")
        time.sleep(self.frame_seconds / 4)
        #print("self.should_finish_stream", self.should_finish_stream, "len", len(frames))
        if self.should_finish_stream:
            return

        if len(self.frames) > 0:
            # Volume check: leave the loop once silence has persisted for
            # more than max_silent_cnt consecutive frames.
            # Peeks at frames[0] without popping; the same frame is popped
            # and sent below.
            data = self.frames[0]
            rms = audioop.rms(data, 2)  # width=2 assumes 16-bit samples — TODO confirm
            decibel = 20 * math.log10(rms) if rms > 0 else 0
            if decibel < self.silent_decibel:
                silent_cnt = silent_cnt+1
            else :
                silent_cnt = 0
            if silent_cnt > self.max_silent_cnt :
                print(sys._getframe().f_code.co_name, "find silent frames return")
                return
        #print("request_stream2 3 framen len=", len(self.frames))
        if len(self.frames) > 0:
            #print(sys._getframe().f_code.co_name,"2", "framelen=",len(self.frames))
            #self.frames.pop(0)
            yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=self.frames.pop(0))
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/A6xv5G for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However,
        # this will give us quick results without having to tell the server
        # when to finalize a piece of audio.
        interim_results=True,
        single_utterance=True)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # BUG FIX (PEP 479): raising StopIteration inside a generator
                # is converted to RuntimeError on Python 3.7+. A plain
                # `return` is the correct way to end the generator.
                return
            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
def request_stream(self, stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yield `StreamingRecognizeRequest`s from a live microphone stream.

    Sends the stream configuration first, then reads `chunk`-sized buffers
    from `self.record_audio` and forwards them until ROS shuts down.

    Args:
        stop_audio: Accepted for interface compatibility.
            NOTE(review): currently unused — the loop is terminated by
            `rospy.is_shutdown()` instead; confirm whether callers expect
            this event to stop the stream.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending.
    """
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',    # raw 16-bit signed LE samples
        sample_rate=rate,       # the rate in hertz
        language_code='ko-KR',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=False
    )
    # The initial request carries only the stream configuration.
    yield cloud_speech.StreamingRecognizeRequest(streaming_config=streaming_config)

    with self.record_audio(channels, rate, chunk) as audio_stream:
        while not rospy.is_shutdown():
            data = audio_stream.read(chunk)
            if not data:
                # BUG FIX (PEP 479): raising StopIteration inside a generator
                # is converted to RuntimeError on Python 3.7+. A plain
                # `return` is the correct way to end the generator.
                return
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
def request_stream():
    """Yield `StreamingRecognizeRequest`s from audio buffered in `queue`.

    Sends the stream configuration first, then drains the global `queue`
    into a byte buffer. Whenever the queue runs dry and at least one
    frame's worth of audio has accumulated, the buffer is either sent to
    the API or — if it is quieter than ``args.silent_decibel`` — the
    stream is aborted with ``recognition_result.success = False``.
    Ends when `should_finish_stream` is set.
    """
    global queue
    global recognition_result
    global should_finish_stream

    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=args.audio_encoding,
        sample_rate=args.sampling_rate,
        language_code=args.lang_code,
        max_alternatives=1,
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=True
    )
    # The initial request carries only the stream configuration.
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    min_frame_bytes = int(args.sampling_rate * args.frame_seconds)
    buffered = b""
    while True:
        if should_finish_stream:
            return
        try:
            buffered += queue.get(False)
        except Exception:
            # Queue is (most likely) empty: flush once enough audio has
            # accumulated, otherwise just wait for more.
            if len(buffered) > min_frame_bytes:
                rms = audioop.rms(buffered, 2)
                decibel = 20 * math.log10(rms) if rms > 0 else 0
                if decibel < args.silent_decibel:
                    # Too quiet — treat the whole attempt as a failure.
                    recognition_result.success = False
                    return
                yield cloud_speech_pb2.StreamingRecognizeRequest(
                    audio_content=buffered)
                buffered = b""
            time.sleep(args.frame_seconds / 4)
def request_stream():
    """Yield `StreamingRecognizeRequest`s: config first, then buffered audio.

    Polls the module-level `frames` list, forwarding one buffered chunk per
    iteration, until `should_finish_stream` is set.
    """
    session_config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=cloud_speech_pb2.RecognitionConfig(
            encoding=args.audio_encoding,
            sample_rate=args.sampling_rate,
            language_code=args.lang_code,
            max_alternatives=1,
        ),
        interim_results=True,
        single_utterance=True)
    # The initial request carries only the stream configuration.
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=session_config)

    while True:
        # Sleep first, then check the flag — preserves the original pacing.
        time.sleep(args.frame_seconds / 4)
        if should_finish_stream:
            return
        if frames:
            yield cloud_speech_pb2.StreamingRecognizeRequest(
                audio_content=frames.pop(0))
def g_request_steam(self, data_stream, rate, init_buff=None):
    """Yield `StreamingRecognizeRequest`s for the mirror command vocabulary.

    Sends the stream configuration (with a speech context biased toward the
    mirror's voice commands) first, then an optional pre-captured buffer,
    then every chunk from `data_stream`.

    Args:
        data_stream: Iterable of raw audio chunks to send.
        rate: The sampling rate in hertz.
        init_buff: Optional audio captured before the stream started; sent
            ahead of `data_stream` when truthy.
    """
    # Phrase hints bias recognition toward the app's command vocabulary.
    command_phrases = [
        "mirror", "add", "item", "help", "close", "clothes",
        "tag", "tags", "find", "number 1", "wear", "start",
        "stop", "stylist", "wardrobe", "exit",
        "1", "2", "3", "4", "5", "6", "7", "8",
    ]
    stream_config = cloud_speech.StreamingRecognitionConfig(
        config=cloud_speech.RecognitionConfig(
            encoding='LINEAR16',
            sample_rate=rate,
            language_code='en-US',
            speech_context=cloud_speech.SpeechContext(phrases=command_phrases),
        ),
        single_utterance=False,
        interim_results=False)
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=stream_config)

    if init_buff:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=init_buff)
    for audio_chunk in data_stream:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=audio_chunk)
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
    )
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # BUG FIX (PEP 479): raising StopIteration inside a generator
                # is converted to RuntimeError on Python 3.7+. A plain
                # `return` is the correct way to end the generator.
                return
            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)