Пример #1
0
def request_stream(data_stream, rate, interim_results=True):
    """Generate `StreamingRecognizeRequest` messages for a streaming session.

    The first yielded request carries only the streaming configuration; every
    subsequent request carries one chunk of raw audio.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # The server needs the stream's metadata up front so it knows how to
    # decode the audio that follows.
    config = cloud_speech.RecognitionConfig(
        # See https://goo.gl/KPZn97 for the full set of config options.
        encoding='LINEAR16',   # raw 16-bit signed LE samples
        sample_rate=rate,      # hertz
        # Supported languages: http://g.co/cloud/speech/docs/languages
        language_code='en-US',  # a BCP-47 language tag
    )
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=cloud_speech.StreamingRecognitionConfig(
            interim_results=interim_results,
            config=config,
        ))

    # Every request after the first carries audio content only.
    for chunk in data_stream:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=chunk)
Пример #2
0
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    """Synchronously transcribe the audio at `input_uri` and print each
    alternative together with its confidence score.

    Args:
        input_uri: URI of the audio to transcribe.
        encoding: Audio encoding; one of LINEAR16, FLAC, MULAW, AMR, AMR_WB.
        sample_rate: Sampling rate of the audio in hertz.
        language_code: BCP-47 language tag for the transcription language.
    """
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech.beta_create_Speech_stub(channel)
    # Request/response shapes come from the proto the grpc client lib was
    # generated from. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech.RecognitionConfig(
        # Full list of config options: https://goo.gl/KPZn97
        encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
        sample_rate=sample_rate,  # hertz
        # Supported languages: https://g.co/cloud/speech/docs/languages
        language_code=language_code,  # a BCP-47 language tag
    )
    request = cloud_speech.SyncRecognizeRequest(
        config=config,
        audio=cloud_speech.RecognitionAudio(uri=input_uri, ),
    )
    response = service.SyncRecognize(request, DEADLINE_SECS)

    # Print the recognition result alternatives and confidence scores.
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u'  ({}): {}'.format(alternative.confidence,
                                       alternative.transcript))
Пример #3
0
def request_stream(channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yield a streaming config request, then poll the global `frames` buffer
    and yield each buffered chunk as audio content until the global
    `flag_RecogEnd` flag is set. Each chunk is also written to the global
    wave file `wf`.
    """
    global flag_RecogEnd
    global LANG_CODE
    recognition_config = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        language_code=LANG_CODE,  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True, single_utterance=True)

    # The first request must carry the stream configuration only.
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    while True:
        time.sleep(SLEEP_SEC)

        if flag_RecogEnd:
            return

        # Once data has accumulated in the buffer, send it.
        if len(frames) > 0:
            data_1frame = frames.pop(0)
            # NOTE(review): b''.join(map(str, ...)) only yields bytes on
            # Python 2; on Python 3, str() produces str and this join raises
            # TypeError — confirm the target interpreter.
            data_l2s = b''.join(map(str, data_1frame))
            wf.writeframes(data_l2s)  # write to the wave file
            yield cloud_speech.StreamingRecognizeRequest(
                audio_content=data_l2s)  # google ASR
Пример #4
0
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(encoding='LINEAR16',
                                                        sample_rate=rate)
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However, this
        # will give us quick results without having to tell the server when to
        # finalize a piece of audio.
        interim_results=True,
        single_utterance=False)

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # PEP 479: raising StopIteration inside a generator becomes a
                # RuntimeError on Python 3.7+; a plain return ends the
                # generator cleanly instead.
                return

            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
Пример #5
0
 def _process(self, input_file, language_code):
     """Submit `input_file` (FLAC only) for asynchronous recognition.

     Args:
         input_file: Path to a .flac audio file.
         language_code: BCP-47 language tag for the transcription.

     Returns:
         The long-running operation handle returned by AsyncRecognize.

     Raises:
         RuntimeError: If `input_file` does not have a .flac extension.
     """
     if not input_file.endswith('.flac'):
         raise RuntimeError('Only flac encoding file is supported.')
     # Use a context manager so the file handle is closed promptly instead of
     # being leaked until garbage collection.
     with open(input_file, 'rb') as audio_file:
         audio_content = cloud_speech.RecognitionAudio(
             content=audio_file.read())
     sample_rate = self._sample_rate(input_file)
     operation = self.service.AsyncRecognize(
         cloud_speech.AsyncRecognizeRequest(
             config=cloud_speech.RecognitionConfig(
                 encoding=AUDIO_ENCODING,
                 sample_rate=sample_rate,
                 language_code=language_code,
             ),
             audio=audio_content), DEADLINE_SECS)
     return operation
Пример #6
0
def main(input_uri, encoding, sample_rate):
    """Kick off an asynchronous recognition of `input_uri`, poll the
    long-running operation until it completes, and print one CSV-style line
    per alternative.

    Args:
        input_uri: URI of the audio to transcribe.
        encoding: Audio encoding; one of LINEAR16, FLAC, MULAW, AMR, AMR_WB.
        sample_rate: Sampling rate of the audio in hertz.
    """
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    response = service.AsyncRecognize(cloud_speech_pb2.AsyncRecognizeRequest(
        config=cloud_speech_pb2.RecognitionConfig(
            # There are a bunch of config options you can specify. See
            # https://goo.gl/KPZn97 for the full list.
            encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
            sample_rate=sample_rate,  # the rate in hertz
            # See
            # https://g.co/cloud/speech/docs/best-practices#language_support
            # for a list of supported languages.
            language_code='fi-FI',  # a BCP-47 language tag
        ),
        audio=cloud_speech_pb2.RecognitionAudio(
            uri=input_uri,
        )
    ), DEADLINE_SECS)

    # Print the longrunning operation handle. (Fixed: the original used the
    # Python 2-only `print >> sys.stderr` statement form.)
    print(response, file=sys.stderr)

    # Construct a long running operation endpoint.
    service = operations_grpc_pb2.beta_create_Operations_stub(channel)

    name = response.name

    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...', file=sys.stderr)
        time.sleep(1)
        # Get the long running operation with response.
        response = service.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name),
            DEADLINE_SECS)

        if response.done:
            break

    # Print the recognition results.
    results = cloud_speech_pb2.AsyncRecognizeResponse()
    response.response.Unpack(results)
    for result in results.results:
        # Fixed: this inner loop was tab-indented while the surrounding code
        # uses spaces — a TabError on Python 3.
        for alternative in result.alternatives:
            # transcript is already str on Python 3; the original's
            # .encode('utf-8') would print a bytes repr like b'...'.
            print('"{}",{}'.format(alternative.transcript,
                                   alternative.confidence))
def main(input_uri, encoding, sample_rate):
    """Synchronously transcribe the audio at `input_uri` and print the raw
    recognition results.

    Args:
        input_uri: URI of the audio to transcribe.
        encoding: Audio encoding identifier understood by the service.
        sample_rate: Sampling rate of the audio in hertz.
    """
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech.beta_create_Speech_stub(channel)
    # Request/response shapes come from the proto the grpc client lib was
    # generated from. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    request = cloud_speech.SyncRecognizeRequest(
        config=cloud_speech.RecognitionConfig(
            encoding=encoding,
            sample_rate=sample_rate,
        ),
        audio=cloud_speech.RecognitionAudio(uri=input_uri, ),
    )
    response = service.SyncRecognize(request, DEADLINE_SECS)
    # Print the recognition results.
    print(response.results)
Пример #8
0
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    """Start an asynchronous recognition of `input_uri`, poll the resulting
    long-running operation until it finishes, and print each alternative
    with its confidence score.

    Args:
        input_uri: URI of the audio to transcribe.
        encoding: Audio encoding; one of LINEAR16, FLAC, MULAW, AMR, AMR_WB.
        sample_rate: Sampling rate of the audio in hertz.
        language_code: BCP-47 language tag for the transcription language.
    """
    channel = make_channel('speech.googleapis.com', 443)
    speech_stub = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # Request/response shapes come from the proto the grpc client lib was
    # generated from. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech_pb2.RecognitionConfig(
        # Full list of config options: https://goo.gl/KPZn97
        encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
        sample_rate=sample_rate,  # hertz
        # Supported languages: https://g.co/cloud/speech/docs/languages
        language_code=language_code,  # a BCP-47 language tag
    )
    request = cloud_speech_pb2.AsyncRecognizeRequest(
        config=config,
        audio=cloud_speech_pb2.RecognitionAudio(uri=input_uri, ),
    )
    operation = speech_stub.AsyncRecognize(request, DEADLINE_SECS)

    # Show the long-running operation handle.
    print(operation)

    # Poll the operations endpoint until the job is done.
    operations_stub = operations_grpc_pb2.beta_create_Operations_stub(channel)
    name = operation.name
    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        operation = operations_stub.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name), DEADLINE_SECS)
        if operation.done:
            break

    # Unpack the result and print each alternative with its confidence.
    response = cloud_speech_pb2.AsyncRecognizeResponse()
    operation.response.Unpack(response)
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u'  ({}): {}'.format(alternative.confidence,
                                       alternative.transcript))
Пример #9
0
    def request_stream(self):
        """Yield a streaming config request, then yield buffered audio frames
        from `self.frames` until either `self.should_finish_stream` is set or
        more than `self.max_silent_cnt` consecutive frames fall below the
        `self.silent_decibel` volume threshold.
        """
        recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=self.audio_encoding,
        sample_rate=self.sampling_rate,
        language_code=self.lang_code,
        max_alternatives=1,
        )
        streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=True)

        # The first request carries only the stream configuration.
        yield cloud_speech_pb2.StreamingRecognizeRequest(streaming_config=streaming_config)

        silent_cnt=0
        while True:
            #print(sys._getframe().f_code.co_name,"1")
            time.sleep(self.frame_seconds / 4)
            #print("self.should_finish_stream", self.should_finish_stream, "len", len(frames))

            if self.should_finish_stream:
                return

            if len(self.frames) > 0:
                # Volume check: bail out of the stream once silence has
                # persisted for several consecutive frames.

                data = self.frames[0]
                rms = audioop.rms(data, 2)  # assumes 16-bit (2-byte) samples
                decibel = 20 * math.log10(rms) if rms > 0 else 0

                if decibel < self.silent_decibel:
                    silent_cnt = silent_cnt+1
                else :
                    silent_cnt = 0

                if silent_cnt > self.max_silent_cnt :
                    print(sys._getframe().f_code.co_name, "find silent frames return")
                    return

            #print("request_stream2 3 framen len=", len(self.frames))
            if len(self.frames) > 0:
                #print(sys._getframe().f_code.co_name,"2", "framelen=",len(self.frames))
                #self.frames.pop(0)
                # Pop (don't just peek) so each frame is sent exactly once.
                yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=self.frames.pop(0))
Пример #10
0
    def request_stream(self, stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
        """Yield a streaming config request (Korean, LINEAR16), then one
        request per microphone chunk until ROS shuts down or the stream ends.

        Args:
            stop_audio: Unused by this method; kept for interface
                compatibility with callers.
            channels: How many audio channels to record.
            rate: The sampling rate in hertz.
            chunk: Buffer audio into chunks of this size before sending.
        """
        recognition_config = cloud_speech.RecognitionConfig(
            encoding='LINEAR16',
            sample_rate=rate,
            language_code='ko-KR',
        )
        streaming_config = cloud_speech.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=False
        )

        yield cloud_speech.StreamingRecognizeRequest(streaming_config=streaming_config)
        with self.record_audio(channels, rate, chunk) as audio_stream:
            while not rospy.is_shutdown():
                data = audio_stream.read(chunk)
                if not data:
                    # PEP 479: raising StopIteration inside a generator is a
                    # RuntimeError on Python 3.7+; return ends it cleanly.
                    return
                yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
Пример #11
0
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/A6xv5G for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config,
        # Note that setting interim_results to True means that you'll likely
        # get multiple results for the same bit of audio, as the system
        # re-interprets audio in the context of subsequent audio. However, this
        # will give us quick results without having to tell the server when to
        # finalize a piece of audio.
        interim_results=True,
        single_utterance=True)

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # PEP 479: raising StopIteration inside a generator becomes a
                # RuntimeError on Python 3.7+; a plain return ends the
                # generator cleanly instead.
                return

            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
Пример #12
0
def request_stream():
    """Yield a streaming config request, then poll the global `frames`
    buffer and yield each buffered chunk as audio content until the global
    `should_finish_stream` flag is set.
    """
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=args.audio_encoding,
        sample_rate=args.sampling_rate,
        language_code=args.lang_code,
        max_alternatives=1,
    )
    # The first request carries only the stream configuration.
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=cloud_speech_pb2.StreamingRecognitionConfig(
            config=config, interim_results=True, single_utterance=True))

    while True:
        time.sleep(args.frame_seconds / 4)

        if should_finish_stream:
            return

        if frames:
            yield cloud_speech_pb2.StreamingRecognizeRequest(
                audio_content=frames.pop(0))
Пример #13
0
def main(input_uri, encoding, sample_rate):
    """Synchronously transcribe the audio at `input_uri` and print the raw
    recognition results.

    Args:
        input_uri: URI of the audio to transcribe.
        encoding: Audio encoding; one of LINEAR16, FLAC, MULAW, AMR, AMR_WB.
        sample_rate: Sampling rate of the audio in hertz.
    """
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech.beta_create_Speech_stub(channel)
    # Request/response shapes come from the proto the grpc client lib was
    # generated from. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    config = cloud_speech.RecognitionConfig(
        # Full list of config options: https://goo.gl/A6xv5G
        encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
        sample_rate=sample_rate,  # hertz
        # Supported languages:
        # https://g.co/cloud/speech/docs/best-practices#language_support
        language_code='en-US',  # a BCP-47 language tag
    )
    request = cloud_speech.SyncRecognizeRequest(
        config=config,
        audio=cloud_speech.RecognitionAudio(uri=input_uri, ),
    )
    response = service.SyncRecognize(request, DEADLINE_SECS)
    # Print the recognition results.
    print(response.results)
Пример #14
0
def request_stream():
    """Yield a streaming config request, then accumulate microphone data from
    the global `queue` into frames of roughly `args.frame_seconds` worth of
    audio and yield each frame as audio content.

    Ends early — setting ``recognition_result.success = False`` — when a full
    frame is quieter than `args.silent_decibel`, or as soon as the global
    `should_finish_stream` flag is set.
    """
    global queue
    global recognition_result
    global should_finish_stream

    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=args.audio_encoding,
        sample_rate=args.sampling_rate,
        language_code=args.lang_code,
        max_alternatives=1,
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=True,
        single_utterance=True
    )

    yield cloud_speech_pb2.StreamingRecognizeRequest(streaming_config=streaming_config)

    # NOTE(review): this counts samples (rate * seconds), while len(frame) is
    # in bytes — for 16-bit audio a frame flushes at ~half a frame_seconds'
    # worth of audio. Confirm whether that is intended before changing it.
    frame_length = int(args.sampling_rate * args.frame_seconds)
    frame = b""

    while True:
        if should_finish_stream:
            return

        try:
            data = queue.get(False)
            frame += data
        except Exception:
            # The non-blocking queue.get raises when no data is waiting; the
            # global `queue` instance shadows the stdlib module here, so the
            # specific Empty class cannot be referenced by name. Treat "no new
            # data" as the cue to flush the buffered frame. (Fixed: the
            # original bound the exception to an unused variable `e`.)
            if len(frame) > frame_length:
                rms = audioop.rms(frame, 2)
                decibel = 20 * math.log10(rms) if rms > 0 else 0
                # A whole frame below the silence threshold means the speaker
                # stopped: report failure and end the stream.
                if decibel < args.silent_decibel:
                    recognition_result.success = False
                    return
                yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=frame)
                frame = b""
            time.sleep(args.frame_seconds / 4)
def main(input_uri, encoding, sample_rate):
    """Start an asynchronous recognition of `input_uri`, poll the resulting
    long-running operation until it completes, and print the raw results.

    Args:
        input_uri: URI of the audio to transcribe.
        encoding: Audio encoding identifier understood by the service.
        sample_rate: Sampling rate of the audio in hertz.
    """
    channel = make_channel('speech.googleapis.com', 443)
    speech_stub = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # Request/response shapes come from the proto the grpc client lib was
    # generated from. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    request = cloud_speech_pb2.AsyncRecognizeRequest(
        config=cloud_speech_pb2.RecognitionConfig(
            encoding=encoding,
            sample_rate=sample_rate,
        ),
        audio=cloud_speech_pb2.RecognitionAudio(uri=input_uri, ),
    )
    response = speech_stub.AsyncRecognize(request, DEADLINE_SECS)

    # Show the long-running operation handle.
    print(response)

    # Poll the operations endpoint until the job is done.
    operations_stub = operations_grpc_pb2.beta_create_Operations_stub(channel)
    name = response.name
    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        response = operations_stub.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name), DEADLINE_SECS)
        if response.done:
            break

    # Unpack and print the recognition results.
    results = cloud_speech_pb2.AsyncRecognizeResponse()
    response.response.Unpack(results)
    print(results)
Пример #16
0
    def g_request_steam(self, data_stream, rate, init_buff=None):
        """Yield streaming recognition requests: the configuration first,
        then an optional pre-buffered chunk, then every chunk produced by
        `data_stream`.

        Args:
            data_stream: Iterable of raw audio chunks to send.
            rate: Audio sampling rate in hertz.
            init_buff: Optional audio captured before the stream was opened;
                sent first when provided.
        """
        phrase_hints = cloud_speech.SpeechContext(
            phrases=["mirror", "add", "item", "help", "close", "clothes", "tag", "tags", "find", "number 1", "wear", "start", "stop", "stylist", "wardrobe", "exit", "1", "2", "3", "4", "5", "6", "7", "8"]
        )
        config = cloud_speech.RecognitionConfig(
            encoding='LINEAR16',
            sample_rate=rate,
            language_code='en-US',
            speech_context=phrase_hints,
        )
        yield cloud_speech.StreamingRecognizeRequest(
            streaming_config=cloud_speech.StreamingRecognitionConfig(
                config=config,
                single_utterance=False,
                interim_results=False))

        # Flush any audio captured before the stream was opened.
        if init_buff:
            yield cloud_speech.StreamingRecognizeRequest(audio_content=init_buff)

        for chunk in data_stream:
            yield cloud_speech.StreamingRecognizeRequest(audio_content=chunk)
def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        stop_audio: A threading.Event object stops the recording when set.
        channels: How many audio channels to record.
        rate: The sampling rate in hertz.
        chunk: Buffer audio into chunks of this size before sending to the api.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See
        # https://g.co/cloud/speech/docs/best-practices#language_support
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config, )

    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    with record_audio(channels, rate, chunk) as audio_stream:
        while not stop_audio.is_set():
            data = audio_stream.read(chunk)
            if not data:
                # PEP 479: raising StopIteration inside a generator becomes a
                # RuntimeError on Python 3.7+; a plain return ends the
                # generator cleanly instead.
                return

            # Subsequent requests can all just have the content
            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)