def transcribe_wav_file(speech_file):
    '''Transcribe the given audio file.'''
    client = speech.SpeechClient()

    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code='en-US',
        audio_channel_count=2,
        enable_separate_recognition_per_channel=False,
        max_alternatives=1,
        enable_word_time_offsets=True)

    response = client.recognize(config=config, audio=audio)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    last_word = None
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        # Sleep for the gap between results so transcripts are yielded at
        # roughly the pace they were spoken.
        first_word = result.alternatives[0].words[0].start_time
        if last_word is not None:
            td = first_word - last_word
            time.sleep(td.seconds)
        last_word = result.alternatives[0].words[0].end_time
        yield result.alternatives[0].transcript
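# A minimal usage sketch (not part of the original source) for the generator
# above. It assumes module-level `io`, `time`, and
# `from google.cloud import speech` imports; the WAV path is a placeholder.
if __name__ == "__main__":
    for transcript in transcribe_wav_file("recording.wav"):
        print(transcript)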
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator
        )

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
def run_quickstart():
    # [START speech_quickstart]
    import io
    import os

    # Imports the Google Cloud client library
    # [START speech_python_migration_imports]
    from google.cloud import speech

    # [END speech_python_migration_imports]

    # Instantiates a client
    # [START speech_python_migration_client]
    client = speech.SpeechClient()

    # [END speech_python_migration_client]

    # The name of the audio file to transcribe
    file_name = os.path.join(os.path.dirname(__file__), ".", "file.wav")

    # Loads the audio into memory
    with io.open(file_name, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="ko-KR",
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)

    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
def get_transcript_long(content: bytes = None, audio_path: str = None):
    """
    Gets transcript of a long audio file asynchronously.

    Args:
        content (bytes): Content of audio file as bytes.
        audio_path (str): Path or URI to audio file.

    Returns:
        object: Speech-to-text recognition response.
    """
    if content is None and audio_path is None:
        raise ValueError('At least one parameter cannot be None.')
    audio = speech.RecognitionAudio(uri=audio_path) if content is None else speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    operation = client.long_running_recognize(config=config, audio=audio)

    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u"Transcript: {}".format(result.alternatives[0].transcript))
        print("Confidence: {}".format(result.alternatives[0].confidence))

    return response
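# A hypothetical usage sketch (not in the original source): long-running
# recognition is what allows audio over ~1 minute, so a GCS URI is passed
# rather than inline bytes. It assumes the same module-level `client` the
# function above uses; the bucket path is a placeholder.
if __name__ == "__main__":
    response = get_transcript_long(audio_path="gs://your-bucket/long_audio.flac")
    for result in response.results:
        print(result.alternatives[0].transcript)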
def audio_to_text(filename):
    # [START speech_quickstart]
    import io
    import os  # needed for building the file path below

    # Imports the Google Cloud client library
    # [START migration_import]
    from google.cloud import speech

    # [END migration_import]

    # Instantiates a client
    # [START migration_client]
    client = speech.SpeechClient()

    # [END migration_client]

    # The name of the audio file to transcribe
    file_name = os.path.join(os.path.dirname(__file__), '.', 'uploads', filename)

    # Loads the audio into memory
    with io.open(file_name, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code='en-US',
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True)

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)

    return response.results[0].alternatives[0].transcript
def transcribe_file_with_word_time_offsets(speech_file):
    """Transcribe the given audio file synchronously and output the word time
    offsets."""
    import io

    from google.cloud import speech

    client = speech.SpeechClient()

    with io.open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
        enable_word_time_offsets=True,
    )

    response = client.recognize(request={"config": config, "audio": audio})

    for result in response.results:
        alternative = result.alternatives[0]
        print("Transcript: {}".format(alternative.transcript))

        for word_info in alternative.words:
            word = word_info.word
            start_time = word_info.start_time
            end_time = word_info.end_time

            print(
                f"Word: {word}, start_time: {start_time.total_seconds()}, end_time: {end_time.total_seconds()}"
            )
def create_speech_config():
    phrases = []
    if len(confvars.G_PHRASES_PATH) != 0:
        with open(confvars.G_PHRASES_PATH, "r",
                  encoding=confvars.G_PHRASES_ENCODING) as fp:
            for line in fp:
                if line:
                    phrases.append(line.strip().encode(
                        'ascii', 'ignore').decode('ascii'))
    else:
        glbl.main_logger.info(
            f"Phrases file {confvars.G_PHRASES_PATH} is null.")

    glbl.main_logger.info(f"phrases as context, num={len(phrases)}")
    speech_context = speech.SpeechContext(
        phrases=phrases[:confvars.G_MAX_PHRASES])
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=confvars.G_AUD_SAMPLING_RATE,
        enable_word_time_offsets=False,
        model='video',
        profanity_filter=True,
        enable_automatic_punctuation=True,
        speech_contexts=[speech_context],
        language_code=confvars.G_LANGUAGE_CODE)
    speech_config = speech.StreamingRecognitionConfig(config=config,
                                                      interim_results=True)
    return speech_config
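# A minimal sketch (an assumption, not from the original source) of how the
# StreamingRecognitionConfig returned above might be used. `audio_chunks`
# stands in for whatever iterable of raw LINEAR16 byte chunks the caller
# provides; only final results are yielded.
def stream_transcripts(audio_chunks):
    client = speech.SpeechClient()
    streaming_config = create_speech_config()
    requests = (speech.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in audio_chunks)
    for response in client.streaming_recognize(streaming_config, requests):
        for result in response.results:
            if result.is_final:
                yield result.alternatives[0].transcript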
def transcribe_gcs(gcs_uri, lang, creds):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    # client = speech.SpeechClient()
    client = speech.SpeechClient.from_service_account_json(creds)

    audio = speech.RecognitionAudio(uri=gcs_uri)
    config = speech.RecognitionConfig(
        # encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code=lang,
        enable_word_time_offsets=True,
        enable_automatic_punctuation=True,
    )

    operation = client.long_running_recognize(config=config, audio=audio)

    print("Waiting for operation to complete...")
    response = operation.result(timeout=250)

    data = proto_message_to_dict(response)
    with open('response.json', 'w') as f:
        json.dump(data, f)

    with open('response.json', 'r') as f:
        data = json.load(f)

    onewordSRT('abhi_oneword.srt', data)
    youtubeStyleSRT('abhi_yt.srt', data)
def transcribe_file(speech_file, file_name):
    """Transcribe the given audio file."""
    client = speech.SpeechClient.from_service_account_json('key.json')

    with io.open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        audio_channel_count=2,
        language_code="en-US",
    )

    response = client.recognize(config=config, audio=audio)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    if response:
        print("RES", response)
        text = ""
        confidence = 0
        for result in response.results:
            text += result.alternatives[0].transcript + " "
            confidence = result.alternatives[0].confidence
        return (text, confidence)
    else:
        return ("Could not create lyrics..", 0)
def transcribe_file():
    from google.cloud import speech
    import io

    client = speech.SpeechClient()

    with io.open('proken.wav', 'rb') as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code='ja-JP',
        audio_channel_count=1,
        enable_separate_recognition_per_channel=True)

    operation = client.long_running_recognize(config=config, audio=audio)
    response = operation.result(timeout=90)

    with io.open("proken.txt", "w", encoding="utf-8") as f:
        for result in response.results:
            f.write(u'Transcript: {}'.format(
                result.alternatives[0].transcript))
def transcribe_gcs_with_multichannel(gcs_uri):
    """Transcribe the given audio file on GCS with multi channel."""
    # [START speech_transcribe_multichannel_gcs]
    from google.cloud import speech

    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=gcs_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=44100,
        language_code="en-US",
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True,
    )

    response = client.recognize(config=config, audio=audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        print("First alternative of result {}".format(i))
        print(u"Transcript: {}".format(alternative.transcript))
        print(u"Channel Tag: {}".format(result.channel_tag))
def run_quickstart():
    # [START speech_quickstart]
    # Imports the Google Cloud client library
    # [START speech_python_migration_imports]
    from google.cloud import speech

    # [END speech_python_migration_imports]

    # Instantiates a client
    # [START speech_python_migration_client]
    client = speech.SpeechClient()

    # [END speech_python_migration_client]

    # The name of the audio file to transcribe
    gcs_uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"

    audio = speech.RecognitionAudio(uri=gcs_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)

    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
def transcribe_file(speech_file):
    from google.cloud import speech
    from pydub import AudioSegment
    import io

    # Convert MP3 input to a mono WAV file before transcription.
    if ".mp3" in speech_file:
        sound = AudioSegment.from_mp3(speech_file)
        sound = sound.set_channels(1)
        sound.export(speech_file[:-4] + ".wav", format="wav")
        speech_file = speech_file[:-4] + ".wav"

    client = speech.SpeechClient()

    with io.open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="en-US")

    response = client.recognize(config=config, audio=audio)
    response_string = ""
    for result in response.results:
        response_string += result.alternatives[0].transcript

    # print(response_string)
    # question_obj = find_question(response_string)
    # if question_obj != None:
    #     response_string = question_obj['question']
    return response_string
def transcribe_model_selection_gcs(gcs_uri, model):
    """Transcribe the given audio file asynchronously with the selected
    model."""
    from google.cloud import speech

    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=gcs_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
        model=model,
    )

    operation = client.long_running_recognize(request={
        "config": config,
        "audio": audio
    })

    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        print("First alternative of result {}".format(i))
        print(u"Transcript: {}".format(alternative.transcript))
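# Hypothetical invocation (not in the original source). Speech-to-Text model
# names include "default", "video", "phone_call", and "command_and_search";
# the GCS URI below is a placeholder.
# transcribe_model_selection_gcs("gs://your-bucket/audio.wav", "video")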
def transcribe_file(speech_file):
    """Transcribe the given audio file asynchronously."""
    import io

    from google.cloud import speech

    client = speech.SpeechClient()

    # [START speech_python_migration_async_request]
    with io.open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    # Note that transcription is limited to a 60 seconds audio file.
    # Use a GCS file for audio longer than 1 minute.
    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-US",
    )

    # [START speech_python_migration_async_response]
    operation = client.long_running_recognize(config=config, audio=audio)

    # [END speech_python_migration_async_request]
    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u"Transcript: {}".format(result.alternatives[0].transcript))
        print("Confidence: {}".format(result.alternatives[0].confidence))
def main():
    language_code = "uk-UA"

    client = speech.SpeechClient()
    interaction_type = speech.RecognitionMetadata.InteractionType.DICTATION
    metadata = speech.RecognitionMetadata(interaction_type=interaction_type)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        metadata=metadata,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        listen_print_loop(responses)
def transcribe_file(speech_file):
    """Transcribe the given audio file."""
    from google.cloud import speech
    import io

    f = open('transcribe.txt', "a")

    client = speech.SpeechClient()

    # [START speech_python_migration_sync_request]
    # [START speech_python_migration_config]
    with io.open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="en-US",
    )
    # [END speech_python_migration_config]

    # [START speech_python_migration_sync_response]
    response = client.recognize(config=config, audio=audio)
    # [END speech_python_migration_sync_request]

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        # print(u"Transcript: {}".format(result.alternatives[0].transcript))
        print(result.alternatives[0].transcript)
        f.write(result.alternatives[0].transcript)
    # [END speech_python_migration_sync_response]

    f.write("\n\n")
    f.close()
def download_audio_and_transcribe(self, recording_url: str) -> str:
    transcription: str = ""
    self.connect(destination="speech")
    response = requests.get(url=recording_url, stream=True)
    # iter_content defaults to 1-byte chunks; use a larger chunk size so each
    # streaming request carries a useful amount of audio.
    reqs = (speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in response.iter_content(chunk_size=4096))
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-US",
    )
    streaming_config = speech.StreamingRecognitionConfig(config=config)
    responses = self.speech_client.streaming_recognize(
        config=streaming_config,
        requests=reqs,
    )
    for response in responses:
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            # print("Finished: {}".format(result.is_final))
            # print("Stability: {}".format(result.stability))
            alternatives = result.alternatives
            # The alternatives are ordered from most likely to least.
            for alternative in alternatives:
                # print("Confidence: {}".format(alternative.confidence))
                transcription = u"{}".format(alternative.transcript)
    return transcription
def transcribe_gcs_with_word_time_offsets(gcs_uri):
    """Transcribe the given audio file asynchronously and output the word time
    offsets."""
    from google.cloud import speech

    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=gcs_uri)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code="en-US",
        enable_word_time_offsets=True,
    )

    operation = client.long_running_recognize(request={
        "config": config,
        "audio": audio
    })

    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    for result in response.results:
        alternative = result.alternatives[0]
        print("Transcript: {}".format(alternative.transcript))
        print("Confidence: {}".format(alternative.confidence))

        for word_info in alternative.words:
            word = word_info.word
            start_time = word_info.start_time
            end_time = word_info.end_time

            print(
                f"Word: {word}, start_time: {start_time.total_seconds()}, end_time: {end_time.total_seconds()}"
            )
def speech2text():
    client = speech.SpeechClient()

    file_name = "output.wav"
    with io.open(file_name, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        audio_channel_count=2,
        sample_rate_hertz=44100,
        enable_word_time_offsets=True,
        language_code="en-US",
    )

    response = client.recognize(request={"config": config, "audio": audio})

    # (word, end-time-in-seconds) pairs for every recognized word.
    ret = []
    for res in response.results:
        for words in res.alternatives[0].words:
            ret.append([str(words.word),
                        int(words.end_time.seconds) +
                        float(words.end_time.microseconds / 1000000)])

    # Transcript text for each consecutive portion of the audio.
    tr = []
    for res in response.results:
        tr.append(res.alternatives[0].transcript.strip())

    return tr, ret
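# A minimal consumption sketch (not in the original source) for the two lists
# speech2text() returns: the joined transcript, then each word with its end
# timestamp. Assumes "output.wav" exists and credentials are configured.
if __name__ == "__main__":
    transcripts, word_times = speech2text()
    print(" ".join(transcripts))
    for word, end_seconds in word_times:
        print(f"{word}\t{end_seconds:.2f}s")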
def get_transcript(content: bytes = None, audio_path: str = None):
    """
    Gets transcript of audio file.

    Args:
        content (bytes): Content of audio file as bytes.
        audio_path (str): Path or URI to audio file.

    Returns:
        object: Speech-to-text recognition response.
    """
    if content is None and audio_path is None:
        raise ValueError("At least one parameter cannot be None.")
    audio = (
        speech.RecognitionAudio(uri=audio_path)
        if content is None
        else speech.RecognitionAudio(content=content)
    )
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)

    for result in response.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return response
def transcribe_file_with_enhanced_model(path):
    """Transcribe the given audio file using an enhanced model."""
    # [START speech_transcribe_enhanced_model]
    import io

    from google.cloud import speech

    client = speech.SpeechClient()

    # path = 'resources/commercial_mono.wav'
    with io.open(path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-US",
        # Enhanced models are only available to projects that
        # opt in for audio data collection.
        use_enhanced=True,
        # A model must be specified to use enhanced model.
        model="phone_call",
    )

    response = client.recognize(config=config, audio=audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        print("First alternative of result {}".format(i))
        print("Transcript: {}".format(alternative.transcript))
def transcribe_model_selection(speech_file, model):
    """Transcribe the given audio file synchronously with the selected
    model."""
    from google.cloud import speech

    client = speech.SpeechClient()

    with open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=48000,
        language_code="en-US",
        model=model,
    )

    response = client.recognize(config=config, audio=audio)

    output = ""
    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        output += ("-" * 20 + "\n")
        print("First alternative of result {}".format(i))
        output += "First alternative of result {}\n".format(i)
        print(u"Transcript: {}".format(alternative.transcript))
        output += u"Transcript: {}\n".format(alternative.transcript)

    with open(speech_file + ".txt", "w") as f:
        f.write(output)
def transcribe_gcs(gcs_uri):
    """Asynchronously transcribes the audio file specified by the gcs_uri."""
    from google.cloud import speech

    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=gcs_uri)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    operation = client.long_running_recognize(request={
        "config": config,
        "audio": audio
    })

    print("Waiting for operation to complete...")
    response = operation.result(timeout=90)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        print(u"Transcript: {}".format(result.alternatives[0].transcript))
        print("Confidence: {}".format(result.alternatives[0].confidence))
def transcribe_context_classes(storage_uri):
    """Provides "hints" to the speech recognizer to favor specific classes of
    words in the results."""
    # [START speech_context_classes]
    from google.cloud import speech

    client = speech.SpeechClient()

    # storage_uri = 'gs://YOUR_BUCKET_ID/path/to/your/file.wav'
    audio = speech.RecognitionAudio(uri=storage_uri)

    # SpeechContext: to configure your speech_context see:
    # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#speechcontext
    # Full list of supported phrases (class tokens) here:
    # https://cloud.google.com/speech-to-text/docs/class-tokens
    speech_context = speech.SpeechContext(phrases=["$TIME"])

    # RecognitionConfig: to configure your encoding and sample_rate_hertz, see:
    # https://cloud.google.com/speech-to-text/docs/reference/rpc/google.cloud.speech.v1#recognitionconfig
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-US",
        speech_contexts=[speech_context],
    )

    response = client.recognize(config=config, audio=audio)

    for i, result in enumerate(response.results):
        alternative = result.alternatives[0]
        print("-" * 20)
        print("First alternative of result {}".format(i))
        print("Transcript: {}".format(alternative.transcript))
def __init__(
    self,
    language: str,
    credentials: Union[None, str, dict] = None,
    sample_rate: int = 16000,
    **kwargs,
) -> None:
    if credentials:
        if isinstance(credentials, str):
            credentials = service_account.Credentials.from_service_account_file(
                credentials)
        elif isinstance(credentials, dict):
            credentials = service_account.Credentials.from_service_account_info(
                credentials)
        else:
            raise ValueError(
                "Invalid Credentials: Only dict, str, or None accepted")
    self._client = speech.SpeechClient(credentials=credentials)
    self._config = speech.StreamingRecognitionConfig(
        config=speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=sample_rate,
            language_code=language,
            enable_automatic_punctuation=True,
        ),
        interim_results=True,
    )
    self._queue: Queue = Queue()
    self._thread: Any = None
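# A minimal sketch (an assumption, not part of the original class) of a
# companion method showing how the Queue populated by an audio callback could
# feed the streaming API; `None` serves as a stop sentinel here.
def _request_generator(self):
    while True:
        chunk = self._queue.get()
        if chunk is None:
            # Sentinel received: end the request stream.
            return
        yield speech.StreamingRecognizeRequest(audio_content=chunk)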
def transcribe(gs_prefix, vod):
    '''
    This is an async call to Google Cloud's long-running speech recognize
    '''
    gcs_uri = gs_prefix + vod

    client = speech.SpeechClient()

    audio = speech.RecognitionAudio(uri=gcs_uri)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED,
        sample_rate_hertz=44100,
        language_code="en-US",
    )

    operation = client.long_running_recognize(config=config, audio=audio)

    print('Running transcription for vod:', vod)
    print("Waiting for operation to complete...")
    response = operation.result(timeout=3600)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    with open(
            "D:\\Users\\Brad\\Graduate_School\\2021\\data606\\data\\transcripts\\"
            + vod + '.txt', 'w') as openfile:
        for result in response.results:
            # The first alternative is the most likely one for this portion.
            transcript = result.alternatives[0].transcript
            print(u"Transcript: {}".format(transcript))
            openfile.writelines(transcript)
def speech_to_text():
    try:
        # move to config file?
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = app.config['GOOGLE_KEY']
        try:
            file = request.files['audio_data']
            content = file.read()

            client = speech.SpeechClient()
            audio = speech.RecognitionAudio(content=content)
            config = speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                language_code="en-US",
            )

            response = client.recognize(config=config, audio=audio)

            for result in response.results:
                result = result.alternatives[0].transcript
                print("Transcript: {}".format(result))
                return result
            return "ERROR: Google failed to transcribe!"
        except Exception as err:
            print("Failed to transcribe audio:")
            print(err)
    except Exception as err:
        print("Failed to get google api credentials:")
        print(err)
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        # Adding single_utterance=True makes the API recognize and respond to
        # a single spoken utterance only: streaming recognition stops when the
        # speaker pauses, so the speaker's own audio is not recognized again.
        # single_utterance=True,
        # Changed back to False (commented out); this way only results where
        # is_final is True are returned.
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # So this can run again even after listen_print_loop returns.
        listen_print_loop(responses)
        print('main: finished listen_print_loop')
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ja-JP'  # a BCP-47 language tag

    import os
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "speech-rec-827143ff9a4c.json"

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    print("start rec")
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        for s in listen_print_loop(responses):
            # voiceroid.say(s)
            print(s)