def transcribe_file(speech_file):
    """Transcribe a local audio file with the Google Cloud Speech client.

    The whole file is read into memory and sent in one ``recognize``
    request configured for LINEAR16 audio, ``sample_rate_hertz=16000`` and
    ``language_code='en-US'``.

    Args:
        speech_file: Path of the audio file to transcribe.

    Returns:
        The transcript of the first alternative of every result in the
        response, concatenated in order with no separator. An empty string
        when the response contains no results.
    """
    # Imported locally so the block does not rely on a module-level
    # ``import io`` that may or may not exist elsewhere in the file.
    import io

    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    client = speech.SpeechClient()

    with io.open(speech_file, 'rb') as audio_file:
        # Fix: the original called ``sread()``, which file objects do not
        # provide, so every call failed with AttributeError.
        content = audio_file.read()

    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')

    response = client.recognize(config, audio)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    pieces = []
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        transcript = result.alternatives[0].transcript
        pieces.append(format(transcript))
        print(u'Transcript: {}'.format(transcript))
    return "".join(pieces)
def transcribe_gcs(gcs_uri):
    """Run long-running speech recognition on audio referenced by a URI.

    The request is configured for LINEAR16 audio, ``sample_rate_hertz=16000``
    and ``language_code='ko-KR'``.

    NOTE(review): a later definition named ``transcribe_gcs`` appears in this
    file; if both stay at module level, the later one rebinds the name and
    this version is no longer reachable through it -- confirm which is wanted.

    Args:
        gcs_uri: URI passed to ``RecognitionAudio(uri=...)``.

    Returns:
        Whatever ``operation.result()`` yields for the recognition
        operation; the call is made without a timeout argument.
    """
    from google.cloud import speech
    from google.cloud.speech import enums, types

    speech_client = speech.SpeechClient()

    remote_audio = types.RecognitionAudio(uri=gcs_uri)
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='ko-KR',
    )

    pending = speech_client.long_running_recognize(
        recognition_config, remote_audio)
    return pending.result()
def transcribe_gcs(gcs_url):
    """Recognize audio referenced by a URI and pass the result to ``toexcel``.

    The request is configured for LINEAR16 audio, ``sample_rate_hertz=16000``,
    ``language_code='zh-TW'`` and automatic punctuation enabled.

    Args:
        gcs_url: URI passed to ``RecognitionAudio(uri=...)``; it is also
            forwarded unchanged to ``toexcel``.

    Returns:
        None. The response is handed to ``toexcel(response, gcs_url)``,
        which is defined outside this block.
    """
    from google.cloud import speech
    from google.cloud.speech import enums, types

    speech_client = speech.SpeechClient()

    remote_audio = types.RecognitionAudio(uri=gcs_url)
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        enable_automatic_punctuation=True,
        language_code='zh-TW',
    )

    pending = speech_client.long_running_recognize(
        recognition_config, remote_audio)
    # timeout=90 bounds the wait for the operation (presumably seconds --
    # confirm against the client library documentation).
    recognition = pending.result(timeout=90)

    toexcel(recognition, gcs_url)