def auto_run(directory):
    """Re-run transcript formatting for every saved ``.json`` response.

    Every entry of ``directory`` whose name ends in ``.json`` is read as
    UTF-8 text and parsed with ``json_format.Parse`` into a
    ``LongRunningRecognizeResponse`` message.  The message's ``results`` are
    passed to ``format_transcript`` together with the file path stripped of
    its ``.json`` suffix, and the path of the processed file is printed.
    Entries with any other suffix are ignored.
    """
    json_suffix = '.json'
    for entry in os.listdir(directory):
        if entry.endswith(json_suffix):
            json_path = os.path.join(directory, entry)
            with open(json_path, 'r', encoding='utf-8') as handle:
                response = json_format.Parse(
                    handle.read(),
                    types.cloud_speech_pb2.LongRunningRecognizeResponse(),
                )
                # Drop the trailing ".json" to obtain the output base path.
                output_base = json_path[:-len(json_suffix)]
                # output .srt formatted version of transcription
                format_transcript(response.results, output_base)
            print(json_path)
def transcribe_gcs(mp4_file, bucket_name='jomi-speech-to-text.appspot.com'):
    """Asynchronously transcribes the audio track of ``mp4_file``.

    Steps, in order:

    1. ``process_video(mp4_file)`` creates the audio file and returns its
       path (presumably without the ``.ogg`` extension, since ``.ogg`` is
       appended here -- confirm against ``process_video``).
    2. The ``.ogg`` file is uploaded to ``bucket_name`` via
       ``upload_to_gcloud``.
    3. A long-running recognition request is issued for the uploaded
       ``gs://`` URI (OGG/Opus, 16 kHz, ``en-US``, word time offsets on).
    4. Every alternative transcript is written, one per line, to
       ``<audio_file_path>.txt``.
    5. ``format_transcript`` is called with the results to produce the
       .srt formatted version.

    Args:
        mp4_file: Path of the video file to transcribe.
        bucket_name: Name of the gcloud bucket the audio is uploaded to.
            Defaults to the bucket that was previously hard-coded.

    Returns:
        None.  If ``process_video`` returns a falsy value the function
        returns immediately without uploading or transcribing anything.
    """
    audio_file_path = process_video(mp4_file)  # Create audio file
    if not audio_file_path:
        # No audio was produced, so there is nothing to upload or transcribe.
        return

    print(mp4_file)
    audio_file_name = os.path.basename(audio_file_path) + '.ogg'
    print(audio_file_name)
    upload_to_gcloud(
        bucket_name,
        source_file_name=audio_file_path + '.ogg',
        destination_blob_name=audio_file_name)

    client = speech.SpeechClient()
    audio = types.RecognitionAudio(
        uri="gs://" + bucket_name + "/" + audio_file_name)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.OGG_OPUS,
        language_code='en-US',
        sample_rate_hertz=16000,
        enable_word_time_offsets=True)
    operation = client.long_running_recognize(config, audio)
    if not operation.done():
        print('Waiting for results...')
    results = operation.result().results

    # Output raw text file of transcription.  The file is opened in text
    # mode with an explicit UTF-8 encoding and the transcript is written as
    # text: the previous ``transcript.encode('utf-8') + '\n'`` mixed bytes
    # with str, which raises TypeError on Python 3.  The context manager
    # also guarantees the file is closed if a write fails.
    with open(audio_file_path + '.txt', 'w',
              encoding='utf-8') as raw_text_file:
        for speech_result in results:
            for alternative in speech_result.alternatives:
                raw_text_file.write(alternative.transcript + '\n')

    # Output .srt formatted version of transcription.
    format_transcript(results, audio_file_path)