def test_transcribe_gcs_word_time_offsets(capsys):
    """Check that a positive start time is printed for the word "Bridge".

    Runs ``transcribe_async.transcribe_gcs`` on the sample FLAC file, then
    searches the captured stdout for ``Bridge, start_time: <number>`` and
    asserts that the number is greater than zero.
    """
    transcribe_async.transcribe_gcs(
        'gs://python-docs-samples-tests/speech/audio.flac')
    out, _ = capsys.readouterr()

    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
    # Without this guard a missing line surfaces as
    # "AttributeError: 'NoneType' object has no attribute 'group'",
    # which hides what actually went wrong.
    assert match is not None, (
        'no "Bridge, start_time: <number>" found in output: {!r}'.format(out))

    start_time = float(match.group(1))
    assert start_time > 0
def edit(filename):
    """Render the transcript editor for ``filename`` and handle its buttons.

    Collects the URLs of every ``.wav`` blob in ``bucket`` whose name
    contains ``filename``, loads the stored ``<filename>.json`` transcript
    if it exists, and on POST dispatches on the ``button`` form field:

    * ``save`` -- dump the submitted text to ``copy.json`` and upload it as
      the transcript blob with ``status`` 0.
    * ``transcribe`` -- run ``transcribe.transcribe_gcs`` on the last
      matching audio file and show the joined result.
    * ``done`` -- set ``status`` 1 on the transcript blob's metadata.

    Args:
        filename: Base name (no extension) used to match audio blobs and to
            build the transcript blob name.

    Returns:
        The rendered ``editor.html`` template, given ``audio_urls`` and
        ``response`` (the text to show in the editor).
    """
    # blobs_list yields pairs; presumably (name_without_ext, extension) --
    # confirm against crud.blobs_list.
    files = crud.blobs_list(bucket)
    audio_filenames = [
        "{}.wav".format(name)
        for name, extension in files
        if extension == '.wav' and filename in name
    ]
    audio_urls = [
        "https://storage.cloud.google.com/{}/{}".format(bucket, audio_filename)
        for audio_filename in audio_filenames
    ]
    text_filename = "{}.json".format(filename)
    print(audio_urls)

    # Start from the stored transcript, if any; otherwise an empty editor.
    response = ""
    if crud.blob_exists(bucket, text_filename):
        item = crud.blob_info(bucket, text_filename)
        response = json.loads(item.download_as_string())

    if request.method == 'POST':
        button = request.form['button']
        if button == 'save':
            response = request.form['text']
            # NOTE(review): fixed local file name; concurrent saves would
            # overwrite each other's copy.json before upload -- confirm
            # whether that matters for this deployment.
            with open('copy.json', 'w', encoding='utf-8') as f:
                json.dump(response, f, ensure_ascii=False)
            metadata = {'status': 0}  # set status to in progress
            crud.upload_blob(bucket, "copy.json", text_filename, metadata)
        elif button == 'transcribe':
            # The original relied on the loop variable left over from
            # building the URL list, i.e. the last matching audio file, and
            # raised UnboundLocalError when nothing matched. Keep the
            # "last file" choice but skip transcription when there is none.
            if audio_filenames:
                transcript = transcribe.transcribe_gcs("gs://{}/{}".format(
                    bucket, audio_filenames[-1]))
                response = " ".join(transcript)
        elif button == 'done':
            response = request.form['text']
            # Look up the transcript blob itself; the submitted text is not
            # a blob identifier.
            blob = crud.blob_info(bucket, text_filename)
            # presumably 1 means "done" (0 is "in progress" above) -- confirm
            blob.metadata = {'status': 1}
            blob.patch()

    return render_template('editor.html', audio_urls=audio_urls,
                           response=response)
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) parser.add_argument( 'lecture_name', help='Name of the lecture') parser.add_argument( 'path', help='File or GCS path for audio file to be recognized') args = parser.parse_args() transcript_filename = args.lecture_name + ".txt" content_filename = args.lecture_name + "_content.txt" subtitle_filename = args.lecture_name + ".vtt" summary_filename = args.lecture_name + "_summary.txt" # generate transcript using google api sys.stdout = open(transcript_filename, 'w+') content = transcribe_async.transcribe_gcs(args.path) open(content_filename, 'w+').write(content) # generate vtt file from transcript sys.stdout = original_stdout if not os.path.exists(transcript_filename): print("transcript file does not exist.\n") exit(-1) vtt_gen.generate_subtitles(transcript_filename, subtitle_filename) # generate summary # textsum.generate_summary(content_filename, summary_filename) # entity analysis entity_analysis.entities_text(content)
def test_transcribe_gcs(capsys):
    """Check that transcribing the sample FLAC prints the expected phrase.

    Calls ``transcribe_async.transcribe_gcs`` on the sample file and asserts
    that the captured stdout contains "how old is the Brooklyn Bridge"
    (case-insensitive).
    """
    gcs_uri = 'gs://python-docs-samples-tests/speech/audio.flac'
    transcribe_async.transcribe_gcs(gcs_uri)

    captured_out, _ = capsys.readouterr()
    expected = re.compile(r'how old is the Brooklyn Bridge', re.DOTALL | re.I)
    assert expected.search(captured_out)
def test_transcribe_gcs(resource, capsys):
    """Check that transcribing the sample FLAC prints the expected phrase.

    ``resource`` is requested as a fixture but not used in the body. The
    test calls ``transcribe_async.transcribe_gcs`` on the sample file and
    asserts that the captured stdout contains "how old is the Brooklyn
    Bridge" (case-insensitive).
    """
    sample_uri = 'gs://python-docs-samples-tests/speech/audio.flac'
    transcribe_async.transcribe_gcs(sample_uri)

    stdout_text, _ = capsys.readouterr()
    phrase = re.compile(r'how old is the Brooklyn Bridge', re.DOTALL | re.I)
    assert phrase.search(stdout_text)