def test_transcribe_gcs_word_time_offsets(capsys):
    """Check that the printed output reports a positive start time for "Bridge".

    Runs ``transcribe_async.transcribe_gcs`` on the sample FLAC file, captures
    what it prints, and looks for a ``Bridge, start_time: <number>`` entry.
    """
    transcribe_async.transcribe_gcs(
        'gs://python-docs-samples-tests/speech/audio.flac')
    out, _ = capsys.readouterr()

    match = re.search(r'Bridge, start_time: ([0-9.]+)', out, re.DOTALL | re.I)
    # Fail with a readable assertion (showing the captured output) rather than
    # an AttributeError on ``None`` when the expected line is missing.
    assert match is not None, (
        'no "Bridge, start_time: ..." entry in output: {!r}'.format(out))

    start_time = float(match.group(1))
    assert start_time > 0
# Example #2
0
def edit(filename):
    """Render the transcript editor for ``filename`` and handle its buttons.

    Collects the ``.wav`` entries from ``crud.blobs_list(bucket)`` whose name
    contains ``filename``, loads the existing ``<filename>.json`` transcript
    blob if there is one, and on a POST request acts on the pressed button:

    * ``save`` -- dump the submitted text to ``copy.json`` and upload it as
      ``<filename>.json`` with metadata ``{'status': 0}``.
    * ``transcribe`` -- run ``transcribe.transcribe_gcs`` on the last matching
      audio file and show the space-joined result. If no audio file matched,
      the current response is left unchanged.
    * ``done`` -- set the metadata of the ``<filename>.json`` blob to
      ``{'status': 1}`` and patch it.

    Args:
        filename: Base name used to find the audio blobs and to build the
            ``.json`` transcript blob name.

    Returns:
        The rendered ``editor.html`` template, given the audio URLs and the
        current response text.
    """
    files = crud.blobs_list(bucket)
    # NOTE(review): entries are unpacked as pairs and the second item is
    # compared to '.wav' -- presumably (name without extension, extension).
    audio_filenames = [
        "{}.wav".format(name)
        for name, extension in files
        if extension == '.wav' and filename in name
    ]
    audio_urls = [
        "https://storage.cloud.google.com/{}/{}".format(bucket, audio_filename)
        for audio_filename in audio_filenames
    ]

    text_filename = "{}.json".format(filename)

    print(audio_urls)

    response = ""
    if crud.blob_exists(bucket, text_filename):
        item = crud.blob_info(bucket, text_filename)
        response = json.loads(item.download_as_string())

    if request.method == 'POST':
        button = request.form['button']
        if button == 'save':
            response = request.form['text']
            with open('copy.json', 'w', encoding='utf-8') as f:
                json.dump(response, f, ensure_ascii=False)
            metadata = {'status': 0}  # status 0: in progress
            crud.upload_blob(bucket, "copy.json", text_filename, metadata)
        elif button == 'transcribe':
            # Use the last matching audio file, as before; skip transcription
            # instead of hitting an unbound name when nothing matched.
            if audio_filenames:
                transcript = transcribe.transcribe_gcs("gs://{}/{}".format(
                    bucket, audio_filenames[-1]))
                response = " ".join(transcript)
        elif button == 'done':
            response = request.form['text']
            # Look the transcript blob up the same way as above; the submitted
            # text is not a blob identifier.
            blob = crud.blob_info(bucket, text_filename)
            # NOTE(review): status 1 presumably means "done" -- confirm.
            blob.metadata = {'status': 1}
            blob.patch()

    return render_template('editor.html',
                           audio_urls=audio_urls,
                           response=response)
# Example #3
0
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Two positional arguments: the lecture name (used as the stem of every
    # output file below) and the location of the audio to transcribe.
    parser.add_argument(
        'lecture_name', help='Name of the lecture')
    parser.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    
    args = parser.parse_args()

    # All output file names are derived from the lecture name.
    transcript_filename = args.lecture_name + ".txt"
    content_filename = args.lecture_name + "_content.txt"
    subtitle_filename = args.lecture_name + ".vtt"
    summary_filename = args.lecture_name + "_summary.txt"

    # generate transcript using google api
    # stdout is pointed at the transcript file for the duration of the call,
    # so anything transcribe_gcs prints ends up in that file; the value it
    # returns is written separately to the content file.
    # NOTE(review): neither file object opened here is explicitly closed or
    # flushed before the transcript file is read below -- confirm this is safe.
    sys.stdout = open(transcript_filename, 'w+')
    content = transcribe_async.transcribe_gcs(args.path)
    open(content_filename, 'w+').write(content)

    # generate vtt file from transcript
    # Restore stdout first (original_stdout is defined outside this view) so
    # the message below reaches the console.
    sys.stdout = original_stdout
    # NOTE(review): the 'w+' open above creates the file, so this check is
    # only expected to fail if the file was removed in the meantime.
    if not os.path.exists(transcript_filename):
        print("transcript file does not exist.\n")
        exit(-1)
    vtt_gen.generate_subtitles(transcript_filename, subtitle_filename)

    # generate summary
    # (currently disabled; summary_filename is only used by this call)
    # textsum.generate_summary(content_filename, summary_filename)

    # entity analysis
    # Runs on the text returned by transcribe_gcs.
    entity_analysis.entities_text(content)
def test_transcribe_gcs(capsys):
    """The sample audio transcript should mention the Brooklyn Bridge question."""
    sample_uri = 'gs://python-docs-samples-tests/speech/audio.flac'
    transcribe_async.transcribe_gcs(sample_uri)

    # Only the captured stdout matters; stderr is ignored.
    captured_stdout, _ = capsys.readouterr()
    expected_phrase = r'how old is the Brooklyn Bridge'
    found = re.search(expected_phrase, captured_stdout, re.DOTALL | re.I)

    assert found
def test_transcribe_gcs(resource, capsys):
    """Transcribing the sample audio prints the Brooklyn Bridge question.

    ``resource`` is requested as a fixture but not used in the body.
    """
    gcs_uri = 'gs://python-docs-samples-tests/speech/audio.flac'
    transcribe_async.transcribe_gcs(gcs_uri)

    # Read everything printed so far and search stdout case-insensitively.
    printed = capsys.readouterr()[0]
    hit = re.search(
        r'how old is the Brooklyn Bridge', printed, re.DOTALL | re.I)

    assert hit