def split_and_save_files(t, target_dir, subtitle_file):
    """Persist one subtitle segment as a wav / txt / json triple.

    The output name is ``get_hash(subtitle_file + phrase + str(ts_start))``.
    Files are sharded by the first two characters of that value::

        <target_dir>/wav/<h[:2]>/<h>.wav
        <target_dir>/txt/<h[:2]>/<h>.txt
        <target_dir>/metadata/<h[:2]>/<h>.json

    Segments with an empty phrase are skipped before anything is hashed or
    created on disk. A segment whose wav and txt files both already exist is
    left untouched.

    Args:
        t: Segment mapping. ``original_phrase``, ``ts_start`` and ``ts_end``
            are read. NOTE: when the segment is written, ``t`` is mutated in
            place: both timestamps are replaced by their ``str()`` form and a
            ``"metadata"`` key is added.
        target_dir: Root directory under which the shard folders are created.
        subtitle_file: Used only as part of the hash input.

    Raises:
        AssertionError: If, after writing, the txt or wav file is missing or
            the wav file is not larger than 4 KiB.

    Relies on ``audio_file`` and ``metadata`` being available from the
    enclosing/module scope (they are not parameters).
    """
    text = t["original_phrase"]
    if not text:
        # Nothing to transcribe: bail out before touching the filesystem so
        # empty segments do not leave empty shard directories behind.
        return

    digest = get_hash(subtitle_file + text + str(t["ts_start"]))
    shard = digest[:2]

    wav_file_dir = os.path.join(target_dir, "wav", shard)
    txt_file_dir = os.path.join(target_dir, "txt", shard)
    metadata_dir = os.path.join(target_dir, "metadata", shard)
    for directory in (wav_file_dir, txt_file_dir, metadata_dir):
        os.makedirs(directory, exist_ok=True)

    target_wav_file = os.path.join(wav_file_dir, digest + ".wav")
    target_txt_file = os.path.join(txt_file_dir, digest + ".txt")
    target_metadata_file = os.path.join(metadata_dir, digest + ".json")

    if os.path.exists(target_wav_file) and os.path.exists(target_txt_file):
        # Already produced by an earlier run.
        return

    extract_audio_part_segment(audio_file, t["ts_start"], t["ts_end"],
                               target_wav_file)

    with io.open(target_txt_file, "w", encoding='utf-8') as f:
        f.write(text)

    with io.open(target_metadata_file, "w", encoding='utf-8') as f:
        # Timestamps are stored as strings in the JSON record; this also
        # updates the caller's dict (kept for backward compatibility).
        t["ts_start"] = str(t["ts_start"])
        t["ts_end"] = str(t["ts_end"])
        t["metadata"] = metadata
        json.dump(t, f)

    # Explicit raise instead of `assert` so the check survives `python -O`.
    # The 4 KiB floor presumably guards against empty/truncated audio output.
    created = (os.path.exists(target_txt_file)
               and os.path.exists(target_wav_file)
               and getsize(target_wav_file) > 4 * 1024)
    if not created:
        raise AssertionError("{} not created".format(target_wav_file))
示例#2
0
def _get_transcript_google_web_asr(t):
    """Transcribe one segment through ``recognize_google``; best-effort.

    The span ``t["ts_start"]``..``t["ts_end"]`` of ``t["video_file"]`` is
    extracted into a temporary ``.wav`` file, loaded through ``sr``
    (presumably the ``speech_recognition`` package -- confirm against the
    file's imports) and handed to ``Recognizer.recognize_google``.

    Returns:
        Whatever ``recognize_google`` returns on success, or ``None`` if any
        exception is raised along the way (the exception is printed).
    """
    import tempfile

    try:
        with tempfile.NamedTemporaryFile(suffix=".wav") as clip:
            # The helper writes to the temp file's path; the file itself is
            # removed automatically when this `with` block exits.
            extract_audio_part_segment(
                t["video_file"], t["ts_start"], t["ts_end"], clip.name)

            recognizer = sr.Recognizer()
            with sr.AudioFile(clip.name) as audio_source:
                recorded = recognizer.record(audio_source)
                transcript = recognizer.recognize_google(recorded)
            return transcript
    except Exception as err:
        # Deliberate catch-all: a failed transcription yields None.
        print(err)
        return None
示例#3
0
            metadata_dir = os.path.join(target_dir, "metadata", hash[:2])

            os.makedirs(wav_file_dir, exist_ok=True)
            os.makedirs(txt_file_dir, exist_ok=True)
            os.makedirs(metadata_dir, exist_ok=True)

            target_wav_file = os.path.join(wav_file_dir, hash + ".wav")
            target_txt_file = os.path.join(txt_file_dir, hash + ".txt")
            target_metadata_file = os.path.join(metadata_dir, hash + ".json")

            text = t["original_phrase"]
            if len(text) == 0:
                continue
            if not os.path.exists(target_wav_file) or not os.path.exists(
                    target_txt_file):
                extract_audio_part_segment(video_file, t["ts_start"],
                                           t["ts_end"], target_wav_file)

                with io.open(target_txt_file, "w") as f:
                    f.write(text)

                with io.open(target_metadata_file, "w") as f:
                    t["ts_start"] = str(t["ts_start"])
                    t["ts_end"] = str(t["ts_end"])
                    t["metadata"] = metadata
                    json.dump(t, f)

                assert os.path.exists(target_txt_file) and os.path.exists(target_wav_file) \
                    and getsize(target_wav_file) > 4 * 1024, "{} not created".format(target_wav_file)
    except Exception as e:
        print("error : " + str(e))
        termcolor.cprint(e, color="red")