Exemplo n.º 1
0
def _read_first_timestamp(ts_file):
    """Return the first timestamp following the header line of ``ts_file``.

    The value is parsed as a float so that both integral ("0") and
    fractional ("33.367") millisecond entries are accepted.
    """
    with open(ts_file, 'r') as f:
        f.readline()  # skip header
        return float(f.readline())


def extract_audio():
    """Write a mono 44.1 kHz ``.wav`` next to every video, aligned to the video start.

    For each path returned by ``find_all_video_files(output_dir)``:

    1. Remux the video into a temporary ``temp.mkv`` with ``mkvmerge``.
    2. Dump the timestamps of tracks 0 and 1 with ``mkvextract`` and read
       the first timestamp of each (track 0 is treated as video and track 1
       as audio -- NOTE(review): confirm this track order holds for all inputs).
    3. Extract the audio to a temporary ``temp.wav`` with ``ffmpeg``.
    4. Pad the start of the audio with silence when the audio starts after
       the video, or trim the start when it starts before, and write the
       result to ``<video name>.wav``.

    Temporary files are created in the directory of the video and are
    removed even when a step fails.
    """
    all_videos = find_all_video_files(output_dir)
    for video in tqdm(all_videos):
        video_dir = os.path.dirname(video)
        mkvfile = os.path.join(video_dir, 'temp.mkv')
        video_ts_file = os.path.join(video_dir, 'video_ts.txt')
        audio_ts_file = os.path.join(video_dir, 'audio_ts.txt')
        audio_tmp = os.path.join(video_dir, 'temp.wav')
        temp_files = (mkvfile, audio_tmp, video_ts_file, audio_ts_file)
        try:
            # Argument lists (no shell) keep paths containing spaces or
            # shell metacharacters intact.
            subprocess.call(['mkvmerge', '-o', mkvfile, video])
            subprocess.call(
                ['mkvextract', mkvfile, 'timestamps_v2', '0:' + video_ts_file])
            subprocess.call(
                ['mkvextract', mkvfile, 'timestamps_v2', '1:' + audio_ts_file])

            video_start = _read_first_timestamp(video_ts_file)
            audio_start = _read_first_timestamp(audio_ts_file)
            offset_ms = audio_start - video_start

            # extract audio
            subprocess.call(
                ['ffmpeg', '-i', video, '-ar', '44100', '-ac', '1', '-y',
                 audio_tmp])

            # use the offset to pad the audio with zeros, or trim the audio
            audio_name = os.path.splitext(video)[0] + '.wav'
            tfm = Transformer()
            if offset_ms >= 0:
                tfm.pad(start_duration=offset_ms / 1000)
            else:
                tfm.trim(start_time=-offset_ms / 1000)
            tfm.build(audio_tmp, audio_name)
        finally:
            # A failed step may not have produced every temporary file, so
            # only remove the ones that exist.
            for temp_file in temp_files:
                if os.path.exists(temp_file):
                    os.remove(temp_file)
Exemplo n.º 2
0
def _processSamples(sample_list):

    for sample in sample_list:

        sample_new_name = _renameSample(sample)
        _out = join(out_path, sample_new_name)
        processed_samples.append(_out)
        _in = sample

        # Sox processing using Transform instance
        tfm = Transformer()
        tfm.convert(samplerate=44100, n_channels=2, bitdepth=16)

        if NORMALIZE:
            tfm.norm(db_level=-3)
        if SILENCE:
            tfm.silence(location=-1,
                        silence_threshold=0.05,
                        min_silence_duration=0.1)
        if PADDING:
            tfm.pad(0, PADDING)

        tfm.build(_in, _out)