def combine_wavs_batch(audio_paths, method, **kargv):
    """Split audio files on silence, then export merged runs of neighbouring segments.

    The files in ``audio_paths`` are first passed, one by one, to the
    silence splitter selected by ``method``. Afterwards the module-level
    ``wavs`` / ``paths`` lists are walked: starting from each segment that
    has a transcript in the alignment JSON, following segments are appended
    (separated by the module-level ``silence`` clip) for as long as the
    running length stays within the limit and every appended segment has a
    transcript. Each merged clip is exported as a wav file and registered in
    the alignment JSON with the concatenated transcript, and the JSON file
    is rewritten at the end.

    NOTE(review): ``wavs``, ``paths``, ``silence`` and ``config`` are not
    defined in this function; they are presumably module-level globals
    filled in by the split helpers and the script entry point -- confirm.
    The segments look like pydub ``AudioSegment`` objects (``+``
    concatenation, ``.export``), in which case lengths are in milliseconds
    -- confirm.

    Args:
        audio_paths: List of audio file paths. Sorted in place.
        method: ``"librosa"`` or ``"pydub"`` (case-insensitive).
        **kargv: Extra keyword arguments forwarded to the split helper.

    Returns:
        Always ``0``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    audio_paths.sort()
    method = method.lower()

    # Fail with a clear message instead of hitting an unbound `fn` below.
    if method not in ("librosa", "pydub"):
        raise ValueError(
            "Unknown method {!r}; expected 'librosa' or 'pydub'".format(method))

    # Nothing to split or combine; `audio_paths[0]` below would raise otherwise.
    if not audio_paths:
        return 0

    if method == "librosa":
        fn = partial(split_on_silence_with_librosa, **kargv)
    else:
        fn = partial(split_on_silence_with_pydub, **kargv)

    parallel_run(fn, audio_paths, desc="Split on silence", parallel=False)

    # Output names are built from the first input: "<dir>/<stem>.<iiii>.<jjjj>.wav",
    # where <stem> is the basename up to its first dot.
    audio_path = audio_paths[0]
    stem = os.path.basename(audio_path).split('.', 1)[0]
    directory = os.path.dirname(audio_path)
    # Only add the separator when there is a directory part; a bare file name
    # must not turn into an absolute "/<stem>." path at the filesystem root.
    prefix = (directory + "/" if directory else "") + stem + "."

    data = load_json(config.alignment_path, encoding="utf8")

    max_length = 15000  # upper bound on a combined clip (same unit as len(wav))
    gap_length = 400    # budget added per join; presumably len(silence) -- TODO confirm

    for i in range(len(wavs) - 1):
        # A run can only start from a segment that is short enough and transcribed.
        if len(wavs[i]) > max_length or paths[i] not in data:
            continue

        total = len(wavs[i])
        filename = prefix + str(i).zfill(4) + "."
        asr = data[paths[i]] + " "
        concated = wavs[i]

        for j in range(i + 1, len(wavs)):
            total += len(wavs[j]) + gap_length
            # Stop extending once the clip would be too long or a transcript is missing.
            if total > max_length or paths[j] not in data:
                break

            filename = filename + str(j).zfill(4) + "."
            asr = asr + data[paths[j]] + " "
            concated = concated + silence + wavs[j]

            # NOTE(review): every successive extension (i..j) is exported, not
            # only the longest run -- confirm this nesting is the intended one.
            final_fn = filename + "wav"
            data[final_fn] = asr
            concated.export(final_fn, format="wav")
            print(filename + "wav | " + str(len(concated)))

    if os.path.exists(config.alignment_path):
        backup_file(config.alignment_path)

    write_json(config.alignment_path, data)
    get_durations(data.keys(), print_detail=False)
    return 0
results.update(item) found_count = sum([type(value) == str for value in results.values()]) print(" [*] # found: {:.5f}% ({}/{})".format( len(results) / len(data), len(results), len(data))) print(" [*] # exact match: {:.5f}% ({}/{})".format( found_count / len(items), found_count, len(items))) return results

if __name__ == '__main__':
    # Script entry point: run the batch alignment on a recognition file and
    # store the result as JSON in the same directory as that file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--recognition_path', required=True)
    parser.add_argument('--alignment_filename', default="alignment.json")
    parser.add_argument('--score_threshold', default=0.4, type=float)
    parser.add_argument('--recognition_encoding', default='utf-8')
    # parse_known_args() does not reject unrecognized flags; they are returned
    # in `unparsed`, which is not used below.
    config, unparsed = parser.parse_known_args()

    results = align_text_batch(config)

    # The output file lives next to the recognition file.
    base_dir = os.path.dirname(config.recognition_path)
    alignment_path = \
            os.path.join(base_dir, config.alignment_filename)

    # An existing alignment file is handed to backup_file() before it is
    # overwritten (presumably to keep a copy -- confirm against backup_file).
    if os.path.exists(alignment_path):
        backup_file(alignment_path)

    write_json(alignment_path, results)
    # `duration` is assigned but not read again in the visible code.
    duration = get_durations(results.keys(), print_detail=False)
if __name__ == "__main__":
    # Script entry point: for every audio file matching --audio_pattern, find
    # its "<name>_s??.wav" pieces and hand them to combine_wavs_batch(), which
    # splits them on silence and exports merged clips.
    parser = argparse.ArgumentParser()
    parser.add_argument('--audio_pattern', required=True)
    parser.add_argument('--alignment_path', required=True)
    parser.add_argument('--out_ext', default='wav')
    parser.add_argument('--method', choices=['librosa', 'pydub'], required=True)
    config = parser.parse_args()

    audio_paths = glob(config.audio_pattern)

    for path in audio_paths:
        # "foo.wav" -> "foo_s??.wav": glob pattern for the two-character-suffixed pieces.
        single_path = path.replace('.wav', '_s??.wav')
        single_paths = glob(single_path)

        # combine_wavs_batch() indexes the first path, so skip files without pieces.
        if not single_paths:
            print(" [!] No files match {}; skipping".format(single_path))
            continue

        combine_wavs_batch(
            single_paths, config.method,
            out_ext=config.out_ext,
        )

        # Reset the module-level segment lists read by combine_wavs_batch()
        # (presumably filled by the split helpers -- confirm) so the next
        # file starts from an empty state.
        wavs = []
        paths = []

    # combine_wavs_batch() reloads, extends and rewrites the alignment file on
    # every call. Writing back a snapshot taken before the loop would discard
    # those additions, so the file is only re-read here for the final summary.
    data = load_json(config.alignment_path, encoding="utf8")
    get_durations(data.keys(), print_detail=False)