def run(args):
    """Dereverberate every utterance from ``args.wav_scp`` and write one wav per channel.

    For each ``(key, spectrogram)`` pair yielded by ``SpectrogramReader``, the
    spectrogram is passed through ``wpe``, converted back to samples with
    ``istft``, and written to ``<args.dst_dir>/<key>.CH<k>.wav`` where ``k``
    starts at 1.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``frame_length``, ``frame_shift``, ``window``, ``center``
            (STFT/iSTFT options), ``taps``, ``delay``, ``iters``, ``context``
            (forwarded to ``wpe``), ``wav_scp``, ``dst_dir`` and ``samp_freq``.

    Returns:
        None. Results are written to disk and a summary line is logged.
    """
    stft_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": args.center,  # False to be comparable with Kaldi
        "transpose": True,  # T x F
    }
    wpe_kwargs = {
        "taps": args.taps,
        "delay": args.delay,
        "iters": args.iters,
        "psd_context": args.context,
    }
    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)

    # exist_ok=True replaces the racy "check, then create" sequence: another
    # process creating the directory in between no longer raises here.
    os.makedirs(args.dst_dir, exist_ok=True)

    for key, reverbed in spectrogram_reader:
        # N x T x F => F x N x T
        reverbed = np.transpose(reverbed, [2, 0, 1])
        # F x N x T
        dereverb = wpe(reverbed, **wpe_kwargs)
        # F x N x T => N x T x F
        dereverb = np.transpose(dereverb, [1, 2, 0])
        # Write each channel (leading axis) to its own file; channel ids in
        # the file name are 1-based.
        for chid, spectra in enumerate(dereverb, start=1):
            samps = istft(spectra, **stft_kwargs)
            write_wav(
                os.path.join(args.dst_dir, "{}.CH{:d}.wav".format(key, chid)),
                samps,
                fs=args.samp_freq)
    logger.info("Processed {:d} utterances".format(len(spectrogram_reader)))
def run(args):
    """Dereverberate every utterance from ``args.wav_scp`` and write it via ``WaveWriter``.

    Each spectrogram yielded by ``SpectrogramReader`` is processed either by
    ``nara_wpe.wpe.wpe_v8`` (when ``args.nara_wpe`` is truthy) or by the local
    ``wpe`` helper. Utterances for which the solver raises
    ``np.linalg.LinAlgError`` are logged and skipped. Each remaining utterance
    is converted back to samples per channel and written as one stacked array
    under its key.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``frame_len``, ``frame_hop``, ``window``, ``center`` (STFT/iSTFT
            options), ``num_iters``, ``context``, ``taps``, ``delay`` (WPE
            options), ``wav_scp``, ``round_power_of_two``, ``dst_dir``,
            ``sr`` and ``nara_wpe``.

    Returns:
        None. Results go through ``WaveWriter`` and progress is logged.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,  # False to be comparable with Kaldi
        "transpose": True,  # T x F
    }
    wpe_kwargs = {
        "num_iters": args.num_iters,
        "context": args.context,
        "taps": args.taps,
        "delay": args.delay,
    }
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    # Optional backend: import once up front instead of on every iteration,
    # and only when it was actually requested.
    if args.nara_wpe:
        from nara_wpe.wpe import wpe_v8

    num_done = 0
    with WaveWriter(args.dst_dir, fs=args.sr) as writer:
        for key, reverbed in spectrogram_reader:
            logger.info(f"Processing utt {key}...")
            # N x T x F => F x N x T
            reverbed = np.transpose(reverbed, (2, 0, 1))
            try:
                if args.nara_wpe:
                    # NOTE(review): the transpose below treats the result as
                    # F x N x T, same as the input layout - confirm against
                    # the nara_wpe documentation.
                    dereverb = wpe_v8(reverbed,
                                      taps=args.taps,
                                      delay=args.delay,
                                      iterations=args.num_iters,
                                      psd_context=args.context)
                else:
                    dereverb = wpe(reverbed, **wpe_kwargs)
            except np.linalg.LinAlgError:
                # Skip this utterance rather than aborting the whole run.
                logger.warning(f"{key}: Failed cause LinAlgError in wpe")
                continue
            # F x N x T => N x T x F
            dereverb = np.transpose(dereverb, (1, 2, 0))
            # dump multi-channel: one inverse STFT per channel, stacked
            samps = np.stack(
                [inverse_stft(spectra, **stft_kwargs) for spectra in dereverb])
            writer.write(key, samps)
            # show progress because processing is slow
            num_done += 1
            if not num_done % 100:
                logger.info(f"Processed {num_done:d} utterances...")
    logger.info(
        f"Processed {num_done:d} utterances over {len(spectrogram_reader):d}")
def run(args):
    """Dereverberate every utterance from ``args.wav_scp`` and write it via ``WaveWriter``.

    Each spectrogram yielded by ``SpectrogramReader`` is passed through the
    ``wpe`` helper. Utterances for which it raises ``np.linalg.LinAlgError``
    are logged and skipped. Each remaining utterance is converted back to
    samples per channel with ``istft`` and written as one stacked array under
    its key.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``frame_len``, ``frame_hop``, ``window``, ``center`` (STFT/iSTFT
            options), ``num_iters``, ``context``, ``taps``, ``delay``
            (forwarded to ``wpe``), ``wav_scp``, ``round_power_of_two``,
            ``dst_dir`` and ``samp_fs``.

    Returns:
        None. Results go through ``WaveWriter`` and progress is logged.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,  # False to be comparable with Kaldi
        "transpose": True,  # T x F
    }
    wpe_kwargs = {
        "num_iters": args.num_iters,
        "context": args.context,
        "taps": args.taps,
        "delay": args.delay,
    }
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    num_done = 0
    with WaveWriter(args.dst_dir, fs=args.samp_fs) as writer:
        for key, reverbed in spectrogram_reader:
            logger.info("Processing utt {}...".format(key))
            # N x T x F => F x N x T
            reverbed = np.transpose(reverbed, (2, 0, 1))
            try:
                # F x N x T
                dereverb = wpe(reverbed, **wpe_kwargs)
            except np.linalg.LinAlgError:
                # Skip this utterance rather than aborting the whole run.
                # Logger.warn is a deprecated alias; use Logger.warning.
                logger.warning(
                    "{}: Failed cause LinAlgError in wpe".format(key))
                continue
            # F x N x T => N x T x F
            dereverb = np.transpose(dereverb, (1, 2, 0))
            # dump multi-channel: one inverse STFT per channel, stacked
            samps = np.stack(
                [istft(spectra, **stft_kwargs) for spectra in dereverb])
            writer.write(key, samps)
            # show progress because processing is slow
            num_done += 1
            if not num_done % 100:
                logger.info("Processed {:d} utterances...".format(num_done))
    logger.info("Processed {:d} utterances over {:d}".format(
        num_done, len(spectrogram_reader)))