Пример #1
0
def run(args):
    """Dereverberate every utterance listed in ``args.wav_scp`` with WPE.

    Each spectrogram yielded by ``SpectrogramReader`` is passed through
    ``wpe``; every channel of the result is converted back to samples with
    ``istft`` and written to ``<args.dst_dir>/<key>.CH<n>.wav``, where ``n``
    starts at 1.

    Args:
        args: Parsed options. Attributes read here: ``frame_length``,
            ``frame_shift``, ``window``, ``center``, ``taps``, ``delay``,
            ``iters``, ``context``, ``wav_scp``, ``dst_dir`` and
            ``samp_freq``.
    """
    stft_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": args.center,  # False to be comparable with Kaldi
        "transpose": True  # T x F
    }
    wpe_kwargs = {
        "taps": args.taps,
        "delay": args.delay,
        "iters": args.iters,
        "psd_context": args.context
    }
    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)

    # exist_ok=True avoids the check-then-create race that raises
    # FileExistsError when several jobs share the same output directory.
    os.makedirs(args.dst_dir, exist_ok=True)

    for key, reverbed in spectrogram_reader:
        # N x T x F => F x N x T
        reverbed = np.transpose(reverbed, [2, 0, 1])
        # F x N x T
        dereverb = wpe(reverbed, **wpe_kwargs)
        # F x N x T => N x T x F
        dereverb = np.transpose(dereverb, [1, 2, 0])
        # write one file per channel; channel ids in file names are 1-based
        for chid, spectra in enumerate(dereverb, start=1):
            samps = istft(spectra, **stft_kwargs)
            write_wav(os.path.join(args.dst_dir,
                                   "{}.CH{:d}.wav".format(key, chid)),
                      samps,
                      fs=args.samp_freq)
    logger.info("Processed {:d} utterances".format(len(spectrogram_reader)))
Пример #2
0
def run(args):
    """Dereverberate every utterance listed in ``args.wav_scp`` with WPE.

    Each spectrogram yielded by ``SpectrogramReader`` is dereverberated,
    either with ``nara_wpe.wpe.wpe_v8`` (when ``args.nara_wpe`` is truthy) or
    with the local ``wpe``. The per-channel signals recovered by
    ``inverse_stft`` are stacked and handed to ``WaveWriter.write``.
    Utterances for which WPE raises ``np.linalg.LinAlgError`` are logged and
    skipped.

    Args:
        args: Parsed options. Attributes read here: ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``num_iters``,
            ``context``, ``taps``, ``delay``, ``wav_scp``,
            ``round_power_of_two``, ``nara_wpe``, ``dst_dir`` and ``sr``.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,  # False to be comparable with Kaldi
        "transpose": True  # T x F
    }
    wpe_kwargs = {
        "num_iters": args.num_iters,
        "context": args.context,
        "taps": args.taps,
        "delay": args.delay
    }
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    # nara_wpe is an optional dependency: import it only when requested, and
    # once up front so a missing package fails before any output is written
    # instead of being re-resolved for every utterance.
    if args.nara_wpe:
        from nara_wpe.wpe import wpe_v8

    num_done = 0
    with WaveWriter(args.dst_dir, fs=args.sr) as writer:
        for key, reverbed in spectrogram_reader:
            logger.info(f"Processing utt {key}...")
            # N x T x F => F x N x T
            reverbed = np.transpose(reverbed, (2, 0, 1))
            try:
                if args.nara_wpe:
                    # F x N x T, as assumed by the transpose below
                    dereverb = wpe_v8(reverbed,
                                      taps=args.taps,
                                      delay=args.delay,
                                      iterations=args.num_iters,
                                      psd_context=args.context)
                else:
                    # F x N x T
                    dereverb = wpe(reverbed, **wpe_kwargs)
            except np.linalg.LinAlgError:
                # skip this utterance rather than abort the whole run
                logger.warning(f"{key}: Failed cause LinAlgError in wpe")
                continue
            # F x N x T => N x T x F
            dereverb = np.transpose(dereverb, (1, 2, 0))
            # dump multi-channel
            samps = np.stack(
                [inverse_stft(spectra, **stft_kwargs) for spectra in dereverb])
            writer.write(key, samps)
            # report progress periodically because processing is slow
            num_done += 1
            if not num_done % 100:
                logger.info(f"Processed {num_done:d} utterances...")
    logger.info(
        f"Processed {num_done:d} utterances over {len(spectrogram_reader):d}")
Пример #3
0
def run(args):
    """Dereverberate every utterance listed in ``args.wav_scp`` with WPE.

    Each spectrogram yielded by ``SpectrogramReader`` is passed through
    ``wpe``. The per-channel signals recovered by ``istft`` are stacked and
    handed to ``WaveWriter.write``. Utterances for which ``wpe`` raises
    ``np.linalg.LinAlgError`` are logged and skipped.

    Args:
        args: Parsed options. Attributes read here: ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``num_iters``,
            ``context``, ``taps``, ``delay``, ``wav_scp``,
            ``round_power_of_two``, ``dst_dir`` and ``samp_fs``.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,  # False to be comparable with Kaldi
        "transpose": True  # T x F
    }
    wpe_kwargs = {
        "num_iters": args.num_iters,
        "context": args.context,
        "taps": args.taps,
        "delay": args.delay
    }
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    num_done = 0
    with WaveWriter(args.dst_dir, fs=args.samp_fs) as writer:
        for key, reverbed in spectrogram_reader:
            logger.info("Processing utt {}...".format(key))
            # N x T x F => F x N x T
            reverbed = np.transpose(reverbed, (2, 0, 1))
            try:
                # F x N x T
                dereverb = wpe(reverbed, **wpe_kwargs)
            except np.linalg.LinAlgError:
                # skip this utterance rather than abort the whole run;
                # Logger.warn is a deprecated alias of Logger.warning
                logger.warning(
                    "{}: Failed cause LinAlgError in wpe".format(key))
                continue
            # F x N x T => N x T x F
            dereverb = np.transpose(dereverb, (1, 2, 0))
            # dump multi-channel
            samps = np.stack(
                [istft(spectra, **stft_kwargs) for spectra in dereverb])
            writer.write(key, samps)
            # report progress periodically because processing is slow
            num_done += 1
            if not num_done % 100:
                logger.info("Processed {:d} utterances...".format(num_done))
    logger.info("Processed {:d} utterances over {:d}".format(
        num_done, len(spectrogram_reader)))