def run(args):
    """Rebuild waveforms from stored spectral features with Griffin-Lim.

    Every (key, feature matrix) pair yielded by ``ScriptReader(args.feat_scp)``
    is mapped back to a linear magnitude spectrogram, handed to
    ``griffin_lim`` and the resulting samples are written through a
    ``WaveWriter`` rooted at ``args.dump_dir``.

    Attributes read from ``args``:
        frame_len, frame_hop, window, center, epochs: forwarded to
            ``griffin_lim``; ``frame_len`` also determines the FFT size via
            ``nfft``.
        feat_scp: argument of ``ScriptReader``.
        fbank: if truthy, features are mel filter-bank outputs and are
            projected back to the linear frequency axis with the
            pseudo-inverse of the mel filter matrix.
        num_bins, min_freq, max_freq: mel filter parameters (only read when
            ``fbank`` is truthy).
        samp_freq: sample rate for the mel filters and the wave writer.
        dump_dir, normalize: forwarded to ``WaveWriter``.
        apply_log: if truthy, features are exponentiated first.
        apply_pow: if truthy, features are square-rooted (power -> magnitude).

    Raises:
        RuntimeError: if the second dimension of a processed feature matrix
            is not ``nfft(args.frame_len) // 2 + 1``.
    """
    # Read the frame length once. The original mixed ``args.frame_len`` and
    # ``args.frame_length``; using a single source keeps the FFT size used for
    # the mel filters / bin check in agreement with the Griffin-Lim settings.
    frame_len = args.frame_len
    griffin_lim_kwargs = {
        "frame_len": frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": True,
        "epochs": args.epochs,
    }
    feature_reader = ScriptReader(args.feat_scp)

    # Loop invariants: FFT size and the bin count implied by it.
    fft_size = nfft(frame_len)
    half_fft = fft_size // 2

    mel_inv_weights_t = None
    if args.fbank:
        mel_kwargs = {
            "n_mels": args.num_bins,
            "fmin": args.min_freq,
            "fmax": args.max_freq,
            "htk": True,
        }
        # N x F
        mel_weights = audio_lib.filters.mel(args.samp_freq, fft_size,
                                            **mel_kwargs)
        # F x N, transposed once here instead of once per utterance
        mel_inv_weights_t = np.transpose(np.linalg.pinv(mel_weights))

    with WaveWriter(args.dump_dir, fs=args.samp_freq,
                    normalize=args.normalize) as writer:
        for key, spec in feature_reader:
            # if log, transform to linear
            if args.apply_log:
                spec = np.exp(spec)
            # convert fbank to spectrum
            # feat: T x N
            if args.fbank:
                # the pseudo-inverse can produce non-positive values; floor
                # them at EPSILON before any further processing
                spec = np.maximum(spec @ mel_inv_weights_t, EPSILON)
            # if power spectrum, transform to magnitude spectrum
            if args.apply_pow:
                spec = np.sqrt(spec)
            if spec.shape[1] - 1 != half_fft:
                raise RuntimeError("Seems missing --fbank options?")
            # griffin lim
            samps = griffin_lim(spec, **griffin_lim_kwargs)
            writer.write(key, samps)
    logger.info("Processed {:d} utterance done".format(len(feature_reader)))
def run(args):
    """Turn stored spectrogram features back into waveforms.

    Features are read from ``args.feat_scp`` with the reader selected by
    ``args.fmt`` (``"numpy"`` -> ``NumpyReader``, ``"kaldi"`` ->
    ``ScriptReader``). For each utterance the features are optionally
    exponentiated (``args.apply_log``) and square-rooted (``args.apply_pow``),
    then converted to samples:

    * without ``args.phase_ref``: by ``griffin_lim`` using ``args.epoches``;
    * with ``args.phase_ref``: by ``inverse_stft`` after attaching the phase
      of the matching entry from a ``SpectrogramReader``.

    Samples are written through ``WaveWriter(args.dump_dir, fs=args.sr,
    normalize=args.normalize)``.

    Raises:
        KeyError: if ``args.fmt`` is not a known format, or an utterance key
            is absent from the phase reference reader.
    """
    stft_kwargs = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )

    reader_cls = {"numpy": NumpyReader, "kaldi": ScriptReader}[args.fmt]
    feature_reader = reader_cls(args.feat_scp)

    phase_reader = None
    if args.phase_ref:
        phase_reader = SpectrogramReader(
            args.phase_ref,
            **stft_kwargs,
            round_power_of_two=args.round_power_of_two)
        logger.info(f"Using phase reference from {args.phase_ref}")

    with WaveWriter(args.dump_dir, fs=args.sr,
                    normalize=args.normalize) as writer:
        for key, spec in feature_reader:
            logger.info(f"Processing utterance {key}...")
            # log domain -> linear domain
            if args.apply_log:
                spec = np.exp(spec)
            # power spectrum -> magnitude spectrum
            if args.apply_pow:
                spec = np.sqrt(spec)

            if phase_reader is not None:
                if key not in phase_reader:
                    raise KeyError(f"Missing key {key} in phase reader")
                ref = phase_reader[key]
                # a 3-D reference contributes only its first slice
                # (presumably the first channel -- confirm with the reader)
                if ref.ndim == 3:
                    ref = ref[0]
                # unit-modulus complex factor carrying the reference phase
                phase = np.exp(np.angle(ref) * 1j)
                samps = inverse_stft(spec * phase, **stft_kwargs, norm=0.8)
            else:
                # no reference phase available: estimate it iteratively
                samps = griffin_lim(spec,
                                    epoches=args.epoches,
                                    transpose=True,
                                    norm=0.8,
                                    **stft_kwargs)
            writer.write(key, samps)
    logger.info(f"Processed {len(feature_reader)} utterance done")