def test_spectrum(self):
    """Run Pitch on the sm1_cln.wav fixture and check the output is rank 2."""
    wav_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')

    with self.cached_session(use_gpu=False, force_gpu=False):
        wav_reader = ReadWav.params().instantiate()
        samples, rate = wav_reader(wav_file)

        pitch_params = {
            'window_length': 0.025,
            'soft_min_f0': 10.0
        }
        pitch_op = Pitch.params(pitch_params).instantiate()
        pitch_test = pitch_op(samples, rate)

        rank_tensor = tf.rank(pitch_test)
        self.assertEqual(rank_tensor.eval(), 2)

        # NOTE(review): these reference rows are constructed but never
        # compared against pitch_test in this test -- confirm whether a
        # value assertion was disabled on purpose before relying on them.
        output_true = np.array([
            [0.03881124, 0.3000031, -0.02324523],
            [0.006756478, 0.3000097, 0.01047742],
            [0.02455365, 0.3000154, 0.00695902],
            [0.02453586, 0.3000221, 0.008448198],
            [0.03455311, 0.3000307, -0.07547269],
            [0.04293294, 0.3000422, -0.04193667],
        ])
def test_pitch(self):
    """Compare the first 50 Pitch outputs for sm1_cln.wav with stored values."""
    wav_file = str(Path(PACKAGE_ROOT_DIR) / 'layers/ops/data/sm1_cln.wav')

    with self.cached_session(use_gpu=False, force_gpu=False):
        wav_reader = ReadWav.params().instantiate()
        raw_samples, rate = wav_reader.call(wav_file)
        # Scale the samples by 32768 before pitch extraction (presumably maps
        # 16-bit PCM amplitudes to [-1, 1) -- confirm against ReadWav).
        scaled_samples = raw_samples / 32768

        pitch_params = {
            'window_length': 0.025,
            'frame_length': 0.010,
            'thres_autoc': 0.3
        }
        pitch_op = Pitch.params(pitch_params).instantiate()
        pitch_test = pitch_op(scaled_samples, rate)

        # Stored reference: 20 leading zeros followed by 30 non-zero values.
        output_true = np.array([
            0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
            0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
            0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
            0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
            122.823532, 117.647057, 116.788322, 116.788322, 119.402985,
            119.402985, 119.402985, 119.402985, 119.402985, 123.076920,
            124.031006, 125.000000, 132.065216, 139.130432, 139.130432,
            137.931030, 126.108368, 114.285713, 115.107910, 122.070084,
            129.032257, 130.081299, 130.081299, 129.032257, 130.081299,
            131.147537, 129.032257, 125.000000, 120.300751, 115.107910,
        ])

        first_fifty = pitch_test.eval().flatten()[:50]
        self.assertAllClose(first_fifty, output_true)
def compute_pitch():
    """Compute pitch features for every utterance of a Kaldi-style input.

    Command-line options are parsed with ``get_parser()``. They configure a
    ``Pitch`` instance, which is applied to each ``(sample_rate, array)``
    entry read through ``kaldiio.ReadHelper``. The squeezed result is written
    to ``args.wspecifier`` under the same utterance id.

    A single ``tf.Session`` is opened for the whole run and closed on exit,
    rather than creating a new, never-closed session for every utterance.
    """
    args = get_parser().parse_args()

    # Options copied verbatim from the parsed arguments into the config.
    passthrough_options = (
        'window_length',
        'frame_length',
        'snip_edges',
        'preemph_coeff',
        'min_f0',
        'max_f0',
        'soft_min_f0',
        'penalty_factor',
        'lowpass_cutoff',
        'resample_freq',
        'delta_pitch',
        'nccf_ballast',
        'lowpass_filter_width',
        'upsample_filter_width',
        'max_frames_latency',
        'frames_per_chunk',
        'simulate_first_pass_online',
        'recompute_frame',
        'nccf_ballast_online',
    )
    # The sample rate is the only option coerced (to int) before use.
    config = {'sample_rate': int(args.sample_rate)}
    config.update((name, getattr(args, name)) for name in passthrough_options)

    pitch = Pitch.params(config).instantiate()

    with kaldiio.ReadHelper(args.rspecifier,
                            segments=args.segments) as reader, \
            KaldiWriter(args.wspecifier,
                        write_num_frames=args.write_num_frames,
                        compress=args.compress,
                        compression_method=args.compression_method) as writer, \
            tf.Session() as sess:
        for utt_id, (sample_rate, array) in reader:
            # Follow the rate stored with the audio when it differs from the
            # one given on the command line.
            if sample_rate != args.sample_rate:
                args.sample_rate = sample_rate

            array = array.astype(np.float32)
            audio_data = tf.constant(array, dtype=tf.float32)
            pitch_test = tf.squeeze(pitch(audio_data, args.sample_rate))

            # NOTE: every utterance still adds new nodes to the default
            # graph; only the session is shared across iterations.
            writer[utt_id] = sess.run(pitch_test)
def test_spectrum(self):
    """Check rank and the first five rows of Pitch output for sm1_cln.wav."""
    wav_file = str(Path(PACKAGE_OPS_DIR) / 'data/sm1_cln.wav')

    with self.cached_session(use_gpu=False, force_gpu=False):
        wav_reader = ReadWav.params().instantiate()
        samples, rate = wav_reader(wav_file)

        pitch_params = {
            'window_length': 0.025,
            'soft_min_f0': 10.0
        }
        pitch_op = Pitch.params(pitch_params).instantiate()
        pitch_test = pitch_op(samples, rate)

        rank_tensor = tf.rank(pitch_test)
        self.assertEqual(rank_tensor.eval(), 2)

        # Stored reference values for the first five output rows.
        output_true = [
            [-0.1366025, 143.8855],
            [-0.0226383, 143.8855],
            [-0.08464742, 143.8855],
            [-0.08458386, 143.8855],
            [-0.1208689, 143.8855],
        ]

        leading_rows = pitch_test.eval()[0:5, :]
        self.assertAllClose(
            leading_rows, output_true, rtol=1e-05, atol=1e-05)
def __init__(self, config: dict):
    """Initialise the parent class and build the Fbank and Pitch members.

    Args:
        config: Configuration dictionary; the same object is passed to the
            parent constructor, to ``Fbank`` and to ``Pitch``.
    """
    super().__init__(config)

    # Both extractors are built from the shared config, Fbank first.
    self.fbank = Fbank(config)
    self.pitch = Pitch(config)