def test_fbank(self):
    """Test Fbank using 16kHz && 8kHz wav.

    First extracts features from the 16 kHz file with ``delta_delta``
    disabled and prints a small slice.  Then each wav file is run through
    two configurations (``delta_delta`` on and off, same frequency-limit
    settings) and the features and channel count are printed.

    No numeric assertion is active: the value comparison is commented out,
    so the test passes as long as every extraction runs.
    """

    def _to_numpy(tensor):
        """Fetch a tensor's value in either eager or graph mode."""
        if tf.executing_eagerly():
            return tensor.numpy()
        # Graph mode: relies on the default session opened below.
        return tensor.eval()

    main_root = Path(os.environ["MAIN_ROOT"])
    wav_path_16k = str(main_root.joinpath("examples/sm1_cln.wav"))
    wav_path_8k = str(main_root.joinpath("examples/english.wav"))

    with self.session():
        # value test
        read_wav = ReadWav.params().instantiate()
        input_data, sample_rate = read_wav(wav_path_16k)
        fbank = Fbank.params({"delta_delta": False}).instantiate()
        fbank_test = fbank(input_data, sample_rate)
        # Reference values for the comparison below, which is disabled;
        # kept so the check can be restored later.
        real_fank_feats = np.array([
            [3.768338, 4.946218, 6.289874, 6.330853, 6.761764, 6.884573],
            [3.803553, 5.450971, 6.547878, 5.796172, 6.397846, 7.242926],
        ])
        # self.assertAllClose(np.squeeze(fbank_test.eval()[0:2, 0:6, 0]),
        #                     real_fank_feats, rtol=1e-05, atol=1e-05)
        print(_to_numpy(fbank_test)[0:2, 0:6, 0])

        for wav_file in [wav_path_8k, wav_path_16k]:
            read_wav = ReadWav.params().instantiate()
            input_data, sample_rate = read_wav(wav_file)
            print(wav_file, _to_numpy(sample_rate))

            # Same frequency-limit settings with and without delta_delta.
            for delta_delta in (True, False):
                conf = {
                    "delta_delta": delta_delta,
                    "lower_frequency_limit": 100,
                    "upper_frequency_limit": 0,
                }
                fbank = Fbank.params(conf).instantiate()
                fbank_test = fbank(input_data, sample_rate)
                # Fetch the values for both configurations so a graph-mode
                # run executes the op instead of printing the symbolic
                # tensor.
                print(_to_numpy(fbank_test))
                print(fbank.num_channels())

            # Drop the per-file reader and extractor before the next file.
            del read_wav
            del fbank
def test_framepow(self):
    """Compare Framepow output for ``sm1_cln.wav`` with reference values.

    Features are extracted with ``snip_edges`` set to 1.  The first five
    values must match the stored references within rtol/atol of 1e-05,
    and are then printed.
    """

    def _first_five(tensor):
        """Return ``tensor[0:5]`` as numpy, in eager or graph mode."""
        if tf.executing_eagerly():
            values = tensor.numpy()
        else:
            values = tensor.eval()
        return values[0:5]

    wav_path_16k = str(Path(os.environ["MAIN_ROOT"]) / "examples/sm1_cln.wav")
    expected = np.array([9.819611, 9.328745, 9.247337, 9.26451, 9.266059])

    with self.session():
        reader = ReadWav.params().instantiate()
        input_data, sample_rate = reader(wav_path_16k)

        extractor = Framepow.params({"snip_edges": 1}).instantiate()
        framepow_test = extractor(input_data, sample_rate)

        self.assertAllClose(
            _first_five(framepow_test),
            expected,
            rtol=1e-05,
            atol=1e-05,
        )
        # Fetched again for display, as in the original flow.
        print(_first_five(framepow_test))
def test_mel_spectrum(self):
    """Smoke-test MelSpectrum extraction on ``sm1_cln.wav``.

    Runs the extractor with an explicit configuration and prints the
    ``[0:2, 0:10]`` slice of the result.  Nothing is asserted.
    """
    # 16kHz test
    wav_path_16k = str(Path(os.environ['MAIN_ROOT']) / 'examples/sm1_cln.wav')

    config = {
        'type': 'MelSpectrum',
        'window_type': 'hann',
        'upper_frequency_limit': 7600,
        'filterbank_channel_count': 80,
        'lower_frequency_limit': 80,
        'dither': 0.0,
        'window_length': 0.025,
        'frame_length': 0.010,
        'remove_dc_offset': False,
        'preEph_coeff': 0.0,
        'output_type': 3
    }

    with self.session():
        # value test
        reader = ReadWav.params().instantiate()
        input_data, sample_rate = reader(wav_path_16k)

        extractor = MelSpectrum.params(config).instantiate()
        mel_spectrum_test = extractor(input_data, sample_rate)

        # Eager tensors expose .numpy(); graph tensors need .eval().
        values = (mel_spectrum_test.numpy()
                  if tf.executing_eagerly() else mel_spectrum_test.eval())
        print(values[0:2, 0:10])
def test_write_wav(self):
    """Read ``sm1_cln.wav`` with ``speed`` 1.1 and write it to a new file.

    The result is written to ``examples/sm1_cln_resample.wav`` under
    ``MAIN_ROOT``.  Nothing is asserted beyond the write op running in the
    session.
    """
    main_root = Path(os.environ["MAIN_ROOT"])
    wav_path = str(main_root / "examples/sm1_cln.wav")
    new_path = str(main_root / "examples/sm1_cln_resample.wav")

    with self.cached_session() as sess:
        reader = ReadWav.params({"speed": 1.1}).instantiate()
        input_data, sample_rate = reader(wav_path)

        writer = WriteWav.params().instantiate()
        # NOTE(review): presumably rescales int16-range samples to [-1, 1)
        # before writing; confirm against WriteWav.
        scaled = input_data / 32768
        writewav_op = writer(new_path, scaled, sample_rate)
        sess.run(writewav_op)
def test_spectrum(self):
    """Test Pitch using the 16kHz wav.

    Extracts Pitch features from ``sm1_cln.wav``, checks that the output
    is rank 2, and compares the first five rows with reference values
    (printed under the label "kaldi") within rtol/atol of 1e-05.

    NOTE(review): the method name is kept for compatibility, but it tests
    Pitch rather than Spectrum.  Another ``test_spectrum`` is defined later
    in this source; if both live in the same class, the later definition
    replaces this one and this test never runs.  Consider renaming it to
    ``test_pitch``.
    """

    def _to_numpy(tensor):
        """Fetch a tensor's value in either eager or graph mode."""
        if tf.executing_eagerly():
            return tensor.numpy()
        return tensor.eval()

    wav_path_16k = str(
        Path(os.environ["MAIN_ROOT"]).joinpath("examples/sm1_cln.wav"))

    output_true = [
        [-0.1366025, 143.8855],
        [-0.0226383, 143.8855],
        [-0.08464742, 143.8855],
        [-0.08458386, 143.8855],
        [-0.1208689, 143.8855],
    ]

    with self.session():
        read_wav = ReadWav.params().instantiate()
        input_data, sample_rate = read_wav(wav_path_16k)

        pitch = Pitch.params({
            "window_length": 0.025,
            "soft_min_f0": 10.0
        }).instantiate()
        pitch_test = pitch(input_data, sample_rate)

        self.assertEqual(_to_numpy(tf.rank(pitch_test)), 2)

        # Evaluate once and reuse, so the printed rows are exactly the
        # rows that are asserted (in both eager and graph mode).
        first_rows = _to_numpy(pitch_test)[0:5, :]
        print("Transform: ", first_rows)
        print("kaldi:", output_true)
        self.assertAllClose(
            first_rows,
            output_true,
            rtol=1e-05,
            atol=1e-05,
        )
def test_read_wav(self):
    """Read ``sm1_cln.wav`` through ReadWav at speed 0.9 and via librosa.

    NOTE(review): the comparison against the librosa data only runs when
    ``speed == 1.0``.  With ``speed`` fixed at 0.9 the guard is false, so
    no assertion is executed by this test as written.
    """

    def _to_numpy(tensor):
        """Fetch a tensor's value in either eager or graph mode."""
        return tensor.numpy() if tf.executing_eagerly() else tensor.eval()

    wav_path = str(Path(os.environ['MAIN_ROOT']) / 'examples/sm1_cln.wav')
    speed = 0.9

    with self.session():
        reader = ReadWav.params().instantiate()
        input_data, sample_rate = reader(wav_path, speed)

        audio_data_true, sample_rate_true = librosa.load(wav_path, sr=16000)

        # Only an unmodified-speed read is compared with librosa.
        if speed != 1.0:
            return

        self.assertAllClose(_to_numpy(input_data) / 32768, audio_data_true)
        self.assertAllClose(_to_numpy(sample_rate), sample_rate_true)
def test_spectrum(self):
    """Run Spectrum on ``english.wav`` and ``sm1_cln.wav``.

    Every output must be rank 2.  For ``sm1_cln.wav`` (the file named
    16k here) the ``[0:2, 0:5]`` slice is additionally compared with
    reference values within rtol/atol of 1e-05.
    """

    def _to_numpy(tensor):
        """Fetch a tensor's value in either eager or graph mode."""
        if tf.executing_eagerly():
            return tensor.numpy()
        return tensor.eval()

    main_root = Path(os.environ["MAIN_ROOT"])
    wav_path_16k = str(main_root / "examples/sm1_cln.wav")
    wav_path_8k = str(main_root / "examples/english.wav")

    output_true = np.array([
        [9.819611, 2.84503, 3.660894, 2.7779, 1.212233],
        [9.328745, 2.553949, 3.276319, 3.000918, 2.499342],
    ])

    with self.session():
        for wav_file in (wav_path_8k, wav_path_16k):
            reader = ReadWav.params().instantiate()
            input_data, sample_rate = reader(wav_file)

            extractor = Spectrum.params({
                "window_length": 0.025,
                "dither": 0.0
            }).instantiate()
            spectrum_test = extractor(input_data, sample_rate)

            self.assertEqual(_to_numpy(tf.rank(spectrum_test)), 2)

            # Reference values exist only for the 16k file.
            if wav_file != wav_path_16k:
                continue

            self.assertAllClose(
                _to_numpy(spectrum_test)[0:2, 0:5],
                output_true,
                rtol=1e-05,
                atol=1e-05,
            )
def test_FbankPitch(self):
    """Check that FbankPitch output for ``sm1_cln.wav`` is rank 3.

    After the rank assertion, the ``[0:2, :, 0]`` slice of the features is
    printed.  No feature values are asserted.
    """

    def _to_numpy(tensor):
        """Fetch a tensor's value in either eager or graph mode."""
        return tensor.numpy() if tf.executing_eagerly() else tensor.eval()

    wav_path = str(Path(os.environ['MAIN_ROOT']) / 'examples/sm1_cln.wav')

    config = {
        'window_length': 0.025,
        'output_type': 1,
        'frame_length': 0.010,
        'dither': 0.0
    }

    with self.session():
        reader = ReadWav.params().instantiate()
        input_data, sample_rate = reader(wav_path)

        extractor = FbankPitch.params(config).instantiate()
        fbank_pitch_test = extractor(input_data, sample_rate)

        self.assertEqual(_to_numpy(tf.rank(fbank_pitch_test)), 3)
        print(_to_numpy(fbank_pitch_test)[0:2, :, 0])
def test_mfcc(self):
    """Compare Mfcc output for ``sm1_cln.wav`` with reference values.

    Features are extracted with ``use_energy`` enabled.  The
    ``[0, 0:2, 0:6]`` slice must match the stored references within
    rtol/atol of 1e-05.
    """
    wav_path_16k = str(Path(os.environ["MAIN_ROOT"]) / "examples/sm1_cln.wav")

    expected = np.array([
        [9.819611, -30.58736, -7.088838, -10.67966, -1.646479, -4.36086],
        [9.328745, -30.73371, -6.128432, -7.930599, 3.208357, -1.086456],
    ])

    with self.session():
        reader = ReadWav.params().instantiate()
        input_data, sample_rate = reader(wav_path_16k)

        extractor = Mfcc.params({"use_energy": True}).instantiate()
        mfcc_test = extractor(input_data, sample_rate)

        # Eager tensors expose .numpy(); graph tensors need .eval().
        if tf.executing_eagerly():
            values = mfcc_test.numpy()
        else:
            values = mfcc_test.eval()

        self.assertAllClose(
            values[0, 0:2, 0:6],
            expected,
            rtol=1e-05,
            atol=1e-05,
        )