def __init__(self, config: dict):
    """Instantiate the ``Spectrum`` and ``Fbank`` members from ``config``.

    ``Spectrum`` is given its own deep copy of ``config`` in which
    ``is_fbank`` is forced to ``False`` and ``output_type`` to ``2``;
    ``Fbank`` is given ``config`` itself.

    Args:
        config: Settings forwarded to the parent initializer, ``Spectrum``
            (as a modified copy) and ``Fbank``.
    """
    super().__init__(config)

    # Apply the overrides on a private copy so they do not leak into the
    # dict handed to Fbank (or back to the caller).
    spectrum_config = copy.deepcopy(config)
    spectrum_config.update({'is_fbank': False, 'output_type': 2})

    self.spect = Spectrum(spectrum_config)
    self.fbank = Fbank(config)
def compute_fbank():
    """Compute Fbank features for every utterance read and write them out.

    Command-line arguments are obtained from ``get_parser()``. Utterances are
    read from ``args.rspecifier`` (optionally restricted by ``args.segments``)
    and the evaluated features are written to ``args.wspecifier`` under the
    same utterance id.

    A single TensorFlow session is opened for the whole run and closed on
    exit, instead of creating a new, never-closed session per utterance.
    """
    parser = get_parser()
    args = parser.parse_args()

    config = {}
    config['sample_rate'] = float(args.sample_rate)
    config['upper_frequency_limit'] = float(args.upper_frequency_limit)
    config['lower_frequency_limit'] = float(args.lower_frequency_limit)
    config['filterbank_channel_count'] = float(args.filterbank_channel_count)
    config['window_length'] = args.window_length
    config['frame_length'] = args.frame_length
    config['output_type'] = args.output_type

    fbank = Fbank.params(config).instantiate()

    with kaldiio.ReadHelper(args.rspecifier,
                            segments=args.segments) as reader, \
            KaldiWriter(args.wspecifier,
                        write_num_frames=args.write_num_frames,
                        compress=args.compress,
                        compression_method=args.compression_method) as writer, \
            tf.compat.v1.Session() as sess:
        for utt_id, (sample_rate, array) in reader:
            # Follow the sample rate actually found in the audio when it
            # differs from the one given on the command line.
            if sample_rate != args.sample_rate:
                args.sample_rate = sample_rate

            array = array.astype(np.float32)
            audio_data = tf.constant(array, dtype=tf.float32)
            fbank_test = tf.squeeze(fbank(audio_data, args.sample_rate))

            # Reuse the one session; it picks up ops added to the graph
            # after it was created.
            fbank_feats = fbank_test.eval(session=sess)
            writer[utt_id] = fbank_feats
def test_fbank(self):
    """Check the rank and leading values of Fbank output for ``sm1_cln.wav``.

    The features must have rank 3, and the slice ``[0:2, 0:6, 0]`` must match
    the stored reference values within ``1e-05`` relative and absolute
    tolerance.
    """
    ops_dir = Path(PACKAGE_OPS_DIR)
    wav_path = str(ops_dir.joinpath('data/sm1_cln.wav'))

    with self.cached_session(use_gpu=False, force_gpu=False):
        wav_reader = ReadWav.params().instantiate()
        input_data, sample_rate = wav_reader(wav_path)

        fbank_config = {
            'window_length': 0.025,
            'output_type': 1,
            'frame_length': 0.010,
            'snip_edges': True,
        }
        extractor = Fbank.params(fbank_config).instantiate()
        features = extractor(input_data, sample_rate)

        rank = tf.rank(features).eval()
        self.assertEqual(rank, 3)

        # Reference values for the first two rows and six columns of
        # channel 0.
        expected_feats = np.array([
            [3.768338, 4.946218, 6.289874, 6.330853, 6.761764, 6.884573],
            [3.803553, 5.450971, 6.547878, 5.796172, 6.397846, 7.242926],
        ])
        actual_feats = np.squeeze(features.eval()[0:2, 0:6, 0])
        self.assertAllClose(
            actual_feats, expected_feats, rtol=1e-05, atol=1e-05)
def test_fbank(self):
    """Check that Fbank output for ``sm1_cln.wav`` is a rank-3 tensor."""
    package_root = Path(PACKAGE_ROOT_DIR)
    wav_path = str(package_root.joinpath('layers/ops/data/sm1_cln.wav'))

    with self.cached_session(use_gpu=False, force_gpu=False):
        wav_reader = ReadWav.params().instantiate()
        input_data, sample_rate = wav_reader(wav_path)

        fbank_config = {
            'window_length': 0.025,
            'output_type': 1,
            'frame_length': 0.010,
        }
        extractor = Fbank.params(fbank_config).instantiate()
        features = extractor(input_data, sample_rate)

        rank = tf.rank(features).eval()
        self.assertEqual(rank, 3)
def test_fbank(self):
    """Check that Fbank output for ``sm1_cln.wav`` is a rank-3 tensor.

    The wav file is located relative to the ``MAIN_ROOT`` environment
    variable, which must be set.
    """
    main_root = Path(os.environ['MAIN_ROOT'])
    wav_path = str(main_root.joinpath('delta/layers/ops/data/sm1_cln.wav'))

    with self.session():
        wav_reader = ReadWav.params().instantiate()
        input_data, sample_rate = wav_reader(wav_path)

        fbank_config = {
            'window_length': 0.025,
            'output_type': 1,
            'frame_length': 0.010,
        }
        extractor = Fbank.params(fbank_config).instantiate()
        features = extractor(input_data, sample_rate)

        rank = tf.rank(features).eval()
        self.assertEqual(rank, 3)
def test_fbank(self):
    """Check that Fbank output for ``sm1_cln.wav`` has rank 3 and print a slice.

    Tensors are materialised with ``self.evaluate`` so that the rank check
    and the debug print both work under graph and eager execution. Calling
    ``Tensor.eval()`` for the rank check would raise under eager execution
    before the eager-aware print was ever reached.
    """
    wav_path = str(Path(PACKAGE_OPS_DIR).joinpath('data/sm1_cln.wav'))

    with self.cached_session(use_gpu=False, force_gpu=False):
        read_wav = ReadWav.params().instantiate()
        input_data, sample_rate = read_wav(wav_path)

        config = {
            'window_length': 0.025,
            'output_type': 1,
            'frame_length': 0.010,
            'snip_edges': True,
        }
        fbank = Fbank.params(config).instantiate()
        fbank_test = fbank(input_data, sample_rate)

        # NOTE(review): relies on the enclosing class being a
        # tf.test.TestCase (implied by the cached_session call above),
        # which provides evaluate().
        self.assertEqual(self.evaluate(tf.rank(fbank_test)), 3)

        # Print the first two rows and six columns of channel 0 for
        # manual inspection.
        print(self.evaluate(fbank_test)[0:2, 0:6, 0])
def __init__(self, config: dict):
    """Instantiate the ``Fbank`` and ``Pitch`` members from ``config``.

    Args:
        config: Settings passed unchanged to the parent initializer,
            ``Fbank`` and ``Pitch``.
    """
    super().__init__(config)

    # Both members are built from the very same config object.
    self.fbank = Fbank(config)
    self.pitch = Pitch(config)
def __init__(self, config: dict):
    """Instantiate the ``Framepow`` and ``Fbank`` members from ``config``.

    Args:
        config: Settings passed unchanged to the parent initializer,
            ``Framepow`` and ``Fbank``.
    """
    super().__init__(config)

    # Both members are built from the very same config object.
    self.framepow = Framepow(config)
    self.fbank = Fbank(config)