def call(self, audio_data, sample_rate=None):
    """
    Calculate the power of every frame in speech.

    :param audio_data: the audio signal from which to compute frame power.
        Described by the original author as a (1, N) tensor; this method does
        not check the shape itself.
    :param sample_rate: [optional] the sample rate of the signal. When omitted
        (None), the ``sample_rate`` stored in ``self.config`` is used. A
        supplied value is cast to int32 and asserted equal to the configured
        sample rate via ``tf.assert_equal``.
    :return: the result of ``py_x_ops.frame_pow`` for the given audio, i.e.
        the power of every frame.
        NOTE(review): earlier documentation gave the shape as
        (1, num_frames) -- confirm against the op's actual output rank.
    """
    p = self.config
    with tf.name_scope('framepow'):
        # Identity check on purpose: `== None` would dispatch to the
        # overloaded `__eq__` of tensor / ndarray arguments instead of
        # testing whether the caller omitted the argument.
        if sample_rate is None:
            sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

        # The op is configured for one specific sample rate; reject any
        # caller-supplied rate that disagrees with the configuration.
        assert_op = tf.assert_equal(
            tf.constant(p.sample_rate),
            tf.cast(sample_rate, dtype=tf.int32))

        # Tie the computation to the assertion so the check cannot be
        # skipped when the result is evaluated.
        with tf.control_dependencies([assert_op]):
            sample_rate = tf.cast(sample_rate, dtype=float)
            framepow = py_x_ops.frame_pow(
                audio_data,
                sample_rate,
                window_length=p.window_length,
                frame_length=p.frame_length)

            return framepow
def test_frmpow(self):
    """Check the frame_pow op against reference values for the test wav.

    Loads the wav file at ``self.wavpath`` (requested at 16000 Hz), runs
    ``py_x_ops.frame_pow`` on it, and verifies that the result is rank 1 and
    that its first 50 values match the stored reference powers.
    """
    # Reference frame powers for the first 50 frames of the fixture wav.
    expected = np.array([
        0.000018, 0.000011, 0.000010, 0.000010, 0.000010,
        0.000010, 0.000008, 0.000009, 0.000009, 0.000009,
        0.000009, 0.000011, 0.090164, 0.133028, 0.156547,
        0.053551, 0.056670, 0.097706, 0.405659, 2.119505,
        4.296845, 6.139090, 6.623638, 6.136467, 7.595072,
        7.904415, 7.655983, 6.771016, 5.706427, 4.220942,
        3.259599, 2.218259, 1.911394, 2.234246, 3.056905,
        2.534153, 0.464354, 0.013493, 0.021231, 0.148362,
        0.364829, 0.627266, 0.494912, 0.366029, 0.315408,
        0.312441, 0.323796, 0.267505, 0.152856, 0.045305,
    ])

    with self.session():
        rate, samples = feat_lib.load_wav(self.wavpath, sr=16000)
        frame_power = py_x_ops.frame_pow(samples, rate)

        # The op is expected to yield a rank-1 tensor.
        rank = tf.rank(frame_power).eval()
        self.assertEqual(rank, 1)

        # Only the leading frames are compared against the reference.
        leading = frame_power.eval().flatten()[:50]
        self.assertAllClose(leading, expected)