示例#1
0
    def call(self, audio_data, sample_rate=None):
        """
        Calculate the power of every frame in speech.

        :param audio_data: the audio signal from which to compute the frame power. Should be an (1, N) tensor.
        :param sample_rate: [optional] the sample rate of the signal we are working with. When omitted,
                            the ``sample_rate`` from this layer's config is used. A supplied value must
                            equal the configured one, otherwise the equality assertion op fails.
        :return: A float tensor of size (1, num_frames) containing power of every frame in speech.
        """

        p = self.config
        with tf.name_scope('framepow'):

            # Identity check on purpose: `== None` would go through the overloaded
            # `__eq__` of a tensor argument instead of testing for "not provided".
            if sample_rate is None:
                sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

            # Guard against a caller-supplied rate that disagrees with the config.
            assert_op = tf.assert_equal(tf.constant(p.sample_rate),
                                        tf.cast(sample_rate, dtype=tf.int32))
            # Tie the op below to the assertion so the check is actually executed.
            with tf.control_dependencies([assert_op]):

                # The frame_pow op is fed the sample rate as a float value.
                sample_rate = tf.cast(sample_rate, dtype=float)
                framepow = py_x_ops.frame_pow(audio_data,
                                              sample_rate,
                                              window_length=p.window_length,
                                              frame_length=p.frame_length)

                return framepow
    def test_frmpow(self):
        """Compare the frame_pow op output with reference values.

        Loads the wav at ``self.wavpath``, runs the op with its default window
        and frame settings, and checks that the result is rank 1 and that its
        first 50 values match the stored reference powers.
        """
        # Reference frame powers for the first 50 frames of the test wav.
        expected_power = np.array([
            0.000018, 0.000011, 0.000010, 0.000010, 0.000010, 0.000010,
            0.000008, 0.000009, 0.000009, 0.000009, 0.000009, 0.000011,
            0.090164, 0.133028, 0.156547, 0.053551, 0.056670, 0.097706,
            0.405659, 2.119505, 4.296845, 6.139090, 6.623638, 6.136467,
            7.595072, 7.904415, 7.655983, 6.771016, 5.706427, 4.220942,
            3.259599, 2.218259, 1.911394, 2.234246, 3.056905, 2.534153,
            0.464354, 0.013493, 0.021231, 0.148362, 0.364829, 0.627266,
            0.494912, 0.366029, 0.315408, 0.312441, 0.323796, 0.267505,
            0.152856, 0.045305
        ])

        with self.session():
            wav_rate, waveform = feat_lib.load_wav(self.wavpath, sr=16000)
            frame_power = py_x_ops.frame_pow(waveform, wav_rate)

            # The op is expected to yield a one-dimensional tensor.
            rank_value = tf.rank(frame_power).eval()
            self.assertEqual(rank_value, 1)

            # Only the leading 50 frames are compared against the reference.
            flat_power = frame_power.eval().flatten()
            leading_frames = flat_power[:50]
            self.assertAllClose(leading_frames, expected_power)