Exemplo n.º 1
0
    def call(self, audio_data, sample_rate=None):
        """Calculate fbank features of audio data.

        :param audio_data: the audio signal from which to compute the spectrum.
            Should be a (1, N) tensor.
        :param sample_rate: [optional] the sample rate of the signal. When
            omitted, ``self.config.sample_rate`` is used. A graph assertion
            requires it to equal the configured sample rate.
        :return: A float tensor of size (num_channels, num_frames,
            num_frequencies) containing fbank features of every frame in
            speech.
        """
        p = self.config
        with tf.name_scope('fbank'):

            # Identity test rather than `== None`, so the check never goes
            # through an overloaded `__eq__` when a tensor/array is passed.
            if sample_rate is None:
                sample_rate = tf.constant(p.sample_rate, dtype=tf.int32)

            # Normalise the upper band edge against half the configured rate.
            # NOTE: the normalised value is written back to the config object,
            # so later readers of `p.upper_frequency_limit` see it.
            half_rate = p.sample_rate / 2.0
            if p.upper_frequency_limit <= 0:
                # Non-positive limits are treated as an offset from half_rate.
                p.upper_frequency_limit = half_rate + p.upper_frequency_limit
            elif (p.upper_frequency_limit <= p.lower_frequency_limit) or (
                    p.upper_frequency_limit > half_rate):
                # Limits at/below the lower edge or above half_rate are
                # replaced by half_rate.
                p.upper_frequency_limit = half_rate

            # Fail at run time if the supplied rate differs from the config.
            assert_op = tf.assert_equal(tf.constant(p.sample_rate),
                                        tf.cast(sample_rate, dtype=tf.int32))
            with tf.control_dependencies([assert_op]):

                spectrum = self.spect(audio_data, sample_rate)
                # Add a leading axis before handing the spectrum to the op.
                spectrum = tf.expand_dims(spectrum, 0)

                fbank = py_x_ops.fbank(
                    spectrum,
                    sample_rate,
                    upper_frequency_limit=p.upper_frequency_limit,
                    lower_frequency_limit=p.lower_frequency_limit,
                    filterbank_channel_count=p.filterbank_channel_count)

                return fbank
Exemplo n.º 2
0
    def test_fbank(self):
        """Run the fbank op on a ramp spectrogram and compare with stored values."""
        # Hard-coded expected outputs, one per filterbank channel (20 total).
        expected = np.array([
            1.887894, 2.2693727, 2.576507, 2.8156495, 3.036504,
            3.2296343, 3.4274294, 3.5987632, 3.771217, 3.937401,
            4.0988584, 4.2570987, 4.4110703, 4.563661, 4.7140336,
            4.8626432, 5.009346, 5.1539173, 5.2992935, 5.442024,
        ])

        with self.session():
            # Values 0..512 with two leading singleton axes: shape (1, 1, 513).
            ramp = np.arange(513)
            spectrogram = tf.constant(ramp[None, None, :], dtype=tf.float32)
            rate = tf.constant(22050, tf.int32)

            fbank_out = py_x_ops.fbank(
                spectrogram, rate, filterbank_channel_count=20)

            self.assertEqual(tf.rank(fbank_out).eval(), 3)
            self.assertEqual(fbank_out.shape, (1, 1, 20))
            self.assertAllClose(fbank_out.eval(), expected[None, None, :])
Exemplo n.º 3
0
def fbank_feat(powspec,
               sr=8000,
               feature_size=40,
               nfft=512,
               lowfreq=0,
               highfreq=None):
    """Compute filterbank features from a power spectrogram via ``py_x_ops.fbank``.

    Args:
      powspec: power spectrogram laid out as
        [audio_channels, spectrogram_length, spectrogram_feat_dim]. A rank-2
        input gets a leading axis of size 1 added before the op is called.
      sr: sample rate forwarded to the op.
      feature_size: forwarded as ``filterbank_channel_count``.
      nfft: unused; accepted only so the signature stays the same.
      lowfreq: forwarded as ``lower_frequency_limit``.
      highfreq: forwarded as ``upper_frequency_limit``.

    Returns:
      Features laid out as [audio_channels, nframe, nfbank].
    """
    del nfft  # intentionally ignored

    def _prepend_axis():
        return tf.expand_dims(powspec, 0)

    def _passthrough():
        return powspec

    # Only rank-2 inputs need the extra leading axis.
    is_rank_two = tf.equal(tf.rank(powspec), 2)
    spectrogram = tf.cond(is_rank_two, _prepend_axis, _passthrough)

    return py_x_ops.fbank(
        spectrogram,
        sr,
        filterbank_channel_count=feature_size,
        lower_frequency_limit=lowfreq,
        upper_frequency_limit=highfreq,
    )