Exemplo n.º 1
0
    def call(self, audio_data, sample_rate):
        """
           Compute filterbank (fbank) features for an audio signal.
           :param audio_data: the audio signal handed to ``self.spect`` to obtain the spectrum.
                   Described by the original documentation as an (1, N) tensor.
           :param sample_rate: the sample rate of the signal. It is given unchanged to
                   ``self.spect`` and cast to int32 before being passed to the fbank op.
           :return: the fbank tensor reshaped to (num_frames, num_bins, config.channel), where
                   num_frames and num_bins are the two dimensions of the fbank op output once
                   its leading axis has been squeezed away.
           """
        cfg = self.config

        with tf.name_scope('fbank'):
            # The fbank op is fed the spectrum with an extra leading axis.
            batched_spectrum = tf.expand_dims(
                self.spect(audio_data, sample_rate), 0)
            rate_i32 = tf.cast(sample_rate, dtype=tf.int32)

            # Drop the leading axis again right after the op.
            features = tf.squeeze(
                py_x_ops.fbank(
                    batched_spectrum,
                    rate_i32,
                    upper_frequency_limit=cfg.upper_frequency_limit,
                    lower_frequency_limit=cfg.lower_frequency_limit,
                    filterbank_channel_count=cfg.filterbank_channel_count),
                axis=0)

            # Dimensions are read before the optional delta step on purpose.
            # NOTE(review): presumably delta_delta widens the feature axis and
            # config.channel accounts for that in the final reshape - confirm.
            dims = tf.shape(features)
            num_frames, num_bins = dims[0], dims[1]

            if cfg.delta_delta:
                features = py_x_ops.delta_delta(
                    features, cfg.order, cfg.window)
            if cfg.type == 'Fbank':
                features = self.cmvn(features)

            return tf.reshape(features, (num_frames, num_bins, cfg.channel))
Exemplo n.º 2
0
    def call(self, audio_data, sample_rate):
        """Calculate fbank features of audio data.

        Args:
            audio_data: the audio signal handed to ``self.spect`` to obtain
                the spectrum.
            sample_rate: the sample rate of the signal; given unchanged to
                ``self.spect`` and cast to int32 for the fbank op.

        Returns:
            The fbank tensor reshaped to (num_frames, num_bins,
            config.channel), where num_frames and num_bins are the two
            dimensions of the fbank op output once its leading axis has been
            squeezed away.

        Shape:
            - audio_data: :math:`(1, N)`
            - sample_rate: float
        """
        cfg = self.config

        with tf.name_scope('fbank'):
            # The fbank op is fed the spectrum with an extra leading axis.
            batched_spectrum = tf.expand_dims(
                self.spect(audio_data, sample_rate), 0)
            rate_i32 = tf.cast(sample_rate, dtype=tf.int32)

            # Drop the leading axis again right after the op.
            features = tf.squeeze(
                py_x_ops.fbank(
                    batched_spectrum,
                    rate_i32,
                    upper_frequency_limit=cfg.upper_frequency_limit,
                    lower_frequency_limit=cfg.lower_frequency_limit,
                    filterbank_channel_count=cfg.filterbank_channel_count,
                    is_log10=cfg.is_log10),
                axis=0)

            # Dimensions are read before the optional delta step on purpose.
            # NOTE(review): presumably delta_delta widens the feature axis and
            # config.channel accounts for that in the final reshape - confirm.
            dims = tf.shape(features)
            num_frames, num_bins = dims[0], dims[1]

            if cfg.delta_delta:
                features = py_x_ops.delta_delta(
                    features, cfg.order, cfg.window)
            if cfg.type == 'Fbank':
                features = self.cmvn(features)

            return tf.reshape(features, (num_frames, num_bins, cfg.channel))