示例#1
0
    def build(self, input_shape):
        """Precompute the mel weight matrix and delegate to the parent build.

        Args:
            input_shape: shape of the layer input; only its last dimension
                is read here and passed to `_compute_fft_size`.

        Side effects:
            Sets `self.feature_size`. When `use_tf_fft` is False, also sets
            `self.mel_weight_matrix` to a float32 TF constant (optionally
            truncated to its leading rows when `mel_non_zero_only` is set).
        """
        # Number of DFT output bins: fft_size // 2 + 1.
        num_dft_bins = self._compute_fft_size(int(input_shape[-1])) // 2 + 1
        self.feature_size = num_dft_bins

        # Stays None unless the mel matrix gets truncated below.
        fft_mel_size = None

        if not self.use_tf_fft:
            # Mel matrix is computed with numpy in this mode.
            mel_kwargs = dict(
                num_mel_bins=self.num_mel_bins,
                num_spectrogram_bins=num_dft_bins,
                audio_sample_rate=self.sample_rate,
                lower_edge_hertz=self.lower_edge_hertz,
                upper_edge_hertz=self.upper_edge_hertz,
            )
            # Stored on self before the size query below.
            # NOTE(review): presumably _get_non_zero_mel_size() reads
            # self.mel_weight_matrix - confirm before reordering.
            self.mel_weight_matrix = mel_table.SpectrogramToMelMatrix(
                **mel_kwargs)

            if self.mel_non_zero_only:
                # Keep only the leading rows reported by the helper.
                fft_mel_size = self._get_non_zero_mel_size()
                truncated = self.mel_weight_matrix[:fft_mel_size, :]
                self.mel_weight_matrix = truncated

            self.mel_weight_matrix = tf.constant(
                self.mel_weight_matrix, dtype=tf.float32)

        super(MagnitudeRDFTmel, self).build(input_shape, fft_mel_size)
示例#2
0
  def build(self, input_shape, fft_mel_size=None):
    """Creates the constant real-DFT matrices used when TF FFT is disabled.

    Args:
      input_shape: shape of the layer input; only its last dimension (the
        frame size) is read here.
      fft_mel_size: optional number of leading DFT bins to keep. When None,
        the first `fft_size // 2 + 1` bins are kept.

    Raises:
      ValueError: if `use_tf_fft` is set and `fft_mel_size` is truthy.

    Side effects:
      Sets `self.fft_size`. When `use_tf_fft` is False, also sets
      `self.real_dft_tensor` and `self.imag_dft_tensor`: float32 constants
      with one row per input sample and one column per kept DFT bin.
    """
    super(MagnitudeRDFT, self).build(input_shape)
    frame_size = int(input_shape[-1])
    self.fft_size = self._compute_fft_size(frame_size)

    if (self.use_tf_fft and fft_mel_size):
      raise ValueError('TF FFT(True) is not compatible with fft_mel_size')

    if not self.use_tf_fft:
      if fft_mel_size is None:
        num_bins = self.fft_size // 2 + 1
      else:
        num_bins = fft_mel_size

      # Slice the index vectors instead of slicing a full fft_size x fft_size
      # matrix afterwards: same clamping semantics, far less work and memory.
      all_indices = np.arange(self.fft_size)
      # Only rows for real input samples are needed, so the input never has
      # to be zero padded (it is a DFT, not an FFT) and no multiplications
      # with padded zeros are performed.
      sample_indices = all_indices[:frame_size]
      bin_indices = all_indices[:num_bins]

      # Shared phase 2*pi*n*k/fft_size, computed once for both components.
      phase = (2.0 * np.pi * np.outer(sample_indices, bin_indices) /
               self.fft_size)

      # It is a real DFT with cos and sin functions only,
      # for real and imaginary components accordingly.
      dft_real = np.asarray(np.cos(phase), dtype=np.float32)
      dft_imag = np.asarray(-np.sin(phase), dtype=np.float32)

      self.real_dft_tensor = tf.constant(dft_real)
      self.imag_dft_tensor = tf.constant(dft_imag)
示例#3
0
    def build(self, input_shape):
        """Precompute the linear-to-mel weight matrix for this layer.

        Args:
            input_shape: shape of the layer input; its last dimension is
                used as the number of spectrogram bins.

        Side effects:
            Sets `self.mel_weight_matrix` to a float32 tensor, built either
            by `tf.signal.linear_to_mel_weight_matrix` (when `use_tf` is
            set) or from `mel_table.SpectrogramToMelMatrix`.
        """
        super(MelSpectrogram, self).build(input_shape)
        num_spectrogram_bins = int(input_shape[-1])

        if not self.use_tf:
            # numpy path: compute the table, then wrap it as a TF constant.
            np_mel_matrix = mel_table.SpectrogramToMelMatrix(
                num_mel_bins=self.num_mel_bins,
                num_spectrogram_bins=num_spectrogram_bins,
                audio_sample_rate=self.sample_rate,
                lower_edge_hertz=self.lower_edge_hertz,
                upper_edge_hertz=self.upper_edge_hertz)
            self.mel_weight_matrix = tf.constant(
                np_mel_matrix, dtype=tf.float32)
            return

        # TF path: let tf.signal build the matrix directly.
        self.mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=self.num_mel_bins,
            num_spectrogram_bins=num_spectrogram_bins,
            sample_rate=self.sample_rate,
            lower_edge_hertz=self.lower_edge_hertz,
            upper_edge_hertz=self.upper_edge_hertz,
            dtype=tf.float32)