示例#1
0
def mel(sr, n_dft, n_mels=128, fmin=0.0, fmax=None):
    '''[np] Build a filterbank matrix that folds STFT bins into mel bands.

    Uses Slaney-style area normalisation.
    Keunwoo: copied from Librosa, librosa.filters.mel

    sr     : sampling rate, forwarded to `_dft_frequencies`
    n_dft  : DFT size; the matrix has `1 + n_dft // 2` columns
    n_mels : number of mel bands (rows of the matrix)
    fmin   : lowest frequency [Hz]
    fmax   : highest frequency [Hz]
        If `None`, use `sr / 2.0`

    Returns an array of shape `(n_mels, 1 + n_dft // 2)` cast to the dtype
    named by `K.floatx()`.
    '''
    fmax = float(sr) / 2 if fmax is None else fmax

    n_mels = int(n_mels)
    filterbank = np.zeros((n_mels, int(1 + n_dft // 2)))

    # Frequency of every DFT bin.
    bin_freqs = _dft_frequencies(sr=sr, n_dft=n_dft)

    # n_mels + 2 edge frequencies: band k spans edges[k] .. edges[k + 2]
    # and peaks at edges[k + 1].
    edges = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax)

    # Slaney-style scaling: 2 / bandwidth keeps the energy per channel
    # approximately constant.
    area_scale = 2.0 / (edges[2:n_mels + 2] - edges[:n_mels])

    for band in range(n_mels):
        left, centre, right = edges[band], edges[band + 1], edges[band + 2]

        # Rising and falling straight lines evaluated at every DFT bin.
        rising = (bin_freqs - left) / (centre - left)
        falling = (right - bin_freqs) / (right - centre)

        # The triangle is the lower of the two lines, clipped at zero.
        triangle = np.maximum(0, np.minimum(rising, falling))
        filterbank[band] = triangle * area_scale[band]

    return filterbank.astype(K.floatx())
示例#2
0
 def __init__(self, frame_stream, specfmt="dB", mels_N=12):
     '''
     DFTStream(frame_stream, specfmt, mels_N)
     Create a stream of discrete Fourier transform (DFT) frames using the
     specified sample frame stream.  Only bins up to the Nyquist rate are
     returned in the stream.

     Arguments:

     frame_stream - source of sample frames; must provide
         get_framelen_samples() and get_Fs().
     specfmt - DFT output:
         "complex" - return complex DFT results
         "dB" [default] - return power spectrum 20log10(magnitude)
         "mag^2" - magnitude squared spectrum
         "Mel" - Mel scale
     mels_N - Number of Mel filters to use.  Only applicable when
         specfmt == "Mel".

     Raises ValueError when specfmt is not one of the names listed above.
     '''

     # Map each supported format name to an integer code.
     self.format_types = {"complex" : 0,
                          "mag^2" : 1,
                          "dB" : 2,
                          "Mel" : 3}
     self.framer = frame_stream
     self.frame_len = frame_stream.get_framelen_samples()
     try:
         self.format = self.format_types[specfmt]
     except KeyError:
         raise ValueError("Unknown specfmt {}.  Use one of [{}]".format(
             specfmt, ", ".join(self.format_types)))

     # Number of frequency bins is the same as the number of bins in the
     # frame
     self.dft_bins = self.frame_len

     # Only bins up to the Nyquist rate are usable.  The DFT routine that
     # we are using will return up to and including the Nyquist (half bins
     # plus 1 if even)
     self.Nyquist_Hz = self.framer.get_Fs() / 2.0
     # We add 1.1 instead of 1 because np.round sends exact halves to the
     # nearest even integer (see numpy.around); the extra 0.1 avoids the
     # tie.  The builtin int() replaces np.int, which NumPy 1.24 removed.
     self.bins_Nyquist = int(np.round((self.frame_len + 1.1) / 2.0))

     self.window = signal.get_window("hamming", self.frame_len)

     if self.format == self.format_types["Mel"]:
         # Construct Mel filters
         self.mel_filters = mel(self.framer.get_Fs(),
             self.dft_bins, mels_N)
         # Center frequencies of the Mel filters in Hz
         # Returns two more than are actually used (0 Hz and Nyquist)
         self.bins_Hz = mel_frequencies(mels_N+2,
                fmin=0, fmax=self.Nyquist_Hz)
         self.bins_Hz = self.bins_Hz[1:-1]  # Remove ends
         self.bins_N = len(self.bins_Hz)
     else:
         # NOTE(review): the labels are spaced Nyquist_Hz / bins_Nyquist
         # apart, so the last one is below Nyquist_Hz.  DFT bin k normally
         # sits at k * Fs / frame_len -- confirm this spacing is intended.
         self.bins_Hz = np.arange(self.bins_Nyquist) / self.bins_Nyquist * self.Nyquist_Hz
         self.bins_N = self.bins_Hz.shape[0]
示例#3
0
def get_filterbank(n_filters=60,
                   NFFT=512,
                   fs=16000,
                   fmin=0.0,
                   fmax=None,
                   htk=False,
                   normalize=False):
    """Build a bank of triangular filters over the FFT bins.

    Parameters
    ----------
    n_filters : number of filters (rows of the result); cast to int.
    NFFT : FFT size; the result has ``1 + NFFT // 2`` columns.
    fs : sampling rate, forwarded to ``fft_frequencies``.
    fmin : lowest band edge [Hz].
    fmax : highest band edge [Hz]; ``None`` means ``fs / 2``.
    htk : forwarded unchanged to ``mel_frequencies``.
    normalize : when truthy, scale each filter by ``2 / bandwidth``
        (Slaney-style area normalisation).

    Returns
    -------
    np.ndarray of shape ``(n_filters, 1 + NFFT // 2)``.
    """
    if fmax is None:
        fmax = float(fs) / 2

    # Initialize the weights
    n_mels = int(n_filters)
    weights = np.zeros((n_mels, int(1 + NFFT // 2)))

    # Center freqs of each FFT bin
    fftfreqs = fft_frequencies(sr=fs, n_fft=NFFT)

    # Band edges, computed once and only after n_mels has been cast to int:
    # filter i spans mel_f[i] .. mel_f[i + 2] and peaks at mel_f[i + 1].
    # (Original author's note: to make an evenly spaced filterbank, use
    # fft_frequencies here instead.)
    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)

    fdiff = np.diff(mel_f)
    # ramps[j, k] = mel_f[j] - fftfreqs[k]
    ramps = np.subtract.outer(mel_f, fftfreqs)

    for i in range(n_mels):
        # lower and upper slopes for all bins
        lower = -ramps[i] / fdiff[i]
        upper = ramps[i + 2] / fdiff[i + 1]

        # .. then intersect them with each other and zero
        weights[i] = np.maximum(0, np.minimum(lower, upper))

    if normalize:
        # Divide each triangle by half its bandwidth.
        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
        weights *= enorm[:, np.newaxis]

    return weights
示例#4
0
def prepare_mel_matrix(hparams, rate, return_numpy=True, GPU_backend=False):
    """Create a normalised linear-to-mel weight matrix with TensorFlow.

    Parameters
    ----------
    hparams : object providing ``num_mel_bins``, ``n_fft``,
        ``mel_lower_edge_hertz`` and ``mel_upper_edge_hertz``.
    rate : sample rate passed to ``tf.signal.linear_to_mel_weight_matrix``.
    return_numpy : return a NumPy array when True, otherwise the TensorFlow
        tensor.
    GPU_backend : when False, ``CUDA_VISIBLE_DEVICES`` is set to an empty
        string in ``os.environ`` before TensorFlow is imported here.

    Returns
    -------
    The weight matrix, scaled per mel band by ``2 / bandwidth`` and then
    divided by its column sums (``reduce_sum`` over axis 0).
    """
    # The original guard tested `"tf" not in sys.modules`, but modules are
    # keyed by their real name ("tensorflow"), so it was always true.  The
    # setup therefore runs unconditionally, as it effectively always did.
    if not GPU_backend:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
    import tensorflow as tf

    # `.numpy()` below needs eager mode.  `tf.enable_eager_execution` does
    # not exist in TF 2.x, so use the compat alias, and only when eager mode
    # is not already on.
    if not tf.executing_eagerly():
        tf.compat.v1.enable_eager_execution()
    assert tf.executing_eagerly()

    # create a filter to convolve with the spectrogram
    mel_matrix = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=hparams.num_mel_bins,
        num_spectrogram_bins=int(hparams.n_fft / 2) + 1,
        sample_rate=rate,
        lower_edge_hertz=hparams.mel_lower_edge_hertz,
        upper_edge_hertz=hparams.mel_upper_edge_hertz,
        dtype=tf.dtypes.float32,
        name=None,
    )

    # Band edges: num_mel_bins + 2 values, so band k spans
    # mel_f[k] .. mel_f[k + 2].
    mel_f = mel_frequencies(
        n_mels=hparams.num_mel_bins + 2,
        fmin=hparams.mel_lower_edge_hertz,
        fmax=hparams.mel_upper_edge_hertz,
    )

    # Slaney-style mel is scaled to be approx constant energy per channel (from librosa)
    bandwidth = mel_f[2 : hparams.num_mel_bins + 2] - mel_f[: hparams.num_mel_bins]
    # Leading axis of size 1 lets the scale broadcast across the rows.
    enorm = tf.dtypes.cast(
        tf.expand_dims(tf.constant(2.0 / bandwidth), 0),
        tf.float32,
    )

    mel_matrix = tf.multiply(mel_matrix, enorm)
    # Divide by the column sums.
    # NOTE(review): a column that sums to zero would give NaN/inf here --
    # confirm the hparams in use cannot produce an empty band.
    mel_matrix = tf.divide(mel_matrix, tf.reduce_sum(mel_matrix, axis=0))
    if return_numpy:
        return mel_matrix.numpy()
    return mel_matrix
示例#5
0
def mel(sr, n_fft, n_mels=128, fmin=0.0, fmax=None, htk=False,
        norm=1):
    """Build a matrix of triangular filters mapping FFT bins to mel bands.

    Parameters
    ----------
    sr : sampling rate, forwarded to ``fft_frequencies``.
    n_fft : FFT size; the result has ``1 + n_fft // 2`` columns.
    n_mels : number of mel bands (rows of the result); cast to int.
    fmin : lowest band edge [Hz].
    fmax : highest band edge [Hz]; ``None`` means ``sr / 2``.
    htk : forwarded unchanged to ``mel_frequencies``.
    norm : ``1`` scales every filter by ``2 / bandwidth`` (Slaney style);
        ``None`` or ``np.inf`` leave the triangles unscaled.

    Returns
    -------
    np.ndarray of shape ``(n_mels, 1 + n_fft // 2)``.

    Raises
    ------
    ParameterError
        If ``norm`` is not ``None``, ``1`` or ``np.inf``.

    Warns when a band whose lower edge is non-zero has no positive weight.
    """
    if fmax is None:
        fmax = float(sr) / 2

    if not (norm is None or norm == 1 or norm == np.inf):
        raise ParameterError('Unsupported norm: {}'.format(repr(norm)))

    n_mels = int(n_mels)
    weights = np.zeros((n_mels, int(1 + n_fft // 2)))

    # Frequency of every FFT bin.
    bin_hz = fft_frequencies(sr=sr, n_fft=n_fft)

    # n_mels + 2 band edges: band k spans edges k .. k + 2, peak at k + 1.
    band_edges = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)

    edge_gaps = np.diff(band_edges)
    # offsets[j, k] = band_edges[j] - bin_hz[k]
    offsets = np.subtract.outer(band_edges, bin_hz)

    # Evaluate the rising and falling edge of every band at every bin in one
    # broadcast; row k uses exactly the operands a per-band loop would.
    rising = -offsets[:n_mels] / edge_gaps[:n_mels, np.newaxis]
    falling = offsets[2:n_mels + 2] / edge_gaps[1:n_mels + 1, np.newaxis]

    # Each triangle is the lower of its two edges, clipped at zero.
    weights[:] = np.maximum(0, np.minimum(rising, falling))

    if norm == 1:
        # Slaney-style scaling: approximately constant energy per channel.
        area_scale = 2.0 / (band_edges[2:n_mels + 2] - band_edges[:n_mels])
        weights *= area_scale[:, np.newaxis]

    # A band is acceptable if its lower edge is 0 Hz or it has some
    # positive weight; anything else is an empty channel.
    starts_at_zero = band_edges[:-2] == 0
    has_response = weights.max(axis=1) > 0
    if not np.all(starts_at_zero | has_response):
        warnings.warn('Empty filters detected in mel frequency basis. '
                      'Some channels will produce empty responses. '
                      'Try increasing your sampling rate (and fmax) or '
                      'reducing n_mels.')

    return weights