def compute_melmat(self):
    """Return ``mel.compute_melmat`` evaluated with the current configuration.

    The band count, frequency limits and sample rate are read from
    ``self._config``; the number of FFT bands is ``nfft // 2 + 1``.
    """
    settings = self._config
    # NOTE(review): this reads the 'nfft' key while the melbank
    # initialisation code reads 'fft_size' -- confirm both keys exist.
    melmat_arguments = {
        'num_mel_bands': settings['samples'],
        'freq_min': settings['min_frequency'],
        'freq_max': settings['max_frequency'],
        'num_fft_bands': int(settings['nfft'] // 2) + 1,
        'sample_rate': settings['mic_rate'],
    }
    return mel.compute_melmat(**melmat_arguments)
def _initialize_melbank(self):
    """Initialize all the melbank related variables.

    Builds the filterbank selected by ``self._config["coeffs_type"]`` and
    populates:

    * ``self.filterbank`` -- the ``aubio.filterbank`` instance.
    * ``self.melbank_frequencies`` -- one frequency per band, truncated
      to ``int``.
    * ``self.lows_index`` / ``self.mids_index`` / ``self.highs_index`` --
      one past the last band that falls in the matching
      ``FREQUENCY_RANGES_SIMPLE`` range (1 when no band matches).
    * ``self.mel_gain``, ``self.mel_smoothing`` and ``self.common_filter``.

    The ``slaney``, ``fixed`` and ``fixed_simple`` types overwrite
    ``self._config["samples"]`` with the number of bands they produce.
    A ``coeffs_type`` that matches none of the branches builds no
    filterbank here.
    """
    config = self._config
    coeffs_type = config["coeffs_type"]

    # A few different coefficient types for experimentation
    if coeffs_type == "triangle":
        self._set_triangle_filterbank(
            self._spaced_frequencies(
                aubio.hztomel, aubio.meltohz, config["samples"] + 2
            )
        )

    elif coeffs_type == "bark":
        bark_points = np.linspace(
            6.0 * np.arcsinh(config["min_frequency"] / 600.0),
            6.0 * np.arcsinh(config["max_frequency"] / 600.0),
            config["samples"] + 2,
        )
        self._set_triangle_filterbank(600.0 * np.sinh(bark_points / 6.0))

    elif coeffs_type == "slaney":
        # Slaney coefficients will always produce 40 samples spanning
        # 133Hz to 6000Hz
        self.filterbank = aubio.filterbank(40, config["fft_size"])
        self.filterbank.set_mel_coeffs_slaney(config["mic_rate"])

        # Slaney frequencies are linear-log spaced: 133Hz to 1000Hz is
        # linearly spaced and 1000Hz to 6000Hz is log spaced. The band
        # count is hardcoded to 40 (13 linear + 27 log).
        lowest_frequency = 133.3
        linear_spacing = 66.6666666
        log_spacing = 1.0711703
        linear_filters = 13
        log_filters = 27
        linear_freqs = (
            lowest_frequency + np.arange(0, linear_filters) * linear_spacing
        )
        log_freqs = linear_freqs[-1] * np.power(
            log_spacing, np.arange(1, log_filters + 1)
        )

        config["samples"] = 40
        self.melbank_frequencies = np.hstack(
            (linear_freqs, log_freqs)
        ).astype(np.float32)

    elif coeffs_type == "mel":
        # Standard mel coefficients
        self.filterbank = aubio.filterbank(
            config["samples"], config["fft_size"]
        )
        self.filterbank.set_mel_coeffs(
            config["mic_rate"],
            config["min_frequency"],
            config["max_frequency"],
        )
        # Frequencies will be linearly spaced in the mel scale
        self.melbank_frequencies = self._spaced_frequencies(
            aubio.hztomel, aubio.meltohz, config["samples"]
        )

    elif coeffs_type == "htk":
        # HTK mel coefficients
        self.filterbank = aubio.filterbank(
            config["samples"], config["fft_size"]
        )
        self.filterbank.set_mel_coeffs_htk(
            config["mic_rate"],
            config["min_frequency"],
            config["max_frequency"],
        )
        # Frequencies will be linearly spaced in the mel scale
        self.melbank_frequencies = self._spaced_frequencies(
            aubio.hztomel, aubio.meltohz, config["samples"]
        )

    elif coeffs_type == "scott":
        # Coefficients based on Scott's audio reactive led project
        melmat, center_frequencies_hz, _ = mel.compute_melmat(
            num_mel_bands=config["samples"],
            freq_min=config["min_frequency"],
            freq_max=config["max_frequency"],
            num_fft_bands=int(config["fft_size"] // 2) + 1,
            sample_rate=config["mic_rate"],
        )
        self._set_matrix_filterbank(melmat, center_frequencies_hz)

    elif coeffs_type == "scott_mel":
        # "Mel"-spacing based on Scott's audio reactive led project. This
        # should in theory be the same as the above, but there seem to be
        # slight differences. Leaving both for science!
        def hertz_to_scott(freq):
            return 3340.0 * log(1 + (freq / 250.0), 9)

        def scott_to_hertz(scott):
            return 250.0 * (9 ** (scott / 3340.0)) - 250.0

        self._set_triangle_filterbank(
            self._spaced_frequencies(
                hertz_to_scott, scott_to_hertz, config["samples"] + 2
            )
        )

    elif coeffs_type == "matt_mel":
        # Modified scott_mel, spreads out the low range and compresses the
        # highs.
        # NOTE(review): the forward mapping uses log base 13 while the
        # inverse uses base 10, so the two are not exact inverses and the
        # top band edge does not land on max_frequency. Kept unchanged;
        # presumably deliberate tuning -- confirm.
        def hertz_to_matt(freq):
            return 3700.0 * log(1 + (freq / 200.0), 13)

        def matt_to_hertz(matt):
            return 200.0 * (10 ** (matt / 3700.0)) - 200.0

        self._set_triangle_filterbank(
            self._spaced_frequencies(
                hertz_to_matt, matt_to_hertz, config["samples"] + 2
            )
        )

    elif coeffs_type == "fixed":
        self._set_range_filterbank(FREQUENCY_RANGES.values())

    elif coeffs_type == "fixed_simple":
        self._set_range_filterbank(FREQUENCY_RANGES_SIMPLE.values())

    self.melbank_frequencies = self.melbank_frequencies.astype(int)

    # Normalize the filterbank triangles to a consistent height; the
    # default coeffs are normalized by the triangle's area, which results
    # in an uneven melbank. The original condition
    # (`!= "scott" and == "scott_mel"`) only ever matched "scott_mel".
    if coeffs_type == "scott_mel":
        coeffs = self.filterbank.get_coeffs()
        peaks = np.max(coeffs, axis=-1)[:, None]
        # Skip rows whose peak is zero (empty bands): dividing them would
        # fill the row with NaN.
        np.divide(coeffs, peaks, out=coeffs, where=peaks > 0)
        self.filterbank.set_coeffs(coeffs)

    # Find the indexes for each of the frequency ranges
    low_max = FREQUENCY_RANGES_SIMPLE["Low (1-250Hz)"].max
    mid_max = FREQUENCY_RANGES_SIMPLE["Mid (250Hz-4kHz)"].max
    high_max = FREQUENCY_RANGES_SIMPLE["High (4kHz-24kHz)"].max
    self.lows_index = self.mids_index = self.highs_index = 1
    for end_index, frequency in enumerate(self.melbank_frequencies, start=1):
        if frequency < low_max:
            self.lows_index = end_index
        elif frequency < mid_max:
            self.mids_index = end_index
        elif frequency < high_max:
            self.highs_index = end_index

    # Build up some of the common filters. "samples" is re-read here
    # because some coefficient types overwrite it above.
    band_count = config["samples"]
    self.mel_gain = ExpFilter(
        np.tile(1e-1, band_count),
        alpha_decay=0.01,
        alpha_rise=0.99,
    )
    self.mel_smoothing = ExpFilter(
        np.tile(1e-1, band_count),
        alpha_decay=0.2,
        alpha_rise=0.99,
    )
    self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)


def _spaced_frequencies(self, to_scale, to_hertz, count):
    """Return ``count`` frequencies (Hz) evenly spaced on a warped scale.

    ``to_scale`` maps the configured min/max frequency onto the scale and
    ``to_hertz`` maps each evenly spaced scale point back to Hz.
    """
    scale_points = np.linspace(
        to_scale(self._config["min_frequency"]),
        to_scale(self._config["max_frequency"]),
        count,
    )
    return np.array([to_hertz(point) for point in scale_points])


def _set_triangle_filterbank(self, band_edges_hz):
    """Build a triangle-band filterbank from ``samples + 2`` band edges.

    The edges are cast to float32 and passed to ``set_triangle_bands``;
    the first and last edge are then dropped from
    ``self.melbank_frequencies``.
    """
    edges = np.asarray(band_edges_hz).astype(np.float32)
    self.filterbank = aubio.filterbank(
        self._config["samples"], self._config["fft_size"]
    )
    self.filterbank.set_triangle_bands(edges, self._config["mic_rate"])
    self.melbank_frequencies = edges[1:-1]


def _set_matrix_filterbank(self, melmat, center_frequencies_hz):
    """Build a filterbank from an explicit coefficient matrix."""
    self.filterbank = aubio.filterbank(
        self._config["samples"], self._config["fft_size"]
    )
    self.filterbank.set_coeffs(melmat.astype(np.float32))
    self.melbank_frequencies = center_frequencies_hz


def _set_range_filterbank(self, ranges):
    """Build a filterbank with one band per entry of ``ranges``.

    Each entry must expose ``min`` and ``max`` attributes (band edges in
    Hz). ``self._config["samples"]`` is overwritten with the band count.
    """
    lower_edges_hz = np.array([band.min for band in ranges], dtype=float)
    upper_edges_hz = np.array([band.max for band in ranges], dtype=float)
    melmat, center_frequencies_hz, _ = mel.compute_melmat_from_range(
        lower_edges_hz=lower_edges_hz,
        upper_edges_hz=upper_edges_hz,
        num_fft_bands=int(self._config["fft_size"] // 2) + 1,
        sample_rate=self._config["mic_rate"],
    )
    self._config["samples"] = len(center_frequencies_hz)
    self._set_matrix_filterbank(melmat, center_frequencies_hz)
def _initialize_melbank(self):
    """Initialize all the melbank related variables.

    Builds the filterbank selected by ``self._config['coeffs_type']`` and
    populates ``self.filterbank``, ``self.melbank_frequencies`` (truncated
    to ``int``), the ``lows_index`` / ``mids_index`` / ``highs_index``
    attributes and the shared ``ExpFilter`` instances.

    Each ``*_index`` is one past the last band that falls in the matching
    ``FREQUENCY_RANGES_SIMPLE`` range, and defaults to 1 when no band
    matches, so the attributes are always defined.

    The ``slaney`` type overwrites ``self._config['samples']`` with 40.
    """
    config = self._config
    coeffs_type = config['coeffs_type']

    def mel_spaced_frequencies(count):
        # `count` frequencies in Hz, evenly spaced on aubio's mel scale.
        mel_points = np.linspace(
            aubio.hztomel(config['min_frequency']),
            aubio.hztomel(config['max_frequency']),
            count)
        return np.array([aubio.meltohz(point) for point in mel_points])

    def use_triangle_bands(band_edges_hz):
        # Build triangle bands from samples + 2 edges; the first and last
        # edge are then dropped from the stored frequencies.
        edges = np.asarray(band_edges_hz).astype(np.float32)
        self.filterbank = aubio.filterbank(config['samples'],
                                           config['fft_size'])
        self.filterbank.set_triangle_bands(edges, config['mic_rate'])
        self.melbank_frequencies = edges[1:-1]

    # A few different coefficient types for experimentation
    if coeffs_type == 'triangle':
        use_triangle_bands(mel_spaced_frequencies(config['samples'] + 2))

    elif coeffs_type == 'bark':
        bark_points = np.linspace(
            6.0 * np.arcsinh(config['min_frequency'] / 600.0),
            6.0 * np.arcsinh(config['max_frequency'] / 600.0),
            config['samples'] + 2)
        use_triangle_bands(600.0 * np.sinh(bark_points / 6.0))

    elif coeffs_type == 'slaney':
        # Slaney coefficients will always produce 40 samples spanning
        # 133Hz to 6000Hz
        self.filterbank = aubio.filterbank(40, config['fft_size'])
        self.filterbank.set_mel_coeffs_slaney(config['mic_rate'])

        # Slaney frequencies are linear-log spaced: 133Hz to 1000Hz is
        # linearly spaced and 1000Hz to 6000Hz is log spaced. The band
        # count is hardcoded to 40 (13 linear + 27 log).
        lowest_frequency = 133.3
        linear_spacing = 66.6666666
        log_spacing = 1.0711703
        linear_filters = 13
        log_filters = 27
        linear_freqs = lowest_frequency + np.arange(
            0, linear_filters) * linear_spacing
        log_freqs = linear_freqs[-1] * np.power(
            log_spacing, np.arange(1, log_filters + 1))

        config['samples'] = 40
        self.melbank_frequencies = np.hstack(
            (linear_freqs, log_freqs)).astype(np.float32)

    elif coeffs_type == 'mel':
        # Standard mel coefficients
        self.filterbank = aubio.filterbank(config['samples'],
                                           config['fft_size'])
        self.filterbank.set_mel_coeffs(config['mic_rate'],
                                       config['min_frequency'],
                                       config['max_frequency'])
        # Frequencies will be linearly spaced in the mel scale
        self.melbank_frequencies = mel_spaced_frequencies(config['samples'])

    elif coeffs_type == 'htk':
        # HTK mel coefficients
        self.filterbank = aubio.filterbank(config['samples'],
                                           config['fft_size'])
        self.filterbank.set_mel_coeffs_htk(config['mic_rate'],
                                           config['min_frequency'],
                                           config['max_frequency'])
        # Frequencies will be linearly spaced in the mel scale
        self.melbank_frequencies = mel_spaced_frequencies(config['samples'])

    elif coeffs_type == 'scott':
        # Coefficients based on Scott's audio reactive led project
        melmat, center_frequencies_hz, _ = mel.compute_melmat(
            num_mel_bands=config['samples'],
            freq_min=config['min_frequency'],
            freq_max=config['max_frequency'],
            num_fft_bands=int(config['fft_size'] // 2) + 1,
            sample_rate=config['mic_rate'])
        self.filterbank = aubio.filterbank(config['samples'],
                                           config['fft_size'])
        self.filterbank.set_coeffs(melmat.astype(np.float32))
        self.melbank_frequencies = center_frequencies_hz

    elif coeffs_type == 'scott_mel':
        # "Mel"-spacing based on Scott's audio reactive led project. This
        # should in theory be the same as the above, but there seem to be
        # slight differences. Leaving both for science!
        def hertz_to_scott(freq):
            return 3340.0 * log(1 + (freq / 250.0), 9)

        def scott_to_hertz(scott):
            return 250.0 * (9**(scott / 3340.0)) - 250.0

        scott_points = np.linspace(
            hertz_to_scott(config['min_frequency']),
            hertz_to_scott(config['max_frequency']),
            config['samples'] + 2)
        use_triangle_bands(
            np.array([scott_to_hertz(point) for point in scott_points]))

    self.melbank_frequencies = self.melbank_frequencies.astype(int)

    # Normalize the filterbank triangles to a consistent height; the
    # default coeffs are normalized by the triangle's area, which results
    # in an uneven melbank. The original condition
    # (`!= 'scott' and == 'scott_mel'`) only ever matched 'scott_mel'.
    if coeffs_type == 'scott_mel':
        coeffs = self.filterbank.get_coeffs()
        peaks = np.max(coeffs, axis=-1)[:, None]
        # Skip rows whose peak is zero (empty bands): dividing them would
        # fill the row with NaN.
        np.divide(coeffs, peaks, out=coeffs, where=peaks > 0)
        self.filterbank.set_coeffs(coeffs)

    # Find the indexes for each of the frequency ranges. Every band is
    # visited (the previous loop stopped one short of the end) and each
    # index is stored as an exclusive end position, with a default so the
    # attributes exist even when no band falls inside a range.
    low_max = FREQUENCY_RANGES_SIMPLE['low'].max
    mid_max = FREQUENCY_RANGES_SIMPLE['mid'].max
    high_max = FREQUENCY_RANGES_SIMPLE['high'].max
    self.lows_index = self.mids_index = self.highs_index = 1
    for end_index, frequency in enumerate(self.melbank_frequencies, start=1):
        if frequency < low_max:
            self.lows_index = end_index
        elif frequency < mid_max:
            self.mids_index = end_index
        elif frequency < high_max:
            self.highs_index = end_index

    # Build up some of the common filters. 'samples' is re-read here
    # because the slaney type overwrites it above.
    band_count = config['samples']
    self.mel_gain = ExpFilter(np.tile(1e-1, band_count),
                              alpha_decay=0.01,
                              alpha_rise=0.99)
    self.mel_smoothing = ExpFilter(np.tile(1e-1, band_count),
                                   alpha_decay=0.2,
                                   alpha_rise=0.99)
    self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)