Пример #1
0
 def compute_melmat(self):
     return mel.compute_melmat(
         num_mel_bands=self._config['samples'],
         freq_min=self._config['min_frequency'],
         freq_max=self._config['max_frequency'],
         num_fft_bands=int(self._config['nfft'] // 2) + 1,
         sample_rate=self._config['mic_rate'])
Пример #2
0
    def _initialize_melbank(self):
        """Initialize all the melbank related variables"""

        # Few difference coefficient types for experimentation
        if self._config["coeffs_type"] == "triangle":
            melbank_mel = np.linspace(
                aubio.hztomel(self._config["min_frequency"]),
                aubio.hztomel(self._config["max_frequency"]),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel]
            ).astype(np.float32)

            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_triangle_bands(
                self.melbank_frequencies, self._config["mic_rate"]
            )
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        if self._config["coeffs_type"] == "bark":
            melbank_bark = np.linspace(
                6.0 * np.arcsinh(self._config["min_frequency"] / 600.0),
                6.0 * np.arcsinh(self._config["max_frequency"] / 600.0),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = (
                600.0 * np.sinh(melbank_bark / 6.0)
            ).astype(np.float32)

            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_triangle_bands(
                self.melbank_frequencies, self._config["mic_rate"]
            )
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # Slaney coefficients will always produce 40 samples spanning 133Hz to
        # 6000Hz
        if self._config["coeffs_type"] == "slaney":
            self.filterbank = aubio.filterbank(40, self._config["fft_size"])
            self.filterbank.set_mel_coeffs_slaney(self._config["mic_rate"])

            # Sanley frequencies are linear-log spaced where 133Hz to 1000Hz is linear
            # spaced and 1000Hz to 6000Hz is log spaced. It also produced a hardcoded
            # 40 samples.
            lowestFrequency = 133.3
            linearSpacing = 66.6666666
            logSpacing = 1.0711703
            linearFilters = 13
            logFilters = 27
            linearSpacedFreqs = (
                lowestFrequency + np.arange(0, linearFilters) * linearSpacing
            )
            logSpacedFreqs = linearSpacedFreqs[-1] * np.power(
                logSpacing, np.arange(1, logFilters + 1)
            )

            self._config["samples"] = 40
            self.melbank_frequencies = np.hstack(
                (linearSpacedFreqs, logSpacedFreqs)
            ).astype(np.float32)

        # Standard mel coefficients
        if self._config["coeffs_type"] == "mel":
            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_mel_coeffs(
                self._config["mic_rate"],
                self._config["min_frequency"],
                self._config["max_frequency"],
            )

            # Frequencies wil be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config["min_frequency"]),
                aubio.hztomel(self._config["max_frequency"]),
                self._config["samples"],
            )
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel]
            )

        # HTK mel coefficients
        if self._config["coeffs_type"] == "htk":
            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_mel_coeffs_htk(
                self._config["mic_rate"],
                self._config["min_frequency"],
                self._config["max_frequency"],
            )

            # Frequencies wil be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config["min_frequency"]),
                aubio.hztomel(self._config["max_frequency"]),
                self._config["samples"],
            )
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel]
            )

        # Coefficients based on Scott's audio reactive led project
        if self._config["coeffs_type"] == "scott":
            (melmat, center_frequencies_hz, freqs,) = mel.compute_melmat(
                num_mel_bands=self._config["samples"],
                freq_min=self._config["min_frequency"],
                freq_max=self._config["max_frequency"],
                num_fft_bands=int(self._config["fft_size"] // 2) + 1,
                sample_rate=self._config["mic_rate"],
            )
            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        # "Mel"-spacing based on Scott's audio reactive led project. This
        # should in theory be the same as the above, but there seems to be
        # slight differences. Leaving both for science!
        if self._config["coeffs_type"] == "scott_mel":

            def hertz_to_scott(freq):
                return 3340.0 * log(1 + (freq / 250.0), 9)

            def scott_to_hertz(scott):
                return 250.0 * (9 ** (scott / 3340.0)) - 250.0

            melbank_scott = np.linspace(
                hertz_to_scott(self._config["min_frequency"]),
                hertz_to_scott(self._config["max_frequency"]),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = np.array(
                [scott_to_hertz(scott) for scott in melbank_scott]
            ).astype(np.float32)

            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_triangle_bands(
                self.melbank_frequencies, self._config["mic_rate"]
            )
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # Modified scott_mel, spreads out the low range and compresses the
        # highs
        if self._config["coeffs_type"] == "matt_mel":

            def hertz_to_matt(freq):
                return 3700.0 * log(1 + (freq / 200.0), 13)

            def matt_to_hertz(matt):
                return 200.0 * (10 ** (matt / 3700.0)) - 200.0

            melbank_matt = np.linspace(
                hertz_to_matt(self._config["min_frequency"]),
                hertz_to_matt(self._config["max_frequency"]),
                self._config["samples"] + 2,
            )
            self.melbank_frequencies = np.array(
                [matt_to_hertz(matt) for matt in melbank_matt]
            ).astype(np.float32)

            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_triangle_bands(
                self.melbank_frequencies, self._config["mic_rate"]
            )
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        if self._config["coeffs_type"] == "fixed":
            ranges = FREQUENCY_RANGES.values()
            upper_edges_hz = np.zeros(len(ranges))
            lower_edges_hz = np.zeros(len(ranges))
            for idx, value in enumerate(ranges):
                lower_edges_hz[idx] = value.min
                upper_edges_hz[idx] = value.max

            (
                melmat,
                center_frequencies_hz,
                freqs,
            ) = mel.compute_melmat_from_range(
                lower_edges_hz=lower_edges_hz,
                upper_edges_hz=upper_edges_hz,
                num_fft_bands=int(self._config["fft_size"] // 2) + 1,
                sample_rate=self._config["mic_rate"],
            )

            self._config["samples"] = len(center_frequencies_hz)
            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        if self._config["coeffs_type"] == "fixed_simple":
            ranges = FREQUENCY_RANGES_SIMPLE.values()
            upper_edges_hz = np.zeros(len(ranges))
            lower_edges_hz = np.zeros(len(ranges))
            for idx, value in enumerate(ranges):
                lower_edges_hz[idx] = value.min
                upper_edges_hz[idx] = value.max

            (
                melmat,
                center_frequencies_hz,
                freqs,
            ) = mel.compute_melmat_from_range(
                lower_edges_hz=lower_edges_hz,
                upper_edges_hz=upper_edges_hz,
                num_fft_bands=int(self._config["fft_size"] // 2) + 1,
                sample_rate=self._config["mic_rate"],
            )

            self._config["samples"] = len(center_frequencies_hz)
            self.filterbank = aubio.filterbank(
                self._config["samples"], self._config["fft_size"]
            )
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        self.melbank_frequencies = self.melbank_frequencies.astype(int)

        # Normalize the filterbank triangles to a consistent height, the
        # default coeffs (for types other than legacy) will be normalized
        # by the triangles area which results in an uneven melbank
        if (
            self._config["coeffs_type"] != "scott"
            and self._config["coeffs_type"] == "scott_mel"
        ):
            coeffs = self.filterbank.get_coeffs()
            coeffs /= np.max(coeffs, axis=-1)[:, None]
            self.filterbank.set_coeffs(coeffs)

        # Find the indexes for each of the frequency ranges
        self.lows_index = self.mids_index = self.highs_index = 1
        for i in range(0, len(self.melbank_frequencies)):
            if (
                self.melbank_frequencies[i]
                < FREQUENCY_RANGES_SIMPLE["Low (1-250Hz)"].max
            ):
                self.lows_index = i + 1
            elif (
                self.melbank_frequencies[i]
                < FREQUENCY_RANGES_SIMPLE["Mid (250Hz-4kHz)"].max
            ):
                self.mids_index = i + 1
            elif (
                self.melbank_frequencies[i]
                < FREQUENCY_RANGES_SIMPLE["High (4kHz-24kHz)"].max
            ):
                self.highs_index = i + 1

        # Build up some of the common filters
        self.mel_gain = ExpFilter(
            np.tile(1e-1, self._config["samples"]),
            alpha_decay=0.01,
            alpha_rise=0.99,
        )
        self.mel_smoothing = ExpFilter(
            np.tile(1e-1, self._config["samples"]),
            alpha_decay=0.2,
            alpha_rise=0.99,
        )
        self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)
Пример #3
0
    def _initialize_melbank(self):
        """Initialize all the melbank related variables"""

        # Few difference coefficient types for experimentation
        if self._config['coeffs_type'] == 'triangle':
            melbank_mel = np.linspace(
                aubio.hztomel(self._config['min_frequency']),
                aubio.hztomel(self._config['max_frequency']),
                self._config['samples'] + 2)
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel]).astype(np.float32)

            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config['mic_rate'])
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        if self._config['coeffs_type'] == 'bark':
            melbank_bark = np.linspace(
                6.0 * np.arcsinh(self._config['min_frequency'] / 600.0),
                6.0 * np.arcsinh(self._config['max_frequency'] / 600.0),
                self._config['samples'] + 2)
            self.melbank_frequencies = (600.0 *
                                        np.sinh(melbank_bark / 6.0)).astype(
                                            np.float32)

            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config['mic_rate'])
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        # Slaney coefficients will always produce 40 samples spanning 133Hz to 6000Hz
        if self._config['coeffs_type'] == 'slaney':
            self.filterbank = aubio.filterbank(40, self._config['fft_size'])
            self.filterbank.set_mel_coeffs_slaney(self._config['mic_rate'])

            # Sanley frequencies are linear-log spaced where 133Hz to 1000Hz is linear
            # spaced and 1000Hz to 6000Hz is log spaced. It also produced a hardcoded
            # 40 samples.
            lowestFrequency = 133.3
            linearSpacing = 66.6666666
            logSpacing = 1.0711703
            linearFilters = 13
            logFilters = 27
            linearSpacedFreqs = lowestFrequency + np.arange(
                0, linearFilters) * linearSpacing
            logSpacedFreqs = linearSpacedFreqs[-1] * np.power(
                logSpacing, np.arange(1, logFilters + 1))

            self._config['samples'] = 40
            self.melbank_frequencies = np.hstack(
                (linearSpacedFreqs, logSpacedFreqs)).astype(np.float32)

        # Standard mel coefficients
        if self._config['coeffs_type'] == 'mel':
            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_mel_coeffs(self._config['mic_rate'],
                                           self._config['min_frequency'],
                                           self._config['max_frequency'])

            # Frequencies wil be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config['min_frequency']),
                aubio.hztomel(self._config['max_frequency']),
                self._config['samples'])
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel])

        # HTK mel coefficients
        if self._config['coeffs_type'] == 'htk':
            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_mel_coeffs_htk(self._config['mic_rate'],
                                               self._config['min_frequency'],
                                               self._config['max_frequency'])

            # Frequencies wil be linearly spaced in the mel scale
            melbank_mel = np.linspace(
                aubio.hztomel(self._config['min_frequency']),
                aubio.hztomel(self._config['max_frequency']),
                self._config['samples'])
            self.melbank_frequencies = np.array(
                [aubio.meltohz(mel) for mel in melbank_mel])

        # Coefficients based on Scott's audio reactive led project
        if self._config['coeffs_type'] == 'scott':
            (melmat, center_frequencies_hz, freqs) = mel.compute_melmat(
                num_mel_bands=self._config['samples'],
                freq_min=self._config['min_frequency'],
                freq_max=self._config['max_frequency'],
                num_fft_bands=int(self._config['fft_size'] // 2) + 1,
                sample_rate=self._config['mic_rate'])
            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_coeffs(melmat.astype(np.float32))
            self.melbank_frequencies = center_frequencies_hz

        # "Mel"-spacing based on Scott's audio reactive led project. This
        # should in theory be the same as the above, but there seems to be
        # slight differences. Leaving both for science!
        if self._config['coeffs_type'] == 'scott_mel':

            def hertz_to_scott(freq):
                return 3340.0 * log(1 + (freq / 250.0), 9)

            def scott_to_hertz(scott):
                return 250.0 * (9**(scott / 3340.0)) - 250.0

            melbank_scott = np.linspace(
                hertz_to_scott(self._config['min_frequency']),
                hertz_to_scott(self._config['max_frequency']),
                self._config['samples'] + 2)
            self.melbank_frequencies = np.array([
                scott_to_hertz(scott) for scott in melbank_scott
            ]).astype(np.float32)

            self.filterbank = aubio.filterbank(self._config['samples'],
                                               self._config['fft_size'])
            self.filterbank.set_triangle_bands(self.melbank_frequencies,
                                               self._config['mic_rate'])
            self.melbank_frequencies = self.melbank_frequencies[1:-1]

        self.melbank_frequencies = self.melbank_frequencies.astype(int)

        # Normalize the filterbank triangles to a consistent height, the
        # default coeffs (for types other than legacy) will be normalized
        # by the triangles area which results in an uneven melbank
        if self._config['coeffs_type'] != 'scott' and self._config[
                'coeffs_type'] == 'scott_mel':
            coeffs = self.filterbank.get_coeffs()
            coeffs /= np.max(coeffs, axis=-1)[:, None]
            self.filterbank.set_coeffs(coeffs)

        # Find the indexes for each of the frequency ranges
        for i in range(0, len(self.melbank_frequencies) - 1):
            if self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE['low'].max:
                self.lows_index = i
            elif self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE[
                    'mid'].max:
                self.mids_index = i
            elif self.melbank_frequencies[i] < FREQUENCY_RANGES_SIMPLE[
                    'high'].max:
                self.highs_index = i

        # Build up some of the common filters
        self.mel_gain = ExpFilter(np.tile(1e-1, self._config['samples']),
                                  alpha_decay=0.01,
                                  alpha_rise=0.99)
        self.mel_smoothing = ExpFilter(np.tile(1e-1, self._config['samples']),
                                       alpha_decay=0.2,
                                       alpha_rise=0.99)
        self.common_filter = ExpFilter(alpha_decay=0.99, alpha_rise=0.01)