Example #1
def interp_hcqt(audio_fpath=None, y=None, fs=None):
    """Compute the harmonic CQT from a given audio file or signal.

    Parameters
    ----------
    audio_fpath : str
        Path to the audio file (used when ``y`` is None).
    y : np.ndarray
        Audio signal, as an alternative to ``audio_fpath``.
    fs : float
        Sample rate of ``y``.

    Returns
    -------
    log_hcqt : np.ndarray
        Log-amplitude harmonic CQT, scaled to [0, 1].
    freq_grid : np.ndarray
        Frequency values in Hz.
    time_grid : np.ndarray
        Time stamps in seconds.

    """
    if y is None:
        y, fs = librosa.load(audio_fpath, sr=SR)
    else:
        y = librosa.resample(y, fs, SR)
        fs = SR

    # How many bins do we need?
    # n_bins_master = ceil(log2(max(HARMONICS))) * BINS_PER_OCTAVE + n_bins_plane
    n_bins_plane = N_OCTAVES * BINS_PER_OCTAVE

    n_bins_master = int(np.ceil(np.log2(np.max(HARMONICS))) * BINS_PER_OCTAVE) + n_bins_plane

    cqt_master = np.abs(librosa.cqt(y=y, sr=fs,
                                    hop_length=HOP_LENGTH,
                                    fmin=FMIN,
                                    n_bins=n_bins_master,
                                    bins_per_octave=BINS_PER_OCTAVE))

    freq_grid = librosa.cqt_frequencies(N_OCTAVES * BINS_PER_OCTAVE,
                                        FMIN,
                                        bins_per_octave=BINS_PER_OCTAVE)

    freq_master = librosa.cqt_frequencies(n_bins_master, FMIN,
                                          bins_per_octave=BINS_PER_OCTAVE)

    hcqt = librosa.interp_harmonics(cqt_master,
                                    freq_master,
                                    HARMONICS)[:, :N_OCTAVES * BINS_PER_OCTAVE]

    log_hcqt = ((1.0/80.0) * librosa.core.amplitude_to_db(hcqt, ref=np.max)) + 1.0

    time_grid = librosa.core.frames_to_time(
        np.arange(log_hcqt.shape[-1]), sr=SR, hop_length=HOP_LENGTH
    )

    return log_hcqt, freq_grid, time_grid
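A minimal usage sketch, not from the original source: the module-level constants below are hypothetical stand-ins for the ones the function expects, and the call assumes a pre-0.10 librosa (positional resample arguments; librosa.ex needs >= 0.8).

import numpy as np
import librosa

# Hypothetical constants; the original module does not show them.
SR = 22050
HOP_LENGTH = 256
FMIN = 32.7            # ~C1
BINS_PER_OCTAVE = 60   # 5 bins per semitone
N_OCTAVES = 6
HARMONICS = [0.5, 1, 2, 3, 4, 5]

y, fs = librosa.load(librosa.ex('trumpet'), sr=SR)
log_hcqt, freq_grid, time_grid = interp_hcqt(y=y, fs=fs)
print(log_hcqt.shape)  # (len(HARMONICS), N_OCTAVES * BINS_PER_OCTAVE, n_frames)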
Example #2
def to_local_average_cents(salience, center=None):
    """
    find the weighted average cents near the argmax bin
    """

    if not hasattr(to_local_average_cents, 'mapping'):
        # build the bin-number-to-cents mapping once and cache it on the function
        freq_grid = librosa.cqt_frequencies(config.cqt_bins, config.fmin, bins_per_octave=config.bins_per_octave)
        to_local_average_cents.mapping = (
                np.linspace(0, 7180, 360) + freq_grid[-1])

    if salience.ndim == 1:
        if center is None:
            center = int(np.argmax(salience))
        start = max(0, center - 4)
        end = min(len(salience), center + 5)
        salience = salience[start:end]
        product_sum = np.sum(
            salience * to_local_average_cents.mapping[start:end])
        weight_sum = np.sum(salience)
        return product_sum / weight_sum
    if salience.ndim == 2:
        return np.array([to_local_average_cents(salience[i, :]) for i in
                         range(salience.shape[0])])

    raise ValueError("salience should be either a 1d or 2d ndarray")
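A hypothetical smoke test for the 1-d branch; it only exercises the weighted-average logic, assuming the surrounding module provides `config` and the imports used above.

import numpy as np

salience = np.zeros(360)
salience[99:102] = [0.5, 1.0, 0.5]  # synthetic peak at bin 100
print(to_local_average_cents(salience))  # weighted average of mapping[96:105]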
Example #3
def test_wavelet(sr, fmin, n_bins, bins_per_octave, filter_scale, pad_fft,
                 norm, gamma):

    freqs = librosa.cqt_frequencies(fmin=fmin,
                                    n_bins=n_bins,
                                    bins_per_octave=bins_per_octave)

    F, lengths = librosa.filters.wavelet(freqs=freqs,
                                         sr=sr,
                                         filter_scale=filter_scale,
                                         pad_fft=pad_fft,
                                         norm=norm,
                                         gamma=gamma)

    assert np.all(lengths <= F.shape[1])

    assert len(F) == n_bins

    if not pad_fft:
        return

    assert np.mod(np.log2(F.shape[1]), 1.0) == 0.0

    # Check for vanishing negative frequencies
    F_fft = np.abs(np.fft.fft(F, axis=1))
    # Normalize by row-wise peak
    F_fft = F_fft / np.max(F_fft, axis=1, keepdims=True)
    assert np.max(F_fft[:, -F_fft.shape[1] // 2:]) < 1e-3
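For reference, a direct call with concrete values; librosa.filters.wavelet is the public name in librosa >= 0.10, and the parameter values here are illustrative only.

import numpy as np
import librosa

freqs = librosa.cqt_frequencies(n_bins=24, fmin=librosa.note_to_hz('C2'),
                                bins_per_octave=12)
F, lengths = librosa.filters.wavelet(freqs=freqs, sr=22050, filter_scale=1,
                                     pad_fft=True, norm=1, gamma=0)
print(F.shape, lengths.shape)  # one filter per frequency: (24, n_fft), (24,)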
Example #4
def get_X(decision_length, fmin, hop_length, n_bins_per_octave, n_octaves,
          track_or_path):
    if isinstance(track_or_path, str):  # `basestring` in the original (Python 2)
        x_mono, sr = librosa.core.load(track_or_path, sr=None, mono=True)
    else:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            (sr, x_stereo) = track_or_path.audio_data
            warnings.resetwarnings()
        x_stereo = x_stereo.astype(np.float32)
        x_mono = np.sum(x_stereo, axis=1) / (32768.0 * 2)
    if x_mono.shape[0] < decision_length:
        # zero-pad signals shorter than the decision length
        padding_length = decision_length - x_mono.shape[0]
        padding = np.zeros(padding_length, dtype=np.float32)
        x_mono = np.hstack((x_mono, padding))
    n_bins = n_octaves * n_bins_per_octave
    freqs = librosa.cqt_frequencies(bins_per_octave=n_bins_per_octave,
                                    fmin=fmin,
                                    n_bins=n_bins)
    CQT = np.abs(
        librosa.cqt(x_mono,
                    bins_per_octave=n_bins_per_octave,
                    fmin=fmin,
                    hop_length=hop_length,
                    n_bins=n_bins,
                    sr=sr,
                    real=False))
    A_weights_dB = librosa.A_weighting(freqs, min_db=-80.0)
    A_weights = (10.0**(A_weights_dB / 10))
    X = np.log1p(1000.0 * CQT * A_weights[:, np.newaxis])
    X = X.astype(np.float32)
    return X
Example #5
def get_freq_grid():
    """Get the hcqt frequency grid
    """
    (bins_per_octave, n_octaves, _, _, f_min, _, over_sample) = get_hcqt_params()
    freq_grid = librosa.cqt_frequencies(
        n_octaves*12*over_sample, f_min, bins_per_octave=bins_per_octave)
    return freq_grid
Example #6
def extract_bar_cqt(sr, wav_data):
    """
    :param sr: Sample Rate of the Wav file
    :param wav_data: Single Channel Wav Data
    :return: splits of wav_data into bars by finding tempo dynamically
    """
    onset_env = librosa.onset.onset_strength(y=wav_data, sr=sr)
    prior = scipy.stats.lognorm(loc=np.log(120), scale=120, s=1)
    pulse = librosa.beat.plp(onset_envelope=onset_env,
                             sr=sr,
                             hop_length=Config.HOP_LENGTH,
                             prior=prior)
    beats_plp = np.flatnonzero(librosa.util.localmax(pulse))
    times = librosa.times_like(pulse, sr=sr)
    frequencies = librosa.cqt_frequencies(
        n_bins=Config.N_BINS,
        fmin=Config.F_MIN,
        bins_per_octave=Config.BINS_PER_OCTAVE)
    cqt = np.abs(
        librosa.cqt(wav_data,
                    sr=sr,
                    fmin=Config.F_MIN,
                    n_bins=Config.N_BINS,
                    bins_per_octave=Config.BINS_PER_OCTAVE))
    cqt_db = librosa.amplitude_to_db(cqt, ref=np.max)
    cqt_split = []
    for i, b in enumerate(beats_plp[:-1]):
        cqt_split.append(cqt_db[:, b:beats_plp[i + 1]])

    cqt_split.append(cqt_db[:, beats_plp[-1]:])
    return cqt_split, times[beats_plp]
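A hypothetical driver; the example's Config class is not shown, so the stand-in values below are assumptions (librosa.ex requires librosa >= 0.8).

import librosa

class Config:  # stand-in for the missing Config of the original module
    HOP_LENGTH = 512
    N_BINS = 84
    F_MIN = librosa.note_to_hz('C1')
    BINS_PER_OCTAVE = 12

y, sr = librosa.load(librosa.ex('trumpet'))
cqt_split, beat_times = extract_bar_cqt(sr, y)
print(len(cqt_split), beat_times[:4])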
Example #7
def centroid(spectrum, config=dict()):
    '''
    Computes spectral centroid feature.

    Parameters
    ----------
    spectrum : np.ndarray [shape=(n_bins, n_frames)]
        Spectrum from which the feature is computed.

    config : dict
        Configuration dictionary. For the full list of parameters and their descriptions, see the README file. This
        function uses no parameters.

    Returns
    -------
    feature : np.ndarray [shape=(n_frames,)]
        Computed spectral centroid feature.
    '''

    freq = None

    spectrum_type = get(config, 'spectrum.type')
    if spectrum_type == 'cqt':
        freq = librosa.cqt_frequencies(get(config, 'spectrum.n_bins'),
                                       fmin=librosa.note_to_hz('C1'))
    elif spectrum_type == 'mel':
        freq = librosa.mel_frequencies(n_mels=get(config, 'spectrum.n_bins'),
                                       htk=True)

    return librosa.feature.spectral_centroid(S=spectrum, freq=freq)[0]
Example #8
def get_cqt_index(pitch, hparams):
    """Get row closest to this pitch in a CQT spectrogram"""
    frequencies = librosa.cqt_frequencies(
        constants.TIMBRE_SPEC_BANDS,
        fmin=librosa.midi_to_hz(constants.MIN_TIMBRE_PITCH),
        bins_per_octave=constants.BINS_PER_OCTAVE)

    return np.abs(frequencies - librosa.midi_to_hz(pitch.numpy() - 1)).argmin()
Example #9
 def estimate_pitch(self, segment, threshold):
     freqs = librosa.cqt_frequencies(n_bins=self.n_bins, fmin=librosa.note_to_hz('C1'),
                                     bins_per_octave=12)
     amplitude = np.mean(np.amax(segment, axis=0))
     if segment.max() < threshold:
         return [None, amplitude]
     f0 = int(np.mean(np.argmax(segment, axis=0)))
     return [freqs[f0], amplitude]
Example #10
def get_freq_grid():
    """Get the hcqt frequency grid
    Function from https://github.com/rabitt/ismir2017-deepsalience"""
    (bins_per_octave, n_octaves, _, _, f_min, _) = get_hcqt_params()
    freq_grid = librosa.cqt_frequencies(bins_per_octave * n_octaves,
                                        f_min,
                                        bins_per_octave=bins_per_octave)
    return freq_grid
Example #11
def compute_hcqt(audio_fpath):
    """Compute the harmonic CQT from a given audio file

    Parameters
    ----------
    audio_fpath : str
        path to audio file

    Returns
    -------
    log_hcqt : np.ndarray
        Log-amplitude harmonic CQT
    freq_grid : np.ndarray
        Frequency values in Hz
    time_grid : np.ndarray
        Time stamps in seconds

    """
    if audio_fpath.endswith('npy'):

        log_hcqt = np.load(audio_fpath)

    else:

        y, fs = librosa.load(audio_fpath, sr=SR)

        cqt_list = []
        shapes = []
        for h in HARMONICS:
            cqt = librosa.cqt(y,
                              sr=fs,
                              hop_length=HOP_LENGTH,
                              fmin=FMIN * float(h),
                              n_bins=BINS_PER_OCTAVE * N_OCTAVES,
                              bins_per_octave=BINS_PER_OCTAVE)
            cqt_list.append(cqt)
            shapes.append(cqt.shape)

        shapes_equal = [s == shapes[0] for s in shapes]
        if not all(shapes_equal):
            min_time = np.min([s[1] for s in shapes])
            new_cqt_list = []
            for i in range(len(cqt_list)):
                new_cqt_list.append(cqt_list[i][:, :min_time])
            cqt_list = new_cqt_list

        log_hcqt = ((1.0 / 80.0) * librosa.core.amplitude_to_db(
            np.abs(np.array(cqt_list)), ref=np.max)) + 1.0

    freq_grid = librosa.cqt_frequencies(BINS_PER_OCTAVE * N_OCTAVES,
                                        FMIN,
                                        bins_per_octave=BINS_PER_OCTAVE)

    time_grid = librosa.core.frames_to_time(range(log_hcqt.shape[2]),
                                            sr=SR,
                                            hop_length=HOP_LENGTH)

    return log_hcqt, freq_grid, time_grid
Example #12
def load_hcqt(hcqt_filepath):

    log_hcqt = np.load(hcqt_filepath)

    freq_grid = librosa.cqt_frequencies(BINS_PER_OCTAVE * N_OCTAVES,
                                        FMIN,
                                        bins_per_octave=BINS_PER_OCTAVE)

    time_grid = librosa.core.frames_to_time(range(log_hcqt.shape[2]),
                                            sr=SR,
                                            hop_length=HOP_LENGTH)

    return log_hcqt, freq_grid, time_grid
Example #13
    def compute_pitches(self, display_plot_frame=-1):
        overall_chromagram = Chromagram()

        # first C = C3
        notes = librosa.cqt_frequencies(12, fmin=librosa.note_to_hz('C3'))

        self.specgram_to_plot = []

        for n in range(12):
            for octave in range(1, self.num_octave + 1):
                for harmonic in range(1, self.num_harmonic + 1):
                    f_candidate = notes[n] * octave * harmonic
                    window_size = int((8 / f_candidate) * self.fs)

                    chromagram = Chromagram()
                    for frame, x_t in enumerate(frame_cutter(self.x, window_size)):
                        real_window_size = max(x_t.shape[0], window_size)
                        window = numpy.hanning(real_window_size)
                        s, f = mlab.magnitude_spectrum(x_t, Fs=self.fs, window=window)
                        s = s[:int(s.shape[0]/2)]
                        f = f[:int(f.shape[0]/2)]
                        s[s < 0] = 0.0  # clip
                        might_append_1 = s.copy()
                        might_append_2 = []

                        for _ in range(self.harmonic_elim_runs):
                            max_freq_idx = s.argmax(axis=0)
                            max_f = f[max_freq_idx]
                            try:
                                note = librosa.hz_to_note(max_f, octave=False)
                                chromagram[note] += s[max_freq_idx]
                                might_append_2.append((max_freq_idx, max_f, note))
                            except (ValueError, OverflowError):
                                continue
                            # zero out integer-multiple harmonics of the detected peak
                            for harmonic_index_multiple in range(
                                1, self.harmonic_multiples_elim
                            ):
                                elim_freq = harmonic_index_multiple * max_f
                                elim_index = numpy.where(f == elim_freq)
                                s[elim_index] = 0.0
                        might_append_3 = s.copy()

                        if frame == display_plot_frame:
                            # plot once and stop
                            display_plot_frame = -1
                            _display_plots(self.clip_name, self.x, ((might_append_1, might_append_2, might_append_3)))

                    overall_chromagram += chromagram

        return overall_chromagram
Example #14
def cqtgram(y, sr, hop_length=512, octave_bins=24, n_octaves=8, fmin=40, perceptual_weighting=False):
    s_complex = librosa.cqt(
        y,
        sr=sr,
        hop_length=hop_length,
        bins_per_octave=octave_bins,
        n_bins=octave_bins * n_octaves,
        fmin=fmin,
    )
    specgram = np.abs(s_complex)
    if perceptual_weighting:
        freqs = librosa.cqt_frequencies(specgram.shape[0], fmin=fmin, bins_per_octave=octave_bins)
        specgram = librosa.perceptual_weighting(specgram**2, freqs, ref=np.max)
    else:
        specgram = librosa.amplitude_to_db(specgram, ref=np.max)
    return specgram
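A quick usage sketch; librosa.ex requires librosa >= 0.8, and the positional freqs argument to perceptual_weighting assumes a pre-0.10 librosa.

import librosa

y, sr = librosa.load(librosa.ex('trumpet'))
S = cqtgram(y, sr, perceptual_weighting=True)
print(S.shape)  # (octave_bins * n_octaves, n_frames) = (192, n_frames)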
Example #15
def test_cqt_frequencies(n_bins, fmin, bins_per_octave, tuning):

    freqs = librosa.cqt_frequencies(n_bins,
                                    fmin,
                                    bins_per_octave=bins_per_octave,
                                    tuning=tuning)

    # Make sure we get the right number of bins
    assert len(freqs) == n_bins

    # And that the first bin matches fmin by tuning
    assert np.allclose(freqs[0], fmin * 2.0**(float(tuning) / bins_per_octave))

    # And that we have constant Q
    Q = np.diff(np.log2(freqs))
    assert np.allclose(Q, 1.0 / bins_per_octave)
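The constant-Q spacing the test asserts can be checked directly:

import numpy as np
import librosa

freqs = librosa.cqt_frequencies(n_bins=24, fmin=librosa.note_to_hz('C1'),
                                bins_per_octave=12)
print(np.diff(np.log2(freqs))[:3])  # constant log2 steps of 1/12 ~ 0.0833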
Example #16
    def predictOne(self, samples: Signal):
        """Calculates the CQT of the given audio using librosa.

        Args:
            samples (Signal): The samples of the audio.

        Returns:
            tuple of Signal: The CQT of the audio, at the CQT frame rate.

        """
        sr = samples.sampleRate
        hop_length = self.parameters["hopLength"].value
        n_bins = self.parameters["binNumber"].value
        cqt_sr = sr / hop_length
        cqt = librosa.cqt(samples.values,
                          sr=sr,
                          hop_length=hop_length,
                          n_bins=n_bins)
        linear_cqt = np.abs(cqt)

        if self.parameters["scale"].value == "Amplitude":
            result = linear_cqt
        elif self.parameters["scale"].value == "Power":
            result = linear_cqt**2
        elif self.parameters["scale"].value == "MSAF":
            result = librosa.amplitude_to_db(linear_cqt**2, ref=np.max)
            result += np.min(
                result
            ) * -1  # Inverting the db scale (don't know if this is correct)
        elif self.parameters["scale"].value == "Power dB":
            result = librosa.amplitude_to_db(
                linear_cqt,
                ref=np.max)  # Based on Librosa, standard power spectrum in dB
            result += np.min(result) * -1
        elif self.parameters["scale"].value == "Perceived dB":
            freqs = librosa.cqt_frequencies(linear_cqt.shape[0],
                                            fmin=librosa.note_to_hz('C1'))
            result = librosa.perceptual_weighting(linear_cqt**2,
                                                  freqs,
                                                  ref=np.max)
            result += np.min(result) * -1
        else:
            raise ValueError("parameterScale is not a correct value")

        return (Signal(result.T, sampleRate=cqt_sr), )
Example #17
 def cqt(self, data: np.array = None, hop_lengths: List[int] = None, bins_per_octave: int = 12):
   # data: shape = (sample number, channel number)
   # hop_lengths: number of samples between two consecutive frames, per channel
   if not self.__opened:
     raise Exception('load an audio file first!')
   if hop_lengths is None:
     hop_lengths = [512] * (self.__channels if data is None else data.shape[-1])
   assert len(hop_lengths) == (self.__channels if data is None else data.shape[-1])
   normalized = self.normalize(data)
   channels = list()
   for i in range(normalized.shape[-1]):
     normalized_channel = normalized[:, i]
     channel_results = cqt(normalized_channel, self.__frame_rate, hop_lengths[i], fmin=note_to_hz('A0'), n_bins=88, bins_per_octave=bins_per_octave)  # results.shape = (88, hop number)
     channels.append(channel_results)
   spectrum = np.stack(channels, axis=0)  # spectrum.shape = (channel number, 88, hop number)
   freqs = cqt_frequencies(88, fmin=note_to_hz('A0'), bins_per_octave=bins_per_octave)
   return spectrum, freqs
Example #18
    def compute_pitches(self, display_plot_frame=-1):
        # first C = C3
        notes = librosa.cqt_frequencies(12, fmin=librosa.note_to_hz('C3'))

        divisor_ratio = (self.fs / 4.0) / self.frame_size
        self.dft_maxes = []

        overall_chromagram = Chromagram()

        for frame, x in enumerate(frame_cutter(self.x, self.frame_size)):
            chromagram = Chromagram()
            x = x * scipy.signal.windows.hamming(self.frame_size)
            x_dft = numpy.sqrt(numpy.absolute(numpy.fft.rfft(x)))
            for n in range(12):
                chroma_sum = 0.0
                for octave in range(1, self.num_octave + 1):
                    note_sum = 0.0
                    for harmonic in range(1, self.num_harmonic + 1):
                        x_dft_max = float("-inf")  # sentinel

                        k_prime = numpy.round(
                            (notes[n] * octave * harmonic) / divisor_ratio)
                        k0 = int(k_prime - self.num_bins * harmonic)
                        k1 = int(k_prime + self.num_bins * harmonic)

                        best_ind = None
                        for k in range(k0, k1):
                            curr_ = x_dft[k]
                            if curr_ > x_dft_max:
                                x_dft_max = curr_
                                best_ind = k

                        note_sum += x_dft_max * (1.0 / harmonic)
                        self.dft_maxes.append((k0, best_ind, k1))
                    chroma_sum += note_sum
                chromagram[n] += chroma_sum

            overall_chromagram += chromagram

            if frame == display_plot_frame:
                _display_plots(self.clip_name, self.fs, self.frame_size, x_dft,
                               self.x, x, self.dft_maxes)
        return overall_chromagram
Example #19
    def _compute_cqt(self, y, sr):
        """Compute a CQT.

        Parameters
        ----------
        y : np.array
            Audio signal
        sr : float
            Audio signal sample rate

        Returns
        -------
        cqt : np.array [n_freqs, n_samples]
            Normalized CQT magnitude.
        samples : np.array [n_samples]
            CQT time stamps, in samples.
        freqs : np.array [n_freqs]
            CQT frequencies.

        """
        fmin = librosa.note_to_hz(self.min_note)
        bins_per_octave = 12
        n_cqt_bins = bins_per_octave * self.n_octaves
        cqt = np.abs(
            librosa.cqt(y,
                        sr=sr,
                        hop_length=self.hop_size,
                        fmin=fmin,
                        filter_scale=self.filter_scale,
                        bins_per_octave=bins_per_octave,
                        n_bins=n_cqt_bins))

        cqt = self._norm_matrix(cqt)

        n_time_frames = cqt.shape[1]

        freqs = librosa.cqt_frequencies(fmin=fmin,
                                        bins_per_octave=bins_per_octave,
                                        n_bins=n_cqt_bins)
        samples = librosa.frames_to_samples(range(n_time_frames),
                                            hop_length=self.hop_size)

        return cqt, samples, freqs
Example #20
 def toggle_y_axis(self):
     freqs = librosa.cqt_frequencies(300,
                                     fmin=librosa.note_to_hz('C2'),
                                     bins_per_octave=60)
     f_axis_dict = []
     if self.radio_y_frequency.isChecked(
     ) and self.radio_cqt_option.isChecked():
         freqs_formatted = ['%.2f' % elem for elem in freqs]
         f_axis_dict = list(dict(enumerate(freqs_formatted)).items())
         self.p1.setLabel('left', "Frequency", units='Hz')
         major_f_ticks = f_axis_dict[::300 // 4]
         del f_axis_dict[::300 // 4]
         minor_f_ticks = f_axis_dict
     elif self.radio_y_note.isChecked() and self.radio_cqt_option.isChecked(
     ):
         notes = librosa.hz_to_note(freqs)[::5]
         temp_dict = {}
         for i, note in enumerate(notes):
             temp_dict[i * 5] = note
         f_axis_dict = list(temp_dict.items())
         self.p1.setLabel('left', "Notes", units='')
         major_f_ticks = f_axis_dict[::60 // 4]
         del f_axis_dict[::60 // 4]
         minor_f_ticks = f_axis_dict
     elif self.radio_y_frequency.isChecked(
     ) and self.radio_plca_option.isChecked():
         freqs_formatted = ['%.2f' % elem for elem in freqs]
         freqs = freqs_formatted[::5]
         f_axis_dict = list(dict(enumerate(freqs)).items())
         self.p1.setLabel('left', "Frequency", units='Hz')
         major_f_ticks = f_axis_dict[::60 // 4]
         del f_axis_dict[::60 // 4]
         minor_f_ticks = f_axis_dict
     elif self.radio_y_note.isChecked(
     ) and self.radio_plca_option.isChecked():
         notes = librosa.hz_to_note(freqs)[::5]
         f_axis_dict = list(dict(enumerate(notes)).items())
         self.p1.setLabel('left', "Notes", units='')
         major_f_ticks = f_axis_dict[::60 // 4]
         del f_axis_dict[::60 // 4]
         minor_f_ticks = f_axis_dict
     newLeftTicks = self.p1.getAxis('left')
     newLeftTicks.setTicks([major_f_ticks, minor_f_ticks])
Example #21
    def librosaSpectrum(self):
        import librosa.display
        measurementPath = os.path.join(os.path.dirname(__file__),
                                       '../test/data', 'eot.wav')
        # measurement = ms.loadSignalFromWav(measurementPath)
        y, sr = librosa.load(measurementPath, sr=500)
        # number of octaves in the CQT: divide sr by 8 because we lose one octave
        # to Nyquist and another to fit the filter inside Nyquist (I think)
        bins_per_octave = 24
        n_bins = math.floor(bins_per_octave * math.log2(sr / 8)) - 1
        fmin = 4.0
        cqt_freq = librosa.cqt_frequencies(n_bins,
                                           fmin,
                                           bins_per_octave=bins_per_octave)
        C = librosa.core.cqt(y,
                             sr,
                             hop_length=2**12,
                             fmin=4.0,
                             bins_per_octave=bins_per_octave,
                             n_bins=n_bins,
                             filter_scale=2)
        spectrum = np.sqrt(np.mean(np.abs(C)**2, axis=-1))
        peak = np.sqrt(np.max(np.abs(C)**2, axis=-1))
        plt.figure()
        plt.xlim(4, 250)
        plt.semilogx(cqt_freq,
                     librosa.amplitude_to_db(spectrum, ref=np.max(peak)))
        # plt.semilogx(cqt_freq, librosa.amplitude_to_db(peak, ref=np.max(peak)))

        measurement = Signal(y, fs=500)
        f, Pxx = measurement.peakSpectrum()
        # plt.semilogx(f, librosa.amplitude_to_db(Pxx, ref=np.max(Pxx)))
        f, Pxx_spec = measurement.spectrum()
        plt.semilogx(f, librosa.amplitude_to_db(Pxx_spec, ref=np.max(Pxx)))


        plt.tight_layout()
        plt.show()
Example #22
def processAudio(f_method='fft', b_method='times'):

    # Get raw PCM data
    y, sr = librosa.load(PATH_TO_AUDIO,
                         duration=DURATION,
                         sr=SAMPLE_RATE,
                         mono=True)

    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Beat track on the percussive signal
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,
                                                 sr=SAMPLE_RATE)

    if b_method == 'times':
        B = librosa.frames_to_time(beat_frames, sr=SAMPLE_RATE)
    else:
        B = beat_frames

    if f_method == 'fft':
        D = librosa.stft(y, n_fft=FFT_SIZE, hop_length=HOP_LENGTH, center=True)
        F = librosa.fft_frequencies(sr=SAMPLE_RATE, n_fft=FFT_SIZE)

    elif f_method == 'cqt':
        D = librosa.cqt(y,
                        sr=SAMPLE_RATE,
                        hop_length=HOP_LENGTH,
                        n_bins=84,
                        bins_per_octave=12,
                        fmin=28)
        F = librosa.cqt_frequencies(84, 28, bins_per_octave=12)

    D = np.transpose(librosa.amplitude_to_db(np.abs(D), ref=np.max))

    return {
        "d": D,
        "f": F,
        "b": B,
    }
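The globals processAudio reads are not shown in the source; a hypothetical setup might look like this (values are illustrative only).

# Assumed module-level constants (illustrative values only).
PATH_TO_AUDIO = 'song.wav'
DURATION = 30.0
SAMPLE_RATE = 22050
FFT_SIZE = 2048
HOP_LENGTH = 512

result = processAudio(f_method='cqt', b_method='times')
print(result['d'].shape, result['f'].shape, result['b'].shape)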
Example #23
 def cqtgram(self,
             y,
             hop_length=512,
             octave_bins=24,
             n_octaves=8,
             fmin=40,
             perceptual_weighting=False):
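      # Note: this method targets an older librosa API -- cqt(..., real=False),
      # logamplitude, and the ref_power keyword were replaced in later releases
      # by amplitude_to_db / power_to_db with ref=.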
     S_complex = librosa.cqt(y,
                             sr=self.sr,
                             hop_length=hop_length,
                             bins_per_octave=octave_bins,
                             n_bins=octave_bins * n_octaves,
                             fmin=fmin,
                             real=False)
     S = np.abs(S_complex)
     if perceptual_weighting:
         freqs = librosa.cqt_frequencies(S.shape[0],
                                         fmin=fmin,
                                         bins_per_octave=octave_bins)
         S = librosa.perceptual_weighting(S**2, freqs, ref_power=np.max)
     else:
         S = librosa.logamplitude(S**2, ref_power=np.max)
     return S
Example #24
bound_frames = librosa.util.fix_frames(bound_frames,
                                       x_min=None,
                                       x_max=C.shape[1]-1)

###################################################
# And plot the final segmentation over original CQT


# sphinx_gallery_thumbnail_number = 5

import matplotlib.patches as patches
import json
# plt.figure(figsize=(12, 4))

bound_times = librosa.frames_to_time(bound_frames)
freqs = librosa.cqt_frequencies(n_bins=C.shape[0],
                                fmin=librosa.note_to_hz('C1'),
                                bins_per_octave=BINS_PER_OCTAVE)

print(len(bound_times))
with open("./output-segments.json", 'w+') as output:
    json.dump(bound_times.tolist(),output, indent=4)

librosa.display.specshow(C, y_axis='cqt_hz', sr=sr,
                         bins_per_octave=BINS_PER_OCTAVE,
                         x_axis='time')
ax = plt.gca()

for interval, label in zip(zip(bound_times, bound_times[1:]), bound_segs):
    ax.add_patch(patches.Rectangle((interval[0], freqs[0]),
                                   interval[1] - interval[0],
                                   freqs[-1],
Example #25
def process_output(atb):
    freq_grid = librosa.cqt_frequencies(config.cqt_bins, config.fmin, bins_per_octave=config.bins_per_octave)
    time_grid = np.linspace(0, config.hoptime * atb.shape[0], atb.shape[0])
    time_grid, est_freqs = get_multif0(atb.T, freq_grid, time_grid)
    return time_grid, est_freqs
Example #26
def icqt_recursive(C, sr=22050, hop_length=512, fmin=None, bins_per_octave=12, filter_scale=None, norm=1, scale=True, window='hann'):
    
    n_octaves = int(np.ceil(float(C.shape[0]) / bins_per_octave))


    if fmin is None:
        fmin = librosa.note_to_hz('C1')
    
    freqs = librosa.cqt_frequencies(C.shape[0], fmin,
                            bins_per_octave=bins_per_octave)[-bins_per_octave:]

    fmin_t = np.min(freqs)
    fmax_t = np.max(freqs)

    # Make the filter bank
    f, lengths = librosa.filters.constant_q(sr=sr,
                                            fmin=fmin_t,
                                            n_bins=bins_per_octave,
                                            bins_per_octave=bins_per_octave,
                                            filter_scale=filter_scale,
                                            norm=norm, window=window)
    
    if scale:
        f = f / np.sqrt(lengths[:, np.newaxis])
    
    else:
        f = f / lengths[:, np.newaxis]
        
    n_trim = f.shape[1] // 2
    
    # Hermitian the filters and sparsify
    f = librosa.util.sparsify_rows(f)
    
    y = None
    
    for octave in range(n_octaves - 1, -1, -1):
    
        # Compute the slice index for the current octave
        slice_ = slice(-(octave+1) * bins_per_octave - 1, -(octave) * bins_per_octave - 1)
        
        # Project onto the basis        
        C_ = C[slice_]
        fb = f[-C_.shape[0]:] #/ np.sqrt(lengths[-C_.shape[0]:, np.newaxis])
        Cf = fb.conj().T.dot(C_) 
        
        # Overlap-add the responses
        y_oct = np.zeros(int(f.shape[1] + (2**(-octave) *  hop_length * C.shape[1])), dtype=f.dtype)
        for i in range(Cf.shape[1]):
            y_oct[int(i * hop_length * 2**(-octave)):int(i * hop_length * 2**(-octave) + Cf.shape[0])] += Cf[:, i]
        
        if y is None:
            y = y_oct
            continue

        # Up-sample the previous buffer and add in the new one
        y = (librosa.core.resample(y.real, 1, 2, scale=True) + 
             1.j * librosa.core.resample(y.imag, 1, 2, scale=True))
        
        y = y[n_trim:-n_trim] / 2 + y_oct
        
    # Chop down the length
    y = librosa.util.fix_length(y.real, f.shape[1] + hop_length * C.shape[1])

    y *= 2**n_octaves

    # Trim off the center-padding
    return np.ascontiguousarray(y[n_trim:-n_trim])
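A round-trip sketch under the same assumptions the function itself makes (a pre-0.10 librosa, where librosa.filters.constant_q and positional resample arguments exist; newer releases ship librosa.icqt for this). filter_scale is passed explicitly because the None default would not be accepted by constant_q.

import numpy as np
import librosa

y, sr = librosa.load(librosa.ex('trumpet'))
C = librosa.cqt(y, sr=sr, hop_length=512, fmin=librosa.note_to_hz('C1'))
y_hat = icqt_recursive(C, sr=sr, hop_length=512,
                       fmin=librosa.note_to_hz('C1'), filter_scale=1)
print(np.max(np.abs(y[:len(y_hat)] - y_hat)))  # reconstruction error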
Example #27
librosa.display.specshow(librosa.logamplitude(melspec, ref_power=np.max), y_axis='mel',
                         sr=sr, cmap='viridis')
plt.xlabel('Original mel spectrum')

plt.subplot(2,1,2)
librosa.display.specshow(librosa.logamplitude(melspec2, ref_power=np.max),  y_axis='mel', sr=sr,
                         cmap='viridis', x_axis='time')
plt.xlabel('Reconstructed signal')

plt.tight_layout()


# In[799]:

sr_max = librosa.cqt_frequencies(n_bins=C.shape[0],
                        fmin=librosa.note_to_hz('C1'),
                        bins_per_octave=int(12*over_sample))[-1] * 2


# In[800]:

y_filt = librosa.resample(librosa.resample(y, sr, sr_max), sr_max, sr)


# In[801]:

mir_eval.separation.evaluate(y[np.newaxis, :], y2[np.newaxis, :])


# In[802]:
Example #28
def decode_song(x, sr, detect_ends=True, draw=False):
    """
    Given an audio signal, returns the transcribed music as a DecodedSong class
    """
    n_bins = NUM_KEYS * BINS_PER_NOTE
    bins_per_octave = 12 * BINS_PER_NOTE
    Cxx = librosa.cqt(x,
                      sr=sr,
                      n_bins=n_bins,
                      bins_per_octave=bins_per_octave,
                      fmin=FMIN)
    fs = librosa.cqt_frequencies(n_bins, FMIN, bins_per_octave=bins_per_octave)
    ts = librosa.frames_to_time(np.arange(Cxx.shape[1]), sr=sr)

    Sxx = librosa.amplitude_to_db(np.abs(Cxx)**2)
    if draw and False:
        librosa.display.specshow(Sxx, sr=sr, x_axis='time', y_axis='off')
        plt.show()

    # compute onset strength envelope
    onset_envelope = get_onset_envelope(Sxx, ts, draw=draw)
    times = librosa.frames_to_time(np.arange(len(onset_envelope)), sr=sr)

    # display STFT for debugging
    """
    Sxx = librosa.core.stft(x, n_fft=4096, hop_length=512)
    fs = librosa.fft_frequencies(sr=sr, n_fft=4096)
    Sxx = librosa.amplitude_to_db(np.abs(Sxx)**2)
    if draw:
        librosa.display.specshow(Sxx, sr=sr, x_axis='time', y_axis='off')
        plt.show()
    """

    min_note_length = 0.075 * sr // 512 + 1  # ~ 75ms
    #onset_envelope = librosa.onset.onset_strength(y=y, sr=sr, feature=librosa.cqt)
    onset_frames = librosa.onset.onset_detect(onset_envelope=onset_envelope,
                                              sr=sr,
                                              wait=min_note_length)

    # look ahead of note onsets to find best frames to detect note
    note_detect_frames = []
    for n in range(len(onset_frames)):
        if n == len(onset_frames) - 1:
            next_onset = len(onset_envelope) - 1
        else:
            next_onset = onset_frames[n + 1]
        onset_start = onset_frames[n]
        note_frame = int((next_onset - onset_start) * 0.5) + onset_start
        diff = np.diff(onset_envelope[onset_start:])
        decreasing = np.argwhere(diff < 0).flatten()
        if len(decreasing):  # found a place to detect note
            onset_forward_shift = int(0.15 // (ts[1] - ts[0]) + 1)
            note_frame2 = decreasing[0] + onset_forward_shift + onset_start
            note_frame = min(note_frame,
                             note_frame2)  # don't detect after next note
        note_detect_frames.append(note_frame)
    note_detect_frames = np.array(note_detect_frames)

    onset_times = ts[onset_frames]
    note_detect_times = ts[note_detect_frames]

    if draw:  # display onset envelope, onsets and note detection frames
        plt.plot(times, onset_envelope, label='Onset strength')
        plt.vlines(onset_times,
                   0,
                   onset_envelope.max(),
                   color='r',
                   alpha=0.9,
                   linestyle='--',
                   label='Onsets')
        plt.vlines(note_detect_times,
                   0,
                   onset_envelope.max(),
                   color='g',
                   alpha=0.9,
                   linestyle='--',
                   label='Note detect')
        plt.axis('tight')
        plt.legend(frameon=True, framealpha=0.75)
        plt.show()

    Sxx = librosa.amplitude_to_db(
        np.power(np.abs(Cxx).T, np.linspace(3, 2, num=Cxx.shape[0])).T)
    omit_notes = []
    song = DecodedSong()
    for n in range(len(onset_frames)):  # iterate through onsets and add notes
        t = onset_times[n]
        if n == len(onset_frames) - 1:
            tnext = ts[-1]
        else:
            tnext = onset_times[n + 1]
        # detect the note at the chosen look-ahead frame, clipped to the spectrogram
        i = min(note_detect_frames[n], Sxx.shape[1] - 1)
        bins, notes, freqs, volumes = get_note_bins_at_index(
            Sxx,
            fs,
            i,
            omit_notes=omit_notes,
            peak_note=False,
            draw=False,
            polyphonic=False)
        omit_notes = notes

        # if last note has NOT ended next note has to be of different frequency
        end_i = detect_note_end(bins[0], i, Sxx)
        tnext_detect = ts[end_i]
        if tnext_detect < tnext:
            omit_notes = []
        if freqs:
            for j in range(len(freqs)):  # j, not n: avoid shadowing the onset index
                freq = freqs[j]
                if detect_ends:
                    tnext = min(tnext_detect, tnext)
                song.add_note(Note(freq, t, tnext - t, volume=1))
    return song
Example #29
import matplotlib.pyplot as plt
import librosa.display
import IPython.display as ipd
# Reference [d]: Classical MIDI Files, https://www.mfiles.co.uk/classical-midi.htm

model = None
history = None
trainx, trainy = ([], [])
validx, validy = ([], [])

BINS = 88
FREF = librosa.note_to_hz('A0')  # Reference frequency = 27.50 Hz -> A0
# For 84 frequency bins:
# FREF = librosa.note_to_hz('C1')  # Reference frequency = 32.70 Hz -> C1

fbins = librosa.cqt_frequencies(BINS, fmin=FREF)
FMAX = fbins[BINS-1]
STD = 25 # Standard deviation for the probability vector

def sample(filename):
    global trainx, trainy, validx, validy
    
    data, Sr = librosa.load("%s.mp3" % filename)
    D = np.abs(librosa.cqt(data, sr=Sr, fmin=FREF, n_bins=BINS))
    Spec = librosa.amplitude_to_db(librosa.magphase(D)[0], ref=np.min).T
    
    num_samples = 0 # Number of time frame samples per sound file

    with open("%s.txt" % filename) as f:
        for line in f:
            (i1, i2, i3) = line.split()
Example #30
def segmentation(song, display=False):
    '''
    Takes in a song and returns a dictionary of its major segments.
    It also fills the song's beatTrack and uses it in the segmentation algorithm.
    Algorithm written by: Brian McFee https://bmcfee.github.io/

    :param song: (Song)      | song to segment
    :param display: (bool)   | optional argument to display a graph of segments using matplotlib
    :return: seg_dict (dict) | dictionary mapping segment labels to lists of (start, end) times
    '''
    import numpy as np
    import scipy
    import matplotlib.pyplot as plt
    import sklearn.cluster

    y = song.load.y
    sr = song.load.sr
    beat_track = song.beat_track

    BINS_PER_OCTAVE = 12 * 3
    N_OCTAVES = 7
    C = librosa.amplitude_to_db(librosa.cqt(y=y,
                                            sr=sr,
                                            bins_per_octave=BINS_PER_OCTAVE,
                                            n_bins=N_OCTAVES *
                                            BINS_PER_OCTAVE),
                                ref=np.max)

    # To reduce dimensionality, we'll beat-synchronize the CQT
    tempo, beats = tuple(beat_track)

    Csync = librosa.util.sync(C, beats, aggregate=np.median)

    #####################################################################
    # Let's build a weighted recurrence matrix using beat-synchronous CQT
    # width=3 prevents links within the same bar
    # mode='affinity' here implements S_rep
    R = librosa.segment.recurrence_matrix(Csync,
                                          width=3,
                                          mode='affinity',
                                          sym=True)

    # Enhance diagonals with a median filter (Equation 2)
    df = librosa.segment.timelag_filter(scipy.ndimage.median_filter)
    Rf = df(R, size=(1, 7))

    ###################################################################
    # Now let's build the sequence matrix (S_loc) using mfcc-similarity

    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    Msync = librosa.util.sync(mfcc, beats)

    path_distance = np.sum(np.diff(Msync, axis=1)**2, axis=0)
    sigma = np.median(path_distance)
    path_sim = np.exp(-path_distance / sigma)

    R_path = np.diag(path_sim, k=1) + np.diag(path_sim, k=-1)

    ##########################################################
    # And compute the balanced combination

    deg_path = np.sum(R_path, axis=1)
    deg_rec = np.sum(Rf, axis=1)

    mu = deg_path.dot(deg_path + deg_rec) / np.sum((deg_path + deg_rec)**2)

    A = mu * Rf + (1 - mu) * R_path

    #####################################################
    # Now let's compute the normalized Laplacian
    L = scipy.sparse.csgraph.laplacian(A, normed=True)

    # and its spectral decomposition
    evals, evecs = scipy.linalg.eigh(L)

    # We can clean this up further with a median filter.
    # This can help smooth over small discontinuities
    evecs = scipy.ndimage.median_filter(evecs, size=(9, 1))

    # cumulative normalization is needed for the eigenvectors of the symmetrically normalized Laplacian
    Cnorm = np.cumsum(evecs**2, axis=1)**0.5

    # If we want k clusters, use the first k normalized eigenvectors.
    k = 5

    X = evecs[:, :k] / Cnorm[:, k - 1:k]

    #############################################################
    # Let's use these k components to cluster beats into segments
    KM = sklearn.cluster.KMeans(n_clusters=k)

    seg_ids = KM.fit_predict(X)

    bound_beats = 1 + np.flatnonzero(seg_ids[:-1] != seg_ids[1:])

    bound_beats = librosa.util.fix_frames(bound_beats, x_min=0)

    bound_segs = list(seg_ids[bound_beats])

    bound_frames = beats[bound_beats]

    bound_frames = librosa.util.fix_frames(bound_frames,
                                           x_min=None,
                                           x_max=C.shape[1] - 1)

    bound_tuples = []
    for i in range(1, len(bound_frames)):
        bound_tuples.append((bound_frames[i - 1], bound_frames[i] - 1))
    bound_tuples = tuple(map(lambda x: librosa.frames_to_time(x),
                             bound_tuples))

    pairs = zip(bound_segs, bound_tuples)
    seg_dict = dict()
    for seg, frame in pairs:
        seg_dict.setdefault(seg, []).append(frame)

    if display:
        import matplotlib.patches as patches
        plt.figure(figsize=(12, 4))
        colors = plt.get_cmap('Paired', k)

        bound_times = librosa.frames_to_time(bound_frames)
        freqs = librosa.cqt_frequencies(n_bins=C.shape[0],
                                        fmin=librosa.note_to_hz('C1'),
                                        bins_per_octave=BINS_PER_OCTAVE)

        librosa.display.specshow(C,
                                 y_axis='cqt_hz',
                                 sr=sr,
                                 bins_per_octave=BINS_PER_OCTAVE,
                                 x_axis='time')
        ax = plt.gca()

        for interval, label in zip(zip(bound_times, bound_times[1:]),
                                   bound_segs):
            ax.add_patch(
                patches.Rectangle((interval[0], freqs[0]),
                                  interval[1] - interval[0],
                                  freqs[-1],
                                  facecolor=colors(label),
                                  alpha=0.50))

        plt.tight_layout()
        plt.show()

    return seg_dict