Exemplo n.º 1
0
    def __test_hybrid_cqt(pad_mode):
        """Check how ``pad_mode`` affects the hybrid CQT of ``y``.

        ``y`` is taken from the enclosing scope. The transform computed with
        ``pad_mode='reflect'`` serves as the reference: the shape must always
        match, and the values must match only when ``pad_mode`` is also
        ``'reflect'``.
        """
        reference = librosa.hybrid_cqt(y, pad_mode='reflect')
        candidate = librosa.hybrid_cqt(y, pad_mode=pad_mode)

        assert reference.shape == candidate.shape

        values_match = np.allclose(reference, candidate)
        if pad_mode == 'reflect':
            assert values_match
        else:
            # Any other padding mode is expected to change the values.
            assert not values_match
Exemplo n.º 2
0
    def __test_hybrid_cqt(pad_mode):
        """Compare a hybrid CQT using ``pad_mode`` against reflect padding.

        The signal ``y`` comes from the enclosing scope. Shapes must agree for
        every padding mode; values must agree only for ``'reflect'`` itself.
        """
        baseline = librosa.hybrid_cqt(y, pad_mode='reflect')
        variant = librosa.hybrid_cqt(y, pad_mode=pad_mode)

        assert baseline.shape == variant.shape

        identical = np.allclose(baseline, variant)
        if pad_mode == 'reflect':
            assert identical
        else:
            # A different padding mode must produce different values.
            assert not identical
Exemplo n.º 3
0
def test_hybrid_cqt_multi(y_multi, scale, res_type):
    """Check that a multichannel hybrid CQT equals the per-channel results.

    ``y_multi`` unpacks into ``(y, sr)``; the first two channels of ``y`` are
    transformed individually and compared with the matching slices of the
    transform of the whole array.
    """
    y, sr = y_multi
    options = dict(sr=sr, scale=scale, res_type=res_type)

    # The single-channel transform is taken as the trusted reference.
    per_channel = [librosa.hybrid_cqt(y=y[channel], **options)
                   for channel in (0, 1)]
    combined = librosa.hybrid_cqt(y=y, **options)

    # Each channel of the joint result must reproduce its solo transform.
    for channel, expected in enumerate(per_channel):
        assert np.allclose(expected, combined[channel])

    # Guard against the degenerate case where both channels are identical.
    assert not np.allclose(combined[0], combined[1])
Exemplo n.º 4
0
    def compute_features(self):
        """Compute chroma (PCP) features from the harmonic signal.

        The first output of ``self.compute_HPSS()`` is passed through a
        hybrid CQT, converted to power (squared magnitude), and folded into
        chroma with ``librosa.feature.chroma_cqt``.

        Returns
        -------
        pcp: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        harmonic, _ = self.compute_HPSS()

        magnitude = np.abs(
            librosa.hybrid_cqt(
                harmonic,
                sr=self.sr,
                hop_length=self.hop_length,
                n_bins=self.n_bins,
                norm=self.norm,
                fmin=self.f_min,
            )
        )
        power = magnitude ** 2

        chroma = librosa.feature.chroma_cqt(
            C=power,
            sr=self.sr,
            hop_length=self.hop_length,
            n_octaves=self.n_octaves,
            fmin=self.f_min,
        )
        # Transpose so that each row is one feature vector.
        return chroma.T
Exemplo n.º 5
0
    def __test(hop_length, fmin, n_bins, bins_per_octave,
               tuning, resolution, norm, sparsity):
        """Check that hybrid CQT and full CQT are numerically comparable.

        ``y`` and ``sr`` come from the enclosing scope. Both transforms are
        computed with identical settings; their shapes must match and the
        deviation on the significant bins must stay below a threshold
        relative to the overall peak.
        """
        settings = dict(sr=sr,
                        hop_length=hop_length,
                        fmin=fmin,
                        n_bins=n_bins,
                        bins_per_octave=bins_per_octave,
                        tuning=tuning,
                        resolution=resolution,
                        norm=norm,
                        sparsity=sparsity)

        hybrid = librosa.hybrid_cqt(y, **settings)
        full = librosa.cqt(y, **settings)

        eq_(full.shape, hybrid.shape)

        # A bin counts as significant if it exceeds 1e-4 of the peak in
        # either representation.
        significant = ((full > 1e-4 * full.max())
                       | (hybrid > 1e-4 * hybrid.max()))
        deviation = np.abs(full[significant] - hybrid[significant])
        peak = max(full.max(), hybrid.max())

        # NOTE(review): np.percentile takes q in [0, 100], so 0.99 selects
        # the 0.99th percentile rather than the 99th - confirm the intent.
        assert np.percentile(deviation, 0.99) < 1e-3 * peak
Exemplo n.º 6
0
    def __test(hop_length, fmin, n_bins, bins_per_octave, tuning, resolution, norm, sparsity):
        """Check that hybrid CQT and full CQT agree on average.

        ``y`` and ``sr`` come from the enclosing scope. Both transforms use
        the same settings; shapes must match and the mean absolute difference
        must be below 1e-3.
        """
        settings = dict(
            sr=sr,
            hop_length=hop_length,
            fmin=fmin,
            n_bins=n_bins,
            bins_per_octave=bins_per_octave,
            tuning=tuning,
            resolution=resolution,
            norm=norm,
            sparsity=sparsity,
        )

        hybrid = librosa.hybrid_cqt(y, **settings)
        full = librosa.cqt(y, **settings)

        eq_(full.shape, hybrid.shape)

        # Numerical comparability: average absolute deviation over all bins.
        mean_abs_error = np.mean(np.abs(full - hybrid))
        assert mean_abs_error < 1e-3
Exemplo n.º 7
0
def audio_extract_pcp(
        audio, 
        sr,
        n_fft=4096,
        hop_len=int(4096 * 0.75),
        pcp_bins=84,
        pcp_norm=np.inf,
        pcp_f_min=27.5,
        pcp_n_octaves=6):
    """Compute chroma (PCP) features from the harmonic part of ``audio``.

    The harmonic component from ``librosa.effects.hpss`` is transformed with a
    hybrid CQT, squared to power, and folded into chroma.

    Parameters
    ----------
    audio : audio signal handed to ``librosa.effects.hpss``
    sr : sampling rate forwarded to the librosa calls
    n_fft : accepted for interface compatibility; not used in this function
    hop_len : hop length for both the CQT and the chroma computation
    pcp_bins : number of CQT bins
    pcp_norm : ``norm`` argument of the hybrid CQT
    pcp_f_min : minimum frequency for the CQT and the chroma computation
    pcp_n_octaves : ``n_octaves`` argument of ``chroma_cqt``

    Returns
    -------
    The transposed chroma matrix (one row per frame).
    """
    harmonic, _ = librosa.effects.hpss(audio)

    magnitude = np.abs(
        librosa.hybrid_cqt(
            harmonic,
            sr=sr,
            hop_length=hop_len,
            n_bins=pcp_bins,
            norm=pcp_norm,
            fmin=pcp_f_min,
        )
    )
    power = magnitude ** 2

    chroma = librosa.feature.chroma_cqt(
        C=power,
        sr=sr,
        hop_length=hop_len,
        n_octaves=pcp_n_octaves,
        fmin=pcp_f_min,
    )
    return chroma.T
Exemplo n.º 8
0
def test_hybrid_cqt(
    y_hybrid,
    sr,
    hop_length,
    fmin,
    n_bins,
    bins_per_octave,
    tuning,
    resolution,
    norm,
    sparsity,
    res_type,
):
    """Check that the hybrid CQT tracks the magnitude of the full CQT.

    Both transforms are computed with identical settings. Shapes must match,
    and on the bins exceeding 1e-4 of the peak in either representation the
    selected percentile of the absolute deviation must stay below 1e-3 of the
    overall peak.
    """
    settings = dict(
        sr=sr,
        hop_length=hop_length,
        fmin=fmin,
        n_bins=n_bins,
        bins_per_octave=bins_per_octave,
        tuning=tuning,
        filter_scale=resolution,
        norm=norm,
        sparsity=sparsity,
        res_type=res_type,
    )

    hybrid = librosa.hybrid_cqt(y_hybrid, **settings)
    # The full CQT is reduced to magnitude before comparison.
    full = np.abs(librosa.cqt(y_hybrid, **settings))

    assert full.shape == hybrid.shape

    # Restrict the comparison to bins that are significant in either output.
    significant = (full > 1e-4 * full.max()) | (hybrid > 1e-4 * hybrid.max())
    deviation = np.abs(full[significant] - hybrid[significant])
    peak = max(full.max(), hybrid.max())

    # NOTE(review): np.percentile takes q in [0, 100], so 0.99 selects the
    # 0.99th percentile rather than the 99th - confirm the intent.
    assert np.percentile(deviation, 0.99) < 1e-3 * peak
Exemplo n.º 9
0
    def __test(sr, hop_length, y):
        """Check that mean hybrid-CQT energy varies smoothly across bins."""
        transform = librosa.hybrid_cqt(y=y, sr=sr, hop_length=hop_length, tuning=0)

        # Mean power of each frequency bin (axis 1 is averaged out).
        energy = np.abs(transform) ** 2
        per_bin = np.mean(energy, axis=1)

        # Absolute jump between neighbouring bins.
        jumps = np.abs(np.diff(per_bin))

        assert np.max(jumps) < 5e-4, jumps
Exemplo n.º 10
0
    def __test(sr, hop_length, y):
        """Verify continuity of the integrated hybrid-CQT energy over bins."""
        spectrum = librosa.hybrid_cqt(y=y, sr=sr, hop_length=hop_length, tuning=0)

        # Integrated (mean) power per frequency bin.
        power = np.abs(spectrum) ** 2
        bin_energy = np.mean(power, axis=1)

        # Differences between adjacent bins must stay small.
        step = np.abs(np.diff(bin_energy))

        assert np.max(step) < 5e-4, step
Exemplo n.º 11
0
def test_hybrid_cqt_white_noise(y_white, sr_white, fmin, n_bins, scale):
    """Check per-bin statistics of the hybrid CQT of ``y_white``.

    When ``scale`` is falsy, the result is divided in place by the square
    root of the constant-Q filter lengths before the statistics are checked.
    """
    C = librosa.hybrid_cqt(y=y_white, sr=sr_white, fmin=fmin, n_bins=n_bins, scale=scale)

    if not scale:
        filter_lengths = librosa.filters.constant_q_lengths(sr_white, fmin, n_bins=n_bins)
        C /= np.sqrt(filter_lengths[:, np.newaxis])

    # Every bin should average close to 1 ...
    bin_means = np.mean(C, axis=1)
    assert np.allclose(bin_means, 1.0, atol=2.5e-1), bin_means

    # ... with a standard deviation within 0.5 of 0.5.
    bin_stds = np.std(C, axis=1)
    assert np.allclose(bin_stds, 0.5, atol=5e-1), bin_stds
Exemplo n.º 12
0
    def extract_features(self, y, sr):
        """Turn an audio signal into a feature tensor.

        Optional preprocessing is controlled by ``self.params``: the signal is
        RMS-normalised when ``'normalize'`` is truthy and passed through
        ``self.remove_silence`` when ``'remove_silence'`` is truthy. A missing
        key disables the corresponding step. ``self.params['method']`` then
        selects the representation: ``'FFT'``, ``'Mel Spectrogram'``,
        ``'CQT'`` or ``'MFCC'``.

        Parameters
        ----------
        y : audio samples, forwarded to librosa
        sr : sampling rate, forwarded to librosa

        Returns
        -------
        torch.FloatTensor
            The selected feature matrix.

        Raises
        ------
        ValueError
            If ``self.params['method']`` is not one of the supported names.
        KeyError
            If ``'method'`` or a parameter required by the selected method is
            missing from ``self.params``.
        """
        try:
            if self.params['normalize']:
                rms = np.sqrt(np.mean(y * y))
                # Leave (near-)silent signals alone to avoid blowing them up.
                if rms > 1e-4:
                    y = y / rms
        except KeyError:
            # No 'normalize' entry: skip normalisation.
            pass
        try:
            if self.params['remove_silence']:
                y = self.remove_silence(y,
                                        window=32,
                                        hop=32,
                                        threshold=self.params['sil_threshold'])
        except KeyError:
            # No 'remove_silence' / 'sil_threshold' entry: skip the step.
            pass

        method = self.params['method']

        # y and sr are passed by keyword: recent librosa releases reject
        # positional use of these arguments.
        if method == 'FFT':
            x = np.abs(
                librosa.stft(y,
                             n_fft=self.params['n_fft'],
                             hop_length=self.params['hop_length']))
            x = librosa.logamplitude(x**2)

        elif method == 'Mel Spectrogram':
            x = librosa.feature.melspectrogram(
                y=y,
                sr=sr,
                n_fft=self.params['n_fft'],
                hop_length=self.params['hop_length'],
                n_mels=self.params['n_mels'])
            x = librosa.logamplitude(x**2)

        elif method == 'CQT':
            x = librosa.hybrid_cqt(
                y,
                sr=sr,
                hop_length=self.params['hop_length'],
                n_bins=self.params['n_bins'],
                bins_per_octave=self.params['bins_per_octave'])
            x = librosa.logamplitude(x**2)

        elif method == 'MFCC':
            x = librosa.feature.mfcc(y=y,
                                     sr=sr,
                                     n_fft=self.params['n_fft'],
                                     n_mels=self.params['n_mels'],
                                     n_mfcc=self.params['n_mfcc'],
                                     hop_length=self.params['hop_length'])
            # Stack the coefficients with their first and second differences.
            delta = librosa.feature.delta(x)
            d_delta = librosa.feature.delta(x, order=2)
            x = np.concatenate([x, delta, d_delta], axis=0)

        else:
            # Previously an unknown method fell through and failed with an
            # UnboundLocalError on ``x``; report the real problem instead.
            raise ValueError(
                'Unknown feature extraction method: {!r}'.format(method))

        return torch.FloatTensor(x)
Exemplo n.º 13
0
def test_hybrid_cqt_impulse(y_impulse, sr_impulse, hop_impulse):
    """Check that hybrid-CQT energy of ``y_impulse`` is continuous over bins.

    Regression test for issue #341; since #417 it compares integrated energy
    per bin instead of the pointwise maximum.
    """
    transform = librosa.hybrid_cqt(
        y=y_impulse, sr=sr_impulse, hop_length=hop_impulse, tuning=0
    )

    # Mean power of each frequency bin.
    power = np.abs(transform) ** 2
    bin_energy = np.mean(power, axis=1)

    # Jump in energy between adjacent bins.
    jumps = np.abs(np.diff(bin_energy))

    assert np.max(jumps) < 5e-4, jumps
Exemplo n.º 14
0
    def nietoPCP(self, samples: Signal):
        """Compute chroma (PCP) features of ``samples`` as a new ``Signal``.

        The harmonic component of ``samples.values`` is transformed with a
        hybrid CQT, squared to power and folded into chroma. The result is
        wrapped in a ``Signal`` whose sample rate is the input rate divided
        by the hop length.

        Returns
        -------
        tuple
            A one-element tuple containing the PCP ``Signal``.
        """
        sr = samples.sampleRate
        hop_length = self.parameters["hopLength"].value

        harmonic, _ = librosa.effects.hpss(samples.values)

        # Parameters were double-checked against MSAF: 7 octaves (7 * 12
        # bins) for the CQT and 6 octaves for the chroma.
        magnitude = np.abs(
            librosa.hybrid_cqt(harmonic,
                               sr=sr,
                               hop_length=hop_length,
                               n_bins=7 * 12,
                               norm=np.inf,
                               fmin=27.5))
        power = magnitude**2
        chroma = librosa.feature.chroma_cqt(C=power,
                                            sr=sr,
                                            hop_length=hop_length,
                                            n_octaves=6,
                                            fmin=27.5)

        # One feature vector is produced per hop.
        return (Signal(chroma.T, sampleRate=sr / hop_length), )
Exemplo n.º 15
0
    def __test(fmin, n_bins, scale, sr, y):
        """Check per-bin mean and spread of the hybrid CQT of ``y``.

        When ``scale`` is falsy, the result is divided in place by the square
        root of the constant-Q filter lengths before the checks.
        """
        C = librosa.hybrid_cqt(y=y, sr=sr, fmin=fmin, n_bins=n_bins, scale=scale)

        if not scale:
            filter_lengths = librosa.filters.constant_q_lengths(
                sr, fmin, n_bins=n_bins)
            C /= np.sqrt(filter_lengths[:, np.newaxis])

        # Each bin should average close to 1 ...
        bin_means = np.mean(C, axis=1)
        assert np.allclose(bin_means, 1.0, atol=2.5e-1), bin_means

        # ... with a standard deviation within 0.5 of 0.5.
        bin_stds = np.std(C, axis=1)
        assert np.allclose(bin_stds, 0.5, atol=5e-1), bin_stds
Exemplo n.º 16
0
    def __test(sr, hop_length, y):
        """Check that peak hybrid-CQT response is continuous and bounded."""
        transform = librosa.hybrid_cqt(y=y, sr=sr, hop_length=hop_length, tuning=0)

        # Largest magnitude reached in each frequency bin.
        peaks = np.max(np.abs(transform), axis=1)
        overall_peak = np.max(peaks)

        # Adjacent bins may differ by at most 60% of the overall peak.
        largest_jump = np.max(np.abs(np.diff(peaks)))
        assert largest_jump <= 0.6 * overall_peak, largest_jump

        # The spread of per-bin peaks must stay below half the overall peak.
        spread = np.std(peaks)
        assert spread < 0.5 * overall_peak, spread
Exemplo n.º 17
0
    def __test(sr, hop_length, y):
        """Verify continuity and bounded deviation of per-bin peak response."""
        spectrum = librosa.hybrid_cqt(y=y, sr=sr, hop_length=hop_length, tuning=0)

        # Per-bin maximum of the magnitude, and the global maximum of those.
        bin_peaks = np.max(np.abs(spectrum), axis=1)
        reference = np.max(bin_peaks)

        # Continuity: neighbouring bins differ by no more than 60% of the
        # reference peak.
        max_step = np.max(np.abs(np.diff(bin_peaks)))
        assert max_step <= 0.6 * reference, max_step

        # Peak deviation: standard deviation below half the reference peak.
        deviation = np.std(bin_peaks)
        assert deviation < 0.5 * reference, deviation
Exemplo n.º 18
0
    def _load_music(self, music_file):
        """Load an audio file and build beat-synchronous feature sequences.

        A 72-bin hybrid CQT (starting at C2) and the first 13 MFCCs are
        aggregated per beat with ``librosa.util.sync``, expanded with
        ``librosa.feature.stack_memory`` and transposed.

        Parameters
        ----------
        music_file : path or file object accepted by ``librosa.load``

        Returns
        -------
        tuple
            ``(C_t, mfcc_t, k)``: the transposed CQT features, the transposed
            MFCC features, and ``k = 1 + 2 * ceil(log2(number of beats))``.

        Raises
        ------
        ValueError
            If beat tracking finds no beats, since ``k`` is then undefined.
        """
        y, sr = librosa.load(music_file)

        # sr is passed by keyword: recent librosa releases reject it
        # positionally.
        C = librosa.hybrid_cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'), n_bins=72)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)[:13, :]

        _, beats = librosa.beat.beat_track(y=y, sr=sr)

        n_beats = len(beats)
        if n_beats == 0:
            # math.log(0) would otherwise fail with an opaque
            # "math domain error".
            raise ValueError(
                'No beats detected in {!r}'.format(music_file))
        k = 1 + 2 * math.ceil(math.log(n_beats, 2))

        # Aggregate per beat (mean by default), then add stacked history.
        C = librosa.feature.stack_memory(librosa.util.sync(C, beats))
        mfcc = librosa.feature.stack_memory(librosa.util.sync(mfcc, beats))

        return C.transpose(), mfcc.transpose(), k
Exemplo n.º 19
0
    def analysis(self, save_dir):
        """Plot the waveform, its hybrid CQT and the predictions in one figure.

        The figure has four stacked panels (411: waveform, 412: CQT
        magnitude, 413: ``y_pred_pad`` plus scaled ``notes``, 414:
        ``y_pred_prob``). It is saved as ``<save_dir>/<wav_name>.jpg``, shown,
        and then cleared.

        Parameters
        ----------
        save_dir : directory for the image; created if it does not exist
        """
        # Base name of the wav file, without directory or extension.
        wav_name = os.path.splitext(os.path.split(self.wav_dir)[1])[0]
        fig = plt.figure(figsize=(50, 10), dpi=100)
        fig.tight_layout()

        # NOTE(review): os.mkdir creates a single level only - it fails if
        # the parent of save_dir does not exist.
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
            print('making path  ', save_dir)

        # Flatten the audio into a single 1-D signal.
        wav = self.x_wav.reshape(-1)
        # 88 * 4 bins at 4 * 12 bins per octave, starting at MIDI note 21.
        S = librosa.hybrid_cqt(wav,
                               fmin=librosa.midi_to_hz(21),
                               sr=self.sr,
                               hop_length=128,
                               bins_per_octave=4 * 12,
                               n_bins=88 * 4,
                               filter_scale=0.5)

        # Each plt.* call below draws into the subplot added just before it,
        # so the order of these statements matters.
        fig.add_subplot(413)
        plt.pcolormesh(self.y_pred_pad + self.notes * 20, cmap='jet')
        fig.add_subplot(414)
        plt.xlim(-0.5, self.x_wav.shape[0] - 0.5)
        plt.plot(range(self.x_wav.shape[0]), self.y_pred_prob, 'ro-')
        fig.add_subplot(411)
        plt.xlim(-0.5, len(wav) - 0.5)
        plt.plot(wav)
        fig.add_subplot(412)
        plt.pcolormesh(np.abs(S), cmap='jet')

        # Hide the ticks of the current axes (the 412 panel) and remove all
        # spacing around and between the panels.
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())
        plt.subplots_adjust(top=1,
                            bottom=0,
                            right=1,
                            left=0,
                            hspace=0,
                            wspace=0)
        plt.margins(0, 0)

        plt.savefig('{}/{}.jpg'.format(save_dir, wav_name))
        plt.show()
        plt.clf()
        print('saving presion analysis')
Exemplo n.º 20
0
    def compute_features(self):
        """Compute chroma (PCP) features from the harmonic signal.

        The first output of ``self.compute_HPSS()`` goes through a hybrid
        CQT, is squared to power, and is folded into chroma with
        ``librosa.feature.chroma_cqt``.

        Returns
        -------
        pcp: np.array(N, F)
            The features, each row representing a feature vector for a given
            time frame/beat.
        """
        harmonic, _ = self.compute_HPSS()

        cqt_options = dict(sr=self.sr,
                           hop_length=self.hop_length,
                           n_bins=self.n_bins,
                           norm=self.norm,
                           fmin=self.f_min)
        power = np.abs(librosa.hybrid_cqt(harmonic, **cqt_options)) ** 2

        chroma_options = dict(sr=self.sr,
                              hop_length=self.hop_length,
                              n_octaves=self.n_octaves,
                              fmin=self.f_min)
        chroma = librosa.feature.chroma_cqt(C=power, **chroma_options)

        # Transpose so that each row is one feature vector.
        return chroma.T
def get_features(file, fft=4096, hop=1024, ref=np.max, norm=np.inf):
    """Extract beat-synchronised audio features from ``file``.

    Parameters
    ----------
    file : path handed to ``lb.load`` (also printed)
    fft : FFT size for the mel spectrogram
    hop : hop length shared by all frame-based features
    ref : reference passed to ``lb.amplitude_to_db``
    norm : ``norm`` argument for the CQT and hybrid CQT

    Returns
    -------
    dict
        ``'beat_track'`` holds the tempo and the beat indices; ``'features'``
        maps ``'cqt'``, ``'cens'``, ``'pcp'``, ``'tonnetz'``, ``'mfcc'`` and
        ``'tempogram'`` to nested lists aggregated per beat.
    """
    print(file)
    features = {}

    y, sr = lb.load(file)

    tempo, beats = lb.beat.beat_track(y=y, sr=sr, trim=False, hop_length=hop)
    beat_track = {'bpm' : tempo, 'beats' : beats.tolist()}

    def per_beat(matrix):
        # Aggregate frames between beats and make the result serialisable.
        return lb.util.sync(matrix, beats).tolist()

    # Constant-Q power, converted with amplitude_to_db.
    cqt_power = np.abs(lb.cqt(y=y, sr=sr, hop_length=hop, norm=norm)) ** 2
    features['cqt'] = per_beat(lb.amplitude_to_db(cqt_power, ref=ref))

    # Squared CENS chroma, converted with amplitude_to_db.
    cens_power = np.abs(lb.feature.chroma_cens(y=y, sr=sr, hop_length=hop)) ** 2
    features['cens'] = per_beat(lb.amplitude_to_db(cens_power, ref=ref))

    # Chroma from the hybrid-CQT power of the harmonic component.
    harmonic, _ = lb.effects.hpss(y=y)
    hybrid_power = np.abs(
        lb.hybrid_cqt(harmonic, sr=sr, hop_length=hop, norm=norm, fmin=27.5)
    ) ** 2
    pcp = lb.feature.chroma_cqt(
        C=hybrid_power, sr=sr, hop_length=hop, n_octaves=6, fmin=27.5
    )
    features['pcp'] = per_beat(pcp)

    # Tonnetz is derived from the (unsynchronised) PCP chroma.
    features['tonnetz'] = per_beat(lb.feature.tonnetz(chroma=pcp))

    # MFCCs computed from the dB-converted mel spectrogram.
    mel_db = lb.amplitude_to_db(
        lb.feature.melspectrogram(y=y, sr=sr, n_fft=fft, hop_length=hop),
        ref=ref,
    )
    features['mfcc'] = per_beat(lb.feature.mfcc(S=mel_db, n_mfcc=14))

    features['tempogram'] = per_beat(
        lb.feature.tempogram(y=y, sr=sr, hop_length=hop, win_length=192)
    )

    return {'beat_track' : beat_track, 'features' : features}
Exemplo n.º 22
0
def _calculate_pcp(y, sr):
    """Return chroma computed from the hybrid-CQT power of ``y``."""
    magnitude = np.abs(librosa.hybrid_cqt(y=y, sr=sr))
    power = magnitude**2
    chroma = librosa.feature.chroma_cqt(C=power, sr=sr)
    return chroma
Exemplo n.º 23
0
    def analysis(self):
        """Save a diagnostic figure for every flagged precision/recall index.

        For each index in ``self.P_false_index`` and ``self.R_false_index`` a
        window of ``print_scalar`` frames on either side is plotted in four
        stacked panels (411: waveform, 412: hybrid-CQT magnitude, 413: ground
        truth plus onset/prediction overlay, 414: onset and predicted
        probability curves). Images are written under
        ``pic/analysis/<name>/precision`` and ``pic/analysis/<name>/recall``,
        where ``<name>`` is the second-to-last ``/``-separated component of
        ``self.input_dir``.
        """
        # Half-width of the plotted window, in frames.
        print_scalar = 25
        fig_size = 5
        fig = plt.figure(figsize=(1.5 * fig_size, 2 * fig_size), dpi=100)
        fig.tight_layout()
        # Row index at which the overlay switches from y_onset_pad rows to
        # y_pred_pad rows.
        onset_pred_split = 44

        for j, i in enumerate(self.P_false_index):
            # Skip indices without a full window before or after them.
            if i < 26: continue
            if (i + 1 + print_scalar) > len(self.y_pred): continue
            P_save_dir = 'pic/analysis/{}'.format(
                self.input_dir.split('/')[-2])
            # NOTE(review): os.mkdir is single-level, so 'pic/analysis' must
            # already exist.
            if not os.path.exists(P_save_dir):
                os.mkdir(P_save_dir)
                os.mkdir(P_save_dir + '/precision')

            # Marker column (value 100) at the centre of the window, cropped
            # to the rows from onset_pred_split onwards.
            hight_light = np.zeros_like(
                self.y_onset_pad[:, i - print_scalar:i + 1 + print_scalar])
            hight_light[:, print_scalar] = 100
            hight_light = hight_light[onset_pred_split:, :]
            # Upper rows: scaled onsets; lower rows: marker + scaled
            # predictions.
            padding_onset = np.concatenate(
                (40 * self.y_onset_pad[:onset_pred_split, i - print_scalar:i +
                                       1 + print_scalar], hight_light +
                 20 * self.y_pred_pad[onset_pred_split:,
                                      i - print_scalar:i + 1 + print_scalar]),
                axis=0)

            # Audio of the window, flattened to one 1-D signal.
            wav = self.x_wav[i - print_scalar:i + 1 +
                             print_scalar, :].reshape(-1)
            # 88 * 4 bins at 4 * 12 bins per octave, starting at MIDI note 21.
            S = librosa.hybrid_cqt(wav,
                                   fmin=librosa.midi_to_hz(21),
                                   sr=self.sr,
                                   hop_length=128,
                                   bins_per_octave=4 * 12,
                                   n_bins=88 * 4,
                                   filter_scale=1)

            # Each plt.* call draws into the subplot added just before it, so
            # the order of these statements matters.
            fig.add_subplot(413)
            plt.pcolormesh(
                self.y_groundtruth[:, i - print_scalar:i + 1 + print_scalar] +
                padding_onset,
                vmin=0,
                vmax=50,
                cmap='jet')
            fig.add_subplot(414)
            # 51 points = 2 * print_scalar + 1 frames in the window.
            plt.xlim(-0.5, 50.5)
            plt.plot(range(51),
                     self.y_onset[i - print_scalar:i + 1 + print_scalar],
                     'ro-')
            plt.plot(range(51),
                     self.y_pred_prob[i - print_scalar:i + 1 + print_scalar],
                     'bo-')
            fig.add_subplot(411)
            plt.xlim(-0.5, len(wav) - 0.5)
            plt.plot(wav)
            fig.add_subplot(412)
            plt.pcolormesh(np.abs(S), cmap='jet')

            # Hide ticks on the current axes and remove all spacing.
            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.subplots_adjust(top=1,
                                bottom=0,
                                right=1,
                                left=0,
                                hspace=0,
                                wspace=0)
            plt.margins(0, 0)

            # NOTE(review): the time in the file name is 440 * i / sr, which
            # presumably assumes 440 samples per frame - confirm.
            plt.savefig(
                '{}/precision/i-{}__time-{:.2f}__prob-{:.2f}__true.jpg'.format(
                    P_save_dir, i, 440 * i / self.sr, self.y_pred_prob[i]))
            # plt.show()
            plt.clf()
            # '\r' keeps the progress message on a single console line.
            print('saving presion analysis {}/{}'.format(
                j, len(self.P_false_index)),
                  end='\r')


##########################################################################################################################################################################
##########################################################################################################################################################################
        # Second pass: same figure layout for the recall indices.
        for j, i in enumerate(self.R_false_index):
            # NOTE(review): unlike the precision loop there is no upper-bound
            # check on i here - confirm that a truncated window is acceptable.
            if i < 26: continue
            R_save_dir = 'pic/analysis/{}'.format(
                self.input_dir.split('/')[-2])
            # NOTE(review): only the 'recall' sub-directory is created; this
            # fails if the base directory was not created by the loop above.
            if not os.path.exists(R_save_dir + '/recall'):
                os.mkdir(R_save_dir + '/recall')

            hight_light = np.zeros_like(
                self.y_onset_pad[:, i - print_scalar:i + 1 + print_scalar])
            hight_light[:, print_scalar] = 100
            hight_light = hight_light[onset_pred_split:, :]
            # NOTE(review): here the marker is added to the upper (onset)
            # rows, although it was cropped to the rows from onset_pred_split
            # onwards; the shapes only agree if both parts have the same
            # number of rows - confirm.
            padding_onset = np.concatenate(
                (40 * self.y_onset_pad[:onset_pred_split, i - print_scalar:i +
                                       1 + print_scalar] + hight_light,
                 20 * self.y_pred_pad[onset_pred_split:,
                                      i - print_scalar:i + 1 + print_scalar]),
                axis=0)

            wav = self.x_wav[i - print_scalar:i + 1 +
                             print_scalar, :].reshape(-1)
            S = librosa.hybrid_cqt(wav,
                                   fmin=librosa.midi_to_hz(21),
                                   sr=self.sr,
                                   hop_length=128,
                                   bins_per_octave=4 * 12,
                                   n_bins=88 * 4,
                                   filter_scale=1)

            fig.add_subplot(413)
            plt.pcolormesh(
                self.y_groundtruth[:, i - print_scalar:i + 1 + print_scalar] +
                padding_onset,
                vmin=0,
                vmax=50,
                cmap='jet')
            fig.add_subplot(414)
            plt.xlim(-0.5, 50.5)
            plt.plot(range(51),
                     self.y_onset[i - print_scalar:i + 1 + print_scalar],
                     'ro-')
            plt.plot(range(51),
                     self.y_pred_prob[i - print_scalar:i + 1 + print_scalar],
                     'bo-')
            fig.add_subplot(411)
            plt.xlim(-0.5, len(wav) - 0.5)
            plt.plot(wav)
            fig.add_subplot(412)
            plt.pcolormesh(np.abs(S), cmap='jet')

            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.subplots_adjust(top=1,
                                bottom=0,
                                right=1,
                                left=0,
                                hspace=0,
                                wspace=0)
            plt.margins(0, 0)

            plt.savefig(
                '{}/recall/i-{}__time-{:.2f}__prob-{:.2f}__pred.jpg'.format(
                    R_save_dir, i, 440 * i / self.sr, self.y_pred_prob[i]))
            # plt.show()
            plt.clf()
            print('saving recall analysis {}/{}'.format(
                j, len(self.R_false_index)),
                  end='\r')
Exemplo n.º 24
0
    def extract_features(self, y, sr):
        """Turn an audio signal into a feature tensor.

        Optional preprocessing is controlled by ``self.params``: the signal is
        RMS-normalised when ``'normalize'`` is truthy and passed through
        ``self.remove_silence`` when ``'remove_silence'`` is truthy. A missing
        key disables the corresponding step. ``self.params['method']`` then
        selects the representation: ``'FFT'``, ``'MelSpectrogram'``,
        ``'ACF'``, ``'Cepstrum'``, ``'CQT'`` or ``'MFCC'``.

        Parameters
        ----------
        y : audio samples, forwarded to librosa
        sr : sampling rate, forwarded to librosa

        Returns
        -------
        torch.FloatTensor
            The selected feature matrix.

        Raises
        ------
        ValueError
            If ``self.params['method']`` is not a supported name, if the
            ``'ACF'`` input is shorter than one ``n_fft`` block, and
            (currently always) for ``'MelSpectrogram'`` - see the note in that
            branch.
        KeyError
            If ``'method'`` or a parameter required by the selected method is
            missing from ``self.params``.
        """
        try:
            if self.params['normalize']:
                rms = np.sqrt(np.mean(y * y))
                # Leave (near-)silent signals alone to avoid blowing them up.
                if rms > 1e-4:
                    y = y / rms
        except KeyError:
            # No 'normalize' entry: skip normalisation.
            pass
        try:
            if self.params['remove_silence']:
                y = self.remove_silence(y,
                                        window=32,
                                        hop=32,
                                        threshold=self.params['sil_threshold'])
        except KeyError:
            # No 'remove_silence' / 'sil_threshold' entry: skip the step.
            pass

        method = self.params['method']

        # y and sr are passed by keyword: recent librosa releases reject
        # positional use of these arguments.
        if method == 'FFT':
            x = np.abs(
                librosa.stft(y,
                             n_fft=self.params['n_fft'],
                             hop_length=self.params['hop_length']))
            x = librosa.logamplitude(x**2)

        elif method == 'MelSpectrogram':
            x = librosa.feature.melspectrogram(
                y=y,
                sr=sr,
                n_fft=self.params['n_fft'],
                hop_length=self.params['hop_length'],
                n_mels=self.params['n_mels'])
            x = librosa.amplitude_to_db(x)
            # NOTE(review): the print/raise below looks like leftover
            # debugging; it makes this branch fail unconditionally. Kept
            # as-is until the intent is confirmed.
            print(x.shape)
            raise ValueError

        elif method == 'ACF':
            hop_length = self.params['hop_length']
            block_size = self.params['n_fft']
            max_lag = hop_length // 2

            # Pad to a multiple of hop_length, split evenly between both
            # ends (a full hop is added when the length already divides).
            num_zeros = hop_length - (len(y) % hop_length)
            y2 = np.insert(y, 0, np.zeros(num_zeros // 2))
            y2 = np.append(y2, np.zeros(num_zeros - (num_zeros // 2)))

            # One autocorrelation row per non-overlapping block; stacking
            # once avoids re-copying the matrix for every block.
            rows = [
                librosa.autocorrelate(y2[start:start + block_size],
                                      max_size=max_lag)
                for start in range(0, len(y2) - block_size + 1, block_size)
            ]
            if not rows:
                raise ValueError(
                    'Signal is shorter than one n_fft block; '
                    'cannot compute ACF features')
            x = np.vstack(rows)
            # ``resample`` is provided elsewhere in the file.
            x = resample(x, 96, t=None, axis=0)

        elif method == 'Cepstrum':
            x = librosa.stft(y,
                             n_fft=self.params['n_fft'],
                             hop_length=self.params['hop_length'])
            x = np.log(x)
            # Row-wise inverse FFT magnitude of the log spectrum.
            for i in range(len(x)):
                x[i] = np.absolute(np.fft.ifft(x[i]))
            x = x.real.astype('float32')
            x = resample(x, 96, t=None, axis=0)

        elif method == 'CQT':
            # The CQT settings are fixed here rather than read from params.
            x = librosa.hybrid_cqt(y,
                                   sr=sr,
                                   hop_length=128,
                                   n_bins=144,
                                   bins_per_octave=24)
            x = librosa.amplitude_to_db(x**2)

        elif method == 'MFCC':
            x = librosa.feature.mfcc(y=y,
                                     sr=sr,
                                     n_fft=self.params['n_fft'],
                                     n_mels=self.params['n_mels'],
                                     n_mfcc=self.params['n_mfcc'],
                                     hop_length=self.params['hop_length'])
            # Stack the coefficients with their first and second differences.
            delta = librosa.feature.delta(x)
            d_delta = librosa.feature.delta(x, order=2)
            x = np.concatenate([x, delta, d_delta], axis=0)

        else:
            # Previously an unknown method fell through and failed with an
            # UnboundLocalError on ``x``; report the real problem instead.
            raise ValueError(
                'Unknown feature extraction method: {!r}'.format(method))

        return torch.FloatTensor(x)
Exemplo n.º 25
0
# Constant-Q magnitude spectrogram of y, displayed in dB relative to its peak.
# sr is passed by keyword: recent librosa releases reject it positionally.
cqt = librosa.cqt(y, sr=sr)
cqt = np.abs(cqt)
cqt = cqt.astype(np.float32)
print(cqt.shape, cqt.dtype)
d_cqt = librosa.amplitude_to_db(cqt, ref=np.max)
librosa.display.specshow(d_cqt,
                         y_axis='log',
                         x_axis='time',
                         sr=sr,
                         cmap='viridis')
plt.colorbar(format='%+2.0f dB')
plt.title('cqt-spectrogram')
plt.show()

### hybrid-cqt-spectrogram (hybrid constant-Q transform) (*84, t) *n_bins
# Magnitude as float32, then dB relative to the peak for display.
hcqt = np.abs(librosa.hybrid_cqt(y=y, sr=sr)).astype(np.float32)
print(hcqt.shape, hcqt.dtype)
d_hcqt = librosa.amplitude_to_db(hcqt, ref=np.max)
librosa.display.specshow(
    d_hcqt, y_axis='log', x_axis='time', sr=sr, cmap='viridis'
)
plt.colorbar(format='%+2.0f dB')
plt.title('hybrid-cqt-spectrogram')
plt.show()

### pseudo-cqt-spectrogram (pseudo constant-Q transform) (*84, t) *n_bins
pcqt = librosa.pseudo_cqt(y=y, sr=sr)