def __test_hybrid_cqt(pad_mode):
    """Compare hybrid CQT output for ``pad_mode`` against the 'reflect' baseline.

    The two transforms must always have the same shape.  Their values must
    match when ``pad_mode`` is 'reflect' and must differ for any other mode.
    The input signal ``y`` is taken from the enclosing scope.
    """
    baseline = librosa.hybrid_cqt(y, pad_mode='reflect')
    candidate = librosa.hybrid_cqt(y, pad_mode=pad_mode)

    assert baseline.shape == candidate.shape

    if pad_mode == 'reflect':
        # Same padding on both sides: the outputs must coincide.
        assert np.allclose(baseline, candidate)
    else:
        # A different padding mode must leave a visible trace in the output.
        assert not np.allclose(baseline, candidate)
def test_hybrid_cqt_multi(y_multi, scale, res_type):
    """Check that a multi-channel hybrid CQT equals the per-channel transforms."""
    y, sr = y_multi
    options = dict(sr=sr, scale=scale, res_type=res_type)

    # The single-channel transform is taken as the trusted reference.
    per_channel = [librosa.hybrid_cqt(y=y[ch], **options) for ch in (0, 1)]
    joint = librosa.hybrid_cqt(y=y, **options)

    # Every channel of the joint transform must reproduce its solo transform.
    for ch, expected in enumerate(per_channel):
        assert np.allclose(expected, joint[ch])

    # The channels must differ, otherwise the per-channel check proves nothing.
    assert not np.allclose(joint[0], joint[1])
def compute_features(self):
    """Compute chroma (PCP) features from the harmonic part of the audio.

    The harmonic component returned by ``self.compute_HPSS()`` is passed
    through a hybrid CQT, converted to power (squared magnitude) and then
    folded into chroma with ``librosa.feature.chroma_cqt``.

    Returns
    -------
    pcp: np.array(N, F)
        The features, each row representing a feature vector for a given
        time frame/beat.
    """
    harmonic, _ = self.compute_HPSS()

    cqt = librosa.hybrid_cqt(
        harmonic,
        sr=self.sr,
        hop_length=self.hop_length,
        n_bins=self.n_bins,
        norm=self.norm,
        fmin=self.f_min,
    )
    power = np.abs(cqt) ** 2

    chroma = librosa.feature.chroma_cqt(
        C=power,
        sr=self.sr,
        hop_length=self.hop_length,
        n_octaves=self.n_octaves,
        fmin=self.f_min,
    )
    # Transposed so that the first axis indexes the feature vectors.
    return chroma.T
def __test(hop_length, fmin, n_bins, bins_per_octave, tuning, resolution, norm, sparsity):
    """Assert that the hybrid CQT and the full CQT are numerically comparable.

    Both transforms are computed with identical parameters on the signal
    ``y`` / sampling rate ``sr`` taken from the enclosing scope.  Shapes must
    be equal, and the absolute difference over the significant bins must stay
    below a tolerance relative to the larger of the two peaks.
    """
    shared = dict(
        sr=sr,
        hop_length=hop_length,
        fmin=fmin,
        n_bins=n_bins,
        bins_per_octave=bins_per_octave,
        tuning=tuning,
        resolution=resolution,
        norm=norm,
        sparsity=sparsity,
    )
    hybrid = librosa.hybrid_cqt(y, **shared)
    full = librosa.cqt(y, **shared)

    eq_(full.shape, hybrid.shape)

    # A bin is significant when it exceeds 1e-4 of the peak in either transform.
    significant = (full > 1e-4 * full.max()) | (hybrid > 1e-4 * hybrid.max())

    # NOTE(review): np.percentile expects q on a 0-100 scale, so 0.99 selects
    # the 0.99th percentile rather than the 99th - confirm which was intended.
    perc = 0.99
    tolerance = 1e-3 * max(full.max(), hybrid.max())

    deviation = np.abs(full[significant] - hybrid[significant])
    assert np.percentile(deviation, perc) < tolerance
def __test(hop_length, fmin, n_bins, bins_per_octave, tuning, resolution, norm, sparsity):
    """Assert that the hybrid CQT and the full CQT agree in shape and on average.

    Both transforms use identical parameters on the signal ``y`` / sampling
    rate ``sr`` taken from the enclosing scope.  The mean absolute difference
    between them must be below 1e-3.
    """
    shared = dict(
        sr=sr,
        hop_length=hop_length,
        fmin=fmin,
        n_bins=n_bins,
        bins_per_octave=bins_per_octave,
        tuning=tuning,
        resolution=resolution,
        norm=norm,
        sparsity=sparsity,
    )
    hybrid = librosa.hybrid_cqt(y, **shared)
    full = librosa.cqt(y, **shared)

    eq_(full.shape, hybrid.shape)

    mean_abs_error = np.mean(np.abs(full - hybrid))
    assert mean_abs_error < 1e-3
def audio_extract_pcp(
        audio,
        sr,
        n_fft=4096,
        hop_len=int(4096 * 0.75),
        pcp_bins=84,
        pcp_norm=np.inf,
        pcp_f_min=27.5,
        pcp_n_octaves=6):
    """Extract chroma (PCP) features from the harmonic part of ``audio``.

    Parameters
    ----------
    audio : signal passed to ``librosa.effects.hpss``
    sr : sampling rate forwarded to the CQT and chroma calls
    n_fft : accepted for interface compatibility; not referenced in the body
    hop_len : hop length used for both the CQT and the chroma computation
    pcp_bins : number of CQT bins
    pcp_norm : normalisation passed to ``librosa.hybrid_cqt``
    pcp_f_min : lowest frequency for both the CQT and the chroma computation
    pcp_n_octaves : number of octaves passed to ``chroma_cqt``

    Returns
    -------
    The transposed output of ``librosa.feature.chroma_cqt``.
    """
    harmonic, _ = librosa.effects.hpss(audio)

    cqt = librosa.hybrid_cqt(
        harmonic,
        sr=sr,
        hop_length=hop_len,
        n_bins=pcp_bins,
        norm=pcp_norm,
        fmin=pcp_f_min,
    )
    # Squared magnitude, i.e. power, is what gets folded into chroma.
    power = np.abs(cqt) ** 2

    chroma = librosa.feature.chroma_cqt(
        C=power,
        sr=sr,
        hop_length=hop_len,
        n_octaves=pcp_n_octaves,
        fmin=pcp_f_min,
    )
    return chroma.T
def test_hybrid_cqt(
    y_hybrid,
    sr,
    hop_length,
    fmin,
    n_bins,
    bins_per_octave,
    tuning,
    resolution,
    norm,
    sparsity,
    res_type,
):
    """Verify that the hybrid CQT tracks the magnitude of the full CQT.

    Shapes must be identical.  Over the bins that exceed 1e-4 of the peak in
    either representation, the absolute difference at the configured
    percentile must be below 1e-3 of the larger peak.
    """
    shared = dict(
        sr=sr,
        hop_length=hop_length,
        fmin=fmin,
        n_bins=n_bins,
        bins_per_octave=bins_per_octave,
        tuning=tuning,
        filter_scale=resolution,
        norm=norm,
        sparsity=sparsity,
        res_type=res_type,
    )
    hybrid = librosa.hybrid_cqt(y_hybrid, **shared)
    full = np.abs(librosa.cqt(y_hybrid, **shared))

    assert full.shape == hybrid.shape

    # Restrict the comparison to bins that carry energy in at least one transform.
    significant = (full > 1e-4 * full.max()) | (hybrid > 1e-4 * hybrid.max())

    # NOTE(review): np.percentile expects q on a 0-100 scale, so 0.99 selects
    # the 0.99th percentile rather than the 99th - confirm which was intended.
    perc = 0.99
    tolerance = 1e-3 * max(full.max(), hybrid.max())

    deviation = np.abs(full[significant] - hybrid[significant])
    assert np.percentile(deviation, perc) < tolerance
def __test(sr, hop_length, y):
    """Assert that mean hybrid-CQT energy varies smoothly from bin to bin."""
    transform = librosa.hybrid_cqt(y=y, sr=sr, hop_length=hop_length, tuning=0)

    # Mean energy per frequency bin (averaged along axis 1).
    energy_per_bin = np.mean(np.abs(transform) ** 2, axis=1)

    # Jump in mean energy between adjacent bins.
    continuity = np.abs(np.diff(energy_per_bin))
    assert np.max(continuity) < 5e-4, continuity
def test_hybrid_cqt_white_noise(y_white, sr_white, fmin, n_bins, scale):
    """Check the per-bin statistics of the hybrid CQT of a white-noise signal.

    When ``scale`` is falsy the output is first divided, in place, by the
    square root of the constant-Q filter lengths.  Each bin's mean must then
    be within 0.25 of 1.0 and its standard deviation within 0.5 of 0.5.
    """
    C = librosa.hybrid_cqt(y=y_white, sr=sr_white, fmin=fmin, n_bins=n_bins, scale=scale)

    if not scale:
        lengths = librosa.filters.constant_q_lengths(sr_white, fmin, n_bins=n_bins)
        # In-place division, one divisor per frequency bin.
        np.divide(C, np.sqrt(lengths[:, None]), out=C)

    bin_mean = np.mean(C, axis=1)
    assert np.allclose(bin_mean, 1.0, atol=2.5e-1), bin_mean

    bin_std = np.std(C, axis=1)
    assert np.allclose(bin_std, 0.5, atol=5e-1), bin_std
def extract_features(self, y, sr):
    """Turn a raw signal into a feature tensor according to ``self.params``.

    Parameters
    ----------
    y : audio signal
    sr : sampling rate of ``y``

    ``self.params`` is read as a mapping:

    * ``'normalize'`` (optional): when truthy, ``y`` is divided by its RMS,
      provided the RMS exceeds 1e-4.
    * ``'remove_silence'`` / ``'sil_threshold'`` (optional): when the flag is
      truthy, ``self.remove_silence`` is applied.  If either key is missing
      the step is skipped.
    * ``'method'`` (required): one of ``'FFT'``, ``'Mel Spectrogram'``,
      ``'CQT'`` or ``'MFCC'``; each method reads its own size parameters.

    Returns
    -------
    torch.FloatTensor
        The extracted features.  For ``'MFCC'`` the coefficients, their delta
        and their second-order delta are concatenated along axis 0.

    Raises
    ------
    KeyError
        If ``'method'`` or a parameter required by the chosen method is missing.
    ValueError
        If ``'method'`` names an unsupported extraction method.
    """
    params = self.params

    # Optional switches: only the key lookups are guarded, so a KeyError raised
    # by the processing itself is no longer silently swallowed.
    try:
        normalize = params['normalize']
    except KeyError:
        normalize = False
    if normalize:
        rms = np.sqrt(np.mean(y * y))
        # Leave (near-)silent input unscaled instead of amplifying noise.
        if rms > 1e-4:
            y = y / rms

    try:
        strip_silence = params['remove_silence']
        sil_threshold = params['sil_threshold'] if strip_silence else None
    except KeyError:
        strip_silence = False
    if strip_silence:
        y = self.remove_silence(y, window=32, hop=32, threshold=sil_threshold)

    # NOTE(review): librosa.logamplitude was dropped from later librosa
    # releases in favour of power_to_db - confirm the pinned librosa version.
    method = params['method']
    if method == 'FFT':
        x = np.abs(librosa.stft(y,
                                n_fft=params['n_fft'],
                                hop_length=params['hop_length']))
        x = librosa.logamplitude(x**2)
    elif method == 'Mel Spectrogram':
        # y/sr are passed by keyword: newer librosa rejects them positionally.
        x = librosa.feature.melspectrogram(y=y,
                                           sr=sr,
                                           n_fft=params['n_fft'],
                                           hop_length=params['hop_length'],
                                           n_mels=params['n_mels'])
        x = librosa.logamplitude(x**2)
    elif method == 'CQT':
        x = librosa.hybrid_cqt(y,
                               sr=sr,
                               hop_length=params['hop_length'],
                               n_bins=params['n_bins'],
                               bins_per_octave=params['bins_per_octave'])
        x = librosa.logamplitude(x**2)
    elif method == 'MFCC':
        x = librosa.feature.mfcc(y=y,
                                 sr=sr,
                                 n_fft=params['n_fft'],
                                 n_mels=params['n_mels'],
                                 n_mfcc=params['n_mfcc'],
                                 hop_length=params['hop_length'])
        delta = librosa.feature.delta(x)
        d_delta = librosa.feature.delta(x, order=2)
        x = np.concatenate([x, delta, d_delta], axis=0)
    else:
        # Previously this fell through to an UnboundLocalError on ``x``.
        raise ValueError(
            'unknown feature extraction method: {!r}'.format(method))

    return torch.FloatTensor(x)
def test_hybrid_cqt_impulse(y_impulse, sr_impulse, hop_impulse):
    """Check that the hybrid CQT of an impulse has a smooth per-bin energy profile.

    Regression test for issue #341; changed in #417 to compare integrated
    (mean) energy per bin instead of the pointwise maximum.
    """
    transform = librosa.hybrid_cqt(
        y=y_impulse, sr=sr_impulse, hop_length=hop_impulse, tuning=0
    )

    power = np.abs(transform) ** 2
    response = np.mean(power, axis=1)

    # Adjacent bins must not differ by 5e-4 or more in mean energy.
    continuity = np.abs(np.diff(response))
    assert np.max(continuity) < 5e-4, continuity
def nietoPCP(self, samples: Signal):
    """Compute chroma (PCP) features from the harmonic part of ``samples``.

    The hop length is read from ``self.parameters["hopLength"].value``.

    Returns
    -------
    A one-element tuple holding a ``Signal`` built from the transposed chroma
    matrix, with its sample rate set to the input sample rate divided by the
    hop length.
    """
    sr = samples.sampleRate
    hop = self.parameters["hopLength"].value
    feature_rate = sr / hop

    harmonic, _ = librosa.effects.hpss(samples.values)

    # Parameters were checked against MSAF: 7 octaves for the CQT itself,
    # 6 octaves for the chroma folding.
    cqt = librosa.hybrid_cqt(
        harmonic, sr=sr, hop_length=hop, n_bins=7 * 12, norm=np.inf, fmin=27.5
    )
    power = np.abs(cqt)**2
    chroma = librosa.feature.chroma_cqt(
        C=power, sr=sr, hop_length=hop, n_octaves=6, fmin=27.5
    )

    return (Signal(chroma.T, sampleRate=feature_rate), )
def __test(fmin, n_bins, scale, sr, y):
    """Check per-bin mean and spread of the hybrid CQT of ``y``.

    When ``scale`` is falsy the transform is divided, in place, by the square
    root of the constant-Q filter lengths before the statistics are checked.
    """
    C = librosa.hybrid_cqt(y=y, sr=sr, fmin=fmin, n_bins=n_bins, scale=scale)

    if not scale:
        lengths = librosa.filters.constant_q_lengths(sr, fmin, n_bins=n_bins)
        # One divisor per frequency bin, applied in place.
        np.divide(C, np.sqrt(lengths[:, None]), out=C)

    bin_mean = np.mean(C, axis=1)
    bin_std = np.std(C, axis=1)

    # Means must sit within 0.25 of 1.0, standard deviations within 0.5 of 0.5.
    assert np.allclose(bin_mean, 1.0, atol=2.5e-1), bin_mean
    assert np.allclose(bin_std, 0.5, atol=5e-1), bin_std
def __test(sr, hop_length, y):
    """Assert that the peak hybrid-CQT response is continuous across bins."""
    transform = librosa.hybrid_cqt(y=y, sr=sr, hop_length=hop_length, tuning=0)

    # Peak magnitude of every frequency bin, and the largest peak overall.
    peak_per_bin = np.max(np.abs(transform), axis=1)
    reference = np.max(peak_per_bin)

    # Largest jump between adjacent bins must not exceed 60% of the reference.
    largest_jump = np.max(np.abs(np.diff(peak_per_bin)))
    assert largest_jump <= 0.6 * reference, largest_jump

    # The spread of the per-bin peaks must stay below half of the reference.
    spread = np.std(peak_per_bin)
    assert spread < 0.5 * reference, spread
def _load_music(self, music_file):
    """Load an audio file and build beat-synchronous CQT and MFCC features.

    Parameters
    ----------
    music_file : anything accepted by ``librosa.load``

    Returns
    -------
    C_t : hybrid CQT (72 bins starting at C2), aggregated between beats,
        passed through ``stack_memory`` and transposed.
    mfcc_t : first 13 MFCC rows, processed the same way.
    k : int
        ``1 + 2 * ceil(log2(number of detected beats))``.

    Raises
    ------
    ValueError
        If beat tracking finds no beats, in which case ``k`` is undefined.
    """
    y, sr = librosa.load(music_file)

    # ``sr`` is passed by keyword: newer librosa rejects it positionally.
    # NOTE(review): a dB-scaled copy of the CQT used to be computed here but
    # was never used; the returned features are built from ``C`` as-is.
    C = librosa.hybrid_cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'), n_bins=72)
    mfcc = librosa.feature.mfcc(y=y, sr=sr)[:13, :]

    _, beats = librosa.beat.beat_track(y=y, sr=sr)
    n_beats = len(beats)
    if n_beats == 0:
        # math.log(0, 2) would fail with an opaque "math domain error".
        raise ValueError(
            'no beats detected in {!r}; cannot derive k'.format(music_file))
    k = 1 + 2 * math.ceil(math.log(n_beats, 2))

    # Aggregate frames between beats (librosa.util.sync defaults to the mean),
    # then stack delayed copies of each feature matrix.
    C = librosa.feature.stack_memory(librosa.util.sync(C, beats))
    mfcc = librosa.feature.stack_memory(librosa.util.sync(mfcc, beats))

    return C.transpose(), mfcc.transpose(), k
def analysis(self, save_dir):
    """Render a four-panel diagnostic figure and save it as a JPEG.

    Panels, top to bottom: the flattened waveform (411), the hybrid-CQT
    magnitude (412), ``y_pred_pad + notes * 20`` as a colour mesh (413) and
    the ``y_pred_prob`` curve (414).  The file is written to
    ``<save_dir>/<wav file stem>.jpg``; ``save_dir`` is created if missing.
    """
    wav_name = os.path.splitext(os.path.basename(self.wav_dir))[0]

    fig = plt.figure(figsize=(50, 10), dpi=100)
    fig.tight_layout()

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
        print('making path ', save_dir)

    samples = self.x_wav.reshape(-1)
    n_rows = self.x_wav.shape[0]
    spectrum = librosa.hybrid_cqt(
        samples,
        fmin=librosa.midi_to_hz(21),
        sr=self.sr,
        hop_length=128,
        bins_per_octave=4 * 12,
        n_bins=88 * 4,
        filter_scale=0.5,
    )

    # Prediction map.
    fig.add_subplot(413)
    plt.pcolormesh(self.y_pred_pad + self.notes * 20, cmap='jet')

    # Prediction probability, one point per row of x_wav.
    fig.add_subplot(414)
    plt.xlim(-0.5, n_rows - 0.5)
    plt.plot(range(n_rows), self.y_pred_prob, 'ro-')

    # Raw waveform.
    fig.add_subplot(411)
    plt.xlim(-0.5, len(samples) - 0.5)
    plt.plot(samples)

    # CQT magnitude.
    fig.add_subplot(412)
    plt.pcolormesh(np.abs(spectrum), cmap='jet')

    # Strip ticks and margins from the current axes so the panels fill the figure.
    current_axes = plt.gca()
    current_axes.xaxis.set_major_locator(plt.NullLocator())
    current_axes.yaxis.set_major_locator(plt.NullLocator())
    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
    plt.margins(0, 0)

    plt.savefig('{}/{}.jpg'.format(save_dir, wav_name))
    plt.show()
    plt.clf()
    print('saving presion analysis')
def compute_features(self):
    """Compute chroma (PCP) features from the harmonic component.

    Returns
    -------
    pcp: np.array(N, F)
        The features, each row representing a feature vector for a given
        time frame/beat.
    """
    harmonic, _ = self.compute_HPSS()

    # Settings shared by the CQT and the chroma folding.
    common = dict(sr=self.sr, hop_length=self.hop_length, fmin=self.f_min)

    magnitude = np.abs(
        librosa.hybrid_cqt(harmonic, n_bins=self.n_bins, norm=self.norm, **common)
    )
    power = magnitude ** 2

    chroma = librosa.feature.chroma_cqt(C=power, n_octaves=self.n_octaves, **common)
    return chroma.T
def get_features(file, fft=4096, hop=1024, ref=np.max, norm=np.inf):
    """Extract beat-synchronous audio features from ``file``.

    Parameters
    ----------
    file : path passed to ``lb.load`` (also printed)
    fft : FFT size for the mel spectrogram
    hop : hop length shared by every feature
    ref : reference passed to ``lb.amplitude_to_db``
    norm : normalisation passed to the CQT calls

    Returns
    -------
    dict
        ``{'beat_track': {'bpm', 'beats'}, 'features': {...}}`` where the
        feature keys are ``cqt``, ``cens``, ``pcp``, ``tonnetz``, ``mfcc`` and
        ``tempogram``, each a nested list synchronised to the beats.
    """
    print(file)
    y, sr = lb.load(file)

    tempo, beats = lb.beat.beat_track(y=y, sr=sr, trim=False, hop_length=hop)
    beat_track = {'bpm' : tempo, 'beats' : beats.tolist()}

    def beat_sync(frames):
        # Aggregate frame-level features between beats, as plain lists.
        return lb.util.sync(frames, beats).tolist()

    features = {}

    # NOTE(review): amplitude_to_db is applied to squared (power) values for
    # cqt/cens and to the mel spectrogram - confirm this scaling is intended.
    cqt_power = np.abs(lb.cqt(y=y, sr=sr, hop_length=hop, norm=norm)) ** 2
    features['cqt'] = beat_sync(lb.amplitude_to_db(cqt_power, ref=ref))

    cens_power = np.abs(lb.feature.chroma_cens(y=y, sr=sr, hop_length=hop)) ** 2
    features['cens'] = beat_sync(lb.amplitude_to_db(cens_power, ref=ref))

    # Chroma of the harmonic component only.
    harmony, _ = lb.effects.hpss(y=y)
    harmonic_power = np.abs(
        lb.hybrid_cqt(harmony, sr=sr, hop_length=hop, norm=norm, fmin=27.5)
    ) ** 2
    pcp = lb.feature.chroma_cqt(
        C=harmonic_power, sr=sr, hop_length=hop, n_octaves=6, fmin=27.5
    )
    features['pcp'] = beat_sync(pcp)

    features['tonnetz'] = beat_sync(lb.feature.tonnetz(chroma=pcp))

    mel = lb.feature.melspectrogram(y=y, sr=sr, n_fft=fft, hop_length=hop)
    mfcc = lb.feature.mfcc(S=lb.amplitude_to_db(mel, ref=ref), n_mfcc=14)
    features['mfcc'] = beat_sync(mfcc)

    tempogram = lb.feature.tempogram(y=y, sr=sr, hop_length=hop, win_length=192)
    features['tempogram'] = beat_sync(tempogram)

    return {'beat_track' : beat_track, 'features' : features}
def _calculate_pcp(y, sr):
    """Return chroma features computed from the hybrid-CQT power of ``y``."""
    magnitude = np.abs(librosa.hybrid_cqt(y=y, sr=sr))
    power = magnitude**2
    return librosa.feature.chroma_cqt(C=power, sr=sr)
def analysis(self):
    """Save one diagnostic figure per precision error and per recall error.

    For every index in ``self.P_false_index`` and ``self.R_false_index`` a
    window of ``print_scalar`` frames on either side is rendered as four
    panels: waveform (411), hybrid-CQT magnitude (412), ground truth plus
    onset/prediction overlay (413) and the onset / predicted-probability
    curves (414).  Figures are written to
    ``pic/analysis/<parent dir of input>/precision`` and ``.../recall``.

    Indices whose window does not fit inside the data are skipped.  The same
    bounds check is applied to both lists, so a recall index close to the end
    of the data no longer aborts the run with a shape error.
    """
    print_scalar = 25
    window = 2 * print_scalar + 1      # frames shown per figure (51)
    onset_pred_split = 44              # row where the overlay switches source
    fig_size = 5

    fig = plt.figure(figsize=(1.5 * fig_size, 2 * fig_size), dpi=100)
    fig.tight_layout()

    def window_fits(i):
        # Same limits the precision loop always used: index at least 26 and
        # the right edge of the window inside ``y_pred``.
        return i >= print_scalar + 1 and (i + 1 + print_scalar) <= len(self.y_pred)

    def render(i, subdir, suffix, mark_onset_rows):
        # Draw and save the four-panel figure for the window centred on ``i``.
        lo, hi = i - print_scalar, i + 1 + print_scalar

        out_dir = 'pic/analysis/{}/{}'.format(
            self.input_dir.split('/')[-2], subdir)
        # makedirs creates missing parents and tolerates an existing directory.
        os.makedirs(out_dir, exist_ok=True)

        # Vertical marker (value 100) on the centre frame of the window.
        marker = np.zeros_like(self.y_onset_pad[:, lo:hi])
        marker[:, print_scalar] = 100
        marker = marker[onset_pred_split:, :]

        onset_rows = 40 * self.y_onset_pad[:onset_pred_split, lo:hi]
        pred_rows = 20 * self.y_pred_pad[onset_pred_split:, lo:hi]
        # Precision figures mark the prediction rows, recall figures the onset rows.
        if mark_onset_rows:
            onset_rows = onset_rows + marker
        else:
            pred_rows = marker + pred_rows
        overlay = np.concatenate((onset_rows, pred_rows), axis=0)

        wav = self.x_wav[lo:hi, :].reshape(-1)
        S = librosa.hybrid_cqt(wav,
                               fmin=librosa.midi_to_hz(21),
                               sr=self.sr,
                               hop_length=128,
                               bins_per_octave=4 * 12,
                               n_bins=88 * 4,
                               filter_scale=1)

        fig.add_subplot(413)
        plt.pcolormesh(self.y_groundtruth[:, lo:hi] + overlay,
                       vmin=0, vmax=50, cmap='jet')

        fig.add_subplot(414)
        plt.xlim(-0.5, window - 0.5)
        plt.plot(range(window), self.y_onset[lo:hi], 'ro-')
        plt.plot(range(window), self.y_pred_prob[lo:hi], 'bo-')

        fig.add_subplot(411)
        plt.xlim(-0.5, len(wav) - 0.5)
        plt.plot(wav)

        fig.add_subplot(412)
        plt.pcolormesh(np.abs(S), cmap='jet')

        # Strip ticks and margins from the current axes.
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0,
                            hspace=0, wspace=0)
        plt.margins(0, 0)

        plt.savefig('{}/i-{}__time-{:.2f}__prob-{:.2f}__{}.jpg'.format(
            out_dir, i, 440 * i / self.sr, self.y_pred_prob[i], suffix))
        # The figure is cleared and reused for the next window.
        plt.clf()

    for j, i in enumerate(self.P_false_index):
        if not window_fits(i):
            continue
        render(i, 'precision', 'true', mark_onset_rows=False)
        print('saving presion analysis {}/{}'.format(
            j, len(self.P_false_index)), end='\r')

    for j, i in enumerate(self.R_false_index):
        if not window_fits(i):
            continue
        render(i, 'recall', 'pred', mark_onset_rows=True)
        print('saving recall analysis {}/{}'.format(
            j, len(self.R_false_index)), end='\r')
def extract_features(self, y, sr):
    """Turn a raw signal into a feature tensor according to ``self.params``.

    Parameters
    ----------
    y : audio signal
    sr : sampling rate of ``y``

    ``self.params`` is read as a mapping:

    * ``'normalize'`` (optional): when truthy, ``y`` is divided by its RMS,
      provided the RMS exceeds 1e-4.
    * ``'remove_silence'`` / ``'sil_threshold'`` (optional): when the flag is
      truthy, ``self.remove_silence`` is applied.  If either key is missing
      the step is skipped.
    * ``'method'`` (required): one of ``'FFT'``, ``'MelSpectrogram'``,
      ``'ACF'``, ``'Cepstrum'``, ``'CQT'`` or ``'MFCC'``.

    Returns
    -------
    torch.FloatTensor
        The extracted features.

    Raises
    ------
    KeyError
        If ``'method'`` or a parameter required by the chosen method is missing.
    ValueError
        If ``'method'`` is unsupported, if ``'ACF'`` is requested for a signal
        that does not contain one full ``n_fft`` block, and - as the code
        currently stands - always for ``'MelSpectrogram'``.
    """
    params = self.params

    # Optional switches: only the key lookups are guarded, so a KeyError raised
    # by the processing itself is no longer silently swallowed.
    try:
        normalize = params['normalize']
    except KeyError:
        normalize = False
    if normalize:
        rms = np.sqrt(np.mean(y * y))
        # Leave (near-)silent input unscaled instead of amplifying noise.
        if rms > 1e-4:
            y = y / rms

    try:
        strip_silence = params['remove_silence']
        sil_threshold = params['sil_threshold'] if strip_silence else None
    except KeyError:
        strip_silence = False
    if strip_silence:
        y = self.remove_silence(y, window=32, hop=32, threshold=sil_threshold)

    method = params['method']
    if method == 'FFT':
        x = np.abs(librosa.stft(y,
                                n_fft=params['n_fft'],
                                hop_length=params['hop_length']))
        # NOTE(review): librosa.logamplitude was dropped from later librosa
        # releases in favour of power_to_db - confirm the pinned version.
        x = librosa.logamplitude(x**2)
    elif method == 'MelSpectrogram':
        # y/sr are passed by keyword: newer librosa rejects them positionally.
        x = librosa.feature.melspectrogram(y=y,
                                           sr=sr,
                                           n_fft=params['n_fft'],
                                           hop_length=params['hop_length'],
                                           n_mels=params['n_mels'])
        x = librosa.amplitude_to_db(x)
        # NOTE(review): this branch raises unconditionally after computing the
        # spectrogram - looks like leftover debugging; confirm before removing.
        raise ValueError
    elif method == 'ACF':
        hop = params['hop_length']
        block = params['n_fft']

        # Centre-pad with zeros: half before the signal, the rest after it.
        pad_total = hop - (len(y) % hop)
        padded = np.insert(y, 0, np.zeros(pad_total // 2))
        padded = np.append(padded, np.zeros(pad_total - (pad_total // 2)))

        # One autocorrelation row per non-overlapping block of n_fft samples;
        # rows are collected first and stacked once.
        rows = [
            librosa.autocorrelate(padded[start:start + block],
                                  max_size=(hop // 2))
            for start in range(0, len(padded) - block + 1, block)
        ]
        if not rows:
            # Previously this surfaced as an UnboundLocalError on ``x``.
            raise ValueError(
                'signal of {} samples is shorter than one n_fft block '
                'of {}'.format(len(padded), block))
        x = np.vstack(rows)
        x = resample(x, 96, t=None, axis=0)
    elif method == 'Cepstrum':
        spectrum = librosa.stft(y,
                                n_fft=params['n_fft'],
                                hop_length=params['hop_length'])
        # Magnitude of the inverse FFT of each row of the log spectrum.
        x = np.abs(np.fft.ifft(np.log(spectrum), axis=-1)).astype('float32')
        x = resample(x, 96, t=None, axis=0)
    elif method == 'CQT':
        # The CQT geometry is hard-coded here rather than read from params.
        x = librosa.hybrid_cqt(y,
                               sr=sr,
                               hop_length=128,
                               n_bins=144,
                               bins_per_octave=24)
        x = librosa.amplitude_to_db(x**2)
    elif method == 'MFCC':
        x = librosa.feature.mfcc(y=y,
                                 sr=sr,
                                 n_fft=params['n_fft'],
                                 n_mels=params['n_mels'],
                                 n_mfcc=params['n_mfcc'],
                                 hop_length=params['hop_length'])
        # Append first- and second-order deltas below the coefficients.
        delta = librosa.feature.delta(x)
        d_delta = librosa.feature.delta(x, order=2)
        x = np.concatenate([x, delta, d_delta], axis=0)
    else:
        # Previously this fell through to an UnboundLocalError on ``x``.
        raise ValueError(
            'unknown feature extraction method: {!r}'.format(method))

    return torch.FloatTensor(x)
# Constant-Q spectrogram: magnitude as float32, displayed in dB relative to
# its peak.  ``sr`` is passed by keyword, matching the calls below; newer
# librosa releases reject it positionally.
# NOTE(review): specshow is called with y_axis='log' on CQT data - confirm
# that 'cqt_hz' is not the intended frequency axis.
cqt = librosa.cqt(y, sr=sr)
cqt = np.abs(cqt)
cqt = cqt.astype(np.float32)
print(cqt.shape, cqt.dtype)
d_cqt = librosa.amplitude_to_db(cqt, ref=np.max)
librosa.display.specshow(d_cqt, y_axis='log', x_axis='time', sr=sr, cmap='viridis')
plt.colorbar(format='%+2.0f dB')
plt.title('cqt-spectrogram')
plt.show()

### hybrid-cqt-spectrogram (hybrid constant-Q transform) (*84, t) *n_bins
hcqt = librosa.hybrid_cqt(y=y, sr=sr)
hcqt = np.abs(hcqt)
hcqt = hcqt.astype(np.float32)
print(hcqt.shape, hcqt.dtype)
d_hcqt = librosa.amplitude_to_db(hcqt, ref=np.max)
librosa.display.specshow(d_hcqt, y_axis='log', x_axis='time', sr=sr, cmap='viridis')
plt.colorbar(format='%+2.0f dB')
plt.title('hybrid-cqt-spectrogram')
plt.show()

### pseudo-cqt-spectrogram (pseudo constant-Q transform) (*84, t) *n_bins
pcqt = librosa.pseudo_cqt(y=y, sr=sr)