from madmom.features.onsets import CNNOnsetProcessor, OnsetPeakPickingProcessor


def predict_onsets(self):
    proc_0 = CNNOnsetProcessor()
    proc_1 = OnsetPeakPickingProcessor(threshold=0.95, fps=100)
    predicts = proc_1(proc_0(self.track_name))
    # ===== manually adding true onsets
    # predicts = [onset.prediction for onset in self.true_tablature.onsets]
    # =====
    # TODO: replace the constant confidence of 1 once real confidence
    # values are available.
    return list(zip(predicts, [1] * len(predicts)))
from madmom.features.onsets import OnsetPeakPickingProcessor, RNNOnsetProcessor


def get_onsets(path):
    """
    Finds onsets of an audio file.

    :param path: Path to audio file
    :return: List of times (in seconds) corresponding to onsets
    """
    processor = OnsetPeakPickingProcessor(threshold=0.45, combine=0.2)
    act = RNNOnsetProcessor()(path)
    onsets = processor(act)
    return onsets.tolist()
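# A minimal usage sketch for get_onsets; 'example.wav' is a hypothetical
# placeholder path, and madmom is assumed to be installed.
if __name__ == '__main__':
    onset_times = get_onsets('example.wav')
    print('detected %d onsets' % len(onset_times))
    for t in onset_times[:5]:
        print('  onset at %.3f s' % t)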
import numpy as np
from sklearn import preprocessing
from madmom.audio import signal  # provides smooth(); assumed source of `signal`
from madmom.audio.filters import LogarithmicFilterbank
from madmom.features.onsets import (OnsetPeakPickingProcessor,
                                    SpectralOnsetProcessor)
# rms_centroids and combine_series are project-local helpers
# (see the sketch of combine_series below).


def GuitarOnsetDetector(audio_filename, fs=44100):
    # TODO: ad hoc!!!! Derive these automatically.
    onset_threshold = 2
    series_delta = 0.22
    # fps must be a divisor of fs to obtain an integer hopSize
    # (it just simplifies the code below)
    fps = 180
    hopSize = int(fs / fps)
    max_spectral_centroid = 3500

    sodf = SpectralOnsetProcessor(onset_method='superflux', fps=fps,
                                  filterbank=LogarithmicFilterbank,
                                  num_bands=24, log=np.log10)
    sodf_onsets = sodf(audio_filename)

    # "fusion" with rms-diff: zero out the ODF where the RMS is falling
    rms, cs = rms_centroids(audio_filename, frameSize=1024, hopSize=hopSize,
                            sampleRate=fs)
    rms = signal.smooth(rms, int(fs / hopSize * 0.2))
    rms = preprocessing.scale(rms, with_mean=False, copy=False)
    rms = rms[1:] - rms[:-1]
    sodf_onsets[rms <= 0] = 0
    # sodf_onsets = sodf_onsets * np.power(rms, 0.01)
    # sodf_onsets[np.isnan(sodf_onsets)] = 0

    proc = OnsetPeakPickingProcessor(fps=fps, threshold=onset_threshold)
    p_onsets = proc(sodf_onsets)
    p_onsets = combine_series(p_onsets, series_delta)

    # Hanning-weighted spectral centroid after each onset; used below to
    # reject high-centroid (non-guitar) events.
    smoothed = []
    for i in range(len(p_onsets)):
        onset = p_onsets[i]
        duration = 0.5
        if i < len(p_onsets) - 1:
            duration = min(p_onsets[i + 1] - p_onsets[i], duration)
        window_len = int(duration * fs / hopSize)
        s = int(float(onset) * fs / hopSize)
        d = min(window_len, len(cs) - s)
        w = np.hanning(2 * d)
        w = w[d:] / np.sum(w[d:])
        w = np.reshape(w, (1, d))
        c = cs[s:s + d]
        smoothed.append(np.dot(w, c)[0])

    result = []
    for i in range(len(p_onsets)):
        if smoothed[i] < max_spectral_centroid:
            result.append(p_onsets[i])
    return result
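# combine_series above is a project-local helper that is not shown. A plausible
# minimal sketch (an assumption, not the original implementation): merge onsets
# closer than `delta` seconds, keeping the earliest of each run.
def combine_series(onsets, delta):
    combined = []
    for t in onsets:
        if not combined or t - combined[-1] >= delta:
            combined.append(t)
    return combined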
import numpy as np
from essentia.standard import (CartesianToPolar, FFT, FrameGenerator,
                               MonoLoader, Windowing)
from madmom.features.onsets import OnsetPeakPickingProcessor


def ninos(filename, gamma=0.94):
    """
    Reference: Mounir, M., Karsmakers, P., & Van Waterschoot, T. (2016).
    Guitar note onset detection based on a spectral sparsity measure.
    European Signal Processing Conference.
    https://doi.org/10.1109/EUSIPCO.2016.7760394
    """
    N = 2048
    hopSize = int(N / 10)
    J = int(N * gamma / 2)  # number of magnitude bins kept per frame
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    mag = []
    for frame in FrameGenerator(audio, frameSize=N, hopSize=hopSize):
        m = CartesianToPolar()(FFT()(Windowing(type='hann')(frame)))[0]
        m = np.asarray(m)
        # keep only the J largest spectral magnitudes of the frame
        idx = np.argsort(m)[::-1][:J]
        mag.append(m[idx])
    mag = np.asarray(mag)
    x2 = mag * mag
    # INOS = ||x||_2^2 / ||x||_4; NINOS normalizes by J**0.25
    inos = np.sum(x2, axis=1) / (np.sum(x2 * x2, axis=1) ** 0.25)
    ninos_odf = inos / (J ** 0.25)
    return OnsetPeakPickingProcessor(threshold=0.03,
                                     fps=44100 / hopSize)(ninos_odf)
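# For reference, the per-frame statistic computed above is (in the cited
# paper's notation) NINOS(n) = ||x_n||_2^2 / (J**0.25 * ||x_n||_4), where x_n
# holds the J largest spectral magnitudes of frame n; onsets appear as local
# peaks of this inverse sparsity measure. Hypothetical usage:
# onset_times = ninos('guitar_take.wav', gamma=0.94)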
import sys

import numpy
from madmom.audio.chroma import DeepChromaProcessor
from madmom.features.chords import DeepChromaChordRecognitionProcessor
from madmom.features.onsets import OnsetPeakPickingProcessor, RNNOnsetProcessor

import bmaFunctions  # project-local helper module

# Setting up Deep Chroma Chord Recognition Processor
dcp = DeepChromaProcessor()
decode = DeepChromaChordRecognitionProcessor()
chroma = dcp(sys.argv[1])
chords = decode(chroma)

# Setting up Onset Peak Picking Processor
proc = OnsetPeakPickingProcessor(fps=100, threshold=0.7, pre_avg=0.25,
                                 post_avg=0.25, smooth=0.01)
act = RNNOnsetProcessor()(sys.argv[1])
beats = proc(act)

# Calculating msi (mean inter-beat interval in milliseconds)
beatsArray = numpy.array(beats)
msi = numpy.mean(beatsArray[1:] - beatsArray[:-1]) * 1000

# Generating and printing beatmap
bmaFunctions.fancyPrint(bmaFunctions.assignKeys(beats, chords, sys.argv[3]),
                        msi, sys.argv[2])
    results_cnn[test_idx] = result.cpu().numpy()
    return results_cnn


# In[ ]:


predicted = None
picked_beats = []

if PREDICT:
    # beat_picker = BeatTrackingProcessor(fps=FPS)
    beat_picker = OnsetPeakPickingProcessor(
        fps=FPS, threshold=THRESHOLD, pre_avg=PRE_AVG, post_avg=POST_AVG,
        pre_max=PRE_MAX, post_max=POST_MAX
    )

    # predict beats
    if VERBOSE:
        print('predicting...')
    predicted = run_prediction(test_f)  # [test_t[0], test_t[1]]

    # pick peaks
    if VERBOSE:
        print('picking beats...')
    for i, pred in enumerate(predicted):
        picked = beat_picker(pred)
        picked_beats.append(picked)
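# Note: FPS, THRESHOLD, PRE_AVG, POST_AVG, PRE_MAX, POST_MAX, PREDICT, VERBOSE,
# run_prediction and test_f come from earlier notebook cells that are not shown
# here. In madmom's OnsetPeakPickingProcessor the pre/post windows are given in
# seconds, so e.g. pre_avg=0.1 puts the 100 ms before each frame into the
# moving-average window used for adaptive thresholding.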
def predictOneSong(audioPath, featureOption, clfModelPath):
    # ==== feature extraction
    if featureOption == 'convRandom':
        modelSavePath = './autoencoder/savedRandomAeModels/'
        features = extractRandomConvFeatures(audioPath, modelSavePath)  # 64 x M
    elif featureOption == 'convAe':
        modelSavePath = './autoencoder/savedAeModels/'
        features = extractConvFeatures(audioPath, modelSavePath)
    elif featureOption == 'convDae':
        modelSavePath = './autoencoder/savedDaeModels/'
        features = extractConvFeatures(audioPath, modelSavePath)
    elif featureOption == 'baseline':
        features = extractBaselineFeatures(audioPath)  # 60 x M
    else:
        raise ValueError('unknown feature option: %s' % featureOption)

    # ==== onset detection
    onsetDetector = CNNOnsetProcessor()
    nvt = onsetDetector(audioPath)
    peakPicker = OnsetPeakPickingProcessor(fps=100)
    onsets = peakPicker(nvt)
    onsetsInFrames = [round(np.divide(onset, HOPSIZE / FS)) for onset in onsets]

    # ==== collect feature of interest
    X = []
    timeStamp = []
    for i in range(0, len(onsetsInFrames)):
        midIndex = int(onsetsInFrames[i])
        curTime = onsets[i]
        frontFrame = 0
        rearFrame = 2
        splicedFeature = featureSplicing(features, midIndex, frontFrame,
                                         rearFrame)
        X.append(splicedFeature)
        timeStamp.append(curTime)
    # print(np.shape(X))

    # ==== drum transcription
    tmp = np.load(clfModelPath)
    classifiers = tmp['arr_0']
    normParams = tmp['arr_1']
    clfBd = classifiers[0]  # kick drum
    clfSd = classifiers[1]  # snare drum
    clfHh = classifiers[2]  # hi-hat
    maxVec = normParams[0]
    minVec = normParams[1]
    XScaled = scaleMatrixWithMinMax(X, maxVec, minVec)

    predictions = []
    for i in range(0, len(timeStamp)):
        curFeature = XScaled[i]
        curFeature = np.expand_dims(curFeature, axis=0)
        detectBd = clfBd.predict(curFeature)
        detectSd = clfSd.predict(curFeature)
        detectHh = clfHh.predict(curFeature)
        if detectBd:
            predictions.append((timeStamp[i], 'KD'))
        if detectSd:
            predictions.append((timeStamp[i], 'SD'))
        if detectHh:
            predictions.append((timeStamp[i], 'HH'))
    return predictions
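# Hypothetical usage sketch (paths are placeholders; the .npz model file is
# assumed to hold the classifier triple [KD, SD, HH] and the min/max
# normalisation vectors exactly as unpacked above):
# events = predictOneSong('drums.wav', 'baseline', './models/clf.npz')
# for t, label in events:
#     print('%.3f s  %s' % (t, label))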
# shared imports for the simple detectors below
from essentia.standard import MonoLoader
from madmom.features.onsets import (OnsetPeakPickingProcessor,
                                    RNNOnsetProcessor,
                                    SpectralOnsetProcessor)


def rectifiedComplexDomain(filename):
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    odf = SpectralOnsetProcessor(onset_method='rectified_complex_domain')(audio)
    return OnsetPeakPickingProcessor()(odf)
def PhaseDev(filename):
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    odf = SpectralOnsetProcessor(onset_method='phase_deviation')(audio)
    return OnsetPeakPickingProcessor()(odf)
def modifiedKL(filename):
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    odf = SpectralOnsetProcessor(onset_method='modified_kullback_leibler')(audio)
    return OnsetPeakPickingProcessor()(odf)
def RNNOnsetDetector(filename):
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    return OnsetPeakPickingProcessor()(RNNOnsetProcessor()(audio))
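# A minimal comparison sketch, assuming the four detectors above live in one
# module and 'example.wav' is a placeholder file; each function returns onset
# times in seconds, so the outputs are directly comparable.
if __name__ == '__main__':
    for detector in (rectifiedComplexDomain, PhaseDev, modifiedKL,
                     RNNOnsetDetector):
        onsets = detector('example.wav')
        print('%s: %d onsets' % (detector.__name__, len(onsets)))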