def audio_to_midi_melody(infile, outfile):
    """Extract the melody of an audio file, save it as MIDI and plot it.

    The audio is analysed frame by frame (YinFFT pitch, energy and
    frequency bands) and as a whole (PredominantPitchMelodia). Note onsets
    come from ``segmentByFreq`` on the YinFFT melody and the tempo from the
    novelty curve of the frequency bands; both are handed to ``saveMIDI``
    together with the Melodia pitch track. Finally a three-panel
    matplotlib figure is shown (this blocks until the window is closed,
    depending on the matplotlib backend).

    Args:
        infile: Path of the audio file, passed to ``es.MonoLoader``.
        outfile: Destination passed as first argument to ``saveMIDI``.

    Returns:
        None.
    """
    # Load audio and define the analysis parameters.
    audio = es.MonoLoader(filename=infile)()
    winSize = 2048
    hopSize = 128
    # Must match the loader's output rate (44100 Hz is the documented
    # MonoLoader default).
    Fs = 44100
    totalTime = np.size(audio) / float(Fs)

    # Instantiate the Essentia algorithms once, outside the frame loop.
    w = es.Windowing(type='hann', size=winSize)
    yin = es.PitchYinFFT(frameSize=winSize, minFrequency=20,
                         maxFrequency=2000)
    spectrum = es.Spectrum()  # per-frame magnitude spectrum
    calcEnergy = es.Energy()
    freqBandsAlg = es.FrequencyBands()
    predMel = es.PredominantPitchMelodia(numberHarmonics=15,
                                         filterIterations=3,
                                         frameSize=winSize,
                                         hopSize=hopSize,
                                         minFrequency=20,
                                         maxFrequency=2000,
                                         minDuration=50)

    # Number of hops in the signal, rounded up. The ceil has to wrap the
    # division; applying it to the (integer) sample count is a no-op.
    vectSize = np.ceil(np.size(audio) / float(hopSize))
    # Original author's note: the curve fed to the detector has to be
    # normalized.
    peakDet = es.PeakDetection(threshold=0.15, maxPosition=vectSize,
                               range=vectSize, maxPeaks=300)

    # Per-frame feature vectors.
    melodyV = []
    energy = []
    freqBands = []
    for frame in es.FrameGenerator(audio, winSize, hopSize):
        spec = spectrum(w(frame))
        melody, conf = yin(spec)
        # Low-confidence pitch estimates are replaced by 0.
        melodyV.append(0 if conf < 0.5 else melody)
        energy.append(calcEnergy(frame))
        freqBands.append(freqBandsAlg(spec))

    # Cast with the project helper so the data is accepted by Essentia
    # (original comment: "float32 compatible with Essentia").
    melodyV = arrayCast(melodyV)
    energy = arrayCast(energy)

    # Second melody estimate, computed on the whole signal.
    predMelody, _ = predMel(audio)

    melodyV = freq2cent(melodyV)
    predMelody = freq2cent(predMelody)

    # Log-energy; the small offset avoids log10(0) on silent frames.
    energy = np.log10(energy + .0000001)
    # NOTE(review): dividing by the maximum of a log curve flips the sign
    # when that maximum is negative and fails when it is 0 -- confirm this
    # normalization is what the peak detector expects.
    energy = energy / float(max(energy))
    # Smooth the curve; the original comment says the second argument is
    # in ms -- TODO confirm against the medfilt helper.
    smoothEnergy = medfilt(arrayCast(energy), .7)
    onsets, _ = peakDet(smoothEnergy)  # energy peaks, only used for plotting

    # segmentByFreq receives a copy, so melodyV itself is left untouched.
    noteOnsets = arrayCast(segmentByFreq(melodyV.copy(), totalTime))
    bpm, _ = es.NoveltyCurveFixedBpmEstimator()(
        es.NoveltyCurve()(arrayCast(freqBands)))
    saveMIDI(outfile, noteOnsets, predMelody, bpm[0], Fs, hopSize)

    # Plot: pitch tracks with note onsets, energy with its peaks, and the
    # quantized note curve.
    plt.subplot(3, 1, 1)
    plt.plot(np.linspace(0, totalTime, np.size(melodyV)), melodyV, 'b')
    plt.plot(np.linspace(0, totalTime, np.size(predMelody)), predMelody, 'r')
    # Onsets are multiplied by hopSize / Fs to place them on the time axis.
    plt.vlines(noteOnsets * hopSize / Fs, min(melodyV), max(melodyV))

    plt.subplot(3, 1, 2)
    plt.plot(np.linspace(0, totalTime, np.size(energy)), energy)
    plt.vlines(onsets * hopSize / Fs, min(energy), max(energy))

    plt.subplot(3, 1, 3)
    # Computed once; the original evaluated this expression twice.
    quantized = quantizeNote(cambioNota(melodyV))
    plt.plot(np.linspace(0, totalTime, np.size(quantized)), quantized)
    plt.show()
confV = [] energy = [] freqBands = [] for frame in es.FrameGenerator(audio, winSize, hopSize): spec = spectrum(w(frame)) #Calcular el espectro melody, conf = yin(spec) #Calcular melodia con algoritmo yin melody = 0 if conf < 0.5 else melody #0 para valores con poca confidence melodyV.append(melody) ener = calcEnergy(frame) #Calcular energia por frame energy.append(ener) freqBands.append(es.FrequencyBands()(spec)) #onsets[i] = onsetDet(spec, spec) #Convertir valores a float32 compatible con Essentia melodyV = arrayCast(melodyV) confV = arrayCast(confV) energy = arrayCast(energy) predMelody, confid = predMel(audio) #Calcular melodia con otro algoritmo melodyV = freq2cent(melodyV) predMelody = freq2cent(predMelody) energy = np.log10(energy+.0000001) energy = energy/float(max(energy)) #Normalize Energy smoothEnergy = medfilt(arrayCast(energy), .7) #Usando filtro para alisar curva, valor en ms onsets, amplitudes = peakDet(smoothEnergy) #detectar picos noteOnsets = arrayCast(segmentByFreq(melodyV.copy()))