def computeLowLevel(input_file, pool, startTime, endTime, namespace=''):
    llspace = 'lowlevel.'
    rhythmspace = 'rhythm.'
    if namespace:
        llspace = namespace + '.lowlevel.'
        rhythmspace = namespace + '.rhythm.'

    rgain, sampleRate, downmix = getAnalysisMetadata(pool)
    loader = streaming.EqloudLoader(filename=input_file, sampleRate=sampleRate,
                                    startTime=startTime, endTime=endTime,
                                    replayGain=rgain, downmix=downmix)
    lowlevel.compute(loader.audio, loader.audio, pool, startTime, endTime, namespace)
    essentia.run(loader)

    # check if we processed enough audio for it to be useful, in particular did
    # we manage to get an estimation for the loudness (2 seconds required)
    if not pool.containsKey(llspace + "loudness"):
        INFO('ERROR: File is too short (< 2sec)... Aborting...')
        sys.exit(2)

    numOnsets = len(pool[rhythmspace + 'onset_times'])
    sampleRate = pool['metadata.audio_properties.analysis_sample_rate']
    onset_rate = numOnsets / float(loader.audio.totalProduced()) * sampleRate
    pool.set(rhythmspace + 'onset_rate', onset_rate)
def get_file_bpm(audio_path: str):
    loader = MonoLoader(filename=audio_path)
    rhythm_extractor = RhythmExtractor2013(method="degara")

    # code for if using essentia.standard (slightly less memory efficient)
    # audio = loader()
    # rhythm = rhythm_extractor(audio)
    # return rhythm[0]

    # code for percival estimator
    # pool = essentia.Pool()
    # percival_bpm_estimator = PercivalBpmEstimator()
    # loader.audio >> rhythm_extractor.signal
    # percival_bpm_estimator.bpm >> (pool, 'rhythm')

    pool = essentia.Pool()
    loader.audio >> rhythm_extractor.signal
    rhythm_extractor.ticks >> None
    rhythm_extractor.confidence >> None
    rhythm_extractor.bpm >> (pool, 'rhythm')
    rhythm_extractor.estimates >> None
    rhythm_extractor.bpmIntervals >> None
    essentia.run(loader)
    return pool['rhythm']
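# Minimal usage sketch for get_file_bpm, assuming `from essentia.streaming import
# MonoLoader, RhythmExtractor2013` and `import essentia` are available at module
# level; the file path is illustrative.
bpm = get_file_bpm('track.wav')  # single float, beats per minute
print(f'Estimated tempo: {bpm:.1f} BPM')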
def computeBpmHistogram(self, noveltyCurve, frameSize=4, overlap=2, frameRate=44100./128.,
                        window='hann', zeroPadding=0, constantTempo=False, minBpm=30):
    pool = Pool()
    bpmHist = BpmHistogram(frameRate=frameRate, frameSize=frameSize, overlap=overlap,
                           zeroPadding=zeroPadding, constantTempo=constantTempo,
                           windowType='hann', minBpm=minBpm)
    gen = VectorInput(noveltyCurve)

    gen.data >> bpmHist.novelty
    bpmHist.bpm >> (pool, 'bpm')
    bpmHist.bpmCandidates >> (pool, 'bpmCandidates')
    bpmHist.bpmMagnitudes >> (pool, 'bpmMagnitudes')
    bpmHist.frameBpms >> None  # (pool, 'frameBpms')
    bpmHist.tempogram >> (pool, 'tempogram')
    bpmHist.ticks >> (pool, 'ticks')
    bpmHist.ticksMagnitude >> (pool, 'ticksMagnitude')
    bpmHist.sinusoid >> (pool, 'sinusoid')
    essentia.run(gen)
    return pool
def estimate_main_band(infile):
    """
    Estimate if this is a low, mid, or high track.
    Not _really_ sure if this does what I need it to,
    but some quick tests looked right.
    """
    loader = streaming.MonoLoader(filename=infile)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    freqbands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    pool = Pool()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> freqbands.spectrum
    freqbands.bands >> (pool, 'bands')
    run(loader)

    sums = np.sum(pool['bands'], axis=0)
    band = np.argmax(sums)
    if band == 0:
        return 'low'
    elif band == 1:
        return 'mid'
    elif band == 2:
        return 'high'
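# Usage sketch for estimate_main_band. The frequencyBands edges above give three
# bands (0-250 Hz, 250-750 Hz, 750-4000 Hz) that map to the returned
# 'low' / 'mid' / 'high' labels; the file name is illustrative.
label = estimate_main_band('track.wav')
print('dominant band:', label)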
def callback(data):
    # update audio buffer
    buffer[:] = array(unpack('f' * bufferSize, data))
    # generate predictions
    reset(vimp)
    run(vimp)
def get_bpm(file_in):
    pool = Pool()
    loader = streaming.MonoLoader(filename=file_in)
    bt = streaming.RhythmExtractor2013()
    bpm_histogram = streaming.BpmHistogramDescriptors()
    # BPM histogram output size is 250
    centroid = streaming.Centroid(range=250)

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None
    bpm_histogram.histogram >> (pool, 'bpm_histogram')
    bpm_histogram.histogram >> centroid.array
    centroid.centroid >> (pool, 'bpm_centroid')
    run(loader)
    return pool['bpm']
def get_key(file_in):
    """ Estimates the key and scale for an audio file. """
    loader = streaming.MonoLoader(filename=file_in)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    spectralpeaks = streaming.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
    pool = Pool()
    hpcp = streaming.HPCP()
    key = streaming.Key()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key.pcp
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')
    run(loader)
    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
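# get_key (and estimate_key below) wrap the result in a `Key` container defined
# elsewhere in their original module; a minimal stand-in, assuming it only needs
# to carry the key name and scale, could be a namedtuple like this:
from collections import namedtuple

Key = namedtuple('Key', ['key', 'scale'])  # hypothetical stand-in

result = get_key('track.wav')              # e.g. Key(key='A', scale='minor')
print(result.key, result.scale)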
def estimate_key(infile):
    """ Estimates the key and scale for an audio file. """
    loader = streaming.MonoLoader(filename=infile)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    spectralpeaks = streaming.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
    pool = Pool()
    hpcp = streaming.HPCP()
    key = streaming.Key()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key.pcp
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')
    run(loader)
    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
def computeBpmHistogram(self, noveltyCurve, frameSize=4, overlap=2, frameRate=44100. / 128.,
                        window='hann', zeroPadding=0, constantTempo=False, minBpm=30):
    pool = Pool()
    bpmHist = ess.BpmHistogram(frameRate=frameRate, frameSize=frameSize, overlap=overlap,
                               zeroPadding=zeroPadding, constantTempo=constantTempo,
                               windowType='hann', minBpm=minBpm)
    gen = ess.VectorInput(noveltyCurve)

    gen.data >> bpmHist.novelty
    bpmHist.bpm >> (pool, 'bpm')
    bpmHist.bpmCandidates >> (pool, 'bpmCandidates')
    bpmHist.bpmMagnitudes >> (pool, 'bpmMagnitudes')
    bpmHist.frameBpms >> None  # (pool, 'frameBpms')
    bpmHist.tempogram >> (pool, 'tempogram')
    bpmHist.ticks >> (pool, 'ticks')
    bpmHist.ticksMagnitude >> (pool, 'ticksMagnitude')
    bpmHist.sinusoid >> (pool, 'sinusoid')
    essentia.run(gen)
    return pool
def computeSegmentation(filename, pool):
    sampleRate = 44100
    frameSize = 2048
    hopSize = frameSize // 2

    audio = EqloudLoader(filename=filename, downmix=pool['downmix'], sampleRate=sampleRate)
    fc = FrameCutter(frameSize=frameSize, hopSize=hopSize, silentFrames='keep')
    w = Windowing(type='blackmanharris62')
    spec = Spectrum()
    mfcc = MFCC(highFrequencyBound=8000)
    tmpPool = essentia.Pool()

    audio.audio >> fc.signal
    fc.frame >> w.frame >> spec.frame
    spec.spectrum >> mfcc.spectrum
    mfcc.bands >> (tmpPool, 'mfcc_bands')
    mfcc.mfcc >> (tmpPool, 'mfcc_coeff')
    essentia.run(audio)

    # compute the transpose of the features array on a deep copy; calling
    # numpy.matrix.transpose directly on the pool data mangles it
    features = copy.deepcopy(tmpPool['mfcc_coeff'].transpose())
    segments = std.SBic(cpw=1.5, size1=1000, inc1=300, size2=600, inc2=50)(features)
    for segment in segments:
        pool.add('segments', segment * hopSize / sampleRate)
def get_bpm(audiofile):
    pool = essentia.Pool()
    loader = MonoLoader(filename=audiofile)
    bt = RhythmExtractor2013()
    bpm_histogram = BpmHistogramDescriptors()
    centroid = Centroid(range=250)  # BPM histogram output size is 250

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> None
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None
    bpm_histogram.histogram >> None  # discard the unused histogram output
    essentia.run(loader)
    return "BPM:", pool['bpm']
def analysisSynthesisStreaming(params, signal):
    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['frameSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'],
                          gain=1. / params['frameSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')
    essentia.run(insignal)

    # remove the first half-window of frames
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio
def analSineModelStreaming(params, signal):
    # out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    essentia.run(insignal)

    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']

    # remove short tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean
    return mags, freqsClean, phases
def estimate_bpm(infile):
    """ Estimates the BPM for an audio file. """
    pool = Pool()
    loader = streaming.MonoLoader(filename=infile)
    bt = streaming.RhythmExtractor2013()
    bpm_histogram = streaming.BpmHistogramDescriptors()
    centroid = streaming.Centroid(range=250)  # BPM histogram output size is 250

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None
    bpm_histogram.histogram >> (pool, 'bpm_histogram')
    bpm_histogram.histogram >> centroid.array
    centroid.centroid >> (pool, 'bpm_centroid')
    run(loader)
    return pool['bpm']
def extract_spectral_complexity(file):
    print("Extracting spectral complexity...")
    # Pool to store the results
    pool = essentia.Pool()
    loader = MonoLoader(filename=file)
    frameCutter = FrameCutter(frameSize=44100 * 20, hopSize=5 * 44100)
    w = Windowing(type='hann')
    spec = Spectrum()
    spectralComplexity = SpectralComplexity()

    # Connect the inputs and outputs
    loader.audio >> frameCutter.signal
    frameCutter.frame >> w.frame >> spec.frame
    spec.spectrum >> spectralComplexity.spectrum
    spectralComplexity.spectralComplexity >> (pool, "spectral complexity")
    essentia.run(loader)

    df = pd.DataFrame()
    df["Spectral Complexity"] = pool["spectral complexity"]
    return df
def analysisSynthesisStreaming(params, signal):
    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['frameSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')
    essentia.run(insignal)

    # remove the first half-window of frames
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio
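# Round-trip usage sketch for the analysisSynthesisStreaming variants above,
# assuming `from essentia.standard import MonoLoader` is available; the parameter
# values are illustrative (frameSize should be a multiple of hopSize so the
# overlap-add reconstructs the signal cleanly).
params = {'frameSize': 2048, 'hopSize': 512, 'sampleRate': 44100}
audio = MonoLoader(filename='track.wav', sampleRate=params['sampleRate'])()
resynth = analysisSynthesisStreaming(params, audio)  # FFT -> IFFT identity chain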
def computeNoveltyCurve(self, filename, frameSize=1024, hopSize=512, windowType='hann',
                        weightCurveType='inverse_quadratic', sampleRate=44100.0,
                        startTime=0, endTime=2000):
    loader = EasyLoader(filename=filename, startTime=startTime, endTime=endTime,
                        sampleRate=sampleRate, downmix='left')
    fc = FrameCutter(frameSize=frameSize, hopSize=hopSize, silentFrames="keep",
                     startFromZero=False, lastFrameToEndOfFile=True)
    window = Windowing(type=windowType, zeroPhase=True, zeroPadding=1024 - frameSize)
    freqBands = FrequencyBands(sampleRate=sampleRate)  # using barkbands by default
    pool = Pool()
    spec = Spectrum()

    loader.audio >> fc.signal
    fc.frame >> window.frame >> spec.frame
    spec.spectrum >> freqBands.spectrum
    freqBands.bands >> (pool, 'frequency_bands')
    essentia.run(loader)

    noveltyCurve = std.NoveltyCurve(frameRate=sampleRate / float(hopSize),
                                    weightCurveType=weightCurveType)(pool['frequency_bands'])
    return noveltyCurve
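# Sketch of chaining the two methods above, assuming they live on the same
# analysis object (`extractor` here is an assumption) and that the file path is
# illustrative. Note that a novelty curve computed with hopSize=512 has a frame
# rate of sampleRate / hopSize = 44100 / 512, while computeBpmHistogram defaults
# to 44100 / 128, so the rate should be passed explicitly.
novelty = extractor.computeNoveltyCurve('track.wav', hopSize=512)
bpmPool = extractor.computeBpmHistogram(novelty, frameRate=44100.0 / 512)
print(bpmPool['bpm'])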
def analHpsModelStreaming(params, signal):
    # out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])
    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'], sampleRate=params['sampleRate'])

    smanal = es.HpsModelAnal(sampleRate=params['sampleRate'], hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    spec.spectrum >> pitchDetect.spectrum
    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch
    pitchDetect.pitch >> (pool, 'pitch')
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    smanal.stocenv >> (pool, 'stocenv')
    essentia.run(insignal)

    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']
    pitchConf = pool['pitchConfidence']

    # remove short tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningHarmonicTracks(freqs, minFrames, pitchConf)
    pool['frequencies'].data = freqsClean
    return mags, freqsClean, phases
def writeBeatFile(filename, pool):
    beatFilename = os.path.splitext(filename)[0] + '_beat.wav'  # 'out_beat.wav'
    audio = EasyLoader(filename=filename, downmix='mix', startTime=STARTTIME, endTime=ENDTIME)
    writer = MonoWriter(filename=beatFilename)
    onsetsMarker = AudioOnsetsMarker(onsets=pool['ticks'])

    audio.audio >> onsetsMarker.signal >> writer.audio
    essentia.run(audio)
    return beatFilename
def analsynthHprModelStreaming(params, signal):
    out = array([0.0])
    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params["frameSize"], hopSize=params["hopSize"], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params["frameSize"])
    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params["frameSize"], sampleRate=params["sampleRate"])

    smanal = es.HprModelAnal(sampleRate=params["sampleRate"], hopSize=params["hopSize"],
                             maxnSines=params["maxnSines"],
                             magnitudeThreshold=params["magnitudeThreshold"],
                             freqDevOffset=params["freqDevOffset"],
                             freqDevSlope=params["freqDevSlope"],
                             minFrequency=params["minFrequency"],
                             maxFrequency=params["maxFrequency"])
    # make sure the FFT size is appropriate
    synFFTSize = min(params["frameSize"] // 4, 4 * params["hopSize"])
    smsyn = es.SprModelSynth(sampleRate=params["sampleRate"], fftSize=synFFTSize, hopSize=params["hopSize"])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params["frameSize"] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    spec.spectrum >> pitchDetect.spectrum
    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch
    pitchDetect.pitchConfidence >> (pool, "pitchConfidence")
    pitchDetect.pitch >> (pool, "pitch")
    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.res >> smsyn.res
    smsyn.frame >> (pool, "frames")
    smsyn.sineframe >> (pool, "sineframes")
    smsyn.resframe >> (pool, "resframes")
    essentia.run(insignal)

    outaudio = framesToAudio(pool["frames"])
    outaudio = outaudio[2 * params["hopSize"]:]
    return outaudio, pool
def analsynthSineModelStreaming(params, signal):
    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])
    smsyn = es.SineModelSynth(sampleRate=params['sampleRate'], fftSize=params['frameSize'],
                              hopSize=params['hopSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'],
                          gain=1. / params['frameSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smsyn.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')
    essentia.run(insignal)

    # remove short tracks
    freqs = pool['frequencies']
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean

    # remove the first half-window of frames
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio, pool
def computeMidLevel(input_file, pool, startTime, endTime, namespace=''):
    rgain, sampleRate, downmix = getAnalysisMetadata(pool)
    loader = streaming.EqloudLoader(filename=input_file, sampleRate=sampleRate,
                                    startTime=startTime, endTime=endTime,
                                    replayGain=rgain, downmix=downmix)
    midlevel.compute(loader.audio, pool, startTime, endTime, namespace)
    essentia.run(loader)
def callback(data):
    buffer[:] = array(unpack('f' * bufferSize, data))
    mfccBuffer = np.zeros([numberBands])
    reset(vectorInput)
    run(vectorInput)

    mfccBuffer = np.roll(mfccBuffer, -patchSize)
    mfccBuffer = pool['mfcc'][-patchSize]
    features = mfccBuffer
    features = features.tolist()
    return features
def sines(filename, outfile, params):
    ''' Extract the sinusoidal components of an audio file '''
    import essentia
    from essentia.streaming import (MonoLoader, MonoWriter, FrameCutter, Windowing,
                                    SineModelAnal, SineModelSynth, FFT, IFFT, OverlapAdd)

    loader = MonoLoader(filename=filename, sampleRate=params['sampleRate'])
    awrite = MonoWriter(filename=outfile, sampleRate=params['sampleRate'])
    fcut = FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    overl = OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'],
                       gain=1.0 / params['frameSize'])
    w = Windowing(type="blackmanharris92")
    fft = FFT(size=params['frameSize'])
    ifft = IFFT(size=params['frameSize'])
    smanal = SineModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'],
                           magnitudeThreshold=params['magnitudeThreshold'],
                           freqDevOffset=params['freqDevOffset'],
                           freqDevSlope=params['freqDevSlope'],
                           minFrequency=params['minFrequency'],
                           maxFrequency=params['maxFrequency'])
    syn = SineModelSynth(sampleRate=params['sampleRate'], fftSize=params['frameSize'],
                         hopSize=params['hopSize'])

    # analysis
    loader.audio >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    # synth
    smanal.magnitudes >> syn.magnitudes
    smanal.frequencies >> syn.frequencies
    smanal.phases >> syn.phases
    # ifft
    syn.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> awrite.audio
    essentia.run(loader)
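# Illustrative parameter set for sines() above and subtract() below; these values
# are only an example configuration, not the library defaults, and the file names
# are placeholders.
sine_params = {
    'sampleRate': 44100,
    'frameSize': 2048,
    'hopSize': 512,
    'maxnSines': 100,
    'magnitudeThreshold': -74,  # dB
    'freqDevOffset': 10,        # Hz
    'freqDevSlope': 0.001,
    'minFrequency': 20,         # Hz
    'maxFrequency': 20000,      # Hz
}
sines('track.wav', 'track_sines.wav', sine_params)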
def estimate_danceability(infile):
    loader = streaming.MonoLoader(filename=infile)
    dance = streaming.Danceability()
    pool = Pool()

    loader.audio >> dance.signal
    dance.danceability >> (pool, 'danceability')
    run(loader)
    return pool['danceability']
def subtract(filename, outfile, params):
    ''' Subtract the sinusoidal components as computed by the sinusoidal model. '''
    import essentia
    from essentia.streaming import (MonoLoader, MonoWriter, FrameCutter, Windowing,
                                    SineModelAnal, SineSubtraction, FFT, VectorInput)

    loader = MonoLoader(filename=filename, sampleRate=params['sampleRate'])
    awrite = MonoWriter(filename=outfile, sampleRate=params['sampleRate'])
    fcut = FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'])
    w = Windowing(type="blackmanharris92")
    fft = FFT(size=params['frameSize'])
    smanal = SineModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'],
                           magnitudeThreshold=params['magnitudeThreshold'],
                           freqDevOffset=params['freqDevOffset'],
                           freqDevSlope=params['freqDevSlope'],
                           minFrequency=params['minFrequency'],
                           maxFrequency=params['maxFrequency'])
    subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsub = SineSubtraction(sampleRate=params['sampleRate'], fftSize=subtrFFTSize,
                            hopSize=params['hopSize'])
    pool = essentia.Pool()

    # analysis
    loader.audio >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    # subtraction
    fcut.frame >> smsub.frame
    smanal.magnitudes >> smsub.magnitudes
    smanal.frequencies >> smsub.frequencies
    smanal.phases >> smsub.phases
    smsub.frame >> (pool, 'frames')
    essentia.run(loader)

    outaudio = pool['frames'].flatten()
    outvector = VectorInput(outaudio)
    outvector.data >> awrite.audio
    essentia.run(outvector)
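# The two functions form a pair: sines() writes the tonal part and subtract()
# writes the residual. A typical call, reusing the illustrative sine_params above:
sines('track.wav', 'track_sines.wav', sine_params)
subtract('track.wav', 'track_residual.wav', sine_params)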
def computeBeatsLoudness(filename, pool):
    loader = MonoLoader(filename=filename, sampleRate=pool['samplerate'], downmix=pool['downmix'])
    ticks = pool['ticks']  # [pool['bestTicksStart']:pool['bestTicksStart']+32]
    beatsLoud = BeatsLoudness(sampleRate=pool['samplerate'],
                              frequencyBands=barkBands,  # EqBands, scheirerBands, barkBands
                              beats=ticks)

    loader.audio >> beatsLoud.signal
    beatsLoud.loudness >> (pool, 'loudness')
    beatsLoud.loudnessBandRatio >> (pool, 'loudnessBandRatio')
    essentia.run(loader)
def computeOnsets(filename, pool):
    loader = EasyLoader(filename=filename, sampleRate=pool['samplerate'],
                        startTime=STARTTIME, endTime=ENDTIME, downmix=pool['downmix'])
    onset = OnsetRate()

    loader.audio >> onset.signal
    onset.onsetTimes >> (pool, 'ticks')
    onset.onsetRate >> None
    essentia.run(loader)

    pool.set('size', loader.audio.totalProduced())
    pool.set('length', pool['size'] / pool['samplerate'])
def run(self):
    global frame_g
    while not self.stoprequest.isSet():
        pool = essentia.Pool()
        self.frame_q.get()
        v_in2 = VectorInput(frame_g)
        onset = OnsetRate()

        v_in2.data >> onset.signal
        onset.onsetRate >> (pool, 'Rhythm.onsetRate')
        onset.onsetTimes >> None
        essentia.run(v_in2)
        self.result_q.put(pool)
def analsynthSpsModelStreaming(params, signal):
    out = array([0.])
    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    smanal = es.SpsModelAnal(sampleRate=params['sampleRate'], hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])
    # make sure the FFT size is appropriate
    synFFTSize = min(int(params['frameSize'] / 4), 4 * params['hopSize'])
    smsyn = es.SpsModelSynth(sampleRate=params['sampleRate'], fftSize=synFFTSize,
                             hopSize=params['hopSize'], stocf=params['stocf'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> smanal.frame
    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.stocenv >> smsyn.stocenv
    smsyn.frame >> (pool, 'frames')
    smsyn.sineframe >> (pool, 'sineframes')
    smsyn.stocframe >> (pool, 'stocframes')
    essentia.run(insignal)

    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio, pool
def analsynthHarmonicMaskStreaming(params, signal):
    out = array([0.])
    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    spec = es.Spectrum(size=params['frameSize'])
    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'], sampleRate=params['sampleRate'])

    hmask = es.HarmonicMask(sampleRate=params['sampleRate'], binWidth=params['binWidth'],
                            attenuation=params['attenuation_dB'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    w.frame >> fft.frame
    spec.spectrum >> pitchDetect.spectrum
    fft.fft >> hmask.fft
    pitchDetect.pitch >> hmask.pitch
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')
    hmask.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')
    essentia.run(insignal)

    # remove the first half-window of frames
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio, pool
def computeMidLevel(input_file, neqPool, eqPool, startTime, endTime, namespace=''):
    rgain, sampleRate, downmix = getAnalysisMetadata(neqPool)
    loader = streaming.EasyLoader(filename=input_file, sampleRate=sampleRate,
                                  startTime=startTime, endTime=endTime,
                                  replayGain=rgain, downmix=downmix)
    eqloud = streaming.EqualLoudness()
    loader.audio >> eqloud.signal
    midlevel.compute(loader.audio, neqPool, startTime, endTime, namespace)
    midlevel.compute(eqloud.signal, eqPool, startTime, endTime, namespace)
    essentia.run(loader)
def estimate_chroma(self, uid):
    loader = esstr.MonoLoader(filename=self.audio_path_extractor.audio_path_name(uid))
    framecutter = esstr.FrameCutter(hopSize=self.hop_size, frameSize=self.frame_size)
    windowing = esstr.Windowing(type="blackmanharris62")
    spectrum = esstr.Spectrum()
    spectralpeaks = esstr.SpectralPeaks(orderBy="magnitude",
                                        magnitudeThreshold=1e-05,
                                        minFrequency=40,
                                        maxFrequency=5000,
                                        maxPeaks=10000)
    hpcp = esstr.HPCP(size=12,
                      referenceFrequency=self.tuning_freq,
                      harmonics=8,
                      bandPreset=True,
                      minFrequency=float(40),
                      maxFrequency=float(5000),
                      bandSplitFrequency=500.0,
                      weightType="cosine",
                      nonLinear=True,
                      windowSize=1.0)
    """
    hpcp = esstr.HPCP(size=12, referenceFrequency=tuningFreq, harmonics=8,
                      bandPreset=True, minFrequency=40.0, maxFrequency=5000.0,
                      bandSplitFrequency=250.0, weightType="cosine",
                      nonLinear=False, windowSize=1.0)
    """
    pool = essentia.Pool()

    # connect algorithms together
    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectrum.spectrum >> (pool, 'spectrum.magnitude')
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> (pool, 'chroma.hpcp')
    essentia.run(loader)

    # roll from 'A'-based to 'C'-based
    chroma = pool['chroma.hpcp']
    chroma = np.roll(chroma, shift=-3, axis=1)
    return chroma
def run(self):
    global frame_g
    while not self.stoprequest.isSet():
        pool = essentia.Pool()
        self.frame_q.get()
        v_in = VectorInput(frame_g)
        beat_tracker = RhythmExtractor2013(method="degara")

        v_in.data >> beat_tracker.signal
        beat_tracker.ticks >> (pool, 'Rhythm.ticks')
        beat_tracker.bpm >> (pool, 'Rhythm.bpm')
        beat_tracker.confidence >> None
        beat_tracker.estimates >> None
        beat_tracker.bpmIntervals >> None
        essentia.run(v_in)
        self.result_q.put(pool)
def chromaprint(self, analysisTime=30):
    """
    Computes the fingerprint of the input signal using the Chromaprint algorithm.
    It is a wrapper around the Chromaprint library.

    Returns:
        The chromaprints are returned as base64-encoded strings.
    """
    vec_input = ess.VectorInput(self.audio_vector)
    chromaprinter = ess.Chromaprinter(analysisTime=analysisTime, sampleRate=self.fs)
    pool = Pool()

    vec_input.data >> chromaprinter.signal
    chromaprinter.fingerprint >> (pool, 'chromaprint')
    run(vec_input)
    return pool['chromaprint']
def analsynthSineSubtractionStreaming(params, signal):
    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])
    subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsub = es.SineSubtraction(sampleRate=params['sampleRate'], fftSize=subtrFFTSize,
                               hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # subtraction
    fcut.frame >> smsub.frame
    smanal.magnitudes >> smsub.magnitudes
    smanal.frequencies >> smsub.frequencies
    smanal.phases >> smsub.phases
    smsub.frame >> (pool, 'frames')
    essentia.run(insignal)

    print(pool['frames'].shape)
    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio, pool
def analSprModelStreaming(params, signal):
    # out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])
    smanal = es.SprModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> smanal.frame
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    smanal.res >> (pool, 'res')
    essentia.run(insignal)

    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']

    # remove short tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean
    return mags, freqsClean, phases
def extract_loudness(file):
    pool = essentia.Pool()
    loader = MonoLoader(filename=file)
    frameCutter = FrameCutter(frameSize=44100 * 60, hopSize=44100 * 5)
    loader.audio >> frameCutter.signal

    # Loudness
    loudness = Loudness()
    frameCutter.frame >> loudness.signal
    loudness.loudness >> (pool, "lowlevel.loudness")
    essentia.run(loader)

    df = pd.DataFrame()
    df["Loudness"] = pool["lowlevel.loudness"]
    return df
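# Usage sketch for the two DataFrame extractors above; the path is illustrative.
# Note the frame settings differ (60 s frames / 5 s hop here vs 20 s frames /
# 5 s hop for the spectral-complexity extractor), so the two DataFrames may have
# different lengths and would need aligning before joining.
loud_df = extract_loudness("track.wav")
complexity_df = extract_spectral_complexity("track.wav")
print(loud_df.shape, complexity_df.shape)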
def __rhythm_extractor_process(filename, result):
    pool = essentia.Pool()
    loader = MonoLoader(filename=filename)
    bt = RhythmExtractor2013()

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> (pool, 'ticks')
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> (pool, 'estimates')
    bt.bpmIntervals >> (pool, 'bpmIntervals')
    essentia.run(loader)

    result.append({
        'bpm': pool['bpm'],
        'ticks': list(pool['ticks']),
        'confidence': pool['confidence'],
        'estimates': list(pool['estimates']),
        'bpmIntervals': list(pool['bpmIntervals'])
    })
    pool.clear()
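# Sketch of driving __rhythm_extractor_process from a separate process, which is
# presumably why it appends to a shared `result` list instead of returning a
# value; this multiprocessing setup is an assumption, not part of the original
# module, and the file path is illustrative.
import multiprocessing

manager = multiprocessing.Manager()
result = manager.list()
p = multiprocessing.Process(target=__rhythm_extractor_process, args=('track.wav', result))
p.start()
p.join()
print(result[0]['bpm'])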
def callback(data):
    # update audio buffer
    buffer[:] = array(unpack('f' * bufferSize, data))
    # generate predictions
    reset(vimp)
    run(vimp)

    # update mel and activation buffers
    melBuffer[:] = np.roll(melBuffer, -patchSize)
    melBuffer[:, -patchSize:] = pool['melbands'][-patchSize:, :].T
    img_mel.set_data(melBuffer)
    actBuffer[:] = np.roll(actBuffer, -1)
    actBuffer[:, -1] = softmax(20 * pool['model/Sigmoid'][-1, :].T)
    img_act.set_data(actBuffer)

    # update plots
    f.canvas.draw()
def sfxPitch(pool, namespace=''):
    sfxspace = 'sfx.'
    llspace = 'lowlevel.'
    if namespace:
        sfxspace = namespace + '.sfx.'
        llspace = namespace + '.lowlevel.'

    pitch = pool[llspace + 'pitch']
    gen = streaming.VectorInput(pitch)
    maxtt = streaming.MaxToTotal()
    mintt = streaming.MinToTotal()
    amt = streaming.AfterMaxToBeforeMaxEnergyRatio()

    gen.data >> maxtt.envelope
    gen.data >> mintt.envelope
    gen.data >> amt.pitch
    maxtt.maxToTotal >> (pool, sfxspace + 'pitch_max_to_total')
    mintt.minToTotal >> (pool, sfxspace + 'pitch_min_to_total')
    amt.afterMaxToBeforeMaxEnergyRatio >> (pool, sfxspace + 'pitch_after_max_to_before_max_energy_ratio')
    essentia.run(gen)

    pc = standard.Centroid(range=len(pitch) - 1)(pitch)
    pool.set(sfxspace + 'pitch_centroid', pc)
def extract_HPCP(_filename):
    # initialization
    HPCP_size = 36
    pool = Pool()
    key = Key()
    loader = MonoLoader(filename=_filename)
    frameCutter = FrameCutter(frameSize=4096, hopSize=512)
    window = Windowing(type='blackmanharris62')
    spectrum = Spectrum()
    spectralPeaks = SpectralPeaks(orderBy="magnitude", magnitudeThreshold=1e-05,
                                  minFrequency=100.0, maxFrequency=5000.0, maxPeaks=10000)
    hpcp = HPCP(size=HPCP_size, bandPreset=False, minFrequency=100.0, maxFrequency=5000.0,
                weightType='squaredCosine', nonLinear=False, sampleRate=44100.0,
                windowSize=4.0 / 3.0)

    # audio --> frame cutter --> windowing --> spectrum --> spectral peaks --> HPCP
    loader.audio >> frameCutter.signal
    frameCutter.frame >> window.frame >> spectrum.frame
    spectrum.spectrum >> spectralPeaks.spectrum
    spectralPeaks.magnitudes >> hpcp.magnitudes
    spectralPeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> (pool, 'hpcp vector')
    essentia.run(loader)

    hpcpVector = pool['hpcp vector']
    # take the mean over all frames to get the global HPCP, then normalize it
    globalHPCP = hpcpVector.mean(axis=0)
    globalHPCP = normalize(globalHPCP)
    return globalHPCP
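# extract_HPCP calls a `normalize` helper that is defined elsewhere in its
# original module; a minimal stand-in, assuming unit-maximum normalization of the
# chroma vector (a common choice for HPCPs), could look like this:
import numpy as np

def normalize(vector):
    # hypothetical helper: scale so the strongest pitch class equals 1
    peak = np.max(vector)
    return vector / peak if peak > 0 else vector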
def computeLowLevel(input_file, neqPool, eqPool, startTime, endTime, namespace=""):
    llspace = "lowlevel."
    rhythmspace = "rhythm."
    if namespace:
        llspace = namespace + ".lowlevel."
        rhythmspace = namespace + ".rhythm."

    rgain, sampleRate, downmix = getAnalysisMetadata(neqPool)
    loader = streaming.EasyLoader(filename=input_file, sampleRate=sampleRate,
                                  startTime=startTime, endTime=endTime,
                                  replayGain=rgain, downmix=downmix)
    eqloud = streaming.EqualLoudness()
    loader.audio >> eqloud.signal
    lowlevel.compute(eqloud.signal, loader.audio, neqPool, startTime, endTime, namespace)
    lowlevel.compute(eqloud.signal, eqloud.signal, eqPool, startTime, endTime, namespace)
    essentia.run(loader)

    # check if we processed enough audio for it to be useful, in particular did
    # we manage to get an estimation for the loudness (2 seconds required)
    if not neqPool.containsKey(llspace + "loudness") and not eqPool.containsKey(llspace + "loudness"):
        INFO("ERROR: File is too short (< 2sec)... Aborting...")
        sys.exit(2)

    sampleRate = neqPool["metadata.audio_properties.analysis_sample_rate"]

    numOnsets = len(neqPool[rhythmspace + "onset_times"])
    onset_rate = numOnsets / float(loader.audio.totalProduced()) * sampleRate
    neqPool.set(rhythmspace + "onset_rate", onset_rate)

    numOnsets = len(eqPool[rhythmspace + "onset_times"])
    onset_rate = numOnsets / float(loader.audio.totalProduced()) * sampleRate
    eqPool.set(rhythmspace + "onset_rate", onset_rate)
# analysis
loader.audio >> fcut.signal
fcut.frame >> w.frame
w.frame >> fft.frame
fft.fft >> smanal.fft
smanal.magnitudes >> (pool, 'magnitudes')
smanal.frequencies >> (pool, 'frequencies')
smanal.phases >> (pool, 'phases')

# subtraction
fcut.frame >> smsub.frame
smanal.magnitudes >> smsub.magnitudes
smanal.frequencies >> smsub.frequencies
smanal.phases >> smsub.phases
smsub.frame >> (pool, 'frames')
essentia.run(loader)

# store to file
outaudio = pool['frames'].flatten()
awrite = es.MonoWriter(filename=outputFilename, sampleRate=params['sampleRate'])
outvector = es.VectorInput(outaudio)
outvector.data >> awrite.audio
essentia.run(outvector)
pool = essentia.Pool()
readMetadata(args[0], pool)
replaygain.compute(args[0], pool, startTime, endTime)

rgain, sampleRate, downmix = getAnalysisMetadata(pool)
loader = streaming.EqloudLoader(filename=args[0], sampleRate=sampleRate,
                                startTime=startTime, endTime=endTime,
                                replayGain=rgain, downmix=downmix)
compute(loader.audio, loader.audio, pool, startTime, endTime, sampleRate=analysisSampleRate)
essentia.run(loader)

# check if we processed enough audio for it to be useful, in particular did
# we manage to get an estimation for the loudness (2 seconds required)
if not pool.containsKey(llspace + "loudness"):
    INFO('ERROR: File is too short (< 2sec)... Aborting...')
    sys.exit(2)

numOnsets = len(pool[rhythmspace + 'onset_times'])
sampleRate = pool['metadata.audio_properties.analysis_sample_rate']
onset_rate = numOnsets / float(source.totalProduced()) * sampleRate
pool.set(rhythmspace + 'onset_rate', onset_rate)
def runResetRun(gen, *args, **kwargs):
    # 0. Find networks which contain algorithms that do not play nice with our
    #    little trick. In particular, we have a test for Multiplexer that runs
    #    multiple generators...
    def isValid(algo):
        if isinstance(algo, essentia.streaming.VectorInput) and not list(algo.connections.values())[0]:
            # non-connected VectorInput, we don't want to get too fancy here...
            return False
        if algo.name() == 'Multiplexer':
            return False
        for output, inputs in algo.connections.items():
            for inp in inputs:
                if isinstance(inp, essentia.streaming._StreamConnector) and not isValid(inp.input_algo):
                    return False
        return True

    if not isValid(gen):
        print('Network is not capable of doing the run/reset/run trick, doing it the normal way...')
        essentia.run(gen)
        return

    # 1. Find all the outputs in the network that are connected to pools (aka
    #    pool feeders) and for each pool feeder, disconnect the given pool,
    #    store it, and connect a dummy pool in its place
    def useDummy(algo, output, input):
        if not hasattr(output, 'originalPools'):
            output.originalPools = []
            output.dummyPools = []
        # disconnect original
        output.originalPools.append(input)
        output.disconnect(input)
        # connect dummy
        dummy = essentia.Pool()
        output.dummyPools.append((dummy, input[1]))
        output >> output.dummyPools[-1]

    mapPools(gen, useDummy)

    # 2. Run the network
    essentia.run(gen)

    # 3. Reset the network
    essentia.reset(gen)

    # 4. For each pool feeder, disconnect the dummy pool and reconnect the
    #    original pool
    def useOriginal(algo, output, input):
        # disconnect dummy
        output.disconnect(input)
        # the dummy pools and the original pools should have the same index
        idx = output.dummyPools.index(input)
        output.dummyPools.remove(input)
        # connect original
        output >> output.originalPools[idx]
        # don't need these anymore
        if len(output.dummyPools) == 0:
            del output.dummyPools
            del output.originalPools

    mapPools(gen, useOriginal)

    # 5. Run the network for the second and final time
    return essentia.run(gen)