def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # spectral algorithms
    energy = essentia.Energy()
    mfcc = essentia.MFCC(highFrequencyBound=8000)

    INFO('Computing Low-Level descriptors necessary for segmentation...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        #pool.setCurrentScope(frameScope)
        pool.add(namespace + '.' + 'scope', frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # need the energy for getting the thumbnail
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'spectral_mfcc', frame_mfcc)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()
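# A small sanity-check sketch for the frame "scope" bookkeeping above (standard
# library only; the default parameter values are illustrative and not taken from
# the original options dict): with start_of_frame initialised to -frameSize/2,
# frame n is centred at n * hopSize / sampleRate, so the first frame is centred on t = 0.
def frame_scopes(n_frames, frameSize=2048, hopSize=1024, sampleRate=44100.0):
    scopes = []
    start_of_frame = -frameSize * 0.5
    for _ in range(n_frames):
        scopes.append((start_of_frame / sampleRate,
                       (start_of_frame + frameSize) / sampleRate))
        start_of_frame += hopSize
    return scopes

# frame_scopes(3) -> [(-0.0232, 0.0232), (0.0, 0.0464), (0.0232, 0.0697)] (approximately)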
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = essentia.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = essentia.ZeroCrossingRate()

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = essentia.BarkBands(sampleRate=sampleRate)
    centralmoments = essentia.SpectralCentralMoments()
    crest = essentia.Crest()
    centroid = essentia.SpectralCentroid()
    decrease = essentia.SpectralDecrease()
    spectral_contrast = essentia.SpectralContrast(frameSize=frameSize,
                                                  sampleRate=sampleRate,
                                                  numberBands=6,
                                                  lowFrequencyBound=20,
                                                  highFrequencyBound=11000,
                                                  neighbourRatio=0.4,
                                                  staticDistribution=0.15)
    distributionshape = essentia.DistributionShape()
    energy = essentia.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = essentia.EnergyBand(startCutoffFrequency=20.0, stopCutoffFrequency=150.0, sampleRate=sampleRate)
    energyband_middle_low = essentia.EnergyBand(startCutoffFrequency=150.0, stopCutoffFrequency=800.0, sampleRate=sampleRate)
    energyband_middle_high = essentia.EnergyBand(startCutoffFrequency=800.0, stopCutoffFrequency=4000.0, sampleRate=sampleRate)
    energyband_high = essentia.EnergyBand(startCutoffFrequency=4000.0, stopCutoffFrequency=20000.0, sampleRate=sampleRate)
    flatnessdb = essentia.FlatnessDB()
    flux = essentia.Flux()
    harmonic_peaks = essentia.HarmonicPeaks()
    hfc = essentia.HFC()
    mfcc = essentia.MFCC()
    rolloff = essentia.RollOff()
    rms = essentia.RMS()
    strongpeak = essentia.StrongPeak()

    # pitch algorithms
    pitch_detection = essentia.PitchDetection(frameSize=frameSize, sampleRate=sampleRate)
    pitch_salience = essentia.PitchSalience()

    # dissonance
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    dissonance = essentia.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = essentia.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    pitches, pitch_confidences = [], []

    progress = Progress(total=total_frames)

    scPool = essentia.Pool()  # pool for spectral contrast

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        #pool.setCurrentScope(frameScope)

        # silence rate
        pool.add(namespace + '.' + 'silence_rate_60dB', essentia.isSilent(frame))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        scPool.add(namespace + '.' + 'scvalleys', sc_valleys)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = essentia.CentralMoments(range=len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        pitches.append(frame_pitch)
        pitch_confidences.append(frame_pitch_confidence)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # if no 'temporal_zerocrossingrate' it means that this is a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise essentia.EssentiaError('This is a silent file!')

    spectralContrastPCA(scPool, pool)

    # build pitch value histogram
    from math import log
    from numpy import bincount
    # convert from Hz to midi notes
    midipitches = []
    unknown = 0
    for freq in pitches:
        if freq > 0. and freq <= 12600:
            midipitches.append(12 * (log(freq / 6.875) / 0.69314718055995) - 3.)
        else:
            unknown += 1

    if len(midipitches) > 0:
        # compute histogram (bincount requires integer bins, so round the MIDI values)
        midipitchhist = bincount([int(round(midipitch)) for midipitch in midipitches])
        # set 0 midi pitch to be the number of pruned values
        midipitchhist[0] = unknown
        # normalise
        midipitchhist = [val / float(sum(midipitchhist)) for val in midipitchhist]
        # zero pad
        for i in range(128 - len(midipitchhist)):
            midipitchhist.append(0.0)
    else:
        midipitchhist = [0.] * 128
        midipitchhist[0] = 1.

    # pitchhist = essentia.array(zip(range(len(midipitchhist)), midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)  #, pool.GlobalScope)

    # the code below is the same as the one above:
    #for note in midipitchhist:
    #    pool.add(namespace + '.' + 'spectral_pitch_histogram_values', note)
    #    print "midi note:", note

    pitch_centralmoments = essentia.CentralMoments(range=len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)  #, pool.GlobalScope)

    progress.finish()
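# The Hz-to-MIDI conversion used above is midi = 12 * log2(f / 6.875) - 3; the
# constant 0.69314718055995 is ln(2). A quick standalone check (standard library only):
from math import log

def hz_to_midi(freq):
    return 12 * (log(freq / 6.875) / 0.69314718055995) - 3.0

assert round(hz_to_midi(440.0)) == 69   # A4
assert round(hz_to_midi(261.63)) == 60  # middle C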
def compute(audio, pool, options):

    INFO('Computing SFX descriptors...')

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # pitch algorithm
    pitch_detection = essentia.PitchDetection(frameSize=2048, sampleRate=sampleRate)

    # sfx descriptors
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    harmonic_peaks = essentia.HarmonicPeaks()
    inharmonicity = essentia.Inharmonicity()
    odd2evenharmonicenergyratio = essentia.OddToEvenHarmonicEnergyRatio()
    tristimulus = essentia.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        (frame_harmonic_frequencies, frame_harmonic_magnitudes) = harmonic_peaks(frame_frequencies, frame_magnitudes, frame_pitch)

        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'inharmonicity', frame_inharmonicity)
            frame_tristimulus = tristimulus(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'tristimulus', frame_tristimulus)
            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'odd2evenharmonicenergyratio', frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    envelope = essentia.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = essentia.AudioDecrease(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_decrease', decrease(file_envelope))  #, pool.GlobalScope)

    centralmoments = essentia.AudioCentralMoments(blockSize=len(audio))
    file_centralmoments = centralmoments(file_envelope)
    distributionshape = essentia.DistributionShape()
    (file_spread, file_skewness, file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + '.' + 'temporal_spread', file_spread)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_skewness', file_skewness)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_kurtosis', file_kurtosis)  #, pool.GlobalScope)

    centroid = essentia.AudioCentroid(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_centroid', centroid(file_envelope))  #, pool.GlobalScope)

    # effective duration
    effectiveduration = essentia.EffectiveDuration()
    pool.add(namespace + '.' + 'effective_duration', effectiveduration(file_envelope))  #, pool.GlobalScope)

    # log attack time
    logattacktime = essentia.LogAttackTime()
    pool.add(namespace + '.' + 'logattacktime', logattacktime(audio))  #, pool.GlobalScope)

    # strong decay
    strongdecay = essentia.StrongDecay()
    pool.add(namespace + '.' + 'strongdecay', strongdecay(file_envelope))  #, pool.GlobalScope)

    # dynamic profile
    flatness = essentia.FlatnessSFX()
    pool.add(namespace + '.' + 'flatness', flatness(file_envelope))  #, pool.GlobalScope)

    # onsets number
    onsets_number = len(pool.value('rhythm.onset_times')[0])
    pool.add(namespace + '.' + 'onsets_number', onsets_number)  #, pool.GlobalScope)

    # morphological descriptors
    max_to_total = essentia.MaxToTotal()
    pool.add(namespace + '.' + 'max_to_total', max_to_total(file_envelope))  #, pool.GlobalScope)

    tc_to_total = essentia.TCToTotal(sampleRate=sampleRate)
    pool.add(namespace + '.' + 'tc_to_total', tc_to_total(file_envelope))  #, pool.GlobalScope)

    derivativeSFX = essentia.DerivativeSFX(sampleRate=sampleRate)
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + '.' + 'der_av_after_max', der_av_after_max)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'max_der_before_max', max_der_before_max)  #, pool.GlobalScope)

    # pitch profile
    pitch = pool.value('lowlevel.pitch')
    if len(pitch) > 1:
        pool.add(namespace + '.' + 'pitch_max_to_total', max_to_total(pitch))  #, pool.GlobalScope)

        min_to_total = essentia.MinToTotal()
        pool.add(namespace + '.' + 'pitch_min_to_total', min_to_total(pitch))  #, pool.GlobalScope)

        pitch_centroid = essentia.Centroid(range=len(pitch) - 1)
        pool.add(namespace + '.' + 'pitch_centroid', pitch_centroid(pitch))  #, pool.GlobalScope)

        pitch_after_max_to_before_max_energy_ratio = essentia.AfterMaxToBeforeMaxEnergyRatio()
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio',
                 pitch_after_max_to_before_max_energy_ratio(pitch))  #, pool.GlobalScope)
    else:
        pool.add(namespace + '.' + 'pitch_max_to_total', 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_min_to_total', 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_centroid', 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', 0.0)  #, pool.GlobalScope)

    progress.finish()
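# Note on ordering: the SFX pass above reads 'rhythm.onset_times' and
# 'lowlevel.pitch' from the pool, so it can only run after the rhythm and
# low-level extractors have filled those namespaces. A hedged orchestration
# sketch (the module names are assumptions; the compute() signature is the one
# used throughout this file):
#
#   lowlevel.compute(audio, pool, options)   # fills 'lowlevel.pitch', ...
#   rhythm.compute(audio, pool, options)     # fills 'rhythm.onset_times', ...
#   sfx.compute(audio, pool, options)        # safe to read both namespaces now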
def compute(audio, pool, options):

    INFO('Computing Tempo extractor...')

    use_onset = options['useOnset']
    use_bands = options['useBands']

    # frameNumber * hopSize ~= about 6 seconds
    hopSize = options['hopSize']
    frameSize = options['frameSize']
    frameNumber = options['frameNumber']
    frameHop = options['frameHop']
    sampleRate = options['sampleRate']
    tolerance = 0.24
    period_tol = 2
    windowType = options['windowType']
    bands_freq = [40.0, 413.16, 974.51, 1818.94, 3089.19, 5000.0, 7874.4, 12198.29, 17181.13]
    bands_gain = [2.0, 3.0, 2.0, 1.0, 1.2, 2.0, 3.0, 2.5]
    maxbpm = 208
    minbpm = 40
    last_beat_interval = 0.025
    frame_time = float(hopSize) / float(sampleRate)

    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)

    if use_onset:
        fft = essentia.FFT(size=frameSize)
        cartesian2polar = essentia.CartesianToPolar()
        onset_hfc = essentia.OnsetDetection(method='hfc', sampleRate=sampleRate)
        onset_complex = essentia.OnsetDetection(method='complex', sampleRate=sampleRate)

    if use_bands:
        espectrum = essentia.Spectrum(size=frameSize)
        tempotapbands = essentia.FrequencyBands(frequencyBands=bands_freq)
        temposcalebands = essentia.TempoScaleBands(bandsGain=bands_gain)

    tempotap = essentia.TempoTap(numberFrames=frameNumber, sampleRate=sampleRate, frameHop=frameHop)
    tempotapticks = essentia.TempoTapTicks(hopSize=hopSize, sampleRate=sampleRate, frameHop=frameHop)

    frameTime = float(hopSize) / float(sampleRate)
    frameRate = 1. / frameTime

    nframes = 0
    bpm_estimates_list = []
    ticks = []
    matchingPeriods = []
    oldhfc = 0
    fileLength = len(audio) / sampleRate
    startSilence = 0
    oldSilence = 0
    endSilence = round(fileLength * sampleRate / hopSize) + 1

    for frame in frames:

        windowed_frame = window(frame)
        features = []

        if use_onset:
            complex_fft = fft(windowed_frame)
            (spectrum, phase) = cartesian2polar(complex_fft)
            hfc = onset_hfc(spectrum, phase)
            complexdomain = onset_complex(spectrum, phase)
            difhfc = max(hfc - oldhfc, 0)
            oldhfc = hfc
            features += [hfc, difhfc, complexdomain]

        if use_bands:
            spectrum_frame = espectrum(windowed_frame)
            bands = tempotapbands(spectrum_frame)
            (scaled_bands, cumul) = temposcalebands(bands)
            features += list(scaled_bands)

        features = essentia.array(features)
        (periods, phases) = tempotap(features)
        (these_ticks, these_matchingPeriods) = tempotapticks(periods, phases)
        for period in these_matchingPeriods:
            if period != 0:
                matchingPeriods += [period]
        ticks += list(these_ticks)

        if nframes < 5. * sampleRate / hopSize:
            if isSilent(frame) and startSilence == nframes - 1:
                startSilence = nframes

        if nframes > (fileLength - 5.) * sampleRate / hopSize:
            if isSilent(frame):
                if oldSilence != nframes - 1:
                    endSilence = nframes
                oldSilence = nframes

        nframes += 1

    # make sure we do not kill beat too close to music
    if startSilence > 0:
        startSilence -= 1
    endSilence += 1

    # fill the rest of buffer with zeros
    features = essentia.array([0] * len(features))
    while nframes % frameNumber != 0:
        (periods, phases) = tempotap(features)
        (these_ticks, these_matchingPeriods) = tempotapticks(periods, phases)
        ticks += list(these_ticks)
        matchingPeriods += list(these_matchingPeriods)
        nframes += 1

    if len(ticks) > 2:
        # fill up to end of file
        if fileLength > ticks[-1]:
            lastPeriod = ticks[-1] - ticks[-2]
            while ticks[-1] + lastPeriod < fileLength - last_beat_interval:
                if ticks[-1] > fileLength - last_beat_interval:
                    break
                ticks.append(ticks[-1] + lastPeriod)

    if len(ticks) > 1:
        # remove all negative ticks
        i = 0
        while i < len(ticks):
            if ticks[i] < startSilence / sampleRate * hopSize:
                ticks.pop(i)
            else:
                i += 1

        # kill all ticks from 350ms before the end of the song
        i = 0
        while i < len(ticks):
            if ticks[i] > endSilence / sampleRate * hopSize:
                ticks.pop(i)
            else:
                i += 1

        # prune values closer than tolerance
        i = 1
        while i < len(ticks):
            if ticks[i] - ticks[i - 1] < tolerance:
                ticks.pop(i)
            else:
                i += 1

        # prune all backward offbeat
        i = 3
        while i < len(ticks):
            if abs((ticks[i] - ticks[i-2]) - 1.5 * (ticks[i] - ticks[i-1])) < 0.100 \
               and abs((ticks[i] - ticks[i-1]) - (ticks[i-2] - ticks[i-3])) < 0.100:
                ticks.pop(i - 2)
            else:
                i += 1

    for period in matchingPeriods:
        if period != 0:
            bpm_estimates_list += [lagtobpm(period, sampleRate, hopSize)]
        #else:
        #    bpm_estimates_list += [ 0 ]

    # bpm estimates
    for bpm_estimate in bpm_estimates_list:
        pool.add(namespace + '.' + 'bpm_estimates', bpm_estimate)

    # estimate the bpm from the list of candidates
    if len(bpm_estimates_list) > 0:
        # round the halved estimates to integers, as bincount requires integer bins
        estimates = [int(round(bpm / 2.)) for bpm in bpm_estimates_list]
        closestBpm = argmax(bincount(estimates)) * 2.
        matching = []
        for bpm in bpm_estimates_list:
            if abs(closestBpm - bpm) < period_tol:
                matching.append(bpm)
        if len(matching) < 1:
            # something odd happened
            bpm = closestBpm
        else:
            bpm = mean(matching)
    else:
        bpm = 0.

    # convert to floats, as the python bindings do not yet support numpy.float32
    ticks = [float(tick) for tick in ticks]

    pool.add(namespace + '.' + 'bpm', bpm)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'beats_position', ticks)  #, pool.GlobalScope

    bpm_intervals = [ticks[i] - ticks[i - 1] for i in range(1, len(ticks))]
    pool.add(namespace + '.' + 'bpm_intervals', bpm_intervals)  #, pool.GlobalScope

    from numpy import histogram
    tempotap_bpms = [60. / i for i in bpm_intervals]
    if len(tempotap_bpms) > 0:
        weight, values = histogram(tempotap_bpms, bins=250, range=(0, 250), normed=True)
    else:
        weight, values = [0.], [0.]

    first_peak_weights = [0] * 250
    secnd_peak_weights = [0] * 250
    for i in range(max(argmax(weight) - 4, 0), min(argmax(weight) + 5, len(weight))):
        first_peak_weights[i] = weight[i]
        weight[i] = 0.
    for i in range(max(argmax(weight) - 4, 0), min(argmax(weight) + 5, len(weight))):
        secnd_peak_weights[i] = weight[i]
        weight[i] = 0.

    pool.add(namespace + '.' + 'first_peak_bpm', values[argmax(first_peak_weights)])  #, pool.GlobalScope
    pool.add(namespace + '.' + 'first_peak_weight', first_peak_weights[argmax(first_peak_weights)])  #, pool.GlobalScope
    if sum(first_peak_weights) != 0.:
        pool.add(namespace + '.' + 'first_peak_spread',
                 1. - first_peak_weights[argmax(first_peak_weights)] / sum(first_peak_weights))  #, pool.GlobalScope
    else:
        pool.add(namespace + '.' + 'first_peak_spread', 0.)  #, pool.GlobalScope

    pool.add(namespace + '.' + 'second_peak_bpm', values[argmax(secnd_peak_weights)])  #, pool.GlobalScope
    pool.add(namespace + '.' + 'second_peak_weight', secnd_peak_weights[argmax(secnd_peak_weights)])  #, pool.GlobalScope
    if sum(secnd_peak_weights) != 0.:
        pool.add(namespace + '.' + 'second_peak_spread',
                 1. - secnd_peak_weights[argmax(secnd_peak_weights)] / sum(secnd_peak_weights))  #, pool.GlobalScope
    else:
        pool.add(namespace + '.' + 'second_peak_spread', 0.)  #, pool.GlobalScope

    '''
    def rubato(ticks):
        bpm_rubato_python = []
        tolerance = 0.08
        i = 5
        tmp1 = 60. / float(ticks[i]   - ticks[i-1])
        tmp2 = 60. / float(ticks[i-1] - ticks[i-2])
        tmp3 = 60. / float(ticks[i-2] - ticks[i-3])
        tmp4 = 60. / float(ticks[i-3] - ticks[i-4])
        tmp5 = 60. / float(ticks[i-4] - ticks[i-5])
        for i in range(6, len(ticks)):
            if (    abs(1. - tmp1 / tmp4) >= tolerance
                and abs(1. - tmp2 / tmp5) >= tolerance
                and abs(1. - tmp2 / tmp4) >= tolerance
                and abs(1. - tmp1 / tmp5) >= tolerance
                and abs(1. - tmp1 / tmp2) <= tolerance
                and abs(1. - tmp4 / tmp5) <= tolerance):
                bpm_rubato_python.append(ticks[i-2])
            tmp5 = tmp4; tmp4 = tmp3; tmp3 = tmp2; tmp2 = tmp1
            tmp1 = 60. / (ticks[i] - ticks[i-1])
        print bpm_rubato_python
        return bpm_rubato_python
    '''

    # FIXME we need better rubato algorithm
    #rubato = essentia.BpmRubato()
    #bpm_rubato_start, bpm_rubato_stop = rubato(ticks)
    #pool.add(namespace + '.' + 'rubato_start', bpm_rubato_start)#, pool.GlobalScope
    #pool.add(namespace + '.' + 'rubato_stop', bpm_rubato_stop)#, pool.GlobalScope)

    INFO('100% done...')
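# lagtobpm() is referenced above but not defined in this section. Assuming that
# `period` is a beat lag expressed in hop-sized analysis frames, the usual
# conversion would look like the sketch below (this reconstructs the assumed
# helper; it is not necessarily the original implementation):
def lagtobpm(period, sampleRate, hopSize):
    """Convert a lag in analysis frames to beats per minute."""
    return 60.0 * sampleRate / (hopSize * period)

# e.g. a lag of 43 frames at 44100 Hz with a hop of 1024 samples gives roughly 60.1 BPM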
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = ess.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = ess.ZeroCrossingRate()

    # frame algorithms
    frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = ess.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = ess.BarkBands(sampleRate=sampleRate)
    centralmoments = ess.CentralMoments()
    crest = ess.Crest()
    centroid = ess.Centroid()
    decrease = ess.Decrease()
    spectral_contrast = ess.SpectralContrast(frameSize=frameSize,
                                             sampleRate=sampleRate,
                                             numberBands=6,
                                             lowFrequencyBound=20,
                                             highFrequencyBound=11000,
                                             neighbourRatio=0.4,
                                             staticDistribution=0.15)
    distributionshape = ess.DistributionShape()
    energy = ess.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = ess.EnergyBand(startCutoffFrequency=20.0, stopCutoffFrequency=150.0, sampleRate=sampleRate)
    energyband_middle_low = ess.EnergyBand(startCutoffFrequency=150.0, stopCutoffFrequency=800.0, sampleRate=sampleRate)
    energyband_middle_high = ess.EnergyBand(startCutoffFrequency=800.0, stopCutoffFrequency=4000.0, sampleRate=sampleRate)
    energyband_high = ess.EnergyBand(startCutoffFrequency=4000.0, stopCutoffFrequency=20000.0, sampleRate=sampleRate)
    flatnessdb = ess.FlatnessDB()
    flux = ess.Flux()
    harmonic_peaks = ess.HarmonicPeaks()
    hfc = ess.HFC()
    mfcc = ess.MFCC()
    rolloff = ess.RollOff()
    rms = ess.RMS()
    strongpeak = ess.StrongPeak()

    # pitch algorithms
    pitch_detection = ess.PitchYinFFT(frameSize=frameSize, sampleRate=sampleRate)
    pitch_salience = ess.PitchSalience()

    # dissonance
    spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    dissonance = ess.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = ess.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    pitches, pitch_confidences = [], []

    progress = Progress(total=total_frames)

    #scPool = es.Pool()  # pool for spectral contrast

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        # pool.setCurrentScope(frameScope)

        # silence rate
        # pool.add(namespace + '.' + 'silence_rate_60dB', es.isSilent(frame))
        pool.add(namespace + '.' + 'silence_rate_60dB', is_silent_threshold(frame, -60))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and es.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        #scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        #scPool.add(namespace + '.' + 'scvalleys', sc_valleys)
        pool.add(namespace + '.' + 'spectral_contrast', sc_coeffs)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = ess.CentralMoments(range=len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        pitches.append(frame_pitch)
        pitch_confidences.append(frame_pitch_confidence)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # if no 'temporal_zerocrossingrate' it means that this is a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise ess.EssentiaError('This is a silent file!')

    #spectralContrastPCA(scPool, pool)

    # build pitch value histogram
    from math import log
    from numpy import bincount
    # convert from Hz to midi notes
    midipitches = []
    unknown = 0
    for freq in pitches:
        if freq > 0. and freq <= 12600:
            midipitches.append(12 * (log(freq / 6.875) / 0.69314718055995) - 3.)
        else:
            unknown += 1

    if len(midipitches) > 0:
        # compute histogram (bincount requires integer bins, so round the MIDI values)
        midipitchhist = bincount([int(round(midipitch)) for midipitch in midipitches])
        # set 0 midi pitch to be the number of pruned values
        midipitchhist[0] = unknown
        # normalise
        midipitchhist = [val / float(sum(midipitchhist)) for val in midipitchhist]
        # zero pad
        for i in range(128 - len(midipitchhist)):
            midipitchhist.append(0.0)
    else:
        midipitchhist = [0.] * 128
        midipitchhist[0] = 1.

    # pitchhist = ess.array(zip(range(len(midipitchhist)), midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)  # , pool.GlobalScope)

    # the code below is the same as the one above:
    # for note in midipitchhist:
    #     pool.add(namespace + '.' + 'spectral_pitch_histogram_values', note)
    #     print "midi note:", note

    pitch_centralmoments = ess.CentralMoments(range=len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)  # , pool.GlobalScope)

    progress.finish()
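# A minimal driver sketch for the updated (ess.*) low-level extractor above. It
# assumes the imports implied by the prefixes used in this file
# (`import essentia as es`, `import essentia.standard as ess`) and that
# `namespace`, `INFO`, `Progress`, `is_silent_threshold` and `descriptorNames`
# are defined at module level; the option values are illustrative only.
#
#   import essentia
#   import essentia.standard as ess
#
#   options = {'sampleRate': 44100, 'frameSize': 2048, 'hopSize': 1024,
#              'windowType': 'blackmanharris62', 'skipSilence': True}
#   audio = ess.MonoLoader(filename='input.wav', sampleRate=options['sampleRate'])()
#   pool = essentia.Pool()
#   compute(audio, pool, options)
#   print(pool.descriptorNames())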
def compute(audio, pool, options): INFO("Computing SFX descriptors...") # analysis parameters sampleRate = options["sampleRate"] frameSize = options["frameSize"] hopSize = options["hopSize"] windowType = options["windowType"] # frame algorithms frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize) window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType) spectrum = ess.Spectrum(size=frameSize) # pitch algorithm pitch_detection = ess.PitchYinFFT(frameSize=2048, sampleRate=sampleRate) # sfx descriptors spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy="frequency") harmonic_peaks = ess.HarmonicPeaks() inharmonicity = ess.Inharmonicity() odd2evenharmonicenergyratio = ess.OddToEvenHarmonicEnergyRatio() tristimulus = ess.Tristimulus() # used for a nice progress display total_frames = frames.num_frames() n_frames = 0 start_of_frame = -frameSize * 0.5 progress = Progress(total=total_frames) for frame in frames: frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate] # pool.setCurrentScope(frameScope) if options["skipSilence"] and es.isSilent(frame): total_frames -= 1 start_of_frame += hopSize continue frame_windowed = window(frame) frame_spectrum = spectrum(frame_windowed) # pitch descriptors frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum) # spectral peaks based descriptors frame_frequencies, frame_magnitudes = spectral_peaks(frame_spectrum) # ERROR CORRECTION - hoinx 2015-12 errIdx = np.where(frame_frequencies < 1) frame_frequencies = np.delete(frame_frequencies, errIdx) frame_magnitudes = np.delete(frame_magnitudes, errIdx) (frame_harmonic_frequencies, frame_harmonic_magnitudes) = harmonic_peaks( frame_frequencies, frame_magnitudes, frame_pitch ) if len(frame_harmonic_frequencies) > 1: frame_inharmonicity = inharmonicity(frame_harmonic_frequencies, frame_harmonic_magnitudes) pool.add(namespace + "." + "inharmonicity", frame_inharmonicity) frame_tristimulus = tristimulus(frame_harmonic_frequencies, frame_harmonic_magnitudes) pool.add(namespace + "." + "tristimulus", frame_tristimulus) frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio( frame_harmonic_frequencies, frame_harmonic_magnitudes ) pool.add(namespace + "." + "odd2evenharmonicenergyratio", frame_odd2evenharmonicenergyratio) # display of progress report progress.update(n_frames) n_frames += 1 start_of_frame += hopSize envelope = ess.Envelope() file_envelope = envelope(audio) # temporal statistics decrease = ess.Decrease() pool.add(namespace + "." + "temporal_decrease", decrease(file_envelope)) # , pool.GlobalScope) centralmoments = ess.CentralMoments() file_centralmoments = centralmoments(file_envelope) distributionshape = ess.DistributionShape() (file_spread, file_skewness, file_kurtosis) = distributionshape(file_centralmoments) pool.add(namespace + "." + "temporal_spread", file_spread) # , pool.GlobalScope) pool.add(namespace + "." + "temporal_skewness", file_skewness) # , pool.GlobalScope) pool.add(namespace + "." + "temporal_kurtosis", file_kurtosis) # , pool.GlobalScope) centroid = ess.Centroid() pool.add(namespace + "." + "temporal_centroid", centroid(file_envelope)) # , pool.GlobalScope) # effective duration effectiveduration = ess.EffectiveDuration() pool.add(namespace + "." + "effective_duration", effectiveduration(file_envelope)) # , pool.GlobalScope) # log attack time logattacktime = ess.LogAttackTime() pool.add(namespace + "." 
+ "logattacktime", logattacktime(audio)) # , pool.GlobalScope) # strong decay strongdecay = ess.StrongDecay() pool.add(namespace + "." + "strongdecay", strongdecay(file_envelope)) # , pool.GlobalScope) # dynamic profile flatness = ess.FlatnessSFX() pool.add(namespace + "." + "flatness", flatness(file_envelope)) # , pool.GlobalScope) """ # onsets number onsets_number = len(pool['rhythm.onset_times'][0]) pool.add(namespace + '.' + 'onsets_number', onsets_number) # , pool.GlobalScope) """ # morphological descriptors max_to_total = ess.MaxToTotal() pool.add(namespace + "." + "max_to_total", max_to_total(file_envelope)) # , pool.GlobalScope) tc_to_total = ess.TCToTotal() pool.add(namespace + "." + "tc_to_total", tc_to_total(file_envelope)) # , pool.GlobalScope) derivativeSFX = ess.DerivativeSFX() (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope) pool.add(namespace + "." + "der_av_after_max", der_av_after_max) # , pool.GlobalScope) pool.add(namespace + "." + "max_der_before_max", max_der_before_max) # , pool.GlobalScope) # pitch profile """ pitch = pool['lowlevel.pitch'] if len(pitch) > 1: pool.add(namespace + '.' + 'pitch_max_to_total', max_to_total(pitch)) # , pool.GlobalScope) min_to_total = ess.MinToTotal() pool.add(namespace + '.' + 'pitch_min_to_total', min_to_total(pitch)) # , pool.GlobalScope) pitch_centroid = ess.Centroid(range=len(pitch) - 1) pool.add(namespace + '.' + 'pitch_centroid', pitch_centroid(pitch)) # , pool.GlobalScope) pitch_after_max_to_before_max_energy_ratio = ess.AfterMaxToBeforeMaxEnergyRatio() pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', pitch_after_max_to_before_max_energy_ratio(pitch)) # , pool.GlobalScope) else: pool.add(namespace + '.' + 'pitch_max_to_total', 0.0) # , pool.GlobalScope) pool.add(namespace + '.' + 'pitch_min_to_total', 0.0) # , pool.GlobalScope) pool.add(namespace + '.' + 'pitch_centroid', 0.0) # , pool.GlobalScope) pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', 0.0) # , pool.GlobalScope) """ progress.finish()
def compute(audio, pool, options):

    INFO('Computing Tonal descriptors...')

    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType = options['windowType']

    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=zeroPadding, type=windowType)
    spectrum = essentia.Spectrum(size=(frameSize + zeroPadding) / 2)
    spectral_peaks = essentia.SpectralPeaks(maxPeaks=10000,
                                            magnitudeThreshold=0.00001,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            orderBy="frequency")
    tuning = essentia.TuningFrequency()

    # computing the tuning frequency
    tuning_frequency = 440.0
    for frame in frames:
        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        #if len(frame_frequencies) > 0:
        (tuning_frequency, tuning_cents) = tuning(frame_frequencies, frame_magnitudes)

    pool.add(namespace + '.' + 'tuning_frequency', tuning_frequency)  #, pool.GlobalScope)

    # computing the HPCPs
    spectral_whitening = essentia.SpectralWhitening()

    hpcp_key_size = 36
    hpcp_chord_size = 36
    hpcp_tuning_size = 120

    hpcp_key = essentia.HPCP(size=hpcp_key_size,
                             referenceFrequency=tuning_frequency,
                             bandPreset=False,
                             minFrequency=40.0,
                             maxFrequency=5000.0,
                             weightType='squaredCosine',
                             nonLinear=False,
                             windowSize=4.0 / 3.0,
                             sampleRate=sampleRate)

    hpcp_chord = essentia.HPCP(size=hpcp_chord_size,
                               referenceFrequency=tuning_frequency,
                               harmonics=8,
                               bandPreset=True,
                               minFrequency=40.0,
                               maxFrequency=5000.0,
                               splitFrequency=500.0,
                               weightType='cosine',
                               nonLinear=True,
                               windowSize=0.5,
                               sampleRate=sampleRate)

    hpcp_tuning = essentia.HPCP(size=hpcp_tuning_size,
                                referenceFrequency=tuning_frequency,
                                harmonics=8,
                                bandPreset=True,
                                minFrequency=40.0,
                                maxFrequency=5000.0,
                                splitFrequency=500.0,
                                weightType='cosine',
                                nonLinear=True,
                                windowSize=0.5,
                                sampleRate=sampleRate)

    # initializing the HPCP arrays
    hpcps_key = []
    hpcps_chord = []
    hpcps_tuning = []

    # computing HPCP loop
    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        #frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectral peaks
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        if (len(frame_frequencies) > 0):
            # spectral_whitening
            frame_magnitudes_white = spectral_whitening(frame_spectrum, frame_frequencies, frame_magnitudes)
            frame_hpcp_key = hpcp_key(frame_frequencies, frame_magnitudes_white)
            frame_hpcp_chord = hpcp_chord(frame_frequencies, frame_magnitudes_white)
            frame_hpcp_tuning = hpcp_tuning(frame_frequencies, frame_magnitudes_white)
        else:
            frame_hpcp_key = essentia.array([0] * hpcp_key_size)
            frame_hpcp_chord = essentia.array([0] * hpcp_chord_size)
            frame_hpcp_tuning = essentia.array([0] * hpcp_tuning_size)

        # key HPCP
        hpcps_key.append(frame_hpcp_key)
        # add HPCP to the pool
        pool.add(namespace + '.' + 'hpcp', frame_hpcp_key)

        # chords HPCP
        hpcps_chord.append(frame_hpcp_chord)

        # tuning system HPCP
        hpcps_tuning.append(frame_hpcp_tuning)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()

    # check if silent file
    if len(hpcps_key) == 0:
        raise EssentiaError('This is a silent file!')

    # key detection
    key_detector = essentia.Key(profileType='temperley')
    average_hpcps_key = numpy.average(essentia.array(hpcps_key), axis=0)
    average_hpcps_key = normalize(average_hpcps_key)

    # thpcps
    max_arg = numpy.argmax(average_hpcps_key)
    thpcp = []
    for i in range(max_arg, len(average_hpcps_key)):
        thpcp.append(float(average_hpcps_key[i]))
    for i in range(max_arg):
        thpcp.append(float(average_hpcps_key[i]))
    pool.add(namespace + '.' + 'thpcp', thpcp)  #, pool.GlobalScope )

    (key, scale, key_strength, first_to_second_relative_strength) = key_detector(essentia.array(average_hpcps_key))
    pool.add(namespace + '.' + 'key_key', key)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'key_scale', scale)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'key_strength', key_strength)  #, pool.GlobalScope)

    # chord detection
    chord_detector = essentia.Key(profileType='tonictriad', usePolyphony=False)
    hpcp_frameSize = 2.0  # 2 seconds
    hpcp_number = int(hpcp_frameSize * (sampleRate / hopSize - 1))

    for hpcp_index in range(len(hpcps_chord)):
        hpcp_index_begin = max(0, hpcp_index - hpcp_number)
        hpcp_index_end = min(hpcp_index + hpcp_number, len(hpcps_chord))
        average_hpcps_chord = numpy.average(essentia.array(hpcps_chord[hpcp_index_begin:hpcp_index_end]), axis=0)
        average_hpcps_chord = normalize(average_hpcps_chord)
        (key, scale, strength, first_to_second_relative_strength) = chord_detector(essentia.array(average_hpcps_chord))
        if scale == 'minor':
            chord = key + 'm'
        else:
            chord = key
        frame_second_scope = [hpcp_index_begin * hopSize / sampleRate, hpcp_index_end * hopSize / sampleRate]
        pool.add(namespace + '.' + 'chords_progression', chord)  #, frame_second_scope)
        pool.add(namespace + '.' + 'chords_strength', strength)  #, frame_second_scope)

    # tuning system features
    keydetector = essentia.Key(profileType='diatonic')
    average_hpcps_tuning = numpy.average(essentia.array(hpcps_tuning), axis=0)
    average_hpcps_tuning = normalize(average_hpcps_tuning)
    (key, scale, diatonic_strength, first_to_second_relative_strength) = keydetector(essentia.array(average_hpcps_tuning))
    pool.add(namespace + '.' + 'tuning_diatonic_strength', diatonic_strength)  #, pool.GlobalScope)

    (equal_tempered_deviation,
     nontempered_energy_ratio,
     nontempered_peaks_energy_ratio) = essentia.HighResolutionFeatures()(average_hpcps_tuning)
    pool.add(namespace + '.' + 'tuning_equal_tempered_deviation', equal_tempered_deviation)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'tuning_nontempered_energy_ratio', nontempered_energy_ratio)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'tuning_nontempered_peaks_energy_ratio', nontempered_peaks_energy_ratio)  #, pool.GlobalScope)
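# normalize() is used above but not defined in this section. For the HPCP
# averaging it is assumed to scale a vector so that its maximum becomes 1
# (leaving an all-zero vector untouched); a sketch of that assumption:
def normalize(values):
    peak = max(values)
    if peak == 0:
        return values
    return [float(v) / peak for v in values]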
def compute(audio, pool, options):

    INFO('Computing SFX descriptors...')

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # pitch algorithm
    pitch_detection = essentia.PitchDetection(frameSize=2048, sampleRate=sampleRate)

    # sfx descriptors
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    harmonic_peaks = essentia.HarmonicPeaks()
    inharmonicity = essentia.Inharmonicity()
    odd2evenharmonicenergyratio = essentia.OddToEvenHarmonicEnergyRatio()
    tristimulus = essentia.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        (frame_harmonic_frequencies, frame_harmonic_magnitudes) = harmonic_peaks(frame_frequencies, frame_magnitudes, frame_pitch)

        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'inharmonicity', frame_inharmonicity)

            frame_tristimulus = tristimulus(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'tristimulus', frame_tristimulus)

            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'odd2evenharmonicenergyratio', frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    envelope = essentia.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = essentia.AudioDecrease(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_decrease', decrease(file_envelope)) #, pool.GlobalScope)

    centralmoments = essentia.AudioCentralMoments(blockSize=len(audio))
    file_centralmoments = centralmoments(file_envelope)
    distributionshape = essentia.DistributionShape()
    (file_spread, file_skewness, file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + '.' + 'temporal_spread', file_spread) #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_skewness', file_skewness) #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_kurtosis', file_kurtosis) #, pool.GlobalScope)

    centroid = essentia.AudioCentroid(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_centroid', centroid(file_envelope)) #, pool.GlobalScope)

    # effective duration
    effectiveduration = essentia.EffectiveDuration()
    pool.add(namespace + '.' + 'effective_duration', effectiveduration(file_envelope)) #, pool.GlobalScope)

    # log attack time
    logattacktime = essentia.LogAttackTime()
    pool.add(namespace + '.' + 'logattacktime', logattacktime(audio)) #, pool.GlobalScope)

    # strong decay
    strongdecay = essentia.StrongDecay()
    pool.add(namespace + '.' + 'strongdecay', strongdecay(file_envelope)) #, pool.GlobalScope)

    # dynamic profile
    flatness = essentia.FlatnessSFX()
    pool.add(namespace + '.' + 'flatness', flatness(file_envelope)) #, pool.GlobalScope)

    # onsets number
    onsets_number = len(pool.value('rhythm.onset_times')[0])
    pool.add(namespace + '.' + 'onsets_number', onsets_number) #, pool.GlobalScope)

    # morphological descriptors
    max_to_total = essentia.MaxToTotal()
    pool.add(namespace + '.' + 'max_to_total', max_to_total(file_envelope)) #, pool.GlobalScope)

    tc_to_total = essentia.TCToTotal(sampleRate=sampleRate)
    pool.add(namespace + '.' + 'tc_to_total', tc_to_total(file_envelope)) #, pool.GlobalScope)

    derivativeSFX = essentia.DerivativeSFX(sampleRate=sampleRate)
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + '.' + 'der_av_after_max', der_av_after_max) #, pool.GlobalScope)
    pool.add(namespace + '.' + 'max_der_before_max', max_der_before_max) #, pool.GlobalScope)

    # pitch profile
    pitch = pool.value('lowlevel.pitch')

    if len(pitch) > 1:
        pool.add(namespace + '.' + 'pitch_max_to_total', max_to_total(pitch)) #, pool.GlobalScope)

        min_to_total = essentia.MinToTotal()
        pool.add(namespace + '.' + 'pitch_min_to_total', min_to_total(pitch)) #, pool.GlobalScope)

        pitch_centroid = essentia.Centroid(range=len(pitch) - 1)
        pool.add(namespace + '.' + 'pitch_centroid', pitch_centroid(pitch)) #, pool.GlobalScope)

        pitch_after_max_to_before_max_energy_ratio = essentia.AfterMaxToBeforeMaxEnergyRatio()
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio',
                 pitch_after_max_to_before_max_energy_ratio(pitch)) #, pool.GlobalScope)
    else:
        pool.add(namespace + '.' + 'pitch_max_to_total', 0.0) #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_min_to_total', 0.0) #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_centroid', 0.0) #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', 0.0) #, pool.GlobalScope)

    progress.finish()
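

# NOTE: hypothetical driver sketch, not part of the extractor. Every pass above
# exposes the same compute(audio, pool, options) entry point and reads the option
# keys listed below; the concrete values here (frame size, hop size, window type)
# are example assumptions only. Pass the compute functions in dependency order:
# the SFX pass must come after the lowlevel and rhythm passes, because it reads
# 'lowlevel.pitch' and 'rhythm.onset_times' back from the shared pool.
def _run_extractors_sketch(audio, extractors, sampleRate = 44100.0):
    # `extractors` is an iterable of compute(audio, pool, options) callables,
    # e.g. (lowlevel.compute, rhythm.compute, tonal.compute, sfx.compute).
    options = {
        'sampleRate':  sampleRate,
        'frameSize':   2048,               # example value
        'hopSize':     1024,               # example value
        'zeroPadding': 0,
        'windowType':  'blackmanharris62', # example value
        'skipSilence': True,
    }
    pool = essentia.Pool()
    for compute_fn in extractors:
        compute_fn(audio, pool, options)
    return pool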