def _midi_pitch_histogram(pitches):
    """Return a normalised 128-bin MIDI-note histogram of ``pitches`` (Hz).

    Every frequency in ``(0, 12600]`` Hz is mapped to its nearest MIDI note
    with ``12 * log2(f / 6.875) - 3`` (440 Hz -> note 69).  Anything else,
    including a frequency whose note falls outside ``0..127``, is counted as
    "unknown".

    Bin 0 is then overwritten with the number of unknown values, so genuine
    note-0 hits are discarded; this mirrors the behaviour the calling code
    has always had.  The result is a plain list of 128 floats summing to 1.
    When there is nothing to normalise (no pitches at all, or every pitch
    landed on note 0) the whole mass is put on bin 0.
    """
    from math import log

    n_bins = 128
    ln2 = log(2.0)
    counts = [0] * n_bins
    unknown = 0
    for freq in pitches:
        if 0. < freq <= 12600:
            # Explicit integer note number: numpy.bincount (used previously)
            # rejects float input and negative values.
            note = int(round(12 * (log(freq / 6.875) / ln2) - 3.))
            if 0 <= note < n_bins:
                counts[note] += 1
                continue
        unknown += 1

    # set 0 midi pitch to be the number of pruned values
    counts[0] = unknown

    total = sum(counts)
    if total == 0:
        histogram = [0.] * n_bins
        histogram[0] = 1.
        return histogram

    total = float(total)
    return [count / total for count in counts]


def compute(audio, pool, options):
    """Compute frame-wise low-level descriptors of ``audio`` into ``pool``.

    The signal is cut into frames; for each frame temporal, spectral,
    Bark-band, pitch, dissonance and spectral-complexity descriptors are
    added to ``pool`` under the module-level ``namespace`` prefix.  After the
    frame loop the spectral-contrast values are post-processed by
    ``spectralContrastPCA`` and a 128-bin pitch histogram plus its spread
    are added.

    Args:
        audio: the audio signal handed to ``essentia.FrameGenerator``.
        pool: descriptor pool; must provide ``add`` and ``descriptorNames``.
        options: mapping with the keys ``sampleRate``, ``frameSize``,
            ``hopSize``, ``windowType`` and ``skipSilence``.

    Raises:
        essentia.EssentiaError: if no frame contributed a
            ``zerocrossingrate`` value (i.e. every frame was skipped or
            there were no frames).
    """
    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    def add(name, value):
        # All descriptors of this module live under '<namespace>.<name>'.
        pool.add(namespace + '.' + name, value)

    # temporal descriptors
    lpc = essentia.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = essentia.ZeroCrossingRate()

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0,
                                type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = essentia.BarkBands(sampleRate=sampleRate)
    centralmoments = essentia.SpectralCentralMoments()
    crest = essentia.Crest()
    centroid = essentia.SpectralCentroid()
    decrease = essentia.SpectralDecrease()
    spectral_contrast = essentia.SpectralContrast(frameSize=frameSize,
                                                  sampleRate=sampleRate,
                                                  numberBands=6,
                                                  lowFrequencyBound=20,
                                                  highFrequencyBound=11000,
                                                  neighbourRatio=0.4,
                                                  staticDistribution=0.15)
    distributionshape = essentia.DistributionShape()
    energy = essentia.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come
    # from "standard" hi-fi equalizers
    energyband_bass = essentia.EnergyBand(startCutoffFrequency=20.0,
                                          stopCutoffFrequency=150.0,
                                          sampleRate=sampleRate)
    energyband_middle_low = essentia.EnergyBand(startCutoffFrequency=150.0,
                                                stopCutoffFrequency=800.0,
                                                sampleRate=sampleRate)
    energyband_middle_high = essentia.EnergyBand(startCutoffFrequency=800.0,
                                                 stopCutoffFrequency=4000.0,
                                                 sampleRate=sampleRate)
    energyband_high = essentia.EnergyBand(startCutoffFrequency=4000.0,
                                          stopCutoffFrequency=20000.0,
                                          sampleRate=sampleRate)
    flatnessdb = essentia.FlatnessDB()
    flux = essentia.Flux()
    hfc = essentia.HFC()
    mfcc = essentia.MFCC()
    rolloff = essentia.RollOff()
    rms = essentia.RMS()
    strongpeak = essentia.StrongPeak()

    # pitch algorithms
    pitch_detection = essentia.PitchDetection(frameSize=frameSize,
                                              sampleRate=sampleRate)
    pitch_salience = essentia.PitchSalience()

    # dissonance
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate,
                                            orderBy='frequency')
    dissonance = essentia.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = essentia.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    progress = Progress(total=total_frames)

    pitches = []
    scPool = essentia.Pool()  # pool for spectral contrast

    # The Bark-band central-moments algorithm depends only on the number of
    # bands, so it is built once and rebuilt only if that number changes.
    barkbands_centralmoments = None
    barkbands_size = None

    for frame in frames:
        # silence rate (recorded for every frame, even skipped ones)
        is_silent = essentia.isSilent(frame)
        add('silence_rate_60dB', is_silent)
        add('silence_rate_30dB', is_silent_threshold(frame, -30))
        add('silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and is_silent:
            continue

        # temporal descriptors
        add('zerocrossingrate', zerocrossingrate(frame))
        frame_lpc, _ = lpc(frame)
        add('temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum**2
        add('spectral_centroid', centroid(power_spectrum))
        add('spectral_decrease', decrease(power_spectrum))
        add('spectral_energy', energy(frame_spectrum))
        add('spectral_energyband_low', energyband_bass(frame_spectrum))
        add('spectral_energyband_middle_low',
            energyband_middle_low(frame_spectrum))
        add('spectral_energyband_middle_high',
            energyband_middle_high(frame_spectrum))
        add('spectral_energyband_high', energyband_high(frame_spectrum))
        add('hfc', hfc(frame_spectrum))
        add('spectral_rms', rms(frame_spectrum))
        add('spectral_flux', flux(frame_spectrum))
        add('spectral_rolloff', rolloff(frame_spectrum))
        add('spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness,
         frame_kurtosis) = distributionshape(frame_centralmoments)
        add('spectral_kurtosis', frame_kurtosis)
        add('spectral_spread', frame_spread)
        add('spectral_skewness', frame_skewness)

        # dissonance
        frame_frequencies, frame_magnitudes = spectral_peaks(frame_spectrum)
        add('dissonance', dissonance(frame_frequencies, frame_magnitudes))

        # mfcc
        _, frame_mfcc = mfcc(frame_spectrum)
        add('mfcc', frame_mfcc)

        # spectral contrast (kept in its own pool for the PCA step below)
        sc_coeffs, sc_valleys = spectral_contrast(frame_spectrum)
        scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        scPool.add(namespace + '.' + 'scvalleys', sc_valleys)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        add('barkbands', frame_barkbands)
        add('spectral_crest', crest(frame_barkbands))
        add('spectral_flatness_db', flatnessdb(frame_barkbands))

        if len(frame_barkbands) != barkbands_size:
            barkbands_size = len(frame_barkbands)
            barkbands_centralmoments = essentia.CentralMoments(
                range=barkbands_size - 1)
        (barkbands_spread, barkbands_skewness,
         barkbands_kurtosis) = distributionshape(
             barkbands_centralmoments(frame_barkbands))
        add('barkbands_spread', barkbands_spread)
        add('barkbands_skewness', barkbands_skewness)
        add('barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            add('pitch', frame_pitch)
        # Every pitch estimate (valid or not) feeds the histogram, which
        # counts the out-of-range ones as "unknown".
        pitches.append(frame_pitch)
        add('pitch_instantaneous_confidence', frame_pitch_confidence)

        add('pitch_salience', pitch_salience(frame_spectrum[:-1]))

        # spectral complexity
        add('spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)
        n_frames += 1

    # if there is no 'zerocrossingrate' it means that this is a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(),
                                                 namespace):
        raise essentia.EssentiaError('This is a silent file!')

    spectralContrastPCA(scPool, pool)

    # pitch value histogram over MIDI notes, and its spread
    midipitchhist = _midi_pitch_histogram(pitches)
    add('spectral_pitch_histogram', midipitchhist)

    pitch_centralmoments = essentia.CentralMoments(
        range=len(midipitchhist) - 1)
    pitch_histogram_spread, _, _ = distributionshape(
        pitch_centralmoments(midipitchhist))
    add('spectral_pitch_histogram_spread', pitch_histogram_spread)

    progress.finish()
def compute(audio, pool, options):
    """Compute sound-effect (SFX) descriptors of ``audio`` into ``pool``.

    Two groups of descriptors are added under the module-level ``namespace``
    prefix:

    * frame-wise harmonic descriptors (inharmonicity, tristimulus,
      odd-to-even harmonic energy ratio), only for frames that yield more
      than one harmonic peak;
    * whole-file descriptors derived from the signal envelope (temporal
      statistics, effective duration, log attack time, strong decay,
      flatness, morphological descriptors), the number of onsets, and a
      pitch profile.

    Args:
        audio: the audio signal; also passed whole to the envelope and
            log-attack-time algorithms.
        pool: descriptor pool.  It must already contain
            ``'rhythm.onset_times'`` and ``'lowlevel.pitch'``, which are
            read through ``pool.value``.
        options: mapping with the keys ``sampleRate``, ``frameSize``,
            ``hopSize``, ``windowType`` and ``skipSilence``.
    """
    INFO('Computing SFX descriptors...')

    def add(name, value):
        # All descriptors of this module live under '<namespace>.<name>'.
        pool.add(namespace + '.' + name, value)

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0,
                                type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # pitch algorithm
    # Configured with the same frame size as the spectrum it is fed, instead
    # of a hard-coded 2048 that only matches when options['frameSize'] is
    # 2048.
    pitch_detection = essentia.PitchDetection(frameSize=frameSize,
                                              sampleRate=sampleRate)

    # sfx descriptors
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate,
                                            orderBy='frequency')
    harmonic_peaks = essentia.HarmonicPeaks()
    inharmonicity = essentia.Inharmonicity()
    odd2evenharmonicenergyratio = essentia.OddToEvenHarmonicEnergyRatio()
    tristimulus = essentia.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    progress = Progress(total=total_frames)

    for frame in frames:
        if options['skipSilence'] and essentia.isSilent(frame):
            continue

        frame_spectrum = spectrum(window(frame))

        # pitch descriptors (only the pitch itself is needed here)
        frame_pitch, _ = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        frame_frequencies, frame_magnitudes = spectral_peaks(frame_spectrum)
        harmonic_frequencies, harmonic_magnitudes = harmonic_peaks(
            frame_frequencies, frame_magnitudes, frame_pitch)

        # The harmonic descriptors are only added for frames with more than
        # one harmonic peak.
        if len(harmonic_frequencies) > 1:
            add('inharmonicity',
                inharmonicity(harmonic_frequencies, harmonic_magnitudes))
            add('tristimulus',
                tristimulus(harmonic_frequencies, harmonic_magnitudes))
            add('odd2evenharmonicenergyratio',
                odd2evenharmonicenergyratio(harmonic_frequencies,
                                            harmonic_magnitudes))

        # display of progress report
        progress.update(n_frames)
        n_frames += 1

    envelope = essentia.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = essentia.AudioDecrease(blockSize=len(audio))
    add('temporal_decrease', decrease(file_envelope))

    centralmoments = essentia.AudioCentralMoments(blockSize=len(audio))
    file_centralmoments = centralmoments(file_envelope)

    distributionshape = essentia.DistributionShape()
    (file_spread, file_skewness,
     file_kurtosis) = distributionshape(file_centralmoments)
    add('temporal_spread', file_spread)
    add('temporal_skewness', file_skewness)
    add('temporal_kurtosis', file_kurtosis)

    centroid = essentia.AudioCentroid(blockSize=len(audio))
    add('temporal_centroid', centroid(file_envelope))

    # effective duration
    effectiveduration = essentia.EffectiveDuration()
    add('effective_duration', effectiveduration(file_envelope))

    # log attack time (computed on the raw audio, not on the envelope)
    logattacktime = essentia.LogAttackTime()
    add('logattacktime', logattacktime(audio))

    # strong decay
    strongdecay = essentia.StrongDecay()
    add('strongdecay', strongdecay(file_envelope))

    # dynamic profile
    flatness = essentia.FlatnessSFX()
    add('flatness', flatness(file_envelope))

    # onsets number
    onsets_number = len(pool.value('rhythm.onset_times')[0])
    add('onsets_number', onsets_number)

    # morphological descriptors
    max_to_total = essentia.MaxToTotal()
    add('max_to_total', max_to_total(file_envelope))

    tc_to_total = essentia.TCToTotal(sampleRate=sampleRate)
    add('tc_to_total', tc_to_total(file_envelope))

    derivativeSFX = essentia.DerivativeSFX(sampleRate=sampleRate)
    der_av_after_max, max_der_before_max = derivativeSFX(file_envelope)
    add('der_av_after_max', der_av_after_max)
    add('max_der_before_max', max_der_before_max)

    # pitch profile
    pitch = pool.value('lowlevel.pitch')

    if len(pitch) > 1:
        add('pitch_max_to_total', max_to_total(pitch))

        min_to_total = essentia.MinToTotal()
        add('pitch_min_to_total', min_to_total(pitch))

        pitch_centroid = essentia.Centroid(range=len(pitch) - 1)
        add('pitch_centroid', pitch_centroid(pitch))

        after_max_to_before_max = essentia.AfterMaxToBeforeMaxEnergyRatio()
        add('pitch_after_max_to_before_max_energy_ratio',
            after_max_to_before_max(pitch))
    else:
        # Fewer than two pitch values: report zeros for the whole profile.
        for name in ('pitch_max_to_total',
                     'pitch_min_to_total',
                     'pitch_centroid',
                     'pitch_after_max_to_before_max_energy_ratio'):
            add(name, 0.0)

    progress.finish()