def extractDefaultFeatures(audio, outputDir):
    """Run the full Essentia extractor on one signal and save its statistics.

    Every descriptor family of ``ess.Extractor`` is switched on (dynamics,
    high-level, low-level, mid-level, rhythm and tuning). The extracted pool
    is aggregated with ``ess.PoolAggregator`` using its default settings and
    written as JSON to ``features.json`` inside ``outputDir``. The directory
    is created when it does not exist yet.

    :param audio: signal handed straight to the extractor, which is
        configured for a 44100 Hz sample rate
    :param outputDir: directory that receives ``features.json``
    :return: None
    """
    # compute all features for all sounds
    extractor = ess.Extractor(
        dynamics=True,
        dynamicsFrameSize=88200,
        dynamicsHopSize=44100,
        highLevel=True,
        lowLevel=True,
        lowLevelFrameSize=2048,
        lowLevelHopSize=1024,
        midLevel=True,
        namespace="",
        relativeIoi=False,
        rhythm=True,
        sampleRate=44100,
        tonalFrameSize=4096,
        tonalHopSize=2048,
        tuning=True,
    )
    pool = extractor(audio)
    aggPool = ess.PoolAggregator()(pool)

    if not path.exists(outputDir):
        makedirs(outputDir)

    # Join rather than concatenate: with plain "+" a directory given without
    # a trailing separator produced "<dir>features.json" next to the
    # directory that was just created instead of a file inside it.
    outputFile = path.join(outputDir, "features.json")
    ess.YamlOutput(filename=outputFile, format="json", doubleCheck=True)(aggPool)
def featureExtraction(soundfiles):
    """Extract aggregated Essentia features for each sound file in a list.

    The extractor runs with the low-level, mid-level and tuning descriptor
    families enabled; dynamics, high-level and rhythm are disabled. For
    every file the pool is aggregated with ``esst.PoolAggregator`` (default
    settings) and exported as JSON under the ``features/`` folder of the
    hard-coded download directory.

    :param soundfiles: iterable of audio file paths; the output name is
        derived by slicing off as many leading characters as the download
        directory has and the last four characters (presumably the
        extension -- verify against the caller)
    :return: None
    """
    # extractor = esst.LowLevelSpectralExtractor()
    extractor = esst.Extractor(
        dynamics=False,
        dynamicsFrameSize=88200,
        dynamicsHopSize=44100,
        highLevel=False,
        lowLevel=True,
        lowLevelFrameSize=2048,
        lowLevelHopSize=1024,
        midLevel=True,
        namespace="",
        relativeIoi=False,
        rhythm=False,
        sampleRate=44100,
        tonalFrameSize=4096,
        tonalHopSize=2048,
        tuning=True,
    )

    # Loop invariant, so it is built once instead of once per file.
    path1 = '/Users/helena/Desktop/SMC/ASP/sms-tools/workspace/A10/code/downloaded/'
    # len(path1) is 70, the value that was previously hard-coded in the slice.
    prefixLength = len(path1)

    # soundfiles = listdir(inputPath)
    for soundfile in soundfiles:
        name = soundfile[prefixLength:-4] + '_features.json'
        outPath = path1 + 'features/' + name
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print(soundfile)

        audio = esst.MonoLoader(filename=soundfile)()
        pool = extractor(audio)
        aggPool = esst.PoolAggregator()(pool)

        output = esst.YamlOutput(filename=outPath, format='json')
        output(aggPool)
        print(outPath + ' exported')
def featureExtraction(soundfiles):
    """Extract aggregated Essentia features for (audio file, output) pairs.

    The extractor runs with every descriptor family enabled (dynamics,
    high-level, low-level, mid-level, rhythm and tuning). Each file is
    loaded as mono, analysed, aggregated with ``esst.PoolAggregator``
    (default settings) and exported as JSON.

    :param soundfiles: iterable of ``(file, outPath)`` pairs; the JSON is
        written to ``outPath + 'features.json'``, so ``outPath`` must
        already end with whatever separator or prefix the caller wants
    :return: None
    """
    # extractor = esst.LowLevelSpectralExtractor()
    extractor = esst.Extractor(
        dynamics=True,
        dynamicsFrameSize=88200,
        dynamicsHopSize=44100,
        highLevel=True,
        lowLevel=True,
        lowLevelFrameSize=2048,
        lowLevelHopSize=1024,
        midLevel=True,
        namespace="",
        relativeIoi=False,
        rhythm=True,
        sampleRate=44100,
        tonalFrameSize=4096,
        tonalHopSize=2048,
        tuning=True,
    )

    # soundfiles = listdir(inputPath)
    for soundfile, outPath in soundfiles:
        audio = esst.MonoLoader(filename=soundfile)()
        # The extractor returns its own pool, so no empty Pool needs to be
        # allocated beforehand.
        pool = extractor(audio)
        aggPool = esst.PoolAggregator()(pool)
        esst.YamlOutput(filename=outPath + 'features.json', format='json')(aggPool)
        print(soundfile + ' exported')
def computeAggregation(pool, segments_namespace=''):
    """Aggregate a descriptor pool into summary statistics.

    All descriptors get the default statistics (mean, var, min, max and the
    first/second derivative means and variances). ``lowlevel.mfcc`` -- both
    the global one and the one below each segment namespace -- is summarised
    with mean, covariance and inverse covariance instead. When at least one
    segment namespace is given, ``segmentation.timestamps`` is copied
    through unchanged.

    :param pool: pool to aggregate
    :param segments_namespace: iterable of segment namespace names; a single
        name given as a plain string is treated as one namespace, and an
        empty or ``None`` value means "no segments"
    :return: the result of ``standard.PoolAggregator`` applied to ``pool``
    """
    stats = ['mean', 'var', 'min', 'max', 'dmean', 'dmean2', 'dvar', 'dvar2']

    # Normalise the argument: iterating a bare string would register one
    # bogus exception per character, and None is not iterable at all.
    if not segments_namespace:
        namespaces = []
    elif isinstance(segments_namespace, str):
        namespaces = [segments_namespace]
    else:
        namespaces = segments_namespace

    exceptions = {'lowlevel.mfcc': ['mean', 'cov', 'icov']}
    for namespace in namespaces:
        exceptions[namespace + '.lowlevel.mfcc'] = ['mean', 'cov', 'icov']

    if segments_namespace:
        exceptions['segmentation.timestamps'] = ['copy']

    aggregator = standard.PoolAggregator(defaultStats=stats, exceptions=exceptions)
    return aggregator(pool)
def main_danceability(args):
    """Compute danceability over the input waveform and plot it.

    The audio is obtained through ``loadaudio(args)``. A first pass fills a
    pool with one centroid value per 1024-sample frame and aggregates its
    mean and variance; that aggregate is not used afterwards. A second pass
    runs ``estd.Danceability`` on windows of ten seconds with a five second
    hop, prints each value, stores it under ``rhythm.danceability`` and
    plots the resulting series on the first axis of a 2x2 figure.

    :param args: object forwarded to ``loadaudio``; its ``samplerate``
        attribute sets the danceability sample rate and the window sizes
    """
    audio = loadaudio(args)

    # --- pass 1: frame-wise centroid ---------------------------------------
    pool = e.Pool()
    w = estd.Windowing()
    spec = estd.Spectrum()
    centroid = estd.SpectralCentroidTime()

    for frame in estd.FrameGenerator(audio, frameSize=1024, hopSize=512):
        pool.add('lowlevel.centroid', centroid(spec(w(frame))))

    # aggregated centroid statistics (currently not consumed below)
    aggrpool = estd.PoolAggregator(defaultStats=['mean', 'var'])(pool)

    # --- pass 2: danceability on long windows ------------------------------
    pool = e.Pool()
    w = estd.Windowing()
    danceability = estd.Danceability(
        maxTau=10000,
        minTau=300,
        sampleRate=args.samplerate,
    )

    longFrames = estd.FrameGenerator(
        audio,
        frameSize=10 * args.samplerate,
        hopSize=5 * args.samplerate,
    )
    for frame in longFrames:
        dreal, ddfa = danceability(w(frame))
        print(("d", dreal))
        pool.add('rhythm.danceability', dreal)

    print((type(pool['rhythm.danceability'])))

    # --- plot ---------------------------------------------------------------
    fig, gs = makefig(rows=2, cols=2)
    firstAxis = fig.axes[0]
    firstAxis.plot(pool['rhythm.danceability'])
def aggregatePoolArraysToNumbers(input_pool):
    """Return a copy of input_pool with array statistics added as numbers.

    The pool is copied with ``duplicatePool``. The descriptors listed under
    ``pk_array`` are gathered into a scratch pool and run through
    ``es_mode.PoolAggregator()``. Each resulting statistic is then stored in
    the copy under its aggregated name with "array" replaced by "number".

    :param input_pool: pool holding the arrays extracted from the audio
    :return: the duplicated pool, extended with the aggregated statistics
    """
    result = duplicatePool(input_pool)

    # Collect only the array descriptors so the aggregator sees nothing else.
    arrays_only = es.Pool()
    for array_name in result.descriptorNames(pk_array):
        arrays_only.merge(array_name, result[array_name])

    stats = es_mode.PoolAggregator()(arrays_only)

    for stat_name in stats.descriptorNames():
        number_name = stat_name.replace("array", "number")
        result.mergeSingle(number_name, stats[stat_name])

    return result
def reComputeDescriptors(inputAudioFile, outputJsonFile):
    """Compute mean descriptor values for one audio file and dump them as JSON.

    The file is loaded as mono at 44100 Hz, ``sfx.compute`` and
    ``esx.compute`` fill a shared pool using the analysis options below, the
    pool is aggregated to per-descriptor means, converted with
    ``makeFeatures`` and written with ``json.dump``.

    :param inputAudioFile: path of the audio file to analyse
    :param outputJsonFile: path of the JSON file to write (overwritten)
    :return: None
    """
    M = 2048  # frame size
    H = 1024  # hop size
    fs = 44100
    W = 'blackmanharris62'

    # analysis parameters
    options = {
        'sampleRate': fs,
        'frameSize': M,
        'hopSize': H,
        'windowType': W,
        'skipSilence': True,
    }

    audio = ess.MonoLoader(filename=inputAudioFile, sampleRate=fs)()

    pool = es.Pool()
    sfx.compute(audio, pool, options)
    esx.compute(audio, pool, options)

    # Only the mean is kept; add 'var' to defaultStats to get variances too.
    calc_Mean_Var = ess.PoolAggregator(defaultStats=['mean'])
    aggrPool = calc_Mean_Var(pool)

    features = makeFeatures(aggrPool)
    # Context manager so the handle is flushed and closed deterministically
    # instead of being left to the garbage collector.
    with open(outputJsonFile, 'w') as jsonFile:
        json.dump(features, jsonFile)
def reComputeDescriptors(inputAudioFile, outputJsonFile):
    """Compute energy-thresholded mean descriptors for one file, saved as JSON.

    The audio is framed twice with identical settings. The first pass
    records the energy of every frame and its maximum. In the second pass
    each frame's energy is normalised by that maximum, and the frame's
    descriptors are added to the pool of every threshold in
    ``dscr.threshold`` that the normalised energy reaches. Each pool is then
    aggregated to means and merged into a single dict through
    ``appendFeatures`` under the label ``"ethc" + str(dscr.thresholdSelect[i])``.

    :param inputAudioFile: path of the audio file to analyse
    :param outputJsonFile: path of the JSON file to write (overwritten)
    :return: None
    """
    M = 2048  # frame size
    H = 1024  # hop size
    fs = 44100
    W = 'blackmanharris62'

    spectrum = ess.Spectrum()
    window = ess.Windowing(type=W)
    mfcc = ess.MFCC()
    spectral_peaks = ess.SpectralPeaks(minFrequency=1,
                                       maxFrequency=20000,
                                       maxPeaks=100,
                                       sampleRate=fs,
                                       magnitudeThreshold=0,
                                       orderBy="magnitude")
    dissonance = ess.Dissonance()
    pitch_detection = ess.PitchYinFFT()
    harmonic_peaks = ess.HarmonicPeaks()
    inharmonicity = ess.Inharmonicity()
    spectral_contrast = ess.SpectralContrast()
    centroid = ess.Centroid()
    log_attack_time = ess.LogAttackTime()
    hfc = ess.HFC()
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = ess.SpectralComplexity(magnitudeThreshold=0.005)
    energy = ess.Energy()

    x = ess.MonoLoader(filename=inputAudioFile, sampleRate=fs)()

    # Pass 1: per-frame energy, needed to normalise against the loudest frame.
    frames = ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True)
    E = [energy(frame) for frame in frames]
    # NOTE(review): np.max raises on a signal that yields no frames, and an
    # all-silent signal gives E_max == 0 -- confirm callers never pass those.
    E_max = np.max(E)

    # Pass 2: same framing, so frames line up one-to-one with the energies
    # collected above and need not be measured a second time.
    frames = ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True)
    pools = [(t, es.Pool()) for t in dscr.threshold]

    for frame, E_frame in zip(frames, E):
        eNorm = E_frame / E_max
        threshPools = [pool for t, pool in pools if eNorm >= t]

        mX = spectrum(window(frame))
        descriptors = []

        mfcc_bands, mfcc_coeffs = mfcc(mX)
        descriptors.append(('lowlevel.mfcc', mfcc_coeffs))

        # Peaks come back ordered by magnitude; the algorithms below are fed
        # them re-sorted by ascending frequency.
        pfreq, pmag = spectral_peaks(mX)
        inds = pfreq.argsort()
        pfreq_sorted = pfreq[inds]
        pmag_sorted = pmag[inds]

        diss = dissonance(pfreq_sorted, pmag_sorted)
        descriptors.append(('lowlevel.dissonance', diss))

        pitch, pitch_confidence = pitch_detection(mX)
        phfreq, phmag = harmonic_peaks(pfreq_sorted, pmag_sorted, pitch)
        # Inharmonicity is only computed when more than one harmonic peak
        # was found.
        if len(phfreq) > 1:
            inharm = inharmonicity(phfreq, phmag)
            descriptors.append(('sfx.inharmonicity', inharm))

        sc_coeffs, sc_valleys = spectral_contrast(mX)
        descriptors.append(('lowlevel.spectral_contrast', sc_coeffs))

        descriptors.append(('lowlevel.spectral_centroid', centroid(mX)))
        descriptors.append(('sfx.logattacktime', log_attack_time(frame)))
        descriptors.append(('lowlevel.hfc', hfc(mX)))
        descriptors.append(('lowlevel.spectral_complexity', spectral_complexity(mX)))

        # Plain loops instead of list comprehensions used only for their
        # side effects.
        for pool in threshPools:
            for name, value in descriptors:
                pool.add(name, value)

    # Only the mean is kept; add 'var' to defaultStats to get variances too.
    calc_Mean_Var = ess.PoolAggregator(defaultStats=['mean'])
    aggrPools = [calc_Mean_Var(pool) for t, pool in pools]

    features = {}
    for i, aggrPool in enumerate(aggrPools):
        appendFeatures(features, aggrPool, "ethc" + str(dscr.thresholdSelect[i]))

    # Context manager so the handle is flushed and closed deterministically.
    with open(outputJsonFile, 'w') as jsonFile:
        json.dump(features, jsonFile)
def reComputeDescriptors(inputAudioFile, outputJsonFile):
    """Compute mean/variance descriptors for one audio file, saved as JSON.

    The file is loaded as mono at 44100 Hz and cut into frames of 2048
    samples with a hop of 1024. For every frame the MFCCs, MFCC bands,
    dissonance, inharmonicity (when more than one harmonic peak is found),
    spectral contrast, spectral centroid, log attack time and HFC are added
    to one pool. The pool is aggregated to mean and variance, converted
    with ``makeFeatures`` and written with ``json.dump``.

    :param inputAudioFile: path of the audio file to analyse
    :param outputJsonFile: path of the JSON file to write (overwritten)
    :return: None
    """
    # help(ess.SpectralContrast)

    # Settings originally used here:
    #   M = 1024, N = 1024, H = 512, fs = 44100, W = 'hann'
    #
    # Settings quoted from the freesound extractor:
    #   Real sampleRate = 44100;
    #   int frameSize = 2048;
    #   int hopSize = 1024;
    #   int zeroPadding = 0;
    #   string silentFrames = "noise";
    #   string windowType = "blackmanharris62";
    #   // Silence Rate
    #   Real thresholds_dB[] = { -20, -30, -60 };
    #   vector<Real> thresholds(ARRAY_SIZE(thresholds_dB));
    #   for (uint i=0; i<thresholds.size(); i++) {
    #       thresholds[i] = db2lin(thresholds_dB[i]/2.0);
    #   }
    M = 2048  # frame size
    H = 1024  # hop size
    fs = 44100
    W = 'blackmanharris62'

    spectrum = ess.Spectrum()
    window = ess.Windowing(type=W)
    mfcc = ess.MFCC()
    spectral_peaks = ess.SpectralPeaks(minFrequency=1,
                                       maxFrequency=20000,
                                       maxPeaks=100,
                                       sampleRate=fs,
                                       magnitudeThreshold=0,
                                       orderBy="magnitude")
    dissonance = ess.Dissonance()
    pitch_detection = ess.PitchYinFFT()
    harmonic_peaks = ess.HarmonicPeaks()
    inharmonicity = ess.Inharmonicity()
    spectral_contrast = ess.SpectralContrast()
    centroid = ess.Centroid()
    log_attack_time = ess.LogAttackTime()
    hfc = ess.HFC()

    x = ess.MonoLoader(filename=inputAudioFile, sampleRate=fs)()
    frames = ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True)

    pool = es.Pool()
    for frame in frames:
        mX = spectrum(window(frame))

        mfcc_bands, mfcc_coeffs = mfcc(mX)
        pool.add('lowlevel.mfcc', mfcc_coeffs)
        pool.add('lowlevel.mfcc_bands', mfcc_bands)

        # Peaks come back ordered by magnitude; the algorithms below are fed
        # them re-sorted by ascending frequency.
        pfreq, pmag = spectral_peaks(mX)
        inds = pfreq.argsort()
        pfreq_sorted = pfreq[inds]
        pmag_sorted = pmag[inds]

        diss = dissonance(pfreq_sorted, pmag_sorted)
        pool.add('lowlevel.dissonance', diss)

        pitch, pitch_confidence = pitch_detection(mX)
        phfreq, phmag = harmonic_peaks(pfreq_sorted, pmag_sorted, pitch)
        # Inharmonicity is only computed when more than one harmonic peak
        # was found.
        if len(phfreq) > 1:
            inharm = inharmonicity(phfreq, phmag)
            pool.add('sfx.inharmonicity', inharm)

        sc_coeffs, sc_valleys = spectral_contrast(mX)
        pool.add('lowlevel.spectral_contrast', sc_coeffs)

        pool.add('lowlevel.spectral_centroid', centroid(mX))
        pool.add('sfx.logattacktime', log_attack_time(frame))
        pool.add('lowlevel.hfc', hfc(mX))

    calc_Mean_Var = ess.PoolAggregator(defaultStats=['mean', 'var'])
    aggrPool = calc_Mean_Var(pool)

    features = makeFeatures(aggrPool)
    # Context manager so the handle is flushed and closed deterministically.
    with open(outputJsonFile, 'w') as jsonFile:
        json.dump(features, jsonFile)
# So let's redo the previous using a Pool: every frame's MFCC coefficients
# and MFCC bands are appended to the pool under a descriptor name.
pool = es.Pool()
for frame in ess.FrameGenerator(audio, frameSize=1024, hopSize=512):
    mfcc_bands, mfcc_coeffs = mfcc(spectrum(w(frame)))
    pool.add('lowlevel.mfcc', mfcc_coeffs)
    pool.add('lowlevel.mfcc_bands', mfcc_bands)

# Optional plot of the collected coefficients (disabled):
# plotMfcc = pool['lowlevel.mfcc'].T[1:,:]
# plt.pcolormesh(plotMfcc)

# Write the frame-wise pool to disk.
# output = es.YamlOutput(filename = 'mfcc.sig')
output = ess.YamlOutput(filename='joeTestOut/mfcc.json', format='json')
output(pool)

# Say we're not interested in all the MFCC frames, but just their mean & variance.
# To this end, we have the PoolAggregator algorithm, that can do all sorts of
# aggregation: mean, variance, min, max, etc...
aggrPool = ess.PoolAggregator(defaultStats=['mean', 'var'])(pool)

# Single-argument parenthesised prints give the same output on Python 2 and
# are valid on Python 3; print('') stands in for the bare "print" that
# emitted an empty line.
print('Original pool descriptor names:')
print(pool.descriptorNames())
print('')
print('Aggregated pool descriptor names:')
print(aggrPool.descriptorNames())

output = ess.YamlOutput(filename='joeTestOut/mfccaggr.json', format='json')
output(aggrPool)
def compute_features(complete_path):
    """Compute mean frame-wise features and metadata for the .wav files in a folder.

    Every file in ``complete_path`` whose name ends in ``.wav`` is loaded as
    mono at 44100 Hz and cut into frames of 1024 samples with a hop of 512.
    Centroid, crest, roll-off, strong peak, RMS, energy and flux are
    computed on each frame's windowed spectrum; the zero crossing rate is
    computed on the time-domain frame. The per-frame values are averaged
    with ``PoolAggregator`` and turned into a row via ``pool_to_array``.
    Duration and file name are stored in the tag pool returned by
    ``MetadataReader`` and collected the same way.

    :param complete_path: directory containing the .wav files
    :return: ``(features_df, meta_df)`` -- one row per .wav file; both are
        empty (with their column names set) when no .wav file is found
    """
    feature_columns = ['centroid', 'crest', 'roll off', 'strong peak',
                       'rms', 'energy', 'flux', 'zcr']

    result = []
    meta_result = []

    for filename in os.listdir(complete_path):
        if not filename.endswith(".wav"):
            continue

        # os.path.join also works when complete_path has no trailing
        # separator, which plain string concatenation did not.
        audio_path = os.path.join(complete_path, filename)

        # load our audio into an array
        audio = es.MonoLoader(filename=audio_path, sampleRate=44100)()

        # Create the pool and the algorithms afresh for each file, as
        # before, so nothing an algorithm may keep between calls leaks from
        # one file into the next.
        pool = essentia.Pool()
        window = es.Windowing()
        energy = es.Energy()
        spectrum = es.Spectrum()
        centroid = es.Centroid(range=22050)
        rolloff = es.RollOff()
        crest = es.Crest()
        speak = es.StrongPeak()
        rmse = es.RMS()
        flux = es.Flux()
        zerocrossingrate = es.ZeroCrossingRate()

        # Only the trailing five reader outputs are used: the tag pool
        # followed by duration, bitrate, sample rate and channel count.
        meta = es.MetadataReader(filename=audio_path, failOnError=True)()
        pool_meta, duration, bitrate, samplerate, channels = meta[7:]

        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            frame_spectrum = spectrum(window(frame))

            pool.add('spectral.centroid', centroid(frame_spectrum))
            pool.add('spectral crest', crest(frame_spectrum))
            pool.add('spectral rolloff', rolloff(frame_spectrum))
            pool.add('strong peak', speak(frame_spectrum))
            pool.add('RMS', rmse(frame_spectrum))
            pool.add('spectral_energy', energy(frame_spectrum))
            pool.add('spectral flux', flux(frame_spectrum))
            # The zero crossing rate needs the time-domain frame: the
            # spectrum is non-negative, so counting sign changes on it never
            # finds a crossing. The raw (un-windowed) frame is used.
            pool.add('zero crossing rate', zerocrossingrate(frame))

        # aggregate the results (mean only)
        aggrpool = es.PoolAggregator(defaultStats=['mean'])(pool)

        pool_meta.set("duration", duration)
        pool_meta.set("filename", os.path.relpath(filename))

        # write pools to lists
        result.append(pool_to_array(aggrpool))
        meta_result.append(pool_to_array(pool_meta))

    # Without any .wav file the frames below would have zero columns and the
    # column assignment would fail, so return empty frames explicitly.
    if not result:
        return (pd.DataFrame(columns=feature_columns),
                pd.DataFrame(columns=['duration', 'filename']))

    # NOTE(review): the column labels assume pool_to_array yields the values
    # in exactly this order -- confirm against pool_to_array.
    features_df = pd.DataFrame.from_records(result)
    features_df.columns = feature_columns

    # NOTE(review): assumes every tag pool flattens to exactly these three
    # entries -- confirm for files carrying other tags.
    meta_df = pd.DataFrame.from_records(meta_result)
    meta_df.columns = ['duration', 'filename', 'metadata.tags.comment']
    del meta_df['metadata.tags.comment']

    return features_df, meta_df