def _ExtractAll(self, audio_location, sample_rate):
    """Extract a standard battery of yaafe features from one audio file.

    Builds a single FeaturePlan carrying every extraction spec (one plan
    can hold several features at once), runs the file through a fresh
    Engine, and returns the dict of all outputs keyed by feature name
    (e.g. features["zcr"]).
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    feature_specs = (
        'zcr: ZCR',
        'mfcc: MFCC',
        'mfcc_D1: MFCC > Derivate DOrder=1',
        # 'mfcc_D2: MFCC > Derivate DOrder=2',
        'flux: SpectralFlux',
        'energy: Energy',
        'loudness: Loudness',
        'obsi: OBSI',
        'sharpness: PerceptualSharpness',
        'spread: PerceptualSpread',
        'rolloff: SpectralRolloff',
        'variation: SpectralVariation',
    )
    for spec in feature_specs:
        plan.addFeature(spec)
    # Configure the engine with the plan's dataflow.
    analysis_engine = Engine()
    analysis_engine.load(plan.getDataFlow())
    # Run the file through the engine and collect every output.
    file_processor = AudioFileProcessor()
    file_processor.processFile(analysis_engine, audio_location)
    return analysis_engine.readAllOutputs()
class MFCCExtractor:
    """Extract MFCC features from WAV files via yaafe.

    blockSize / stepSize may be overridden through keyword arguments at
    construction time (yaafe defaults 1024 / 512 otherwise).
    """

    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()
        # dict.get replaces the double membership test of the original.
        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        """Return the MFCC matrix for the file at *path*.

        Passing save_to_disk=<anything> also writes the matrix next to
        the source file as "<path>.mfc.csv".
        """
        self.path = path
        # Parenthesized print is valid in both Python 2 and Python 3
        # (the original `print path` statement is Python-2 only).
        print(path)
        # NOTE(review): the wave handle is kept open on self.wave as in
        # the original; it is only used to read the frame rate.
        self.wave = wave.open(path, 'r')
        self.rate = self.wave.getframerate()
        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)
        feats = self.engine.readAllOutputs()
        if 'save_to_disk' in args:
            self.__save_to_disk(feats['mfcc'])
        return feats['mfcc']

    def __save_to_disk(self, feats):
        # Persist the feature matrix as CSV beside the source audio.
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
def _energy(audio_location, sample_rate):
    """Compute the yaafe Energy feature for a single audio file.

    Equivalent to the command line:
        python yaafe.py -r SAMPLERATE -f "energy: Energy PARAMETERS" WAV-LOCATION
    where SAMPLERATE is the samplerate of the file being processed.

    Optional yaafe parameters (defaults used here):
      - blockSize (default=1024): output frames size
      - stepSize (default=512): step between consecutive frames
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('Energy: Energy')
    # Parameterized alternative:
    # ('energy: Energy blockSize=1024 stepSize=512')
    energy_engine = Engine()
    energy_engine.load(plan.getDataFlow())
    # Run the file through the configured engine.
    AudioFileProcessor().processFile(energy_engine, audio_location)
    return energy_engine.readAllOutputs()
class MFCCExtractor:
    """Extract MFCC features from WAV files via yaafe.

    blockSize / stepSize may be overridden through keyword arguments at
    construction time (yaafe defaults 1024 / 512 otherwise).
    """

    def __init__(self, **args):
        self.engine = Engine()
        self.afp = AudioFileProcessor()
        # dict.get replaces the double membership test of the original.
        self.block_size = args.get('block_size', 1024)
        self.step_size = args.get('step_size', 512)

    def extract(self, path, **args):
        """Return the MFCC matrix for the file at *path*.

        Passing save_to_disk=<anything> also writes the matrix next to
        the source file as "<path>.mfc.csv".
        """
        self.path = path
        # Parenthesized print is valid in both Python 2 and Python 3
        # (the original `print path` statement is Python-2 only).
        print(path)
        # NOTE(review): the wave handle is kept open on self.wave as in
        # the original; it is only used to read the frame rate.
        self.wave = wave.open(path, 'r')
        self.rate = self.wave.getframerate()
        fp = FeaturePlan(sample_rate=self.rate)
        fp.addFeature('mfcc: MFCC blockSize={} stepSize={}'.format(
            self.block_size, self.step_size))
        self.engine.load(fp.getDataFlow())
        self.afp.processFile(self.engine, path)
        feats = self.engine.readAllOutputs()
        if 'save_to_disk' in args:
            self.__save_to_disk(feats['mfcc'])
        return feats['mfcc']

    def __save_to_disk(self, feats):
        # Persist the feature matrix as CSV beside the source audio.
        new_path = self.path + ".mfc.csv"
        np.savetxt(new_path, feats, delimiter=",")
def _ExtractZCRAndFlux(self, audio_location, sample_rate):
    """Extract zero-crossing rate and spectral flux from one audio file.

    Returns the yaafe output dict keyed 'zcr' and 'flux'.
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    for spec in ('zcr: ZCR', 'flux: SpectralFlux'):
        plan.addFeature(spec)
    analysis_engine = Engine()
    analysis_engine.load(plan.getDataFlow())
    # Process the file and collect both outputs in one pass.
    AudioFileProcessor().processFile(analysis_engine, audio_location)
    return analysis_engine.readAllOutputs()
def startEngine(path):
    """Load the module-level dataflow into a fresh yaafe Engine, process
    *path*, and store the outputs in the module-level ``features``.

    NOTE(review): assumes ``df`` is a DataFlow defined at module scope —
    confirm it is initialized before this is called.
    """
    global features
    analysis_engine = Engine()
    analysis_engine.load(df)
    file_processor = AudioFileProcessor()
    file_processor.processFile(analysis_engine, path)
    features = analysis_engine.readAllOutputs()
    # Status message ("Yaafe engine started successfully").
    return 'Yaafe引擎启动成功'
def _zcr(self, audio_location, sample_rate):
    """Extract the zero-crossing rate of an audio file via yaafe.

    Behaves the same as:
        python yaafe.py -r SAMPLERATE -f "zcr: ZCR blockSize=1024 stepSize=512" WAV-LOCATION
    where SAMPLERATE is the samplerate of the file being processed,
    'zcr' is the name for the process (zcr1, zcr2, ...), ZCR is the
    feature being extracted, blockSize is the output frame size and
    stepSize the step between consecutive frames (yaafe defaults
    1024 / 512 are used here).
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('ZCR: ZCR')
    # A dataflow could instead be loaded from a file:
    #   df = DataFlow(); df.load(dataflow_file)
    flow = plan.getDataFlow()
    analysis_engine = Engine()
    analysis_engine.load(flow)
    # Extract features from the audio using AudioFileProcessor.
    file_processor = AudioFileProcessor()
    file_processor.processFile(analysis_engine, audio_location)
    extracted = analysis_engine.readAllOutputs()
    # To write results to csv files instead:
    #   afp.setOutputFormat('csv', 'output', {'Precision': '8'})
    #   afp.processFile(engine, audiofile)
    # which creates output/<name>.wav.<feature>.csv files.
    # The engine could be cleared for reuse with engine.reset().
    return extracted
class Extractor(object):
    """Walk a directory of WAV files, run each through a yaafe feature
    plan, and accumulate the results in a FeatureSpace.
    """

    def __init__(self):
        self.fp = FeaturePlan(sample_rate=44100)
        # NOTE(review): the spec after 'feat:' is empty — confirm which
        # yaafe feature was intended here; left unchanged because it is
        # runtime behavior.
        self.fp.addFeature('feat: ')
        self.engine = Engine()
        self.engine.load(self.fp.getDataFlow())
        self.afp = AudioFileProcessor()
        self.mfcc = []
        self.songs = []

    def recurse(self, directory):
        """Return the unique *.wav paths found anywhere under *directory*."""
        results = []
        for root, dirs, files in os.walk(directory):
            for f in files:
                current_file = os.path.join(root, f)
                if current_file.endswith('wav') and current_file not in results:
                    results.append(current_file)
        # Parenthesized print: valid in both Python 2 and Python 3
        # (original used the Python-2-only print statement).
        print(results)
        return results

    def extract_feature(self, f):
        """Run one file through the engine and return its 'feat' output."""
        self.afp.processFile(self.engine, f)
        feats = self.engine.readAllOutputs()
        return feats['feat']

    def build_feature_space(self):
        """Extract features for every WAV under sys.argv[1], add them to a
        FeatureSpace, and print the minimum distance from the last one."""
        self.space = FeatureSpace('mfcc')
        last_feature = None
        for f in self.recurse(sys.argv[1]):
            data = self.extract_feature(f)
            temp_feature = Feature(f, ['mfcc', data])
            self.space.add(temp_feature)
            last_feature = temp_feature
        print(self.space.min_dist(last_feature))
def _mfcc(self, audio_location, sample_rate):
    """Extract MFCCs from an audio file via yaafe.

    Behaves the same as:
        python yaafe.py -r SAMPLERATE -f "mfcc: MFCC PARAMETERS" WAV-LOCATION
    where SAMPLERATE is the samplerate of the file being processed.

    yaafe MFCC parameters (defaults used here):
      - CepsIgnoreFirstCoeff (default=1): 0 keeps the first cepstral
        coefficient, 1 ignores it
      - CepsNbCoeffs (default=13): number of cepstral coefficients to keep
      - FFTWindow (default=Hanning): Hanning|Hamming|None
      - MelMaxFreq (default=6854.0): max frequency of the mel filter bank
      - MelMinFreq (default=130.0): min frequency of the mel filter bank
      - MelNbFilters (default=40): number of mel filters
      - blockSize (default=1024) / stepSize (default=512)
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('mfcc: MFCC')
    # Fully parameterized alternative:
    # ('mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=13
    #   FFTWindow=Hanning MelMaxFreq=6854 MelMinFreq=130 MelNbFilters=40
    #   blockSize=1024 stepSize=512')
    mfcc_engine = Engine()
    mfcc_engine.load(plan.getDataFlow())
    # Process the file and return every extracted output.
    AudioFileProcessor().processFile(mfcc_engine, audio_location)
    return mfcc_engine.readAllOutputs()
def _SpectralFlux(self, audio_location, sample_rate):
    """Extract spectral flux from an audio file via yaafe.

    Behaves the same as:
        python yaafe.py -r SAMPLERATE -f "flux: SpectralFlux PARAMETERS" WAV-LOCATION
    where SAMPLERATE is the samplerate of the file being processed.

    yaafe SpectralFlux parameters (defaults used here):
      - FFTLength (default=0): frame length for the FFT; frames are
        zero-padded or truncated to this size, 0 keeps original length
      - FFTWindow (default=Hanning): Hanning|Hamming|None
      - FluxSupport (default=All): 'All' uses all bins, 'Increase' uses
        only bins that are increasing
      - blockSize (default=1024) / stepSize (default=512)
    """
    plan = FeaturePlan(sample_rate=sample_rate)
    plan.addFeature('Flux: SpectralFlux')
    # Fully parameterized alternative:
    # ('flux: SpectralFlux FFTLength=0 FFTWindow=Hanning FluxSupport=All
    #   blockSize=1024 stepSize=512')
    flux_engine = Engine()
    flux_engine.load(plan.getDataFlow())
    # Process the file and return every extracted output.
    AudioFileProcessor().processFile(flux_engine, audio_location)
    return flux_engine.readAllOutputs()
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The Files' sample rate
        plan_filename   The Feature Plan filename

    Methods:
        process(audiofile)         Process audiofile and return features
        get_X(entries_list, feat)  Fetch array of processed data from Database
        get_y                      Fetch subdir i's from Database

    Init: Yaafe(sample_rate, feature_plan)
    '''

    # Default plan: every feature is computed on 512-sample blocks with a
    # 128-sample step, then statistically integrated over 40 frames with
    # a step of 8 frames.
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # BUGFIX: removed stray comma after blockSize=512 which broke
        # yaafe's "key=value key=value" parameter parsing for this entry.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(sample_rate=self.sample_rate, normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file) """
        # BUGFIX: write one "name: description" entry per line (the
        # original wrote them all run together with no separator) and use
        # a context manager so the handle is closed on error. The loop
        # variable no longer shadows the `name` parameter.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
        feature extractor

        NOTE(review): ``sorted(out)`` returns only the sorted output
        *names* (dict keys), not the feature data; kept unchanged for
        interface compatibility — confirm callers expect the key list.
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
""" Stand-alone application to demonstrate yaafe's transformations """ import numpy as np import matplotlib.pyplot as plt from yaafelib import FeaturePlan, Engine, AudioFileProcessor FPLAN = FeaturePlan(sample_rate=44100) FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128") FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > " "StatisticalIntegrator NbFrames=40 StepNbFrames=8") ENGINE = Engine() ENGINE.load(FPLAN.getDataFlow()) PROCESSOR = AudioFileProcessor() PROCESSOR.processFile(ENGINE, 'track.wav') DATA = ENGINE.readAllOutputs() ENGINE.flush() X_MFCC = DATA['mfcc'] X_MFCC_STAT = DATA['mfcc_stat'] MFCC_DESC = list() for i in range(1, 14): desc = "Average for Band {}".format(i) MFCC_DESC.append(desc) for i in range(1, 14): desc = "Standard Dev. for Band {}".format(i) MFCC_DESC.append(desc) plt.ion() FIG = plt.figure()
class Yaafe(EnhancedObject):
    '''Yaafe toolbox wrapper. To be used with Database object.

    Attributes:
        sample_rate     The Files' sample rate
        plan_filename   The Feature Plan filename

    Methods:
        process(audiofile)         Process audiofile and return features
        get_X(entries_list, feat)  Fetch array of processed data from Database
        get_y                      Fetch subdir i's from Database

    Init: Yaafe(sample_rate, feature_plan)
    '''

    # Default plan: every feature is computed on 512-sample blocks with a
    # 128-sample step, then statistically integrated over 40 frames with
    # a step of 8 frames.
    _features = {
        'spec_rolloff': ("SpectralRolloff blockSize=512 stepSize=128 "
                         "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_shape': ("SpectralShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spec_flux': ("SpectralFlux blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'amp_mod': ("AmplitudeModulation blockSize=512 stepSize=128 >"
                    "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'auto': ("AutoCorrelation blockSize=512 stepSize=128 >"
                 "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'lpc': ("LPC blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'loudness': ("Loudness blockSize=512 stepSize=128 >"
                     "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'mfcc': ("MFCC blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        # BUGFIX: removed stray comma after blockSize=512 which broke
        # yaafe's "key=value key=value" parameter parsing for this entry.
        'mel_spectrum': ("MelSpectrum blockSize=512 stepSize=128 >"
                         "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'obsi': ("OBSI blockSize=512 stepSize=128 > StatisticalIntegrator "
                 "NbFrames=40 StepNbFrames=8"),
        'obsir': ("OBSIR blockSize=512 stepSize=128 >"
                  "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_sharp': ("PerceptualSharpness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'perc_spread': ("PerceptualSpread blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_crest': ("SpectralCrestFactorPerBand blockSize=512 "
                        "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                        "StepNbFrames=8"),
        'spec_decr': ("SpectralDecrease blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat': ("SpectralFlatness blockSize=512 stepSize=128 >"
                       "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_flat_band': ("SpectralFlatnessPerBand blockSize=512 "
                            "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                            "StepNbFrames=8"),
        'spect_slope': ("SpectralSlope blockSize=512 stepSize=128 >"
                        "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'spect_var': ("SpectralVariation blockSize=512 stepSize=128 >"
                      "StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'temp_shape': ("TemporalShapeStatistics blockSize=512 stepSize=128 "
                       "> StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'zcr': ("ZCR blockSize=512 stepSize=128 > StatisticalIntegrator "
                "NbFrames=40 StepNbFrames=8"),
        'env_shape': ("EnvelopeShapeStatistics blockSize=512 stepSize=128"
                      " > StatisticalIntegrator NbFrames=40 StepNbFrames=8"),
        'comp_onest': ("ComplexDomainOnsetDetection blockSize=512 "
                       "stepSize=128 > StatisticalIntegrator NbFrames=40 "
                       "StepNbFrames=8"),
    }

    def __init__(self, sample_rate, features=None):
        if features is None:
            features = self._features
        self.sample_rate = sample_rate
        self.initialize(features)
        self.features = features

    def initialize(self, feature_dict):
        """ Run the required boilerplate for yaafe """
        self.feature_dict = feature_dict
        self.fp = FeaturePlan(
            sample_rate=self.sample_rate, normalize=0.98)
        for name, desc in self.feature_dict.items():
            self.fp.addFeature("{0}: {1}".format(name, desc))
        self.df = self.fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(self.df)
        self.afp = AudioFileProcessor()
        return self

    def save_fplan(self, name):
        """ Save a feature plan (text file) """
        # BUGFIX: write one "name: description" entry per line (the
        # original wrote them all run together with no separator) and use
        # a context manager so the handle is closed on error. The loop
        # variable no longer shadows the `name` parameter.
        with open("{}.txt".format(name), 'w') as text_file:
            for feat_name, desc in self.features.items():
                text_file.write("{}: {}\n".format(feat_name, desc))

    def process(self, audiofile):
        """ Process function for running a file through yaafe's
        feature extractor

        NOTE(review): ``sorted(out)`` returns only the sorted output
        *names* (dict keys), not the feature data; kept unchanged for
        interface compatibility — confirm callers expect the key list.
        """
        self.afp.processFile(self.engine, audiofile)
        out = self.engine.readAllOutputs()
        self.engine.flush()
        return sorted(out)
""" Stand-alone application to demonstrate yaafe's transformations """ import numpy as np import matplotlib.pyplot as plt from yaafelib import FeaturePlan, Engine, AudioFileProcessor FPLAN = FeaturePlan(sample_rate=44100) FPLAN.addFeature("mfcc: MFCC blockSize=512 stepSize=128") FPLAN.addFeature("mfcc_stat: MFCC blockSize=512 stepSize=128 > " "StatisticalIntegrator NbFrames=40 StepNbFrames=8") ENGINE = Engine() ENGINE.load(FPLAN.getDataFlow()) PROCESSOR = AudioFileProcessor() PROCESSOR.processFile(ENGINE, 'track.wav') DATA = ENGINE.readAllOutputs() ENGINE.flush() X_MFCC = DATA['mfcc'] X_MFCC_STAT = DATA['mfcc_stat'] MFCC_DESC = list() for i in range(1, 14): desc = "Average for Band {}".format(i) MFCC_DESC.append(desc) for i in range(1, 14): desc = "Standard Dev. for Band {}".format(i) MFCC_DESC.append(desc) plt.ion() FIG = plt.figure() FIG.set_size_inches(14, 8)