def extract(self, wav):
    """Extract features

    Parameters
    ----------
    wav : string
        Path to wav file.

    Returns
    -------
    features : SlidingWindowFeature

    Raises
    ------
    AssertionError
        If the sample rate read from `wav` differs from `self.sample_rate`.
    """
    # hack
    data_flow, stack = self.get_flow_and_stack()

    engine = yaafelib.Engine()
    engine.load(data_flow)

    sample_rate, raw_audio = scipy.io.wavfile.read(wav)
    # Raised explicitly (instead of a bare `assert`) so the check is not
    # stripped under `python -O`; the exception type is unchanged.
    if sample_rate != self.sample_rate:
        raise AssertionError("sample rate mismatch")

    # The engine is fed a C-contiguous float64 array with a single row.
    audio = np.array(raw_audio, dtype=np.float64, order='C').reshape(1, -1)
    features = engine.processAudio(audio)

    # Concatenate the per-feature outputs side by side, in `stack` order.
    data = np.hstack([features[name] for name in stack])

    sliding_window = YaafeFrame(blockSize=self.block_size,
                                stepSize=self.step_size,
                                sampleRate=self.sample_rate)
    return SlidingWindowFeature(data, sliding_window)
def extract_audio_features(sigdata):
    '''Extract a fixed set of YAAFE audio features from each row of `sigdata`.

    `sigdata` is indexed as `sigdata[row, :]` for every row in
    `range(sigdata.shape[0])`; each row is reshaped to a single-row array
    and passed to the engine. Returns a list with one `processAudio`
    result per row.
    '''
    window = 'Hanning'
    # using 80 / 40 here produces NaNs in mel spectrum, for some reason
    block = 120
    step = 60

    windowed = (window, block, step)
    framed = (block, step)
    # (template, format arguments), in the order they are added to the plan.
    definitions = (
        ('CDOD: ComplexDomainOnsetDetection FFTWindow=%s blockSize=%d stepSize=%d', windowed),
        ('LPC: LPC LPCNbCoeffs=4 blockSize=%d stepSize=%d', framed),
        ('MelSpec: MelSpectrum FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d', windowed),
        ('MFCC: MFCC CepsIgnoreFirstCoeff=1 CepsNbCoeffs=12 FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d', windowed),
        ('SF: SpectralFlux FFTWindow=%s FluxSupport=Increase blockSize=%d stepSize=%d', windowed),
        ('SpecStats: SpectralShapeStatistics FFTWindow=%s blockSize=%d stepSize=%d', windowed),
        ('SpecSlope: SpectralSlope FFTWindow=%s blockSize=%d stepSize=%d', windowed),
        ('SpecVar: SpectralVariation FFTWindow=%s blockSize=%d stepSize=%d', windowed),
    )

    fp = yl.FeaturePlan(sample_rate=SAMPLE_RATE)
    for template, args in definitions:
        fp.addFeature(template % args)

    engine = yl.Engine()
    engine.load(fp.getDataFlow())

    return [
        engine.processAudio(np.reshape(sigdata[row, :], [1, -1]))
        for row in range(sigdata.shape[0])
    ]
def createAFP():
    """Create a YAAFE engine loaded with an energy + MFCC plan.

    The plan uses a 16000 Hz sample rate and two features: `energy`
    (Energy) and `mfcc` (MFCC, blockSize=2048, stepSize=1024).

    Returns
    -------
    tuple
        `(afp, engine)`: a new `yaafelib.AudioFileProcessor` and the
        loaded `yaafelib.Engine`.
    """
    fp = yaafelib.FeaturePlan(sample_rate=16000)
    fp.addFeature('energy: Energy')
    fp.addFeature('mfcc: MFCC blockSize=2048 stepSize=1024')

    # The data flow is requested once and handed straight to the engine.
    engine = yaafelib.Engine()
    engine.load(fp.getDataFlow())

    afp = yaafelib.AudioFileProcessor()
    return afp, engine
def feature_indices():
    """List every (feature name, column index) pair of the 'features.txt' plan.

    The plan is loaded into an engine, the engine outputs are sorted by
    (name, size), and each output is expanded into one pair per column
    index in `range(size)`.

    Returns
    -------
    list of tuple
        `(name, i)` pairs, grouped by feature in sorted order.
    """
    fp = yaafe.FeaturePlan()
    fp.loadFeaturePlan('features.txt')

    engine = yaafe.Engine()
    engine.load(fp.getDataFlow())

    sizes = sorted(
        (name, feat['size']) for name, feat in engine.getOutputs().items()
    )
    # A nested comprehension flattens in linear time; summing lists onto []
    # re-copies the accumulated list for every feature.
    return [(name, i) for name, size in sizes for i in range(size)]
def __init__(self, duration=0.025, step=0.010, stack=1):
    """Store the framing parameters and create an empty YAAFE engine.

    `duration`, `step` and `stack` are stored unchanged on the instance;
    no feature plan is loaded into `engine_` here.
    """
    # TODO: add sample_rate as argument
    super(YaafeFeatureExtractor, self).__init__()

    self.duration = duration
    self.step = step
    self.stack = stack

    self.engine_ = yaafelib.Engine()
def __init__(self, app_config, rate):
    """Load the 'features.config' plan and prepare a YAAFE engine.

    The plan file is looked up in `app_config.program_directory` and the
    plan is created with `rate` as its sample rate. If the plan cannot be
    loaded the process exits through `sys.exit`.
    """
    self._rate = rate
    # Names of the extracted features, in output order.
    self.ExtractedFeaturesList = [
        'LPC1_mean',
        'LSF7_min',
        'SpectralFlatness_min',
        'SSS_centroid_min',
        'SSS_spread_min',
        'PerceptualSpread_min',
        'SpectralSlope_min',
        'PerceptualSharpness_min',
        'SpectralDecrease_max',
        'OBSI0_mm',
        'SpectralRolloff_min',
    ]

    plan = yaafelib.FeaturePlan(sample_rate=rate)
    plan_file = os.path.join(app_config.program_directory, 'features.config')
    if not plan.loadFeaturePlan(plan_file):
        sys.exit('Feature plan not loaded correctly')

    self._engine = yaafelib.Engine()
    self._engine.load(plan.getDataFlow())
def __init__(self, duration=0.025, step=0.010, stack=1):
    """Store the framing parameters, build the sliding window and an engine.

    The sliding window starts half a `duration` before zero and uses
    `duration` and `step` as given. No feature plan is loaded into
    `engine_` here.
    """
    super(YaafeFeatureExtractor, self).__init__()

    self.duration, self.step, self.stack = duration, step, stack

    self.sliding_window_ = SlidingWindow(
        start=-0.5 * self.duration,
        duration=self.duration,
        step=self.step,
    )
    self.engine_ = yaafelib.Engine()
def yaafe2features(wavefiles, out_file, feature_type='MFCC'):
    """Generate features with yaafe and put them in h5features format.

    Whole wavefiles are encoded as internal h5features files.
    To use them with abkhazia's ABX tasks, these need to be segmented
    according to an abkhazia segments.txt
    (abkhazia/utilities/segment_features.py can be used for this)

    Supported feature types:
    - 'MFCC' (default)
    - 'CMSP13' (cubic-root-compressed 13-frequency-channels Mel spectrogram)

    Raises AssertionError if `feature_type` is not one of the above.
    """
    # Raised explicitly so the validation is not stripped under `python -O`;
    # the exception type and message are the same as with an assert.
    if feature_type not in ['MFCC', 'CMSP13']:
        raise AssertionError(
            'Unsupported feature_type {0}'.format(feature_type))

    sample_rate = 16000
    block_size = 400  # 0.025s at 16 kHz
    step_size = 160  # 0.01s at 16 kHz
    # Frame period in seconds, derived from the step size so the time axis
    # cannot drift from the analysis settings. float() keeps this a true
    # division on Python 2 as well.
    frame_period = step_size / float(sample_rate)
    framing = 'blockSize={0} stepSize={1}'.format(block_size, step_size)

    feature_plan = ya.FeaturePlan(sample_rate=sample_rate)
    if feature_type == 'MFCC':
        feat_name = 'mfcc'
        feature_plan.addFeature('{0}: MFCC {1}'.format(feat_name, framing))
    elif feature_type == 'CMSP13':
        feat_name = 'melsp'
        feature_plan.addFeature(
            '{0}: MelSpectrum MelNbFilters=13 {1}'.format(feat_name, framing))

    engine = ya.Engine()
    engine.load(feature_plan.getDataFlow())
    # One processor is reused for every file.
    afp = ya.AudioFileProcessor()

    wav_ids = []
    times = []
    features = []
    for wavefile in wavefiles:
        wav_ids.append(p.splitext(p.basename(wavefile))[0])
        afp.processFile(engine, wavefile)
        feat_out = engine.readAllOutputs()[feat_name]
        if feature_type == 'CMSP13':
            # need to add compression by hand
            feat_out = np.power(feat_out, 1 / 3.)
        # times according to:
        # http://yaafe.sourceforge.net/features.html?highlight=mfcc#yaafefeatures.Frames
        nframes = feat_out.shape[0]
        times.append(frame_period * np.arange(nframes))
        features.append(feat_out)

    h5features.write(out_file, 'features', wav_ids, times, features)
def _wave2features(self, wavearray):
    """Run the features in `self.definition` over an in-memory signal.

    A new engine and feature plan (sample rate `self.samplerate`) are
    built on every call. `wavearray` is converted to a C-contiguous
    float64 array with a single row before processing.

    Returns whatever `engine.processAudio` returns for that array.

    Raises AssertionError if a feature definition is rejected by the plan.
    """
    engine = yaafelib.Engine()
    featureplan = yaafelib.FeaturePlan(sample_rate=self.samplerate)

    for name, recipe in self.definition:
        spec = "{name}: {recipe}".format(name=name, recipe=recipe)
        # addFeature is called outside of any assert: an assert wrapping the
        # call is removed under `python -O`, which would leave the plan
        # empty. AssertionError is kept as the failure type.
        if not featureplan.addFeature(spec):
            raise AssertionError(
                "could not add feature {0!r}".format(spec))

    dataflow = featureplan.getDataFlow()
    engine.load(dataflow)

    wavearray = np.array(wavearray, dtype=np.float64,
                         order='C').reshape((1, -1))
    features = engine.processAudio(wavearray)
    engine.reset()
    return features
def setup(self, channels=None, samplerate=None, blocksize=None,
          totalframes=None):
    """Run the parent setup, then build and load the YAAFE data flow.

    Every entry of `self.feature_plan` is added to a plan created with
    `samplerate`. The resulting data flow is stored on `self.data_flow`,
    loaded into a new engine stored on `self.yaafe_engine`, and the
    engine is reset.
    """
    super(Yaafe, self).setup(channels, samplerate, blocksize, totalframes)

    # Feature plan -> data flow
    plan = yaafelib.FeaturePlan(sample_rate=samplerate)
    for definition in self.feature_plan:
        plan.addFeature(definition)
    self.data_flow = plan.getDataFlow()

    # Engine configured from the data flow
    engine = yaafelib.Engine()
    self.yaafe_engine = engine
    engine.load(self.data_flow)
    engine.reset()
def features(self, feature_set):
    """Compute one aggregated feature vector for `self.frames`.

    Parameters
    ----------
    feature_set : str
        'auto' or 'all' loads the plan from 'features.txt'; any other
        value loads 'features_reduced.txt'.

    Returns
    -------
    None if `self.frames` is empty; otherwise the concatenation of
    `mean(values)` for every engine output, taken in sorted name order.
    """
    if len(self.frames) == 0:
        return None

    if feature_set in ['auto', 'all']:
        plan_file = 'features.txt'
    else:
        plan_file = 'features_reduced.txt'

    fp = yaafe.FeaturePlan()
    fp.loadFeaturePlan(plan_file)

    engine = yaafe.Engine()
    engine.load(fp.getDataFlow())

    # The frames are wrapped in a list so the engine gets one extra
    # leading dimension of length 1.
    feats = engine.processAudio(np.array([self.frames]))

    # NOTE(review): `mean` is resolved from the enclosing module; it
    # presumably reduces each output over its frames - confirm.
    attributes = [mean(values) for (_, values) in sorted(feats.items())]
    return np.concatenate(attributes)
def __init__(self, fs: int, config: dict):
    """Build a YAAFE engine from the enabled entries of `config`.

    Each `config` value is read through its 'use' and 'params' keys.
    For every entry whose 'use' is truthy, the string form of 'params'
    is turned into space-separated `key=value` pairs and appended to the
    feature name. If nothing is enabled, `self.engine` is set to None.
    """
    definitions = {}
    for feature_name, feature_params in config.items():
        if not feature_params['use']:
            continue
        # "{'a': 1, 'b': 2}" -> "{a=1 b=2}"; the slice drops the braces.
        rendered = str(feature_params['params'])
        rendered = rendered.replace("'", '').replace(",", "")
        rendered = rendered.replace(": ", "=")
        definitions[feature_name] = feature_name + ' ' + rendered[1:-1]

    if not definitions:
        self.engine = None
        return

    feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    for feature_name, setting in definitions.items():
        feature_plan.addFeature(feature_name + ': ' + setting)

    data_flow = feature_plan.getDataFlow()
    self.engine = yaafelib.Engine()
    self.engine.load(data_flow)
def __init__(self, fs: int, block_size=1024, step_size=None,
             selected_features='all'):
    """Build a YAAFE engine for the selected built-in feature definitions.

    A falsy `step_size` is replaced by `block_size // 2`. When
    `selected_features` is 'all', every definition is used; otherwise a
    definition is used when its name satisfies `name in selected_features`.
    `fs` is stored on `self.fs` and used as the plan's sample rate.
    """
    step_size = step_size or block_size // 2
    self.fs = fs

    # Parameter suffixes shared by the definitions below.
    hop = f'stepSize={step_size}'
    framing = f'blockSize={block_size} {hop}'
    fft_framing = f'FFTLength=0 FFTWindow=Hanning {framing}'

    features_config = {
        # Chroma is the only definition given without a blockSize.
        'Chroma': ('Chroma2 CQTAlign=c CQTBinsPerOctave=48 CQTMinFreq=27.5 '
                   'CQTNbOctaves=7 CZBinsPerSemitone=1 '
                   'CZNbCQTBinsAggregatedToPCPBin=-1 CZTuning=440 ' + hop),
        'LPC': f'LPC LPCNbCoeffs=1 {framing}',
        'LSF': f'LSF {framing}',
        'MFCC': ('MFCC CepsIgnoreFirstCoeff=1 CepsNbCoeffs=13 '
                 'FFTWindow=Hanning MelMaxFreq=6000.0 MelMinFreq=400.0 '
                 'MelNbFilters=40 ' + framing),
        'OBSI': f'OBSI FFTLength=0 FFTWindow=Hanning OBSIMinFreq=27.5 {framing}',
        'SpectralCrestFactorPerBand': f'SpectralCrestFactorPerBand {fft_framing}',
        'SpectralDecrease': f'SpectralDecrease {fft_framing}',
        'SpectralFlatness': f'SpectralFlatness {fft_framing}',
        'SpectralFlux': f'SpectralFlux FFTLength=0 FFTWindow=Hanning FluxSupport=All {framing}',
        'SpectralRolloff': f'SpectralRolloff {fft_framing}',
        'SpectralVariation': f'SpectralVariation {fft_framing}',
        'ZCR': f'ZCR {framing}',
    }

    if selected_features == 'all':
        selected_features = features_config.keys()

    feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    for feature_name, setting in features_config.items():
        if feature_name not in selected_features:
            continue
        feature_plan.addFeature(feature_name + ': ' + setting)

    data_flow = feature_plan.getDataFlow()
    self.engine = yaafelib.Engine()
    self.engine.load(data_flow)
def calculate_spectrogram(y, fs, block_size=1024, step_size=None):
    """Compute a magnitude spectrogram of `y` with YAAFE.

    `step_size` defaults to `block_size // 2`. The signal is reshaped to
    a single row and cast to float64 before processing.

    Returns `(freq, time, spectrum)`: `spectrum` is the engine's
    'MagnitudeSpectrum' output, `time` has one linearly spaced value per
    entry along its first axis, and `freq` has one linearly spaced value
    per entry along its last axis, from 0 to `fs // 2`.
    """
    if step_size is None:
        step_size = block_size // 2

    plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    plan.addFeature(
        f'MagnitudeSpectrum: MagnitudeSpectrum blockSize={block_size} stepSize={step_size}'
    )

    engine = yaafelib.Engine()
    engine.load(plan.getDataFlow())

    mono = y.reshape(1, -1).astype('float64')
    spectrum = engine.processAudio(mono)['MagnitudeSpectrum']

    # The time axis is offset by half a block at both ends, regardless of
    # the step size in use.
    half_block = block_size // 2
    n_frames = spectrum.shape[0]
    n_bins = spectrum.shape[-1]
    time = np.linspace(half_block / fs, (len(y) - half_block) / fs, n_frames)
    freq = np.linspace(0, fs // 2, num=n_bins)
    return freq, time, spectrum
def __call__(self, wav):
    """Extract features

    Parameters
    ----------
    wav : string
        Path to wav file.

    Returns
    -------
    features : SlidingWindowFeature

    Raises
    ------
    AssertionError
        If a feature definition is rejected by the feature plan, or if
        the sample rate read from `wav` differs from `self.sample_rate`.
    """
    definition = self.definition()

    # --- prepare the feature plan
    feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
    for name, recipe in definition:
        spec = "{name}: {recipe}".format(name=name, recipe=recipe)
        # addFeature is called outside of any assert: an assert wrapping
        # the call is removed under `python -O`, which would leave the
        # plan empty. AssertionError is kept as the failure type.
        if not feature_plan.addFeature(spec):
            raise AssertionError(
                "could not add feature {0!r}".format(spec))

    # --- prepare the Yaafe engine
    data_flow = feature_plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(data_flow)

    sample_rate, raw_audio = scipy.io.wavfile.read(wav)
    # Explicit raise for the same reason as above.
    if sample_rate != self.sample_rate:
        raise AssertionError("sample rate mismatch")

    # The engine is fed a C-contiguous float64 array with a single row.
    audio = np.array(raw_audio, dtype=np.float64, order='C').reshape(1, -1)
    features = engine.processAudio(audio)

    # Concatenate the per-feature outputs side by side, in definition order.
    data = np.hstack([features[name] for name, _ in definition])

    sliding_window = YaafeFrame(
        blockSize=self.block_size, stepSize=self.step_size,
        sampleRate=self.sample_rate)
    return SlidingWindowFeature(data, sliding_window)
def extract_feature(filename, offset):
    """Extract YAAFE features from a clip of `filename` starting at `offset`.

    The clip is sliced from the loaded audio at `int(offset) * 1000` with
    a length of 30000, exported as WAV into
    `app.config['UPLOAD_FOLDER']` unless a file with that name already
    exists, and then processed with the plan from
    'static/featureplan.txt'.

    Returns the result of `preprocessed` applied to all engine outputs.
    """
    fp = yaafelib.FeaturePlan(sample_rate=22050, resample=True)
    fp.loadFeaturePlan('static/featureplan.txt')
    engine = yaafelib.Engine()
    engine.load(fp.getDataFlow())

    # Single-argument print calls produce the same output on Python 2 and
    # also parse on Python 3.
    print(filename)
    print(offset)

    sound = AudioSegment.from_file(filename)
    # NOTE(review): slice bounds are presumably milliseconds (offset in
    # seconds, 30 s clip) - confirm against the AudioSegment in use.
    clip_start = int(offset) * 1000
    clip_end = clip_start + 30000
    clip = sound[clip_start:clip_end]

    clip_path = os.path.join(
        app.config['UPLOAD_FOLDER'],
        os.path.splitext(os.path.basename(filename))[0] + str(offset) +
        ".cliped.wav")
    # An existing clip with the same name is reused instead of re-exported.
    if not os.path.isfile(clip_path):
        clip.export(clip_path, format="wav")

    afp = yaafelib.AudioFileProcessor()
    afp.processFile(engine, clip_path)
    feats = engine.readAllOutputs()
    return preprocessed(feats)
def __init__(
        self,
        sample_rate=16000,
        block_size=512,
        step_size=256,
        e=True,
        coefs=11,
        De=False,
        DDe=False,
        D=False,
        DD=False,
):
    """Store the MFCC options and load the matching YAAFE engine.

    `sample_rate`, `block_size` and `step_size` are forwarded to the
    parent constructor. The remaining options are stored unchanged on
    the instance before `self.definition()` is evaluated; its
    (name, recipe) pairs are added to a feature plan that is loaded into
    `self.engine`.

    Raises AssertionError if a feature definition is rejected by the plan.
    """
    super(YaafeMFCC, self).__init__(sample_rate=sample_rate,
                                    block_size=block_size,
                                    step_size=step_size)

    self.e = e
    self.coefs = coefs
    self.De = De
    self.DDe = DDe
    self.D = D
    self.DD = DD

    self.definition_ = self.definition()

    # --- prepare the feature plan
    feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
    for name, recipe in self.definition_:
        spec = "{name}: {recipe}".format(name=name, recipe=recipe)
        # addFeature is called outside of any assert: an assert wrapping
        # the call is removed under `python -O`, which would leave the
        # plan empty. AssertionError is kept as the failure type.
        if not feature_plan.addFeature(spec):
            raise AssertionError(
                "could not add feature {0!r}".format(spec))

    # --- prepare the Yaafe engine
    data_flow = feature_plan.getDataFlow()
    self.engine = yaafelib.Engine()
    self.engine.load(data_flow)
def process(self, signal, rate, segments, wavelet_decomposition_level=6,
            frame_overlap=512, wavelet_type='sym10'):
    """Extract one row of aggregated features per segment.

    Parameters
    ----------
    signal : array
        Audio signal; cast to float64 and wrapped in a single-row array.
    rate : int
        Sample rate passed to the feature plan.
    segments : sequence
        Each item is indexed as `segment[0]` (start) and `segment[1]`
        (end); both are divided by `frame_overlap` to get frame indices.
    wavelet_decomposition_level, wavelet_type
        Currently unused (the wavelet features are disabled).
    frame_overlap : int
        Divisor converting segment bounds to frame indices.

    Returns
    -------
    numpy.ndarray
        Shape `(len(segments), len(self.ExtractedFeaturesList))`, also
        stored on `self.ExtractedFeatures`.

    The process exits through `sys.exit` if 'features.config' cannot be
    loaded.
    """
    self.ExtractedFeaturesList = ['LPC1_mean',
                                  'LSF7_min',
                                  'SpectralFlatness_min',
                                  'SSS_centroid_min',
                                  'SSS_spread_min',
                                  'PerceptualSpread_min',
                                  'SpectralSlope_min',
                                  'PerceptualSharpness_min',
                                  'SpectralDecrease_max',
                                  'OBSI0_mm',
                                  'SpectralRolloff_min']
    self._signal = signal
    self._rate = rate
    self._segments = segments

    # Calculate spectral and temporal features
    feature_plan = yaafelib.FeaturePlan(sample_rate=rate)
    success = feature_plan.loadFeaturePlan('features.config')
    if not success:
        sys.exit('Feature plan not loaded correctly')
    engine = yaafelib.Engine()
    engine.load(feature_plan.getDataFlow())
    self.Features = engine.processAudio(np.array([signal.astype('float64')]))

    # Wavelet features are disabled. They were based on "Wavelets in
    # Recognition of Bird Sounds" by A. Selin et al., EURASIP Journal on
    # Advances in Signal Processing 2007, 2007:051806.
    no_segments = len(segments)
    self.ExtractedFeatures = np.zeros(
        shape=(no_segments, len(self.ExtractedFeaturesList)))

    feats = self.Features
    lpc1 = feats['LPC'][:, 1]
    lsf7 = feats['LSF'][:, 7]
    spectral_flatness = feats['SpectralFlatness']
    sss_centroid = feats['SpectralShapeStatistics'][:, 0]
    sss_spread = feats['SpectralShapeStatistics'][:, 1]
    perceptual_spread = feats['PerceptualSpread']
    spectral_slope = feats['SpectralSlope']
    perceptual_sharpness = feats['PerceptualSharpness']
    spectral_decrease = feats['SpectralDecrease']
    obsi0 = feats['OBSI'][:, 0]
    spectral_rolloff = feats['SpectralRolloff']

    for i, segment in enumerate(segments):
        # One frame of margin on each side. The start is clamped at 0: a
        # segment beginning inside the first frame would otherwise give
        # -1, which as a slice start wraps around to the end of the array.
        start = max(int(segment[0] / frame_overlap) - 1, 0)
        end = int(segment[1] / frame_overlap) + 1
        window = slice(start, end)

        # Same order as self.ExtractedFeaturesList.
        row = (
            lpc1[window].mean(),
            lsf7[window].min(),
            spectral_flatness[window].min(),
            sss_centroid[window].min(),
            sss_spread[window].min(),
            perceptual_spread[window].min(),
            spectral_slope[window].min(),
            perceptual_sharpness[window].min(),
            spectral_decrease[window].max(),
            maxmin(obsi0[window]),
            spectral_rolloff[window].min(),
        )
        for j, value in enumerate(row):
            self.ExtractedFeatures[i, j] = value

    return self.ExtractedFeatures
def compute_features(dataStruct):
    """
    Compute YAAFE and librosa spectral descriptors for every file listed
    in `dataStruct` and store them back into `dataStruct`.

    Args:
        - dataStruct: dictionary; its "filepath" entry lists the audio
          files to load.

    Returns:
        - dataStruct: the same dictionary, extended with "signal",
          "sRate", "SpectralFeatures" and, for every feature name, the
          lists `name`, `name + 'Mean'`, `name + 'Std'`, `name + 'Max'`.

    NOTE(review): 'SpectralRolloff' is listed for both YAAFE and librosa,
    so that key (and its Mean/Std/Max lists) receives two entries per
    file while the other features receive one - confirm this is intended.
    """
    ### --- INIT --- ###
    # DSP settings: the hop is set equal to the window size
    win_size = 1024
    hop_size = round(win_size)

    # Listing audio features
    features_yaafe = [
        'SpectralFlatness', 'SpectralRolloff', 'PerceptualSharpness',
        'PerceptualSpread', 'SpectralDecrease', 'SpectralVariation',
        'SpectralFlux'
    ]
    features_libro = [
        'Loudness', 'SpectralCentroid', 'SpectralContrast',
        'SpectralRolloff', 'SpectralBandwidth'
    ]
    # librosa.feature function name for each entry of features_libro
    libro_functions = [
        'rmse', 'spectral_centroid', 'spectral_contrast',
        'spectral_rolloff', 'spectral_bandwidth'
    ]
    dataStruct["SpectralFeatures"] = features_yaafe + features_libro

    # New fields
    dataStruct["signal"] = []
    dataStruct["sRate"] = []
    # Four fields per descriptor: full temporal vector, mean, standard
    # deviation and maximum
    for name in dataStruct["SpectralFeatures"]:
        for suffix in ('', 'Mean', 'Std', 'Max'):
            dataStruct[name + suffix] = []

    def append_stats(name, val):
        # Mean / std / max of one descriptor for the current file.
        dataStruct[name + 'Mean'].append(np.mean(val))
        dataStruct[name + 'Std'].append(np.std(val))
        dataStruct[name + 'Max'].append(np.max(val))

    # YAAFE definitions only depend on the DSP settings
    yaafe_specs = [
        '%s: %s blockSize=%s stepSize=%s' % (name, name, win_size, hop_size)
        for name in features_yaafe
    ]

    ### --- Compute Feature --- ###
    print('\t \t \t Feature Extraction')
    for path in dataStruct["filepath"]:
        print('%s' % path)
        # Loading signal
        curSignal, curSRate = lib.load(path, mono=True, offset=0)
        # Storing signal data
        dataStruct["signal"].append(curSignal)
        dataStruct["sRate"].append(curSRate)

        # --- YAAFE extraction ---
        # A new plan and engine are built per file, at that file's rate
        fp = yaf.FeaturePlan(sample_rate=curSRate)
        for spec in yaafe_specs:
            fp.addFeature(spec)
        engine = yaf.Engine()
        engine.load(fp.getDataFlow())
        mono = curSignal.astype('float64').reshape((1, curSignal.shape[0]))
        features = engine.processAudio(mono)
        for key, val in sorted(features.items()):
            dataStruct[key].append(val)
            append_stats(key, val)

        # --- Librosa extraction ---
        for name, function_name in zip(features_libro, libro_functions):
            extractor = getattr(lib.feature, function_name)
            dataStruct[name].append(extractor(curSignal))
        # Statistics use the entry that was just appended
        for name in features_libro:
            append_stats(name, dataStruct[name][-1])

    ### --- Formatting --- ###
    return dataStruct
if args.derivatives: fp.addFeature('mfcc_d1: {} > Derivate DOrder=1'.format(mfcc_features)) fp.addFeature('mfcc_d2: {} > Derivate DOrder=2'.format(mfcc_features)) fp.addFeature('energy: {}'.format(energy_features)) if args.derivatives: fp.addFeature('energy_d1: {} > Derivate DOrder=1'.format(energy_features)) fp.addFeature('energy_d2: {} > Derivate DOrder=2'.format(energy_features)) if args.derivatives: keys = ['mfcc', 'mfcc_d1', 'mfcc_d2', 'energy', 'energy_d1', 'energy_d2'] else: keys = ['mfcc', 'energy'] df = fp.getDataFlow() engine = yaafelib.Engine() engine.load(df) afp = yaafelib.AudioFileProcessor() outfile = open(args.output, 'wb') total = 0 for filename in args.inputs: tar = tarfile.open(filename) total += len([f for f in tar if f.isfile()]) for j, filename in enumerate(args.inputs): tar = tarfile.open(filename) files = sorted([f for f in tar if f.isfile()], key=lambda f: f.name) for i, fileinfo in enumerate(files):
def export_features(
        path=None,
        audiofiles=None,
        out='../audio_features',
        train_file_path=None,
        pca_params_path="/data/lisa/exp/faces/emotiw_final/caglar_audio/pca_params.pkl"
):
    """Extract YAAFE features for `audiofiles`, pickle them, then PCA them.

    Steps, in order:
      1. Build a feature plan (48000 Hz, normalize=0.99) and load it into
         an engine.
      2. Obtain PCA statistics through the nested `train_pca`: either
         accumulated from the mp3 files under `train_file_path`, or read
         back from `pca_params_path` when that file exists.
      3. For each entry of `audiofiles` (joined onto `path`), pickle the
         concatenated features to a '.full.pkl' file with `path` replaced
         by `out` in the file name.
      4. Finalise the PCA, pickle it to '<out>/full.pca', and write a
         '.pca.pkl' file next to each feature pickle.

    Raises AssertionError if `audiofiles` is None, or if training is
    needed and `train_file_path` is None.

    NOTE(review): this is Python 2 code (print statements, iteritems,
    cPickle, file()).
    """
    # prepare the FeaturePlan
    plan = yaafelib.FeaturePlan(sample_rate=48000, normalize=0.99)
    size_info = 'blockSize=1248 stepSize=624'
    # NOTE(review): `pca` is read before it is declared global. Python 2
    # only warns about this; Python 3 rejects it as a syntax error. The
    # declaration makes every use of `pca` in this function refer to the
    # module-level name.
    if pca is None:
        global pca
    # Entries without ':' are expanded to 'Name: Name'. A '%s' marks where
    # the block/step sizes go; entries without one get them appended.
    features = [
        'ZCR', 'TemporalShapeStatistics', 'Energy', 'MagnitudeSpectrum',
        'SpectralVariation', 'SpectralSlope', 'SpectralRolloff',
        'SpectralShapeStatistics', 'SpectralFlux', 'SpectralFlatness',
        'SpectralDecrease', 'SpectralFlatnessPerBand',
        'SpectralCrestFactorPerBand', 'AutoCorrelation', 'LPC', 'LSF',
        'ComplexDomainOnsetDetection', 'MelSpectrum',
        'MFCC: MFCC CepsNbCoeffs=22',
        'MFCC_d1: MFCC %s > Derivate DOrder=1',
        'MFCC_d2: MFCC %s > Derivate DOrder=2',
        'Envelope', 'EnvelopeShapeStatistics', 'AmplitudeModulation',
        'Loudness', 'PerceptualSharpness', 'PerceptualSpread', 'OBSI',
        'OBSIR'
    ]
    for f in features:
        if ':' not in f:
            f = '%s: %s' % (f, f)
        if '%s' not in f:
            f += ' %s'
        plan.addFeature(f % size_info)
    dataflow = plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(dataflow)
    processor = yaafelib.AudioFileProcessor()
    # Maps a subset name to the feature keys it uses; 'full' means every
    # output of the engine, in sorted key order.
    subsets = {'full': 'full'}

    def train_pca(pca=None):
        # Return `pca` unchanged when one is supplied; otherwise build a
        # defaultdict of PCA objects, filled either from the training
        # files or from the cached parameters at `pca_params_path`.
        if pca is not None:
            return pca
        assert train_file_path is not None
        print "Training pca..."
        pca = defaultdict(PCA)
        audiofiles_ = glob.glob('%s/*/*.mp3' % train_file_path)
        if not (os.path.exists(pca_params_path)):
            # extract features from audio files
            for audiofile in audiofiles_:
                processor.processFile(engine, audiofile)
                features = engine.readAllOutputs()
                for subset, keys in subsets.iteritems():
                    if keys == 'full':
                        keys = sorted(features.keys())
                    output = numpy.concatenate([features[k].T for k in keys]).T
                    # Only files whose path contains 'Train' are added to
                    # the PCA statistics.
                    if 'Train' in audiofile:
                        pca[subset].add(output)
            # Cache the accumulated statistics for later runs.
            pca_params = {}
            pca_params["mean"] = pca["full"].mean
            pca_params["covariance"] = pca["full"].covariance
            pca_params["num_frames"] = pca["full"].num_frames
            pca_params["ndim"] = pca["full"].ndim
            cPickle.dump(pca_params, file_create(pca_params_path),
                         cPickle.HIGHEST_PROTOCOL)
        else:
            # NOTE(review): the handle opened here is never closed
            # explicitly.
            pca_params = cPickle.load(open(pca_params_path, "rb"))
            pca["full"].covariance = pca_params["covariance"]
            pca["full"].mean = pca_params["mean"]
            pca["full"].num_frames = pca_params["num_frames"]
            pca["full"].ndim = pca_params["ndim"]
        print "PCA training finished."
        return pca

    assert audiofiles is not None
    pca = train_pca(pca)
    assert pca is not None
    # NOTE(review): this repeats the addFeature loop above. The plan is
    # not loaded into `engine` again after it, so these calls do not
    # affect the extraction below - presumably leftover code; confirm.
    for f in features:
        if ':' not in f:
            f = '%s: %s' % (f, f)
        if '%s' not in f:
            f += ' %s'
        plan.addFeature(f % size_info)
    # extract features from audio files
    for audiofile in audiofiles:
        audiofile = os.path.join(path, audiofile)
        processor.processFile(engine, audiofile)
        # From here on `features` holds the engine outputs, not the list
        # of definitions.
        features = engine.readAllOutputs()
        for subset, keys in subsets.iteritems():
            if keys == 'full':
                keys = sorted(features.keys())
            output = numpy.concatenate([features[k].T for k in keys]).T
            pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(
                path, out)
            cPickle.dump(output, file_create(pickle_file),
                         cPickle.HIGHEST_PROTOCOL)
    # Finalise each PCA and store it next to the feature pickles.
    for subset in subsets.iterkeys():
        pca[subset].pca(diagonal=True)
        cPickle.dump(pca[subset], file_create('%s/%s.pca' % (out, subset)))
    print 'Rewriting PCA data...'
    sys.stdout.flush()
    # Re-read every feature pickle, transform it with the PCA and write
    # the result to a sibling '.pca.pkl' file.
    for audiofile in audiofiles:
        for subset in subsets.iterkeys():
            pickle_file = os.path.join(out, audiofile).replace(
                '.mp3', '.%s.pkl' % subset)
            #pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(path, out)
            # NOTE(review): the handle opened by file() is never closed
            # explicitly.
            matrix = cPickle.load(file(pickle_file))
            matrix = pca[subset].feature(matrix)
            cPickle.dump(matrix,
                         file_create(pickle_file.replace('.pkl', '.pca.pkl')),
                         cPickle.HIGHEST_PROTOCOL)
def transform(self, X): X_prime = None params = {'block_size': 256, 'step_size': 128, 'mel_min_freq': 0.0, 'mel_max_freq': 500.0, 'mel_nb_filters': 50, 'ceps_ign_first_coef': 0, 'fft_len': 0, 'do1len': 5, 'do2len': 1, 'slope_step_nbframes': 5, 'slope_nbframes': 9, } fp = yf.FeaturePlan(sample_rate=self.sample_rate) fp.addFeature('melspec: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' % params) if self.diff: fp.addFeature('melspec_diff1: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' ' > Derivate DOrder=1 DO1Len=%(do1len)d' % params) if self.diff2: fp.addFeature('melspec_diff2: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' ' > Derivate DOrder=2 DO2Len=%(do2len)d' % params) if self.slope: fp.addFeature('melspec_slope: MelSpectrum FFTWindow=Hanning MelNbFilters=%(mel_nb_filters)d' ' blockSize=%(block_size)d stepSize=%(step_size)d' ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' ' > SlopeIntegrator NbFrames=%(slope_nbframes)d StepNbFrames=%(slope_step_nbframes)d' % params) df = fp.getDataFlow() engine = yf.Engine() engine.load(df) X = X.astype(np.float64) x_shape = (1, X.shape[1]) for i, x in enumerate(X): x = x.reshape(x_shape) feats = engine.processAudio(x) ## if i == 0: ## import IPython ## IPython.embed() if X_prime is None: fx_groups = tuple(feats.keys()) n_features = 0 for fx_group in fx_groups: n_features += feats[fx_group].ravel().shape[0] X_prime = np.empty((X.shape[0], n_features), dtype=np.float64) print 'n_groups:', len(fx_groups) print 'n_features:', n_features offset = 0 for fx_group in fx_groups: fxs = feats[fx_group].ravel() if fx_group == 'melspec': # log melspec 
features fxs = np.log10(fxs) X_prime[i, offset:(offset + fxs.shape[0])] = fxs offset += fxs.shape[0] return X_prime
# Dataset locations and output files
trainAudio = './IRMAS-Dataset/Training'
trainFeats = './trainFeatures.dat'
testAudio = './IRMAS-Dataset/Testing'
testFeats = './testFeatures.dat'
model = './model.svm'

# Instruments and their class indices, read from the training folder
instruments = getInstruments(trainAudio)

# Feature plan read from 'featureplan.txt', at a 44100 Hz sample rate
fp = yl.FeaturePlan(sample_rate=44100)
fp.loadFeaturePlan('featureplan.txt')

# Yaafe engine loaded with the plan's data flow
df = fp.getDataFlow()
eng = yl.Engine()
eng.load(df)

# Total dimensionality: the sum of the 'size' of every engine output
ftSizes = eng.getOutputs().items()
dimensions = 0
for _, ftInfo in ftSizes:
    dimensions += int(ftInfo['size'])

afp = yl.AudioFileProcessor()

# Remove model files left by a previous run (one per instrument index)
for k in range(len(instruments)):
    classFile = '{0}.{1}'.format(model, k)
    if os.path.isfile(classFile):
        os.remove(classFile)

# Write training features
print('\nExtracting training features\n')
import numpy as np
import yaafelib as yaafe

__author__ = 'zerickson'

# Initialization: a 16 kHz plan with MFCC, spectral rolloff and spectral
# flux, all using blockSize=512 and stepSize=256
fp = yaafe.FeaturePlan(sample_rate=16000)
fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
fp.addFeature('sr: SpectralRolloff blockSize=512 stepSize=256')
fp.addFeature('sf: SpectralFlux blockSize=512 stepSize=256')
engine = yaafe.Engine()
engine.load(fp.getDataFlow())

# Single-argument print calls produce the same output on Python 2 and
# also parse on Python 3.

# Get input metadata
print(engine.getInputs())

# Get output metadata
print(engine.getOutputs())

# Extract features from a random numpy array
audio = np.random.randn(1, 1000000)
feats = engine.processAudio(audio)
print(feats['mfcc'].shape)
print(feats['sf'].shape)
print(feats['sr'].shape)

# Extracting features block per block
engine.reset()

# Iterate over 9 random blocks of audio data (range(1, 10) yields 1..9)
for i in range(1, 10):
    # Generate random data
    audio = np.random.rand(1, 32000)
    engine.writeInput('audio', audio)
def write_features(audiofile, sample_rate, data):
    """Extract features then write their statistics to a tab-separated file.

    For every column of the MFCC, energy and ZCR outputs, four values
    are written: mean, standard deviation, skewness and kurtosis. If
    `data` is empty, three header lines (attribute names, attribute
    types, flags) are written first. Only the statistics are written;
    the filename and classification columns announced by the header are
    not filled in here.

    Returns True if extraction was successful, False if unsuccessful
    (any of the three outputs is empty).

    Arguments:
    audiofile -- WAV file to process
    sample_rate -- sample rate of the audio file in Hz
    data -- the data file to write to; it is both read and written, and
            is left positioned at its end
    """
    N_MFCC = 12  # Number of MFCC coefficients
    N_LLD = 2  # Number of other low-level descriptors
    N_FUNCS = 4  # Number of functionals
    functionals = ("_mean", "_std", "_skew", "_kurtosis")

    # Add features to extract. Adjacent literals are used rather than a
    # backslash continuation inside the string, which would embed the
    # source indentation in the definition.
    featplan = yf.FeaturePlan(sample_rate=sample_rate, resample=False)
    featplan.addFeature('mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=%d '
                        'FFTWindow=Hanning MelMinFreq=1200 MelMaxFreq=5050'
                        % N_MFCC)
    featplan.addFeature('energy: Energy')
    featplan.addFeature('zcr: ZCR')

    # Configure an Engine
    engine = yf.Engine()
    engine.load(featplan.getDataFlow())

    # Extract features
    afp = yf.AudioFileProcessor()
    afp.processFile(engine, audiofile)

    # 2D numpy arrays
    mfccs = engine.readOutput('mfcc')
    energy = engine.readOutput('energy')
    zcr = engine.readOutput('zcr')

    # Write header lines if they don't exist
    data.seek(0, 0)
    if not data.readline():
        n_columns = N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)

        # Attribute header line: four functionals per descriptor, then the
        # filename and classification columns
        descriptors = ["mfcc" + str(i + 1) for i in range(N_MFCC)]
        descriptors += ["energy", "zcr"]
        for descriptor in descriptors:
            for functional in functionals:
                data.write(descriptor + functional + "\t")
        data.write("filename" + '\t')
        data.write("classification" + "\n")

        # Attribute type line: statistics are continuous, the filename is
        # a string, the classification is discrete
        data.write("continuous\t" * n_columns)
        data.write("string" + '\t')
        data.write("discrete" + "\n")

        # Flags line: empty for the statistics columns
        data.write('\t' * n_columns)
        data.write("meta" + '\t')
        data.write("class" + '\n')

    data.seek(0, 2)  # Go to end of file.

    # Write feature data
    if mfccs.size > 0 and energy.size > 0 and zcr.size > 0:
        # MFCCs first, then energy, then ZCR - matching the header. Every
        # statistic is computed from the descriptor's own column (the ZCR
        # kurtosis in particular is taken from the ZCR values).
        for descriptor in (mfccs, energy, zcr):
            for i in range(descriptor[0].size):
                column = descriptor[:, i]
                stats = (column.mean(), column.std(),
                         spstats.skew(column), spstats.kurtosis(column))
                data.write(''.join(str(value) + '\t' for value in stats))
        return True
    else:
        return False
def main():
    """Command-line entry point: parse options and extract features.

    Handles the informational options first (--list, --describe, --show),
    each of which returns immediately. Otherwise a sample rate and at
    least one feature (config file or -f) are required. Audio files come
    from the positional arguments plus, optionally, the lines of the
    file given with --input. Every failure path prints a message or
    returns silently; nothing is raised and no exit code is set here.
    """
    parser = OptionParser(version='yaafe.py, Yaafe v%s' % yaafe.getYaafeVersion())
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False, help='display more output')
    parser.add_option('-l', '--list', dest='listFeatures', action='store_true',
                      default=False,
                      help='list all available features and output formats')
    parser.add_option('-d', '--describe', dest='describe', default=None,
                      help='describe a feature or an output format')
    parser.add_option('-f', '--feature', action='append', dest='feature',
                      metavar='FEATUREDEFINITION', help='feature to extract')
    parser.add_option('-c', '--config-file', dest='configFile', default=None,
                      help='feature extraction plan')
    parser.add_option('-r', '--rate', dest='sample_rate', type='int',
                      default=None, help='working samplerate in Hz.')
    parser.add_option('', '--resample', dest='resample', action='store_true',
                      default=False,
                      help='Resample input signal to the analysis sample rate')
    parser.add_option('-n', '--normalize', dest='normalize',
                      action='store_true', default=False,
                      help='normalize input signal by removing mean and scale maximum absolute value to 0.98 (or other value given with --normalize-max)')
    parser.add_option('', '--normalize-max', dest='normalize_max',
                      type='float', default=0.98,
                      help='Normalize input signal so that maximum absolute value reached given value (see -n, --normalize)')
    parser.add_option('-i', '--input', dest='input_list', default=None,
                      help='text file, each line is an audio file to process')
    parser.add_option('-b', '--base-dir', dest='out_dir', default='',
                      help='output directory base')
    parser.add_option('-o', '--output-format', dest='format', default='csv',
                      choices=output_format_choices,
                      help='Features output format: %s' % '|'.join(output_format_choices))
    parser.add_option('-p', '--output-params', dest='formatparams',
                      action='append', default=[], metavar='key=value',
                      help='add an output format parameter (can be used multiple times, use -l options to list output formats and parameters)')
    parser.add_option('', '--dump-dataflow', dest='dumpDataflow', default='',
                      metavar='FILE',
                      help='output dataflow plan (suitable for process with yaafe-engine)')
    parser.add_option('', '--dump-graph', dest='dumpGraph', default='',
                      metavar='FILE',
                      help="output dataflow in dot format (suitable for display with graphviz")
    parser.add_option('-s', '--data-block-size', dest='buffer_size',
                      type='int', default=None,
                      help='Prefered size for data blocks.')
    parser.add_option('', '--show', dest='showFeatures', default=None,
                      help='Show all features in a H5 file')

    (options, audiofiles) = parser.parse_args()

    # Informational modes: do the listing/description and stop.
    if options.listFeatures:
        listFeatures()
        return
    if options.describe:
        if options.describe in yaafe.getOutputFormatList():
            describeOutputFormat(options.describe)
        else:
            describeFeature(options.describe)
        return
    if options.showFeatures:
        showFeatures(options.showFeatures)
        return

    # Single-argument print calls below produce the same output on
    # Python 2 and also parse on Python 3.
    if not options.sample_rate:
        print("ERROR: please specify sample rate !")
        return
    if options.buffer_size:
        yaafe.setPreferedDataBlockSize(options.buffer_size)
    if options.verbose:
        yaafe.setVerbose(True)

    # initialize feature plan
    fp = yaafe.FeaturePlan(
        sample_rate=options.sample_rate,
        normalize=(options.normalize_max if options.normalize else None),
        resample=options.resample)

    if options.configFile:
        if not fp.loadFeaturePlan(options.configFile):
            return
    elif options.feature:
        for feat in options.feature:
            if not fp.addFeature(feat):
                return
    else:
        print("ERROR: please specify features using either a config file or -f [feature]")
        return

    if options.dumpDataflow:
        fp.getDataFlow().save(options.dumpDataflow)
    if options.dumpGraph:
        # NOTE(review): this uses the same save() call as --dump-dataflow,
        # while the option help promises dot format - confirm which
        # DataFlow method is intended.
        fp.getDataFlow().save(options.dumpGraph)

    # read audio file list; the file is closed even if reading fails, and
    # blank lines are skipped instead of being queued as empty file names
    if options.input_list:
        with open(options.input_list, 'r') as fin:
            for line in fin:
                entry = line.strip()
                if entry:
                    audiofiles.append(entry)

    if audiofiles:
        # initialize engine
        engine = yaafe.Engine()
        if not engine.load(fp.getDataFlow()):
            return
        # initialize file processor
        afp = yaafe.AudioFileProcessor()
        oparams = dict()
        for pstr in options.formatparams:
            pstrdata = pstr.split('=')
            if len(pstrdata) != 2:
                print('ERROR: invalid parameter syntax in "%s" (should be "key=value")' % pstr)
                return
            oparams[pstrdata[0]] = pstrdata[1]
        afp.setOutputFormat(options.format, options.out_dir, oparams)
        # process audio files
        for audiofile in audiofiles:
            afp.processFile(engine, audiofile)