Example #1
    def extract(self, wav):
        """Extract features

        Parameters
        ----------
        wav : string
            Path to wav file.

        Returns
        -------
        features : SlidingWindowFeature

        """

        # hack
        data_flow, stack = self.get_flow_and_stack()

        engine = yaafelib.Engine()
        engine.load(data_flow)

        sample_rate, raw_audio = scipy.io.wavfile.read(wav)
        assert sample_rate == self.sample_rate, "sample rate mismatch"

        audio = np.array(raw_audio, dtype=np.float64, order='C').reshape(1, -1)

        features = engine.processAudio(audio)
        data = np.hstack([features[name] for name in stack])

        sliding_window = YaafeFrame(blockSize=self.block_size,
                                    stepSize=self.step_size,
                                    sampleRate=self.sample_rate)

        return SlidingWindowFeature(data, sliding_window)
Example #2
def extract_audio_features(sigdata):
    '''Extracts a bunch of audio features using YAAFE
    '''
    window = 'Hanning'
    # using 80 / 40 here produces NaNs in mel spectrum, for some reason
    block = 120
    step = 60
    
    fp = yl.FeaturePlan(sample_rate=SAMPLE_RATE)
    fp.addFeature('CDOD: ComplexDomainOnsetDetection FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('LPC: LPC LPCNbCoeffs=4 blockSize=%d stepSize=%d' % (block, step))
    fp.addFeature('MelSpec: MelSpectrum FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('MFCC: MFCC CepsIgnoreFirstCoeff=1 CepsNbCoeffs=12 FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SF: SpectralFlux FFTWindow=%s FluxSupport=Increase blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SpecStats: SpectralShapeStatistics FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SpecSlope: SpectralSlope FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SpecVar: SpectralVariation FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    df = fp.getDataFlow()
    # df.display()
    
    engine = yl.Engine()
    engine.load(df)
    
    feats = []
    for cnt in range(sigdata.shape[0]):
        signal = np.reshape(sigdata[cnt,:],[1,-1])
        feats.append(engine.processAudio(signal))
    
    return feats
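
A minimal driver for extract_audio_features above, not taken from the original project: it assumes the surrounding module already imports numpy as np and yaafelib as yl and defines SAMPLE_RATE (the 16 kHz value below is an arbitrary stand-in).

import numpy as np

SAMPLE_RATE = 16000  # assumed value; the original module defines its own constant

# two one-second clips of white noise, shape (n_clips, n_samples), float64 as YAAFE expects
sigdata = np.random.randn(2, SAMPLE_RATE).astype(np.float64)

feats = extract_audio_features(sigdata)
# one dict per clip, mapping the names declared above ('CDOD', 'LPC', 'MelSpec',
# 'MFCC', 'SF', 'SpecStats', 'SpecSlope', 'SpecVar') to (n_frames, n_coeffs) arrays
print(len(feats), sorted(feats[0].keys()))
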
Example #3
def createAFP():
    engine = yaafelib.Engine()
    fp = yaafelib.FeaturePlan(sample_rate=16000)
    fp.addFeature('energy: Energy')
    fp.addFeature('mfcc: MFCC blockSize=2048 stepSize=1024')
    df = fp.getDataFlow()
    engine.load(df)
    afp = yaafelib.AudioFileProcessor()
    return afp, engine
Example #4
def feature_indices():
    fp = yaafe.FeaturePlan()
    fp.loadFeaturePlan('features.txt')
    df = fp.getDataFlow()
    engine = yaafe.Engine()
    engine.load(df)
    indices = sorted([(name, feat['size'])
                      for (name, feat) in engine.getOutputs().items()])
    return sum([[(name, i) for i in range(size)] for (name, size) in indices],
               [])
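
A quick way to exercise feature_indices above (a sketch, not from the original project): write a small feature plan to features.txt in the working directory, using the one-feature-per-line "name: Transform param=value ..." format that loadFeaturePlan expects, then inspect the flattened (name, dimension) pairs.

with open('features.txt', 'w') as f:
    f.write('mfcc: MFCC blockSize=512 stepSize=256\n')
    f.write('zcr: ZCR blockSize=512 stepSize=256\n')

pairs = feature_indices()
# one (name, index) pair per output dimension, e.g. ('mfcc', 0) ... ('zcr', 0)
print(pairs)
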
Example #5
    def __init__(self, duration=0.025, step=0.010, stack=1):
        # add sample_rate as argument
        super(YaafeFeatureExtractor, self).__init__()

        #self.sample_rate = sample_rate
        self.duration = duration
        self.step = step
        self.stack = stack

        start = -0.5 * self.duration

        self.engine_ = yaafelib.Engine()
Example #6
    def __init__(self, app_config, rate):
        self.ExtractedFeaturesList = ['LPC1_mean', 'LSF7_min', 'SpectralFlatness_min',
                                      'SSS_centroid_min', 'SSS_spread_min', 'PerceptualSpread_min',
                                      'SpectralSlope_min', 'PerceptualSharpness_min', 'SpectralDecrease_max',
                                      'OBSI0_mm', 'SpectralRolloff_min']
        self._rate = rate
        feature_plan = yaafelib.FeaturePlan(sample_rate=rate)
        feature_plan_path = os.path.join(app_config.program_directory, 'features.config')
        success = feature_plan.loadFeaturePlan(feature_plan_path)
        if not success:
            sys.exit('Feature plan not loaded correctly')
        self._engine = yaafelib.Engine()
        self._engine.load(feature_plan.getDataFlow())
Example #7
    def __init__(self, duration=0.025, step=0.010, stack=1):

        super(YaafeFeatureExtractor, self).__init__()

        self.duration = duration
        self.step = step
        self.stack = stack

        start = -0.5 * self.duration
        self.sliding_window_ = SlidingWindow(start=start,
                                             duration=self.duration,
                                             step=self.step)

        self.engine_ = yaafelib.Engine()
Example #8
def yaafe2features(wavefiles, out_file, feature_type='MFCC'):
    """Generate features with yaafe and put them in h5features format.

    Whole wavefiles are encoded as internal h5features files.
    To use them with abkhazia's ABX tasks, these need to be segmented
    according to an abkhazia segments.txt
    (abkhazia/utilities/segment_features.py can be used for this)

    Supported feature types:
    - 'MFCC' (default)
    - 'CMSP13' (cubic-root-compressed 13-frequency-channels Mel spectrogram)
    """
    assert feature_type in ['MFCC', 'CMSP13'], \
        'Unsupported feature_type {0}'.format(feature_type)

    feature_plan = ya.FeaturePlan(sample_rate=16000)
    if feature_type == 'MFCC':
        feat_name = 'mfcc'
        feature_plan.addFeature('{0}: MFCC blockSize=400 stepSize=160'.format(
            feat_name))  # 0.025s + 0.01s
    elif feature_type == 'CMSP13':
        feat_name = 'melsp'
        feature_plan.addFeature(
            '{0}: MelSpectrum MelNbFilters=13 blockSize=400 stepSize=160'.
            format(feat_name))  # 0.025s + 0.01s

    engine = ya.Engine()
    engine.load(feature_plan.getDataFlow())

    wav_ids = []
    times = []
    features = []
    for wavefile in wavefiles:
        wav_ids.append(p.splitext(p.basename(wavefile))[0])
        afp = ya.AudioFileProcessor()
        afp.processFile(engine, wavefile)
        feat_out = engine.readAllOutputs()[feat_name]

        if feature_type == 'CMSP13':
            # need to add compression by hand
            feat_out = np.power(feat_out, 1 / 3.)

        # times according to:
        # http://yaafe.sourceforge.net/features.html?highlight=mfcc#yaafefeatures.Frames
        nframes = feat_out.shape[0]
        # 0.01 here is ad hoc and dependent on 160 above
        times.append(0.01 * np.arange(nframes))
        features.append(feat_out)
    h5features.write(out_file, 'features', wav_ids, times, features)
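
A possible call site for yaafe2features above; the corpus layout is hypothetical, and the wav files are assumed to be 16 kHz mono to match the fixed FeaturePlan sample rate.

import glob

wavefiles = sorted(glob.glob('corpus/wavs/*.wav'))  # hypothetical corpus layout
yaafe2features(wavefiles, 'corpus/mfcc.features', feature_type='MFCC')
yaafe2features(wavefiles, 'corpus/cmsp13.features', feature_type='CMSP13')
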
Example #9
    def _wave2features(self, wavearray):
        engine = yaafelib.Engine()
        featureplan = yaafelib.FeaturePlan(sample_rate=self.samplerate)

        for name, recipe in self.definition:
            assert featureplan.addFeature("{name}: {recipe}".format(name=name, recipe=recipe))

        dataflow = featureplan.getDataFlow()
        engine.load(dataflow)

        wavearray = np.array(wavearray, dtype=np.float64, order='C').reshape((1, -1))
        features = engine.processAudio(wavearray)
        engine.reset()

        return features
Example #10
    def setup(self,
              channels=None,
              samplerate=None,
              blocksize=None,
              totalframes=None):
        super(Yaafe, self).setup(channels, samplerate, blocksize, totalframes)

        # Setup Yaafe Feature plan and Dataflow
        yaafe_feature_plan = yaafelib.FeaturePlan(sample_rate=samplerate)
        for feat in self.feature_plan:
            yaafe_feature_plan.addFeature(feat)

        self.data_flow = yaafe_feature_plan.getDataFlow()

        # Configure a YAAFE engine
        self.yaafe_engine = yaafelib.Engine()
        self.yaafe_engine.load(self.data_flow)
        self.yaafe_engine.reset()
Example #11
    def features(self, feature_set):
        if len(self.frames) == 0:
            return None

        fp = yaafe.FeaturePlan()
        if feature_set in ['auto', 'all']:
            fp.loadFeaturePlan('features.txt')
        else:
            fp.loadFeaturePlan('features_reduced.txt')

        df = fp.getDataFlow()
        engine = yaafe.Engine()
        engine.load(df)
        feats = engine.processAudio(np.array([self.frames]))

        attributes = [mean(values) for (k, values) in sorted(feats.items())]

        return np.concatenate(attributes)
Example #12
    def __init__(self, fs: int, config: dict):
        yaafe_config = {}
        for feature_name, feature_params in config.items():
            if feature_params['use']:
                specs = feature_name + ' ' + str(
                    feature_params['params']).replace("'", '').replace(
                        ",", "").replace(": ", "=")[1:-1]
                yaafe_config[feature_name] = specs

        if yaafe_config:
            feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
            for feature_name, setting in yaafe_config.items():
                feature_plan.addFeature(feature_name + ': ' + setting)
            data_flow = feature_plan.getDataFlow()
            self.engine = yaafelib.Engine()
            self.engine.load(data_flow)
        else:
            self.engine = None
Example #13
    def __init__(self,
                 fs: int,
                 block_size=1024,
                 step_size=None,
                 selected_features='all'):
        if not step_size:
            step_size = block_size // 2

        features_config = {
            'Chroma':
            f'Chroma2 CQTAlign=c  CQTBinsPerOctave=48  CQTMinFreq=27.5  CQTNbOctaves=7  CZBinsPerSemitone=1  CZNbCQTBinsAggregatedToPCPBin=-1  CZTuning=440  stepSize={step_size}',
            'LPC':
            f'LPC LPCNbCoeffs=1  blockSize={block_size}  stepSize={step_size}',
            'LSF': f'LSF blockSize={block_size}  stepSize={step_size}',
            'MFCC':
            f'MFCC CepsIgnoreFirstCoeff=1  CepsNbCoeffs=13  FFTWindow=Hanning  MelMaxFreq=6000.0  MelMinFreq=400.0  MelNbFilters=40  blockSize={block_size}  stepSize={step_size}',
            'OBSI':
            f'OBSI FFTLength=0  FFTWindow=Hanning  OBSIMinFreq=27.5  blockSize={block_size}  stepSize={step_size}',
            'SpectralCrestFactorPerBand':
            f'SpectralCrestFactorPerBand FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralDecrease':
            f'SpectralDecrease FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralFlatness':
            f'SpectralFlatness FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralFlux':
            f'SpectralFlux FFTLength=0  FFTWindow=Hanning  FluxSupport=All  blockSize={block_size}  stepSize={step_size}',
            'SpectralRolloff':
            f'SpectralRolloff FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralVariation':
            f'SpectralVariation FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'ZCR': f'ZCR blockSize={block_size}  stepSize={step_size}'
        }

        self.fs = fs
        if selected_features == 'all':
            selected_features = features_config.keys()
        feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
        for feature_name, setting in features_config.items():
            if feature_name in selected_features:
                feature_plan.addFeature(feature_name + ': ' + setting)
        data_flow = feature_plan.getDataFlow()
        self.engine = yaafelib.Engine()
        self.engine.load(data_flow)
Example #14
def calculate_spectrogram(y, fs, block_size=1024, step_size=None):
    if step_size is None:
        step_size = block_size // 2
    feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    feature_plan.addFeature(
        f'MagnitudeSpectrum: MagnitudeSpectrum blockSize={block_size} stepSize={step_size}'
    )
    data_flow = feature_plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(data_flow)
    features = engine.processAudio(y.reshape(1, -1).astype('float64'))

    noverlap = block_size // 2
    spectrum = features['MagnitudeSpectrum']
    time = np.linspace(noverlap / fs, (len(y) - noverlap) / fs,
                       spectrum.shape[0])
    freq = np.linspace(0, fs // 2, num=spectrum.shape[-1])

    return freq, time, spectrum
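
A small sketch driving calculate_spectrogram above with a synthetic tone; the 1 kHz sine and 16 kHz rate are arbitrary choices, and the module is assumed to import numpy as np and yaafelib as in the original.

import numpy as np

fs = 16000
t = np.arange(fs) / fs                  # one second of samples
y = np.sin(2 * np.pi * 1000.0 * t)      # 1 kHz sine
freq, time, spectrum = calculate_spectrogram(y, fs, block_size=1024)
print(spectrum.shape, freq.shape, time.shape)
# the average magnitude should peak near the 1 kHz bin
print(freq[np.argmax(spectrum.mean(axis=0))])
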
Example #15
    def __call__(self, wav):
        """Extract features

        Parameters
        ----------
        wav : string
            Path to wav file.

        Returns
        -------
        features : SlidingWindowFeature

        """

        definition = self.definition()

        # --- prepare the feature plan
        feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
        for name, recipe in definition:
            assert feature_plan.addFeature(
                "{name}: {recipe}".format(name=name, recipe=recipe))

        # --- prepare the Yaafe engine
        data_flow = feature_plan.getDataFlow()

        engine = yaafelib.Engine()
        engine.load(data_flow)

        sample_rate, raw_audio = scipy.io.wavfile.read(wav)
        assert sample_rate == self.sample_rate, "sample rate mismatch"

        audio = np.array(raw_audio, dtype=np.float64, order='C').reshape(1, -1)

        features = engine.processAudio(audio)
        data = np.hstack([features[name] for name, _ in definition])

        sliding_window = YaafeFrame(
            blockSize=self.block_size, stepSize=self.step_size,
            sampleRate=self.sample_rate)

        return SlidingWindowFeature(data, sliding_window)
Example #16
def extract_feature(filename, offset):
    fp = yaafelib.FeaturePlan(sample_rate=22050, resample=True)
    fp.loadFeaturePlan('static/featureplan.txt')
    engine = yaafelib.Engine()
    engine.load(fp.getDataFlow())
    print(filename)
    print(offset)

    sound = AudioSegment.from_file(filename)

    halfway_point = int(offset) * 1000
    end = halfway_point + 30000
    first_half = sound[halfway_point:end]
    filename = os.path.join(
        app.config['UPLOAD_FOLDER'],
        os.path.splitext(os.path.basename(filename))[0] + str(offset) +
        ".cliped.wav")
    if not os.path.isfile(filename):
        first_half.export(filename, format="wav")
    afp = yaafelib.AudioFileProcessor()
    afp.processFile(engine, filename)
    feats = engine.readAllOutputs()
    return preprocessed(feats)
Example #17
    def __init__(
        self,
        sample_rate=16000,
        block_size=512,
        step_size=256,
        e=True,
        coefs=11,
        De=False,
        DDe=False,
        D=False,
        DD=False,
    ):

        super(YaafeMFCC, self).__init__(sample_rate=sample_rate,
                                        block_size=block_size,
                                        step_size=step_size)

        self.e = e
        self.coefs = coefs
        self.De = De
        self.DDe = DDe
        self.D = D
        self.DD = DD

        self.definition_ = self.definition()

        # --- prepare the feature plan
        feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
        for name, recipe in self.definition_:
            assert feature_plan.addFeature("{name}: {recipe}".format(
                name=name, recipe=recipe))

        # --- prepare the Yaafe engine
        data_flow = feature_plan.getDataFlow()

        self.engine = yaafelib.Engine()
        self.engine.load(data_flow)
Example #18
    def process(self, signal, rate, segments, wavelet_decomposition_level=6, frame_overlap=512, wavelet_type='sym10'):
        """ Extract features """
        
        self.ExtractedFeaturesList = ['LPC1_mean', 'LSF7_min', 'SpectralFlatness_min',
                                      'SSS_centroid_min', 'SSS_spread_min', 'PerceptualSpread_min',
                                      'SpectralSlope_min', 'PerceptualSharpness_min', 'SpectralDecrease_max',
                                      'OBSI0_mm', 'SpectralRolloff_min']
 
        self._signal = signal
        self._rate = rate
        self._segments = segments

        """ Calculate spectral and temporal features """
        feature_plan = yaafelib.FeaturePlan(sample_rate=rate)
        success = feature_plan.loadFeaturePlan('features.config')
        if not success:
            sys.exit('Feature plan not loaded correctly')

        engine = yaafelib.Engine()
        engine.load(feature_plan.getDataFlow())    
        self.Features = engine.processAudio(np.array([signal.astype('float64')]))
        
        """ Initialize wavelet features
            Based on "Wavelets in Recognition of Bird Sounds" by A. Selin et al.
            EURASIP Journal on Advances in Signal Processing 2007, 2007:051806 """
#        wavelets_calculator = wavelets.Wavelets(wavelet_type)
#        wavelet_coefficients = wavelets_calculator.decompose(signal, wavelet_decomposition_level)
#        
        no_segments = len(segments)
                
        self.ExtractedFeatures = np.zeros(shape=(no_segments, len(self.ExtractedFeaturesList)))          
        
        LPC1 = self.Features['LPC'][:,1]
        LSF7 = self.Features['LSF'][:,7]
        SpectralFlatness = self.Features['SpectralFlatness']
        SSS_centroid = self.Features['SpectralShapeStatistics'][:,0]
        SSS_spread = self.Features['SpectralShapeStatistics'][:,1]
        PerceptualSpread = self.Features['PerceptualSpread']
        SpectralSlope = self.Features['SpectralSlope']
        PerceptualSharpness = self.Features['PerceptualSharpness']
        SpectralDecrease = self.Features['SpectralDecrease']
        OBSI0 = self.Features['OBSI'][:,0]
        SpectralRolloff = self.Features['SpectralRolloff']
        
        for i, segment in enumerate(segments):
            start = int(segment[0] / frame_overlap) - 1
            end = int(segment[1] / frame_overlap) + 1
            
            self.ExtractedFeatures[i,0] = LPC1[start:end].mean()
            self.ExtractedFeatures[i,1] = LSF7[start:end].min()
            self.ExtractedFeatures[i,2] = SpectralFlatness[start:end].min()
            self.ExtractedFeatures[i,3] = SSS_centroid[start:end].min()      
            self.ExtractedFeatures[i,4] = SSS_spread[start:end].min() 
            self.ExtractedFeatures[i,5] = PerceptualSpread[start:end].min()  
            self.ExtractedFeatures[i,6] = SpectralSlope[start:end].min()   
            self.ExtractedFeatures[i,7] = PerceptualSharpness[start:end].min() 
            self.ExtractedFeatures[i,8] = SpectralDecrease[start:end].max()  
            self.ExtractedFeatures[i,9] = maxmin(OBSI0[start:end])
            self.ExtractedFeatures[i,10] = SpectralRolloff[start:end].min()      
            
        return self.ExtractedFeatures
Example #19
def compute_features(dataStruct):
    """ This function takes a data structure dictionnaire, and renders several
    audio features as spectral rolloff, spectral slope etc... and store the
    data into the datastructure.

    Args:
        - dataStruct: dictionnaire containing filepath, labels, and list of
          classes

    Returns:
        - dataSet: same as dataStruct, with the given spectral features
    """

    ### --- INIT --- ###
    # DSP settings
    Nwin_bin = 1024
    Hop_bin = Nwin_bin  # hop equals the window size: no overlap between frames

    # Const
    Nex = len(dataStruct["filepath"])  # Number of files

    # Listing audio features
    features_yaafe = [
        'SpectralFlatness', 'SpectralRolloff', 'PerceptualSharpness',
        'PerceptualSpread', 'SpectralDecrease', 'SpectralVariation',
        'SpectralFlux'
    ]
    features_libro = [
        'Loudness', 'SpectralCentroid', 'SpectralContrast', 'SpectralRolloff',
        'SpectralBandwidth'
    ]

    dataStruct["SpectralFeatures"] = features_yaafe + features_libro

    # New fields
    dataStruct["signal"] = []
    dataStruct["sRate"] = []

    # Creating four fields per descriptor: full temporal vector, mean,
    # standard deviation, and maximum
    for f in dataStruct["SpectralFeatures"]:
        dataStruct[f] = []
        dataStruct[f + 'Mean'] = []
        dataStruct[f + 'Std'] = []
        dataStruct[f + 'Max'] = []

    ### --- Compute Feature --- ###
    print('\t \t \t Feature Extraction')
    # Computing the set of features
    for curFile in range(Nex):
        print('%s' % dataStruct["filepath"][curFile])

        # Loading signal
        curSignal, curSRate = lib.load(dataStruct["filepath"][curFile],
                                       mono=True,
                                       offset=0)

        # Storing signal data
        dataStruct["signal"].append(curSignal)
        dataStruct["sRate"].append(curSRate)
        """ YAAFE Extraction """
        # Create YAAFE extraction engine
        fp = yaf.FeaturePlan(sample_rate=curSRate)

        # Formatting string for DSP
        for f in features_yaafe:
            fp.addFeature(f+': '+f+' blockSize='+str(Nwin_bin)+\
                          ' stepSize='+str(Hop_bin))

        engine = yaf.Engine()
        engine.load(fp.getDataFlow())
        features = engine.processAudio(curSignal.astype('float64')\
                                       .reshape((1, curSignal.shape[0])))

        # Computing mean and std for each
        for key, val in sorted(features.items()):
            dataStruct[key].append(val)
            dataStruct[key + 'Mean'].append(np.mean(val))
            dataStruct[key + 'Std'].append(np.std(val))
            dataStruct[key + 'Max'].append(np.max(val))
        """ Librosa extraction """
        # Add the specific features from Librosa
        dataStruct["Loudness"].append(lib.feature.rmse(curSignal))

        # Compute the spectral centroid. [y, sr, S, n_fft, ...]
        dataStruct["SpectralCentroid"].append(
            lib.feature.spectral_centroid(curSignal))

        # Compute spectral contrast [R16] , sr, S, n_fft, ...])
        dataStruct["SpectralContrast"].append(
            lib.feature.spectral_contrast(curSignal))

        # Compute roll-off frequency
        dataStruct["SpectralRolloff"].append(
            lib.feature.spectral_rolloff(curSignal))

        # Compute Bandwidth
        dataStruct["SpectralBandwidth"].append(
            lib.feature.spectral_bandwidth(curSignal))

        # Computing mean and std for each
        for f in features_libro:
            val = dataStruct[f][-1]
            dataStruct[f + 'Mean'].append(np.mean(val))
            dataStruct[f + 'Std'].append(np.std(val))
            dataStruct[f + 'Max'].append(np.max(val))

    ### --- Formatting --- ###
    return dataStruct
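
A hedged usage sketch for compute_features above: the file paths are hypothetical, and the module is assumed to import librosa as lib, yaafelib as yaf and numpy as np, as the function requires.

dataStruct = {
    "filepath": ["audio/example_01.wav", "audio/example_02.wav"],  # hypothetical files
    "labels": [0, 1],
    "classes": ["speech", "music"],
}
dataStruct = compute_features(dataStruct)
# per-file mean of one of the YAAFE descriptors
print(dataStruct["SpectralFlatnessMean"])
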
Example #20
if args.derivatives:
    fp.addFeature('mfcc_d1: {} > Derivate DOrder=1'.format(mfcc_features))
    fp.addFeature('mfcc_d2: {} > Derivate DOrder=2'.format(mfcc_features))

fp.addFeature('energy: {}'.format(energy_features))
if args.derivatives:
    fp.addFeature('energy_d1: {} > Derivate DOrder=1'.format(energy_features))
    fp.addFeature('energy_d2: {} > Derivate DOrder=2'.format(energy_features))

if args.derivatives:
    keys = ['mfcc', 'mfcc_d1', 'mfcc_d2', 'energy', 'energy_d1', 'energy_d2']
else:
    keys = ['mfcc', 'energy']

df = fp.getDataFlow()
engine = yaafelib.Engine()
engine.load(df)
afp = yaafelib.AudioFileProcessor()

outfile = open(args.output, 'wb')

total = 0
for filename in args.inputs:
    tar = tarfile.open(filename)
    total += len([f for f in tar if f.isfile()])

for j, filename in enumerate(args.inputs):
    tar = tarfile.open(filename)
    files = sorted([f for f in tar if f.isfile()], key=lambda f: f.name)

    for i, fileinfo in enumerate(files):
Example #21
def export_features(
    path=None,
    audiofiles=None,
    out='../audio_features',
    train_file_path=None,
    pca_params_path="/data/lisa/exp/faces/emotiw_final/caglar_audio/pca_params.pkl"
):

    # prepare the FeaturePlan
    plan = yaafelib.FeaturePlan(sample_rate=48000, normalize=0.99)
    size_info = 'blockSize=1248 stepSize=624'
    global pca  # pca is expected to be a module-level variable (initialised elsewhere)

    features = [
        'ZCR', 'TemporalShapeStatistics', 'Energy', 'MagnitudeSpectrum',
        'SpectralVariation', 'SpectralSlope', 'SpectralRolloff',
        'SpectralShapeStatistics', 'SpectralFlux', 'SpectralFlatness',
        'SpectralDecrease', 'SpectralFlatnessPerBand',
        'SpectralCrestFactorPerBand', 'AutoCorrelation', 'LPC', 'LSF',
        'ComplexDomainOnsetDetection', 'MelSpectrum',
        'MFCC: MFCC CepsNbCoeffs=22', 'MFCC_d1: MFCC %s > Derivate DOrder=1',
        'MFCC_d2: MFCC %s > Derivate DOrder=2', 'Envelope',
        'EnvelopeShapeStatistics', 'AmplitudeModulation', 'Loudness',
        'PerceptualSharpness', 'PerceptualSpread', 'OBSI', 'OBSIR'
    ]

    for f in features:
        if ':' not in f: f = '%s: %s' % (f, f)
        if '%s' not in f: f += ' %s'
        plan.addFeature(f % size_info)

    dataflow = plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(dataflow)
    processor = yaafelib.AudioFileProcessor()

    subsets = {'full': 'full'}

    def train_pca(pca=None):
        if pca is not None:
            return pca

        assert train_file_path is not None
        print "Training pca..."
        pca = defaultdict(PCA)
        audiofiles_ = glob.glob('%s/*/*.mp3' % train_file_path)
        if not (os.path.exists(pca_params_path)):
            # extract features from audio files
            for audiofile in audiofiles_:
                processor.processFile(engine, audiofile)
                features = engine.readAllOutputs()
                for subset, keys in subsets.iteritems():
                    if keys == 'full':
                        keys = sorted(features.keys())
                    output = numpy.concatenate([features[k].T for k in keys]).T

                    if 'Train' in audiofile:
                        pca[subset].add(output)

            pca_params = {}
            pca_params["mean"] = pca["full"].mean
            pca_params["covariance"] = pca["full"].covariance
            pca_params["num_frames"] = pca["full"].num_frames
            pca_params["ndim"] = pca["full"].ndim
            cPickle.dump(pca_params, file_create(pca_params_path),
                         cPickle.HIGHEST_PROTOCOL)
        else:
            pca_params = cPickle.load(open(pca_params_path, "rb"))
            pca["full"].covariance = pca_params["covariance"]
            pca["full"].mean = pca_params["mean"]
            pca["full"].num_frames = pca_params["num_frames"]
            pca["full"].ndim = pca_params["ndim"]

        print "PCA training finished."
        return pca

    assert audiofiles is not None
    pca = train_pca(pca)
    assert pca is not None

    for f in features:
        if ':' not in f: f = '%s: %s' % (f, f)
        if '%s' not in f: f += ' %s'
        plan.addFeature(f % size_info)

    # extract features from audio files
    for audiofile in audiofiles:
        audiofile = os.path.join(path, audiofile)
        processor.processFile(engine, audiofile)
        features = engine.readAllOutputs()
        for subset, keys in subsets.iteritems():
            if keys == 'full':
                keys = sorted(features.keys())
            output = numpy.concatenate([features[k].T for k in keys]).T
            pickle_file = audiofile.replace('.mp3',
                                            '.%s.pkl' % subset).replace(
                                                path, out)
            cPickle.dump(output, file_create(pickle_file),
                         cPickle.HIGHEST_PROTOCOL)

    for subset in subsets.iterkeys():
        pca[subset].pca(diagonal=True)
        cPickle.dump(pca[subset], file_create('%s/%s.pca' % (out, subset)))

    print 'Rewriting PCA data...'
    sys.stdout.flush()

    for audiofile in audiofiles:
        for subset in subsets.iterkeys():
            pickle_file = os.path.join(out, audiofile).replace(
                '.mp3', '.%s.pkl' % subset)
            #pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(path, out)
            matrix = cPickle.load(file(pickle_file))
            matrix = pca[subset].feature(matrix)
            cPickle.dump(matrix,
                         file_create(pickle_file.replace('.pkl', '.pca.pkl')),
                         cPickle.HIGHEST_PROTOCOL)
Example #22
    def transform(self, X):
        X_prime = None

        params = {'block_size': 256,
                  'step_size': 128,
                  'mel_min_freq': 0.0,
                  'mel_max_freq': 500.0,
                  'mel_nb_filters': 50,
                  'ceps_ign_first_coef': 0,
                  'fft_len': 0,
                  'do1len': 5,
                  'do2len': 1,
                  'slope_step_nbframes': 5,
                  'slope_nbframes': 9,
                  }

        fp = yf.FeaturePlan(sample_rate=self.sample_rate)

        fp.addFeature('melspec: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' % params)

        if self.diff:
            fp.addFeature('melspec_diff1: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f'
                      ' > Derivate DOrder=1 DO1Len=%(do1len)d' % params)

        if self.diff2:
            fp.addFeature('melspec_diff2: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f'
                      ' > Derivate DOrder=2 DO2Len=%(do2len)d' % params)

        if self.slope:
            fp.addFeature('melspec_slope: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f'
                      ' > SlopeIntegrator NbFrames=%(slope_nbframes)d  StepNbFrames=%(slope_step_nbframes)d' % params)

        df = fp.getDataFlow()
        engine = yf.Engine()
        engine.load(df)

        X = X.astype(np.float64)
        x_shape = (1, X.shape[1])

        for i, x in enumerate(X):
            x = x.reshape(x_shape)

            feats = engine.processAudio(x)

            ## if i == 0:
            ##     import IPython
            ##     IPython.embed()

            if X_prime is None:
                fx_groups = tuple(feats.keys())
                n_features = 0
                for fx_group in fx_groups:
                    n_features += feats[fx_group].ravel().shape[0]

                X_prime = np.empty((X.shape[0], n_features), dtype=np.float64)
                print('n_groups:', len(fx_groups))
                print('n_features:', n_features)

            offset = 0
            for fx_group in fx_groups:
                fxs = feats[fx_group].ravel()
                if fx_group == 'melspec':
                    # log melspec features
                    fxs = np.log10(fxs)
                X_prime[i, offset:(offset + fxs.shape[0])] = fxs
                offset += fxs.shape[0]

        return X_prime
Example #23
trainAudio = './IRMAS-Dataset/Training'
trainFeats = './trainFeatures.dat'
testAudio = './IRMAS-Dataset/Testing'
testFeats = './testFeatures.dat'
model = './model.svm'

# Get the instruments and their class indices
instruments = getInstruments(trainAudio)

# Specify features
fp = yl.FeaturePlan(sample_rate=44100)
fp.loadFeaturePlan('featureplan.txt')

# Initialize yaafe tools
df = fp.getDataFlow()
eng = yl.Engine()
eng.load(df)
dimensions = 0  # The sum of the dimensions of the features
ftSizes = eng.getOutputs().items()
for ftSize in ftSizes:
    dimensions += int(ftSize[1]['size'])
afp = yl.AudioFileProcessor()

# Remove previous model files
for k in range(len(instruments)):
    classFile = model + '.' + str(k)
    if os.path.isfile(classFile):
        os.remove(classFile)

# Write training features
print('\nExtracting training features\n')
Example #24
import numpy as np
import yaafelib as yaafe

__author__ = 'zerickson'

# Initialization
fp = yaafe.FeaturePlan(sample_rate=16000)
fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')
fp.addFeature('sr: SpectralRolloff blockSize=512 stepSize=256')
fp.addFeature('sf: SpectralFlux blockSize=512 stepSize=256')
engine = yaafe.Engine()
engine.load(fp.getDataFlow())
# Get input metadata
print(engine.getInputs())
# Get output metadata
print(engine.getOutputs())

# Extract features from a random numpy array
audio = np.random.randn(1, 1000000)
feats = engine.processAudio(audio)
print(feats['mfcc'].shape)
print(feats['sf'].shape)
print(feats['sr'].shape)

# Extracting features block per block
engine.reset()
# Iterate over random blocks of audio data
for i in range(1, 10):
    # Generate random data
    audio = np.random.rand(1, 32000)
    engine.writeInput('audio', audio)
    # Process available input and read the features computed so far
    engine.process()
    feats = engine.readAllOutputs()
# Flush internal buffers and read the last feature frames
engine.flush()
feats = engine.readAllOutputs()
Example #25
File: sqk.py Project: sloria/usv
def write_features(audiofile, sample_rate, data):
    """Extract features then write means and std devs to data (tab) file.
    Returns True if extraction was successful, False if unsuccessful.
    
    Arguments:
    audioFile -- WAV file to process
    sampleRate -- sample rate of the audio file in Hz
    data -- the data file to write to
    
    """
    N_MFCC = 12  # Number of MFCC coefficients
    N_LLD = 2  # Number of other low-level descriptors
    N_FUNCS = 4  # Number of functionals

    # Add features to extract
    featplan = yf.FeaturePlan(sample_rate=sample_rate, resample=False)
    featplan.addFeature('mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=12 \
FFTWindow=Hanning MelMinFreq=1200 MelMaxFreq=5050')
    featplan.addFeature('energy: Energy')
    featplan.addFeature('zcr: ZCR')

    # Configure an Engine
    engine = yf.Engine()
    engine.load(featplan.getDataFlow())

    # Extract features
    afp = yf.AudioFileProcessor()
    afp.processFile(engine, audiofile)
    # 2D numpy arrays
    mfccs = engine.readOutput('mfcc')
    energy = engine.readOutput('energy')
    zcr = engine.readOutput('zcr')

    # Write header lines if they don't exist
    data.seek(0, 0)
    if not data.readline():
        # Write attribute header line
        for i in range(N_MFCC):
            # MFCC header
            data.write("mfcc" + str(i + 1) + "_mean" + "\t")
            data.write("mfcc" + str(i + 1) + "_std" + "\t")
            data.write("mfcc" + str(i + 1) + "_skew" + "\t")
            data.write("mfcc" + str(i + 1) + "_kurtosis" + "\t")

        #Energy header
        data.write("energy_mean" + "\t")
        data.write("energy_std" + "\t")
        data.write("energy_skew" + "\t")
        data.write("energy_kurtosis" + "\t")

        # ZCR header
        data.write("zcr_mean" + "\t")
        data.write("zcr_std" + "\t")
        data.write("zcr_skew" + "\t")
        data.write("zcr_kurtosis" + "\t")

        # Filename and classification headers
        data.write("filename" + '\t')
        data.write("classification" + "\n")

        # Write attribute type line
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write("continuous" + "\t")
        # filename is a string
        data.write("string" + '\t')
        # Classification is discrete
        data.write("discrete" + "\n")

        # Write flags
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write('\t')
        data.write("meta" + '\t')
        data.write("class" + '\n')
    data.seek(0, 2)  # Go to end of file.

    # Write feature data
    if mfccs.size > 0 and energy.size > 0 and zcr.size > 0:
        # Write MFCCs
        for i in range(mfccs[0].size):
            mfcc_mean = mfccs[:, i].mean()
            mfcc_std = mfccs[:, i].std()
            mfcc_skew = spstats.skew(mfccs[:, i])
            mfcc_kurt = spstats.kurtosis(mfccs[:, i])
            data.write(
                str(mfcc_mean) + '\t' + str(mfcc_std) + '\t' + str(mfcc_skew) +
                '\t' + str(mfcc_kurt) + '\t')
        # Write energy
        for i in range(energy[0].size):
            energy_mean = energy[:, i].mean()
            energy_std = energy[:, i].std()
            energy_skew = spstats.skew(energy[:, i])
            energy_kurt = spstats.kurtosis(energy[:, i])
            data.write(
                str(energy_mean) + '\t' + str(energy_std) + '\t' +
                str(energy_skew) + '\t' + str(energy_kurt) + '\t')
        # Write ZCR
        for i in range(zcr[0].size):
            zcr_mean = zcr[:, i].mean()
            zcr_std = zcr[:, i].std()
            zcr_skew = spstats.skew(zcr[:, i])
            zcr_kurt = spstats.kurtosis(zcr[:, i])
            data.write(
                str(zcr_mean) + '\t' + str(zcr_std) + '\t' + str(zcr_skew) +
                '\t' + str(zcr_kurt) + '\t')
        return True
    else:
        return False
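
A minimal driver for write_features above (a sketch, not from the original project): the wav path and its 16 kHz rate are hypothetical, the module is assumed to import yaafelib as yf and scipy.stats as spstats, and the data file is opened in 'w+' mode because the function both reads back the header line and appends to the file.

with open('usv_features.tab', 'w+') as data:
    ok = write_features('call_001.wav', 16000, data)
    print('features written' if ok else 'no frames extracted')
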
Example #26
def main():

    parser = OptionParser(version='yaafe.py, Yaafe v%s' %
                          yaafe.getYaafeVersion())
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False, help='display more output')
    parser.add_option('-l', '--list', dest='listFeatures', action='store_true',
                      default=False,
                      help='list all available features and output formats')
    parser.add_option('-d', '--describe', dest='describe', default=None,
                      help='describe a feature or an output format')
    parser.add_option('-f', '--feature', action='append', dest='feature',
                      metavar='FEATUREDEFINITION', help='feature to extract')
    parser.add_option('-c', '--config-file', dest='configFile', default=None,
                      help='feature extraction plan')
    parser.add_option('-r', '--rate', dest='sample_rate', type='int',
                      default=None,
                      help='working samplerate in Hz.')
    parser.add_option('', '--resample', dest='resample', action='store_true',
                      default=False,
                      help='Resample input signal to the analysis sample rate')
    parser.add_option('-n', '--normalize', dest='normalize',
                      action='store_true', default=False,
                      help='normalize input signal by removing mean and scale maximum absolute value to 0.98 (or other value given with --normalize-max)')
    parser.add_option('', '--normalize-max', dest='normalize_max',
                      type='float', default=0.98,
                      help='Normalize input signal so that maximum absolute value reached given value (see -n, --normalize)')
    parser.add_option('-i', '--input', dest='input_list', default=None,
                      help='text file, each line is an audio file to process')
    parser.add_option('-b', '--base-dir', dest='out_dir', default='',
                      help='output directory base')
    parser.add_option('-o', '--output-format', dest='format', default='csv',
                      choices=output_format_choices,
                      help='Features output format: %s' % '|'.join(output_format_choices))
    parser.add_option('-p', '--output-params', dest='formatparams',
                      action='append', default=[],
                      metavar='key=value',
                      help='add an output format parameter (can be used multiple times, use -l options to list output formats and parameters)')
    parser.add_option('', '--dump-dataflow', dest='dumpDataflow', default='',
                      metavar='FILE',
                      help='output dataflow plan (suitable for process with yaafe-engine)')
    parser.add_option('', '--dump-graph', dest='dumpGraph', default='',
                      metavar='FILE',
                      help="output dataflow in dot format (suitable for display with graphviz")
    parser.add_option('-s', '--data-block-size', dest='buffer_size',
                      type='int', default=None,
                      help='Preferred size for data blocks.')
    parser.add_option('', '--show', dest='showFeatures', default=None,
                      help='Show all features in a H5 file')

    (options, audiofiles) = parser.parse_args()

    if options.listFeatures:
        listFeatures()
        return
    if options.describe:
        if options.describe in yaafe.getOutputFormatList():
            describeOutputFormat(options.describe)
        else:
            describeFeature(options.describe)
        return
    if options.showFeatures:
        showFeatures(options.showFeatures)
        return
    if not options.sample_rate:
        print "ERROR: please specify sample rate !"
        return
    if options.buffer_size:
        yaafe.setPreferedDataBlockSize(options.buffer_size)
    if options.verbose:
        yaafe.setVerbose(True)

    # initialize feature plan
    fp = yaafe.FeaturePlan(sample_rate=options.sample_rate,
                           normalize=(options.normalize_max
                                      if options.normalize else None),
                           resample=options.resample)

    if options.configFile:
        if not fp.loadFeaturePlan(options.configFile):
            return
    elif options.feature:
        for feat in options.feature:
            if not fp.addFeature(feat):
                return
    else:
        print "ERROR: please specify features using either a config file or -f [feature]"
        return

    if options.dumpDataflow:
        fp.getDataFlow().save(options.dumpDataflow)
    if options.dumpGraph:
        fp.getDataFlow().save(options.dumpGraph)

    # read audio file list
    if options.input_list:
        fin = open(options.input_list, 'r')
        for line in fin:
            audiofiles.append(line.strip())
        fin.close()

    if audiofiles:
        # initialize engine
        engine = yaafe.Engine()
        if not engine.load(fp.getDataFlow()):
            return
        # initialize file processor
        afp = yaafe.AudioFileProcessor()
        oparams = dict()
        for pstr in options.formatparams:
            pstrdata = pstr.split('=')
            if len(pstrdata) != 2:
                print('ERROR: invalid parameter syntax in "%s" (should be "key=value")' % pstr)
                return
            oparams[pstrdata[0]] = pstrdata[1]
        afp.setOutputFormat(options.format, options.out_dir, oparams)
        # process audio files
        for audiofile in audiofiles:
            afp.processFile(engine, audiofile)