Example #1
def extract_audio_features(sigdata):
    '''Extracts a bunch of audio features using YAAFE
    '''
    window = 'Hanning'
    # using 80 / 40 here produces NaNs in mel spectrum, for some reason
    block = 120
    step = 60
    
    fp = yl.FeaturePlan(sample_rate=SAMPLE_RATE)
    fp.addFeature('CDOD: ComplexDomainOnsetDetection FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('LPC: LPC LPCNbCoeffs=4 blockSize=%d stepSize=%d' % (block, step))
    fp.addFeature('MelSpec: MelSpectrum FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('MFCC: MFCC CepsIgnoreFirstCoeff=1 CepsNbCoeffs=12 FFTWindow=%s MelMaxFreq=600 MelMinFreq=30 MelNbFilters=40 blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SF: SpectralFlux FFTWindow=%s FluxSupport=Increase blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SpecStats: SpectralShapeStatistics FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SpecSlope: SpectralSlope FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    fp.addFeature('SpecVar: SpectralVariation FFTWindow=%s blockSize=%d stepSize=%d' % (window, block, step))
    df = fp.getDataFlow()
    # df.display()
    
    engine = yl.Engine()
    engine.load(df)
    
    feats = []
    for cnt in range(sigdata.shape[0]):
        signal = np.reshape(sigdata[cnt,:],[1,-1])
        feats.append(engine.processAudio(signal))
    
    return feats
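A minimal call sketch for the helper above; this is only an illustration and assumes the same module defines SAMPLE_RATE, imports yaafelib as yl and numpy as np, and that sigdata holds one signal per row:

sigs = np.random.randn(3, SAMPLE_RATE)   # three one-second placeholder signals
feats = extract_audio_features(sigs)
# each feats[i] is a dict mapping 'CDOD', 'LPC', 'MelSpec', 'MFCC', ... to (n_frames, n_dims) arrays
print(sorted(feats[0].keys()))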
Example #2
def createAFP():
    engine = yaafelib.Engine()
    fp = yaafelib.FeaturePlan(sample_rate=16000)
    fp.addFeature('energy: Energy')
    fp.addFeature('mfcc: MFCC blockSize=2048 stepSize=1024')
    df = fp.getDataFlow()
    engine.load(df)
    afp = yaafelib.AudioFileProcessor()
    return afp, engine
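The afp/engine pair returned above is typically used with processFile and readAllOutputs, as the later examples in this list do; a minimal sketch (the .wav path is a placeholder):

afp, engine = createAFP()
afp.processFile(engine, 'speech.wav')   # decode the file and run it through the dataflow
outputs = engine.readAllOutputs()       # {'energy': array, 'mfcc': array}
print(outputs['mfcc'].shape)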
Example #3
def feature_indices():
    fp = yaafe.FeaturePlan()
    fp.loadFeaturePlan('features.txt')
    df = fp.getDataFlow()
    engine = yaafe.Engine()
    engine.load(df)
    indices = sorted([(name, feat['size'])
                      for (name, feat) in engine.getOutputs().items()])
    return sum([[(name, i) for i in range(size)] for (name, size) in indices],
               [])
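The 'features.txt' loaded above is a plain YAAFE feature-plan file with one 'name: Transform param=value ...' line per feature; a small hypothetical example of its contents:

mfcc: MFCC blockSize=512 stepSize=256
zcr: ZCR blockSize=512 stepSize=256
flux: SpectralFlux blockSize=512 stepSize=256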
Example #4
    def __call__(self, path):
        """Extract features

        Parameters
        ----------
        path : path to .wav file

        Returns
        -------
        data : numpy array

        """

        # --- load audio file

        sample_rate, y = wav.read(path)

        # --- update data_flow every time sample rate changes
        if not hasattr(self,
                       'sample_rate_') or self.sample_rate_ != sample_rate:
            self.sample_rate_ = sample_rate
            feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate_)
            for name, recipe in self.definition():
                assert feature_plan.addFeature("{name}: {recipe}".format(
                    name=name, recipe=recipe))
            data_flow = feature_plan.getDataFlow()
            self.engine_.load(data_flow)

        # Yaafe needs this: float64, column-contiguous, 2-dimensional
        y = np.array(y, dtype=np.float64, order='C').reshape((1, -1))

        # --- extract features
        features = self.engine_.processAudio(y)
        data = np.hstack([features[name] for name, _ in self.definition()])

        # --- stack features
        n_samples, n_features = data.shape
        zero_padding = self.stack // 2
        if self.stack % 2 == 0:
            expanded_data = np.concatenate(
                (np.zeros((zero_padding, n_features)) + data[0], data,
                 np.zeros((zero_padding - 1, n_features)) + data[-1]))
        else:
            expanded_data = np.concatenate(
                (np.zeros((zero_padding, n_features)) + data[0], data,
                 np.zeros((zero_padding, n_features)) + data[-1]))

        data = np.lib.stride_tricks.as_strided(expanded_data,
                                               shape=(n_samples,
                                                      n_features * self.stack),
                                               strides=data.strides)

        self.engine_.reset()

        return data
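The as_strided call above builds overlapping context windows without copying the data. A toy check of the same trick, with assumed values (stack=3, 4 frames, 2 features):

import numpy as np

data = np.arange(8, dtype=np.float64).reshape(4, 2)   # 4 frames, 2 features
stack, pad = 3, 1
expanded = np.concatenate((np.zeros((pad, 2)) + data[0], data,
                           np.zeros((pad, 2)) + data[-1]))
stacked = np.lib.stride_tricks.as_strided(
    expanded, shape=(4, 2 * stack), strides=data.strides)
print(stacked)   # row i holds the features of frames i-1, i, i+1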
Example #5
 def __init__(self, app_config, rate):
     self.ExtractedFeaturesList = ['LPC1_mean', 'LSF7_min', 'SpectralFlatness_min',
                                   'SSS_centroid_min', 'SSS_spread_min', 'PerceptualSpread_min',
                                   'SpectralSlope_min', 'PerceptualSharpness_min', 'SpectralDecrease_max',
                                   'OBSI0_mm', 'SpectralRolloff_min']
     self._rate = rate
     feature_plan = yaafelib.FeaturePlan(sample_rate=rate)
     feature_plan_path = os.path.join(app_config.program_directory, 'features.config')
     success = feature_plan.loadFeaturePlan(feature_plan_path)
     if not success:
         sys.exit('Feature plan not loaded correctly')
     self._engine = yaafelib.Engine()
     self._engine.load(feature_plan.getDataFlow())
Example #6
def yaafe2features(wavefiles, out_file, feature_type='MFCC'):
    """Generate features with yaafe and put them in h5features format.

    Whole wavefiles are encoded as internal h5features files.
    To use them with abkhazia's ABX tasks, these need to be segmented
    according to an abkhazia segments.txt
    (abkhazia/utilities/segment_features.py can be used for this)

    Supported feature types:
    - 'MFCC' (default)
    - 'CMSP13' (cubic-root-compressed 13-frequency-channels Mel spectrogram)
    """
    assert feature_type in ['MFCC', 'CMSP13'], \
        'Unsupported feature_type {0}'.format(feature_type)

    feature_plan = ya.FeaturePlan(sample_rate=16000)
    if feature_type == 'MFCC':
        feat_name = 'mfcc'
        feature_plan.addFeature('{0}: MFCC blockSize=400 stepSize=160'.format(
            feat_name))  # 25 ms block, 10 ms step
    elif feature_type == 'CMSP13':
        feat_name = 'melsp'
        feature_plan.addFeature(
            '{0}: MelSpectrum MelNbFilters=13 blockSize=400 stepSize=160'.
            format(feat_name))  # 25 ms block, 10 ms step

    engine = ya.Engine()
    engine.load(feature_plan.getDataFlow())

    wav_ids = []
    times = []
    features = []
    for wavefile in wavefiles:
        wav_ids.append(p.splitext(p.basename(wavefile))[0])
        afp = ya.AudioFileProcessor()
        afp.processFile(engine, wavefile)
        feat_out = engine.readAllOutputs()[feat_name]

        if feature_type == 'CMSP13':
            # need to add compression by hand
            feat_out = np.power(feat_out, 1 / 3.)

        # times according to:
        # http://yaafe.sourceforge.net/features.html?highlight=mfcc#yaafefeatures.Frames
        nframes = feat_out.shape[0]
        # 0.01 here is ad hoc and dependent on 160 above
        times.append(0.01 * np.arange(nframes))
        features.append(feat_out)
    h5features.write(out_file, 'features', wav_ids, times, features)
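A hypothetical call of the converter above (paths are placeholders); with stepSize=160 at 16 kHz each frame starts 0.01 s after the previous one, which is where the ad hoc 0.01 factor in the times comes from:

wavs = ['spk1_utt1.wav', 'spk1_utt2.wav']
yaafe2features(wavs, 'corpus_mfcc.h5f', feature_type='MFCC')
yaafe2features(wavs, 'corpus_cmsp13.h5f', feature_type='CMSP13')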
Example #7
 def _wave2features(self, wavearray):
     engine = yaafelib.Engine()
     featureplan = yaafelib.FeaturePlan(sample_rate=self.samplerate)
     
     for name, recipe in self.definition:
         assert featureplan.addFeature("{name}: {recipe}".format(name=name, recipe=recipe))
         
     dataflow = featureplan.getDataFlow()
     engine.load(dataflow)
         
     wavearray = np.array(wavearray, dtype=np.float64, order='C').reshape((1, -1))
     features = engine.processAudio(wavearray)
     engine.reset()
     
     return features
Example #8
    def setup(self,
              channels=None,
              samplerate=None,
              blocksize=None,
              totalframes=None):
        super(Yaafe, self).setup(channels, samplerate, blocksize, totalframes)

        # Setup Yaafe Feature plan and Dataflow
        yaafe_feature_plan = yaafelib.FeaturePlan(sample_rate=samplerate)
        for feat in self.feature_plan:
            yaafe_feature_plan.addFeature(feat)

        self.data_flow = yaafe_feature_plan.getDataFlow()

        # Configure a YAAFE engine
        self.yaafe_engine = yaafelib.Engine()
        self.yaafe_engine.load(self.data_flow)
        self.yaafe_engine.reset()
Example #9
    def features(self, feature_set):
        if len(self.frames) == 0:
            return None

        fp = yaafe.FeaturePlan()
        if feature_set in ['auto', 'all']:
            fp.loadFeaturePlan('features.txt')
        else:
            fp.loadFeaturePlan('features_reduced.txt')

        df = fp.getDataFlow()
        engine = yaafe.Engine()
        engine.load(df)
        feats = engine.processAudio(np.array([self.frames]))

        attributes = [mean(values) for (k, values) in sorted(feats.items())]

        return np.concatenate(attributes)
Example #10
    def __init__(self, fs: int, config: dict):
        yaafe_config = {}
        for feature_name, feature_params in config.items():
            if feature_params['use']:
                specs = feature_name + ' ' + str(
                    feature_params['params']).replace("'", '').replace(
                        ",", "").replace(": ", "=")[1:-1]
                yaafe_config[feature_name] = specs

        if yaafe_config:
            feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
            for feature_name, setting in yaafe_config.items():
                feature_plan.addFeature(feature_name + ': ' + setting)
            data_flow = feature_plan.getDataFlow()
            self.engine = yaafelib.Engine()
            self.engine.load(data_flow)
        else:
            self.engine = None
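The config dict expected by this constructor maps YAAFE transform names to a 'use' flag and a 'params' dict; a hypothetical example that yields the plan entry 'MFCC: MFCC blockSize=1024 stepSize=512':

config = {
    'MFCC': {'use': True, 'params': {'blockSize': 1024, 'stepSize': 512}},
    'ZCR': {'use': False, 'params': {'blockSize': 1024, 'stepSize': 512}},
}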
Example #11
    def __init__(self,
                 fs: int,
                 block_size=1024,
                 step_size=None,
                 selected_features='all'):
        if not step_size:
            step_size = block_size // 2

        features_config = {
            'Chroma':
            f'Chroma2 CQTAlign=c  CQTBinsPerOctave=48  CQTMinFreq=27.5  CQTNbOctaves=7  CZBinsPerSemitone=1  CZNbCQTBinsAggregatedToPCPBin=-1  CZTuning=440  stepSize={step_size}',
            'LPC':
            f'LPC LPCNbCoeffs=1  blockSize={block_size}  stepSize={step_size}',
            'LSF': f'LSF blockSize={block_size}  stepSize={step_size}',
            'MFCC':
            f'MFCC CepsIgnoreFirstCoeff=1  CepsNbCoeffs=13  FFTWindow=Hanning  MelMaxFreq=6000.0  MelMinFreq=400.0  MelNbFilters=40  blockSize={block_size}  stepSize={step_size}',
            'OBSI':
            f'OBSI FFTLength=0  FFTWindow=Hanning  OBSIMinFreq=27.5  blockSize={block_size}  stepSize={step_size}',
            'SpectralCrestFactorPerBand':
            f'SpectralCrestFactorPerBand FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralDecrease':
            f'SpectralDecrease FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralFlatness':
            f'SpectralFlatness FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralFlux':
            f'SpectralFlux FFTLength=0  FFTWindow=Hanning  FluxSupport=All  blockSize={block_size}  stepSize={step_size}',
            'SpectralRolloff':
            f'SpectralRolloff FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'SpectralVariation':
            f'SpectralVariation FFTLength=0  FFTWindow=Hanning  blockSize={block_size}  stepSize={step_size}',
            'ZCR': f'ZCR blockSize={block_size}  stepSize={step_size}'
        }

        self.fs = fs
        if selected_features == 'all':
            selected_features = features_config.keys()
        feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
        for feature_name, setting in features_config.items():
            if feature_name in selected_features:
                feature_plan.addFeature(feature_name + ': ' + setting)
        data_flow = feature_plan.getDataFlow()
        self.engine = yaafelib.Engine()
        self.engine.load(data_flow)
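A sketch of feeding the configured engine afterwards; a hedged illustration that assumes extractor is an instance of the class above built with the default selected_features='all', and y is a mono signal at the same rate:

import numpy as np

y = np.random.randn(44100)   # one second of placeholder audio at fs=44100
out = extractor.engine.processAudio(y.reshape(1, -1).astype('float64'))
print(out['MFCC'].shape, out['ZCR'].shape)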
Example #12
def calculate_spectrogram(y, fs, block_size=1024, step_size=None):
    if step_size is None:
        step_size = block_size // 2
    feature_plan = yaafelib.FeaturePlan(sample_rate=fs, normalize=True)
    feature_plan.addFeature(
        f'MagnitudeSpectrum: MagnitudeSpectrum blockSize={block_size} stepSize={step_size}'
    )
    data_flow = feature_plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(data_flow)
    features = engine.processAudio(y.reshape(1, -1).astype('float64'))

    noverlap = block_size // 2
    spectrum = features['MagnitudeSpectrum']
    time = np.linspace(noverlap / fs, (len(y) - noverlap) / fs,
                       spectrum.shape[0])
    freq = np.linspace(0, fs // 2, num=spectrum.shape[-1])

    return freq, time, spectrum
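A short usage sketch for the helper above, plotting the magnitude spectrogram with matplotlib (the signal is a placeholder):

import numpy as np
import matplotlib.pyplot as plt

fs = 16000
y = np.random.randn(2 * fs)   # two seconds of noise
freq, time, spectrum = calculate_spectrogram(y, fs)
plt.pcolormesh(time, freq, spectrum.T, shading='auto')
plt.xlabel('time [s]')
plt.ylabel('frequency [Hz]')
plt.show()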
Example #13
    def __call__(self, wav):
        """Extract features

        Parameters
        ----------
        wav : string
            Path to wav file.

        Returns
        -------
        features : SlidingWindowFeature

        """

        definition = self.definition()

        # --- prepare the feature plan
        feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
        for name, recipe in definition:
            assert feature_plan.addFeature(
                "{name}: {recipe}".format(name=name, recipe=recipe))

        # --- prepare the Yaafe engine
        data_flow = feature_plan.getDataFlow()

        engine = yaafelib.Engine()
        engine.load(data_flow)

        sample_rate, raw_audio = scipy.io.wavfile.read(wav)
        assert sample_rate == self.sample_rate, "sample rate mismatch"

        audio = np.array(raw_audio, dtype=np.float64, order='C').reshape(1, -1)

        features = engine.processAudio(audio)
        data = np.hstack([features[name] for name, _ in definition])

        sliding_window = YaafeFrame(
            blockSize=self.block_size, stepSize=self.step_size,
            sampleRate=self.sample_rate)

        return SlidingWindowFeature(data, sliding_window)
Example #14
    def get_flow_and_stack(self):

        feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
        stack = []

        # --- coefficients
        # 0 if energy is kept
        # 1 if energy is removed
        definition = ("mfcc: "
                      "MFCC CepsIgnoreFirstCoeff=%d CepsNbCoeffs=%d "
                      "blockSize=%d stepSize=%d" %
                      (0 if self.e else 1, self.coefs + self.e * 1,
                       self.block_size, self.step_size))
        assert feature_plan.addFeature(definition)
        stack.append('mfcc')

        # --- 1st order derivatives
        if self.D or self.De:
            definition = ("mfcc_d: "
                          "MFCC CepsIgnoreFirstCoeff=%d CepsNbCoeffs=%d "
                          "blockSize=%d stepSize=%d > Derivate DOrder=1" %
                          (0 if self.De else 1, self.D * self.coefs +
                           self.De * 1, self.block_size, self.step_size))
            assert feature_plan.addFeature(definition)
            stack.append('mfcc_d')

        # --- 2nd order derivatives
        if self.DD or self.DDe:
            definition = ("mfcc_dd: "
                          "MFCC CepsIgnoreFirstCoeff=%d CepsNbCoeffs=%d "
                          "blockSize=%d stepSize=%d > Derivate DOrder=2" %
                          (0 if self.DDe else 1, self.DD * self.coefs +
                           self.DDe * 1, self.block_size, self.step_size))
            assert feature_plan.addFeature(definition)
            stack.append('mfcc_dd')

        # --- prepare the Yaafe engine
        data_flow = feature_plan.getDataFlow()

        return data_flow, stack
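A sketch of how the returned pair could be consumed, mirroring the pattern of the other examples here (a hedged illustration; extractor and the audio array are placeholders):

import numpy as np

data_flow, stack = extractor.get_flow_and_stack()
engine = yaafelib.Engine()
engine.load(data_flow)
audio = np.zeros((1, 16000))   # float64 signal of shape (1, n_samples)
features = engine.processAudio(audio)
mfcc = np.hstack([features[name] for name in stack])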
Example #15
    def __init__(self, sad_model='etape'):
        """
        Parameters
        ----------
        sad_model : string, either 'etape' or 'maya'
            Selects the SAD model:
            'etape' is more suited to radio news material,
            'maya' is more suited to speech obtained in noisy environments.
        """
        super(LimsiSad, self).__init__()

        # feature extraction definition
        spec = yaafelib.FeaturePlan(sample_rate=16000)
        spec.addFeature(
            'mfcc: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256')
        spec.addFeature(
            'mfccd1: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=1'
        )
        spec.addFeature(
            'mfccd2: MFCC CepsIgnoreFirstCoeff=0 blockSize=1024 stepSize=256 > Derivate DOrder=2'
        )
        spec.addFeature('zcr: ZCR blockSize=1024 stepSize=256')
        parent_analyzer = Yaafe(spec)
        self.parents.append(parent_analyzer)

        # informative parameters
        # these are not really taken into account by the system,
        # as they are bypassed by the yaafe feature plan
        self.input_blocksize = 1024
        self.input_stepsize = 256

        # load gmm model
        if sad_model not in ['etape', 'maya']:
            raise ValueError(
                "argument sad_model %s not supported. Supported values are 'etape' or 'maya'"
                % sad_model)
        picfname = os.path.join(timeside.__path__[0], 'trained_models',
                                'limsi_sad_%s.pkl' % sad_model)
        self.gmms = pickle.load(open(picfname, 'rb'))
Example #16
def extract_feature(filename, offset):
    fp = yaafelib.FeaturePlan(sample_rate=22050, resample=True)
    fp.loadFeaturePlan('static/featureplan.txt')
    engine = yaafelib.Engine()
    engine.load(fp.getDataFlow())
    print(filename)
    print(offset)

    sound = AudioSegment.from_file(filename)

    halfway_point = int(offset) * 1000
    end = halfway_point + 30000
    first_half = sound[halfway_point:end]
    filename = os.path.join(
        app.config['UPLOAD_FOLDER'],
        os.path.splitext(os.path.basename(filename))[0] + str(offset) +
        ".cliped.wav")
    if not os.path.isfile(filename):
        first_half.export(filename, format="wav")
    afp = yaafelib.AudioFileProcessor()
    afp.processFile(engine, filename)
    feats = engine.readAllOutputs()
    return preprocessed(feats)
Example #17
    def __init__(
        self,
        sample_rate=16000,
        block_size=512,
        step_size=256,
        e=True,
        coefs=11,
        De=False,
        DDe=False,
        D=False,
        DD=False,
    ):

        super(YaafeMFCC, self).__init__(sample_rate=sample_rate,
                                        block_size=block_size,
                                        step_size=step_size)

        self.e = e
        self.coefs = coefs
        self.De = De
        self.DDe = DDe
        self.D = D
        self.DD = DD

        self.definition_ = self.definition()

        # --- prepare the feature plan
        feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate)
        for name, recipe in self.definition_:
            assert feature_plan.addFeature("{name}: {recipe}".format(
                name=name, recipe=recipe))

        # --- prepare the Yaafe engine
        data_flow = feature_plan.getDataFlow()

        self.engine = yaafelib.Engine()
        self.engine.load(data_flow)
Example #18
File: sqk.py  Project: sloria/usv
def write_features(audiofile, sample_rate, data):
    """Extract features then write means and std devs to data (tab) file.
    Returns True if extraction was successful, False if unsuccessful.
    
    Arguments:
    audioFile -- WAV file to process
    sampleRate -- sample rate of the audio file in Hz
    data -- the data file to write to
    
    """
    N_MFCC = 12  # Number of MFCC coefficients
    N_LLD = 2  # Number of other low-level descriptors
    N_FUNCS = 4  # Number of functionals

    # Add features to extract
    featplan = yf.FeaturePlan(sample_rate=sample_rate, resample=False)
    featplan.addFeature('mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=12 \
FFTWindow=Hanning MelMinFreq=1200 MelMaxFreq=5050')
    featplan.addFeature('energy: Energy')
    featplan.addFeature('zcr: ZCR')

    # Configure an Engine
    engine = yf.Engine()
    engine.load(featplan.getDataFlow())

    # Extract features
    afp = yf.AudioFileProcessor()
    afp.processFile(engine, audiofile)
    # 2D numpy arrays
    mfccs = engine.readOutput('mfcc')
    energy = engine.readOutput('energy')
    zcr = engine.readOutput('zcr')

    # Write header lines if they don't exist
    data.seek(0, 0)
    if not data.readline():
        # Write attribute header line
        for i in range(N_MFCC):
            # MFCC header
            data.write("mfcc" + str(i + 1) + "_mean" + "\t")
            data.write("mfcc" + str(i + 1) + "_std" + "\t")
            data.write("mfcc" + str(i + 1) + "_skew" + "\t")
            data.write("mfcc" + str(i + 1) + "_kurtosis" + "\t")

        #Energy header
        data.write("energy_mean" + "\t")
        data.write("energy_std" + "\t")
        data.write("energy_skew" + "\t")
        data.write("energy_kurtosis" + "\t")

        # ZCR header
        data.write("zcr_mean" + "\t")
        data.write("zcr_std" + "\t")
        data.write("zcr_skew" + "\t")
        data.write("zcr_kurtosis" + "\t")

        # Filename and classification headers
        data.write("filename" + '\t')
        data.write("classification" + "\n")

        # Write attribute type line
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write("continuous" + "\t")
        # filename is a string
        data.write("string" + '\t')
        # Classification is discrete
        data.write("discrete" + "\n")

        # Write flags
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write('\t')
        data.write("meta" + '\t')
        data.write("class" + '\n')
    data.seek(0, 2)  # Go to end of file.

    # Write feature data
    if mfccs.size > 0 and energy.size > 0 and zcr.size > 0:
        # Write MFCCs
        for i in range(mfccs[0].size):
            mfcc_mean = mfccs[:, i].mean()
            mfcc_std = mfccs[:, i].std()
            mfcc_skew = spstats.skew(mfccs[:, i])
            mfcc_kurt = spstats.kurtosis(mfccs[:, i])
            data.write(
                str(mfcc_mean) + '\t' + str(mfcc_std) + '\t' + str(mfcc_skew) +
                '\t' + str(mfcc_kurt) + '\t')
        # Write energy
        for i in range(energy[0].size):
            energy_mean = energy[:, i].mean()
            energy_std = energy[:, i].std()
            energy_skew = spstats.skew(energy[:, i])
            energy_kurt = spstats.kurtosis(energy[:, i])
            data.write(
                str(energy_mean) + '\t' + str(energy_std) + '\t' +
                str(energy_skew) + '\t' + str(energy_kurt) + '\t')
        # Write ZCR
        for i in range(zcr[0].size):
            zcr_mean = zcr[:, i].mean()
            zcr_std = zcr[:, i].std()
            zcr_skew = spstats.skew(zcr[:, i])
            zcr_kurt = spstats.kurtosis(zcr[:, i])
            data.write(
                str(zcr_mean) + '\t' + str(zcr_std) + '\t' + str(zcr_skew) +
                '\t' + str(zcr_kurt) + '\t')
        return True
    else:
        return False
    return numFiles


Example #19

# Main
trainAudio = './IRMAS-Dataset/Training'
trainFeats = './trainFeatures.dat'
testAudio = './IRMAS-Dataset/Testing'
testFeats = './testFeatures.dat'
model = './model.svm'

# Get the instruments and their class indices
instruments = getInstruments(trainAudio)

# Specify features
fp = yl.FeaturePlan(sample_rate=44100)
fp.loadFeaturePlan('featureplan.txt')

# Initialize yaafe tools
df = fp.getDataFlow()
eng = yl.Engine()
eng.load(df)
dimensions = 0  # The sum of the dimensions of the features
ftSizes = eng.getOutputs().items()
for ftSize in ftSizes:
    dimensions += int(ftSize[1]['size'])
afp = yl.AudioFileProcessor()

# Remove previous model files
for k in range(len(instruments)):
    classFile = model + '.' + str(k)
Example #20
    def process(self, signal, rate, segments, wavelet_decomposition_level=6, frame_overlap=512, wavelet_type='sym10'):
        """ Extract features """
        
        self.ExtractedFeaturesList = ['LPC1_mean', 'LSF7_min', 'SpectralFlatness_min',
                                      'SSS_centroid_min', 'SSS_spread_min', 'PerceptualSpread_min',
                                      'SpectralSlope_min', 'PerceptualSharpness_min', 'SpectralDecrease_max',
                                      'OBSI0_mm', 'SpectralRolloff_min']
 
        self._signal = signal
        self._rate = rate
        self._segments = segments

        """ Calculate spectral and temporal features """
        feature_plan = yaafelib.FeaturePlan(sample_rate=rate)
        success = feature_plan.loadFeaturePlan('features.config')
        if not success:
            sys.exit('Feature plan not loaded correctly')

        engine = yaafelib.Engine()
        engine.load(feature_plan.getDataFlow())    
        self.Features = engine.processAudio(np.array([signal.astype('float64')]))
        
        """ Initialize wavelet features
            Based on "Wavelets in Recognition of Bird Sounds" by A. Selin et al.
            EURASIP Journal on Advances in Signal Processing 2007, 2007:051806 """
#        wavelets_calculator = wavelets.Wavelets(wavelet_type)
#        wavelet_coefficients = wavelets_calculator.decompose(signal, wavelet_decomposition_level)
#        
        no_segments = len(segments)
                
        self.ExtractedFeatures = np.zeros(shape=(no_segments, len(self.ExtractedFeaturesList)))          
        
        LPC1 = self.Features['LPC'][:,1]
        LSF7 = self.Features['LSF'][:,7]
        SpectralFlatness = self.Features['SpectralFlatness']
        SSS_centroid = self.Features['SpectralShapeStatistics'][:,0]
        SSS_spread = self.Features['SpectralShapeStatistics'][:,1]
        PerceptualSpread = self.Features['PerceptualSpread']
        SpectralSlope = self.Features['SpectralSlope']
        PerceptualSharpness = self.Features['PerceptualSharpness']
        SpectralDecrease = self.Features['SpectralDecrease']
        OBSI0 = self.Features['OBSI'][:,0]
        SpectralRolloff = self.Features['SpectralRolloff']
        
        for i, segment in enumerate(segments):
            start = int(segment[0] / frame_overlap) - 1
            end = int(segment[1] / frame_overlap) + 1
            
            self.ExtractedFeatures[i,0] = LPC1[start:end].mean()
            self.ExtractedFeatures[i,1] = LSF7[start:end].min()
            self.ExtractedFeatures[i,2] = SpectralFlatness[start:end].min()
            self.ExtractedFeatures[i,3] = SSS_centroid[start:end].min()      
            self.ExtractedFeatures[i,4] = SSS_spread[start:end].min() 
            self.ExtractedFeatures[i,5] = PerceptualSpread[start:end].min()  
            self.ExtractedFeatures[i,6] = SpectralSlope[start:end].min()   
            self.ExtractedFeatures[i,7] = PerceptualSharpness[start:end].min() 
            self.ExtractedFeatures[i,8] = SpectralDecrease[start:end].max()  
            self.ExtractedFeatures[i,9] = maxmin(OBSI0[start:end])
            self.ExtractedFeatures[i,10] = SpectralRolloff[start:end].min()      
            
        return self.ExtractedFeatures
Example #21
import numpy
from os import walk
import yaafelib as yaafe

from sklearn import neighbors
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.svm import NuSVC
from sklearn import linear_model
from sklearn.linear_model import SGDClassifier
from sklearn import tree
from sklearn.neighbors.nearest_centroid import NearestCentroid

yaafe.loadComponentLibrary('yaafe-io')
fp = yaafe.FeaturePlan(sample_rate=8000)
fp.loadFeaturePlan('./featureplan')
engine = yaafe.Engine()
engine.load(fp.getDataFlow())
afp = yaafe.AudioFileProcessor()
afp.setOutputFormat('csv', './outputs', {
    'Metadata': 'false',
    'Precision': '2'
})

emotions = ['angry', 'happy', 'neutral', 'unhappy']
feats = ['eng', 'lpc', 'lsf', 'ldd', 'mfc']


def getProperties(audiofile):
    props = ""
    for feat in feats:
Example #22
def compute_features(dataStruct):
    """ This function takes a data structure dictionnaire, and renders several
    audio features as spectral rolloff, spectral slope etc... and store the
    data into the datastructure.

    Args:
        - dataStruct: dictionnaire containing filepath, labels, and list of
          classes

    Returns:
        - dataSet: same as dataStruct, with the given spectral features
    """

    ### --- INIT --- ###
    # DSP settings
    Nwin_bin = 1024
    Hop_bin = round(Nwin_bin)

    # Const
    Nex = len(dataStruct["filepath"])  # Number of files

    # Listing audio features
    features_yaafe = [
        'SpectralFlatness', 'SpectralRolloff', 'PerceptualSharpness',
        'PerceptualSpread', 'SpectralDecrease', 'SpectralVariation',
        'SpectralFlux'
    ]
    features_libro = [
        'Loudness', 'SpectralCentroid', 'SpectralContrast', 'SpectralRolloff',
        'SpectralBandwidth'
    ]

    dataStruct["SpectralFeatures"] = features_yaafe + features_libro

    # New fields
    dataStruct["signal"] = []
    dataStruct["sRate"] = []

    # Creating three fields per descriptor: full temporal vector, mean, and
    # standard deviation
    for f in dataStruct["SpectralFeatures"]:
        dataStruct[f] = []
        dataStruct[f + 'Mean'] = []
        dataStruct[f + 'Std'] = []
        dataStruct[f + 'Max'] = []

    ### --- Compute Feature --- ###
    print('\t \t \t Feature Extraction')
    # Computing the set of features
    for curFile in range(Nex):
        print('%s' % dataStruct["filepath"][curFile])

        # Loading signal
        curSignal, curSRate = lib.load(dataStruct["filepath"][curFile],
                                       mono=True,
                                       offset=0)

        # Storing signal data
        dataStruct["signal"].append(curSignal)
        dataStruct["sRate"].append(curSRate)
        """ YAAFE Extraction """
        # Create YAAFE extraction engine
        fp = yaf.FeaturePlan(sample_rate=curSRate)

        # Formatting string for DSP
        for f in features_yaafe:
            fp.addFeature(f+': '+f+' blockSize='+str(Nwin_bin)+\
                          ' stepSize='+str(Hop_bin))

        engine = yaf.Engine()
        engine.load(fp.getDataFlow())
        features = engine.processAudio(curSignal.astype('float64')\
                                       .reshape((1, curSignal.shape[0])))

        # Computing mean and std for each
        for key, val in sorted(features.items()):
            dataStruct[key].append(val)
            dataStruct[key + 'Mean'].append(np.mean(val))
            dataStruct[key + 'Std'].append(np.std(val))
            dataStruct[key + 'Max'].append(np.max(val))
        """ Librosa extraction """
        # Add the specific features from Librosa
        dataStruct["Loudness"].append(lib.feature.rmse(curSignal))

        # Compute the spectral centroid. [y, sr, S, n_fft, ...]
        dataStruct["SpectralCentroid"].append(
            lib.feature.spectral_centroid(curSignal))

        # Compute spectral contrast [R16] , sr, S, n_fft, ...])
        dataStruct["SpectralContrast"].append(
            lib.feature.spectral_contrast(curSignal))

        # Compute roll-off frequency
        dataStruct["SpectralRolloff"].append(
            lib.feature.spectral_rolloff(curSignal))

        # Compute Bandwidth
        dataStruct["SpectralBandwidth"].append(
            lib.feature.spectral_bandwidth(curSignal))

        # Computing mean and std for each
        for f in features_libro:
            val = dataStruct[f][-1]
            dataStruct[f + 'Mean'].append(np.mean(val))
            dataStruct[f + 'Std'].append(np.std(val))
            dataStruct[f + 'Max'].append(np.max(val))

    ### --- Formatting --- ###
    return dataStruct
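A hypothetical call of compute_features; only the "filepath" key is read by the code above, and the paths below are placeholders:

dataStruct = {'filepath': ['sounds/ex01.wav', 'sounds/ex02.wav']}
dataStruct = compute_features(dataStruct)
print(dataStruct['SpectralFlatnessMean'], dataStruct['SpectralCentroidMean'])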
Example #23
File: yaafe.py  Project: sagnikm95/Yaafe
def main():

    parser = OptionParser(version='yaafe.py, Yaafe v%s' %
                          yaafe.getYaafeVersion())
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False, help='display more output')
    parser.add_option('-l', '--list', dest='listFeatures', action='store_true',
                      default=False,
                      help='list all available features and output formats')
    parser.add_option('-d', '--describe', dest='describe', default=None,
                      help='describe a feature or an output format')
    parser.add_option('-f', '--feature', action='append', dest='feature',
                      metavar='FEATUREDEFINITION', help='feature to extract')
    parser.add_option('-c', '--config-file', dest='configFile', default=None,
                      help='feature extraction plan')
    parser.add_option('-r', '--rate', dest='sample_rate', type='int',
                      default=None,
                      help='working samplerate in Hz.')
    parser.add_option('', '--resample', dest='resample', action='store_true',
                      default=False,
                      help='Resample input signal to the analysis sample rate')
    parser.add_option('-n', '--normalize', dest='normalize',
                      action='store_true', default=False,
                      help='normalize input signal by removing mean and scale maximum absolute value to 0.98 (or other value given with --normalize-max)')
    parser.add_option('', '--normalize-max', dest='normalize_max',
                      type='float', default=0.98,
                      help='Normalize input signal so that maximum absolute value reached given value (see -n, --normalize)')
    parser.add_option('-i', '--input', dest='input_list', default=None,
                      help='text file, each line is an audio file to process')
    parser.add_option('-b', '--base-dir', dest='out_dir', default='',
                      help='output directory base')
    parser.add_option('-o', '--output-format', dest='format', default='csv',
                      choices=output_format_choices,
                      help='Features output format: %s' % '|'.join(output_format_choices))
    parser.add_option('-p', '--output-params', dest='formatparams',
                      action='append', default=[],
                      metavar='key=value',
                      help='add an output format parameter (can be used multiple times, use -l options to list output formats and parameters)')
    parser.add_option('', '--dump-dataflow', dest='dumpDataflow', default='',
                      metavar='FILE',
                      help='output dataflow plan (suitable for process with yaafe-engine)')
    parser.add_option('', '--dump-graph', dest='dumpGraph', default='',
                      metavar='FILE',
                      help="output dataflow in dot format (suitable for display with graphviz")
    parser.add_option('-s', '--data-block-size', dest='buffer_size',
                      type='int', default=None,
                      help='Preferred size for data blocks.')
    parser.add_option('', '--show', dest='showFeatures', default=None,
                      help='Show all features in a H5 file')

    (options, audiofiles) = parser.parse_args()

    if options.listFeatures:
        listFeatures()
        return
    if options.describe:
        if options.describe in yaafe.getOutputFormatList():
            describeOutputFormat(options.describe)
        else:
            describeFeature(options.describe)
        return
    if options.showFeatures:
        showFeatures(options.showFeatures)
        return
    if not options.sample_rate:
        print "ERROR: please specify sample rate !"
        return
    if options.buffer_size:
        yaafe.setPreferedDataBlockSize(options.buffer_size)
    if options.verbose:
        yaafe.setVerbose(True)

    # initialize feature plan
    fp = yaafe.FeaturePlan(sample_rate=options.sample_rate,
                           normalize=(options.normalize_max
                                      if options.normalize else None),
                           resample=options.resample)

    if options.configFile:
        if not fp.loadFeaturePlan(options.configFile):
            return
    elif options.feature:
        for feat in options.feature:
            if not fp.addFeature(feat):
                return
    else:
        print "ERROR: please specify features using either a config file or -f [feature]"
        return

    if options.dumpDataflow:
        fp.getDataFlow().save(options.dumpDataflow)
    if options.dumpGraph:
        fp.getDataFlow().save(options.dumpGraph)

    # read audio file list
    if options.input_list:
        fin = open(options.input_list, 'r')
        for line in fin:
            audiofiles.append(line.strip())
        fin.close()

    if audiofiles:
        # initialize engine
        engine = yaafe.Engine()
        if not engine.load(fp.getDataFlow()):
            return
        # initialize file processor
        afp = yaafe.AudioFileProcessor()
        oparams = dict()
        for pstr in options.formatparams:
            pstrdata = pstr.split('=')
            if len(pstrdata) != 2:
                print 'ERROR: invalid parameter syntax in "%s" (should be "key=value")' % pstr
                return
            oparams[pstrdata[0]] = pstrdata[1]
        afp.setOutputFormat(options.format, options.out_dir, oparams)
        # process audio files
        for audiofile in audiofiles:
            afp.processFile(engine, audiofile)
Example #24
    def __call__(self, item):
        """Extract features

        Parameters
        ----------
        item : dict

        Returns
        -------
        features : SlidingWindowFeature

        """

        # --- load audio file
        y, sample_rate = read_audio(item,
                                    sample_rate=self.sample_rate,
                                    mono=True)

        # --- update data_flow every time sample rate changes
        if not hasattr(self,
                       'sample_rate_') or self.sample_rate_ != sample_rate:
            self.sample_rate_ = sample_rate
            feature_plan = yaafelib.FeaturePlan(sample_rate=self.sample_rate_)
            for name, recipe in self.definition():
                assert feature_plan.addFeature("{name}: {recipe}".format(
                    name=name, recipe=recipe))
            data_flow = feature_plan.getDataFlow()
            self.engine_.load(data_flow)

        # Yaafe needs this: float64, column-contiguous, 2-dimensional
        y = np.array(y, dtype=np.float64, order='C').reshape((1, -1))

        # --- extract features
        features = self.engine_.processAudio(y)
        data = np.hstack([features[name] for name, _ in self.definition()])

        # --- stack features
        n_samples, n_features = data.shape
        zero_padding = self.stack // 2
        if self.stack % 2 == 0:
            expanded_data = np.concatenate(
                (np.zeros((zero_padding, n_features)) + data[0], data,
                 np.zeros((zero_padding - 1, n_features)) + data[-1]))
        else:
            expanded_data = np.concatenate(
                (np.zeros((zero_padding, n_features)) + data[0], data,
                 np.zeros((zero_padding, n_features)) + data[-1]))

        data = np.lib.stride_tricks.as_strided(expanded_data,
                                               shape=(n_samples,
                                                      n_features * self.stack),
                                               strides=data.strides)

        self.engine_.reset()

        # --- return as SlidingWindowFeature
        if np.any(np.isnan(data)):
            uri = get_unique_identifier(item)
            msg = 'Features extracted from "{uri}" contain NaNs.'
            warnings.warn(msg.format(uri=uri))

        return SlidingWindowFeature(data, self.sliding_window_)
Example #25
def export_features(
    path=None,
    audiofiles=None,
    out='../audio_features',
    train_file_path=None,
    pca_params_path="/data/lisa/exp/faces/emotiw_final/caglar_audio/pca_params.pkl"
):

    # prepare the FeaturePlan
    plan = yaafelib.FeaturePlan(sample_rate=48000, normalize=0.99)
    size_info = 'blockSize=1248 stepSize=624'
    global pca

    features = [
        'ZCR', 'TemporalShapeStatistics', 'Energy', 'MagnitudeSpectrum',
        'SpectralVariation', 'SpectralSlope', 'SpectralRolloff',
        'SpectralShapeStatistics', 'SpectralFlux', 'SpectralFlatness',
        'SpectralDecrease', 'SpectralFlatnessPerBand',
        'SpectralCrestFactorPerBand', 'AutoCorrelation', 'LPC', 'LSF',
        'ComplexDomainOnsetDetection', 'MelSpectrum',
        'MFCC: MFCC CepsNbCoeffs=22', 'MFCC_d1: MFCC %s > Derivate DOrder=1',
        'MFCC_d2: MFCC %s > Derivate DOrder=2', 'Envelope',
        'EnvelopeShapeStatistics', 'AmplitudeModulation', 'Loudness',
        'PerceptualSharpness', 'PerceptualSpread', 'OBSI', 'OBSIR'
    ]

    for f in features:
        if ':' not in f: f = '%s: %s' % (f, f)
        if '%s' not in f: f += ' %s'
        plan.addFeature(f % size_info)

    dataflow = plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(dataflow)
    processor = yaafelib.AudioFileProcessor()

    subsets = {'full': 'full'}

    def train_pca(pca=None):
        if pca is not None:
            return pca

        assert train_file_path is not None
        print "Training pca..."
        pca = defaultdict(PCA)
        audiofiles_ = glob.glob('%s/*/*.mp3' % train_file_path)
        if not (os.path.exists(pca_params_path)):
            # extract features from audio files
            for audiofile in audiofiles_:
                processor.processFile(engine, audiofile)
                features = engine.readAllOutputs()
                for subset, keys in subsets.iteritems():
                    if keys == 'full':
                        keys = sorted(features.keys())
                    output = numpy.concatenate([features[k].T for k in keys]).T

                    if 'Train' in audiofile:
                        pca[subset].add(output)

            pca_params = {}
            pca_params["mean"] = pca["full"].mean
            pca_params["covariance"] = pca["full"].covariance
            pca_params["num_frames"] = pca["full"].num_frames
            pca_params["ndim"] = pca["full"].ndim
            cPickle.dump(pca_params, file_create(pca_params_path),
                         cPickle.HIGHEST_PROTOCOL)
        else:
            pca_params = cPickle.load(open(pca_params_path, "rb"))
            pca["full"].covariance = pca_params["covariance"]
            pca["full"].mean = pca_params["mean"]
            pca["full"].num_frames = pca_params["num_frames"]
            pca["full"].ndim = pca_params["ndim"]

        print "PCA training finished."
        return pca

    assert audiofiles is not None
    pca = train_pca(pca)
    assert pca is not None

    for f in features:
        if ':' not in f: f = '%s: %s' % (f, f)
        if '%s' not in f: f += ' %s'
        plan.addFeature(f % size_info)

    # extract features from audio files
    for audiofile in audiofiles:
        audiofile = os.path.join(path, audiofile)
        processor.processFile(engine, audiofile)
        features = engine.readAllOutputs()
        for subset, keys in subsets.iteritems():
            if keys == 'full':
                keys = sorted(features.keys())
            output = numpy.concatenate([features[k].T for k in keys]).T
            pickle_file = audiofile.replace('.mp3',
                                            '.%s.pkl' % subset).replace(
                                                path, out)
            cPickle.dump(output, file_create(pickle_file),
                         cPickle.HIGHEST_PROTOCOL)

    for subset in subsets.iterkeys():
        pca[subset].pca(diagonal=True)
        cPickle.dump(pca[subset], file_create('%s/%s.pca' % (out, subset)))

    print 'Rewriting PCA data...'
    sys.stdout.flush()

    for audiofile in audiofiles:
        for subset in subsets.iterkeys():
            pickle_file = os.path.join(out, audiofile).replace(
                '.mp3', '.%s.pkl' % subset)
            #pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(path, out)
            matrix = cPickle.load(file(pickle_file))
            matrix = pca[subset].feature(matrix)
            cPickle.dump(matrix,
                         file_create(pickle_file.replace('.pkl', '.pca.pkl')),
                         cPickle.HIGHEST_PROTOCOL)
Example #26
    def transform(self, X):
        X_prime = None

        params = {'block_size': 256,
                  'step_size': 128,
                  'mel_min_freq': 0.0,
                  'mel_max_freq': 500.0,
                  'mel_nb_filters': 50,
                  'ceps_ign_first_coef': 0,
                  'fft_len': 0,
                  'do1len': 5,
                  'do2len': 1,
                  'slope_step_nbframes': 5,
                  'slope_nbframes': 9,
                  }

        fp = yf.FeaturePlan(sample_rate=self.sample_rate)

        fp.addFeature('melspec: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f' % params)

        if self.diff:
            fp.addFeature('melspec_diff1: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f'
                      ' > Derivate DOrder=1 DO1Len=%(do1len)d' % params)

        if self.diff2:
            fp.addFeature('melspec_diff2: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f'
                      ' > Derivate DOrder=2 DO2Len=%(do2len)d' % params)

        if self.slope:
            fp.addFeature('melspec_slope: MelSpectrum FFTWindow=Hanning  MelNbFilters=%(mel_nb_filters)d'
                      ' blockSize=%(block_size)d stepSize=%(step_size)d'
                      ' MelMinFreq=%(mel_min_freq)f MelMaxFreq=%(mel_max_freq)f'
                      ' > SlopeIntegrator NbFrames=%(slope_nbframes)d  StepNbFrames=%(slope_step_nbframes)d' % params)

        df = fp.getDataFlow()
        engine = yf.Engine()
        engine.load(df)

        X = X.astype(np.float64)
        x_shape = (1, X.shape[1])

        for i, x in enumerate(X):
            x = x.reshape(x_shape)

            feats = engine.processAudio(x)

            ## if i == 0:
            ##     import IPython
            ##     IPython.embed()

            if X_prime is None:
                fx_groups = tuple(feats.keys())
                n_features = 0
                for fx_group in fx_groups:
                    n_features += feats[fx_group].ravel().shape[0]

                X_prime = np.empty((X.shape[0], n_features), dtype=np.float64)
                print 'n_groups:', len(fx_groups)
                print 'n_features:', n_features

            offset = 0
            for fx_group in fx_groups:
                fxs = feats[fx_group].ravel()
                if fx_group == 'melspec':
                    # log melspec features
                    fxs = np.log10(fxs)
                X_prime[i, offset:(offset + fxs.shape[0])] = fxs
                offset += fxs.shape[0]

        return X_prime
Example #27
                    nargs='+',
                    help='tar archive which contains all the wav files')
parser.add_argument('output', help='output file')
parser.add_argument('--derivatives', action='store_true')

args = parser.parse_args()

parameters = dict(
    step_size=160,  # corresponds to 10 ms (at 16 kHz)
    block_size=640,  # corresponds to 40 ms
    mfcc_coeffs=40,
    mfcc_filters=41  # more filters? (needs to be at least mfcc_coeffs+1, because the first coeff is ignored)
)

fp = yaafelib.FeaturePlan(sample_rate=16000)

mfcc_features = 'MFCC MelNbFilters={mfcc_filters} CepsNbCoeffs={mfcc_coeffs} ' \
                'blockSize={block_size} stepSize={step_size}'.format(**parameters)
energy_features = 'Energy blockSize={block_size} stepSize={step_size}'.format(
    **parameters)

fp.addFeature('mfcc: {}'.format(mfcc_features))
if args.derivatives:
    fp.addFeature('mfcc_d1: {} > Derivate DOrder=1'.format(mfcc_features))
    fp.addFeature('mfcc_d2: {} > Derivate DOrder=2'.format(mfcc_features))

fp.addFeature('energy: {}'.format(energy_features))
if args.derivatives:
    fp.addFeature('energy_d1: {} > Derivate DOrder=1'.format(energy_features))
    fp.addFeature('energy_d2: {} > Derivate DOrder=2'.format(energy_features))
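The snippet stops after the feature plan is built; a hedged continuation showing how it would typically feed an engine and an AudioFileProcessor (the wav list is a placeholder, not the tar handling implied by the original script's arguments):

engine = yaafelib.Engine()
engine.load(fp.getDataFlow())
afp = yaafelib.AudioFileProcessor()

wav_paths = ['utt1.wav', 'utt2.wav']   # placeholder paths
for wav in wav_paths:
    afp.processFile(engine, wav)
    outputs = engine.readAllOutputs()  # 'mfcc', 'energy' (+ derivatives if requested)
    print(wav, outputs['mfcc'].shape)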