Example #1
0
def createAFP():
    """Build a yaafe engine configured for energy and MFCC extraction.

    The feature plan works on 16 kHz audio; MFCCs use a 2048-sample
    block with a 1024-sample step.

    Returns:
        tuple: (afp, engine) where `afp` is a yaafelib.AudioFileProcessor
        and `engine` is a yaafelib.Engine loaded with the dataflow.
    """
    engine = yaafelib.Engine()
    fp = yaafelib.FeaturePlan(sample_rate=16000)
    fp.addFeature('energy: Energy')
    fp.addFeature('mfcc: MFCC blockSize=2048 stepSize=1024')
    # Build the dataflow once and reuse it (the original called
    # getDataFlow() twice and left the first result `df` unused).
    df = fp.getDataFlow()
    engine.load(df)
    afp = yaafelib.AudioFileProcessor()
    return afp, engine
Example #2
0
def yaafe2features(wavefiles, out_file, feature_type='MFCC'):
    """Generate features with yaafe and put them in h5features format.

    Whole wavefiles are encoded as internal h5features files.
    To use them with abkhazia's ABX tasks, these need to be segmented
    according to an abkhazia segments.txt
    (abkhazia/utilities/segment_features.py can be used for this)

    Supported feature types:
    - 'MFCC' (default)
    - 'CMSP13' (cubic-root-compressed 13-frequency-channels Mel spectrogram)
    """
    assert feature_type in ['MFCC', 'CMSP13'], \
        'Unsupported feature_type {0}'.format(feature_type)

    feature_plan = ya.FeaturePlan(sample_rate=16000)
    if feature_type == 'MFCC':
        feat_name = 'mfcc'
        feature_plan.addFeature('{0}: MFCC blockSize=400 stepSize=160'.format(
            feat_name))  # 0.025s + 0.01s
    elif feature_type == 'CMSP13':
        feat_name = 'melsp'
        feature_plan.addFeature(
            '{0}: MelSpectrum MelNbFilters=13 blockSize=400 stepSize=160'.
            format(feat_name))  # 0.025s + 0.01s

    engine = ya.Engine()
    engine.load(feature_plan.getDataFlow())
    # One AudioFileProcessor serves every file; the original allocated a
    # fresh one on each loop iteration for no benefit.
    afp = ya.AudioFileProcessor()

    wav_ids = []
    times = []
    features = []
    for wavefile in wavefiles:
        wav_ids.append(p.splitext(p.basename(wavefile))[0])
        afp.processFile(engine, wavefile)
        feat_out = engine.readAllOutputs()[feat_name]

        if feature_type == 'CMSP13':
            # yaafe's MelSpectrum is uncompressed, so apply the
            # cubic-root compression by hand
            feat_out = np.power(feat_out, 1 / 3.)

        # times according to:
        # http://yaafe.sourceforge.net/features.html?highlight=mfcc#yaafefeatures.Frames
        nframes = feat_out.shape[0]
        # 0.01 here is ad hoc and dependent on 160 above
        times.append(0.01 * np.arange(nframes))
        features.append(feat_out)
    h5features.write(out_file, 'features', wav_ids, times, features)
Example #3
0
def extract_feature(filename, offset):
    """Clip 30 seconds of audio starting at `offset` seconds, run the
    yaafe feature plan on the clip, and return the preprocessed features.

    Arguments:
    filename -- path to the source audio file
    offset -- start position of the clip, in seconds
    """
    fp = yaafelib.FeaturePlan(sample_rate=22050, resample=True)
    fp.loadFeaturePlan('static/featureplan.txt')
    engine = yaafelib.Engine()
    engine.load(fp.getDataFlow())
    print(filename)
    # Parenthesized so the statement is valid on both Python 2 and 3;
    # the original used a bare Python-2 `print offset` statement while
    # the line above already used the call form.
    print(offset)

    sound = AudioSegment.from_file(filename)

    # pydub slices in milliseconds
    halfway_point = int(offset) * 1000
    end = halfway_point + 30000
    first_half = sound[halfway_point:end]
    filename = os.path.join(
        app.config['UPLOAD_FOLDER'],
        os.path.splitext(os.path.basename(filename))[0] + str(offset) +
        ".cliped.wav")
    # Only export the clip once; later calls reuse the cached file.
    if not os.path.isfile(filename):
        first_half.export(filename, format="wav")
    afp = yaafelib.AudioFileProcessor()
    afp.processFile(engine, filename)
    feats = engine.readAllOutputs()
    return preprocessed(feats)
Example #4
0
File: sqk.py Project: sloria/usv
def write_features(audiofile, sample_rate, data):
    """Extract features then write means and std devs to data (tab) file.
    Returns True if extraction was successful, False if unsuccessful.
    
    Arguments:
    audioFile -- WAV file to process
    sampleRate -- sample rate of the audio file in Hz
    data -- the data file to write to
    
    """
    N_MFCC = 12  # Number of MFCC coefficients
    N_LLD = 2  # Number of other low-level descriptors
    N_FUNCS = 4  # Number of functionals

    # Add features to extract
    featplan = yf.FeaturePlan(sample_rate=sample_rate, resample=False)
    featplan.addFeature('mfcc: MFCC CepsIgnoreFirstCoeff=0 CepsNbCoeffs=12 \
FFTWindow=Hanning MelMinFreq=1200 MelMaxFreq=5050')
    featplan.addFeature('energy: Energy')
    featplan.addFeature('zcr: ZCR')

    # Configure an Engine
    engine = yf.Engine()
    engine.load(featplan.getDataFlow())

    # Extract features
    afp = yf.AudioFileProcessor()
    afp.processFile(engine, audiofile)
    # 2D numpy arrays
    mfccs = engine.readOutput('mfcc')
    energy = engine.readOutput('energy')
    zcr = engine.readOutput('zcr')

    # Write header lines if they don't exist
    data.seek(0, 0)
    if not data.readline():
        # Write attribute header line
        for i in range(N_MFCC):
            # MFCC header
            data.write("mfcc" + str(i + 1) + "_mean" + "\t")
            data.write("mfcc" + str(i + 1) + "_std" + "\t")
            data.write("mfcc" + str(i + 1) + "_skew" + "\t")
            data.write("mfcc" + str(i + 1) + "_kurtosis" + "\t")

        #Energy header
        data.write("energy_mean" + "\t")
        data.write("energy_std" + "\t")
        data.write("energy_skew" + "\t")
        data.write("energy_kurtosis" + "\t")

        # ZCR header
        data.write("zcr_mean" + "\t")
        data.write("zcr_std" + "\t")
        data.write("zcr_skew" + "\t")
        data.write("zcr_kurtosis" + "\t")

        # Filename and classification headers
        data.write("filename" + '\t')
        data.write("classification" + "\n")

        # Write attribute type line
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write("continuous" + "\t")
        # filename is a string
        data.write("string" + '\t')
        # Classification is discrete
        data.write("discrete" + "\n")

        # Write flags
        for i in range(N_MFCC * N_FUNCS + (N_LLD * N_FUNCS)):
            data.write('\t')
        data.write("meta" + '\t')
        data.write("class" + '\n')
    data.seek(0, 2)  # Go to end of file.

    # Write feature data
    if mfccs.size > 0 and energy.size > 0 and zcr.size > 0:
        # Write MFCCs
        for i in range(mfccs[0].size):
            mfcc_mean = mfccs[:, i].mean()
            mfcc_std = mfccs[:, i].std()
            mfcc_skew = spstats.skew(mfccs[:, i])
            mfcc_kurt = spstats.kurtosis(mfccs[:, i])
            data.write(
                str(mfcc_mean) + '\t' + str(mfcc_std) + '\t' + str(mfcc_skew) +
                '\t' + str(mfcc_kurt) + '\t')
        # Write energy
        for i in range(energy[0].size):
            energy_mean = energy[:, i].mean()
            energy_std = energy[:, i].std()
            energy_skew = spstats.skew(energy[:, i])
            energy_kurt = spstats.kurtosis(energy[:, i])
            data.write(
                str(energy_mean) + '\t' + str(energy_std) + '\t' +
                str(energy_skew) + '\t' + str(energy_kurt) + '\t')
        # Write ZCR
        for i in range(zcr[0].size):
            zcr_mean = zcr[:, i].mean()
            zcr_std = zcr[:, i].std()
            zcr_skew = spstats.skew(zcr[:, i])
            zcr_kurt = spstats.kurtosis(energy[:, i])
            data.write(
                str(zcr_mean) + '\t' + str(zcr_std) + '\t' + str(zcr_skew) +
                '\t' + str(zcr_kurt) + '\t')
        return True
    else:
        return False
Example #5
0
    # NOTE(review): this line sits inside an `if args.derivatives:` block
    # whose header lies above this chunk — second-order MFCC derivative.
    fp.addFeature('mfcc_d2: {} > Derivate DOrder=2'.format(mfcc_features))

# Energy is always extracted; its derivatives only when requested.
fp.addFeature('energy: {}'.format(energy_features))
if args.derivatives:
    fp.addFeature('energy_d1: {} > Derivate DOrder=1'.format(energy_features))
    fp.addFeature('energy_d2: {} > Derivate DOrder=2'.format(energy_features))

# Output keys must match the feature names registered above.
if args.derivatives:
    keys = ['mfcc', 'mfcc_d1', 'mfcc_d2', 'energy', 'energy_d1', 'energy_d2']
else:
    keys = ['mfcc', 'energy']

# Build the yaafe dataflow and load it into an extraction engine.
df = fp.getDataFlow()
engine = yaafelib.Engine()
engine.load(df)
afp = yaafelib.AudioFileProcessor()

outfile = open(args.output, 'wb')

# First pass: count the regular files across all input tarballs
# (presumably used for progress reporting further down — not visible here).
total = 0
for filename in args.inputs:
    tar = tarfile.open(filename)
    total += len([f for f in tar if f.isfile()])

# Second pass: process each tar member in deterministic (name) order.
for j, filename in enumerate(args.inputs):
    tar = tarfile.open(filename)
    files = sorted([f for f in tar if f.isfile()], key=lambda f: f.name)

    for i, fileinfo in enumerate(files):
        # Read the WAV payload straight from the tar stream; yaafe wants
        # float64 samples.
        _, data = wav.read(tar.extractfile(fileinfo))
        data = data.astype(np.float64)
        # NOTE(review): the loop body continues beyond this chunk.
Example #6
0
def export_features(
    path=None,
    audiofiles=None,
    out='../audio_features',
    train_file_path=None,
    pca_params_path="/data/lisa/exp/faces/emotiw_final/caglar_audio/pca_params.pkl"
):

    # prepare the FeaturePlan
    plan = yaafelib.FeaturePlan(sample_rate=48000, normalize=0.99)
    size_info = 'blockSize=1248 stepSize=624'
    if pca is None:
        global pca

    features = [
        'ZCR', 'TemporalShapeStatistics', 'Energy', 'MagnitudeSpectrum',
        'SpectralVariation', 'SpectralSlope', 'SpectralRolloff',
        'SpectralShapeStatistics', 'SpectralFlux', 'SpectralFlatness',
        'SpectralDecrease', 'SpectralFlatnessPerBand',
        'SpectralCrestFactorPerBand', 'AutoCorrelation', 'LPC', 'LSF',
        'ComplexDomainOnsetDetection', 'MelSpectrum',
        'MFCC: MFCC CepsNbCoeffs=22', 'MFCC_d1: MFCC %s > Derivate DOrder=1',
        'MFCC_d2: MFCC %s > Derivate DOrder=2', 'Envelope',
        'EnvelopeShapeStatistics', 'AmplitudeModulation', 'Loudness',
        'PerceptualSharpness', 'PerceptualSpread', 'OBSI', 'OBSIR'
    ]

    for f in features:
        if ':' not in f: f = '%s: %s' % (f, f)
        if '%s' not in f: f += ' %s'
        plan.addFeature(f % size_info)

    dataflow = plan.getDataFlow()
    engine = yaafelib.Engine()
    engine.load(dataflow)
    processor = yaafelib.AudioFileProcessor()

    subsets = {'full': 'full'}

    def train_pca(pca=None):
        if pca is not None:
            return pca

        assert train_file_path is not None
        print "Training pca..."
        pca = defaultdict(PCA)
        audiofiles_ = glob.glob('%s/*/*.mp3' % train_file_path)
        if not (os.path.exists(pca_params_path)):
            # extract features from audio files
            for audiofile in audiofiles_:
                processor.processFile(engine, audiofile)
                features = engine.readAllOutputs()
                for subset, keys in subsets.iteritems():
                    if keys == 'full':
                        keys = sorted(features.keys())
                    output = numpy.concatenate([features[k].T for k in keys]).T

                    if 'Train' in audiofile:
                        pca[subset].add(output)

            pca_params = {}
            pca_params["mean"] = pca["full"].mean
            pca_params["covariance"] = pca["full"].covariance
            pca_params["num_frames"] = pca["full"].num_frames
            pca_params["ndim"] = pca["full"].ndim
            cPickle.dump(pca_params, file_create(pca_params_path),
                         cPickle.HIGHEST_PROTOCOL)
        else:
            pca_params = cPickle.load(open(pca_params_path, "rb"))
            pca["full"].covariance = pca_params["covariance"]
            pca["full"].mean = pca_params["mean"]
            pca["full"].num_frames = pca_params["num_frames"]
            pca["full"].ndim = pca_params["ndim"]

        print "PCA training finished."
        return pca

    assert audiofiles is not None
    pca = train_pca(pca)
    assert pca is not None

    for f in features:
        if ':' not in f: f = '%s: %s' % (f, f)
        if '%s' not in f: f += ' %s'
        plan.addFeature(f % size_info)

    # extract features from audio files
    for audiofile in audiofiles:
        audiofile = os.path.join(path, audiofile)
        processor.processFile(engine, audiofile)
        features = engine.readAllOutputs()
        for subset, keys in subsets.iteritems():
            if keys == 'full':
                keys = sorted(features.keys())
            output = numpy.concatenate([features[k].T for k in keys]).T
            pickle_file = audiofile.replace('.mp3',
                                            '.%s.pkl' % subset).replace(
                                                path, out)
            cPickle.dump(output, file_create(pickle_file),
                         cPickle.HIGHEST_PROTOCOL)

    for subset in subsets.iterkeys():
        pca[subset].pca(diagonal=True)
        cPickle.dump(pca[subset], file_create('%s/%s.pca' % (out, subset)))

    print 'Rewriting PCA data...'
    sys.stdout.flush()

    for audiofile in audiofiles:
        for subset in subsets.iterkeys():
            pickle_file = os.path.join(out, audiofile).replace(
                '.mp3', '.%s.pkl' % subset)
            #pickle_file = audiofile.replace('.mp3', '.%s.pkl' % subset).replace(path, out)
            matrix = cPickle.load(file(pickle_file))
            matrix = pca[subset].feature(matrix)
            cPickle.dump(matrix,
                         file_create(pickle_file.replace('.pkl', '.pca.pkl')),
                         cPickle.HIGHEST_PROTOCOL)
Example #7
0
def main():
    """Command-line entry point: parse options, build a yaafe feature
    plan, and run the extraction engine over the given audio files."""

    parser = OptionParser(version='yaafe.py, Yaafe v%s' %
                          yaafe.getYaafeVersion())
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      default=False, help='display more output')
    parser.add_option('-l', '--list', dest='listFeatures', action='store_true',
                      default=False,
                      help='list all available features and output formats')
    parser.add_option('-d', '--describe', dest='describe', default=None,
                      help='describe a feature or an output format')
    parser.add_option('-f', '--feature', action='append', dest='feature',
                      metavar='FEATUREDEFINITION', help='feature to extract')
    parser.add_option('-c', '--config-file', dest='configFile', default=None,
                      help='feature extraction plan')
    parser.add_option('-r', '--rate', dest='sample_rate', type='int',
                      default=None,
                      help='working samplerate in Hz.')
    parser.add_option('', '--resample', dest='resample', action='store_true',
                      default=False,
                      help='Resample input signal to the analysis sample rate')
    parser.add_option('-n', '--normalize', dest='normalize',
                      action='store_true', default=False,
                      help='normalize input signal by removing mean and scale maximum absolute value to 0.98 (or other value given with --normalize-max)')
    parser.add_option('', '--normalize-max', dest='normalize_max',
                      type='float', default=0.98,
                      help='Normalize input signal so that maximum absolute value reached given value (see -n, --normalize)')
    parser.add_option('-i', '--input', dest='input_list', default=None,
                      help='text file, each line is an audio file to process')
    parser.add_option('-b', '--base-dir', dest='out_dir', default='',
                      help='output directory base')
    parser.add_option('-o', '--output-format', dest='format', default='csv',
                      choices=output_format_choices,
                      help='Features output format: %s' % '|'.join(output_format_choices))
    parser.add_option('-p', '--output-params', dest='formatparams',
                      action='append', default=[],
                      metavar='key=value',
                      help='add an output format parameter (can be used multiple times, use -l options to list output formats and parameters)')
    parser.add_option('', '--dump-dataflow', dest='dumpDataflow', default='',
                      metavar='FILE',
                      help='output dataflow plan (suitable for process with yaafe-engine)')
    # Fixed: the original help string was missing the closing parenthesis.
    parser.add_option('', '--dump-graph', dest='dumpGraph', default='',
                      metavar='FILE',
                      help="output dataflow in dot format (suitable for display with graphviz)")
    parser.add_option('-s', '--data-block-size', dest='buffer_size',
                      type='int', default=None,
                      help='Prefered size for data blocks.')
    parser.add_option('', '--show', dest='showFeatures', default=None,
                      help='Show all features in a H5 file')

    (options, audiofiles) = parser.parse_args()

    # Informational modes return immediately without extracting anything.
    if options.listFeatures:
        listFeatures()
        return
    if options.describe:
        if options.describe in yaafe.getOutputFormatList():
            describeOutputFormat(options.describe)
        else:
            describeFeature(options.describe)
        return
    if options.showFeatures:
        showFeatures(options.showFeatures)
        return
    if not options.sample_rate:
        print "ERROR: please specify sample rate !"
        return
    if options.buffer_size:
        yaafe.setPreferedDataBlockSize(options.buffer_size)
    if options.verbose:
        yaafe.setVerbose(True)

    # initialize feature plan
    fp = yaafe.FeaturePlan(sample_rate=options.sample_rate,
                           normalize=(options.normalize_max
                                      if options.normalize else None),
                           resample=options.resample)

    if options.configFile:
        if not fp.loadFeaturePlan(options.configFile):
            return
    elif options.feature:
        for feat in options.feature:
            if not fp.addFeature(feat):
                return
    else:
        print "ERROR: please specify features using either a config file or -f [feature]"
        return

    if options.dumpDataflow:
        fp.getDataFlow().save(options.dumpDataflow)
    if options.dumpGraph:
        # NOTE(review): this uses the same .save() as --dump-dataflow even
        # though the help text promises dot format — confirm against the
        # yaafe DataFlow API whether a dedicated dot dump exists.
        fp.getDataFlow().save(options.dumpGraph)

    # read audio file list: files named on the command line and files
    # listed (one per line) in the --input file are both processed
    if options.input_list:
        fin = open(options.input_list, 'r')
        for line in fin:
            audiofiles.append(line.strip())
        fin.close()

    if audiofiles:
        # initialize engine
        engine = yaafe.Engine()
        if not engine.load(fp.getDataFlow()):
            return
        # initialize file processor
        afp = yaafe.AudioFileProcessor()
        oparams = dict()
        # output-format parameters arrive as 'key=value' strings
        for pstr in options.formatparams:
            pstrdata = pstr.split('=')
            if len(pstrdata) != 2:
                print 'ERROR: invalid parameter syntax in "%s" (should be "key=value")' % pstr
                return
            oparams[pstrdata[0]] = pstrdata[1]
        afp.setOutputFormat(options.format, options.out_dir, oparams)
        # process audio files
        for audiofile in audiofiles:
            afp.processFile(engine, audiofile)
# Get the instruments and their class indices
instruments = getInstruments(trainAudio)

# Specify features
fp = yl.FeaturePlan(sample_rate=44100)
fp.loadFeaturePlan('featureplan.txt')

# Initialize yaafe tools
df = fp.getDataFlow()
eng = yl.Engine()
eng.load(df)
dimensions = 0  # The sum of the dimensions of the features
# Each engine output carries metadata; its 'size' entry is that
# feature's dimensionality, so the total is the sum over all outputs.
ftSizes = eng.getOutputs().items()
for ftSize in ftSizes:
    dimensions += int(ftSize[1]['size'])
afp = yl.AudioFileProcessor()

# Remove previous model files
# (presumably one file per class, named '<model>.<class index>' — the
# training code that writes them is beyond this chunk)
for k in range(len(instruments)):
    classFile = model + '.' + str(k)
    if (os.path.isfile(classFile)):
        os.remove(classFile)

# Write training features
print('\nExtracting training features\n')
numTrainFiles = writeFeatures(trainAudio, trainFeats, instruments)

# Display the number of training files used
print('Number of training files used: ' + str(numTrainFiles) + '\n')

# Train the svm
Example #9
0
from sklearn import neighbors
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.svm import NuSVC
from sklearn import linear_model
from sklearn.linear_model import SGDClassifier
from sklearn import tree
from sklearn.neighbors.nearest_centroid import NearestCentroid

# Load yaafe's I/O component library so features can be written to CSV.
yaafe.loadComponentLibrary('yaafe-io')
fp = yaafe.FeaturePlan(sample_rate=8000)
fp.loadFeaturePlan('./featureplan')
engine = yaafe.Engine()
engine.load(fp.getDataFlow())
afp = yaafe.AudioFileProcessor()
# Write one CSV per feature under ./outputs, two decimal places,
# without metadata header lines.
afp.setOutputFormat('csv', './outputs', {
    'Metadata': 'false',
    'Precision': '2'
})

# Emotion class labels, and the feature-file suffixes read back below
# (presumably matching the names in ./featureplan — confirm there).
emotions = ['angry', 'happy', 'neutral', 'unhappy']
feats = ['eng', 'lpc', 'lsf', 'ldd', 'mfc']

def getProperties(audiofile):
    props = ""
    for feat in feats:
        lines = []
        outfile = "./outputs" + audiofile[1:] + "." + feat + ".csv"
        with open(outfile, 'r') as f: