Exemplo n.º 1
0
def loadAudioFile(inputFilename, pool, options):
    """Load an audio file as mono, optionally trim it and record its metadata.

    The file is decoded with ``essentia.MonoLoader`` at
    ``options['sampleRate']`` (channels are mixed down) and then cut to the
    ``options['startTime']`` .. ``options['endTime']`` window, both expressed
    in seconds.

    Args:
        inputFilename: path of the audio file to load.
        pool: pool receiving ``metadata.duration``,
            ``metadata.duration_processed``, ``metadata.filename`` and
            ``metadata.sample_rate``.
        options: configuration mapping; ``sampleRate``, ``startTime`` and
            ``endTime`` are read. An ``endTime`` that cannot be turned into a
            sample index (``None``, a non-numeric string, infinity, ...)
            means "until the end of the file".

    Returns:
        The (possibly trimmed) audio samples.

    Raises:
        essentia.EssentiaError: if ``startTime`` is not strictly lower than a
            numeric ``endTime``, or if ``startTime`` lies beyond the end of
            the file.
    """
    sampleRate = options['sampleRate']

    audio = essentia.MonoLoader(filename=inputFilename,
                                sampleRate=sampleRate,
                                downmix='mix')()

    # duration of the whole (untrimmed) file, in seconds
    duration = essentia.Duration(sampleRate=sampleRate)(audio)

    # trim audio if asked
    startTime = options['startTime']
    endTime = options['endTime']
    startSample = int(sampleRate * startTime)
    try:
        endSample = int(sampleRate * endTime)
    except (TypeError, ValueError, OverflowError):
        # endTime is not a usable number: process up to the end of the file.
        # This is resolved *before* any comparison so the check below never
        # has to order a number against a non-number.
        endTime = duration
        endSample = int(sampleRate * duration)
    else:
        # an empty or reversed window is a configuration mistake
        if startTime >= endTime:
            raise essentia.EssentiaError(
                'In the configuration file, startTime should be strictly lower than endTime'
            )

    if startTime > duration:
        raise essentia.EssentiaError(
            'The file is too short to be trimmed from second %d to second %d' %
            (startTime, endTime))

    if endTime > duration:
        # the requested end lies past the end of the file: only the start can
        # actually be cut
        if startTime != 0.0:
            INFO('The file is being trimmed from second %d to second %d' %
                 (startTime, duration))
            audio = audio[startSample:]
    elif startTime != 0.0 or endTime != duration:
        INFO('The file is being trimmed from second %d to second %d' %
             (startTime, endTime))
        audio = audio[startSample:endSample]

    pool.add('metadata.duration', duration)
    # float() keeps the fractional seconds even when sampleRate is an integer
    # and the interpreter uses truncating integer division (Python 2)
    pool.add('metadata.duration_processed', float(len(audio)) / sampleRate)

    # add file name and sample rate to pool
    pool.add('metadata.filename', inputFilename)
    pool.add('metadata.sample_rate', sampleRate)

    return audio
Exemplo n.º 2
0
def preProcess(audio, pool, options, namespace=''):
    """Run the configured preprocessing chain over the audio signal.

    The steps named in ``options['preprocessing']`` are applied in order:

    * ``'dckiller'``  -- remove the DC component,
    * ``'normalize'`` -- compute the replay gain, store it in the pool under
      ``<namespace>.replay_gain`` and rescale the signal unless it is silent,
    * ``'eqloud'``    -- apply an equal-loudness filter.

    Args:
        audio: the samples to process.
        pool: pool receiving the replay gain value.
        options: configuration mapping; ``preprocessing`` is always read,
            ``sampleRate`` only by the steps that need it.
        namespace: prefix of the pool key used for the replay gain.

    Returns:
        The processed audio.

    Raises:
        essentia.EssentiaError: when an unknown step name is requested.
    """
    for stepName in toList(options['preprocessing']):
        if stepName == 'dckiller':
            # strip the DC offset
            audio = essentia.DCRemoval()(audio)
            continue

        if stepName == 'eqloud':
            # equal-loudness filtering of the whole signal
            equalLoudness = essentia.EqualLoudness(
                sampleRate=options['sampleRate'])
            audio = equalLoudness(audio)
            continue

        if stepName != 'normalize':
            raise essentia.EssentiaError(
                "Unknown preprocessing step: '%s'" % stepName)

        # normalisation: the replay gain is always measured and stored first
        gainEstimator = essentia.ReplayGain(sampleRate=options['sampleRate'])
        replayGain = gainEstimator(audio)
        pool.add(namespace + '.replay_gain', replayGain)

        # a gain of 68 or more is treated as silence: leave the signal alone.
        # NOTE(review): an earlier comment mentioned an extra 6 dB
        # pre-amplification, but the factor uses the replay gain alone --
        # confirm which one is intended.
        if replayGain < 68.0:
            scaleFactor = 10 ** (replayGain / 20)
            audio = essentia.Scale(factor=scaleFactor)(audio)

    return audio
Exemplo n.º 3
0
def cleanStats(pool, options):
    """Drop unwanted descriptors and collect the statistics to keep for the rest.

    For every extractor listed in ``options['specific']`` that declares an
    ``output`` mapping and appears in ``options['generatedBy']``:

    * descriptors the extractor generated but that are not listed in
      ``output`` are removed from the pool,
    * the statistics requested for the listed descriptors are recorded under
      ``<extractor namespace>.<descriptor>``.

    Requested statistics are then normalised to lists: unsupported names are
    reported on standard output and dropped, and ``'single_gaussian'`` is
    replaced by ``'mean'``, ``'cov'`` and ``'icov'`` (appended at the end).
    Lists coming from ``options`` are updated in place.

    Finally the spectral contrast mean/var and every ``metadata`` descriptor
    found in the pool are marked as ``['copy']``.

    Args:
        pool: descriptor pool; unwanted descriptors are removed from it.
        options: configuration mapping; ``specific`` and ``generatedBy`` are
            read.

    Returns:
        dict mapping descriptor names to the list of statistics to keep.

    Raises:
        essentia.EssentiaError: if ``output`` names a descriptor that the
            extractor did not generate.
    """
    # imported here so the block does not rely on a file-level import
    import importlib

    supportedStats = ['mean', 'min', 'max', 'var', 'dmean', 'dvar', 'dmean2',
                      'dvar2', 'value', 'copy', 'single_gaussian', 'cov', 'icov']
    wantedStats = {}

    for extractor, settings in options['specific'].items():
        if 'output' not in settings or extractor not in options['generatedBy']:
            continue

        outputList = settings['output']
        generated = options['generatedBy'][extractor]
        # each extractor module exposes the namespace its descriptors live in;
        # import_module replaces exec() of a string built from the config
        namespace = importlib.import_module('extractor.' + extractor).namespace

        # check if we're not asking for some inexistent descriptor
        for descriptor in outputList:
            if descriptor not in generated:
                raise essentia.EssentiaError(
                    "Could not find descriptor '" + descriptor +
                    "'. Available are: '" + "', '".join(generated) + "'")

        for descriptor in generated:
            name = namespace + '.' + descriptor
            if descriptor in outputList:
                wantedStats[name] = outputList[descriptor]
            else:
                pool.remove(name)

    # Normalise the requested statistics once, after everything was collected
    # (the pass leaves an already clean entry unchanged).
    for name in list(wantedStats):
        stats = wantedStats[name]
        if not isinstance(stats, list):
            stats = wantedStats[name] = [stats]

        kept = []
        expanded = []
        for stat in stats:
            if stat not in supportedStats:
                # single pre-formatted argument: prints the same text whether
                # print is a statement or a function
                print('Ignoring %s for %s . It is not supported.' % (stat, name))
            elif stat == 'single_gaussian':
                expanded += ['mean', 'cov', 'icov']
            else:
                kept.append(stat)
        # in place, so a list taken from options reflects the cleaned
        # selection as it did before
        stats[:] = kept + expanded

    metaDescs = descriptorNames(pool.descriptorNames(), 'metadata')
    wantedStats['lowlevel.spectral_contrast.mean'] = ['copy']
    wantedStats['lowlevel.spectral_contrast.var'] = ['copy']
    for desc in metaDescs:
        wantedStats['metadata' + '.' + desc] = ['copy']

    return wantedStats
Exemplo n.º 4
0
def compute(profile, inputFilename, outputFilename, userOptions=None):
    """Run the whole extraction pipeline on one file and write the result.

    Steps: load the configuration, load and preprocess the audio, run every
    configured extractor, optionally segment the file, aggregate the
    descriptors and write them to ``outputFilename``.

    Args:
        profile: name of a predefined profile (looked up as
            ``profiles/<profile>_config.yaml`` next to this module) or, if no
            such file can be read, the path of a configuration file.
        inputFilename: path of the audio file to analyse.
        outputFilename: path of the file the descriptors are written to.
        userOptions: optional mapping merged over the loaded configuration.

    Raises:
        essentia.EssentiaError: if ``outputFormat`` is neither ``'xml'`` nor
            ``'yaml'``.
        IOError: if neither the predefined profile nor ``profile`` itself can
            be read.
    """
    if userOptions is None:
        userOptions = {}

    # load profile
    profilePath = os.path.join(os.path.dirname(__file__), 'profiles',
                               '%s_config.yaml' % profile)
    try:
        # try to load the predefined profile, if it exists
        with open(profilePath, 'r') as configFile:
            config = configFile.read()
    except (IOError, OSError):
        # otherwise, just load the file that was specified
        with open(profile, 'r') as configFile:
            config = configFile.read()

    # NOTE(review): yaml.load can construct arbitrary Python objects; if
    # profiles may come from untrusted sources, switch to yaml.safe_load.
    options = yaml.load(config)
    mergeRecursiveDict(options, userOptions)

    # which format for the output?
    # NOTE(review): 'xml' is accepted here, but the result below is always
    # written through essentia.YamlOutput -- confirm whether that is intended.
    outputFormat = options['outputFormat']
    if outputFormat not in ('xml', 'yaml'):
        raise essentia.EssentiaError('output format should be either \'xml\' or \'yaml\'')

    # we need this for dependencies checking
    options['computed'] = []
    options['generatedBy'] = {}

    # get list of extractors to compute
    extractors = options['extractors']

    # create pool & megalopool
    pool = essentia.Pool()

    # load audio file into memory
    audio = loadAudioFile(inputFilename, pool, options)

    # preprocess audio by applying a DC filter, normalization, etc...
    # preprocessing is a special step because it modifies the audio, hence it
    # must be executed before all the other extractors
    audio = preProcess(audio, pool, options, 'metadata')
    options['globalPreprocessing'] = options['preprocessing']
    del options['preprocessing']

    # process all extractors
    computeAllExtractors(extractors, audio, pool, options)

    # process segmentation if asked
    if options['segmentation']['doSegmentation']:
        segments = segmentation.compute(inputFilename, audio, pool, options)

    # remove unwanted descriptors
    wantedStats = cleanStats(pool, options)

    # aggregate the per-frame descriptors into the output pool
    megalopool = essentia.PoolAggregator(exceptions=wantedStats)(pool)
    # special case for spectral contrast, which is only 1 matrix, therefore no
    # stats are computed:
    spectral_contrast_stats(megalopool, 'lowlevel.spectral_contrast', wantedStats)

    # plotting descriptors evolution ('plots' is an optional setting); only
    # the lookup is made tolerant, so a KeyError raised inside the plotting
    # code is no longer silently discarded
    if options.get('plots'):
        import plotting
        plotting.compute(inputFilename, audio, pool, options)

    # compute extractors on segments
    if options['segmentation']['doSegmentation']:
        if options['segmentation']['computeSegments']:
            if len(segments) == 0:
                megalopool.add('void', [0])
            else:
                computeSegments(audio, segments, extractors, megalopool, options)

    # save to output file
    essentia.YamlOutput(filename=outputFilename)(megalopool)
Exemplo n.º 5
0
def _midiPitchHistogram(pitches):
    """Return a normalised 128-bin histogram of MIDI notes for pitches in Hz.

    Each frequency in ``(0, 12600]`` is converted to the nearest MIDI note.
    Frequencies outside that range, and notes that fall outside ``0..127``,
    are counted as "unknown". Bin 0 is then overwritten with the number of
    unknown values, and the histogram is scaled to sum to 1. When nothing at
    all could be counted, the whole weight is put on bin 0.
    """
    from math import log

    counts = [0] * 128
    unknown = 0
    for freq in pitches:
        note = -1
        if 0. < freq <= 12600:
            # Hz -> MIDI note number, rounded to the nearest note (histogram
            # bins must be integers)
            note = int(round(12 * (log(freq / 6.875) / log(2.)) - 3.))
        if 0 <= note < 128:
            counts[note] += 1
        else:
            unknown += 1

    # set 0 midi pitch to be the number of pruned values
    counts[0] = unknown

    total = sum(counts)
    if total == 0:
        histogram = [0.] * 128
        histogram[0] = 1.
        return histogram

    # normalise
    return [count / float(total) for count in counts]


def compute(audio, pool, options):
    """Compute the frame-wise low-level descriptors of ``audio``.

    The signal is cut into frames; for every frame temporal, spectral,
    Bark-band, pitch, dissonance and spectral-complexity descriptors are added
    to ``pool`` under the module-level ``namespace``. Spectral-contrast values
    are gathered in a separate pool and handed to ``spectralContrastPCA``.
    After the frame loop a MIDI pitch histogram and its spread are added.

    Args:
        audio: the samples to analyse.
        pool: pool receiving the descriptors.
        options: configuration mapping; ``sampleRate``, ``frameSize``,
            ``hopSize``, ``windowType`` and ``skipSilence`` are read.

    Raises:
        essentia.EssentiaError: if no frame produced a zero-crossing rate,
            i.e. the file is silent.
    """
    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = essentia.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = essentia.ZeroCrossingRate()

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = essentia.BarkBands(sampleRate=sampleRate)
    centralmoments = essentia.SpectralCentralMoments()
    crest = essentia.Crest()
    centroid = essentia.SpectralCentroid()
    decrease = essentia.SpectralDecrease()
    spectral_contrast = essentia.SpectralContrast(frameSize=frameSize,
                                                  sampleRate=sampleRate,
                                                  numberBands=6,
                                                  lowFrequencyBound=20,
                                                  highFrequencyBound=11000,
                                                  neighbourRatio=0.4,
                                                  staticDistribution=0.15)
    distributionshape = essentia.DistributionShape()
    energy = essentia.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = essentia.EnergyBand(startCutoffFrequency=20.0,
                                          stopCutoffFrequency=150.0,
                                          sampleRate=sampleRate)
    energyband_middle_low = essentia.EnergyBand(startCutoffFrequency=150.0,
                                                stopCutoffFrequency=800.0,
                                                sampleRate=sampleRate)
    energyband_middle_high = essentia.EnergyBand(startCutoffFrequency=800.0,
                                                 stopCutoffFrequency=4000.0,
                                                 sampleRate=sampleRate)
    energyband_high = essentia.EnergyBand(startCutoffFrequency=4000.0,
                                          stopCutoffFrequency=20000.0,
                                          sampleRate=sampleRate)
    flatnessdb = essentia.FlatnessDB()
    flux = essentia.Flux()
    harmonic_peaks = essentia.HarmonicPeaks()
    hfc = essentia.HFC()
    mfcc = essentia.MFCC()
    rolloff = essentia.RollOff()
    rms = essentia.RMS()
    strongpeak = essentia.StrongPeak()

    # pitch algorithms
    pitch_detection = essentia.PitchDetection(frameSize=frameSize,
                                              sampleRate=sampleRate)
    pitch_salience = essentia.PitchSalience()

    # dissonance
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate,
                                            orderBy='frequency')
    dissonance = essentia.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = essentia.SpectralComplexity(magnitudeThreshold=0.005)

    # central moments over the Bark bands; created on the first processed
    # frame, once the number of bands is known, then reused
    barkbands_centralmoments = None

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0

    pitches = []

    progress = Progress(total=total_frames)

    scPool = essentia.Pool()  # pool for spectral contrast

    for frame in frames:

        # silence rate (the 60dB flag is also what decides whether to skip)
        frame_is_silent = essentia.isSilent(frame)
        pool.add(namespace + '.' + 'silence_rate_60dB', frame_is_silent)
        pool.add(namespace + '.' + 'silence_rate_30dB',
                 is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB',
                 is_silent_threshold(frame, -20))

        if options['skipSilence'] and frame_is_silent:
            continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum**2
        pool.add(namespace + '.' + 'spectral_centroid',
                 centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease',
                 decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low',
                 energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low',
                 energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high',
                 energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high',
                 energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak',
                 strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness,
         frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        scPool.add(namespace + '.' + 'scvalleys', sc_valleys)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db',
                 flatnessdb(frame_barkbands))
        if barkbands_centralmoments is None:
            barkbands_centralmoments = essentia.CentralMoments(
                range=len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness,
         barkbands_kurtosis) = distributionshape(
             barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        # every pitch, usable or not, feeds the histogram built after the loop
        pitches.append(frame_pitch)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence',
                 frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity',
                 spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1

    # if there is no 'zerocrossingrate' descriptor, no frame was processed:
    # this is a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(),
                                                 namespace):
        raise essentia.EssentiaError('This is a silent file!')

    spectralContrastPCA(scPool, pool)

    # build pitch value histogram (128 MIDI notes, bin 0 = unknown pitches)
    midipitchhist = _midiPitchHistogram(pitches)
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)

    pitch_centralmoments = essentia.CentralMoments(range=len(midipitchhist) -
                                                   1)
    (pitch_histogram_spread, pitch_histogram_skewness,
     pitch_histogram_kurtosis) = distributionshape(
         pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread',
             pitch_histogram_spread)

    progress.finish()