Example #1
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # spectral algorithms
    energy = essentia.Energy()
    mfcc = essentia.MFCC(highFrequencyBound=8000)

    INFO('Computing Low-Level descriptors necessary for segmentation...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [
            start_of_frame / sampleRate,
            (start_of_frame + frameSize) / sampleRate
        ]
        #pool.setCurrentScope(frameScope)
        pool.add(namespace + '.' + 'scope', frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # need the energy for getting the thumbnail
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'spectral_mfcc', frame_mfcc)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()
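A minimal sketch of how one of these compute() functions might be driven, assuming it is pasted into a script that also defines the module-level namespace string and the INFO/Progress helpers used above; the stand-in helpers, the synthetic audio and the concrete option values below are illustrative assumptions, not taken from the original projects:

import numpy
import essentia

# stand-ins for the module-level context the extractor function expects (assumptions)
namespace = 'lowlevel'

def INFO(msg):
    print(msg)

class Progress:
    def __init__(self, total):
        self.total = total
    def update(self, n):
        pass
    def finish(self):
        pass

# one second of a 440 Hz sine as stand-in input audio
sampleRate = 44100
audio = essentia.array(numpy.sin(2 * numpy.pi * 440.0 * numpy.arange(sampleRate) / sampleRate))

pool = essentia.Pool()
options = {'sampleRate': sampleRate,
           'frameSize': 2048,
           'hopSize': 1024,
           'windowType': 'blackmanharris62',
           'skipSilence': True}

compute(audio, pool, options)
print(pool.descriptorNames())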
Example #2
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize  = options['frameSize']
    hopSize    = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window = essentia.Windowing(size = frameSize, zeroPadding = 0, type = windowType)
    spectrum = essentia.Spectrum(size = frameSize)

    # spectral algorithms
    energy = essentia.Energy()
    mfcc = essentia.MFCC(highFrequencyBound = 8000)

    INFO('Computing Low-Level descriptors necessary for segmentation...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5

    progress = Progress(total = total_frames)

    for frame in frames:

        frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)
        pool.add(namespace + '.' + 'scope', frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
          total_frames -= 1
          start_of_frame += hopSize
          continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # need the energy for getting the thumbnail
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'spectral_mfcc', frame_mfcc)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()
Example #3
def compute(audio, pool, options):

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize  = options['frameSize']
    hopSize    = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = essentia.LPC(order = 10, type = 'warped', sampleRate = sampleRate)
    zerocrossingrate = essentia.ZeroCrossingRate()

    # frame algorithms
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window = essentia.Windowing(size = frameSize, zeroPadding = 0, type = windowType)
    spectrum = essentia.Spectrum(size = frameSize)

    # spectral algorithms
    barkbands = essentia.BarkBands(sampleRate = sampleRate)
    centralmoments = essentia.SpectralCentralMoments()
    crest = essentia.Crest()
    centroid = essentia.SpectralCentroid()
    decrease = essentia.SpectralDecrease()
    spectral_contrast = essentia.SpectralContrast(frameSize = frameSize,
                                                  sampleRate = sampleRate,
                                                  numberBands = 6,
                                                  lowFrequencyBound = 20,
                                                  highFrequencyBound = 11000,
                                                  neighbourRatio = 0.4,
                                                  staticDistribution = 0.15)
    distributionshape = essentia.DistributionShape()
    energy = essentia.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = essentia.EnergyBand(startCutoffFrequency = 20.0, stopCutoffFrequency = 150.0, sampleRate = sampleRate)
    energyband_middle_low = essentia.EnergyBand(startCutoffFrequency = 150.0, stopCutoffFrequency = 800.0, sampleRate = sampleRate)
    energyband_middle_high = essentia.EnergyBand(startCutoffFrequency = 800.0, stopCutoffFrequency = 4000.0, sampleRate = sampleRate)
    energyband_high = essentia.EnergyBand(startCutoffFrequency = 4000.0, stopCutoffFrequency = 20000.0, sampleRate = sampleRate)
    flatnessdb = essentia.FlatnessDB()
    flux = essentia.Flux()
    harmonic_peaks = essentia.HarmonicPeaks()
    hfc = essentia.HFC()
    mfcc = essentia.MFCC()
    rolloff = essentia.RollOff()
    rms = essentia.RMS()
    strongpeak = essentia.StrongPeak()

    # pitch algorithms
    pitch_detection = essentia.PitchDetection(frameSize = frameSize, sampleRate = sampleRate)
    pitch_salience = essentia.PitchSalience()

    # dissonance
    spectral_peaks = essentia.SpectralPeaks(sampleRate = sampleRate, orderBy='frequency')
    dissonance = essentia.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = essentia.SpectralComplexity(magnitudeThreshold = 0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5

    pitches, pitch_confidences = [], []

    progress = Progress(total = total_frames)

    scPool = essentia.Pool() # pool for spectral contrast

    for frame in frames:

        frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        # silence rate
        pool.add(namespace + '.' + 'silence_rate_60dB', essentia.isSilent(frame))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and essentia.isSilent(frame):
          total_frames -= 1
          start_of_frame += hopSize
          continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        scPool.add(namespace + '.' + 'scvalleys', sc_valleys)

        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = essentia.CentralMoments(range = len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        pitches.append(frame_pitch)
        pitch_confidences.append(frame_pitch_confidence)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # if there is no 'zerocrossingrate' descriptor, this must be a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise essentia.EssentiaError('This is a silent file!')

    spectralContrastPCA(scPool, pool)

    # build pitch value histogram
    from math import log
    from numpy import bincount
    # convert from Hz to midi notes
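    # (0.69314718055995 is ln(2), so the expression below is 12*log2(freq/6.875) - 3;
    #  6.875 Hz is six octaves below A4 = 440 Hz, i.e. MIDI note -3, which makes this
    #  equivalent to the usual 69 + 12*log2(freq/440))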
    midipitches = []
    unknown = 0
    for freq in pitches:
        if freq > 0. and freq <= 12600:
            midipitches.append(12*(log(freq/6.875)/0.69314718055995)-3.)
        else:
            unknown += 1

    if len(midipitches) > 0:
      # compute histogram
      midipitchhist = bincount([int(round(p)) for p in midipitches])  # bincount needs integer bins
      # set 0 midi pitch to be the number of pruned values
      midipitchhist[0] = unknown
      # normalise
      midipitchhist = [val/float(sum(midipitchhist)) for val in midipitchhist]
      # zero pad
      for i in range(128 - len(midipitchhist)): midipitchhist.append(0.0)
    else:
      midipitchhist = [0.]*128
      midipitchhist[0] = 1.

    # pitchhist = essentia.array(zip(range(len(midipitchhist)), midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)#, pool.GlobalScope)

    # the code below is the same as the one above:
    #for note in midipitchhist:
    #    pool.add(namespace + '.' + 'spectral_pitch_histogram_values', note)
    #    print "midi note:", note

    pitch_centralmoments = essentia.CentralMoments(range = len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)#, pool.GlobalScope)

    progress.finish()
Example #4
File: sfx.py  Project: Aldor007/essentia
def compute(audio, pool, options):

    INFO('Computing SFX descriptors...')

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize  = options['frameSize']
    hopSize    = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window = essentia.Windowing(size = frameSize, zeroPadding = 0, type = windowType)
    spectrum = essentia.Spectrum(size = frameSize)

    # pitch algorithm
    pitch_detection = essentia.PitchDetection(frameSize = 2048, sampleRate = sampleRate)

    # sfx descriptors
    spectral_peaks = essentia.SpectralPeaks(sampleRate = sampleRate, orderBy = 'frequency')
    harmonic_peaks = essentia.HarmonicPeaks()
    inharmonicity = essentia.Inharmonicity()
    odd2evenharmonicenergyratio = essentia.OddToEvenHarmonicEnergyRatio()
    tristimulus = essentia.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize*0.5
    progress = Progress(total = total_frames)

    for frame in frames:

        frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
          total_frames -= 1
          start_of_frame += hopSize
          continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        (frame_harmonic_frequencies, frame_harmonic_magnitudes) = harmonic_peaks(frame_frequencies, frame_magnitudes, frame_pitch)
        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'inharmonicity', frame_inharmonicity)
            frame_tristimulus = tristimulus(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'tristimulus', frame_tristimulus)
            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'odd2evenharmonicenergyratio', frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize


    envelope = essentia.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = essentia.AudioDecrease(blockSize = len(audio))
    pool.add(namespace + '.' + 'temporal_decrease', decrease(file_envelope))#, pool.GlobalScope)

    centralmoments = essentia.AudioCentralMoments(blockSize = len(audio))
    file_centralmoments = centralmoments(file_envelope)

    distributionshape = essentia.DistributionShape()
    (file_spread, file_skewness, file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + '.' + 'temporal_spread', file_spread)#, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_skewness', file_skewness)#, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_kurtosis', file_kurtosis)#, pool.GlobalScope)

    centroid = essentia.AudioCentroid(blockSize = len(audio))
    pool.add(namespace + '.' + 'temporal_centroid', centroid(file_envelope))#, pool.GlobalScope)

    # effective duration
    effectiveduration = essentia.EffectiveDuration()
    pool.add(namespace + '.' + 'effective_duration', effectiveduration(file_envelope))#, pool.GlobalScope)

    # log attack time
    logattacktime = essentia.LogAttackTime()
    pool.add(namespace + '.' + 'logattacktime', logattacktime(audio))#, pool.GlobalScope)

    # strong decay
    strongdecay = essentia.StrongDecay()
    pool.add(namespace + '.' + 'strongdecay', strongdecay(file_envelope))#, pool.GlobalScope)

    # dynamic profile
    flatness = essentia.FlatnessSFX()
    pool.add(namespace + '.' + 'flatness', flatness(file_envelope))#, pool.GlobalScope)

    # onsets number
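    # (assumes a rhythm extractor has already added 'rhythm.onset_times' to this pool;
    #  the 'pitch profile' section below similarly relies on 'lowlevel.pitch')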
    onsets_number = len(pool.value('rhythm.onset_times')[0])
    pool.add(namespace + '.' + 'onsets_number', onsets_number)#, pool.GlobalScope)

    # morphological descriptors
    max_to_total = essentia.MaxToTotal()
    pool.add(namespace + '.' + 'max_to_total', max_to_total(file_envelope))#, pool.GlobalScope)

    tc_to_total = essentia.TCToTotal(sampleRate = sampleRate)
    pool.add(namespace + '.' + 'tc_to_total', tc_to_total(file_envelope))#, pool.GlobalScope)

    derivativeSFX = essentia.DerivativeSFX(sampleRate = sampleRate)
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + '.' + 'der_av_after_max', der_av_after_max)#, pool.GlobalScope)
    pool.add(namespace + '.' + 'max_der_before_max', max_der_before_max)#, pool.GlobalScope)

    # pitch profile
    pitch = pool.value('lowlevel.pitch')

    if len(pitch) > 1:
       pool.add(namespace + '.' + 'pitch_max_to_total', max_to_total(pitch))#, pool.GlobalScope)

       min_to_total = essentia.MinToTotal()
       pool.add(namespace + '.' + 'pitch_min_to_total', min_to_total(pitch))#, pool.GlobalScope)

       pitch_centroid = essentia.Centroid(range = len(pitch)-1)
       pool.add(namespace + '.' + 'pitch_centroid', pitch_centroid(pitch))#, pool.GlobalScope)

       pitch_after_max_to_before_max_energy_ratio = essentia.AfterMaxToBeforeMaxEnergyRatio()
       pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', pitch_after_max_to_before_max_energy_ratio(pitch))#, pool.GlobalScope)

    else:
       pool.add(namespace + '.' + 'pitch_max_to_total', 0.0)#, pool.GlobalScope)
       pool.add(namespace + '.' + 'pitch_min_to_total', 0.0)#, pool.GlobalScope)
       pool.add(namespace + '.' + 'pitch_centroid', 0.0)#, pool.GlobalScope)
       pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', 0.0)#, pool.GlobalScope)

    progress.finish()
Example #5
def compute(audio, pool, options):

    INFO('Computing Tempo extractor...')

    use_onset = options['useOnset']
    use_bands = options['useBands']

    # frameNumber * hopSize ~= about 6 seconds
    hopSize = options['hopSize']
    frameSize = options['frameSize']
    frameNumber = options['frameNumber']
    frameHop = options['frameHop']
    sampleRate = options['sampleRate']
    tolerance = 0.24
    period_tol = 2
    windowType = options['windowType']

    bands_freq = [
        40.0, 413.16, 974.51, 1818.94, 3089.19, 5000.0, 7874.4, 12198.29,
        17181.13
    ]
    bands_gain = [2.0, 3.0, 2.0, 1.0, 1.2, 2.0, 3.0, 2.5]
    maxbpm = 208
    minbpm = 40
    last_beat_interval = 0.025
    frame_time = float(hopSize) / float(sampleRate)

    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    if use_onset:
        fft = essentia.FFT(size=frameSize)
        cartesian2polar = essentia.CartesianToPolar()
        onset_hfc = essentia.OnsetDetection(method='hfc',
                                            sampleRate=sampleRate)
        onset_complex = essentia.OnsetDetection(method='complex',
                                                sampleRate=sampleRate)
    if use_bands:
        espectrum = essentia.Spectrum(size=frameSize)
        tempotapbands = essentia.FrequencyBands(frequencyBands=bands_freq)
        temposcalebands = essentia.TempoScaleBands(bandsGain=bands_gain)
    tempotap = essentia.TempoTap(numberFrames=frameNumber,
                                 sampleRate=sampleRate,
                                 frameHop=frameHop)
    tempotapticks = essentia.TempoTapTicks(hopSize=hopSize,
                                           sampleRate=sampleRate,
                                           frameHop=frameHop)

    frameTime = float(hopSize) / float(sampleRate)
    frameRate = 1. / frameTime

    nframes = 0
    bpm_estimates_list = []
    ticks = []
    matchingPeriods = []
    oldhfc = 0

    fileLength = len(audio) / sampleRate
    startSilence = 0
    oldSilence = 0
    endSilence = round(fileLength * sampleRate / hopSize) + 1

    for frame in frames:
        windowed_frame = window(frame)
        features = []
        if use_onset:
            complex_fft = fft(windowed_frame)
            (spectrum, phase) = cartesian2polar(complex_fft)
            hfc = onset_hfc(spectrum, phase)
            complexdomain = onset_complex(spectrum, phase)
            difhfc = max(hfc - oldhfc, 0)
            oldhfc = hfc
            features += [hfc, difhfc, complexdomain]
        if use_bands:
            spectrum_frame = espectrum(windowed_frame)
            bands = tempotapbands(spectrum_frame)
            (scaled_bands, cumul) = temposcalebands(bands)
            features += list(scaled_bands)

        features = essentia.array(features)
        (periods, phases) = tempotap(features)
        (these_ticks, these_matchingPeriods) = tempotapticks(periods, phases)
        for period in these_matchingPeriods:
            if period != 0:
                matchingPeriods += [period]
        ticks += list(these_ticks)

        if nframes < 5. * sampleRate / hopSize:
            if isSilent(frame) and startSilence == nframes - 1:
                startSilence = nframes

        if nframes > (fileLength - 5.) * sampleRate / hopSize:
            if isSilent(frame):
                if oldSilence != nframes - 1:
                    endSilence = nframes
                oldSilence = nframes

        nframes += 1

    # make sure we do not remove beats too close to the music
    if startSilence > 0: startSilence -= 1
    endSilence += 1

    # fill the rest of the buffer with zeros
    features = essentia.array([0] * len(features))
    while nframes % frameNumber != 0:
        (periods, phases) = tempotap(features)
        (these_ticks, these_matchingPeriods) = tempotapticks(periods, phases)
        ticks += list(these_ticks)
        matchingPeriods += list(these_matchingPeriods)
        nframes += 1

    if len(ticks) > 2:
        # fill up to end of file
        if fileLength > ticks[-1]:
            lastPeriod = ticks[-1] - ticks[-2]
            while ticks[-1] + lastPeriod < fileLength - last_beat_interval:
                if ticks[-1] > fileLength - last_beat_interval:
                    break
                ticks.append(ticks[-1] + lastPeriod)
    if len(ticks) > 1:
        # remove all negative ticks
        i = 0
        while i < len(ticks):
            if ticks[i] < startSilence / sampleRate * hopSize: ticks.pop(i)
            else: i += 1
        # kill all ticks from 350ms before the end of the song
        i = 0
        while i < len(ticks):
            if ticks[i] > endSilence / sampleRate * hopSize: ticks.pop(i)
            else: i += 1
        # prune values closer than tolerance
        i = 1
        while i < len(ticks):
            if ticks[i] - ticks[i - 1] < tolerance: ticks.pop(i)
            else: i += 1
        # prune all backward offbeat
        i = 3
        while i < len(ticks):
            if    abs( (ticks[i] - ticks[i-2]) - 1.5 * (ticks[i]   - ticks[i-1]) ) < 0.100 \
              and abs( (ticks[i] - ticks[i-1]) -       (ticks[i-2] - ticks[i-3]) ) < 0.100 :
                ticks.pop(i - 2)
            else:
                i += 1

    for period in matchingPeriods:
        if period != 0:
            bpm_estimates_list += [lagtobpm(period, sampleRate, hopSize)]
        #else:
        #  bpm_estimates_list += [ 0 ]

    # bpm estimates
    for bpm_estimate in bpm_estimates_list:
        pool.add(namespace + '.' + 'bpm_estimates', bpm_estimate)

    # estimate the bpm from the list of candidates
    if len(bpm_estimates_list) > 0:
        estimates = [int(round(bpm / 2.)) for bpm in bpm_estimates_list]
        closestBpm = argmax(bincount(estimates)) * 2.  # bincount needs non-negative integer bins
        matching = []
        for bpm in bpm_estimates_list:
            if abs(closestBpm - bpm) < period_tol:
                matching.append(bpm)
        if (len(matching) < 1):
            # something odd happened
            bpm = closestBpm
        else:
            bpm = mean(matching)
    else:
        bpm = 0.
    # convert to floats, as the python bindings do not yet support numpy.float32
    ticks = [float(tick) for tick in ticks]
    pool.add(namespace + '.' + 'bpm', bpm)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'beats_position', ticks)  #, pool.GlobalScope

    bpm_intervals = [ticks[i] - ticks[i - 1] for i in range(1, len(ticks))]
    pool.add(namespace + '.' + 'bpm_intervals',
             bpm_intervals)  #, pool.GlobalScope

    from numpy import histogram
    tempotap_bpms = [60. / i for i in bpm_intervals]
    if len(tempotap_bpms) > 0:
        weight, values = histogram(tempotap_bpms,
                                   bins=250,
                                   range=(0, 250),
                                   normed=True)
    else:
        weight, values = [0.], [0.]
    first_peak_weights = [0] * 250
    secnd_peak_weights = [0] * 250

    for i in range(max(argmax(weight) - 4, 0),
                   min(argmax(weight) + 5, len(weight))):
        first_peak_weights[i] = weight[i]
        weight[i] = 0.
    for i in range(max(argmax(weight) - 4, 0),
                   min(argmax(weight) + 5, len(weight))):
        secnd_peak_weights[i] = weight[i]
        weight[i] = 0.

    pool.add(namespace + '.' + 'first_peak_bpm',
             values[argmax(first_peak_weights)])  #, pool.GlobalScope
    pool.add(
        namespace + '.' + 'first_peak_weight',
        first_peak_weights[argmax(first_peak_weights)])  #, pool.GlobalScope
    if sum(first_peak_weights) != 0.:
        pool.add(namespace + '.' + 'first_peak_spread',
                 1. - first_peak_weights[argmax(first_peak_weights)] /
                 sum(first_peak_weights))  #, pool.GlobalScope
    else:
        pool.add(namespace + '.' + 'first_peak_spread',
                 0.)  #, pool.GlobalScope
    pool.add(namespace + '.' + 'second_peak_bpm',
             values[argmax(secnd_peak_weights)])  #, pool.GlobalScope
    pool.add(
        namespace + '.' + 'second_peak_weight',
        secnd_peak_weights[argmax(secnd_peak_weights)])  #, pool.GlobalScope
    if sum(secnd_peak_weights) != 0.:
        pool.add(namespace + '.' + 'second_peak_spread',
                 1. - secnd_peak_weights[argmax(secnd_peak_weights)] /
                 sum(secnd_peak_weights))  #, pool.GlobalScope
    else:
        pool.add(namespace + '.' + 'second_peak_spread',
                 0.)  #, pool.GlobalScope
    '''
    def rubato(ticks):
        bpm_rubato_python = []
        tolerance = 0.08
        i = 5
        tmp1 = 60./ float(ticks[i  ] - ticks[i-1])
        tmp2 = 60./ float(ticks[i-1] - ticks[i-2])
        tmp3 = 60./ float(ticks[i-2] - ticks[i-3])
        tmp4 = 60./ float(ticks[i-3] - ticks[i-4])
        tmp5 = 60./ float(ticks[i-4] - ticks[i-5])
        for i in range(6, len(ticks)):
            if (  abs(1. - tmp1 / tmp4) >= tolerance
              and abs(1. - tmp2 / tmp5) >= tolerance
              and abs(1. - tmp2 / tmp4) >= tolerance
              and abs(1. - tmp1 / tmp5) >= tolerance
              and abs(1. - tmp1 / tmp2) <= tolerance
              and abs(1. - tmp4 / tmp5) <= tolerance ):
                bpm_rubato_python.append(ticks[i-2])
            tmp5 = tmp4; tmp4 = tmp3; tmp3 = tmp2; tmp2 = tmp1
            tmp1 = 60./ (ticks[i] - ticks[i-1])
        print bpm_rubato_python
        return bpm_rubato_python
    '''
    # FIXME we need better rubato algorithm
    #rubato = essentia.BpmRubato()
    #bpm_rubato_start, bpm_rubato_stop = rubato(ticks)
    #pool.add(namespace + '.' + 'rubato_start', bpm_rubato_start)#, pool.GlobalScope
    #pool.add(namespace + '.' + 'rubato_stop',  bpm_rubato_stop)#,  pool.GlobalScope)

    INFO('100% done...')
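The tempo extractor above reads several option keys that the other extractors do not (useOnset, useBands, frameNumber, frameHop). A sketch of an options dict that satisfies it, with illustrative values chosen so that frameNumber * hopSize covers roughly six seconds at 44.1 kHz, as the comment in the code suggests; the concrete numbers are assumptions, not values taken from the source:

options = {'sampleRate': 44100,
           'frameSize': 1024,
           'hopSize': 256,
           'frameNumber': 1024,   # 1024 * 256 / 44100 ~= 5.9 seconds of feature frames
           'frameHop': 1024,
           'windowType': 'hann',
           'useOnset': True,
           'useBands': True}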
Example #6
def compute(audio, pool, options):
    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # temporal descriptors
    lpc = ess.LPC(order=10, type='warped', sampleRate=sampleRate)
    zerocrossingrate = ess.ZeroCrossingRate()

    # frame algorithms
    frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = ess.Spectrum(size=frameSize)

    # spectral algorithms
    barkbands = ess.BarkBands(sampleRate=sampleRate)
    centralmoments = ess.CentralMoments()
    crest = ess.Crest()
    centroid = ess.Centroid()
    decrease = ess.Decrease()
    spectral_contrast = ess.SpectralContrast(frameSize=frameSize,
                                             sampleRate=sampleRate,
                                             numberBands=6,
                                             lowFrequencyBound=20,
                                             highFrequencyBound=11000,
                                             neighbourRatio=0.4,
                                             staticDistribution=0.15)
    distributionshape = ess.DistributionShape()
    energy = ess.Energy()
    # energyband_bass, energyband_middle and energyband_high parameters come from "standard" hi-fi equalizers
    energyband_bass = ess.EnergyBand(startCutoffFrequency=20.0, stopCutoffFrequency=150.0, sampleRate=sampleRate)
    energyband_middle_low = ess.EnergyBand(startCutoffFrequency=150.0, stopCutoffFrequency=800.0, sampleRate=sampleRate)
    energyband_middle_high = ess.EnergyBand(startCutoffFrequency=800.0, stopCutoffFrequency=4000.0,
                                            sampleRate=sampleRate)
    energyband_high = ess.EnergyBand(startCutoffFrequency=4000.0, stopCutoffFrequency=20000.0, sampleRate=sampleRate)
    flatnessdb = ess.FlatnessDB()
    flux = ess.Flux()
    harmonic_peaks = ess.HarmonicPeaks()
    hfc = ess.HFC()
    mfcc = ess.MFCC()
    rolloff = ess.RollOff()
    rms = ess.RMS()
    strongpeak = ess.StrongPeak()

    # pitch algorithms
    pitch_detection = ess.PitchYinFFT(frameSize=frameSize, sampleRate=sampleRate)
    pitch_salience = ess.PitchSalience()

    # dissonance
    spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
    dissonance = ess.Dissonance()

    # spectral complexity
    # magnitudeThreshold = 0.005 is hardcoded for a "blackmanharris62" frame
    spectral_complexity = ess.SpectralComplexity(magnitudeThreshold=0.005)

    INFO('Computing Low-Level descriptors...')

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    pitches, pitch_confidences = [], []

    progress = Progress(total=total_frames)

    #scPool = es.Pool()  # pool for spectral contrast

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        # pool.setCurrentScope(frameScope)

        # silence rate
        # pool.add(namespace + '.' + 'silence_rate_60dB', es.isSilent(frame))
        pool.add(namespace + '.' + 'silence_rate_60dB', is_silent_threshold(frame, -60))
        pool.add(namespace + '.' + 'silence_rate_30dB', is_silent_threshold(frame, -30))
        pool.add(namespace + '.' + 'silence_rate_20dB', is_silent_threshold(frame, -20))

        if options['skipSilence'] and es.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        # temporal descriptors
        pool.add(namespace + '.' + 'zerocrossingrate', zerocrossingrate(frame))
        (frame_lpc, frame_lpc_reflection) = lpc(frame)
        pool.add(namespace + '.' + 'temporal_lpc', frame_lpc)

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectrum-based descriptors
        power_spectrum = frame_spectrum ** 2
        pool.add(namespace + '.' + 'spectral_centroid', centroid(power_spectrum))
        pool.add(namespace + '.' + 'spectral_decrease', decrease(power_spectrum))
        pool.add(namespace + '.' + 'spectral_energy', energy(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_low', energyband_bass(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_low', energyband_middle_low(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_middle_high', energyband_middle_high(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_energyband_high', energyband_high(frame_spectrum))
        pool.add(namespace + '.' + 'hfc', hfc(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rms', rms(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_flux', flux(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_rolloff', rolloff(frame_spectrum))
        pool.add(namespace + '.' + 'spectral_strongpeak', strongpeak(frame_spectrum))

        # central moments descriptors
        frame_centralmoments = centralmoments(power_spectrum)
        (frame_spread, frame_skewness, frame_kurtosis) = distributionshape(frame_centralmoments)
        pool.add(namespace + '.' + 'spectral_kurtosis', frame_kurtosis)
        pool.add(namespace + '.' + 'spectral_spread', frame_spread)
        pool.add(namespace + '.' + 'spectral_skewness', frame_skewness)

        # dissonance
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        frame_dissonance = dissonance(frame_frequencies, frame_magnitudes)
        pool.add(namespace + '.' + 'dissonance', frame_dissonance)

        # mfcc
        (frame_melbands, frame_mfcc) = mfcc(frame_spectrum)
        pool.add(namespace + '.' + 'mfcc', frame_mfcc)

        # spectral contrast
        (sc_coeffs, sc_valleys) = spectral_contrast(frame_spectrum)
        #scPool.add(namespace + '.' + 'sccoeffs', sc_coeffs)
        #scPool.add(namespace + '.' + 'scvalleys', sc_valleys)
        pool.add(namespace + '.' + 'spectral_contrast', sc_coeffs)


        # barkbands-based descriptors
        frame_barkbands = barkbands(frame_spectrum)
        pool.add(namespace + '.' + 'barkbands', frame_barkbands)
        pool.add(namespace + '.' + 'spectral_crest', crest(frame_barkbands))
        pool.add(namespace + '.' + 'spectral_flatness_db', flatnessdb(frame_barkbands))
        barkbands_centralmoments = ess.CentralMoments(range=len(frame_barkbands) - 1)
        (barkbands_spread, barkbands_skewness, barkbands_kurtosis) = distributionshape(
            barkbands_centralmoments(frame_barkbands))
        pool.add(namespace + '.' + 'barkbands_spread', barkbands_spread)
        pool.add(namespace + '.' + 'barkbands_skewness', barkbands_skewness)
        pool.add(namespace + '.' + 'barkbands_kurtosis', barkbands_kurtosis)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)
        if frame_pitch > 0 and frame_pitch <= 20000.:
            pool.add(namespace + '.' + 'pitch', frame_pitch)
        pitches.append(frame_pitch)
        pitch_confidences.append(frame_pitch_confidence)
        pool.add(namespace + '.' + 'pitch_instantaneous_confidence', frame_pitch_confidence)

        frame_pitch_salience = pitch_salience(frame_spectrum[:-1])
        pool.add(namespace + '.' + 'pitch_salience', frame_pitch_salience)

        # spectral complexity
        pool.add(namespace + '.' + 'spectral_complexity', spectral_complexity(frame_spectrum))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    # if there is no 'zerocrossingrate' descriptor, this must be a silent file
    if 'zerocrossingrate' not in descriptorNames(pool.descriptorNames(), namespace):
        raise ess.EssentiaError('This is a silent file!')

    #spectralContrastPCA(scPool, pool)

    # build pitch value histogram
    from math import log
    from numpy import bincount
    # convert from Hz to midi notes
    midipitches = []
    unknown = 0
    for freq in pitches:
        if freq > 0. and freq <= 12600:
            midipitches.append(12 * (log(freq / 6.875) / 0.69314718055995) - 3.)
        else:
            unknown += 1

    if len(midipitches) > 0:
        # compute histogram
        midipitchhist = bincount([int(round(p)) for p in midipitches])  # bincount needs integer bins
        # set 0 midi pitch to be the number of pruned values
        midipitchhist[0] = unknown
        # normalise
        midipitchhist = [val / float(sum(midipitchhist)) for val in midipitchhist]
        # zero pad
        for i in range(128 - len(midipitchhist)): midipitchhist.append(0.0)
    else:
        midipitchhist = [0.] * 128
        midipitchhist[0] = 1.

    # pitchhist = ess.array(zip(range(len(midipitchhist)), midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram', midipitchhist)  # , pool.GlobalScope)

    # the code below is the same as the one above:
    # for note in midipitchhist:
    #    pool.add(namespace + '.' + 'spectral_pitch_histogram_values', note)
    #    print "midi note:", note

    pitch_centralmoments = ess.CentralMoments(range=len(midipitchhist) - 1)
    (pitch_histogram_spread, pitch_histogram_skewness, pitch_histogram_kurtosis) = distributionshape(
        pitch_centralmoments(midipitchhist))
    pool.add(namespace + '.' + 'spectral_pitch_histogram_spread', pitch_histogram_spread)  # , pool.GlobalScope)

    progress.finish()
Example #7
File: sfx.py  Project: hoinx/sms-tools
def compute(audio, pool, options):
    INFO("Computing SFX descriptors...")

    # analysis parameters
    sampleRate = options["sampleRate"]
    frameSize = options["frameSize"]
    hopSize = options["hopSize"]
    windowType = options["windowType"]

    # frame algorithms
    frames = ess.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = ess.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = ess.Spectrum(size=frameSize)

    # pitch algorithm
    pitch_detection = ess.PitchYinFFT(frameSize=2048, sampleRate=sampleRate)

    # sfx descriptors
    spectral_peaks = ess.SpectralPeaks(sampleRate=sampleRate, orderBy="frequency")
    harmonic_peaks = ess.HarmonicPeaks()
    inharmonicity = ess.Inharmonicity()
    odd2evenharmonicenergyratio = ess.OddToEvenHarmonicEnergyRatio()
    tristimulus = ess.Tristimulus()

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate]
        # pool.setCurrentScope(frameScope)

        if options["skipSilence"] and es.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        frame_frequencies, frame_magnitudes = spectral_peaks(frame_spectrum)

        # ERROR CORRECTION - hoinx 2015-12
        errIdx = np.where(frame_frequencies < 1)
        frame_frequencies = np.delete(frame_frequencies, errIdx)
        frame_magnitudes = np.delete(frame_magnitudes, errIdx)

        (frame_harmonic_frequencies, frame_harmonic_magnitudes) = harmonic_peaks(
            frame_frequencies, frame_magnitudes, frame_pitch
        )
        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + "." + "inharmonicity", frame_inharmonicity)
            frame_tristimulus = tristimulus(frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + "." + "tristimulus", frame_tristimulus)
            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(
                frame_harmonic_frequencies, frame_harmonic_magnitudes
            )
            pool.add(namespace + "." + "odd2evenharmonicenergyratio", frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    envelope = ess.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = ess.Decrease()
    pool.add(namespace + "." + "temporal_decrease", decrease(file_envelope))  # , pool.GlobalScope)

    centralmoments = ess.CentralMoments()
    file_centralmoments = centralmoments(file_envelope)

    distributionshape = ess.DistributionShape()
    (file_spread, file_skewness, file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + "." + "temporal_spread", file_spread)  # , pool.GlobalScope)
    pool.add(namespace + "." + "temporal_skewness", file_skewness)  # , pool.GlobalScope)
    pool.add(namespace + "." + "temporal_kurtosis", file_kurtosis)  # , pool.GlobalScope)

    centroid = ess.Centroid()
    pool.add(namespace + "." + "temporal_centroid", centroid(file_envelope))  # , pool.GlobalScope)

    # effective duration
    effectiveduration = ess.EffectiveDuration()
    pool.add(namespace + "." + "effective_duration", effectiveduration(file_envelope))  # , pool.GlobalScope)

    # log attack time
    logattacktime = ess.LogAttackTime()
    pool.add(namespace + "." + "logattacktime", logattacktime(audio))  # , pool.GlobalScope)

    # strong decay
    strongdecay = ess.StrongDecay()
    pool.add(namespace + "." + "strongdecay", strongdecay(file_envelope))  # , pool.GlobalScope)

    # dynamic profile
    flatness = ess.FlatnessSFX()
    pool.add(namespace + "." + "flatness", flatness(file_envelope))  # , pool.GlobalScope)

    """
    # onsets number
    onsets_number = len(pool['rhythm.onset_times'][0])
    pool.add(namespace + '.' + 'onsets_number', onsets_number)  # , pool.GlobalScope)
    """

    # morphological descriptors
    max_to_total = ess.MaxToTotal()
    pool.add(namespace + "." + "max_to_total", max_to_total(file_envelope))  # , pool.GlobalScope)

    tc_to_total = ess.TCToTotal()
    pool.add(namespace + "." + "tc_to_total", tc_to_total(file_envelope))  # , pool.GlobalScope)

    derivativeSFX = ess.DerivativeSFX()
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + "." + "der_av_after_max", der_av_after_max)  # , pool.GlobalScope)
    pool.add(namespace + "." + "max_der_before_max", max_der_before_max)  # , pool.GlobalScope)

    # pitch profile
    """
    pitch = pool['lowlevel.pitch']

    if len(pitch) > 1:
        pool.add(namespace + '.' + 'pitch_max_to_total', max_to_total(pitch))  # , pool.GlobalScope)

        min_to_total = ess.MinToTotal()
        pool.add(namespace + '.' + 'pitch_min_to_total', min_to_total(pitch))  # , pool.GlobalScope)

        pitch_centroid = ess.Centroid(range=len(pitch) - 1)
        pool.add(namespace + '.' + 'pitch_centroid', pitch_centroid(pitch))  # , pool.GlobalScope)

        pitch_after_max_to_before_max_energy_ratio = ess.AfterMaxToBeforeMaxEnergyRatio()
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio',
                 pitch_after_max_to_before_max_energy_ratio(pitch))  # , pool.GlobalScope)

    else:
        pool.add(namespace + '.' + 'pitch_max_to_total', 0.0)  # , pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_min_to_total', 0.0)  # , pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_centroid', 0.0)  # , pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_after_max_to_before_max_energy_ratio', 0.0)  # , pool.GlobalScope)
    """

    progress.finish()
Example #8
def compute(audio, pool, options):

    INFO('Computing Tempo extractor...')

    use_onset   = options['useOnset']
    use_bands   = options['useBands']

    # frameNumber * hopSize ~= about 6 seconds
    hopSize     = options['hopSize']
    frameSize   = options['frameSize']
    frameNumber = options['frameNumber']
    frameHop    = options['frameHop']
    sampleRate  = options['sampleRate']
    tolerance   = 0.24
    period_tol  = 2
    windowType  = options['windowType']

    bands_freq = [40.0, 413.16, 974.51, 1818.94, 3089.19, 5000.0, 7874.4, 12198.29, 17181.13]
    bands_gain = [2.0, 3.0, 2.0, 1.0, 1.2, 2.0, 3.0, 2.5]
    maxbpm = 208
    minbpm = 40
    last_beat_interval = 0.025
    frame_time = float(hopSize) / float(sampleRate)

    frames           = essentia.FrameGenerator(audio = audio, frameSize = frameSize, hopSize = hopSize)
    window           = essentia.Windowing(size = frameSize, zeroPadding = 0, type = windowType)
    if use_onset:
        fft              = essentia.FFT(size = frameSize)
        cartesian2polar  = essentia.CartesianToPolar()
        onset_hfc        = essentia.OnsetDetection(method = 'hfc', sampleRate = sampleRate)
        onset_complex    = essentia.OnsetDetection(method = 'complex', sampleRate = sampleRate)
    if use_bands:
        espectrum        = essentia.Spectrum(size = frameSize)
        tempotapbands    = essentia.FrequencyBands(frequencyBands = bands_freq)
        temposcalebands  = essentia.TempoScaleBands(bandsGain = bands_gain)
    tempotap         = essentia.TempoTap(numberFrames = frameNumber, sampleRate = sampleRate, frameHop = frameHop)
    tempotapticks    = essentia.TempoTapTicks(hopSize = hopSize, sampleRate = sampleRate, frameHop = frameHop)

    frameTime = float(hopSize) / float(sampleRate)
    frameRate = 1. / frameTime

    nframes = 0
    bpm_estimates_list = []
    ticks = []
    matchingPeriods = []
    oldhfc = 0

    fileLength = len(audio)/sampleRate
    startSilence = 0
    oldSilence = 0
    endSilence = round(fileLength * sampleRate / hopSize) + 1

    for frame in frames:
        windowed_frame = window(frame)
        features = []
        if use_onset:
            complex_fft = fft(windowed_frame)
            (spectrum,phase) = cartesian2polar(complex_fft)
            hfc = onset_hfc(spectrum,phase)
            complexdomain = onset_complex(spectrum,phase)
            difhfc = max(hfc - oldhfc,0)
            oldhfc = hfc
            features += [hfc,difhfc,complexdomain]
        if use_bands:
            spectrum_frame = espectrum(windowed_frame)
            bands = tempotapbands(spectrum_frame)
            (scaled_bands, cumul) = temposcalebands(bands)
            features += list(scaled_bands)

        features = essentia.array(features)
        (periods, phases) = tempotap(features)
        (these_ticks, these_matchingPeriods) = tempotapticks(periods, phases)
        for period in these_matchingPeriods:
          if period != 0:
            matchingPeriods += [ period ]
        ticks += list(these_ticks)

        if nframes < 5. * sampleRate / hopSize:
          if isSilent(frame) and startSilence == nframes - 1:
            startSilence = nframes

        if nframes > (fileLength - 5.) * sampleRate / hopSize:
          if isSilent(frame):
            if oldSilence != nframes - 1:
              endSilence = nframes
            oldSilence = nframes

        nframes += 1

    # make sure we do not remove beats too close to the music
    if startSilence > 0: startSilence -= 1
    endSilence += 1

    # fill the rest of the buffer with zeros
    features = essentia.array([0]*len(features))
    while nframes % frameNumber != 0:
        (periods, phases) = tempotap(features)
        (these_ticks, these_matchingPeriods) = tempotapticks(periods, phases)
        ticks += list(these_ticks)
        matchingPeriods += list(these_matchingPeriods)
        nframes += 1

    if len(ticks) > 2:
      # fill up to end of file
      if fileLength > ticks[-1]:
        lastPeriod = ticks[-1] - ticks[-2]
        while ticks[-1] + lastPeriod < fileLength - last_beat_interval:
          if ticks[-1] > fileLength - last_beat_interval:
            break
          ticks.append(ticks[-1] + lastPeriod)
    if len(ticks) > 1:
      # remove all negative ticks
      i = 0
      while i < len(ticks):
        if ticks[i] < startSilence / sampleRate * hopSize: ticks.pop(i)
        else: i += 1
      # kill all ticks from 350ms before the end of the song
      i = 0
      while i < len(ticks):
        if ticks[i] > endSilence / sampleRate * hopSize: ticks.pop(i)
        else: i += 1
      # prune values closer than tolerance
      i = 1
      while i < len(ticks):
        if ticks[i] - ticks[i-1] < tolerance: ticks.pop(i)
        else: i += 1
      # prune all backward offbeat
      i = 3
      while i < len(ticks):
        if    abs( (ticks[i] - ticks[i-2]) - 1.5 * (ticks[i]   - ticks[i-1]) ) < 0.100 \
          and abs( (ticks[i] - ticks[i-1]) -       (ticks[i-2] - ticks[i-3]) ) < 0.100 :
          ticks.pop(i-2)
        else: i += 1


    for period in matchingPeriods:
      if period != 0:
        bpm_estimates_list += [ lagtobpm(period, sampleRate, hopSize) ]
      #else:
      #  bpm_estimates_list += [ 0 ]

    # bpm estimates
    for bpm_estimate in bpm_estimates_list:
        pool.add(namespace + '.' + 'bpm_estimates', bpm_estimate)

    # estimate the bpm from the list of candidates
    if len(bpm_estimates_list) > 0:
      estimates = [int(round(bpm / 2.)) for bpm in bpm_estimates_list]
      closestBpm = argmax(bincount(estimates)) * 2.  # bincount needs non-negative integer bins
      matching = []
      for bpm in bpm_estimates_list:
        if abs(closestBpm - bpm) < period_tol:
          matching.append(bpm)
      if (len(matching) < 1):
        # something odd happened
        bpm = closestBpm
      else :
        bpm = mean(matching)
    else:
      bpm = 0.
    # convert to floats, as the python bindings do not yet support numpy.float32
    ticks = [float(tick) for tick in ticks]
    pool.add(namespace + '.' + 'bpm', bpm)#, pool.GlobalScope)
    pool.add(namespace + '.' + 'beats_position', ticks)#, pool.GlobalScope

    bpm_intervals = [ticks[i] - ticks[i-1] for i in range(1, len(ticks))]
    pool.add(namespace + '.' + 'bpm_intervals', bpm_intervals)#, pool.GlobalScope

    from numpy import histogram
    tempotap_bpms = [60./i for i in bpm_intervals]
    if len(tempotap_bpms) > 0:
      weight, values = histogram(tempotap_bpms, bins = 250, range = (0,250), normed=True)
    else:
      weight, values = [0.], [0.]
    first_peak_weights = [0] * 250
    secnd_peak_weights = [0] * 250

    for i in range(max(argmax(weight)-4,0), min(argmax(weight)+5,len(weight)) ):
      first_peak_weights[i] = weight[i]
      weight[i] = 0.
    for i in range(max(argmax(weight)-4,0), min(argmax(weight)+5,len(weight)) ):
      secnd_peak_weights[i] = weight[i]
      weight[i] = 0.

    pool.add(namespace + '.' + 'first_peak_bpm', values[argmax(first_peak_weights)])#, pool.GlobalScope
    pool.add(namespace + '.' + 'first_peak_weight', first_peak_weights[argmax(first_peak_weights)])#, pool.GlobalScope
    if sum(first_peak_weights) != 0.:
      pool.add(namespace + '.' + 'first_peak_spread', 1.-first_peak_weights[argmax(first_peak_weights)]/sum(first_peak_weights))#, pool.GlobalScope
    else:
      pool.add(namespace + '.' + 'first_peak_spread', 0.)#, pool.GlobalScope
    pool.add(namespace + '.' + 'second_peak_bpm', values[argmax(secnd_peak_weights)])#, pool.GlobalScope
    pool.add(namespace + '.' + 'second_peak_weight', secnd_peak_weights[argmax(secnd_peak_weights)])#, pool.GlobalScope
    if sum(secnd_peak_weights) != 0.:
      pool.add(namespace + '.' + 'second_peak_spread', 1.-secnd_peak_weights[argmax(secnd_peak_weights)]/sum(secnd_peak_weights))#, pool.GlobalScope
    else:
      pool.add(namespace + '.' + 'second_peak_spread', 0.)#, pool.GlobalScope

    '''
    def rubato(ticks):
        bpm_rubato_python = []
        tolerance = 0.08
        i = 5
        tmp1 = 60./ float(ticks[i  ] - ticks[i-1])
        tmp2 = 60./ float(ticks[i-1] - ticks[i-2])
        tmp3 = 60./ float(ticks[i-2] - ticks[i-3])
        tmp4 = 60./ float(ticks[i-3] - ticks[i-4])
        tmp5 = 60./ float(ticks[i-4] - ticks[i-5])
        for i in range(6, len(ticks)):
            if (  abs(1. - tmp1 / tmp4) >= tolerance
              and abs(1. - tmp2 / tmp5) >= tolerance
              and abs(1. - tmp2 / tmp4) >= tolerance
              and abs(1. - tmp1 / tmp5) >= tolerance
              and abs(1. - tmp1 / tmp2) <= tolerance
              and abs(1. - tmp4 / tmp5) <= tolerance ):
                bpm_rubato_python.append(ticks[i-2])
            tmp5 = tmp4; tmp4 = tmp3; tmp3 = tmp2; tmp2 = tmp1
            tmp1 = 60./ (ticks[i] - ticks[i-1])
        print bpm_rubato_python
        return bpm_rubato_python
    '''
    # FIXME we need better rubato algorithm
    #rubato = essentia.BpmRubato()
    #bpm_rubato_start, bpm_rubato_stop = rubato(ticks)
    #pool.add(namespace + '.' + 'rubato_start', bpm_rubato_start)#, pool.GlobalScope
    #pool.add(namespace + '.' + 'rubato_stop',  bpm_rubato_stop)#,  pool.GlobalScope)

    INFO('100% done...')
Example #9
def compute(audio, pool, options):

    INFO('Computing Tonal descriptors...')

    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType = options['windowType']

    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize,
                                zeroPadding=zeroPadding,
                                type=windowType)
    spectrum = essentia.Spectrum(size=(frameSize + zeroPadding) / 2)
    spectral_peaks = essentia.SpectralPeaks(maxPeaks=10000,
                                            magnitudeThreshold=0.00001,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            orderBy="frequency")
    tuning = essentia.TuningFrequency()

    # computing the tuning frequency
    tuning_frequency = 440.0

    for frame in frames:

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        #if len(frame_frequencies) > 0:
        (tuning_frequency, tuning_cents) = tuning(frame_frequencies,
                                                  frame_magnitudes)

    pool.add(namespace + '.' + 'tuning_frequency',
             tuning_frequency)  #, pool.GlobalScope)

    # computing the HPCPs
    spectral_whitening = essentia.SpectralWhitening()

    hpcp_key_size = 36
    hpcp_chord_size = 36
    hpcp_tuning_size = 120

    hpcp_key = essentia.HPCP(size=hpcp_key_size,
                             referenceFrequency=tuning_frequency,
                             bandPreset=False,
                             minFrequency=40.0,
                             maxFrequency=5000.0,
                             weightType='squaredCosine',
                             nonLinear=False,
                             windowSize=4.0 / 3.0,
                             sampleRate=sampleRate)

    hpcp_chord = essentia.HPCP(size=hpcp_chord_size,
                               referenceFrequency=tuning_frequency,
                               harmonics=8,
                               bandPreset=True,
                               minFrequency=40.0,
                               maxFrequency=5000.0,
                               splitFrequency=500.0,
                               weightType='cosine',
                               nonLinear=True,
                               windowSize=0.5,
                               sampleRate=sampleRate)

    hpcp_tuning = essentia.HPCP(size=hpcp_tuning_size,
                                referenceFrequency=tuning_frequency,
                                harmonics=8,
                                bandPreset=True,
                                minFrequency=40.0,
                                maxFrequency=5000.0,
                                splitFrequency=500.0,
                                weightType='cosine',
                                nonLinear=True,
                                windowSize=0.5,
                                sampleRate=sampleRate)
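
    # A rough illustration (hypothetical helper, not essentia's exact HPCP
    # implementation) of how a spectral peak frequency maps to an HPCP bin: the
    # frequency is folded onto one octave relative to the reference (tuning)
    # frequency as a pitch class in [0, 12) semitones, then quantized to `size` bins.
    #
    # def hpcp_bin(freq, size, reference=tuning_frequency):
    #     semitones = (12.0 * numpy.log2(freq / reference)) % 12.0
    #     return int(round(semitones * size / 12.0)) % size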

    # initializing the HPCP arrays
    hpcps_key = []
    hpcps_chord = []
    hpcps_tuning = []

    # computing HPCP loop
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    progress = Progress(total=total_frames)

    for frame in frames:

        #frameScope = [ start_of_frame / sampleRate, (start_of_frame + frameSize) / sampleRate ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # spectral peaks
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)

        if (len(frame_frequencies) > 0):
            # spectral_whitening
            frame_magnitudes_white = spectral_whitening(
                frame_spectrum, frame_frequencies, frame_magnitudes)
            frame_hpcp_key = hpcp_key(frame_frequencies,
                                      frame_magnitudes_white)
            frame_hpcp_chord = hpcp_chord(frame_frequencies,
                                          frame_magnitudes_white)
            frame_hpcp_tuning = hpcp_tuning(frame_frequencies,
                                            frame_magnitudes_white)
        else:
            frame_hpcp_key = essentia.array([0] * hpcp_key_size)
            frame_hpcp_chord = essentia.array([0] * hpcp_chord_size)
            frame_hpcp_tuning = essentia.array([0] * hpcp_tuning_size)

        # key HPCP
        hpcps_key.append(frame_hpcp_key)

        # add HPCP to the pool
        pool.add(namespace + '.' + 'hpcp', frame_hpcp_key)

        # chords HPCP
        hpcps_chord.append(frame_hpcp_chord)

        # tuning system HPCP
        hpcps_tuning.append(frame_hpcp_tuning)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    progress.finish()

    # check if silent file
    if len(hpcps_key) == 0:
        raise EssentiaError('This is a silent file!')

    # key detection
    key_detector = essentia.Key(profileType='temperley')
    average_hpcps_key = numpy.average(essentia.array(hpcps_key), axis=0)
    average_hpcps_key = normalize(average_hpcps_key)

    # thpcps
    max_arg = numpy.argmax(average_hpcps_key)
    thpcp = []
    for i in range(max_arg, len(average_hpcps_key)):
        thpcp.append(float(average_hpcps_key[i]))
    for i in range(max_arg):
        thpcp.append(float(average_hpcps_key[i]))
    pool.add(namespace + '.' + 'thpcp', thpcp)  #, pool.GlobalScope  )
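    # Note: the two loops above simply rotate the averaged HPCP so that its strongest
    # bin comes first; an equivalent (hypothetical) one-liner with numpy would be:
    # thpcp = [float(v) for v in numpy.roll(average_hpcps_key, -max_arg)]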

    (key, scale, key_strength,
     first_to_second_relative_strength) = key_detector(
         essentia.array(average_hpcps_key))
    pool.add(namespace + '.' + 'key_key', key)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'key_scale', scale)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'key_strength',
             key_strength)  #, pool.GlobalScope)

    # chord detection
    chord_detector = essentia.Key(profileType='tonictriad', usePolyphony=False)
    hpcp_frameSize = 2.0  # 2 seconds
    hpcp_number = int(hpcp_frameSize * (sampleRate / hopSize - 1))
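    # Worked example (assuming the common settings sampleRate = 44100 and
    # hopSize = 512, which this script does not fix): sampleRate / hopSize - 1 is
    # roughly 85 HPCP frames per second, so hpcp_number = int(2.0 * 85) = 170,
    # i.e. the chord window spans about 170 frames on either side of the current one.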

    for hpcp_index in range(len(hpcps_chord)):

        hpcp_index_begin = max(0, hpcp_index - hpcp_number)
        hpcp_index_end = min(hpcp_index + hpcp_number, len(hpcps_chord))
        average_hpcps_chord = numpy.average(essentia.array(
            hpcps_chord[hpcp_index_begin:hpcp_index_end]),
                                            axis=0)
        average_hpcps_chord = normalize(average_hpcps_chord)
        (key, scale, strength,
         first_to_second_relative_strength) = chord_detector(
             essentia.array(average_hpcps_chord))

        if scale == 'minor':
            chord = key + 'm'
        else:
            chord = key

        frame_second_scope = [
            hpcp_index_begin * hopSize / sampleRate,
            hpcp_index_end * hopSize / sampleRate
        ]
        pool.add(namespace + '.' + 'chords_progression',
                 chord)  #, frame_second_scope)
        pool.add(namespace + '.' + 'chords_strength',
                 strength)  #, frame_second_scope)

    # tuning system features
    keydetector = essentia.Key(profileType='diatonic')
    average_hpcps_tuning = numpy.average(essentia.array(hpcps_tuning), axis=0)
    average_hpcps_tuning = normalize(average_hpcps_tuning)
    (key, scale, diatonic_strength,
     first_to_second_relative_strength) = keydetector(
         essentia.array(average_hpcps_tuning))

    pool.add(namespace + '.' + 'tuning_diatonic_strength',
             diatonic_strength)  #, pool.GlobalScope)

    (equal_tempered_deviation, nontempered_energy_ratio,
     nontempered_peaks_energy_ratio
     ) = essentia.HighResolutionFeatures()(average_hpcps_tuning)

    pool.add(namespace + '.' + 'tuning_equal_tempered_deviation',
             equal_tempered_deviation)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'tuning_nontempered_energy_ratio',
             nontempered_energy_ratio)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'tuning_nontempered_peaks_energy_ratio',
             nontempered_peaks_energy_ratio)  #, pool.GlobalScope)
Example #11
0
def compute(audio, pool, options):

    INFO('Computing SFX descriptors...')

    # analysis parameters
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    windowType = options['windowType']

    # frame algorithms
    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=0, type=windowType)
    spectrum = essentia.Spectrum(size=frameSize)

    # pitch algorithm
    pitch_detection = essentia.PitchDetection(frameSize=2048,
                                              sampleRate=sampleRate)

    # sfx descriptors
    spectral_peaks = essentia.SpectralPeaks(sampleRate=sampleRate,
                                            orderBy='frequency')
    harmonic_peaks = essentia.HarmonicPeaks()
    inharmonicity = essentia.Inharmonicity()
    odd2evenharmonicenergyratio = essentia.OddToEvenHarmonicEnergyRatio()
    tristimulus = essentia.Tristimulus()
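
    # For orientation, the usual textbook definitions of these descriptors (treat
    # the exact scaling used by essentia as an assumption):
    # - inharmonicity: energy-weighted deviation of harmonic peak frequencies f_n
    #   from integer multiples of the fundamental f0, e.g.
    #   sum(|f_n - n*f0| * a_n**2) / (f0 * sum(a_n**2))
    # - tristimulus: energy of harmonic 1, of harmonics 2-4, and of the remaining
    #   harmonics, each normalized by the total harmonic energy
    # - odd-to-even ratio: energy of odd-numbered harmonics divided by the energy
    #   of even-numbered harmonics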

    # used for a nice progress display
    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5
    progress = Progress(total=total_frames)

    for frame in frames:

        frameScope = [
            start_of_frame / sampleRate,
            (start_of_frame + frameSize) / sampleRate
        ]
        #pool.setCurrentScope(frameScope)

        if options['skipSilence'] and essentia.isSilent(frame):
            total_frames -= 1
            start_of_frame += hopSize
            continue

        frame_windowed = window(frame)
        frame_spectrum = spectrum(frame_windowed)

        # pitch descriptors
        frame_pitch, frame_pitch_confidence = pitch_detection(frame_spectrum)

        # spectral peaks based descriptors
        (frame_frequencies, frame_magnitudes) = spectral_peaks(frame_spectrum)
        (frame_harmonic_frequencies,
         frame_harmonic_magnitudes) = harmonic_peaks(frame_frequencies,
                                                     frame_magnitudes,
                                                     frame_pitch)
        if len(frame_harmonic_frequencies) > 1:
            frame_inharmonicity = inharmonicity(frame_harmonic_frequencies,
                                                frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'inharmonicity', frame_inharmonicity)
            frame_tristimulus = tristimulus(frame_harmonic_frequencies,
                                            frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'tristimulus', frame_tristimulus)
            frame_odd2evenharmonicenergyratio = odd2evenharmonicenergyratio(
                frame_harmonic_frequencies, frame_harmonic_magnitudes)
            pool.add(namespace + '.' + 'odd2evenharmonicenergyratio',
                     frame_odd2evenharmonicenergyratio)

        # display of progress report
        progress.update(n_frames)

        n_frames += 1
        start_of_frame += hopSize

    envelope = essentia.Envelope()
    file_envelope = envelope(audio)

    # temporal statistics
    decrease = essentia.AudioDecrease(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_decrease',
             decrease(file_envelope))  #, pool.GlobalScope)

    centralmoments = essentia.AudioCentralMoments(blockSize=len(audio))
    file_centralmoments = centralmoments(file_envelope)

    distributionshape = essentia.DistributionShape()
    (file_spread, file_skewness,
     file_kurtosis) = distributionshape(file_centralmoments)
    pool.add(namespace + '.' + 'temporal_spread',
             file_spread)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_skewness',
             file_skewness)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'temporal_kurtosis',
             file_kurtosis)  #, pool.GlobalScope)
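
    # Sketch of how spread/skewness/kurtosis relate to the central moments m2..m4
    # (standard definitions; essentia's exact output is assumed to match):
    #
    # m2, m3, m4 = file_centralmoments[2], file_centralmoments[3], file_centralmoments[4]
    # spread   = m2
    # skewness = m3 / m2 ** 1.5
    # kurtosis = m4 / m2 ** 2 - 3.0   # excess kurtosis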

    centroid = essentia.AudioCentroid(blockSize=len(audio))
    pool.add(namespace + '.' + 'temporal_centroid',
             centroid(file_envelope))  #, pool.GlobalScope)

    # effective duration
    effectiveduration = essentia.EffectiveDuration()
    pool.add(namespace + '.' + 'effective_duration',
             effectiveduration(file_envelope))  #, pool.GlobalScope)
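
    # Effective duration is, by the usual definition, the time the envelope spends
    # above a fixed fraction of its maximum (a 40% threshold is assumed here, which
    # may differ from essentia's default). A rough numpy equivalent:
    #
    # above = file_envelope >= 0.4 * numpy.max(file_envelope)
    # effective_duration_seconds = float(numpy.sum(above)) / sampleRate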

    # log attack time
    logattacktime = essentia.LogAttackTime()
    pool.add(namespace + '.' + 'logattacktime',
             logattacktime(audio))  #, pool.GlobalScope)

    # strong decay
    strongdecay = essentia.StrongDecay()
    pool.add(namespace + '.' + 'strongdecay',
             strongdecay(file_envelope))  #, pool.GlobalScope)

    # dynamic profile
    flatness = essentia.FlatnessSFX()
    pool.add(namespace + '.' + 'flatness',
             flatness(file_envelope))  #, pool.GlobalScope)

    # onsets number
    onsets_number = len(pool.value('rhythm.onset_times')[0])
    pool.add(namespace + '.' + 'onsets_number',
             onsets_number)  #, pool.GlobalScope)

    # morphological descriptors
    max_to_total = essentia.MaxToTotal()
    pool.add(namespace + '.' + 'max_to_total',
             max_to_total(file_envelope))  #, pool.GlobalScope)

    tc_to_total = essentia.TCToTotal(sampleRate=sampleRate)
    pool.add(namespace + '.' + 'tc_to_total',
             tc_to_total(file_envelope))  #, pool.GlobalScope)

    derivativeSFX = essentia.DerivativeSFX(sampleRate=sampleRate)
    (der_av_after_max, max_der_before_max) = derivativeSFX(file_envelope)
    pool.add(namespace + '.' + 'der_av_after_max',
             der_av_after_max)  #, pool.GlobalScope)
    pool.add(namespace + '.' + 'max_der_before_max',
             max_der_before_max)  #, pool.GlobalScope)

    # pitch profile
    pitch = pool.value('lowlevel.pitch')

    if len(pitch) > 1:
        pool.add(namespace + '.' + 'pitch_max_to_total',
                 max_to_total(pitch))  #, pool.GlobalScope)

        min_to_total = essentia.MinToTotal()
        pool.add(namespace + '.' + 'pitch_min_to_total',
                 min_to_total(pitch))  #, pool.GlobalScope)

        pitch_centroid = essentia.Centroid(range=len(pitch) - 1)
        pool.add(namespace + '.' + 'pitch_centroid',
                 pitch_centroid(pitch))  #, pool.GlobalScope)
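
        # The centroid here follows the standard weighted-mean-index definition,
        # scaled by the `range` parameter; with range = len(pitch) - 1 it reduces to
        # (hypothetical numpy equivalent):
        #
        # idx = numpy.arange(len(pitch))
        # pitch_centroid_value = float(numpy.sum(idx * pitch) / numpy.sum(pitch))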

        pitch_after_max_to_before_max_energy_ratio = essentia.AfterMaxToBeforeMaxEnergyRatio()
        pool.add(namespace + '.' +
                 'pitch_after_max_to_before_max_energy_ratio',
                 pitch_after_max_to_before_max_energy_ratio(
                     pitch))  #, pool.GlobalScope)

    else:
        pool.add(namespace + '.' + 'pitch_max_to_total',
                 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_min_to_total',
                 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' + 'pitch_centroid', 0.0)  #, pool.GlobalScope)
        pool.add(namespace + '.' +
                 'pitch_after_max_to_before_max_energy_ratio',
                 0.0)  #, pool.GlobalScope)

    progress.finish()