def computeLowLevel(input_file, pool, startTime, endTime, namespace=''):
    llspace = 'lowlevel.'
    rhythmspace = 'rhythm.'
    if namespace:
        llspace = namespace + '.lowlevel.'
        rhythmspace = namespace + '.rhythm.'

    rgain, sampleRate, downmix = getAnalysisMetadata(pool)
    loader = streaming.EqloudLoader(filename=input_file,
                                    sampleRate=sampleRate,
                                    startTime=startTime,
                                    endTime=endTime,
                                    replayGain=rgain,
                                    downmix=downmix)

    lowlevel.compute(loader.audio, loader.audio, pool, startTime, endTime,
                     namespace)
    essentia.run(loader)

    # check if we processed enough audio for it to be useful, in particular did
    # we manage to get an estimation for the loudness (2 seconds required)
    if not pool.containsKey(llspace + "loudness"):
        INFO('ERROR: File is too short (< 2sec)... Aborting...')
        sys.exit(2)

    numOnsets = len(pool[rhythmspace + 'onset_times'])
    sampleRate = pool['metadata.audio_properties.analysis_sample_rate']
    onset_rate = numOnsets / float(loader.audio.totalProduced()) * sampleRate
    pool.set(rhythmspace + 'onset_rate', onset_rate)
Example #2
def get_file_bpm(audio_path: str):
    loader = MonoLoader(filename=audio_path)
    rhythm_extractor = RhythmExtractor2013(method="degara")

    # alternative if using essentia.standard
    # (slightly less memory efficient)
    # audio = loader()
    # rhythm = rhythm_extractor(audio)
    # return rhythm[0]

    # alternative using the Percival BPM estimator
    # pool = essentia.Pool()
    # percival_bpm_estimator = PercivalBpmEstimator()
    # loader.audio >> percival_bpm_estimator.signal
    # percival_bpm_estimator.bpm >> (pool, 'rhythm')

    pool = essentia.Pool()
    loader.audio >> rhythm_extractor.signal
    rhythm_extractor.ticks >> None
    rhythm_extractor.confidence >> None
    rhythm_extractor.bpm >> (pool, 'rhythm')
    rhythm_extractor.estimates >> None
    rhythm_extractor.bpmIntervals >> None
    essentia.run(loader)
    return pool['rhythm']
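
For reference, a standalone version of the commented-out Percival variant might look like the sketch below. It is only a sketch: it assumes the streaming PercivalBpmEstimator wrapper (default parameters) that the comments above imply.

def get_file_bpm_percival(audio_path: str):
    # Sketch only: assumes essentia.streaming.PercivalBpmEstimator is available,
    # as the commented-out code in get_file_bpm implies.
    loader = MonoLoader(filename=audio_path)
    percival = PercivalBpmEstimator()
    pool = essentia.Pool()

    loader.audio >> percival.signal
    percival.bpm >> (pool, 'rhythm')
    essentia.run(loader)
    return pool['rhythm']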
Example #3
    def computeBpmHistogram(self, noveltyCurve, frameSize=4, overlap=2,
                            frameRate=44100./128., window='hann',
                            zeroPadding=0,
                            constantTempo=False,
                            minBpm=30):

        pool=Pool()
        bpmHist = BpmHistogram(frameRate=frameRate,
                               frameSize=frameSize,
                               overlap=overlap,
                               zeroPadding=zeroPadding,
                               constantTempo=constantTempo,
                               windowType=window,
                               minBpm=minBpm)

        gen    = VectorInput(noveltyCurve)
        gen.data >> bpmHist.novelty
        bpmHist.bpm >> (pool, 'bpm')
        bpmHist.bpmCandidates >> (pool, 'bpmCandidates')
        bpmHist.bpmMagnitudes >> (pool, 'bpmMagnitudes')
        bpmHist.frameBpms >> None #(pool, 'frameBpms')
        bpmHist.tempogram >> (pool, 'tempogram')
        bpmHist.ticks >> (pool, 'ticks')
        bpmHist.ticksMagnitude >> (pool, 'ticksMagnitude')
        bpmHist.sinusoid >> (pool, 'sinusoid')
        essentia.run(gen)

        return pool
Example #4
File: analysis.py, Project: samim23/pablo
def estimate_main_band(infile):
    """
    Estimate if this is a low, mid, or high track.

    Not _really_ sure if this does what I need it to,
    but some quick tests looked right.
    """
    loader = streaming.MonoLoader(filename=infile)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    freqbands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    pool = Pool()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> freqbands.spectrum
    freqbands.bands >> (pool, 'bands')

    run(loader)

    sums = np.sum(pool['bands'], axis=0)
    band = np.argmax(sums)
    if band == 0:
        return 'low'
    elif band == 1:
        return 'mid'
    elif band == 2:
        return 'high'
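
The same band comparison can also be sketched without a streaming network. The following assumes the essentia.standard counterparts of the algorithms above and the FrameGenerator defaults; treat it as an illustration rather than a drop-in replacement.

import numpy as np
import essentia.standard as es

def estimate_main_band_standard(infile):
    # Sketch only: standard-mode equivalent of the streaming network above.
    audio = es.MonoLoader(filename=infile)()
    window = es.Windowing(type="blackmanharris62")
    spectrum = es.Spectrum()
    freqbands = es.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    bands = np.array([freqbands(spectrum(window(frame)))
                      for frame in es.FrameGenerator(audio)])
    return ['low', 'mid', 'high'][int(np.argmax(bands.sum(axis=0)))]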
def callback(data):
    # update audio buffer
    buffer[:] = array(unpack('f' * bufferSize, data))

    # generate predictions
    reset(vimp)
    run(vimp)
Example #6
def get_bpm(file_in):
    pool = Pool()

    loader = streaming.MonoLoader(filename=file_in)
    bt = streaming.RhythmExtractor2013()
    bpm_histogram = streaming.BpmHistogramDescriptors()
    # BPM histogram output size is 250
    centroid = streaming.Centroid(range=250)

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None
    bpm_histogram.histogram >> (pool, 'bpm_histogram')
    bpm_histogram.histogram >> centroid.array
    centroid.centroid >> (pool, 'bpm_centroid')

    run(loader)
    return pool['bpm']
Example #7
def get_key(file_in):
    """
    Estimates the key and scale for an audio file.
    """
    loader = streaming.MonoLoader(filename=file_in)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    spectralpeaks = streaming.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
    pool = Pool()
    hpcp = streaming.HPCP()
    key = streaming.Key()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key.pcp
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')

    run(loader)

    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
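
Key is not defined in this snippet; it is presumably a small container (for instance a namedtuple with key and scale fields) declared elsewhere in the same module. A hypothetical call:

# Hypothetical usage; 'Key' is assumed to behave like
# namedtuple('Key', ['key', 'scale']) defined alongside get_key.
result = get_key('track.mp3')        # hypothetical path
print(result.key, result.scale)      # e.g. 'A' 'minor'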
Example #8
File: analysis.py, Project: samim23/pablo
def estimate_key(infile):
    """
    Estimates the key and scale for an audio file.
    """
    loader = streaming.MonoLoader(filename=infile)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    spectralpeaks = streaming.SpectralPeaks(orderBy="magnitude",
                                magnitudeThreshold=1e-05,
                                minFrequency=40,
                                maxFrequency=5000,
                                maxPeaks=10000)
    pool = Pool()
    hpcp = streaming.HPCP()
    key = streaming.Key()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> spectralpeaks.spectrum
    spectralpeaks.magnitudes >> hpcp.magnitudes
    spectralpeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> key.pcp
    key.key >> (pool, 'tonal.key_key')
    key.scale >> (pool, 'tonal.key_scale')
    key.strength >> (pool, 'tonal.key_strength')

    run(loader)

    return Key(pool['tonal.key_key'], pool['tonal.key_scale'])
Example #9
    def computeBpmHistogram(self,
                            noveltyCurve,
                            frameSize=4,
                            overlap=2,
                            frameRate=44100. / 128.,
                            window='hann',
                            zeroPadding=0,
                            constantTempo=False,
                            minBpm=30):

        pool = Pool()
        bpmHist = ess.BpmHistogram(frameRate=frameRate,
                                   frameSize=frameSize,
                                   overlap=overlap,
                                   zeroPadding=zeroPadding,
                                   constantTempo=constantTempo,
                                   windowType=window,
                                   minBpm=minBpm)

        gen = ess.VectorInput(noveltyCurve)
        gen.data >> bpmHist.novelty
        bpmHist.bpm >> (pool, 'bpm')
        bpmHist.bpmCandidates >> (pool, 'bpmCandidates')
        bpmHist.bpmMagnitudes >> (pool, 'bpmMagnitudes')
        bpmHist.frameBpms >> None  #(pool, 'frameBpms')
        bpmHist.tempogram >> (pool, 'tempogram')
        bpmHist.ticks >> (pool, 'ticks')
        bpmHist.ticksMagnitude >> (pool, 'ticksMagnitude')
        bpmHist.sinusoid >> (pool, 'sinusoid')
        essentia.run(gen)

        return pool
Example #10
def estimate_main_band(infile):
    """
    Estimate if this is a low, mid, or high track.

    Not _really_ sure if this does what I need it to,
    but some quick tests looked right.
    """
    loader = streaming.MonoLoader(filename=infile)
    framecutter = streaming.FrameCutter()
    windowing = streaming.Windowing(type="blackmanharris62")
    spectrum = streaming.Spectrum()
    freqbands = streaming.FrequencyBands(frequencyBands=[0, 250, 750, 4000])
    pool = Pool()

    loader.audio >> framecutter.signal
    framecutter.frame >> windowing.frame >> spectrum.frame
    spectrum.spectrum >> freqbands.spectrum
    freqbands.bands >> (pool, 'bands')

    run(loader)

    sums = np.sum(pool['bands'], axis=0)
    band = np.argmax(sums)
    if band == 0:
        return 'low'
    elif band == 1:
        return 'mid'
    elif band == 2:
        return 'high'
def computeSegmentation(filename, pool):
    sampleRate = 44100
    frameSize = 2048
    hopSize = frameSize // 2

    audio = EqloudLoader(filename=filename,
                         downmix=pool['downmix'],
                         sampleRate=sampleRate)

    fc = FrameCutter(frameSize=frameSize, hopSize=hopSize, silentFrames='keep')
    w = Windowing(type='blackmanharris62')
    spec = Spectrum()
    mfcc = MFCC(highFrequencyBound=8000)
    tmpPool = essentia.Pool()

    audio.audio >> fc.signal
    fc.frame >> w.frame >> spec.frame
    spec.spectrum >> mfcc.spectrum
    mfcc.bands >> (tmpPool, 'mfcc_bands')
    mfcc.mfcc >> (tmpPool, 'mfcc_coeff')

    essentia.run(audio)

    # compute transpose of features array, don't call numpy.matrix.transpose
    # because essentia f***s it up!!
    features = copy.deepcopy(tmpPool['mfcc_coeff'].transpose())
    segments = std.SBic(cpw=1.5, size1=1000, inc1=300, size2=600,
                        inc2=50)(features)
    for segment in segments:
        pool.add('segments', segment * hopSize / sampleRate)
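
computeSegmentation expects the pool to already hold a 'downmix' setting and appends the detected segment boundaries, in seconds, under 'segments'. A hypothetical call:

pool = essentia.Pool()
pool.set('downmix', 'mix')               # any downmix value EqloudLoader accepts
computeSegmentation('track.mp3', pool)   # hypothetical path
print(pool['segments'])                  # segment start times in seconds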
def get_bpm(audiofile):
    pool = essentia.Pool()

    loader = MonoLoader(filename = audiofile)
    bt = RhythmExtractor2013()
    bpm_histogram = BpmHistogramDescriptors()
    centroid = Centroid(range=250) # BPM histogram output size is 250

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> None
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None

    essentia.run(loader)

    return "BPM:", pool['bpm']
Example #13
def analysisSynthesisStreaming(params, signal):

    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['frameSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'], gain=1. / params['frameSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')
    
    
    essentia.run(insignal)
    
    # remove first half window frames
    outaudio = pool['audio']
    outaudio = outaudio [2*params['hopSize']:]
    return outaudio
Example #14
def computeLowLevel(input_file, pool, startTime, endTime, namespace=''):
    llspace = 'lowlevel.'
    rhythmspace = 'rhythm.'
    if namespace:
        llspace = namespace + '.lowlevel.'
        rhythmspace = namespace + '.rhythm.'

    rgain, sampleRate, downmix = getAnalysisMetadata(pool)
    loader = streaming.EqloudLoader(filename = input_file,
                                    sampleRate = sampleRate,
                                    startTime = startTime,
                                    endTime = endTime,
                                    replayGain = rgain,
                                    downmix = downmix)

    lowlevel.compute(loader.audio, loader.audio, pool, startTime, endTime, namespace)
    essentia.run(loader)

    # check if we processed enough audio for it to be useful, in particular did
    # we manage to get an estimation for the loudness (2 seconds required)
    if not pool.containsKey(llspace + "loudness"):
        INFO('ERROR: File is too short (< 2sec)... Aborting...')
        sys.exit(2)

    numOnsets = len(pool[rhythmspace + 'onset_times'])
    sampleRate = pool['metadata.audio_properties.analysis_sample_rate']
    onset_rate = numOnsets/float(loader.audio.totalProduced())*sampleRate
    pool.set(rhythmspace + 'onset_rate', onset_rate)
def analSineModelStreaming(params, signal):
  
    #out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'], magnitudeThreshold=params['magnitudeThreshold'], freqDevOffset=params['freqDevOffset'], freqDevSlope=params['freqDevSlope'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    
    essentia.run(insignal)
    
    # remove first half window frames
    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']

    # remove short tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean
    
    return mags, freqsClean, phases
Example #16
File: analysis.py, Project: samim23/pablo
def estimate_bpm(infile):
    """
    Estimates the BPM for an audio file.
    """
    pool = Pool()

    loader = streaming.MonoLoader(filename=infile)
    bt = streaming.RhythmExtractor2013()
    bpm_histogram = streaming.BpmHistogramDescriptors()
    centroid = streaming.Centroid(range=250) # BPM histogram output size is 250

    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> None
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> None
    bt.bpmIntervals >> bpm_histogram.bpmIntervals
    bpm_histogram.firstPeakBPM >> (pool, 'bpm_first_peak')
    bpm_histogram.firstPeakWeight >> None
    bpm_histogram.firstPeakSpread >> None
    bpm_histogram.secondPeakBPM >> (pool, 'bpm_second_peak')
    bpm_histogram.secondPeakWeight >> None
    bpm_histogram.secondPeakSpread >> None
    bpm_histogram.histogram >> (pool, 'bpm_histogram')
    bpm_histogram.histogram >> centroid.array
    centroid.centroid >> (pool, 'bpm_centroid')

    run(loader)
    return pool['bpm']
def extract_spectral_complexity(file):
    print("Extracting spectral complexity...")
    pool = essentia.Pool()

    loader = MonoLoader(filename=file)
    frameCutter = FrameCutter(frameSize=44100 * 20, hopSize=5 * 44100)
    w = Windowing(type='hann')
    spec = Spectrum()
    mfcc = MFCC()
    # Pool to store the results
    pool = essentia.Pool()
    spectralComplexity = SpectralComplexity()

    # Connect the input and outputs
    loader.audio >> frameCutter.signal

    # Spectral Complexity
    frameCutter.frame >> w.frame >> spec.frame
    spec.spectrum >> spectralComplexity.spectrum
    spectralComplexity.spectralComplexity >> (pool, "spectral complexity")

    essentia.run(loader)
    df = pd.DataFrame()

    df["Spectral Complexity"] = pool["spectral complexity"]
    return df
def analysisSynthesisStreaming(params, signal):

    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['frameSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'],
                          hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)

    # remove first half window frames
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]
    return outaudio
Example #19
def computeSegmentation(filename, pool):
    sampleRate = 44100
    frameSize = 2048
    hopSize = frameSize // 2

    audio = EqloudLoader(filename=filename,
                         downmix=pool['downmix'],
                         sampleRate=sampleRate)

    fc = FrameCutter(frameSize=frameSize, hopSize=hopSize, silentFrames='keep')
    w = Windowing(type='blackmanharris62')
    spec = Spectrum()
    mfcc = MFCC(highFrequencyBound=8000)
    tmpPool = essentia.Pool()

    audio.audio >> fc.signal
    fc.frame >> w.frame >> spec.frame
    spec.spectrum >> mfcc.spectrum
    mfcc.bands >> (tmpPool, 'mfcc_bands')
    mfcc.mfcc >> (tmpPool, 'mfcc_coeff')

    essentia.run(audio)

    # compute transpose of features array, don't call numpy.matrix.transpose
    # because essentia f***s it up!!
    features = copy.deepcopy(tmpPool['mfcc_coeff'].transpose())
    segments = std.SBic(cpw=1.5, size1=1000, inc1=300, size2=600, inc2=50)(features)
    for segment in segments:
        pool.add('segments', segment*hopSize/sampleRate)
Example #20
    def computeNoveltyCurve(self, filename, frameSize=1024, hopSize=512, windowType='hann',
                            weightCurveType='inverse_quadratic', sampleRate=44100.0,
                            startTime=0, endTime=2000):

        loader = EasyLoader(filename=filename, startTime=startTime,
                            endTime=endTime, sampleRate=sampleRate,
                            downmix='left')
        fc     = FrameCutter(frameSize=frameSize, hopSize=hopSize,
                             silentFrames="keep",
                             startFromZero=False, lastFrameToEndOfFile=True)
        window = Windowing(type=windowType, zeroPhase=True,
                           zeroPadding=1024-frameSize)
        freqBands = FrequencyBands(sampleRate=sampleRate) # using barkbands by default
        pool = Pool()
        spec = Spectrum()
        loader.audio >> fc.signal
        fc.frame >> window.frame >> spec.frame
        spec.spectrum >> freqBands.spectrum
        freqBands.bands >> (pool, 'frequency_bands')
        essentia.run(loader)


        noveltyCurve = std.NoveltyCurve(frameRate=sampleRate/float(hopSize),
                                        weightCurveType=weightCurveType)(pool['frequency_bands'])

        return noveltyCurve
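
These two helpers appear designed to be chained: the novelty curve returned here is the input computeBpmHistogram above expects. A hypothetical call on an instance of the surrounding class (named analyzer here), matching the frame rate to the novelty curve's hop size:

novelty = analyzer.computeNoveltyCurve('track.mp3', frameSize=1024, hopSize=512)
# frameRate should match the novelty curve's frame rate (sampleRate / hopSize)
bpm_pool = analyzer.computeBpmHistogram(novelty, frameRate=44100.0 / 512)
print(bpm_pool['bpm'])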
Example #21
def analHpsModelStreaming(params, signal):

    #out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'],
                                 sampleRate=params['sampleRate'])

    smanal = es.HpsModelAnal(sampleRate=params['sampleRate'],
                             hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput (signal)


    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    spec.spectrum >> pitchDetect.spectrum

    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch
    pitchDetect.pitch >> (pool, 'pitch')
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    smanal.stocenv >> (pool, 'stocenv')


    essentia.run(insignal)

    # remove first half window frames
    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']
    pitchConf = pool['pitchConfidence']

    # remove short tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningHarmonicTracks(freqs, minFrames, pitchConf)
    pool['frequencies'].data = freqsClean

    return mags, freqsClean, phases
Example #22
def analHpsModelStreaming(params, signal):

    #out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'],
                                 sampleRate=params['sampleRate'])

    smanal = es.HpsModelAnal(sampleRate=params['sampleRate'],
                             hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput (signal)


    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    spec.spectrum >> pitchDetect.spectrum

    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch
    pitchDetect.pitch >> (pool, 'pitch')
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    smanal.stocenv >> (pool, 'stocenv')


    essentia.run(insignal)

    # remove first half window frames
    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']
    pitchConf = pool['pitchConfidence']

    # remove short tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningHarmonicTracks(freqs, minFrames, pitchConf)
    pool['frequencies'].data = freqsClean

    return mags, freqsClean, phases
Example #23
def writeBeatFile(filename, pool):
    beatFilename = os.path.splitext(filename)[0] + '_beat.wav' #'out_beat.wav' #
    audio = EasyLoader(filename=filename, downmix='mix', startTime=STARTTIME, endTime=ENDTIME)
    writer = MonoWriter(filename=beatFilename)
    onsetsMarker = AudioOnsetsMarker(onsets=pool['ticks'])
    audio.audio >> onsetsMarker.signal >> writer.audio
    essentia.run(audio)
    return beatFilename
Example #24
def writeBeatFile(filename, pool):
    beatFilename = os.path.splitext(filename)[0] + '_beat.wav' #'out_beat.wav' #
    audio = EasyLoader(filename=filename, downmix='mix', startTime=STARTTIME, endTime=ENDTIME)
    writer = MonoWriter(filename=beatFilename)
    onsetsMarker = AudioOnsetsMarker(onsets=pool['ticks'])
    audio.audio >> onsetsMarker.signal >> writer.audio
    essentia.run(audio)
    return beatFilename
def analsynthHprModelStreaming(params, signal):

    out = array([0.0])

    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params["frameSize"], hopSize=params["hopSize"], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params["frameSize"])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params["frameSize"], sampleRate=params["sampleRate"])

    smanal = es.HprModelAnal(
        sampleRate=params["sampleRate"],
        hopSize=params["hopSize"],
        maxnSines=params["maxnSines"],
        magnitudeThreshold=params["magnitudeThreshold"],
        freqDevOffset=params["freqDevOffset"],
        freqDevSlope=params["freqDevSlope"],
        minFrequency=params["minFrequency"],
        maxFrequency=params["maxFrequency"],
    )
    synFFTSize = min(params["frameSize"] // 4, 4 * params["hopSize"])
    # make sure the FFT size is appropriate
    smsyn = es.SprModelSynth(sampleRate=params["sampleRate"], fftSize=synFFTSize, hopSize=params["hopSize"])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params["frameSize"] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    spec.spectrum >> pitchDetect.spectrum

    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch
    pitchDetect.pitchConfidence >> (pool, "pitchConfidence")
    pitchDetect.pitch >> (pool, "pitch")

    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.res >> smsyn.res

    smsyn.frame >> (pool, "frames")
    smsyn.sineframe >> (pool, "sineframes")
    smsyn.resframe >> (pool, "resframes")

    essentia.run(insignal)

    outaudio = framesToAudio(pool["frames"])
    outaudio = outaudio[2 * params["hopSize"] :]

    return outaudio, pool
Example #26
def analsynthSineModelStreaming(params, signal):

    out = numpy.array(0)

    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'],
                              maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])
    smsyn = es.SineModelSynth(sampleRate=params['sampleRate'],
                              fftSize=params['frameSize'],
                              hopSize=params['hopSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          gain=1. / params['frameSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smsyn.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)

    # remove short tracks
    freqs = pool['frequencies']
    minFrames = int(params['minSineDur'] * params['sampleRate'] /
                    params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean

    # remove first half window frames
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
def computeMidLevel(input_file, pool, startTime, endTime, namespace=''):
    rgain, sampleRate, downmix = getAnalysisMetadata(pool)
    loader = streaming.EqloudLoader(filename=input_file,
                                    sampleRate=sampleRate,
                                    startTime=startTime,
                                    endTime=endTime,
                                    replayGain=rgain,
                                    downmix=downmix)
    midlevel.compute(loader.audio, pool, startTime, endTime, namespace)
    essentia.run(loader)
Example #28
def computeMidLevel(input_file, pool, startTime, endTime, namespace=''):
    rgain, sampleRate, downmix = getAnalysisMetadata(pool)
    loader = streaming.EqloudLoader(filename = input_file,
                                    sampleRate = sampleRate,
                                    startTime = startTime,
                                    endTime = endTime,
                                    replayGain = rgain,
                                    downmix = downmix)
    midlevel.compute(loader.audio, pool, startTime, endTime, namespace)
    essentia.run(loader)
Example #29
def callback(data):
    buffer[:] = array(unpack('f' * bufferSize, data))
    mfccBuffer = np.zeros([numberBands])
    reset(vectorInput)
    run(vectorInput)
    mfccBuffer = np.roll(mfccBuffer, -patchSize)
    mfccBuffer = pool['mfcc'][-patchSize]
    features = mfccBuffer
    features = features.tolist()
    return features
Example #30
File: sines.py, Project: sbl/sms
def sines(filename, outfile, params):
    '''
    Extract the sinusoidal components of an audio file
    '''
    import essentia
    from essentia.streaming import (MonoLoader, MonoWriter, FrameCutter,
                                    Windowing, SineModelAnal, SineModelSynth,
                                    FFT, IFFT, OverlapAdd)

    loader = MonoLoader(filename=filename, sampleRate=params['sampleRate'])
    awrite = MonoWriter(filename=outfile, sampleRate=params['sampleRate'])

    fcut = FrameCutter(
        frameSize=params['frameSize'], hopSize=params['hopSize'],
        startFromZero=False)
    overl = OverlapAdd(
        frameSize=params['frameSize'],
        hopSize=params['hopSize'],
        gain=1.0 / params['frameSize'])

    w = Windowing(type="blackmanharris92")
    fft = FFT(size=params['frameSize'])
    ifft = IFFT(size=params['frameSize'])

    smanal = SineModelAnal(
        sampleRate=params['sampleRate'],
        maxnSines=params['maxnSines'],
        magnitudeThreshold=params['magnitudeThreshold'],
        freqDevOffset=params['freqDevOffset'],
        freqDevSlope=params['freqDevSlope'],
        minFrequency=params['minFrequency'],
        maxFrequency=params['maxFrequency'], )

    syn = SineModelSynth(
        sampleRate=params['sampleRate'],
        fftSize=params['frameSize'],
        hopSize=params['hopSize'])

    # analysis
    loader.audio >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft

    # synth
    smanal.magnitudes >> syn.magnitudes
    smanal.frequencies >> syn.frequencies
    smanal.phases >> syn.phases

    # ifft
    syn.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> awrite.audio

    essentia.run(loader)
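
sines, like the other model-analysis helpers in this collection, reads its settings from a plain params dict. The values below are illustrative only (not library defaults); adjust them to the material being analysed:

params = {
    'sampleRate': 44100,
    'frameSize': 2048,
    'hopSize': 512,
    'maxnSines': 100,
    'magnitudeThreshold': -74,   # illustrative; see SineModelAnal docs for the expected scale
    'freqDevOffset': 20,
    'freqDevSlope': 0.01,
    'minFrequency': 20,
    'maxFrequency': 20000,
}
sines('input.wav', 'sines_only.wav', params)   # hypothetical file names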
Example #31
File: analysis.py, Project: samim23/pablo
def estimate_danceability(infile):
    loader = streaming.MonoLoader(filename=infile)
    dance = streaming.Danceability()
    pool = Pool()

    loader.audio >> dance.signal
    dance.danceability >> (pool, 'danceability')

    run(loader)

    return pool['danceability']
Example #32
def estimate_danceability(infile):
    loader = streaming.MonoLoader(filename=infile)
    dance = streaming.Danceability()
    pool = Pool()

    loader.audio >> dance.signal
    dance.danceability >> (pool, 'danceability')

    run(loader)

    return pool['danceability']
Example #33
File: subtract.py, Project: sbl/sms
def subtract(filename, outfile, params):
    '''
    Subtract the sinusoidal components as computed by the sinusoidal model.
    '''
    import essentia
    from essentia.streaming import (MonoLoader, MonoWriter, FrameCutter,
                                    Windowing, SineModelAnal, SineSubtraction,
                                    FFT, VectorInput)

    loader = MonoLoader(filename=filename, sampleRate=params['sampleRate'])
    awrite = MonoWriter(filename=outfile, sampleRate=params['sampleRate'])

    fcut = FrameCutter(
        frameSize=params['frameSize'], hopSize=params['hopSize'])

    w = Windowing(type="blackmanharris92")
    fft = FFT(size=params['frameSize'])

    smanal = SineModelAnal(
        sampleRate=params['sampleRate'],
        maxnSines=params['maxnSines'],
        magnitudeThreshold=params['magnitudeThreshold'],
        freqDevOffset=params['freqDevOffset'],
        freqDevSlope=params['freqDevSlope'],
        minFrequency=params['minFrequency'],
        maxFrequency=params['maxFrequency'], )

    subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsub = SineSubtraction(
        sampleRate=params['sampleRate'],
        fftSize=subtrFFTSize,
        hopSize=params['hopSize'])

    pool = essentia.Pool()

    # analysis
    loader.audio >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    # subtraction
    fcut.frame >> smsub.frame
    smanal.magnitudes >> smsub.magnitudes
    smanal.frequencies >> smsub.frequencies
    smanal.phases >> smsub.phases
    smsub.frame >> (pool, 'frames')

    essentia.run(loader)

    outaudio = pool['frames'].flatten()
    outvector = VectorInput(outaudio)

    outvector.data >> awrite.audio
    essentia.run(outvector)
Example #34
def computeBeatsLoudness(filename, pool):
    loader = MonoLoader(filename=filename,
                        sampleRate=pool['samplerate'],
                        downmix=pool['downmix'])
    ticks = pool['ticks']#[pool['bestTicksStart']:pool['bestTicksStart']+32]
    beatsLoud = BeatsLoudness(sampleRate = pool['samplerate'],
                              frequencyBands = barkBands, #EqBands, #scheirerBands, #barkBands,
                              beats=ticks)
    loader.audio >> beatsLoud.signal
    beatsLoud.loudness >> (pool, 'loudness')
    beatsLoud.loudnessBandRatio >> (pool, 'loudnessBandRatio')
    essentia.run(loader)
Example #35
def computeOnsets(filename, pool):
    loader = EasyLoader(filename=filename,
                        sampleRate=pool['samplerate'],
                        startTime=STARTTIME, endTime=ENDTIME,
                        downmix=pool['downmix'])
    onset = OnsetRate()
    loader.audio >> onset.signal
    onset.onsetTimes >> (pool, 'ticks')
    onset.onsetRate >> None
    essentia.run(loader)
    pool.set('size', loader.audio.totalProduced())
    pool.set('length', pool['size']/pool['samplerate'])
Example #36
    def run(self):
        global frame_g
        while not self.stoprequest.isSet():
            pool = essentia.Pool()
            self.frame_q.get()
            v_in2 = VectorInput(frame_g)
            onset = OnsetRate()
            v_in2.data >> onset.signal
            onset.onsetRate >> (pool, 'Rhythm.onsetRate')
            onset.onsetTimes >> None
            essentia.run(v_in2)
            self.result_q.put(pool)
Example #37
def computeOnsets(filename, pool):
    loader = EasyLoader(filename=filename,
                        sampleRate=pool['samplerate'],
                        startTime=STARTTIME, endTime=ENDTIME,
                        downmix=pool['downmix'])
    onset = OnsetRate()
    loader.audio >> onset.signal
    onset.onsetTimes >> (pool, 'ticks')
    onset.onsetRate >> None
    essentia.run(loader)
    pool.set('size', loader.audio.totalProduced())
    pool.set('length', pool['size']/pool['samplerate'])
Example #38
def computeBeatsLoudness(filename, pool):
    loader = MonoLoader(filename=filename,
                        sampleRate=pool['samplerate'],
                        downmix=pool['downmix'])
    ticks = pool['ticks']#[pool['bestTicksStart']:pool['bestTicksStart']+32]
    beatsLoud = BeatsLoudness(sampleRate = pool['samplerate'],
                              frequencyBands = barkBands, #EqBands, #scheirerBands, #barkBands,
                              beats=ticks)
    loader.audio >> beatsLoud.signal
    beatsLoud.loudness >> (pool, 'loudness')
    beatsLoud.loudnessBandRatio >> (pool, 'loudnessBandRatio')
    essentia.run(loader)
def analsynthSpsModelStreaming(params, signal):

    out = array([0.])

    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    smanal = es.SpsModelAnal(sampleRate=params['sampleRate'],
                             hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])
    synFFTSize = min(
        int(params['frameSize'] / 4),
        4 * params['hopSize'])  # make sure the FFT size is appropriate
    smsyn = es.SpsModelSynth(sampleRate=params['sampleRate'],
                             fftSize=synFFTSize,
                             hopSize=params['hopSize'],
                             stocf=params['stocf'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> smanal.frame

    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.stocenv >> smsyn.stocenv

    smsyn.frame >> (pool, 'frames')
    smsyn.sineframe >> (pool, 'sineframes')
    smsyn.stocframe >> (pool, 'stocframes')

    essentia.run(insignal)

    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
def analsynthHarmonicMaskStreaming(params, signal):

    out = array([0.])

    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'],
                                 sampleRate=params['sampleRate'])

    hmask = es.HarmonicMask(sampleRate=params['sampleRate'],
                            binWidth=params['binWidth'],
                            attenuation=params['attenuation_dB'])

    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'],
                          hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    w.frame >> fft.frame
    spec.spectrum >> pitchDetect.spectrum

    fft.fft >> hmask.fft
    pitchDetect.pitch >> hmask.pitch
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')

    hmask.fft >> ifft.fft

    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)

    # remove first half window frames
    outaudio = pool['audio']
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
def computeMidLevel(input_file, neqPool, eqPool, startTime, endTime, namespace=''):
    rgain, sampleRate, downmix = getAnalysisMetadata(neqPool)
    loader = streaming.EasyLoader(filename = input_file,
                                  sampleRate = sampleRate,
                                  startTime = startTime,
                                  endTime = endTime,
                                  replayGain = rgain,
                                  downmix = downmix)

    eqloud = streaming.EqualLoudness()
    loader.audio >> eqloud.signal
    midlevel.compute(loader.audio, neqPool, startTime, endTime, namespace)
    midlevel.compute(eqloud.signal, eqPool, startTime, endTime, namespace)
    essentia.run(loader)
Example #42
    def estimate_chroma(self, uid):
        loader = esstr.MonoLoader(
            filename=self.audio_path_extractor.audio_path_name(uid))
        framecutter = esstr.FrameCutter(hopSize=self.hop_size,
                                        frameSize=self.frame_size)
        windowing = esstr.Windowing(type="blackmanharris62")
        spectrum = esstr.Spectrum()
        spectralpeaks = esstr.SpectralPeaks(orderBy="magnitude",
                                            magnitudeThreshold=1e-05,
                                            minFrequency=40,
                                            maxFrequency=5000,
                                            maxPeaks=10000)
        hpcp = esstr.HPCP(size=12,
                          referenceFrequency=self.tuning_freq,
                          harmonics=8,
                          bandPreset=True,
                          minFrequency=float(40),
                          maxFrequency=float(5000),
                          bandSplitFrequency=500.0,
                          weightType="cosine",
                          nonLinear=True,
                          windowSize=1.0)
        """
        hpcp = esstr.HPCP(
            size=12,
            referenceFrequency = tuningFreq,
            harmonics = 8,
            bandPreset = True,
            minFrequency = 40.0,
            maxFrequency = 5000.0,
            bandSplitFrequency = 250.0,
            weightType = "cosine",
            nonLinear = False,
            windowSize = 1.0)
        """
        pool = essentia.Pool()
        # connect algorithms together
        loader.audio >> framecutter.signal
        framecutter.frame >> windowing.frame >> spectrum.frame
        spectrum.spectrum >> spectralpeaks.spectrum
        spectrum.spectrum >> (pool, 'spectrum.magnitude')
        spectralpeaks.magnitudes >> hpcp.magnitudes
        spectralpeaks.frequencies >> hpcp.frequencies
        hpcp.hpcp >> (pool, 'chroma.hpcp')

        essentia.run(loader)
        # roll from 'A' based to 'C' based
        chroma = pool['chroma.hpcp']
        chroma = np.roll(chroma, shift=-3, axis=1)
        return chroma
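
The roll by -3 works because HPCP bins start at the reference pitch class (A, for an A-based reference frequency such as 440 Hz), so shifting three bins makes index 0 correspond to C. A quick sanity check of the roll itself:

import numpy as np

pitch_classes = np.array(['A', 'Bb', 'B', 'C', 'C#', 'D',
                          'Eb', 'E', 'F', 'F#', 'G', 'Ab'])
print(np.roll(pitch_classes, -3))   # now starts at 'C'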
def analsynthHprModelStreaming(params, signal):
  
    out = array([0.])
  
    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'], sampleRate=params['sampleRate'])

    smanal = es.HprModelAnal(sampleRate=params['sampleRate'], hopSize=params['hopSize'], maxnSines=params['maxnSines'], magnitudeThreshold=params['magnitudeThreshold'], freqDevOffset=params['freqDevOffset'], freqDevSlope=params['freqDevSlope'], minFrequency=params['minFrequency'], maxFrequency=params['maxFrequency'])
    synFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])  # make sure the FFT size is appropriate
    smsyn = es.SprModelSynth(sampleRate=params['sampleRate'], fftSize=synFFTSize, hopSize=params['hopSize'])
    
    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
        
      
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame   
    spec.spectrum >> pitchDetect.spectrum
    
    fcut.frame >> smanal.frame
    pitchDetect.pitch >> smanal.pitch  
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')  
    pitchDetect.pitch >> (pool, 'pitch')  
    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.res >> smsyn.res
    
    smsyn.frame >> (pool, 'frames')
    smsyn.sineframe >> (pool, 'sineframes')
    smsyn.resframe >> (pool, 'resframes')
    
    essentia.run(insignal)
       
    outaudio = framesToAudio(pool['frames'])        
    outaudio = outaudio [2*params['hopSize']:]
    

    return outaudio, pool
def analsynthHarmonicMaskStreaming(params, signal):
  
    out = array([0.])
  
    pool = essentia.Pool()
    # windowing and FFT
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    spec = es.Spectrum(size=params['frameSize'])

    # pitch detection
    pitchDetect = es.PitchYinFFT(frameSize=params['frameSize'], sampleRate=params['sampleRate'])

    hmask = es.HarmonicMask(sampleRate=params['sampleRate'], binWidth=params['binWidth'], attenuation=params['attenuation_dB'])

    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'])

    
    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
        
      
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> spec.frame
    w.frame >> fft.frame
    spec.spectrum >> pitchDetect.spectrum
    
    fft.fft >> hmask.fft
    pitchDetect.pitch >> hmask.pitch  
    pitchDetect.pitchConfidence >> (pool, 'pitchConfidence')  

    hmask.fft >> ifft.fft
    
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)
    

    # remove first half window frames
    outaudio = pool['audio']
    outaudio = outaudio [2*params['hopSize']:]

    return outaudio, pool
Example #45
    def run(self):
        global frame_g
        while not self.stoprequest.isSet():
            pool = essentia.Pool()
            self.frame_q.get()
            v_in = VectorInput(frame_g)
            beat_tracker = RhythmExtractor2013(method="degara")
            v_in.data >> beat_tracker.signal
            beat_tracker.ticks >> (pool, 'Rhythm.ticks')
            beat_tracker.bpm >> (pool, 'Rhythm.bpm')
            beat_tracker.confidence >> None
            beat_tracker.estimates >> None
            beat_tracker.bpmIntervals >> None
            essentia.run(v_in)
            self.result_q.put(pool)
Example #46
File: features.py, Project: ctralie/acoss
    def chromaprint(self, analysisTime=30):
        """
        This algorithm computes the fingerprint of the input signal using the Chromaprint algorithm.
        It is a wrapper of the Chromaprint library.

        Returns: The chromaprints are returned as base64-encoded strings.
        """
        vec_input = ess.VectorInput(self.audio_vector)
        chromaprinter = ess.Chromaprinter(analysisTime=analysisTime, sampleRate=self.fs)
        pool = Pool()

        vec_input.data >> chromaprinter.signal
        chromaprinter.fingerprint >> (pool, 'chromaprint')
        run(vec_input)
        return pool['chromaprint']
def analsynthSineModelStreaming(params, signal):
  
    out = numpy.array(0)
  
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'], hopSize=params['hopSize'], startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'], maxnSines=params['maxnSines'], magnitudeThreshold=params['magnitudeThreshold'], freqDevOffset=params['freqDevOffset'], freqDevSlope=params['freqDevSlope'])
    smsyn = es.SineModelSynth(sampleRate=params['sampleRate'], fftSize=params['frameSize'], hopSize=params['hopSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overl = es.OverlapAdd(frameSize=params['frameSize'], hopSize=params['hopSize'])


    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smsyn.fft >> ifft.fft
    ifft.frame >> overl.frame
    overl.signal >> (pool, 'audio')

    essentia.run(insignal)
    

    # remove short tracks
    freqs = pool['frequencies']
    minFrames = int(params['minSineDur'] * params['sampleRate'] / params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean

    # remove first half window frames
    outaudio = pool['audio']
    outaudio = outaudio [2*params['hopSize']:]

    return outaudio, pool
Example #48
def analsynthSineSubtractionStreaming(params, signal):

    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'],
                              maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])

    subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsub = es.SineSubtraction(sampleRate=params['sampleRate'],
                               fftSize=subtrFFTSize,
                               hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))

    insignal = VectorInput(signal)
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # subtraction
    fcut.frame >> smsub.frame
    smanal.magnitudes >> smsub.magnitudes
    smanal.frequencies >> smsub.frequencies
    smanal.phases >> smsub.phases
    smsub.frame >> (pool, 'frames')

    essentia.run(insignal)

    print(pool['frames'].shape)
    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
Example #49
def analSprModelStreaming(params, signal):

    #out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    smanal = es.SprModelAnal(sampleRate=params['sampleRate'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal

    fcut.frame >> smanal.frame
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    smanal.res >> (pool, 'res')

    essentia.run(insignal)

    # remove first half window frames
    mags = pool['magnitudes']
    freqs = pool['frequencies']
    phases = pool['phases']

    # remove short tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] /
                    params['hopSize'])
    freqsClean = cleaningSineTracks(freqs, minFrames)
    pool['frequencies'].data = freqsClean

    return mags, freqsClean, phases
def extract_loudness(file):
    pool = essentia.Pool()

    loader = MonoLoader(filename=file)

    frameCutter = FrameCutter(frameSize=44100 * 60, hopSize=44100 * 5)
    loader.audio >> frameCutter.signal

    #Loudness
    loudness = Loudness()
    frameCutter.frame >> loudness.signal
    loudness.loudness >> (pool, "lowlevel.loudness")

    essentia.run(loader)
    df = pd.DataFrame()

    df["Loudness"] = pool["lowlevel.loudness"]
    return df
Example #51
def __rhythm_extractor_process(filename, result):
    pool = essentia.Pool()
    loader = MonoLoader(filename=filename)
    bt = RhythmExtractor2013()
    loader.audio >> bt.signal
    bt.bpm >> (pool, 'bpm')
    bt.ticks >> (pool, 'ticks')
    bt.confidence >> (pool, 'confidence')
    bt.estimates >> (pool, 'estimates')
    bt.bpmIntervals >> (pool, 'bpmIntervals')
    essentia.run(loader)
    result.append({
        'bpm': pool['bpm'],
        'ticks': list(pool['ticks']),
        'confidence': pool['confidence'],
        'estimates': list(pool['estimates']),
        'bpmIntervals': list(pool['bpmIntervals'])
    })
    pool.clear()
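
The result argument suggests this helper is meant to run in a separate process, for example to isolate each Essentia streaming network. A hypothetical driver, assuming a multiprocessing setup:

import multiprocessing

def rhythm_in_subprocess(filename):
    # Hypothetical wrapper around __rhythm_extractor_process above.
    manager = multiprocessing.Manager()
    result = manager.list()
    p = multiprocessing.Process(target=__rhythm_extractor_process,
                                args=(filename, result))
    p.start()
    p.join()
    return result[0]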
def callback(data):
    # update audio buffer
    buffer[:] = array(unpack('f' * bufferSize, data))

    # generate predictions
    reset(vimp)
    run(vimp)

    # update mel and activation buffers
    melBuffer[:] = np.roll(melBuffer, -patchSize)
    melBuffer[:, -patchSize:] = pool['melbands'][-patchSize:,:].T
    img_mel.set_data(melBuffer)
    
    actBuffer[:] = np.roll(actBuffer, -1)
    actBuffer[:, -1] = softmax(20 * pool['model/Sigmoid'][-1,:].T)
    img_act.set_data(actBuffer)

    # update plots
    f.canvas.draw()
def analsynthSpsModelStreaming(params, signal):

    out = array([0.])

    pool = essentia.Pool()
    # windowing and FFT (declared for completeness; SpsModelAnal is fed frames directly below)
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    spec = es.Spectrum(size=params['frameSize'])

    smanal = es.SpsModelAnal(sampleRate=params['sampleRate'],
                             hopSize=params['hopSize'],
                             maxnSines=params['maxnSines'],
                             magnitudeThreshold=params['magnitudeThreshold'],
                             freqDevOffset=params['freqDevOffset'],
                             freqDevSlope=params['freqDevSlope'],
                             minFrequency=params['minFrequency'],
                             maxFrequency=params['maxFrequency'],
                             stocf=params['stocf'])

    # make sure the synthesis FFT size is appropriate
    synFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsyn = es.SpsModelSynth(sampleRate=params['sampleRate'],
                             fftSize=synFFTSize,
                             hopSize=params['hopSize'],
                             stocf=params['stocf'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))
    insignal = VectorInput(signal)

    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> smanal.frame

    # synthesis
    smanal.magnitudes >> smsyn.magnitudes
    smanal.frequencies >> smsyn.frequencies
    smanal.phases >> smsyn.phases
    smanal.stocenv >> smsyn.stocenv

    smsyn.frame >> (pool, 'frames')
    smsyn.sineframe >> (pool, 'sineframes')
    smsyn.stocframe >> (pool, 'stocframes')

    essentia.run(insignal)

    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
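A hedged round-trip sketch for the SPS model; the parameter values and file names are illustrative, not taken from the original:

from essentia.standard import MonoLoader, MonoWriter

params = {'frameSize': 2048, 'hopSize': 128, 'sampleRate': 44100,
          'maxnSines': 100, 'magnitudeThreshold': -74,
          'freqDevOffset': 10, 'freqDevSlope': 0.001,
          'minFrequency': 20, 'maxFrequency': 22050, 'stocf': 0.2}

signal = MonoLoader(filename='input.wav', sampleRate=params['sampleRate'])()
outaudio, pool = analsynthSpsModelStreaming(params, signal)
MonoWriter(filename='resynth.wav', sampleRate=params['sampleRate'])(outaudio)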
Example #54
def sfxPitch(pool, namespace=''):
    sfxspace = 'sfx.'
    llspace = 'lowlevel.'
    if namespace:
        sfxspace = namespace + '.sfx.'
        llspace = namespace + '.lowlevel.'
    pitch = pool[llspace+'pitch']
    gen = streaming.VectorInput(pitch)
    maxtt = streaming.MaxToTotal()
    mintt = streaming.MinToTotal()
    amt = streaming.AfterMaxToBeforeMaxEnergyRatio()
    gen.data >> maxtt.envelope
    gen.data >> mintt.envelope
    gen.data >> amt.pitch
    maxtt.maxToTotal >> (pool, sfxspace+'pitch_max_to_total')
    mintt.minToTotal >> (pool, sfxspace+'pitch_min_to_total')
    amt.afterMaxToBeforeMaxEnergyRatio >> (pool, sfxspace+'pitch_after_max_to_before_max_energy_ratio')
    essentia.run(gen)

    pc = standard.Centroid(range=len(pitch)-1)(pitch)
    pool.set(sfxspace+'pitch_centroid', pc)
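sfxPitch expects a per-frame pitch curve already stored under lowlevel.pitch; a minimal sketch with a toy pitch curve (real code would take the values from a pitch extractor):

import essentia

pool = essentia.Pool()
for p in [220.0 + i for i in range(200)]:   # toy per-frame pitch values in Hz
    pool.add('lowlevel.pitch', p)

sfxPitch(pool)
print(pool['sfx.pitch_centroid'])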
def extract_HPCP(_filename):

    # initialization
    HPCP_size = 36
    pool = Pool()
    key = Key()  # instantiated but not connected in this network
    loader = MonoLoader(filename=_filename)
    frameCutter = FrameCutter(frameSize=4096, hopSize=512)
    window = Windowing(type='blackmanharris62')
    spectrum = Spectrum()
    spectralPeaks = SpectralPeaks(orderBy="magnitude",
                                  magnitudeThreshold=1e-05,
                                  minFrequency=100.0,
                                  maxFrequency=5000.0,
                                  maxPeaks=10000)
    hpcp = HPCP(size=HPCP_size,
                bandPreset=False,
                minFrequency=100.0,
                maxFrequency=5000.0,
                weightType='squaredCosine',
                nonLinear=False,
                sampleRate=44100.0,
                windowSize=4.0 / 3.0)

    # audio --> frame cutter --> windowing --> spectrum --> spectral peaks --> HPCP
    loader.audio >> frameCutter.signal
    frameCutter.frame >> window.frame
    window.frame >> spectrum.frame
    spectrum.spectrum >> spectralPeaks.spectrum
    spectralPeaks.magnitudes >> hpcp.magnitudes
    spectralPeaks.frequencies >> hpcp.frequencies
    hpcp.hpcp >> (pool, 'hpcp_vector')
    essentia.run(loader)

    hpcpVector = pool['hpcp_vector']
    # take the mean over frames to get a global HPCP
    globalHPCP = hpcpVector.mean(axis=0)
    # normalize the global HPCP
    globalHPCP = normalize(globalHPCP)
    return globalHPCP
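normalize is not defined in this snippet; a plausible reading, sketched here as an assumption, is scaling the averaged HPCP so its maximum bin equals 1:

import numpy as np

def normalize(v):
    # assumption: max-normalise the global HPCP; leave an all-zero vector unchanged
    v = np.asarray(v, dtype=float)
    m = v.max()
    return v / m if m > 0 else v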
def computeLowLevel(input_file, neqPool, eqPool, startTime, endTime, namespace=""):
    llspace = "lowlevel."
    rhythmspace = "rhythm."
    if namespace:
        llspace = namespace + ".lowlevel."
        rhythmspace = namespace + ".rhythm."

    rgain, sampleRate, downmix = getAnalysisMetadata(neqPool)
    loader = streaming.EasyLoader(
        filename=input_file,
        sampleRate=sampleRate,
        startTime=startTime,
        endTime=endTime,
        replayGain=rgain,
        downmix=downmix,
    )

    eqloud = streaming.EqualLoudness()
    loader.audio >> eqloud.signal
    lowlevel.compute(eqloud.signal, loader.audio, neqPool, startTime, endTime, namespace)
    lowlevel.compute(eqloud.signal, eqloud.signal, eqPool, startTime, endTime, namespace)
    essentia.run(loader)

    # check if we processed enough audio for it to be useful, in particular did
    # we manage to get an estimation for the loudness (2 seconds required)
    if not neqPool.containsKey(llspace + "loudness") and not eqPool.containsKey(llspace + "loudness"):
        INFO("ERROR: File is too short (< 2sec)... Aborting...")
        sys.exit(2)

    sampleRate = neqPool["metadata.audio_properties.analysis_sample_rate"]

    numOnsets = len(neqPool[rhythmspace + "onset_times"])
    onset_rate = numOnsets / float(loader.audio.totalProduced()) * sampleRate
    neqPool.set(rhythmspace + "onset_rate", onset_rate)

    numOnsets = len(eqPool[rhythmspace + "onset_times"])
    onset_rate = numOnsets / float(loader.audio.totalProduced()) * sampleRate
    eqPool.set(rhythmspace + "onset_rate", onset_rate)
def analsynthSineSubtractionStreaming(params, signal):

    out = numpy.array(0)
    pool = essentia.Pool()
    fcut = es.FrameCutter(frameSize=params['frameSize'],
                          hopSize=params['hopSize'],
                          startFromZero=False)
    w = es.Windowing(type="blackmanharris92")
    fft = es.FFT(size=params['frameSize'])
    smanal = es.SineModelAnal(sampleRate=params['sampleRate'],
                              maxnSines=params['maxnSines'],
                              magnitudeThreshold=params['magnitudeThreshold'],
                              freqDevOffset=params['freqDevOffset'],
                              freqDevSlope=params['freqDevSlope'])

    # make sure the subtraction FFT size is appropriate
    subtrFFTSize = min(params['frameSize'] // 4, 4 * params['hopSize'])
    smsub = es.SineSubtraction(sampleRate=params['sampleRate'],
                               fftSize=subtrFFTSize,
                               hopSize=params['hopSize'])

    # add half a window of zeros to the input signal to reach the same output length
    signal = numpy.append(signal, zeros(params['frameSize'] // 2))

    insignal = VectorInput(signal)
    # analysis
    insignal.data >> fcut.signal
    fcut.frame >> w.frame
    w.frame >> fft.frame
    fft.fft >> smanal.fft
    smanal.magnitudes >> (pool, 'magnitudes')
    smanal.frequencies >> (pool, 'frequencies')
    smanal.phases >> (pool, 'phases')
    # subtraction
    fcut.frame >> smsub.frame
    smanal.magnitudes >> smsub.magnitudes
    smanal.frequencies >> smsub.frequencies
    smanal.phases >> smsub.phases
    smsub.frame >> (pool, 'frames')

    essentia.run(insignal)

    print(pool['frames'].shape)
    outaudio = framesToAudio(pool['frames'])
    outaudio = outaudio[2 * params['hopSize']:]

    return outaudio, pool
Example #58
# analysis
loader.audio >> fcut.signal
fcut.frame >> w.frame
w.frame >> fft.frame
fft.fft >> smanal.fft
smanal.magnitudes >> (pool, 'magnitudes')
smanal.frequencies >> (pool, 'frequencies')
smanal.phases >> (pool, 'phases')
# subtraction
fcut.frame >> smsub.frame
smanal.magnitudes >> smsub.magnitudes
smanal.frequencies >> smsub.frequencies
smanal.phases >> smsub.phases
smsub.frame >> (pool, 'frames')


essentia.run(loader)


# store to file
outaudio = pool['frames'].flatten()

awrite = es.MonoWriter(filename=outputFilename, sampleRate=params['sampleRate'])
outvector = es.VectorInput(outaudio)

outvector.data >> awrite.audio
essentia.run(outvector)


Example #59
    pool = essentia.Pool()
    readMetadata(args[0], pool)
    replaygain.compute(args[0], pool, startTime, endTime)
    rgain, sampleRate, downmix = getAnalysisMetadata(pool)

    loader = streaming.EqloudLoader(filename=args[0],
                                    sampleRate=sampleRate,
                                    startTime=startTime,
                                    endTime=endTime,
                                    replayGain=rgain,
                                    downmix=downmix)

    compute(loader.audio, loader.audio, pool, startTime, endTime,
            sampleRate=analysisSampleRate)

    essentia.run(loader)

    # check if we processed enough audio for it to be useful, in particular did
    # we manage to get an estimation for the loudness (2 seconds required)
    if not pool.containsKey(llspace + "loudness"):
        INFO('ERROR: File is too short (< 2sec)... Aborting...')
        sys.exit(2)

    numOnsets = len(pool[rhythmspace + 'onset_times'])
    sampleRate = pool['metadata.audio_properties.analysis_sample_rate']
    onset_rate = numOnsets / float(loader.audio.totalProduced()) * sampleRate
    pool.set(rhythmspace + 'onset_rate', onset_rate)



Example #60
def runResetRun(gen, *args, **kwargs):
    # 0. Find networks which contain algorithms who do not play nice with our
    #    little trick. In particular, we have a test for multiplexer that runs
    #    multiple generators...
    def isValid(algo):
        if isinstance(algo, essentia.streaming.VectorInput) and not list(algo.connections.values())[0]:
            # non-connected VectorInput, we don't want to get too fancy here...
            return False
        if algo.name() == 'Multiplexer':
            return False
        for output, inputs in algo.connections.items():
            for inp in inputs:
                if isinstance(inp, essentia.streaming._StreamConnector) and not isValid(inp.input_algo):
                    return False
        return True

    if not isValid(gen):
        print('Network is not capable of doing the run/reset/run trick, doing it the normal way...')
        essentia.run(gen)
        return


    # 1. Find all the outputs in the network that are connected to pools--aka
    #    pool feeders and for each pool feeder, disconnect the given pool,
    #    store it, and connect a dummy pool in its place
    def useDummy(algo, output, input):
        if not hasattr(output, 'originalPools'):
            output.originalPools = []
            output.dummyPools = []

        # disconnect original
        output.originalPools.append(input)
        output.disconnect(input)

        # connect dummy
        dummy = essentia.Pool()
        output.dummyPools.append((dummy, input[1]))
        output >> output.dummyPools[-1]

    mapPools(gen, useDummy)

    # 2. Run the network
    essentia.run(gen)

    # 3. Reset the network
    essentia.reset(gen)

    # 4. For each pool feeder, disconnect the dummy pool and reconnect the
    #    original pool
    def useOriginal(algo, output, input):
        # disconnect dummy
        output.disconnect(input)
        # the dummy pools and the original pools should have the same index

        idx = output.dummyPools.index(input)
        output.dummyPools.remove(input)

        # connect original
        output >> output.originalPools[idx]

        # don't need these anymore
        if len(output.dummyPools) == 0:
            del output.dummyPools
            del output.originalPools

    mapPools(gen, useOriginal)

    # 5. Run the network for the second and final time
    return essentia.run(gen)
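A hedged usage sketch for the run/reset/run helper, assuming the mapPools helper it relies on is defined alongside it; the network, file name and descriptor key below are illustrative:

import essentia
import essentia.streaming as es

pool = essentia.Pool()
loader = es.MonoLoader(filename='track.mp3')
fcut = es.FrameCutter()
w = es.Windowing()
spec = es.Spectrum()
centroid = es.Centroid()

loader.audio >> fcut.signal
fcut.frame >> w.frame
w.frame >> spec.frame
spec.spectrum >> centroid.array
centroid.centroid >> (pool, 'spectral_centroid')

runResetRun(loader)          # runs, resets, then runs the network a second time
print(len(pool['spectral_centroid']))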