Exemplo n.º 1
0
def segment(audio, hopSize, frameSize, rms_onset_threshold,
            mel_onset_threshold, flux_onset_threshold, onset_threshold):
    """Find the first onset frame and the frame where its sustain begins.

    The audio is cut into frames; for every frame the 'melflux', 'rms' and
    'flux' onset detection functions, the PitchYinFFT confidence and the
    loudness are computed and fed to a small two-flag state machine
    (detecting -> attack -> sustain).

    Args:
        audio: signal handed to ``estd.FrameGenerator``.
        hopSize: hop size passed to the frame generator.
        frameSize: frame size passed to the frame generator and PitchYinFFT.
        rms_onset_threshold: value the 'rms' detection function must exceed.
        mel_onset_threshold: value the 'melflux' detection function may exceed.
        flux_onset_threshold: value the 'flux' detection function may exceed.
        onset_threshold: value the frame loudness must exceed.

    Returns:
        ``(onset, sustain)``: frame indices of the detected onset and of the
        start of the sustain. Either is ``None`` if its condition never held.
    """
    # init algorithms
    o_mel = estd.OnsetDetection(method='melflux')
    o_rms = estd.OnsetDetection(method='rms')
    o_flux = estd.OnsetDetection(method='flux')
    fft = estd.FFT()
    c2p = estd.CartesianToPolar()
    frame_generator = estd.FrameGenerator(audio,
                                          frameSize=frameSize,
                                          hopSize=hopSize)
    w = estd.Windowing(type='hann')
    yin = estd.PitchYinFFT(frameSize=frameSize,
                           minFrequency=40,
                           maxFrequency=2500,
                           interpolate=True)
    spectrum = estd.Spectrum()
    loudness = estd.Loudness()

    # control parameters
    attack = False
    detection = True
    mel_onset_value = 0
    rms_onset_value = 0

    # output variables
    onset = None
    sustain = None

    for index, frame in enumerate(frame_generator):
        # Window once and reuse the result for both the FFT and the spectrum.
        windowed = w(frame)
        mag, phase = c2p(fft(windowed))
        _, conf = yin(spectrum(windowed))
        # Loudness is measured on the raw (un-windowed) frame.
        loud = loudness(frame)
        mel_onset = o_mel(mag, phase)
        rms_onset = o_rms(mag, phase)
        flux_onset = o_flux(mag, phase)

        # condition for onset: spectral change (flux or melflux) together
        # with enough energy (rms detection function and loudness)
        spectral_change = (flux_onset > flux_onset_threshold
                           or mel_onset > mel_onset_threshold)
        if (detection and spectral_change
                and rms_onset > rms_onset_threshold
                and loud > onset_threshold):
            onset = index
            attack = True
            detection = False
            # Remember the detection values at the onset; the sustain test
            # below is relative to them.
            mel_onset_value = mel_onset
            rms_onset_value = rms_onset

        # condition for beginning of sustain: confident pitch and both
        # detection functions decayed to a fraction of their onset values
        if (attack and conf > 0.5
                and rms_onset < rms_onset_value * .05
                and mel_onset < mel_onset_value * .3):
            attack = False
            sustain = index
            # `detection` and `attack` are now both False for good, so no
            # later frame can change the result.
            break

    return onset, sustain
Exemplo n.º 2
0
    def get_onsets(self, _audio=None):
        """Compute onsets for ``_audio`` or, when it is missing/empty, ``self.audio``.

        Args:
            _audio: optional signal to analyse. ``None`` (the default) or an
                empty sequence selects ``self.audio`` instead.

        Returns:
            The array produced by ``es.Onsets`` for the single onset
            detection function computed with ``self.onsetMethod``.
        """
        # An explicit None/length check replaces the former `_audio != []`
        # test: comparing a NumPy array with a list is element-wise, so its
        # truth value is ambiguous (or silently wrong for short arrays).
        if _audio is None or len(_audio) == 0:
            audio = self.audio
        else:
            audio = _audio

        W = es.Windowing(type=self.winType)
        c2p = es.CartesianToPolar()
        fft = es.FFT()
        onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                           sampleRate=44100)
        onsets = es.Onsets(alpha=.2)
        pool = Pool()

        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(W(frame)))
            # NOTE(review): re-configuring on every frame looks redundant
            # (the method is already set at construction) and may reset the
            # detector's internal state; kept as-is to preserve results.
            onsetDetection.configure(method=self.onsetMethod)
            onsetFunction = onsetDetection(mag, phase)
            pool.add("onsetFunction", onsetFunction)

        # Onsets takes a matrix of detection functions plus one weight each.
        DetectedOnsetsArray = onsets([pool["onsetFunction"]], [1])

        return DetectedOnsetsArray
Exemplo n.º 3
0
    def __onset_candidate_detection__(self):
        """Frame ``self.signal`` and store a 'flux' detection value per frame.

        Every frame is appended to ``self.frames`` and the matching onset
        detection value to ``self.onset_candidates``; ``self.frame_count`` is
        then set to the total number of stored frames.
        """
        spectrum = e.Spectrum()
        e_onsetdetection = e.OnsetDetection(method="flux")

        for frame in e.FrameGenerator(self.signal, 1024, 512):
            self.frames.append(frame)
            # Only the current spectrum is needed, so it is not accumulated.
            frame_spectrum = spectrum(frame)
            # No phase is computed here; an all-zero vector of matching
            # length is passed as a placeholder.
            zero_phase = [0] * len(frame_spectrum)
            self.onset_candidates.append(
                e_onsetdetection(frame_spectrum, zero_phase))

        self.frame_count = len(self.frames)
Exemplo n.º 4
0
    def __init__(self,
                 signal,
                 sampleRate,
                 frameSize=1024,
                 hopSize=512,
                 method='complex',
                 window='hann'):
        """Keep the analysis settings and create the Essentia algorithms.

        Args:
            signal: array-like with an ``astype`` method; stored as float32.
            sampleRate: stored unchanged as ``self.sampleRate``.
            frameSize: stored unchanged as ``self.frameSize``.
            hopSize: stored unchanged as ``self.hopSize``.
            method: ``method`` argument for ``es.OnsetDetection``.
            window: ``type`` argument for ``es.Windowing``.
        """
        # Convert first so a bad `signal` fails before any state is set.
        samples = signal.astype(np.float32)
        self.signal, self.sampleRate = samples, sampleRate
        self.frameSize, self.hopSize = frameSize, hopSize

        # Algorithm instances reused for every frame.
        onset_function = es.OnsetDetection(method=method)
        self.calcOnsetFunc = onset_function
        window_function = es.Windowing(type=window)
        self.window = window_function
Exemplo n.º 5
0
def OnsetsSegmentation(audio,
                       frame_size=1024,
                       frame_hop=512,
                       windowing_type='hann',
                       onsets_method='hfc'):
    """Slice ``audio`` into segments delimited by its detected onsets.

    Args:
        audio: signal to analyse and slice.
        frame_size: frame size forwarded to ``inFrames``.
        frame_hop: hop size forwarded to ``inFrames``.
        windowing_type: ``type`` argument for the Windowing algorithm.
        onsets_method: ``method`` argument for the OnsetDetection algorithm.

    Returns:
        ``(segments, onsets)``: the output of ``Slicer`` for the intervals
        between consecutive onsets (with 0 and the audio duration as outer
        bounds), and the onsets returned by ``Onsets``.
    """
    # algorithms used below
    spec = es_mode.Spectrum()
    fft = es_mode.FFT()
    c2p = es_mode.CartesianToPolar()
    od1 = es_mode.OnsetDetection(method=onsets_method)
    w = es_mode.Windowing(type=windowing_type)
    pool = es.Pool()

    # callback handed to inFrames (presumably invoked once per frame)
    def F(n):
        # Window once; the same windowed frame feeds spectrum and FFT.
        windowed = w(n)
        _, phase = c2p(fft(windowed))
        # Only the detection value is used later, so the per-frame spectrum
        # and phase are no longer stored in the pool.
        pool.add('features.onsetdetection', od1(spec(windowed), phase))

    # run the callback over the audio; the result is used as the frame count
    qtdFrames = inFrames(audio=audio,
                         algorithm=F,
                         frameSize=frame_size,
                         hopSize=frame_hop)

    audio_duration = es_mode.Duration()(audio)
    frame_rate = qtdFrames / audio_duration
    # Named `onsets_algo` so the stdlib module name `os` is not shadowed.
    onsets_algo = es_mode.Onsets(frameRate=frame_rate)

    # matrix of onset detection functions (a single row here)
    onset_detection_matrix = es.array([pool['features.onsetdetection']])

    # the second argument is the weight vector, one weight per detection row
    onsets = onsets_algo(onset_detection_matrix, [1])

    # Each segment runs from one onset (or 0) to the next (or the end).
    end_times = es.array(np.append(onsets, audio_duration))
    start_times = es.array(np.append([0], onsets))
    segments = es_mode.Slicer(endTimes=end_times,
                              startTimes=start_times,
                              timeUnits="seconds")(audio)
    return segments, onsets
Exemplo n.º 6
0
    def __detect_onsets(self, file, frame_size, hop_size, windowfnc,
                        normalize) -> None:
        """Run onset detection on ``file.audio`` and store it in ``self.onsets``.

        Args:
            file: object whose ``audio`` attribute holds the signal.
            frame_size: frame size for windowing, FFT and framing.
            hop_size: hop size for framing.
            windowfnc: object whose ``value`` is the Windowing ``type``.
            normalize: forwarded as Windowing's ``normalized`` argument.
        """
        apply_window = estd.Windowing(size=frame_size,
                                      type=windowfnc.value,
                                      normalized=normalize)
        transform = estd.FFT(size=frame_size)
        features = es.Pool()
        to_polar = estd.CartesianToPolar()
        detector = estd.OnsetDetection(method=self.algo)

        frames = estd.FrameGenerator(file.audio,
                                     frameSize=frame_size,
                                     hopSize=hop_size)
        for chunk in frames:
            magnitude, angle = to_polar(transform(apply_window(chunk)))
            detection_value = detector(magnitude, angle)
            features.add("features." + self.algo, detection_value)

        # Onsets takes a matrix of detection functions and one weight per
        # row; there is a single row here, weighted 1.
        detection_matrix = es.array([features["features." + self.algo]])
        find_onsets = estd.Onsets()
        self.onsets = find_onsets(detection_matrix, [1])
Exemplo n.º 7
0
def detect_onset(audio, index):
    """Detect onsets in ``audio`` with the 'complex' method and log timing.

    Args:
        audio: signal handed to ``FrameGenerator``.
        index: identifier used only in the progress messages.

    Returns:
        The array produced by ``Onsets`` for the single detection function.
    """
    # should be able to fetch the module from cache
    import essentia.standard as ess_std
    from essentia import array

    print("Subprocess {} starts".format(index))
    processing_start = time()

    onset_detector = ess_std.OnsetDetection(method="complex")
    window = ess_std.Windowing(type="hann")
    fft = ess_std.FFT()
    c2p = ess_std.CartesianToPolar()
    onsets = ess_std.Onsets()

    # One detection-function value per frame.
    detection_values = []
    for frame in ess_std.FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(window(frame)))
        detection_values.append(onset_detector(mag, phase))

    # Onsets takes a matrix (one row per detection function) and weights.
    onsets_array = onsets(array([detection_values]), [1])
    # `.2f` gives two decimals; the former `.2` meant two significant
    # digits and rendered e.g. 12.3 seconds as "1.2e+01".
    print("Subprocess {} finished. Elapsed time: {:.2f}s".format(
        index,
        time() - processing_start))
    return onsets_array
Exemplo n.º 8
0
import numpy as np
import matplotlib.pyplot as plt
import essentia.standard as ess

# Analysis settings: window size, FFT size, hop size and sample rate.
M = 1024
N = 1024
H = 512
fs = 44100
spectrum = ess.Spectrum(size=N)
window = ess.Windowing(size=M, type='hann')
flux = ess.Flux()
onsetDetection = ess.OnsetDetection(method='hfc')
x = ess.MonoLoader(filename='../../../sounds/speech-male.wav', sampleRate=fs)()
fluxes = []
onsetDetections = []

# Compute the spectral flux and the 'hfc' onset detection value per frame.
for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True):
    mX = spectrum(window(frame))
    flux_val = flux(mX)
    fluxes.append(flux_val)
    # No phase is computed in this script; the magnitude spectrum is passed
    # as a placeholder for the second (phase) input.
    # NOTE(review): per the Essentia docs only the 'complex' method reads
    # the phase, so this should not affect 'hfc' - confirm.
    onsetDetection_val = onsetDetection(mX, mX)
    onsetDetections.append(onsetDetection_val)
onsetDetections = np.array(onsetDetections)
fluxes = np.array(fluxes)

plt.figure(1, figsize=(9.5, 7))
plt.subplot(2, 1, 1)

# Waveform against time in seconds.
plt.plot(np.arange(x.size) / float(fs), x)
# Use the array's own reductions: the builtin min()/max() would walk the
# whole signal one sample at a time in Python.
plt.axis([0, x.size / float(fs), x.min(), x.max()])
plt.ylabel('amplitude')
Exemplo n.º 9
0
    def __init__(self, params, fsm=None):
        """Read the detector settings, build the algorithms and the state machine.

        Args:
            params: mapping indexed with the string keys read below
                ('onset_threshold', 'offset_threshold', 'max_attack_time',
                'max_release_time', 'attack_slope_ratio',
                'release_slope_ratio', 'flux_threshold', 'mel_threshold',
                'rms_threshold', 'conf_threshold', 'ratio_mel', 'ratio_rms',
                'fs', 'hop_size'). A missing key raises on lookup.
            fsm: optional external state machine, stored as ``self.ext_fsm``.
        """
        # Thresholds and ratios copied verbatim from `params`.
        self.onset_threshold = params['onset_threshold']
        self.offset_threshold = params['offset_threshold']
        self.max_attack_time = params['max_attack_time']
        self.max_release_time = params['max_release_time']
        self.attack_slope_ratio = params['attack_slope_ratio']
        self.release_slope_ratio = params['release_slope_ratio']
        self.flux_threshold = params['flux_threshold']
        self.mel_threshold = params['mel_threshold']
        self.rms_threshold = params['rms_threshold']
        self.conf_threshold = params['conf_threshold']
        self.ratio_mel = params['ratio_mel']
        self.ratio_rms = params['ratio_rms']
        self.rms_threshold_value = 0
        # NOTE(review): `mel_threshold_vale` looks like a typo of
        # `mel_threshold_value` (cf. `rms_threshold_value` above) - check how
        # the rest of the class spells it before renaming.
        self.mel_threshold_vale = 0

        self.fs = params['fs']
        self.hop_size = params['hop_size']
        # Maximum attack/release times converted with seconds2frames
        # (presumably from seconds to a frame count - confirm in the helper).
        self.max_attack_frames = seconds2frames(self.max_attack_time,
                                                fs=self.fs,
                                                hop_size=self.hop_size)
        self.max_release_frames = seconds2frames(self.max_release_time,
                                                 fs=self.fs,
                                                 hop_size=self.hop_size)
        self.ext_fsm = fsm  # external state machine to send events to
        self.buffer = []

        # Bookkeeping for onset/offset tracking, all starting out cleared.
        self.was_onset = False
        self.was_offset = False
        self.onset_counter = self.offset_counter = None
        self.onset_samples = 2  # number of consecutive samples to be above threshold
        self.offset_samples = 3  # number of consecutive samples to be below threshold
        self.peak_detect = GrowingSlopeEnd(max_frames=self.max_attack_frames,
                                           m=self.attack_slope_ratio)

        # essentia algorithms initialization
        # One OnsetDetection instance per detection method.
        self.o_mel = estd.OnsetDetection(method='melflux')
        self.o_rms = estd.OnsetDetection(method='rms')
        self.o_hfc = estd.OnsetDetection(method='hfc')
        self.o_flux = estd.OnsetDetection(method='flux')
        self.o_complex = estd.OnsetDetection(method='complex')
        self.fft = estd.FFT()
        self.c2p = estd.CartesianToPolar()
        self.w = estd.Windowing(type='hann')

        # STATE MACHINE
        # Transitions declared below:
        #   onset:  detecting -> attack
        #   peak:   attack    -> sustain
        #   offset: sustain   -> detecting
        #   reset:  detecting/attack/sustain -> detecting
        # The callbacks are methods of this object registered under Fysom's
        # callback names; the attributes set above are presumably what they
        # read, so this construction stays last.
        self.fsm = Fysom({
            'initial':
            'detecting',
            'events': [{
                'name': 'onset',
                'src': 'detecting',
                'dst': 'attack'
            }, {
                'name': 'peak',
                'src': 'attack',
                'dst': 'sustain'
            }, {
                'name': 'offset',
                'src': 'sustain',
                'dst': 'detecting'
            }, {
                'name': 'reset',
                'src': ['detecting', 'attack', 'sustain'],
                'dst': 'detecting'
            }],
            'callbacks': {
                'ondetecting': self.on_detecting,
                'onattack': self.on_attack,
                'onsustain': self.on_sustain,
                'onbeforeonset': self.on_onset,
                'onbeforepeak': self.on_peak,
                'onbeforeoffset': self.on_offset
            }
        })