def segment(audio, hopSize, frameSize, rms_onset_threshold, mel_onset_threshold,
            flux_onset_threshold, onset_threshold, conf_threshold=0.5,
            ratio_rms=.05, ratio_mel=.3):
    """Find the first onset frame and the start of the following sustain.

    The audio is cut into frames; for every frame the mel-flux, RMS and flux
    onset detection functions, the YinFFT pitch confidence and the loudness
    are computed and compared against the given thresholds.

    Args:
        audio: signal handed to ``estd.FrameGenerator``.
        hopSize: hop size between consecutive frames.
        frameSize: frame size (also used to configure ``PitchYinFFT``).
        rms_onset_threshold: the RMS detection value must exceed this for an
            onset.
        mel_onset_threshold: mel-flux threshold (alternative to flux).
        flux_onset_threshold: flux threshold (alternative to mel-flux).
        onset_threshold: the frame loudness must exceed this for an onset.
        conf_threshold: pitch confidence that must be exceeded for the
            sustain to start.
        ratio_rms: sustain starts only once the RMS detection value has
            dropped below ``ratio_rms`` times its value at the onset.
        ratio_mel: same as ``ratio_rms`` for the mel-flux detection value.

    Returns:
        ``(onset, sustain)``: frame indices, each ``None`` when the
        corresponding event was not found.
    """
    # init algorithms
    o_mel = estd.OnsetDetection(method='melflux')
    o_rms = estd.OnsetDetection(method='rms')
    o_flux = estd.OnsetDetection(method='flux')
    fft = estd.FFT()
    c2p = estd.CartesianToPolar()
    frame_generator = estd.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize)
    w = estd.Windowing(type='hann')
    yin = estd.PitchYinFFT(frameSize=frameSize, minFrequency=40,
                           maxFrequency=2500, interpolate=True)
    spectrum = estd.Spectrum()
    loudness = estd.Loudness()

    # control parameters
    attack = False
    detection = True
    mel_onset_value = 0
    rms_onset_value = 0

    # output variables
    onset = None
    sustain = None

    for index, frame in enumerate(frame_generator):
        # Window once and share the result between the FFT and the spectrum.
        windowed = w(frame)
        mag, phase = c2p(fft(windowed))
        _, conf = yin(spectrum(windowed))
        loud = loudness(frame)

        mel_onset = o_mel(mag, phase)
        rms_onset = o_rms(mag, phase)
        flux_onset = o_flux(mag, phase)

        # condition for onset
        if (detection
                and (flux_onset > flux_onset_threshold or mel_onset > mel_onset_threshold)
                and rms_onset > rms_onset_threshold
                and loud > onset_threshold):
            onset = index
            attack = True
            detection = False
            # Remember the detection values at the onset; the sustain test
            # below is relative to them.
            mel_onset_value = mel_onset
            rms_onset_value = rms_onset

        # condition for beginning of sustain
        if (attack
                and conf > conf_threshold
                and rms_onset < rms_onset_value * ratio_rms
                and mel_onset < mel_onset_value * ratio_mel):
            attack = False
            sustain = index
            # Both flags are now False, so no later frame can change the
            # result: stop analysing.
            break

    return onset, sustain
def get_onsets(self, _audio=None):
    """Detect onsets in ``_audio`` or, when it is omitted/empty, in ``self.audio``.

    Args:
        _audio: optional signal to analyse. ``None`` or an empty sequence
            selects ``self.audio`` (an empty list was the previous default,
            so callers passing ``[]`` behave as before).

    Returns:
        The output of ``es.Onsets`` for the single detection function
        selected by ``self.onsetMethod``.
    """
    # ``_audio != []`` is ambiguous (or raises) for NumPy arrays, so test for
    # "nothing supplied" explicitly instead.
    if _audio is None or len(_audio) == 0:
        audio = self.audio
    else:
        audio = _audio

    W = es.Windowing(type=self.winType)
    c2p = es.CartesianToPolar()
    fft = es.FFT()
    onsetDetection = es.OnsetDetection(method=self.onsetMethod, sampleRate=44100)
    onsets = es.Onsets(alpha=.2)
    pool = Pool()

    for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(W(frame)))
        # NOTE(review): the detector is re-configured on every frame with the
        # method it was already built with. Kept as-is because it is unclear
        # from here whether configure() also resets the detector's internal
        # state -- confirm against Essentia before hoisting or removing it.
        onsetDetection.configure(method=self.onsetMethod)
        pool.add("onsetFunction", onsetDetection(mag, phase))

    # Onsets takes a matrix of detection functions plus one weight per row.
    DetectedOnsetsArray = onsets([pool["onsetFunction"]], [1])
    return DetectedOnsetsArray
def __onset_candidate_detection__(self):
    """Frame ``self.signal`` and compute a flux onset value for every frame.

    Each frame is appended to ``self.frames`` and its detection value to
    ``self.onset_candidates``; ``self.frame_count`` is then set to the number
    of stored frames.
    """
    spectrum = e.Spectrum()
    e_onsetdetection = e.OnsetDetection(method="flux")

    for frame in e.FrameGenerator(self.signal, 1024, 512):
        self.frames.append(frame)
        # Only the current spectrum is needed, so it is not accumulated.
        frame_spectrum = spectrum(frame)
        # A zero-filled vector is passed as the phase input.
        # NOTE(review): presumably the 'flux' method ignores the phase --
        # confirm against the Essentia documentation.
        zero_phase = [0] * len(frame_spectrum)
        self.onset_candidates.append(e_onsetdetection(frame_spectrum, zero_phase))

    self.frame_count = len(self.frames)
def __init__(self, signal, sampleRate, frameSize=1024, hopSize=512, method='complex', window='hann'):
    """Store the analysis settings and create the Essentia algorithms.

    Args:
        signal: array exposing ``astype``; it is stored converted to
            ``np.float32``.
        sampleRate: sample rate, stored unchanged.
        frameSize: frame size, stored unchanged.
        hopSize: hop size, stored unchanged.
        method: ``method`` parameter passed to ``es.OnsetDetection``.
        window: ``type`` parameter passed to ``es.Windowing``.
    """
    # Signal, converted to 32-bit floats.
    self.signal = signal.astype(np.float32)

    # Framing parameters.
    self.sampleRate, self.frameSize, self.hopSize = sampleRate, frameSize, hopSize

    # Essentia algorithm instances.
    onset_function = es.OnsetDetection(method=method)
    windowing = es.Windowing(type=window)
    self.calcOnsetFunc = onset_function
    self.window = windowing
def OnsetsSegmentation(audio, frame_size=1024, frame_hop=512, windowing_type='hann', onsets_method='hfc'):
    """Slice *audio* at its detected onsets.

    Args:
        audio: signal to analyse and slice.
        frame_size: frame size passed to ``inFrames``.
        frame_hop: hop size passed to ``inFrames``.
        windowing_type: ``type`` parameter of the ``Windowing`` algorithm.
        onsets_method: ``method`` parameter of the ``OnsetDetection`` algorithm.

    Returns:
        ``(segments, onsets)``: the output of ``Slicer`` and the onset
        positions produced by ``Onsets``. Segment boundaries are
        ``[0, onset_1, ..., onset_k, audio_duration]``.
    """
    # Algorithms used below.
    spec = es_mode.Spectrum()
    fft = es_mode.FFT()
    c2p = es_mode.CartesianToPolar()
    od1 = es_mode.OnsetDetection(method=onsets_method)
    w = es_mode.Windowing(type=windowing_type)
    pool = es.Pool()

    # Callback executed for every frame.
    def F(n):
        windowed = w(n)  # window once, reuse for spectrum and FFT
        spectrum = spec(windowed)
        _, phase = c2p(fft(windowed))
        # Only the detection value is kept; the per-frame spectrum and phase
        # were never read back, so they are no longer stored.
        pool.add('features.onsetdetection', od1(spectrum, phase))

    # Build the onset detection function frame by frame.
    # NOTE(review): inFrames is assumed to return the number of frames it
    # processed -- confirm against its definition.
    qtdFrames = inFrames(audio=audio, algorithm=F, frameSize=frame_size, hopSize=frame_hop)

    audio_duration = es_mode.Duration()(audio)
    frame_rate = qtdFrames / audio_duration
    onsets_algo = es_mode.Onsets(frameRate=frame_rate)

    # Matrix holding one row per onset detection function that was run.
    onset_detection_matrix = es.array([pool['features.onsetdetection']])

    # The second argument is the weight vector, one weight per detection function.
    onsets = onsets_algo(onset_detection_matrix, [1])

    end_times = es.array(np.append(onsets, audio_duration))
    start_times = es.array(np.append([0], onsets))
    segments = es_mode.Slicer(endTimes=end_times, startTimes=start_times, timeUnits="seconds")(audio)
    return segments, onsets
def __detect_onsets(self, file, frame_size, hop_size, windowfnc, normalize) -> None:
    """Compute the onsets of ``file.audio`` and store them in ``self.onsets``.

    Args:
        file: object exposing the signal as ``file.audio``.
        frame_size: frame size used for framing, windowing and the FFT.
        hop_size: hop size between frames.
        windowfnc: object whose ``value`` is the window type name.
        normalize: ``normalized`` parameter of the windowing algorithm.
    """
    windowing = estd.Windowing(size=frame_size, type=windowfnc.value, normalized=normalize)
    fourier = estd.FFT(size=frame_size)
    features = es.Pool()
    to_polar = estd.CartesianToPolar()
    onset_function = estd.OnsetDetection(method=self.algo)

    frames = estd.FrameGenerator(file.audio, frameSize=frame_size, hopSize=hop_size)
    for frame in frames:
        magnitude, phase = to_polar(fourier(windowing(frame)))
        detection_value = onset_function(magnitude, phase)
        features.add("features." + self.algo, detection_value)

    # Onsets takes a matrix of detection functions and one weight per row;
    # a single function with weight 1 is used here.
    detection_matrix = es.array([features["features." + self.algo]])
    self.onsets = estd.Onsets()(detection_matrix, [1])
def detect_onset(audio, index):
    """Run 'complex' onset detection on *audio* and report timing.

    Args:
        audio: signal handed to ``FrameGenerator``.
        index: identifier used only in the progress messages.

    Returns:
        The output of Essentia's ``Onsets`` algorithm for the computed
        detection function.
    """
    # should be able to fetch the module from cache
    import essentia.standard as ess_std
    from essentia import array

    print("Subprocess {} starts".format(index))
    processing_start = time()

    onset_detector = ess_std.OnsetDetection(method="complex")
    window = ess_std.Windowing(type="hann")
    fft = ess_std.FFT()
    c2p = ess_std.CartesianToPolar()
    onsets = ess_std.Onsets()

    # One detection value per frame.
    detection_values = []
    for frame in ess_std.FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(window(frame)))
        detection_values.append(onset_detector(mag, phase))

    # Onsets takes a matrix of detection functions plus one weight per row.
    onsets_array = onsets(array([detection_values]), [1])

    # Fixed-point with two decimals: the previous "{:.2}" meant two
    # significant digits and printed e.g. "1.2e+01s" for 12 seconds.
    print("Subprocess {} finished. Elapsed time: {:.2f}s".format(
        index, time() - processing_start))
    return onsets_array
import numpy as np
import matplotlib.pyplot as plt
import essentia.standard as ess

# Analysis parameters.
M = 1024    # size given to Windowing and used as the frame size
N = 1024    # size given to Spectrum
H = 512     # hop size between frames
fs = 44100  # sample rate requested from the loader

# Essentia algorithms.
spectrum = ess.Spectrum(size=N)
window = ess.Windowing(size=M, type='hann')
flux = ess.Flux()
onsetDetection = ess.OnsetDetection(method='hfc')

# Input sound.
x = ess.MonoLoader(filename='../../../sounds/speech-male.wav', sampleRate=fs)()

# Per-frame spectral flux and 'hfc' onset detection values.
fluxes = []
onsetDetections = []
frames = ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True)
for frame in frames:
    mX = spectrum(window(frame))
    fluxes.append(flux(mX))
    # The magnitude spectrum is passed for both the spectrum and phase inputs.
    onsetDetections.append(onsetDetection(mX, mX))
onsetDetections = np.array(onsetDetections)
fluxes = np.array(fluxes)

# Top panel: the waveform against time in seconds.
duration = x.size / float(fs)
plt.figure(1, figsize=(9.5, 7))
plt.subplot(2, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, duration, min(x), max(x)])
plt.ylabel('amplitude')
def __init__(self, params, fsm=None):
    """Read the detector settings from *params* and set up the state machine.

    Args:
        params: mapping providing every key read below (thresholds, attack
            and release limits, slope ratios, ``fs`` and ``hop_size``).
        fsm: optional external state machine to send events to.
    """
    # Settings copied one-to-one from ``params`` onto same-named attributes.
    for key in (
        'onset_threshold',
        'offset_threshold',
        'max_attack_time',
        'max_release_time',
        'attack_slope_ratio',
        'release_slope_ratio',
        'flux_threshold',
        'mel_threshold',
        'rms_threshold',
        'conf_threshold',
        'ratio_mel',
        'ratio_rms',
    ):
        setattr(self, key, params[key])

    self.rms_threshold_value = 0
    # NOTE(review): "vale" looks like a typo for "value"; the name is kept
    # because other methods may read this attribute -- confirm before renaming.
    self.mel_threshold_vale = 0

    self.fs = params['fs']
    self.hop_size = params['hop_size']

    # Attack / release limits converted from seconds with seconds2frames.
    self.max_attack_frames = seconds2frames(
        self.max_attack_time, fs=self.fs, hop_size=self.hop_size)
    self.max_release_frames = seconds2frames(
        self.max_release_time, fs=self.fs, hop_size=self.hop_size)

    self.ext_fsm = fsm  # external state machine to send events to
    self.buffer = []
    self.was_onset = False
    self.was_offset = False
    self.onset_counter = None
    self.offset_counter = None
    self.onset_samples = 2  # number of consecutive samples to be above threshold
    self.offset_samples = 3  # number of consecutive samples to be below threshold
    self.peak_detect = GrowingSlopeEnd(
        max_frames=self.max_attack_frames, m=self.attack_slope_ratio)

    # essentia algorithms initialization
    self.o_mel = estd.OnsetDetection(method='melflux')
    self.o_rms = estd.OnsetDetection(method='rms')
    self.o_hfc = estd.OnsetDetection(method='hfc')
    self.o_flux = estd.OnsetDetection(method='flux')
    self.o_complex = estd.OnsetDetection(method='complex')
    self.fft = estd.FFT()
    self.c2p = estd.CartesianToPolar()
    self.w = estd.Windowing(type='hann')

    # STATE MACHINE
    # (event name, source state(s), destination state)
    transitions = (
        ('onset', 'detecting', 'attack'),
        ('peak', 'attack', 'sustain'),
        ('offset', 'sustain', 'detecting'),
        ('reset', ['detecting', 'attack', 'sustain'], 'detecting'),
    )
    events = [
        {'name': name, 'src': src, 'dst': dst}
        for name, src, dst in transitions
    ]
    callbacks = {
        'ondetecting': self.on_detecting,
        'onattack': self.on_attack,
        'onsustain': self.on_sustain,
        'onbeforeonset': self.on_onset,
        'onbeforepeak': self.on_peak,
        'onbeforeoffset': self.on_offset,
    }
    # Created last, after every attribute above has been assigned.
    self.fsm = Fysom({
        'initial': 'detecting',
        'events': events,
        'callbacks': callbacks,
    })