def process_song(i, dataset):
    """
    Returns features for all windows of a given song in the dataset
    (to be run in parallel)
    """
    score = dataset.get_pianoroll(
        i,
        score_type=['precise_alignment', 'broad_alignment'],
        resolution=RES)
    audio, sr = dataset.get_audio(i)
    audio = esst.Resample(inputSampleRate=sr, outputSampleRate=SR)(audio)
    return get_song_win_features(score, audio)
def compute(self, *args):
    x = args[1]
    for frame in es.FrameGenerator(x, frameSize=self._frameSize,
                                   hopSize=self._hopSize, startFromZero=True):
        y = []
        s = int(self._frameSize / 2 - self._hopSize / 2) - 1  # consider non-overlapping case
        e = int(self._frameSize / 2 + self._hopSize / 2)

        # Stage 1: Attenuation. Not required because we are working in
        # floating point.

        # Stage 2: Resample (oversample the frame).
        yResample = es.Resample(inputSampleRate=self._sampleRate,
                                outputSampleRate=self._sampleRateOver,
                                quality=self._quality)(frame)

        # Stage 3: Emphasis.
        if self._emphatise:
            fPole = 20e3  # Hz
            fZero = 14.1e3
            rPole = fPole / self._sampleRateOver
            rZero = fZero / self._sampleRateOver
            yEmphasis = es.IIR(denominator=esarr([1., rPole]),
                               numerator=esarr([1., -rZero]))(yResample)
        else:
            yEmphasis = yResample

        # Stage 4: Absolute value.
        yMaxArray = np.abs(yEmphasis)

        # Stage 5: Optional DC block.
        if self._BlockDC:
            yDCBlocked = es.DCRemoval(sampleRate=self._sampleRate,
                                      cutoffFrequency=1.)(yEmphasis)
            yAbsoluteDCBlocked = np.abs(yDCBlocked)
            yMaxArray = np.maximum(yMaxArray, yAbsoluteDCBlocked)

        # Keep (timestamp, peak) pairs that exceed the clipping threshold.
        y = [((i + self._idx * self._hopSize) / float(self._sampleRateOver), yMax)
             for i, yMax in enumerate(yMaxArray)
             if yMax > self._clippingThreshold]
        self._idx += 1
    return esarr(y)
def apply_replay_gain(float_signal, sample_rate):
    '''
    Normalizes the perceived loudness of an audio signal.

    Calculates a replay gain value and applies this gain to the input.
    Returns the normalized signal and the calculated replay gain.
    '''
    downsampled_signal = es.Resample(inputSampleRate=sample_rate,
                                     outputSampleRate=8000)(float_signal)
    replay_gain_dB = es.ReplayGain(sampleRate=8000)(downsampled_signal)
    gain = np.power(10, replay_gain_dB / 20)
    return np.array(float_signal) * gain, replay_gain_dB
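# Hedged usage sketch for apply_replay_gain above: 'input.wav' is a placeholder
# path, and essentia.standard / numpy are imported under the same aliases the
# function already relies on (`es`, `np`).
import numpy as np
import essentia.standard as es

signal = es.MonoLoader(filename='input.wav', sampleRate=44100)()
normalized, gain_dB = apply_replay_gain(signal, 44100)
print('replay gain applied: %.2f dB' % gain_dB)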
def analyze_misc(filename, segment_duration=20):
    # Compute replay gain and duration on the entire file, then load the
    # segment that is centered in time with replay gain applied
    audio = es.MonoLoader(filename=filename)()
    replaygain = es.ReplayGain()(audio)

    segment_start = (len(audio) / 44100 - segment_duration) / 2
    segment_end = segment_start + segment_duration
    if segment_start < 0 or segment_end > len(audio) / 44100:
        raise ValueError(
            'Segment duration is larger than the input audio duration')

    loader = es.EasyLoader(filename=filename, replayGain=replaygain,
                           startTime=segment_start, endTime=segment_end)
    windowing = es.Windowing(type='blackmanharris62')
    spectrum = es.Spectrum()
    powerspectrum = es.PowerSpectrum()
    centroid = es.Centroid()
    zcr = es.ZeroCrossingRate()
    rms = es.RMS()
    hfc = es.HFC()
    pool = essentia.Pool()

    audio = loader()
    for frame in es.FrameGenerator(audio, frameSize=2048, hopSize=1024):
        frame_spectrum = spectrum(windowing(frame))
        pool.add('rms', rms(frame))
        pool.add('rms_spectrum', rms(frame_spectrum))
        pool.add('hfc', hfc(frame_spectrum))
        pool.add('spectral_centroid', centroid(frame_spectrum))
        pool.add('zcr', zcr(frame))

    audio_st, sr, _, _, _, _ = es.AudioLoader(filename=filename)()

    # Ugly hack because we don't have a StereoResample
    left, right = es.StereoDemuxer()(audio_st)
    resampler = es.Resample(inputSampleRate=sr, outputSampleRate=44100)
    left = resampler(left)
    right = resampler(right)
    audio_st = es.StereoMuxer()(left, right)

    audio_st = es.StereoTrimmer(startTime=segment_start,
                                endTime=segment_end)(audio_st)
    ebu_momentary, _, _, _ = es.LoudnessEBUR128(hopSize=1024 / 44100,
                                                startAtZero=True)(audio_st)
    pool.set('ebu_momentary', ebu_momentary)

    return pool
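# Hedged usage sketch for analyze_misc above: 'song.flac' is a placeholder
# path, and `essentia` / `essentia.standard as es` are assumed to be imported
# in this module, as the function body already requires.
features = analyze_misc('song.flac', segment_duration=20)
print(features.descriptorNames())
print(features['spectral_centroid'])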
def load_audio(path, sample_rate, mono=True):
    """
    Load an audio file using Essentia

    :param path: (str) location of audio file to load
    :param sample_rate: (int) sampling rate to load audio at
    :param mono: (bool) convert file to mono, defaults to True
    :return: audio samples and the number of channels in the original file
    """
    # Load audio file
    loader = es.AudioLoader(filename=path)
    results = loader()
    samples = results[0]
    orig_rate = results[1]
    channels = results[2]

    # Make sure we got mono or stereo audio
    if channels > 2:
        raise RuntimeError("Can't handle more than two audio channels.")

    # If there is only one channel, duplicate the first over to the second.
    # Essentia always loads audio as stereo, and in this case the right
    # channel is all zeros. We keep a stereo layout for some of the
    # processing here, such as the loudness normalization.
    if channels == 1:
        samples[:, 1] = samples[:, 0]

    # Mix to mono if required
    if mono:
        samples = mix_to_mono(samples)

    # Perform resampling if required
    if orig_rate != sample_rate:
        resample = es.Resample(inputSampleRate=orig_rate,
                               outputSampleRate=sample_rate)

        # Resampling for a stereo audio file
        if not mono:
            resampled_left = resample(samples[:, 0])
            resampled_right = resample(samples[:, 1])
            samples = np.array([resampled_left, resampled_right])
            samples = samples.T

        # Resampling for a mono audio file
        else:
            samples = resample(samples)

    return samples, channels
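# Hedged usage sketch for load_audio above: 'input.wav' is a placeholder path.
# mono is set to False so the mix_to_mono helper (defined elsewhere in this
# module) is not needed; the result is stereo audio resampled to 22050 Hz.
samples, n_channels = load_audio('input.wav', 22050, mono=False)
print(samples.shape, n_channels)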
def resample_audio(self, target_sample_rate):
    """Downsample the audio to a target sample rate

    Arguments:
        target_sample_rate {int} -- sample rate to resample the audio to

    Raises:
        ValueError: If `target_sample_rate` is greater than the sample rate
            of the given audio data.

    Returns:
        numpy.ndarray -- the resampled audio vector
    """
    if target_sample_rate > self.fs:
        raise ValueError("target_sample_rate should be lower than %s" % self.fs)
    resampler = estd.Resample(inputSampleRate=self.fs,
                              outputSampleRate=target_sample_rate,
                              quality=1)
    return resampler.compute(self.audio_vector)
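# Hedged sketch of the object resample_audio expects as `self`: a hypothetical
# AudioData holder exposing `fs` and `audio_vector`, built with
# essentia.standard imported as `estd`. 'input.wav' is a placeholder path.
import essentia.standard as estd

class AudioData:
    def __init__(self, path, fs=44100):
        self.fs = fs
        self.audio_vector = estd.MonoLoader(filename=path, sampleRate=fs)()

track = AudioData('input.wav')
downsampled = resample_audio(track, 16000)  # call the method as a plain function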