def time_stretch(y, rate, **kwargs):
    '''Time-stretch an audio series by a fixed rate.

    The signal is transformed with `stft`, stretched in the spectral
    domain with `phase_vocoder`, and inverted with `istft` to a length
    of `round(len(y) / rate)` samples.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    rate : float > 0 [scalar]
        Stretch factor.  If `rate > 1`, then the signal is sped up.
        If `rate < 1`, then the signal is slowed down.

    kwargs : additional keyword arguments.
        Forwarded unchanged to both `stft` and `istft`.  If `hop_length`
        is present it is also forwarded to `phase_vocoder`.
        See `librosa.core.stft` for details.

    Returns
    -------
    y_stretch : np.ndarray [shape=(round(n/rate),)]
        audio time series stretched by the specified rate

    Raises
    ------
    ParameterError
        if `rate <= 0`

    See Also
    --------
    pitch_shift : pitch shifting
    librosa.core.phase_vocoder : spectrogram phase vocoder
    pyrubberband.pyrb.time_stretch : high-quality time stretching using RubberBand

    Examples
    --------
    Compress to be twice as fast

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_fast = librosa.effects.time_stretch(y, 2.0)

    Or half the original speed

    >>> y_slow = librosa.effects.time_stretch(y, 0.5)

    '''

    if rate <= 0:
        raise ParameterError('rate must be a positive number')

    # Construct the short-term Fourier transform (STFT).
    # The result must NOT be bound to a local called `stft`: assigning to that
    # name makes it function-local, and the call on the right-hand side then
    # fails with UnboundLocalError.
    spec = stft(y, **kwargs)

    # Stretch by phase vocoding.  The vocoder has to advance phase with the
    # same hop the STFT was computed with, so forward a caller-supplied
    # `hop_length`; `None` keeps the vocoder's own default.
    spec_stretch = phase_vocoder(spec, rate,
                                 hop_length=kwargs.get('hop_length'))

    # Predict the length of y_stretch
    len_stretch = int(round(len(y) / rate))

    # Invert the STFT, trimming/padding to the predicted length
    y_stretch = istft(spec_stretch, dtype=y.dtype, length=len_stretch,
                      **kwargs)

    return y_stretch
def __cqt_response(y, n_fft, hop_length, fft_basis, mode):
    '''Project the STFT of `y` onto a frequency-domain filter basis.

    Parameters
    ----------
    y : audio time series handed to `stft`
    n_fft : FFT size for the STFT
    hop_length : hop (in samples) between successive STFT frames
    fft_basis : object exposing `.dot`; left-multiplied onto the STFT matrix
    mode : padding mode, passed to `stft` as `pad_mode`

    Returns
    -------
    The product `fft_basis.dot(STFT(y))`.
    '''
    # `np.ones` is passed as the window specification, so no tapering
    # window is requested from the STFT.
    spectrum = stft(y,
                    n_fft=n_fft,
                    hop_length=hop_length,
                    window=np.ones,
                    pad_mode=mode)

    # Apply every basis filter to every frame in one product.
    response = fft_basis.dot(spectrum)
    return response
def hpss(y, **kwargs):
    '''Harmonic-percussive source separation (HPSS) of an audio time series.

    Decompose an audio time series into harmonic and percussive components.

    This function automates the STFT->HPSS->ISTFT pipeline, and ensures that
    the output waveforms have equal length to the input waveform `y`.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    kwargs : additional keyword arguments.
        Forwarded to the spectrogram-domain decomposition.
        See `librosa.decompose.hpss` for details.

    Returns
    -------
    y_harmonic : np.ndarray [shape=(n,)]
        audio time series of the harmonic elements

    y_percussive : np.ndarray [shape=(n,)]
        audio time series of the percussive elements

    See Also
    --------
    harmonic : Extract only the harmonic component
    percussive : Extract only the percussive component
    librosa.decompose.hpss : HPSS on spectrograms

    Examples
    --------
    >>> # Extract harmonic and percussive components
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_harmonic, y_percussive = librosa.effects.hpss(y)

    >>> # Get a more isolated percussive component by widening its margin
    >>> y_harmonic, y_percussive = librosa.effects.hpss(y, margin=(1.0,5.0))

    '''
    # Inside this function the bare name `hpss` is this waveform-level wrapper
    # itself, so calling it on a spectrogram would re-enter the wrapper instead
    # of decomposing.  Bind the spectrogram-domain routine under its own name.
    # NOTE(review): assumes librosa is importable here, as the rest of the
    # file's bare librosa names imply -- confirm against the file's imports.
    from librosa.decompose import hpss as _spectrogram_hpss

    # Compute the STFT matrix.  Not stored in a local named `stft`: that would
    # make `stft` function-local and the call would raise UnboundLocalError.
    spec = stft(y)

    # Decompose into harmonic and percussive spectrograms
    spec_harm, spec_perc = _spectrogram_hpss(spec, **kwargs)

    # Invert the STFTs.  Adjust length to match the input.
    n_samples = len(y)
    y_harm = fix_length(istft(spec_harm, dtype=y.dtype), n_samples)
    y_perc = fix_length(istft(spec_perc, dtype=y.dtype), n_samples)

    return y_harm, y_perc
def percussive(y, **kwargs):
    '''Extract percussive elements from an audio time-series.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    kwargs : additional keyword arguments.
        Forwarded to the spectrogram-domain decomposition.
        See `librosa.decompose.hpss` for details.

    Returns
    -------
    y_percussive : np.ndarray [shape=(n,)]
        audio time series of just the percussive portion

    See Also
    --------
    hpss : Separate harmonic and percussive components
    harmonic : Extract only the harmonic component
    librosa.decompose.hpss : HPSS for spectrograms

    Examples
    --------
    >>> # Extract percussive component
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_percussive = librosa.effects.percussive(y)

    >>> # Use a margin > 1.0 for greater percussive separation
    >>> y_percussive = librosa.effects.percussive(y, margin=3.0)

    '''
    # The module-level name `hpss` in this file is the waveform-level wrapper,
    # which expects a time series, not a spectrogram.  Bind the
    # spectrogram-domain routine explicitly so the right function is called.
    # NOTE(review): assumes librosa is importable here, as the rest of the
    # file's bare librosa names imply -- confirm against the file's imports.
    from librosa.decompose import hpss as _spectrogram_hpss

    # Compute the STFT matrix.  Not stored in a local named `stft`: that would
    # make `stft` function-local and the call would raise UnboundLocalError.
    spec = stft(y)

    # Remove harmonics: element [1] of the decomposition is the percussive part
    spec_perc = _spectrogram_hpss(spec, **kwargs)[1]

    # Invert the STFT and match the input length
    y_perc = fix_length(istft(spec_perc, dtype=y.dtype), len(y))

    return y_perc
def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
               bins_per_octave=12, tuning=0.0, filter_scale=1,
               norm=1, sparsity=0.01, window='hann', scale=True,
               pad_mode='reflect'):
    '''Compute the pseudo constant-Q transform of an audio signal.

    The magnitude STFT of `y` is projected onto the magnitude of a
    frequency-domain CQT filter basis.  A single FFT size is used for all
    bins; it is chosen by `__cqt_filter_fft` (documented upstream as the
    smallest power of 2 that is at least the longest CQT filter and at
    least twice `hop_length`).

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    sr : number > 0 [scalar]
        sampling rate of `y`

    hop_length : int > 0 [scalar]
        number of samples between successive CQT columns

    fmin : float > 0 [scalar] or None
        Minimum frequency.  `None` selects `note_to_hz('C1')`.

    n_bins : int > 0 [scalar]
        Number of frequency bins, starting at `fmin`

    bins_per_octave : int > 0 [scalar]
        Number of bins per octave

    tuning : None or float in `[-0.5, 0.5)`
        Tuning offset in fractions of a bin (cents).
        If `None`, it is estimated from the signal with `estimate_tuning`.

    filter_scale : float > 0
        Filter scale factor.  Larger values use longer windows.

    norm : number
        Normalization setting forwarded to the filter-basis construction.

    sparsity : float in [0, 1)
        Sparsify the CQT basis by discarding up to `sparsity` fraction of
        the energy in each basis.  Set `sparsity=0` to disable.

    window : str, tuple, number, or function
        Window specification for the basis filters.
        See `filters.get_window` for details.

    scale : bool
        If true, divide the result by `sqrt(n_fft)`.  Otherwise multiply
        each bin by `sqrt(filter_length / n_fft)`, with the filter lengths
        taken from `constant_q_lengths`.

    pad_mode : string
        Padding mode for centered frame analysis.
        See also: `librosa.core.stft` and `np.pad`.

    Returns
    -------
    CQT : np.ndarray [shape=(n_bins, t)]
        Pseudo constant-Q energy for each frequency at each time.

    Raises
    ------
    ParameterError
        Not raised directly here; may propagate from the helpers this
        function calls (upstream documents failures for an unsuitable
        `hop_length` or a `y` too short for the requested range).

    Notes
    -----
    The original documentation states that this function caches at
    level 20; no caching decorator is visible on this definition.
    '''
    # Fill in defaults that depend on other helpers
    if fmin is None:
        fmin = note_to_hz('C1')  # C1 by default

    if tuning is None:
        tuning = estimate_tuning(y=y, sr=sr)

    # Frequency-domain filter bank; only its magnitude is used
    basis, n_fft, _ = __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave,
                                       tuning, filter_scale, norm, sparsity,
                                       hop_length=hop_length, window=window)
    basis = np.abs(basis)

    # Magnitude STFT (the STFT's default window is left in place)
    magnitude = np.abs(stft(y, n_fft=n_fft, hop_length=hop_length,
                            pad_mode=pad_mode))

    # Project onto the pseudo-cqt basis
    pcqt = basis.dot(magnitude)

    if not scale:
        # Weight each bin by its own filter length relative to the FFT size
        filter_lengths = constant_q_lengths(sr, fmin,
                                            n_bins=n_bins,
                                            bins_per_octave=bins_per_octave,
                                            tuning=tuning,
                                            window=window,
                                            filter_scale=filter_scale)
        pcqt *= np.sqrt(filter_lengths[:, np.newaxis] / n_fft)
        return pcqt

    # Uniform normalization by the FFT size
    pcqt /= np.sqrt(n_fft)
    return pcqt
def detectOnset(y, peakThresh, peakWait, hop_length=512, sr=48000,
                backtrack=False, plots=1, **kwargs):
    """Basic onset detector with optional diagnostic plots.

    Computes an onset strength envelope with `onset_strength` (median
    aggregation), picks peaks in it with `peak_pick`, optionally backtracks
    them with `onset_backtrack`, and converts the resulting frame indices to
    sample positions and to times.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    peakThresh : float
        Default for the `delta` argument of `peak_pick` (threshold offset
        for the mean).  Ignored if `delta` is given in `kwargs`.

    peakWait : float
        Spacing between detections, in seconds; converted to frames as
        `peakWait * sr // hop_length` and used as the default `wait`
        argument of `peak_pick`.  Longer wait = fewer onsets.
        Ignored if `wait` is given in `kwargs`.

    hop_length : int > 0 [scalar]
        hop length (in samples) used for the envelope, the frame
        conversions, and the diagnostic spectrogram

    sr : number > 0 [scalar]
        sampling rate of `y`

    backtrack : bool
        If `True`, detected onsets are passed through `onset_backtrack`
        using the onset envelope.

    plots : int
        `plots > 0` draws the spectrogram together with the onset envelope
        and the selected peaks, then calls `plt.show()`.
        `plots > 1` additionally plots the input signal via
        `xodplt.xodPlot1D`.

    kwargs : additional keyword arguments
        Passed to `peak_pick`.  Any of `pre_max`, `post_max`, `pre_avg`,
        `post_avg`, `wait`, `delta` given here overrides the defaults
        computed below.  See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets_samples : np.ndarray
        detected onset positions from `frames_to_samples`
    onsets_time : np.ndarray
        detected onset positions from `frames_to_time`

    Raises
    ------
    AssertionError
        when plotting, if the STFT frame count differs from the length of
        the onset envelope

    See Also
    --------
    onset_strength : compute onset strength per-frame
    onset_backtrack : backtracking onset events
    librosa.util.peak_pick : pick peaks from a time series
    """
    onset_env = onset_strength(y=y, sr=sr, hop_length=hop_length,
                               aggregate=np.median)

    # Peak-picking window defaults, expressed as durations converted to frames.
    kwargs.setdefault('pre_max', 0.03 * sr // hop_length)       # 30ms
    kwargs.setdefault('post_max', 0.00 * sr // hop_length + 1)  # 0ms
    kwargs.setdefault('pre_avg', 0.10 * sr // hop_length)       # 100ms
    kwargs.setdefault('post_avg', 0.10 * sr // hop_length + 1)  # 100ms
    # Spacing and threshold are caller-controlled rather than fixed.
    kwargs.setdefault('wait', peakWait * sr // hop_length)
    kwargs.setdefault('delta', peakThresh)

    # Peak pick the onset envelope
    onsets = peak_pick(onset_env, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        onsets = onset_backtrack(onsets, onset_env)

    onsets_samples = frames_to_samples(onsets, hop_length=hop_length)
    onsets_time = frames_to_time(onsets, hop_length=hop_length, sr=sr)

    # --- Plot: source signal ---
    if plots > 1:
        fnum = 3
        pltTitle = 'Input Signals: aSrc_ch1'
        pltXlabel = 'sinArray time-domain wav'
        pltYlabel = 'Magnitude'

        # x-axis spanning the sample indices of y
        xaxis = np.linspace(0, len(y), len(y))

        xodplt.xodPlot1D(fnum, y, xaxis, pltTitle, pltXlabel, pltYlabel)

    # --- Plot: peak-picking results vs. spectrogram ---
    if plots > 0:
        NFFT = 2048
        # Use the caller's hop_length so STFT frames line up one-to-one with
        # the onset envelope (a hard-coded hop breaks for hop_length != 512).
        ySTFT = stft(y, n_fft=NFFT, hop_length=hop_length)

        assert (ySTFT.shape[1] == len(onset_env)
                ), "Number of STFT frames != len onset_env"

        times = frames_to_time(np.arange(len(onset_env)),
                               sr=sr, hop_length=hop_length)

        plt.figure(facecolor='silver', edgecolor='k', figsize=(12, 8))
        ax = plt.subplot(2, 1, 1)
        # sr/hop_length are passed so the spectrogram's time axis matches
        # `times` on the shared x-axis below.
        specshow(amplitude_to_db(magphase(ySTFT)[0], ref=np.max),
                 sr=sr, hop_length=hop_length,
                 y_axis='log', x_axis='time', cmap=plt.cm.viridis)
        plt.title('CH1: Spectrogram (STFT)')

        plt.subplot(2, 1, 2, sharex=ax)
        plt.plot(times, onset_env, alpha=0.66, label='Onset strength')
        plt.vlines(times[onsets], 0, onset_env.max(), color='r', alpha=0.8,
                   label='Selected peaks')
        plt.legend(frameon=True, framealpha=0.66)
        plt.axis('tight')
        plt.tight_layout()

        plt.xlabel('time')
        plt.ylabel('Amplitude')
        plt.title('Onset Strength detection & Peak Selection')

        plt.show()

    return onsets_samples, onsets_time