예제 #1
0
def valid_int(x, cast=None):
    '''Ensure that an input value is integer-typed.
    This is primarily useful for ensuring integer-valued
    array indices.

    Parameters
    ----------
    x : number
        A scalar value to be cast to int

    cast : function [optional]
        A function to modify `x` before casting.
        Default: `np.floor`

    Returns
    -------
    x_int : int
        `x_int = int(cast(x))`

    Raises
    ------
    ParameterError
        If `cast` is provided and is not callable.
    '''

    if cast is None:
        cast = np.floor

    # The builtin `callable` does the same job as `six.callable`, so the
    # check no longer depends on the `six` compatibility package.
    if not callable(cast):
        raise ParameterError('cast parameter must be callable')

    return int(cast(x))
예제 #2
0
def __early_downsample(y, sr, hop_length, res_type, n_octaves, nyquist,
                       filter_cutoff, scale):
    '''Downsample an audio signal ahead of the CQT, when that applies.

    The number of halvings is obtained from `__early_downsample_count`.
    The inputs are returned untouched unless that count is positive and
    `res_type` is `'kaiser_fast'`.

    Parameters
    ----------
    y : np.ndarray
        audio time series
    sr : number
        sampling rate of `y`
    hop_length : int
        hop length; floor-divided by the downsampling factor when
        downsampling is applied
    res_type : str
        resampling mode, forwarded to `resample`
    n_octaves : int
        number of octaves (forwarded to `__early_downsample_count` and
        used in the error message)
    nyquist, filter_cutoff : number
        forwarded to `__early_downsample_count`
    scale : bool
        if false, the resampled signal is multiplied by the square root
        of the downsampling factor

    Returns
    -------
    y, sr, hop_length
        the (possibly downsampled) signal, its sampling rate, and the
        matching hop length

    Raises
    ------
    ParameterError
        if `y` has fewer samples than the downsampling factor
    '''

    n_halvings = __early_downsample_count(nyquist, filter_cutoff,
                                          hop_length, n_octaves)

    # Nothing to do: hand the inputs straight back.
    if not (n_halvings > 0 and res_type == 'kaiser_fast'):
        return y, sr, hop_length

    factor = 2**(n_halvings)

    hop_length //= factor

    if len(y) < factor:
        raise ParameterError('Input signal length={:d} is too short for '
                             '{:d}-octave CQT'.format(len(y), n_octaves))

    reduced_sr = sr / float(factor)
    y = resample(y, sr, reduced_sr, res_type=res_type, scale=True)

    # When no length-scaling happens after the CQT, the downsampling
    # factor has to be compensated for here instead.
    if not scale:
        y *= np.sqrt(factor)

    return y, reduced_sr, hop_length
예제 #3
0
파일: xodmaEffects.py 프로젝트: xodmk/xodma
def time_stretch(y, rate, **kwargs):
    '''Time-stretch an audio series by a fixed rate.


    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    rate : float > 0 [scalar]
        Stretch factor.  If `rate > 1`, then the signal is sped up.
        If `rate < 1`, then the signal is slowed down.

    kwargs : additional keyword arguments.
        Forwarded unchanged to both `stft` and `istft`.

    Returns
    -------
    y_stretch : np.ndarray [shape=(round(n/rate),)]
        audio time series stretched by the specified rate

    Raises
    ------
    ParameterError
        If `rate <= 0`

    See Also
    --------
    pitch_shift : pitch shifting
    librosa.core.phase_vocoder : spectrogram phase vocoder
    pyrubberband.pyrb.time_stretch : high-quality time stretching using RubberBand

    Examples
    --------
    Compress to be twice as fast

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_fast = librosa.effects.time_stretch(y, 2.0)

    Or half the original speed

    >>> y_slow = librosa.effects.time_stretch(y, 0.5)

    '''

    if rate <= 0:
        raise ParameterError('rate must be a positive number')

    # Construct the short-term Fourier transform (STFT).
    # The result must NOT be bound to the name `stft`: assigning to that
    # name would make it local to this function and the call itself would
    # fail with UnboundLocalError.
    stft_y = stft(y, **kwargs)

    # Stretch by phase vocoding
    stft_stretch = phase_vocoder(stft_y, rate)

    # Predict the length of y_stretch
    len_stretch = int(round(len(y) / rate))

    # Invert the STFT
    y_stretch = istft(stft_stretch,
                      dtype=y.dtype,
                      length=len_stretch,
                      **kwargs)

    return y_stretch
예제 #4
0
def valid_intervals(intervals):
    '''Check that an array has the layout of a set of time intervals.

    The array is accepted when:

        - it is two-dimensional, and
        - its last axis has length 2

    Parameters
    ----------
    intervals : np.ndarray [shape=(n, 2)]
        set of time intervals

    Returns
    -------
    valid : bool
        `True` whenever `intervals` passes validation.

    Raises
    ------
    ParameterError
        If `intervals` is not two-dimensional or its last axis does not
        have length 2.
    '''

    is_matrix = intervals.ndim == 2

    # The column count is only inspected once the rank is known to be 2.
    if is_matrix and intervals.shape[-1] == 2:
        return True

    raise ParameterError('intervals must have shape (n, 2)')
예제 #5
0
def onset_detect(y=None,
                 sr=48000,
                 onset_envelope=None,
                 hop_length=512,
                 backtrack=False,
                 energy=None,
                 units='frames',
                 **kwargs):
    """Basic onset detector.  Locate note onset events by picking peaks in an
    onset strength envelope.

    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [1]_.

    .. [1] https://github.com/CPJKU/onset_db


    Parameters
    ----------
    y          : np.ndarray [shape=(n,)]
        audio time series

    sr         : number > 0 [scalar]
        sampling rate of `y`

    onset_envelope     : np.ndarray [shape=(m,)]
        (optional) pre-computed onset strength envelope.
        The array passed in is not modified.

    hop_length : int > 0 [scalar]
        hop length (in samples)

    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.

    backtrack : bool
        If `True`, detected onset events are backtracked to the nearest
        preceding minimum of `energy`.

        This is primarily useful when using onsets as slice points for segmentation.

    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then the normalized `onset_envelope` is used.

    kwargs : additional keyword arguments
        Additional parameters for peak picking.

        See `librosa.util.peak_pick` for details.


    Returns
    -------

    onsets : np.ndarray [shape=(n_onsets,)]
        estimated positions of detected onsets, in whichever units
        are specified.  By default, frame indices.

        .. note::
            If no onset strength could be detected, onset_detect returns
            an empty integer array.


    Raises
    ------
    ParameterError
        if neither `y` nor `onset_envelope` are provided

        or if `units` is not one of 'frames', 'samples', or 'time'

    See Also
    --------
    onset_strength : compute onset strength per-frame
    onset_backtrack : backtracking onset events
    librosa.util.peak_pick : pick peaks from a time series


    Examples
    --------
    Get onset times from a signal

    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
    ...                      offset=30, duration=2.0)
    >>> onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
    >>> librosa.frames_to_time(onset_frames, sr=sr)
    array([ 0.07 ,  0.395,  0.511,  0.627,  0.766,  0.975,
            1.207,  1.324,  1.44 ,  1.788,  1.881])

    Or use a pre-computed onset envelope

    >>> o_env = librosa.onset.onset_strength(y, sr=sr)
    >>> times = librosa.frames_to_time(np.arange(len(o_env)), sr=sr)
    >>> onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)


    >>> import matplotlib.pyplot as plt
    >>> D = np.abs(librosa.stft(y))
    >>> plt.figure()
    >>> ax1 = plt.subplot(2, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                          x_axis='time', y_axis='log')
    >>> plt.title('Power spectrogram')
    >>> plt.subplot(2, 1, 2, sharex=ax1)
    >>> plt.plot(times, o_env, label='Onset strength')
    >>> plt.vlines(times[onset_frames], 0, o_env.max(), color='r', alpha=0.9,
    ...            linestyle='--', label='Onsets')
    >>> plt.axis('tight')
    >>> plt.legend(frameon=True, framealpha=0.75)
    >>> plt.show()

    """

    # First, get the frame->beat strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError('y or onset_envelope must be provided')

        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Shift onset envelope up to be non-negative
    # (a common normalization step to make the threshold more consistent).
    # A new array is built instead of subtracting in place, so an envelope
    # supplied by the caller is left untouched.
    onset_envelope = onset_envelope - onset_envelope.min()

    # Do we have any onsets to grab?
    if not onset_envelope.any():
        # `np.int` was only an alias of the builtin and no longer exists
        # in current NumPy releases.
        return np.array([], dtype=int)

    # Normalize onset strength function to [0, 1] range.
    # Out-of-place division also works for integer-typed envelopes.
    onset_envelope = onset_envelope / onset_envelope.max()

    # These parameter settings found by large-scale search
    kwargs.setdefault('pre_max', 0.03 * sr // hop_length)  # 30ms
    kwargs.setdefault('post_max', 0.00 * sr // hop_length + 1)  # 0ms
    kwargs.setdefault('pre_avg', 0.10 * sr // hop_length)  # 100ms
    kwargs.setdefault('post_avg', 0.10 * sr // hop_length + 1)  # 100ms
    kwargs.setdefault('wait', 0.03 * sr // hop_length)  # 30ms
    kwargs.setdefault('delta', 0.07)

    # Peak pick the onset envelope
    onsets = peak_pick(onset_envelope, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        if energy is None:
            energy = onset_envelope

        onsets = onset_backtrack(onsets, energy)

    if units == 'frames':
        pass
    elif units == 'samples':
        onsets = frames_to_samples(onsets, hop_length=hop_length)
    elif units == 'time':
        onsets = frames_to_time(onsets, hop_length=hop_length, sr=sr)
    else:
        raise ParameterError('Invalid unit type: {}'.format(units))

    return onsets
예제 #6
0
def match_events(events_from, events_to, left=True, right=True):
    '''Match one set of events to another.

    This is useful for tasks such as matching beats to the nearest
    detected onsets, or frame-aligned events to the nearest zero-crossing.

    .. note:: A target event may be matched to multiple source events.

    Examples
    --------
    >>> # Sources are multiples of 7
    >>> s_from = np.arange(0, 100, 7)
    >>> s_from
    array([ 0,  7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91,
           98])
    >>> # Targets are multiples of 10
    >>> s_to = np.arange(0, 100, 10)
    >>> s_to
    array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
    >>> # Find the matching
    >>> idx = librosa.util.match_events(s_from, s_to)
    >>> idx
    array([0, 1, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 8, 9, 9])
    >>> # Print each source value to its matching target
    >>> list(zip(s_from, s_to[idx]))
    [(0, 0), (7, 10), (14, 10), (21, 20), (28, 30), (35, 30),
     (42, 40), (49, 50), (56, 60), (63, 60), (70, 70), (77, 80),
     (84, 80), (91, 90), (98, 90)]

    Parameters
    ----------
    events_from : ndarray [shape=(n,)]
      Array of events (eg, times, sample or frame indices) to match from.

    events_to : ndarray [shape=(m,)]
      Array of events (eg, times, sample or frame indices) to
      match against.

    left : bool
    right : bool
        If `False`, then matched events cannot be to the left (or right)
        of source events.

    Returns
    -------
    event_mapping : np.ndarray [shape=(n,)]
        For each event in `events_from`, the corresponding event
        index in `events_to`.

        `event_mapping[i] == arg min |events_from[i] - events_to[:]|`

    See Also
    --------
    match_intervals

    Raises
    ------
    ParameterError
        If either array of input events is empty, or if the `left` /
        `right` restrictions make a match impossible
    '''

    if len(events_from) == 0 or len(events_to) == 0:
        raise ParameterError('Attempting to match empty event list')

    # If we can't match left or right, then only strict equivalence
    # counts as a match.
    if not (left or right) and not np.all(np.isin(events_from, events_to)):
        raise ParameterError('Cannot match events with left=right=False '
                             'and events_from is not contained '
                             'in events_to')

    # If we can't match to the left, then there should be at least one
    # target event greater-equal to every source event
    if (not left) and max(events_to) < max(events_from):
        raise ParameterError('Cannot match events with left=False '
                             'and max(events_to) < max(events_from)')

    # If we can't match to the right, then there should be at least one
    # target event less-equal to every source event
    if (not right) and min(events_to) > min(events_from):
        raise ParameterError('Cannot match events with right=False '
                             'and min(events_to) > min(events_from)')

    # Pre-allocate the output array.
    # (`np.int` / `np.float` were aliases of the builtins and are gone from
    # current NumPy releases, so the builtins are used directly.)
    output = np.empty_like(events_from, dtype=int)

    # Compute how many rows we can process at once within the memory block
    n_rows = int(spectralUtil.MAX_MEM_BLOCK / (np.prod(output.shape[1:])
                 * len(events_to) * events_from.itemsize))

    # Make sure we can at least make some progress
    n_rows = max(1, n_rows)

    # Iterate over blocks of the data
    for bl_s in range(0, len(events_from), n_rows):
        bl_t = min(bl_s + n_rows, len(events_from))

        event_block = events_from[bl_s:bl_t]

        # distance[i, j] = |event_block[i] - events_to[j]|
        # Cast to float so that forbidden pairs can be marked with NaN.
        distance = np.abs(np.subtract.outer(event_block,
                                            events_to)).astype(float)

        # If we can't match to the right, squash all comparisons where
        # events_to[j] > events_from[i]
        if not right:
            distance[np.less.outer(event_block, events_to)] = np.nan

        # If we can't match to the left, squash all comparisons where
        # events_to[j] < events_from[i]
        if not left:
            distance[np.greater.outer(event_block, events_to)] = np.nan

        # Find the minimum distance point from whatever's left after squashing
        output[bl_s:bl_t] = np.nanargmin(distance, axis=-1)

    return output
예제 #7
0
def softmask(X, X_ref, power=1, split_zeros=False):
    '''Robustly compute a softmask operation.

        `M = X**power / (X**power + X_ref**power)`


    Parameters
    ----------
    X : np.ndarray
        The (non-negative) input array corresponding to the positive mask elements

    X_ref : np.ndarray
        The (non-negative) array of reference or background elements.
        Must have the same shape as `X`.

    power : number > 0 or np.inf
        If finite, returns the soft mask computed in a numerically stable way

        If infinite, returns a hard (binary) mask equivalent to `X > X_ref`.
        Note: for hard masks, ties are always broken in favor of `X_ref` (`mask=0`).


    split_zeros : bool
        If `True`, entries where `X` and `X_ref` are both small (close to 0)
        will receive mask values of 0.5.

        Otherwise, the mask is set to 0 for these entries.


    Returns
    -------
    mask : np.ndarray, shape=`X.shape`
        The output mask array

    Raises
    ------
    ParameterError
        If `X` and `X_ref` have different shapes.

        If `X` or `X_ref` are negative anywhere

        If `power <= 0`

    Examples
    --------

    >>> X = 2 * np.ones((3, 3))
    >>> X_ref = np.vander(np.arange(3.0))
    >>> X
    array([[ 2.,  2.,  2.],
           [ 2.,  2.,  2.],
           [ 2.,  2.,  2.]])
    >>> X_ref
    array([[ 0.,  0.,  1.],
           [ 1.,  1.,  1.],
           [ 4.,  2.,  1.]])
    >>> librosa.util.softmask(X, X_ref, power=1)
    array([[ 1.   ,  1.   ,  0.667],
           [ 0.667,  0.667,  0.667],
           [ 0.333,  0.5  ,  0.667]])
    >>> librosa.util.softmask(X_ref, X, power=1)
    array([[ 0.   ,  0.   ,  0.333],
           [ 0.333,  0.333,  0.333],
           [ 0.667,  0.5  ,  0.333]])
    >>> librosa.util.softmask(X, X_ref, power=2)
    array([[ 1. ,  1. ,  0.8],
           [ 0.8,  0.8,  0.8],
           [ 0.2,  0.5,  0.8]])
    >>> librosa.util.softmask(X, X_ref, power=4)
    array([[ 1.   ,  1.   ,  0.941],
           [ 0.941,  0.941,  0.941],
           [ 0.059,  0.5  ,  0.941]])
    >>> librosa.util.softmask(X, X_ref, power=100)
    array([[  1.000e+00,   1.000e+00,   1.000e+00],
           [  1.000e+00,   1.000e+00,   1.000e+00],
           [  7.889e-31,   5.000e-01,   1.000e+00]])
    >>> librosa.util.softmask(X, X_ref, power=np.inf)
    array([[ True,  True,  True],
           [ True,  True,  True],
           [False, False,  True]], dtype=bool)
    '''
    if X.shape != X_ref.shape:
        raise ParameterError('Shape mismatch: {}!={}'.format(X.shape,
                                                             X_ref.shape))

    if np.any(X < 0) or np.any(X_ref < 0):
        raise ParameterError('X and X_ref must be non-negative')

    if power <= 0:
        raise ParameterError('power must be strictly positive')

    # Non-floating inputs (e.g. ints) are worked on as float32.
    # The test is against the abstract `np.floating`: the Python `float`
    # type is interpreted by NumPy as float64 only, which would wrongly
    # treat every other floating dtype as non-float.
    dtype = X.dtype
    if not np.issubdtype(dtype, np.floating):
        dtype = np.float32

    # Re-scale the input arrays relative to the larger value
    Z = np.maximum(X, X_ref).astype(dtype)
    bad_idx = (Z < np.finfo(dtype).tiny)
    # Avoid dividing by (near-)zero; these entries are overwritten below
    Z[bad_idx] = 1

    # For finite power, compute the softmask
    if np.isfinite(power):
        mask = (X / Z)**power
        ref_mask = (X_ref / Z)**power
        good_idx = ~bad_idx
        mask[good_idx] /= mask[good_idx] + ref_mask[good_idx]
        # Wherever both inputs are below the smallest normal float,
        # either split the mask evenly or zero it out
        if split_zeros:
            mask[bad_idx] = 0.5
        else:
            mask[bad_idx] = 0.0
    else:
        # Otherwise, compute the hard mask
        mask = X > X_ref

    return mask
예제 #8
0
def spectral_rolloff(y=None, sr=48000, S=None, n_fft=2048, hop_length=512,
                     freq=None, roll_percent=0.85):
    '''Compute the roll-off frequency of each frame.

    For every frame, this is the lowest bin center frequency at which the
    cumulative spectral magnitude reaches `roll_percent` of the frame total.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)] or None
        audio time series

    sr : number > 0 [scalar]
        audio sampling rate of `y`

    S : np.ndarray [shape=(d, t)] or None
        (optional) spectrogram magnitude

    n_fft : int > 0 [scalar]
        FFT window size

    hop_length : int > 0 [scalar]
        hop length for STFT. See `librosa.core.stft` for details.

    freq : None or np.ndarray [shape=(d,) or shape=(d, t)]
        Center frequencies for spectrogram bins.
        If `None`, then FFT bin center frequencies are used.
        Otherwise, it can be a single array of `d` center frequencies
        (reshaped into a column for broadcasting), or an array of
        shape `(d, t)`.

        .. note:: `freq` is assumed to be sorted in increasing order

    roll_percent : float [0 < roll_percent < 1]
        Roll-off percentage.

    Returns
    -------
    rolloff : np.ndarray [shape=(1, t)]
        roll-off frequency for each frame

    Raises
    ------
    ParameterError
        If `roll_percent` is outside the open interval (0, 1), or if the
        spectrogram is complex-valued or contains negative entries.


    Examples
    --------
    From time-series input

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
    >>> rolloff
    array([[ 8376.416,   968.994, ...,  8925.513,  9108.545]])

    From spectrogram input

    >>> S, phase = librosa.magphase(librosa.stft(y))
    >>> librosa.feature.spectral_rolloff(S=S, sr=sr)
    array([[ 8376.416,   968.994, ...,  8925.513,  9108.545]])

    >>> # With a higher roll percentage:
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.95)
    array([[ 10012.939,   3003.882, ...,  10034.473,  10077.539]])

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.subplot(2, 1, 1)
    >>> plt.semilogy(rolloff.T, label='Roll-off frequency')
    >>> plt.ylabel('Hz')
    >>> plt.xticks([])
    >>> plt.xlim([0, rolloff.shape[-1]])
    >>> plt.legend()
    >>> plt.subplot(2, 1, 2)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
    ...                          y_axis='log', x_axis='time')
    >>> plt.title('log Power spectrogram')
    >>> plt.tight_layout()

    '''

    if not (0.0 < roll_percent < 1.0):
        raise ParameterError('roll_percent must lie in the range (0, 1)')

    S, n_fft = mag_spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

    # Input sanity checks: real-valued first, then non-negative
    if not np.isrealobj(S):
        raise ParameterError('Spectral rolloff is only defined '
                             'with real-valued input')
    if np.any(S < 0):
        raise ParameterError('Spectral rolloff is only defined '
                             'with non-negative energies')

    # Fall back to the FFT bin center frequencies
    if freq is None:
        freq = fft_frequencies(sr=sr, n_fft=n_fft)

    # A flat frequency vector becomes a column so it broadcasts over frames
    if freq.ndim == 1:
        freq = freq.reshape((-1, 1))

    cumulative = np.cumsum(S, axis=0)
    cutoff = roll_percent * cumulative[-1]

    # Bins still below the cutoff are masked out with NaN; the first
    # surviving frequency per frame is then the roll-off point.
    reached = np.where(cumulative < cutoff, np.nan, 1)
    rolloff = np.nanmin(reached * freq, axis=0, keepdims=True)

    return rolloff
예제 #9
0
def hpss(S, kernel_size=31, power=2.0, mask=False, margin=1.0):
    """Median-filtering harmonic percussive source separation (HPSS).

    With `margin = 1.0`, the input spectrogram is decomposed as `S = H + P`,
    where `H` holds the harmonic components and `P` the percussive ones.

    With `margin > 1.0`, the decomposition is `S = H + P + R`, where `R`
    collects residual components assigned to neither `H` nor `P`.

    This implementation is based upon the algorithm described by [1]_ and [2]_.

    .. [1] Fitzgerald, Derry.
        "Harmonic/percussive separation using median filtering."
        13th International Conference on Digital Audio Effects (DAFX10),
        Graz, Austria, 2010.

    .. [2] Driedger, Müller, Disch.
        "Extending harmonic-percussive separation of audio."
        15th International Society for Music Information Retrieval Conference (ISMIR 2014),
        Taipei, Taiwan, 2014.

    Parameters
    ----------
    S : np.ndarray [shape=(d, n)]
        input spectrogram. May be real (magnitude) or complex.

    kernel_size : int or tuple (kernel_harmonic, kernel_percussive)
        kernel size(s) for the median filters.

        - If scalar, the same size is used for both harmonic and percussive.
        - If tuple, the first value specifies the width of the
          harmonic filter, and the second value specifies the width
          of the percussive filter.

    power : float > 0 [scalar]
        Exponent for the Wiener filter when constructing soft mask matrices.

    mask : bool
        Return the masking matrices instead of components.

        Masking matrices contain non-negative real values that
        can be used to measure the assignment of energy from `S`
        into harmonic or percussive components.

        Components can be recovered by multiplying `S * mask_H`
        or `S * mask_P`.


    margin : float or tuple (margin_harmonic, margin_percussive)
        margin size(s) for the masks (as described in [2]_)

        - If scalar, the same size is used for both harmonic and percussive.
        - If tuple, the first value specifies the margin of the
          harmonic mask, and the second value specifies the margin
          of the percussive mask.

    Returns
    -------
    harmonic : np.ndarray [shape=(d, n)]
        harmonic component (or mask)

    percussive : np.ndarray [shape=(d, n)]
        percussive component (or mask)

    Raises
    ------
    ParameterError
        If either margin is smaller than 1.


    See Also
    --------
    util.softmask

    Notes
    -----
    The original documentation states that this function caches at
    level 30; no cache decorator is visible on this definition, so
    confirm before relying on it.

    Examples
    --------
    Separate into harmonic and percussive

    >>> y, sr = librosa.load(librosa.util.example_audio_file(), duration=15)
    >>> D = librosa.stft(y)
    >>> H, P = librosa.decompose.hpss(D)

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.subplot(3, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(np.abs(D),
    ...                                                  ref=np.max),
    ...                          y_axis='log')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Full power spectrogram')
    >>> plt.subplot(3, 1, 2)
    >>> librosa.display.specshow(librosa.amplitude_to_db(np.abs(H),
    ...                                                  ref=np.max),
    ...                          y_axis='log')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Harmonic power spectrogram')
    >>> plt.subplot(3, 1, 3)
    >>> librosa.display.specshow(librosa.amplitude_to_db(np.abs(P),
    ...                                                  ref=np.max),
    ...                          y_axis='log')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Percussive power spectrogram')
    >>> plt.tight_layout()
    >>> plt.show()


    Or with a narrower horizontal filter

    >>> H, P = librosa.decompose.hpss(D, kernel_size=(13, 31))

    Just get harmonic/percussive masks, not the spectra

    >>> mask_H, mask_P = librosa.decompose.hpss(D, mask=True)
    >>> mask_H
    array([[  1.000e+00,   1.469e-01, ...,   2.648e-03,   2.164e-03],
           [  1.000e+00,   2.368e-01, ...,   9.413e-03,   7.703e-03],
           ...,
           [  8.869e-01,   5.673e-02, ...,   4.603e-02,   1.247e-05],
           [  7.068e-01,   2.194e-02, ...,   4.453e-02,   1.205e-05]], dtype=float32)
    >>> mask_P
    array([[  2.858e-05,   8.531e-01, ...,   9.974e-01,   9.978e-01],
           [  1.586e-05,   7.632e-01, ...,   9.906e-01,   9.923e-01],
           ...,
           [  1.131e-01,   9.433e-01, ...,   9.540e-01,   1.000e+00],
           [  2.932e-01,   9.781e-01, ...,   9.555e-01,   1.000e+00]], dtype=float32)

    Separate into harmonic/percussive/residual components by using a margin > 1.0

    >>> H, P = librosa.decompose.hpss(D, margin=3.0)
    >>> R = D - (H+P)
    >>> y_harm = librosa.core.istft(H)
    >>> y_perc = librosa.core.istft(P)
    >>> y_resi = librosa.core.istft(R)


    Get a more isolated percussive component by widening its margin

    >>> H, P = librosa.decompose.hpss(D, margin=(1.0,5.0))

    """

    def _as_pair(value):
        # A scalar applies to both components; otherwise take the first
        # two entries as (harmonic, percussive).
        if np.isscalar(value):
            return value, value
        return value[0], value[1]

    # Work on magnitudes; the phase is re-applied to the outputs
    if np.iscomplexobj(S):
        S, phase = magphase(S)
    else:
        phase = 1

    win_harm, win_perc = _as_pair(kernel_size)
    margin_harm, margin_perc = _as_pair(margin)

    # margin minimum is 1.0
    if margin_harm < 1 or margin_perc < 1:
        raise ParameterError("Margins must be >= 1.0. "
                             "A typical range is between 1 and 10.")

    # Median filters: along the second axis for the harmonic part, along
    # the first axis for the percussive part.  Writing into pre-allocated
    # arrays preserves the memory layout of `S`.
    harm = np.empty_like(S)
    harm[:] = median_filter(S, size=(1, win_harm), mode='reflect')

    perc = np.empty_like(S)
    perc[:] = median_filter(S, size=(win_perc, 1), mode='reflect')

    # Near-zero entries are only split evenly when no margin is in effect
    split_zeros = (margin_harm == 1 and margin_perc == 1)

    mask_harm = softmask(harm, perc * margin_harm,
                         power=power, split_zeros=split_zeros)
    mask_perc = softmask(perc, harm * margin_perc,
                         power=power, split_zeros=split_zeros)

    if mask:
        return mask_harm, mask_perc

    harmonic = (S * mask_harm) * phase
    percussive = (S * mask_perc) * phase

    return (harmonic, percussive)
예제 #10
0
파일: xodmaEffects.py 프로젝트: xodmk/xodma
def pitch_shift(y,
                sr,
                n_steps,
                bins_per_octave=12,
                res_type='kaiser_best',
                **kwargs):
    '''Shift the pitch of a waveform by `n_steps` steps.

    The signal is first time-stretched by `2**(-n_steps / bins_per_octave)`,
    then resampled back so that the duration is restored and the pitch moves.
    With the default `bins_per_octave=12`, one step is a semitone.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    sr : number > 0 [scalar]
        audio sampling rate of `y`

    n_steps : float [scalar]
        how many (fractional) steps to shift `y`

    bins_per_octave : int > 0 [scalar]
        how many steps per octave.  Must be an integer type.

    res_type : string
        Resample type.
        Possible options: 'kaiser_best', 'kaiser_fast', and 'scipy', 'polyphase',
        'fft'.
        By default, 'kaiser_best' is used.

        See `core.resample` for more information.

    kwargs: additional keyword arguments.
        Forwarded unchanged to `time_stretch`.

    Returns
    -------
    y_shift : np.ndarray [shape=(n,)]
        The pitch-shifted audio time-series

    Raises
    ------
    ParameterError
        If `bins_per_octave` is smaller than 1 or not an integer type.


    See Also
    --------
    time_stretch : time stretching
    librosa.core.phase_vocoder : spectrogram phase vocoder
    pyrubberband.pyrb.pitch_shift : high-quality pitch shifting using RubberBand

    Examples
    --------
    Shift up by a major third (four half-steps)

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_third = librosa.effects.pitch_shift(y, sr, n_steps=4)

    Shift down by a tritone (six half-steps)

    >>> y_tritone = librosa.effects.pitch_shift(y, sr, n_steps=-6)

    Shift up by 3 quarter-tones

    >>> y_three_qt = librosa.effects.pitch_shift(y, sr, n_steps=3,
    ...                                          bins_per_octave=24)
    '''

    if bins_per_octave < 1 or not np.issubdtype(type(bins_per_octave),
                                                np.integer):
        raise ParameterError('bins_per_octave must be a positive integer.')

    # Shift expressed in octaves; negated because shifting up means the
    # stretched signal has to be played back faster.
    octaves = -float(n_steps) / bins_per_octave
    rate = 2.0**octaves

    # Stretch in time ...
    stretched = time_stretch(y, rate, **kwargs)

    # ... then resample from the implied rate back to `sr`
    y_shift = resample(stretched, float(sr) / rate, sr, res_type=res_type)

    # Crop (or pad) to the same length as the input
    return fix_length(y_shift, len(y))
예제 #11
0
def spectral_bandwidth(y=None, sr=48000, S=None, n_fft=2048, hop_length=512,
                       freq=None, centroid=None, norm=True, p=2):
    '''Compute p'th-order spectral bandwidth:

        (sum_k S[k] * |freq[k] - centroid|**p)**(1/p)

    Parameters
    ----------
    y : np.ndarray [shape=(n,)] or None
        audio time series

    sr : number > 0 [scalar]
        audio sampling rate of `y`

    S : np.ndarray [shape=(d, t)] or None
        (optional) spectrogram magnitude

    n_fft : int > 0 [scalar]
        FFT window size

    hop_length : int > 0 [scalar]
        hop length for STFT. See `librosa.core.stft` for details.

    freq : None or np.ndarray [shape=(d,) or shape=(d, t)]
        Center frequencies for spectrogram bins.
        If `None`, then FFT bin center frequencies are used.
        Otherwise, it can be a single array of `d` center frequencies,
        or a matrix of center frequencies as constructed by
        `librosa.core.ifgram`

    centroid : None or np.ndarray [shape=(1, t)]
        pre-computed centroid frequencies.
        If `None`, they are computed with `spectral_centroid`.

    norm : bool
        Normalize per-frame spectral energy (sum to one)

    p : float > 0
        Power to raise deviation from spectral centroid.


    Returns
    -------
    bandwidth : np.ndarray [shape=(1, t)]
        frequency bandwidth for each frame

    Raises
    ------
    ParameterError
        If the spectrogram is complex-valued or contains negative entries.


    Examples
    --------
    From time-series input

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
    >>> spec_bw
    array([[ 3379.878,  1429.486, ...,  3235.214,  3080.148]])

    From spectrogram input

    >>> S, phase = librosa.magphase(librosa.stft(y=y))
    >>> librosa.feature.spectral_bandwidth(S=S)
    array([[ 3379.878,  1429.486, ...,  3235.214,  3080.148]])

    Using variable bin center frequencies

    >>> if_gram, D = librosa.ifgram(y)
    >>> librosa.feature.spectral_bandwidth(S=np.abs(D), freq=if_gram)
    array([[ 3380.011,  1429.11 , ...,  3235.22 ,  3080.148]])

    Plot the result

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.subplot(2, 1, 1)
    >>> plt.semilogy(spec_bw.T, label='Spectral bandwidth')
    >>> plt.ylabel('Hz')
    >>> plt.xticks([])
    >>> plt.xlim([0, spec_bw.shape[-1]])
    >>> plt.legend()
    >>> plt.subplot(2, 1, 2)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
    ...                          y_axis='log', x_axis='time')
    >>> plt.title('log Power spectrogram')
    >>> plt.tight_layout()

    '''

    S, n_fft = mag_spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

    # Input sanity checks: real-valued first, then non-negative
    if not np.isrealobj(S):
        raise ParameterError('Spectral bandwidth is only defined '
                             'with real-valued input')
    if np.any(S < 0):
        raise ParameterError('Spectral bandwidth is only defined '
                             'with non-negative energies')

    # The centroid is derived from the same spectrogram unless supplied
    if centroid is None:
        centroid = spectral_centroid(y=y, sr=sr, S=S,
                                     n_fft=n_fft,
                                     hop_length=hop_length,
                                     freq=freq)

    # Fall back to the FFT bin center frequencies
    if freq is None:
        freq = fft_frequencies(sr=sr, n_fft=n_fft)

    per_frame_centroid = centroid[0]

    if freq.ndim == 1:
        # Fixed bin frequencies: build the (d, t) table of distances
        spread = np.abs(np.subtract.outer(freq, per_frame_centroid))
    else:
        # Per-frame bin frequencies already have shape (d, t)
        spread = np.abs(freq - per_frame_centroid)

    # Column-normalize S when requested
    weights = normalize(S, norm=1, axis=0) if norm else S

    moment = np.sum(weights * spread**p, axis=0, keepdims=True)

    return moment**(1./p)
예제 #12
0
def fix_frames(frames, x_min=0, x_max=None, pad=True):
    '''Restrict a collection of frame indices to the range [x_min, x_max].

    Parameters
    ----------
    frames : np.ndarray [shape=(n_frames,)]
        Non-negative frame indices

    x_min : int >= 0 or None
        Smallest frame index to keep.  `None` disables the lower bound.

    x_max : int >= 0 or None
        Largest frame index to keep.  `None` disables the upper bound.

    pad : boolean
        If `True`, out-of-range values are clipped onto the bounds and
        every bound that is not `None` is added to the output, so that
        the result spans the full range `[x_min, x_max]`.

        If `False`, out-of-range values are simply dropped.

    Returns
    -------
    fixed_frames : np.ndarray [shape=(n_fixed_frames,), dtype=int]
        The surviving frame indices, de-duplicated, sorted and cast to int

    Raises
    ------
    ParameterError
        If `frames` contains negative values

    Examples
    --------
    >>> # Generate a list of frame indices
    >>> frames = np.arange(0, 1000.0, 50)
    >>> frames
    array([   0.,   50.,  100.,  150.,  200.,  250.,  300.,  350.,
            400.,  450.,  500.,  550.,  600.,  650.,  700.,  750.,
            800.,  850.,  900.,  950.])
    >>> # Clip to span at most 250
    >>> librosa.util.fix_frames(frames, x_max=250)
    array([  0,  50, 100, 150, 200, 250])
    >>> # Or pad to span up to 2500
    >>> librosa.util.fix_frames(frames, x_max=2500)
    array([   0,   50,  100,  150,  200,  250,  300,  350,  400,
            450,  500,  550,  600,  650,  700,  750,  800,  850,
            900,  950, 2500])
    >>> librosa.util.fix_frames(frames, x_max=2500, pad=False)
    array([  0,  50, 100, 150, 200, 250, 300, 350, 400, 450, 500,
           550, 600, 650, 700, 750, 800, 850, 900, 950])

    >>> # Or starting away from zero
    >>> frames = np.arange(200, 500, 33)
    >>> frames
    array([200, 233, 266, 299, 332, 365, 398, 431, 464, 497])
    >>> librosa.util.fix_frames(frames)
    array([  0, 200, 233, 266, 299, 332, 365, 398, 431, 464, 497])
    >>> librosa.util.fix_frames(frames, x_max=500)
    array([  0, 200, 233, 266, 299, 332, 365, 398, 431, 464, 497,
           500])
    '''

    frames = np.asarray(frames)

    if np.any(frames < 0):
        raise ParameterError('Negative frame index detected')

    if pad:
        # Clipping only makes sense when at least one bound is active
        if not (x_min is None and x_max is None):
            frames = np.clip(frames, x_min, x_max)

        # Prepend whichever bounds are active so the output reaches them
        bounds = [bound for bound in (x_min, x_max) if bound is not None]
        frames = np.concatenate((bounds, frames))

    # Discard anything still outside the requested range
    if x_min is not None:
        frames = frames[frames >= x_min]

    if x_max is not None:
        frames = frames[frames <= x_max]

    # np.unique both de-duplicates and sorts
    return np.unique(frames).astype(int)
예제 #13
0
def axis_sort(S, axis=-1, index=False, value=None):
    '''Sort an array along its rows or columns.

    Examples
    --------
    Visualize NMF output for a spectrogram S

    >>> # Sort the columns of W by peak frequency bin
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> S = np.abs(librosa.stft(y))
    >>> W, H = librosa.decompose.decompose(S, n_components=32)
    >>> W_sort = librosa.util.axis_sort(W)

    Or sort by the lowest frequency bin

    >>> W_sort = librosa.util.axis_sort(W, value=np.argmin)

    Or sort the rows instead of the columns

    >>> W_sort_rows = librosa.util.axis_sort(W, axis=0)

    Get the sorting index also, and use it to permute the rows of H

    >>> W_sort, idx = librosa.util.axis_sort(W, index=True)
    >>> H_sort = H[idx, :]

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.subplot(2, 2, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(W, ref=np.max),
    ...                          y_axis='log')
    >>> plt.title('W')
    >>> plt.subplot(2, 2, 2)
    >>> librosa.display.specshow(H, x_axis='time')
    >>> plt.title('H')
    >>> plt.subplot(2, 2, 3)
    >>> librosa.display.specshow(librosa.amplitude_to_db(W_sort,
    ...                                                  ref=np.max),
    ...                          y_axis='log')
    >>> plt.title('W sorted')
    >>> plt.subplot(2, 2, 4)
    >>> librosa.display.specshow(H_sort, x_axis='time')
    >>> plt.title('H sorted')
    >>> plt.tight_layout()


    Parameters
    ----------
    S : np.ndarray [shape=(d, n)]
        Array to be sorted

    axis : int [scalar]
        The axis along which to compute the sorting values

        - `axis=0` to sort rows by peak column index
        - `axis=1` to sort columns by peak row index

    index : boolean [scalar]
        If true, returns the index array as well as the permuted data.

    value : function
        function to return the index corresponding to the sort order.
        It is called as `value(S, axis=other_axis)`.
        Default: `np.argmax`.

    Returns
    -------
    S_sort : np.ndarray [shape=(d, n)]
        `S` with the columns or rows permuted in sorting order

    idx : np.ndarray (optional) [shape=(d,) or (n,)]
        If `index == True`, the sorting index used to permute `S`.
        Length of `idx` corresponds to the selected `axis`.

    Raises
    ------
    ParameterError
        If `S` does not have exactly 2 dimensions (`S.ndim != 2`)
    '''

    if value is None:
        value = np.argmax

    if S.ndim != 2:
        raise ParameterError('axis_sort is only defined for 2D arrays')

    # The sort key of each slice along `axis` is computed over the
    # *other* axis; np.mod keeps this correct for negative `axis`.
    bin_idx = value(S, axis=np.mod(1-axis, S.ndim))
    idx = np.argsort(bin_idx)

    sort_slice = [slice(None)] * S.ndim
    sort_slice[axis] = idx

    # Multidimensional indexing requires a tuple: a plain list of
    # slices/arrays is no longer accepted by NumPy as an index.
    S_sort = S[tuple(sort_slice)]

    if index:
        return S_sort, idx

    return S_sort
예제 #14
0
def roll_sparse(x, shift, axis=0):
    '''Circularly shift a sparse matrix along one axis.

    The result matches what ``numpy.roll`` produces for the equivalent
    dense array.  Inputs that are not sparse matrices are passed
    directly to ``numpy.roll``.

    Parameters
    ----------
    x : scipy.sparse.spmatrix or np.ndarray
        The matrix to roll

    shift : int
        The number of positions to roll the specified axis

    axis : (0, 1, -1)
        The axis along which to roll.

    Returns
    -------
    x_rolled : same type as `x`
        The rolled matrix, with the same format as `x`

    Raises
    ------
    ParameterError
        If `x` is sparse and `axis` is not one of `(0, 1, -1)`

    See Also
    --------
    numpy.roll

    Examples
    --------
    >>> # Generate a random sparse binary matrix
    >>> X = scipy.sparse.lil_matrix(np.random.randint(0, 2, size=(5,5)))
    >>> X_roll = roll_sparse(X, 2, axis=0)  # Roll by 2 on the first axis
    >>> X_dense_r = roll_sparse(X.toarray(), 2, axis=0)  # Equivalent dense roll
    >>> np.allclose(X_roll.toarray(), X_dense_r)
    True
    '''
    # Dense input: numpy already does the job
    if not scipy.sparse.isspmatrix(x):
        return np.roll(x, shift, axis=axis)

    if axis not in [0, 1, -1]:
        raise ParameterError('axis must be one of (0, 1, -1)')

    # shift-mod-length lets us have shift > x.shape[axis]
    shift = np.mod(shift, x.shape[axis])

    if shift == 0:
        return x.copy()

    # Remember the caller's format so it can be restored at the end
    original_format = x.format
    x = x.tocsc() if axis == 0 else x.tocsr()

    # Assemble the output as a lil matrix
    rolled = scipy.sparse.lil_matrix(x.shape, dtype=x.dtype)

    def _along_axis(segment):
        # Full slices on every axis except `axis`, which gets `segment`
        selector = [slice(None)] * x.ndim
        selector[axis] = segment
        return tuple(selector)

    # Leading part of the input moves `shift` positions forward ...
    rolled[_along_axis(slice(shift, None))] = x[_along_axis(slice(0, -shift))]

    # ... and the trailing `shift` entries wrap around to the front
    rolled[_along_axis(slice(0, shift))] = x[_along_axis(slice(-shift, None))]

    return rolled.asformat(original_format)
예제 #15
0
def sync(data, idx, aggregate=None, pad=True, axis=-1):
    """Aggregate a multi-dimensional array over segments of one axis.

    Each segment of `data` along `axis` is collapsed to a single entry by
    calling `aggregate(segment, axis=axis)`.

    .. note::
        In order to ensure total coverage, boundary points may be added
        to `idx`.

        If synchronizing a feature matrix against beat tracker output, ensure
        that frame index numbers are properly aligned and use the same hop length.

    Parameters
    ----------
    data      : np.ndarray
        multi-dimensional array of features

    idx : iterable of ints or slices
        Either an ordered array of boundary indices, or
        an iterable collection of slice objects.

        Integer boundaries are turned into slices with `index_to_slice`;
        slice objects are used as given.

    aggregate : function
        aggregation function (default: `np.mean`)

    pad : boolean
        If `True`, `idx` is padded to span the full range `[0, data.shape[axis]]`.
        Only used when `idx` holds integer boundaries.

    axis : int
        The axis along which to aggregate data

    Returns
    -------
    data_sync : ndarray
        `data_sync` will have the same dimension as `data`, except that the `axis`
        coordinate will be reduced according to `idx`.  It has the same dtype
        as `data`.

        For example, a 2-dimensional `data` with `axis=-1` should satisfy

        `data_sync[:, i] = aggregate(data[:, idx[i-1]:idx[i]], axis=-1)`

    Raises
    ------
    ParameterError
        If the index set is not of consistent type (all slices or all integers)

    Examples
    --------
    Beat-synchronous CQT spectra

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr, trim=False)
    >>> C = np.abs(librosa.cqt(y=y, sr=sr))
    >>> beats = librosa.util.fix_frames(beats, x_max=C.shape[1])

    By default, use mean aggregation

    >>> C_avg = librosa.util.sync(C, beats)

    Use median-aggregation instead of mean

    >>> C_med = librosa.util.sync(C, beats,
    ...                             aggregate=np.median)

    Or sub-beat synchronization

    >>> sub_beats = librosa.segment.subsegment(C, beats)
    >>> sub_beats = librosa.util.fix_frames(sub_beats, x_max=C.shape[1])
    >>> C_med_sub = librosa.util.sync(C, sub_beats, aggregate=np.median)


    Plot the results

    >>> import matplotlib.pyplot as plt
    >>> beat_t = librosa.frames_to_time(beats, sr=sr)
    >>> subbeat_t = librosa.frames_to_time(sub_beats, sr=sr)
    >>> plt.figure()
    >>> plt.subplot(3, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(C,
    ...                                                  ref=np.max),
    ...                          x_axis='time')
    >>> plt.title('CQT power, shape={}'.format(C.shape))
    >>> plt.subplot(3, 1, 2)
    >>> librosa.display.specshow(librosa.amplitude_to_db(C_med,
    ...                                                  ref=np.max),
    ...                          x_coords=beat_t, x_axis='time')
    >>> plt.title('Beat synchronous CQT power, '
    ...           'shape={}'.format(C_med.shape))
    >>> plt.subplot(3, 1, 3)
    >>> librosa.display.specshow(librosa.amplitude_to_db(C_med_sub,
    ...                                                  ref=np.max),
    ...                          x_coords=subbeat_t, x_axis='time')
    >>> plt.title('Sub-beat synchronous CQT power, '
    ...           'shape={}'.format(C_med_sub.shape))
    >>> plt.tight_layout()
    >>> plt.show()

    """

    if aggregate is None:
        aggregate = np.mean

    in_shape = list(data.shape)

    # The index set must be homogeneous: all slices, or all integers
    if all([isinstance(item, slice) for item in idx]):
        segments = idx
    elif all([np.issubdtype(type(item), np.integer) for item in idx]):
        segments = index_to_slice(np.asarray(idx), 0, in_shape[axis], pad=pad)
    else:
        raise ParameterError('Invalid index set: {}'.format(idx))

    # Output keeps every dimension except `axis`, which has one
    # entry per segment
    out_shape = list(in_shape)
    out_shape[axis] = len(segments)

    # Preserve the memory layout of the input
    layout = 'F' if np.isfortran(data) else 'C'
    data_agg = np.empty(out_shape, order=layout, dtype=data.dtype)

    source = [slice(None)] * data.ndim
    target = [slice(None)] * data_agg.ndim

    for position, segment in enumerate(segments):
        source[axis] = segment
        target[axis] = position
        data_agg[tuple(target)] = aggregate(data[tuple(source)], axis=axis)

    return data_agg
예제 #16
0
def sparsify_rows(x, quantile=0.01):
    '''
    Build a row-sparse approximation of `x` by zeroing the smallest
    entries of each row.

    Within each row, magnitudes are sorted in ascending order and
    accumulated as a fraction of the row's total magnitude.  The first
    entry at which this running fraction reaches `quantile` sets the
    threshold; every element whose magnitude is at least that threshold
    is kept, and the rest are dropped.

    Parameters
    ----------
    x : np.ndarray [ndim <= 2]
        The input matrix to sparsify.

    quantile : float in [0, 1.0)
        Percentage of magnitude to discard in each row of `x`

    Returns
    -------
    x_sparse : `scipy.sparse.csr_matrix` [shape=x.shape]
        Row-sparsified approximation of `x`

        If `x.ndim == 1`, then `x` is interpreted as a row vector,
        and `x_sparse.shape == (1, len(x))`.

    Raises
    ------
    ParameterError
        If `x.ndim > 2`

        If `quantile` lies outside `[0, 1.0)`

    Examples
    --------
    >>> # Construct a Hann window to sparsify
    >>> x = scipy.signal.hann(32)
    >>> x
    array([ 0.   ,  0.01 ,  0.041,  0.09 ,  0.156,  0.236,  0.326,
            0.424,  0.525,  0.625,  0.72 ,  0.806,  0.879,  0.937,
            0.977,  0.997,  0.997,  0.977,  0.937,  0.879,  0.806,
            0.72 ,  0.625,  0.525,  0.424,  0.326,  0.236,  0.156,
            0.09 ,  0.041,  0.01 ,  0.   ])
    >>> # Discard the bottom percentile
    >>> x_sparse = librosa.util.sparsify_rows(x, quantile=0.01)
    >>> x_sparse
    <1x32 sparse matrix of type '<type 'numpy.float64'>'
        with 26 stored elements in Compressed Sparse Row format>
    >>> x_sparse.todense()
    matrix([[ 0.   ,  0.   ,  0.   ,  0.09 ,  0.156,  0.236,  0.326,
              0.424,  0.525,  0.625,  0.72 ,  0.806,  0.879,  0.937,
              0.977,  0.997,  0.997,  0.977,  0.937,  0.879,  0.806,
              0.72 ,  0.625,  0.525,  0.424,  0.326,  0.236,  0.156,
              0.09 ,  0.   ,  0.   ,  0.   ]])
    >>> # Discard up to the bottom 10th percentile
    >>> x_sparse = librosa.util.sparsify_rows(x, quantile=0.1)
    >>> x_sparse
    <1x32 sparse matrix of type '<type 'numpy.float64'>'
        with 20 stored elements in Compressed Sparse Row format>
    >>> x_sparse.todense()
    matrix([[ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.326,
              0.424,  0.525,  0.625,  0.72 ,  0.806,  0.879,  0.937,
              0.977,  0.997,  0.997,  0.977,  0.937,  0.879,  0.806,
              0.72 ,  0.625,  0.525,  0.424,  0.326,  0.   ,  0.   ,
              0.   ,  0.   ,  0.   ,  0.   ]])
    '''

    if x.ndim > 2:
        raise ParameterError('Input must have 2 or fewer dimensions. '
                             'Provided x.shape={}.'.format(x.shape))

    if x.ndim == 1:
        # Treat a vector as a single row
        x = x.reshape((1, -1))

    if not (0.0 <= quantile < 1):
        raise ParameterError('Invalid quantile {:.2f}'.format(quantile))

    magnitudes = np.abs(x)
    row_totals = np.sum(magnitudes, axis=1, keepdims=True)

    # Running share of each row's total magnitude, smallest entries first
    ascending = np.sort(magnitudes, axis=1)
    running_share = np.cumsum(ascending / row_totals, axis=1)

    # argmin over a boolean row returns the first False, i.e. the first
    # position where the running share is no longer below `quantile`
    cutoff_positions = np.argmin(running_share < quantile, axis=1)

    result = scipy.sparse.lil_matrix(x.shape, dtype=x.dtype)

    for row, cutoff in enumerate(cutoff_positions):
        keep = np.where(magnitudes[row] >= ascending[row, cutoff])
        result[row, keep] = x[row, keep]

    return result.tocsr()
예제 #17
0
def decompose(S,
              n_components=None,
              transformer=None,
              sort=False,
              fit=True,
              **kwargs):
    """Decompose a feature matrix.

    Given a spectrogram `S`, produce a decomposition into `components`
    and `activations` such that `S ~= components.dot(activations)`.

    By default, this is done with non-negative matrix factorization (NMF),
    but any `sklearn.decomposition`-type object will work.


    Parameters
    ----------
    S : np.ndarray [shape=(n_features, n_samples), dtype=float]
        The input feature matrix (e.g., magnitude spectrogram)

    n_components : int > 0 [scalar] or None
        number of desired components

        if None, then `n_features` components are used

        Only used when the default transformer is constructed; it is
        ignored if `transformer` is provided.

    transformer : None or object
        If None, use `sklearn.decomposition.NMF`

        Otherwise, any object with a similar interface to NMF should work.
        `transformer` must follow the scikit-learn convention, where
        input data is `(n_samples, n_features)`.

        `transformer.fit_transform()` will be run on `S.T` (not `S`),
        the return value of which is stored (transposed) as `activations`

        The components will be retrieved as `transformer.components_.T`

        `S ~= np.dot(activations, transformer.components_).T`

        or equivalently:
        `S ~= np.dot(transformer.components_.T, activations.T)`

    sort : bool
        If `True`, components are sorted by ascending peak frequency.

        .. note:: If used with `transformer`, sorting is applied to copies
            of the decomposition parameters, and not to `transformer`'s
            internal parameters.

    fit : bool
        If `True`, components are estimated from the input ``S``
        (via `transformer.fit_transform`).

        If `False`, components are assumed to be pre-computed and stored
        in ``transformer``, and are not changed
        (only `transformer.transform` is called).

    kwargs : Additional keyword arguments to the default transformer
        `sklearn.decomposition.NMF`


    Returns
    -------
    components: np.ndarray [shape=(n_features, n_components)]
        matrix of components (basis elements).

    activations: np.ndarray [shape=(n_components, n_samples)]
        transformed matrix/activation matrix


    Raises
    ------
    ParameterError
        if `fit` is False and no `transformer` object is provided.


    See Also
    --------
    sklearn.decomposition : SciKit-Learn matrix decomposition modules


    Examples
    --------
    Decompose a magnitude spectrogram into 32 components with NMF

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> S = np.abs(librosa.stft(y))
    >>> comps, acts = librosa.decompose.decompose(S, n_components=8)
    >>> comps
    array([[  1.876e-01,   5.559e-02, ...,   1.687e-01,   4.907e-02],
           [  3.148e-01,   1.719e-01, ...,   2.314e-01,   9.493e-02],
           ...,
           [  1.561e-07,   8.564e-08, ...,   7.167e-08,   4.997e-08],
           [  1.531e-07,   7.880e-08, ...,   5.632e-08,   4.028e-08]])
    >>> acts
    array([[  4.197e-05,   8.512e-03, ...,   3.056e-05,   9.159e-06],
           [  9.568e-06,   1.718e-02, ...,   3.322e-05,   7.869e-06],
           ...,
           [  5.982e-05,   1.311e-02, ...,  -0.000e+00,   6.323e-06],
           [  3.782e-05,   7.056e-03, ...,   3.290e-05,  -0.000e+00]])


    Sort components by ascending peak frequency

    >>> comps, acts = librosa.decompose.decompose(S, n_components=16,
    ...                                           sort=True)


    Or with sparse dictionary learning

    >>> import sklearn.decomposition
    >>> T = sklearn.decomposition.MiniBatchDictionaryLearning(n_components=16)
    >>> scomps, sacts = librosa.decompose.decompose(S, transformer=T, sort=True)

    >>> import matplotlib.pyplot as plt
    >>> plt.figure(figsize=(10,8))
    >>> plt.subplot(3, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S,
    ...                                                  ref=np.max),
    ...                          y_axis='log', x_axis='time')
    >>> plt.title('Input spectrogram')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.subplot(3, 2, 3)
    >>> librosa.display.specshow(librosa.amplitude_to_db(comps,
    ...                                                  ref=np.max),
    ...                          y_axis='log')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Components')
    >>> plt.subplot(3, 2, 4)
    >>> librosa.display.specshow(acts, x_axis='time')
    >>> plt.ylabel('Components')
    >>> plt.title('Activations')
    >>> plt.colorbar()
    >>> plt.subplot(3, 1, 3)
    >>> S_approx = comps.dot(acts)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S_approx,
    ...                                                  ref=np.max),
    ...                          y_axis='log', x_axis='time')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Reconstructed spectrogram')
    >>> plt.tight_layout()
    >>> plt.show()
    """

    if transformer is None:
        # A freshly constructed transformer has nothing to reuse,
        # so it must be fit to the data
        if not fit:
            raise ParameterError('fit must be True if transformer is None')

        # Resolve the documented default *before* building the
        # transformer, so that NMF receives an explicit component count
        # rather than relying on how it interprets `None`.
        if n_components is None:
            n_components = S.shape[0]

        transformer = sklearn.decomposition.NMF(n_components=n_components,
                                                **kwargs)

    # scikit-learn expects (n_samples, n_features), hence the transposes
    if fit:
        activations = transformer.fit_transform(S.T).T
    else:
        activations = transformer.transform(S.T).T

    components = transformer.components_.T

    if sort:
        # Permute the activations with the same ordering as the components
        components, idx = axis_sort(components, index=True)
        activations = activations[idx]

    return components, activations
예제 #18
0
def nn_filter(S, rec=None, aggregate=None, axis=-1, **kwargs):
    '''Filtering by nearest-neighbors.

    Every data point (e.g., a spectrogram column) is replaced by an
    aggregate of its nearest neighbors in feature space.

    This can be useful for de-noising a spectrogram or feature matrix.

    The non-local means method [1]_ can be recovered by providing a
    weighted recurrence matrix as input and specifying `aggregate=np.average`.

    Similarly, setting `aggregate=np.median` produces sparse de-noising
    as in REPET-SIM [2]_.

    .. [1] Buades, A., Coll, B., & Morel, J. M.
        (2005, June). A non-local algorithm for image denoising.
        In Computer Vision and Pattern Recognition, 2005.
        CVPR 2005. IEEE Computer Society Conference on (Vol. 2, pp. 60-65). IEEE.

    .. [2] Rafii, Z., & Pardo, B.
        (2012, October).  "Music/Voice Separation Using the Similarity Matrix."
        International Society for Music Information Retrieval Conference, 2012.

    Parameters
    ----------
    S : np.ndarray
        The input data (spectrogram) to filter

    rec : (optional) scipy.sparse.spmatrix or np.ndarray
        Optionally, a pre-computed nearest-neighbor matrix
        as provided by `librosa.segment.recurrence_matrix`.

        A dense `rec` is converted to `scipy.sparse.csc_matrix`;
        a sparse `rec` is used as provided.

    aggregate : function
        aggregation function (default: `np.mean`)

        If `aggregate=np.average`, then a weighted average is
        computed according to the (per-row) weights in `rec`.

        For all other aggregation functions, all neighbors
        are treated equally.


    axis : int
        The axis along which to filter (by default, columns)

    kwargs
        Additional keyword arguments provided to
        `librosa.segment.recurrence_matrix` if `rec` is not provided.
        `sparse=True` is always passed in that case.

    Returns
    -------
    S_filtered : np.ndarray
        The filtered data

    Raises
    ------
    ParameterError
        if `rec` is provided and its shape is incompatible with `S`.

    See also
    --------
    decompose
    hpss
    librosa.segment.recurrence_matrix


    Examples
    --------

    De-noise a chromagram by non-local median filtering.
    By default this would use euclidean distance to select neighbors,
    but this can be overridden directly by setting the `metric` parameter.

    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
    ...                      offset=30, duration=10)
    >>> chroma = librosa.feature.chroma_cqt(y=y, sr=sr)
    >>> chroma_med = librosa.decompose.nn_filter(chroma,
    ...                                          aggregate=np.median,
    ...                                          metric='cosine')

    To use non-local means, provide an affinity matrix and `aggregate=np.average`.

    >>> rec = librosa.segment.recurrence_matrix(chroma, mode='affinity',
    ...                                         metric='cosine', sparse=True)
    >>> chroma_nlm = librosa.decompose.nn_filter(chroma, rec=rec,
    ...                                          aggregate=np.average)

    >>> import matplotlib.pyplot as plt
    >>> plt.figure(figsize=(10, 8))
    >>> plt.subplot(5, 1, 1)
    >>> librosa.display.specshow(chroma, y_axis='chroma')
    >>> plt.colorbar()
    >>> plt.title('Unfiltered')
    >>> plt.subplot(5, 1, 2)
    >>> librosa.display.specshow(chroma_med, y_axis='chroma')
    >>> plt.colorbar()
    >>> plt.title('Median-filtered')
    >>> plt.subplot(5, 1, 3)
    >>> librosa.display.specshow(chroma_nlm, y_axis='chroma')
    >>> plt.colorbar()
    >>> plt.title('Non-local means')
    >>> plt.subplot(5, 1, 4)
    >>> librosa.display.specshow(chroma - chroma_med,
    ...                          y_axis='chroma')
    >>> plt.colorbar()
    >>> plt.title('Original - median')
    >>> plt.subplot(5, 1, 5)
    >>> librosa.display.specshow(chroma - chroma_nlm,
    ...                          y_axis='chroma', x_axis='time')
    >>> plt.colorbar()
    >>> plt.title('Original - NLM')
    >>> plt.tight_layout()
    >>> plt.show()
    '''
    if aggregate is None:
        aggregate = np.mean

    if rec is None:
        # Build the neighbor graph ourselves; work on a copy of the
        # keyword arguments and force a sparse result
        rec = recurrence_matrix(S, axis=axis, **dict(kwargs, sparse=True))
    elif not scipy.sparse.issparse(rec):
        rec = scipy.sparse.csc_matrix(rec)

    # `rec` must be square, with one row per data point along `axis`
    n_points = rec.shape[0]
    if n_points != S.shape[axis] or n_points != rec.shape[1]:
        raise ParameterError('Invalid self-similarity matrix shape '
                             'rec.shape={} for S.shape={}'.format(
                                 rec.shape, S.shape))

    # The helper filters along the leading axis, so move the target axis
    # to the front and swap it back afterwards
    S_leading = S.swapaxes(0, axis)
    filtered = __nn_filter_helper(rec.data, rec.indices, rec.indptr,
                                  S_leading,
                                  aggregate)

    return filtered.swapaxes(0, axis)
예제 #19
0
def onset_strength_multi(y=None,
                         sr=48000,
                         S=None,
                         lag=1,
                         max_size=1,
                         detrend=False,
                         center=True,
                         feature=None,
                         aggregate=None,
                         channels=None,
                         **kwargs):
    """Compute a spectral flux onset strength envelope across multiple channels.

    Onset strength for channel `i` at time `t` is determined by:

    `mean_{f in channels[i]} max(0, S[f, t+1] - S[f, t])`


    Parameters
    ----------
    y        : np.ndarray [shape=(n,)]
        audio time-series

    sr       : number > 0 [scalar]
        sampling rate of `y` (default: 48000)

    S        : np.ndarray [shape=(d, m)]
        pre-computed (log-power) spectrogram.
        If not provided, it is computed from `y` using `feature`
        and converted to decibels.

    lag      : int > 0
        time lag for computing differences

    max_size : int > 0
        size (in frequency bins) of the local max filter.
        set to `1` to disable filtering.

    detrend : bool [scalar]
        Filter the onset strength to remove the DC component

    center : bool [scalar]
        Shift the onset function by `n_fft / (2 * hop_length)` frames,
        then trim it to the number of frames in `S`.

        `n_fft` and `hop_length` are read from `kwargs`, defaulting
        to 2048 and 512 respectively.

    feature : function
        Function for computing time-series features, eg, scaled spectrograms.
        By default, uses `librosa.feature.melspectrogram` with `fmax=11025.0`

    aggregate : function
        Aggregation function to use when combining onsets
        at different frequency bins.

        Default: `np.mean`

    channels : list or None
        Array of channel boundaries or slice objects.
        If `None`, then a single channel is generated to span all bands.

    kwargs : additional keyword arguments
        Additional parameters to `feature()`, if `S` is not provided.


    Returns
    -------
    onset_envelope   : np.ndarray [shape=(n_channels, m)]
        array containing the onset strength envelope for each specified channel


    Raises
    ------
    ParameterError
        if `lag` or `max_size` is not a positive integer


    See Also
    --------
    onset_strength

    Examples
    --------
    First, load some audio and plot the spectrogram

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
    ...                      duration=10.0)
    >>> D = np.abs(librosa.stft(y))
    >>> plt.figure()
    >>> plt.subplot(2, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
    ...                          y_axis='log')
    >>> plt.title('Power spectrogram')

    Construct a standard onset function over four sub-bands

    >>> onset_subbands = librosa.onset.onset_strength_multi(y=y, sr=sr,
    ...                                                     channels=[0, 32, 64, 96, 128])
    >>> plt.subplot(2, 1, 2)
    >>> librosa.display.specshow(onset_subbands, x_axis='time')
    >>> plt.ylabel('Sub-bands')
    >>> plt.title('Sub-band onset strength')

    """

    if feature is None:
        feature = melspectrogram
        kwargs.setdefault('fmax', 11025.0)

    if aggregate is None:
        aggregate = np.mean

    if lag < 1 or not isinstance(lag, int):
        raise ParameterError('lag must be a positive integer')

    if max_size < 1 or not isinstance(max_size, int):
        raise ParameterError('max_size must be a positive integer')

    # Without a pre-computed spectrogram, derive one from the audio
    # and express it in decibels
    if S is None:
        S = power_to_db(np.abs(feature(y=y, sr=sr, **kwargs)))

    # Framing parameters: taken from kwargs when present,
    # otherwise the defaults used for onsets
    n_fft = kwargs.get('n_fft', 2048)
    hop_length = kwargs.get('hop_length', 512)

    # Ensure that S is at least 2-d
    S = np.atleast_2d(S)

    # Reference spectrogram: a max filter of size 1 is a no-op,
    # so in that case S itself is reused without copying
    if max_size == 1:
        reference = S
    else:
        reference = scipy.ndimage.maximum_filter1d(S, max_size, axis=0)

    # Half-wave rectified difference against the reference, `lag` frames apart
    flux = np.maximum(0.0, S[:, lag:] - reference[:, :-lag])

    # Padding of the channel boundaries is only enabled for the
    # default single channel spanning all bands
    span_all_bands = channels is None
    if span_all_bands:
        channels = [slice(None)]

    envelope = sync(flux, channels,
                    aggregate=aggregate, pad=span_all_bands, axis=0)

    # Left-pad to compensate for the lag, plus half an analysis window
    # (in frames) when centering to counter-act framing effects
    if center:
        lead_in = lag + n_fft // (2 * hop_length)
    else:
        lead_in = lag

    envelope = np.pad(envelope, ([0, 0], [int(lead_in), 0]),
                      mode='constant')

    # remove the DC component
    if detrend:
        envelope = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99],
                                        envelope,
                                        axis=-1)

    # Trim to match the input duration
    if center:
        envelope = envelope[:, :S.shape[1]]

    return envelope
예제 #20
0
def cross_similarity(data, data_ref, k=None, metric='euclidean',
                     sparse=False, mode='connectivity', bandwidth=None):
    '''Compute cross-similarity from one data sequence to a reference sequence.

    The output is a matrix `xsim`:

        `xsim[i, j]` is non-zero if `data_ref[:, i]` is a k-nearest neighbor
        of `data[:, j]`.


    Parameters
    ----------
    data : np.ndarray [shape=(d, n)]
        A feature matrix for the comparison sequence

    data_ref : np.ndarray [shape=(d, n_ref)]
        A feature matrix for the reference sequence

    k : int > 0 [scalar] or None
        the number of nearest-neighbors for each sample

        Default: `k = min(n_ref, 2 * ceil(sqrt(n_ref)))`

    metric : str
        Distance metric to use for nearest-neighbor calculation.

        See `sklearn.neighbors.NearestNeighbors` for details.

    sparse : bool [scalar]
        if False, returns a dense type (ndarray)
        if True, returns a sparse type (scipy.sparse.csc_matrix)

    mode : str, {'connectivity', 'distance', 'affinity'}
        If 'connectivity', a binary connectivity matrix is produced.

        If 'distance', then a non-zero entry contains the distance between
        points.

        If 'affinity', then non-zero entries are mapped to
        `exp( - distance(i, j) / bandwidth)` where `bandwidth` is
        as specified below.

    bandwidth : None or float > 0
        If using ``mode='affinity'``, this can be used to set the
        bandwidth on the affinity kernel.

        If no value is provided, it is set automatically to the median
        (ignoring NaNs) over all `data[:, i]` of the largest retained
        neighbor distance, i.e. the distance to the k'th nearest neighbor.

    Returns
    -------
    xsim : np.ndarray or scipy.sparse.csc_matrix, [shape=(n_ref, n)]
        Cross-similarity matrix

    Raises
    ------
    ValueError
        If `data` and `data_ref` do not have the same first dimension.

    ParameterError
        If `mode` is not one of the supported values, or if `bandwidth`
        is provided and is not strictly positive.

    See Also
    --------
    recurrence_matrix
    recurrence_to_lag
    feature.stack_memory
    sklearn.neighbors.NearestNeighbors
    scipy.spatial.distance.cdist

    Examples
    --------
    Find nearest neighbors in MFCC space between two sequences

    >>> hop_length = 1024
    >>> y_ref, sr = librosa.load(librosa.util.example_audio_file())
    >>> y_comp, sr = librosa.load(librosa.util.example_audio_file(), offset=10)
    >>> mfcc_ref = librosa.feature.mfcc(y=y_ref, sr=sr, hop_length=hop_length)
    >>> mfcc_comp = librosa.feature.mfcc(y=y_comp, sr=sr, hop_length=hop_length)
    >>> xsim = librosa.segment.cross_similarity(mfcc_comp, mfcc_ref)

    Or fix the number of nearest neighbors to 5

    >>> xsim = librosa.segment.cross_similarity(mfcc_comp, mfcc_ref, k=5)

    Use cosine similarity instead of Euclidean distance

    >>> xsim = librosa.segment.cross_similarity(mfcc_comp, mfcc_ref, metric='cosine')

    Use an affinity matrix instead of binary connectivity

    >>> xsim_aff = librosa.segment.cross_similarity(mfcc_comp, mfcc_ref, mode='affinity')

    Plot the binary and affinity cross-similarity matrices

    >>> import matplotlib.pyplot as plt
    >>> plt.figure(figsize=(8, 4))
    >>> plt.subplot(1, 2, 1)
    >>> librosa.display.specshow(xsim, x_axis='time', y_axis='time', hop_length=hop_length)
    >>> plt.title('Binary cross-similarity')
    >>> plt.subplot(1, 2, 2)
    >>> librosa.display.specshow(xsim_aff, x_axis='time', y_axis='time',
    ...                          cmap='magma_r', hop_length=hop_length)
    >>> plt.title('Affinity cross-similarity')
    >>> plt.tight_layout()

    '''
    data_ref = np.atleast_2d(data_ref)
    data = np.atleast_2d(data)

    if data_ref.shape[0] != data.shape[0]:
        raise ValueError("data_ref and data must have the same first dimension")

    # swap data axes so the feature axis is last
    data_ref = np.swapaxes(data_ref, -1, 0)
    n_ref = data_ref.shape[0]
    data_ref = data_ref.reshape((n_ref, -1))

    data = np.swapaxes(data, -1, 0)
    n = data.shape[0]
    data = data.reshape((n, -1))

    if mode not in ['connectivity', 'distance', 'affinity']:
        raise ParameterError(("Invalid mode='{}'. Must be one of "
                              "['connectivity', 'distance', "
                              "'affinity']").format(mode))
    if k is None:
        k = min(n_ref, 2 * np.ceil(np.sqrt(n_ref)))

    k = int(k)

    if bandwidth is not None:
        if bandwidth <= 0:
            raise ParameterError('Invalid bandwidth={}. '
                                 'Must be strictly positive.'.format(bandwidth))

    # Never ask for more neighbors than there are reference points
    n_neighbors = min(n_ref, k)

    # Build the neighbor search object
    # `auto` mode does not work with some choices of metric.  Rather than special-case
    # those here, we instead use a fall-back to brute force if auto fails.
    try:
        knn = sklearn.neighbors.NearestNeighbors(n_neighbors=n_neighbors,
                                                 metric=metric,
                                                 algorithm='auto')
    except ValueError:
        knn = sklearn.neighbors.NearestNeighbors(n_neighbors=n_neighbors,
                                                 metric=metric,
                                                 algorithm='brute')

    knn.fit(data_ref)

    # Get the knn graph
    if mode == 'affinity':
        # sklearn's nearest neighbor doesn't support affinity,
        # so we use distance here and then do the conversion post-hoc
        kng_mode = 'distance'
    else:
        kng_mode = mode

    xsim = knn.kneighbors_graph(X=data, mode=kng_mode).tolil()

    # Retain only the top-k links per point
    for i in range(n):
        # Get the links from point i
        links = xsim[i].nonzero()[1]

        # Order them ascending
        idx = links[np.argsort(xsim[i, links].toarray())][0]

        # Everything past the kth closest gets squashed
        xsim[i, idx[k:]] = 0

    # Convert to compressed sparse row (CSR) format
    xsim = xsim.tocsr()
    xsim.eliminate_zeros()

    if mode == 'connectivity':
        # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
        # yields the same dtype on every NumPy version.
        xsim = xsim.astype(bool)
    elif mode == 'affinity':
        if bandwidth is None:
            bandwidth = np.nanmedian(xsim.max(axis=1).data)
        xsim.data[:] = np.exp(xsim.data / (-1 * bandwidth))

    # Transpose to n_ref by n
    xsim = xsim.T

    if not sparse:
        xsim = xsim.toarray()

    return xsim
예제 #21
0
def subsegment(data, frames, n_segments=4, axis=-1):
    '''Refine an existing segmentation by clustering within each segment.

    The boundaries in `frames` split `data` into consecutive intervals
    along `axis`.  Each interval is handed to `agglomerative`, which
    partitions it into at most `n_segments` pieces; the resulting local
    boundaries are shifted back to absolute frame indices and collected.

    .. note::
        If an interval spans fewer than `n_segments` frames, the number
        of clusters requested for it is reduced to the interval length,
        so each frame becomes its own sub-segment.

    Parameters
    ----------
    data : np.ndarray
        Data matrix to use in clustering

    frames : np.ndarray [shape=(n_boundaries,), dtype=int, non-negative]
        Array of beat or segment boundaries, as provided by
        `librosa.beat.beat_track`,
        `librosa.onset.onset_detect`,
        or `agglomerative`.

        The boundaries are first passed through `fix_frames` with
        `x_min=0`, `x_max=data.shape[axis]` and `pad=True`.

    n_segments : int > 0
        Maximum number of sub-segments to produce for each interval.

    axis : int
        Axis along which to apply the segmentation.
        By default, the last index (-1) is taken.

    Returns
    -------
    boundaries : np.ndarray [shape=(n_subboundaries,)]
        Sub-divided segment boundaries, as absolute frame indices

    Raises
    ------
    ParameterError
        If `n_segments` is less than 1.

    See Also
    --------
    agglomerative : Temporal segmentation
    librosa.onset.onset_detect : Onset detection
    librosa.beat.beat_track : Beat tracking

    Examples
    --------
    Load audio, detect beat frames, and subdivide in twos by CQT

    >>> y, sr = librosa.load(librosa.util.example_audio_file(), duration=8)
    >>> tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=512)
    >>> beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=512)
    >>> cqt = np.abs(librosa.cqt(y, sr=sr, hop_length=512))
    >>> subseg = librosa.segment.subsegment(cqt, beats, n_segments=2)
    >>> subseg_t = librosa.frames_to_time(subseg, sr=sr, hop_length=512)

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> librosa.display.specshow(librosa.amplitude_to_db(cqt,
    ...                                                  ref=np.max),
    ...                          y_axis='cqt_hz', x_axis='time')
    >>> lims = plt.gca().get_ylim()
    >>> plt.vlines(beat_times, lims[0], lims[1], color='lime', alpha=0.9,
    ...            linewidth=2, label='Beats')
    >>> plt.vlines(subseg_t, lims[0], lims[1], color='linen', linestyle='--',
    ...            linewidth=1.5, alpha=0.5, label='Sub-beats')
    >>> plt.legend(frameon=True, shadow=True)
    >>> plt.title('CQT + Beat and sub-beat markers')
    >>> plt.tight_layout()
    >>> plt.show()

    '''

    n_frames = data.shape[axis]
    frames = fix_frames(frames, x_min=0, x_max=n_frames, pad=True)

    if n_segments < 1:
        raise ParameterError('n_segments must be a positive integer')

    # Index template selecting everything; only the target axis is narrowed
    selector = [slice(None)] * data.ndim
    sub_boundaries = []

    for left, right in zip(frames[:-1], frames[1:]):
        selector[axis] = slice(left, right)

        # Cannot ask for more clusters than there are frames in the interval
        n_clusters = min(right - left, n_segments)
        local_bounds = agglomerative(data[tuple(selector)],
                                     n_clusters,
                                     axis=axis)

        # Local boundaries are relative to the interval start
        sub_boundaries.extend(left + local_bounds)

    return np.ascontiguousarray(sub_boundaries)
예제 #22
0
def cqt(
        y,
        sr=48000,
        hop_length=512,
        fmin=None,
        n_bins=84,
        bins_per_octave=12,
        tuning=0.0,
        filter_scale=1,
        norm=1,
        sparsity=0.01,
        window='hann',
        scale=True,
        pad_mode='reflect'):
    '''Compute the constant-Q transform of an audio signal.

    This implementation is based on the recursive sub-sampling method
    described by [1]_: the filter basis for a single octave is built once,
    and the signal is repeatedly downsampled by a factor of two so that the
    same basis can be re-used for each lower octave.

    .. [1] Schoerkhuber, Christian, and Anssi Klapuri.
        "Constant-Q transform toolbox for music processing."
        7th Sound and Music Computing Conference, Barcelona, Spain. 2010.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    sr : number > 0 [scalar]
        sampling rate of `y`

    hop_length : int > 0 [scalar]
        number of samples between successive CQT columns.

    fmin : float > 0 [scalar]
        Minimum frequency. Defaults to C1 ~= 32.70 Hz

    n_bins : int > 0 [scalar]
        Number of frequency bins, starting at `fmin`

    bins_per_octave : int > 0 [scalar]
        Number of bins per octave

    tuning : None or float in `[-0.5, 0.5)`
        Tuning offset in fractions of a bin (cents).

        If `None`, tuning will be automatically estimated from the signal.

    filter_scale : float > 0
        Filter scale factor. Small values (<1) use shorter windows
        for improved time resolution.

    norm : {inf, -inf, 0, float > 0}
        Type of norm to use for basis function normalization.
        See `librosa.util.normalize`.

    sparsity : float in [0, 1)
        Sparsify the CQT basis by discarding up to `sparsity`
        fraction of the energy in each basis.

        Set `sparsity=0` to disable sparsification.

    window : str, tuple, number, or function
        Window specification for the basis filters.
        See `filters.get_window` for details.

    scale : bool
        If `True`, scale the CQT response by square-root the length of
        each channel's filter.  This is analogous to `norm='ortho'` in FFT.

        If `False`, do not scale the CQT. This is analogous to
        `norm=None` in FFT.

    pad_mode : string
        Padding mode for centered frame analysis.

        See also: `librosa.core.stft` and `np.pad`.

    Returns
    -------
    CQT : np.ndarray [shape=(n_bins, t)]
        Constant-Q value each frequency at each time.

        The former `real` option has been removed from this function;
        apply `np.abs` to the result to obtain magnitudes.

    Raises
    ------
    ParameterError
        If `hop_length` does not contain enough factors of two to be
        halved once per octave below the first one processed by the
        recursive stage.

        Or if `y` is too short to support the frequency range of the CQT.

    See Also
    --------
    librosa.core.resample
    librosa.util.normalize

    Examples
    --------
    Generate and plot a constant-Q power spectrum

    >>> import matplotlib.pyplot as plt
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> C = librosa.cqt(y, sr=sr)
    >>> librosa.display.specshow(librosa.amplitude_to_db(C, ref=np.max),
    ...                          sr=sr, x_axis='time', y_axis='cqt_note')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Constant-Q power spectrum')
    >>> plt.tight_layout()


    Limit the frequency range

    >>> C = librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'),
    ...                 n_bins=60)


    Using a higher frequency resolution

    >>> C = librosa.cqt(y, sr=sr, fmin=librosa.note_to_hz('C2'),
    ...                 n_bins=60 * 2, bins_per_octave=12 * 2)
    '''

    # How many octaves are we dealing with?
    n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))
    n_filters = min(bins_per_octave, n_bins)

    # Remember the input length for error reporting; `y` is rebound below
    len_orig = len(y)

    if fmin is None:
        # C1 by default
        fmin = note_to_hz('C1')

    if tuning is None:
        tuning = estimate_tuning(y=y, sr=sr)

    # First thing, get the freqs of the top octave
    freqs = cqt_frequencies(n_bins, fmin,
                            bins_per_octave=bins_per_octave)[-bins_per_octave:]

    fmin_t = np.min(freqs)
    fmax_t = np.max(freqs)

    # Determine required resampling quality:
    # the fast resampler is only used when the top filter's cutoff lies
    # below the BW_FASTEST fraction of the Nyquist frequency.
    Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1)
    filter_cutoff = fmax_t * (1 + 0.5 * window_bandwidth(window) / Q)
    nyquist = sr / 2.0
    if filter_cutoff < BW_FASTEST * nyquist:
        res_type = 'kaiser_fast'
    else:
        res_type = 'kaiser_best'

    # NOTE: this rebinds `y`, `sr` and `hop_length`; everything below
    # (including the final length scaling) sees the returned values.
    y, sr, hop_length = __early_downsample(y, sr, hop_length, res_type,
                                           n_octaves, nyquist, filter_cutoff,
                                           scale)

    # Per-octave responses, collected from the highest octave downwards
    cqt_resp = []

    if res_type != 'kaiser_fast':

        # Do the top octave before resampling to allow for fast resampling
        fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                               fmin_t,
                                               n_filters,
                                               bins_per_octave,
                                               tuning,
                                               filter_scale,
                                               norm,
                                               sparsity,
                                               window=window)

        # Compute the CQT filter response and append it to the stack
        cqt_resp.append(
            __cqt_response(y, n_fft, hop_length, fft_basis, pad_mode))

        # The top octave is done: move the working band down one octave
        fmin_t /= 2
        fmax_t /= 2
        n_octaves -= 1

        filter_cutoff = fmax_t * (1 + 0.5 * window_bandwidth(window) / Q)

        # All remaining octaves use the fast resampler
        res_type = 'kaiser_fast'

    # Make sure our hop is long enough to support the bottom octave:
    # the hop is halved once per octave after the first (see loop below).
    num_twos = __num_two_factors(hop_length)
    if num_twos < n_octaves - 1:
        raise ParameterError('hop_length must be a positive integer '
                             'multiple of 2^{0:d} for {1:d}-octave CQT'.format(
                                 n_octaves - 1, n_octaves))

    # Now do the recursive bit
    fft_basis, n_fft, _ = __cqt_filter_fft(sr,
                                           fmin_t,
                                           n_filters,
                                           bins_per_octave,
                                           tuning,
                                           filter_scale,
                                           norm,
                                           sparsity,
                                           window=window)

    # Working copies that are halved on each pass through the loop
    my_y, my_sr, my_hop = y, sr, hop_length

    # Iterate down the octaves
    for i in range(n_octaves):

        # Resample (except first time)
        if i > 0:
            # NOTE(review): `n_octaves` in this message may already have
            # been decremented above, so it can under-report by one.
            if len(my_y) < 2:
                raise ParameterError('Input signal length={} is too short for '
                                     '{:d}-octave CQT'.format(
                                         len_orig, n_octaves))

            # The additional scaling of sqrt(2) here is to implicitly rescale
            # the filters
            my_y = np.sqrt(2) * resample(
                my_y, my_sr, my_sr / 2.0, res_type=res_type, scale=True)
            my_sr /= 2.0
            my_hop //= 2

        # Compute the cqt filter response and append to the stack
        cqt_resp.append(
            __cqt_response(my_y, n_fft, my_hop, fft_basis, pad_mode))

    # Combine the per-octave responses into a single array of `n_bins` rows
    C = __trim_stack(cqt_resp, n_bins)

    if scale:
        # Filter lengths are computed at the (possibly downsampled) `sr`
        lengths = constant_q_lengths(sr,
                                     fmin,
                                     n_bins=n_bins,
                                     bins_per_octave=bins_per_octave,
                                     tuning=tuning,
                                     window=window,
                                     filter_scale=filter_scale)
        C /= np.sqrt(lengths[:, np.newaxis])

    return C
예제 #23
0
def tonnetz(y=None, sr=48000, chroma=None):
    '''Compute tonal centroid features (tonnetz), following the method of
    [1]_.

    Each L1-normalized chroma frame is projected onto six basis vectors:
    a (sine, cosine) pair for each of three interval circles.

    .. [1] Harte, C., Sandler, M., & Gasser, M. (2006). "Detecting Harmonic
           Change in Musical Audio." In Proceedings of the 1st ACM Workshop
           on Audio and Music Computing Multimedia (pp. 21-26).
           Santa Barbara, CA, USA: ACM Press. doi:10.1145/1178723.1178727.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)] or None
        Audio time series.

    sr : number > 0 [scalar]
        sampling rate of `y`

    chroma : np.ndarray [shape=(n_chroma, t)] or None
        Energy for each chroma bin at each frame.  It is L1-normalized
        per frame before projection.

        If `None`, it is computed as `chroma_cqt(y=y, sr=sr)`.

    Returns
    -------
    tonnetz : np.ndarray [shape(6, t)]
        Tonal centroid features for each frame.

        Tonnetz dimensions:
            - 0: Fifth x-axis
            - 1: Fifth y-axis
            - 2: Minor x-axis
            - 3: Minor y-axis
            - 4: Major x-axis
            - 5: Major y-axis

    Raises
    ------
    ParameterError
        If both `y` and `chroma` are `None`.

    See Also
    --------
    chroma_cqt
        Compute a chromagram from a constant-Q transform.

    chroma_stft
        Compute a chromagram from an STFT spectrogram or waveform.

    Examples
    --------
    Compute tonnetz features from the harmonic component of a song

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> y = librosa.effects.harmonic(y)
    >>> tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
    >>> tonnetz
    array([[-0.073, -0.053, ..., -0.054, -0.073],
           [ 0.001,  0.001, ..., -0.054, -0.062],
           ...,
           [ 0.039,  0.034, ...,  0.044,  0.064],
           [ 0.005,  0.002, ...,  0.011,  0.017]])

    Compare the tonnetz features to `chroma_cqt`

    >>> import matplotlib.pyplot as plt
    >>> plt.subplot(2, 1, 1)
    >>> librosa.display.specshow(tonnetz, y_axis='tonnetz')
    >>> plt.colorbar()
    >>> plt.title('Tonal Centroids (Tonnetz)')
    >>> plt.subplot(2, 1, 2)
    >>> librosa.display.specshow(librosa.feature.chroma_cqt(y, sr=sr),
    ...                          y_axis='chroma', x_axis='time')
    >>> plt.colorbar()
    >>> plt.title('Chroma')
    >>> plt.tight_layout()

    '''

    if chroma is None:
        if y is None:
            raise ParameterError('Either the audio samples or the chromagram must be '
                                 'passed as an argument.')
        chroma = chroma_cqt(y=y, sr=sr)

    # Position of each chroma bin on a 12-semitone circle
    n_chroma = chroma.shape[0]
    pitch_pos = np.linspace(0, 12, num=n_chroma, endpoint=False)

    # Angular rate of each of the six output dimensions (two per interval)
    interval_scale = np.asarray([7. / 6, 7. / 6,
                                 3. / 2, 3. / 2,
                                 2. / 3, 2. / 3])

    # Phase table: one row per output dimension, one column per chroma bin
    angles = interval_scale[:, np.newaxis] * pitch_pos[np.newaxis, :]

    # Shifting by half turns cos() into sin() on the even rows
    angles[::2] -= 0.5

    radii = np.array([1, 1,         # Fifths
                      1, 1,         # Minor
                      0.5, 0.5])    # Major

    basis = radii[:, np.newaxis] * np.cos(np.pi * angles)

    # Project the per-frame L1-normalized chroma onto the tonnetz basis
    chroma_l1 = normalize(chroma, norm=1, axis=0)
    return basis.dot(chroma_l1)
예제 #24
0
def lag_to_recurrence(lag, axis=-1):
    '''Map a lag matrix back into a recurrence matrix.

    Every slice `i` taken along `axis` is rolled by `i` positions with
    `roll_sparse`, and the result is truncated to `t` entries along the
    other axis, where `t = lag.shape[axis]`.

    Parameters
    ----------
    lag : np.ndarray or scipy.sparse.spmatrix
        A lag matrix, as produced by `recurrence_to_lag`

    axis : int
        The axis corresponding to the time dimension.
        The alternate axis will be interpreted in lag coordinates.
        Must be one of `0`, `1` or `-1`.

    Returns
    -------
    rec : np.ndarray or scipy.sparse.spmatrix [shape=(n, n)]
        A recurrence matrix in (time, time) coordinates
        For sparse matrices, format will match that of `lag`.

    Raises
    ------
    ParameterError
        If `axis` is not one of `0`, `1`, `-1`, or if `lag` is not
        2-dimensional and either square or twice as long on the lag axis
        as on the time axis.

    See Also
    --------
    recurrence_to_lag

    Examples
    --------
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> hop_length = 1024
    >>> mfccs = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length)
    >>> recurrence = librosa.segment.recurrence_matrix(mfccs)
    >>> lag_pad = librosa.segment.recurrence_to_lag(recurrence, pad=True)
    >>> lag_nopad = librosa.segment.recurrence_to_lag(recurrence, pad=False)
    >>> rec_pad = librosa.segment.lag_to_recurrence(lag_pad)
    >>> rec_nopad = librosa.segment.lag_to_recurrence(lag_nopad)

    >>> import matplotlib.pyplot as plt
    >>> plt.figure(figsize=(8, 4))
    >>> plt.subplot(2, 2, 1)
    >>> librosa.display.specshow(lag_pad, x_axis='time', y_axis='lag',
    ...                          hop_length=hop_length)
    >>> plt.title('Lag (zero-padded)')
    >>> plt.subplot(2, 2, 2)
    >>> librosa.display.specshow(lag_nopad, x_axis='time', y_axis='time',
    ...                          hop_length=hop_length)
    >>> plt.title('Lag (no padding)')
    >>> plt.subplot(2, 2, 3)
    >>> librosa.display.specshow(rec_pad, x_axis='time', y_axis='time',
    ...                          hop_length=hop_length)
    >>> plt.title('Recurrence (with padding)')
    >>> plt.subplot(2, 2, 4)
    >>> librosa.display.specshow(rec_nopad, x_axis='time', y_axis='time',
    ...                          hop_length=hop_length)
    >>> plt.title('Recurrence (without padding)')
    >>> plt.tight_layout()
    >>> plt.show()

    '''

    if axis not in [0, 1, -1]:
        raise ParameterError('Invalid target axis: {}'.format(axis))

    # Only 0, 1 and -1 get here, so the absolute value maps -1 onto 1
    axis = np.abs(axis)

    # Accept square (unpadded) input, or input that is twice as long on
    # the lag axis as on the time axis (padded).
    shape_ok = lag.ndim == 2 and (
        lag.shape[0] == lag.shape[1] or
        lag.shape[1 - axis] == 2 * lag.shape[axis])
    if not shape_ok:
        raise ParameterError('Invalid lag matrix shape: {}'.format(lag.shape))

    n_frames = lag.shape[axis]

    is_sparse = scipy.sparse.issparse(lag)
    if is_sparse:
        # LIL format supports the slice assignment done in the loop below
        rec = scipy.sparse.lil_matrix(lag)
        roll_axis = 1 - axis
    else:
        rec = lag.copy()
        roll_axis = None

    # Roll slice number `shift` by `shift` positions (slice 0 needs no roll)
    selector = [slice(None)] * lag.ndim
    for shift in range(1, n_frames):
        selector[axis] = shift
        key = tuple(selector)
        rec[key] = roll_sparse(lag[key], shift, axis=roll_axis)

    # Keep only the first `n_frames` entries along the non-time axis
    keep = [slice(None)] * rec.ndim
    keep[1 - axis] = slice(n_frames)
    rec = rec[tuple(keep)]

    if is_sparse:
        return rec.asformat(lag.format)
    return np.ascontiguousarray(rec.T).T
예제 #25
0
def spectral_contrast(y=None, sr=48000, S=None, n_fft=2048, hop_length=512,
                      freq=None, fmin=200.0, n_bands=6, quantile=0.02,
                      linear=False):
    '''Compute spectral contrast [1]_

    The spectrum is split into `n_bands + 1` octave-spaced bands.  Within
    each band and frame, the mean of the lowest-magnitude bins (valley) is
    compared against the mean of the highest-magnitude bins (peak).

    .. [1] Jiang, Dan-Ning, Lie Lu, Hong-Jiang Zhang, Jian-Hua Tao,
           and Lian-Hong Cai.
           "Music type classification by spectral contrast feature."
           In Multimedia and Expo, 2002. ICME'02. Proceedings.
           2002 IEEE International Conference on, vol. 1, pp. 113-116.
           IEEE, 2002.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)] or None
        audio time series

    sr : number  > 0 [scalar]
        audio sampling rate of `y`

    S : np.ndarray [shape=(d, t)] or None
        (optional) spectrogram magnitude

    n_fft : int > 0 [scalar]
        FFT window size

    hop_length : int > 0 [scalar]
        hop length for STFT. See `librosa.core.stft` for details.

    freq : None or np.ndarray [shape=(d,)]
        Center frequencies for spectrogram bins.
        If `None`, then FFT bin center frequencies are used.
        Otherwise, it can be a single array of `d` center frequencies.

    fmin : float > 0
        Frequency cutoff for the first bin `[0, fmin]`
        Subsequent bins will cover `[fmin, 2*fmin]`, `[2*fmin, 4*fmin]`, etc.

    n_bands : int >= 1
        number of frequency bands

    quantile : float in (0, 1)
        quantile for determining peaks and valleys

    linear : bool
        If `True`, return the linear difference of magnitudes:
        `peaks - valleys`.

        If `False`, return the logarithmic difference:
        `power_to_db(peaks) - power_to_db(valleys)`.


    Returns
    -------
    contrast : np.ndarray [shape=(n_bands + 1, t)]
        each row of spectral contrast values corresponds to a given
        octave-based frequency

    Raises
    ------
    ParameterError
        If `freq` is not a 1-d array with one entry per row of `S`,
        if `n_bands` is not a positive `int`,
        if `quantile` is outside `(0, 1)`,
        if `fmin` is not positive,
        or if any band's lower edge reaches the Nyquist frequency.


    Examples
    --------
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> S = np.abs(librosa.stft(y))
    >>> contrast = librosa.feature.spectral_contrast(S=S, sr=sr)

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.subplot(2, 1, 1)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S,
    ...                                                  ref=np.max),
    ...                          y_axis='log')
    >>> plt.colorbar(format='%+2.0f dB')
    >>> plt.title('Power spectrogram')
    >>> plt.subplot(2, 1, 2)
    >>> librosa.display.specshow(contrast, x_axis='time')
    >>> plt.colorbar()
    >>> plt.ylabel('Frequency bands')
    >>> plt.title('Spectral contrast')
    >>> plt.tight_layout()
    '''

    S, n_fft = mag_spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

    # Fall back to the FFT bin centers when no frequencies are supplied
    if freq is None:
        freq = fft_frequencies(sr=sr, n_fft=n_fft)
    freq = np.atleast_1d(freq)

    n_rows = S.shape[0]
    if freq.ndim != 1 or len(freq) != n_rows:
        raise ParameterError('freq.shape mismatch: expected '
                             '({:d},)'.format(n_rows))

    if n_bands < 1 or not isinstance(n_bands, int):
        raise ParameterError('n_bands must be a positive integer')

    if not 0.0 < quantile < 1.0:
        raise ParameterError('quantile must lie in the range (0, 1)')

    if fmin <= 0:
        raise ParameterError('fmin must be a positive number')

    # Band edges: 0, fmin, 2*fmin, 4*fmin, ...
    edges = np.zeros(n_bands + 2)
    edges[1:] = fmin * (2.0**np.arange(0, n_bands + 1))

    # Every band must start below Nyquist
    if np.any(edges[:-1] >= 0.5 * sr):
        raise ParameterError('Frequency band exceeds Nyquist. '
                             'Reduce either fmin or n_bands.')

    n_frames = S.shape[1]
    valley = np.zeros((n_bands + 1, n_frames))
    peak = np.zeros_like(valley)

    for band, (f_lo, f_hi) in enumerate(zip(edges[:-1], edges[1:])):
        in_band = (freq >= f_lo) & (freq <= f_hi)
        members = np.flatnonzero(in_band)

        # Every band except the first also takes the bin just below it
        if band > 0:
            in_band[members[0] - 1] = True

        # The last band absorbs all remaining bins above it
        if band == n_bands:
            in_band[members[-1] + 1:] = True

        band_mag = S[in_band]

        # Every band except the last drops its top bin
        if band < n_bands:
            band_mag = band_mag[:-1]

        # Always take at least one bin from each side
        n_extreme = int(np.maximum(np.rint(quantile * np.sum(in_band)), 1))

        ordered = np.sort(band_mag, axis=0)

        valley[band] = np.mean(ordered[:n_extreme], axis=0)
        peak[band] = np.mean(ordered[-n_extreme:], axis=0)

    if linear:
        return peak - valley

    return power_to_db(peak) - power_to_db(valley)
예제 #26
0
def match_intervals(intervals_from, intervals_to):
    '''Match one set of time intervals to another.

    This can be useful for tasks such as mapping beat timings
    to segments.

    Each source interval is assigned the target interval with which it
    has the largest overlap, where the overlap of `[a, b]` and `[c, d]`
    is `max(0, min(b, d) - max(a, c))`.

    .. note:: A target interval may be matched to multiple source
      intervals.

    .. note:: If a source interval overlaps no target interval, all of its
      scores are zero and it is matched to target index 0.

    Parameters
    ----------
    intervals_from : np.ndarray [shape=(n, 2)]
        The time range for source intervals.
        The `i` th interval spans time `intervals_from[i, 0]`
        to `intervals_from[i, 1]`.
        `intervals_from[0, 0]` should be 0, `intervals_from[-1, 1]`
        should be the track duration.

    intervals_to : np.ndarray [shape=(m, 2)]
        Analogous to `intervals_from`.

    Returns
    -------
    interval_mapping : np.ndarray [shape=(n,), dtype=int]
        For each interval in `intervals_from`, the index of the
        corresponding interval in `intervals_to`.

    See Also
    --------
    match_events

    Raises
    ------
    ParameterError
        If either array of input intervals is empty, or is rejected
        by `valid_intervals`.
    '''

    if len(intervals_from) == 0 or len(intervals_to) == 0:
        raise ParameterError('Attempting to match empty interval list')

    # Verify that the input intervals has correct shape and size
    valid_intervals(intervals_from)
    valid_intervals(intervals_to)

    # The overlap score of a beat with a segment is defined as
    #   max(0, min(beat_end, segment_end) - max(beat_start, segment_start))
    #
    # The `np.int` alias was removed in NumPy 1.24; the builtin `int`
    # selects the same dtype on every NumPy version.
    output = np.empty(len(intervals_from), dtype=int)

    # Process the source intervals in blocks of rows so that each
    # (block, m) intermediate array is sized against MAX_MEM_BLOCK.
    n_rows = int(spectralUtil.MAX_MEM_BLOCK / (len(intervals_to) * intervals_to.itemsize))
    n_rows = max(1, n_rows)

    for bl_s in range(0, len(intervals_from), n_rows):
        bl_t = min(bl_s + n_rows, len(intervals_from))
        tmp_from = intervals_from[bl_s:bl_t]

        # Pairwise overlap between every source row in the block
        # and every target interval
        starts = np.maximum.outer(tmp_from[:, 0], intervals_to[:, 0])
        ends = np.minimum.outer(tmp_from[:, 1], intervals_to[:, 1])
        score = np.maximum(0, ends - starts)

        # Best-overlapping target for each source row (first one on ties)
        output[bl_s:bl_t] = np.argmax(score, axis=-1)

    return output
예제 #27
0
def spectral_centroid(y=None, sr=48000, S=None, n_fft=2048, hop_length=512,
                      freq=None):
    '''Compute the spectral centroid.

    Every frame (column) of the magnitude spectrogram is scaled to unit
    L1 norm and interpreted as a distribution over frequency bins.  The
    centroid of a frame is the mean frequency under that distribution.

    `y`, `S`, `n_fft` and `hop_length` are handed to `mag_spectrogram`,
    which supplies the magnitude spectrogram (and matching `n_fft`) that
    is actually analyzed.

    Parameters
    ----------
    y : np.ndarray [shape=(n,)] or None
        audio time series

    sr : number > 0 [scalar]
        audio sampling rate of `y`

    S : np.ndarray [shape=(d, t)] or None
        (optional) spectrogram magnitude

    n_fft : int > 0 [scalar]
        FFT window size

    hop_length : int > 0 [scalar]
        hop length for STFT. See `librosa.core.stft` for details.

    freq : None or np.ndarray [shape=(d,) or shape=(d, t)]
        Center frequencies for spectrogram bins.
        If `None`, then FFT bin center frequencies are used.
        Otherwise, it can be a single array of `d` center frequencies,
        or a matrix of center frequencies as constructed by
        `librosa.core.ifgram`

    Returns
    -------
    centroid : np.ndarray [shape=(1, t)]
        centroid frequencies

    Raises
    ------
    ParameterError
        If the spectrogram is complex-valued, or if it contains
        negative entries.

    See Also
    --------
    librosa.core.stft
        Short-time Fourier Transform

    librosa.core.ifgram
        Instantaneous-frequency spectrogram

    Examples
    --------
    From time-series input:

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> cent = librosa.feature.spectral_centroid(y=y, sr=sr)
    >>> cent
    array([[ 4382.894,   626.588, ...,  5037.07 ,  5413.398]])

    From spectrogram input:

    >>> S, phase = librosa.magphase(librosa.stft(y=y))
    >>> librosa.feature.spectral_centroid(S=S)
    array([[ 4382.894,   626.588, ...,  5037.07 ,  5413.398]])

    Using variable bin center frequencies:

    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> if_gram, D = librosa.ifgram(y)
    >>> librosa.feature.spectral_centroid(S=np.abs(D), freq=if_gram)
    array([[ 4420.719,   625.769, ...,  5011.86 ,  5221.492]])

    Plot the result

    >>> import matplotlib.pyplot as plt
    >>> plt.figure()
    >>> plt.subplot(2, 1, 1)
    >>> plt.semilogy(cent.T, label='Spectral centroid')
    >>> plt.ylabel('Hz')
    >>> plt.xticks([])
    >>> plt.xlim([0, cent.shape[-1]])
    >>> plt.legend()
    >>> plt.subplot(2, 1, 2)
    >>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
    ...                          y_axis='log', x_axis='time')
    >>> plt.title('log Power spectrogram')
    >>> plt.tight_layout()
    '''

    S, n_fft = mag_spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

    # The spectrogram columns act as weights of a mean, so they have to be
    # real-valued and non-negative.
    if not np.isrealobj(S):
        raise ParameterError('Spectral centroid is only defined '
                             'with real-valued input')

    if np.any(S < 0):
        raise ParameterError('Spectral centroid is only defined '
                             'with non-negative energies')

    # Fall back to the FFT bin center frequencies when none are supplied
    if freq is None:
        freq = fft_frequencies(sr=sr, n_fft=n_fft)

    # A single vector of bin frequencies is turned into a column so that it
    # broadcasts across all frames
    if freq.ndim == 1:
        freq = freq.reshape((-1, 1))

    # Per-frame weights: each column of S scaled to sum to one
    weights = normalize(S, norm=1, axis=0)

    # Weighted mean of the frequencies, one value per frame
    return np.sum(freq * weights, axis=0, keepdims=True)
예제 #28
0
def path_enhance(R, n, window='hann', max_ratio=2.0, min_ratio=None, n_filters=7,
                 zero_mean=False, clip=True, **kwargs):
    '''Multi-angle path enhancement for self- and cross-similarity matrices.

    A bank of diagonal smoothing filters, one per orientation, is convolved
    with the self-similarity (or recurrence) matrix R.  The individual
    responses are then combined by taking their element-wise maximum.

    Formally, the output is a matrix R_smooth such that

        `R_smooth[i, j] = max_theta (R * filter_theta)[i, j]`

    where `*` denotes 2-dimensional convolution, and `filter_theta` is a
    smoothing filter at orientation theta.

    The purpose is coherent temporal smoothing of self-similarity matrices
    in the presence of tempo changes.

    The filter orientations are spaced evenly on a logarithmic (base-2) scale
    between min_ratio and max_ratio.

    This function is inspired by the multi-angle path enhancement of [1]_, but
    differs by modeling tempo differences in the space of similarity matrices
    rather than re-sampling the underlying features prior to generating the
    self-similarity matrix.

    .. [1] Müller, Meinard and Frank Kurth.
            "Enhancing similarity matrices for music audio analysis."
            2006 IEEE International Conference on Acoustics Speech and Signal Processing Proceedings.
            Vol. 5. IEEE, 2006.

    .. note:: if using recurrence_matrix to construct the input similarity matrix, be sure to include the main
              diagonal by setting `self=True`.  Otherwise, the diagonal will be suppressed, and this is likely to
              produce discontinuities which will pollute the smoothing filter response.

    Parameters
    ----------
    R : np.ndarray
        The self- or cross-similarity matrix to be smoothed.
        Note: sparse inputs are not supported.

    n : int > 0
        The length of the smoothing filter

    window : window specification
        The type of smoothing filter to use.  See `filters.get_window` for more information
        on window specification formats.

    max_ratio : float > 0
        The maximum tempo ratio to support

    min_ratio : float > 0
        The minimum tempo ratio to support.
        If not provided, it will default to `1/max_ratio`

    n_filters : int >= 1
        The number of different smoothing filters to use, evenly spaced
        between `min_ratio` and `max_ratio`.

        If `min_ratio = 1/max_ratio` (the default), using an odd number
        of filters will ensure that the main diagonal (ratio=1) is included.

    zero_mean : bool
        By default, the smoothing filters are non-negative and sum to one (i.e. are averaging
        filters).

        If `zero_mean=True`, then the smoothing filters are made to sum to zero by subtracting
        a constant value from the non-diagonal coordinates of the filter.  This is primarily
        useful for suppressing blocks while enhancing diagonals.

    clip : bool
        If True, the smoothed similarity matrix will be thresholded at 0, and will not contain
        negative entries.

    kwargs : additional keyword arguments
        Additional arguments to pass to `scipy.ndimage.convolve`


    Returns
    -------
    R_smooth : np.ndarray, shape=R.shape
        The smoothed self- or cross-similarity matrix

    Raises
    ------
    ParameterError
        If `min_ratio` is provided and exceeds `max_ratio`.

    See Also
    --------
    filters.diagonal_filter
    recurrence_matrix


    Examples
    --------
    Use a 51-frame diagonal smoothing filter to enhance paths in a recurrence matrix

    >>> y, sr = librosa.load(librosa.util.example_audio_file(), duration=30)
    >>> hop_length = 1024
    >>> chroma = librosa.feature.chroma_cqt(y=y, sr=sr, hop_length=hop_length)
    >>> rec = librosa.segment.recurrence_matrix(chroma, mode='affinity', self=True)
    >>> rec_smooth = librosa.segment.path_enhance(rec, 51, window='hann', n_filters=7)

    Plot the recurrence matrix before and after smoothing

    >>> import matplotlib.pyplot as plt
    >>> plt.figure(figsize=(8, 4))
    >>> plt.subplot(1,2,1)
    >>> librosa.display.specshow(rec, x_axis='time', y_axis='time',
    ...                          hop_length=hop_length)
    >>> plt.title('Unfiltered recurrence')
    >>> plt.subplot(1,2,2)
    >>> librosa.display.specshow(rec_smooth, x_axis='time', y_axis='time',
    ...                          hop_length=hop_length)
    >>> plt.title('Multi-angle enhanced recurrence')
    >>> plt.tight_layout()
    >>> plt.show()
    '''

    if min_ratio is None:
        # Default to a range that is symmetric (in log-ratio) around 1
        min_ratio = 1./max_ratio
    elif min_ratio > max_ratio:
        raise ParameterError('min_ratio={} cannot exceed max_ratio={}'.format(min_ratio, max_ratio))

    # Filter slopes, evenly spaced in log2 between the two ratio bounds
    slopes = np.logspace(np.log2(min_ratio), np.log2(max_ratio),
                         num=n_filters, base=2)

    R_smooth = None
    for slope in slopes:
        smoother = diagonal_filter(window, n, slope=slope, zero_mean=zero_mean)
        response = scipy.ndimage.convolve(R, smoother, **kwargs)

        if R_smooth is None:
            # The first response seeds the running maximum
            R_smooth = response
        else:
            # Fold the new response into the running maximum, in-place
            np.maximum(R_smooth, response, out=R_smooth)

    if clip:
        # Threshold negative entries at zero, in-place
        np.clip(R_smooth, 0, None, out=R_smooth)

    return R_smooth
예제 #29
0
def pad_center(data, size, axis=-1, **kwargs):
    '''Center an array within a longer axis by padding both of its ends.

    This is a thin wrapper around `np.pad`, analogous to `str.center()`.
    The padding is split between the two ends of `axis`; when the total
    amount of padding is odd, the extra element goes at the end.

    Parameters
    ----------
    data : np.ndarray
        Vector to be padded and centered

    size : int >= len(data) [scalar]
        Length to pad `data`

    axis : int
        Axis along which to pad and center the data

    kwargs : additional keyword arguments
      arguments passed to `np.pad()`.
      If `mode` is not given, it defaults to `'constant'`.

    Returns
    -------
    data_padded : np.ndarray
        `data` centered and padded to length `size` along the
        specified axis

    Raises
    ------
    ParameterError
        If `size < data.shape[axis]`

    See Also
    --------
    numpy.pad

    Examples
    --------
    >>> # Generate a vector
    >>> data = np.ones(5)
    >>> librosa.util.pad_center(data, 10, mode='constant')
    array([ 0.,  0.,  1.,  1.,  1.,  1.,  1.,  0.,  0.,  0.])

    >>> # Pad a matrix along its first dimension
    >>> data = np.ones((3, 5))
    >>> librosa.util.pad_center(data, 7, axis=0)
    array([[ 0.,  0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.,  0.],
           [ 1.,  1.,  1.,  1.,  1.],
           [ 1.,  1.,  1.,  1.,  1.],
           [ 1.,  1.,  1.,  1.,  1.],
           [ 0.,  0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.,  0.]])
    >>> # Or its second dimension
    >>> librosa.util.pad_center(data, 7, axis=1)
    array([[ 0.,  1.,  1.,  1.,  1.,  1.,  0.],
           [ 0.,  1.,  1.,  1.,  1.,  1.,  0.],
           [ 0.,  1.,  1.,  1.,  1.,  1.,  0.]])
    '''

    # Zero-style padding unless the caller asked for another mode
    kwargs.setdefault('mode', 'constant')

    current = data.shape[axis]

    # Floor division puts the smaller half of the padding in front
    before = int((size - current) // 2)

    # Floor division keeps this negative whenever size < current
    if before < 0:
        raise ParameterError(('Target size ({:d}) must be '
                              'at least input size ({:d})').format(size, current))

    after = int(size - current - before)

    # No padding anywhere except along the requested axis
    pad_widths = [(0, 0) for _ in range(data.ndim)]
    pad_widths[axis] = (before, after)

    return np.pad(data, pad_widths, **kwargs)
예제 #30
0
def recurrence_to_lag(rec, pad=True, axis=-1):
    '''Convert a recurrence matrix into a lag matrix.

        `lag[i, j] == rec[i+j, j]`

    The conversion walks over the slices of the `time` axis and rolls
    slice `i` by `-i` positions using `roll_sparse`.

    Parameters
    ----------
    rec : np.ndarray, or scipy.sparse.spmatrix [shape=(n, n)]
        A (binary) recurrence matrix, as returned by `recurrence_matrix`

    pad : bool
        If False, `lag` matrix is square, which is equivalent to
        assuming that the signal repeats itself indefinitely.

        If True, `lag` is padded with `n` zeros, which eliminates
        the assumption of repetition.

    axis : int
        The axis to keep as the `time` axis.
        The alternate axis will be converted to lag coordinates.
        Only the absolute value is used, so `axis=-1` behaves as `axis=1`.

    Returns
    -------
    lag : np.ndarray
        The recurrence matrix in (lag, time) (if `axis=1`)
        or (time, lag) (if `axis=0`) coordinates.
        For sparse input, the result is sparse and has the same
        storage format as `rec`.

    Raises
    ------
    ParameterError : if `rec` is non-square

    See Also
    --------
    recurrence_matrix
    lag_to_recurrence

    Examples
    --------
    >>> y, sr = librosa.load(librosa.util.example_audio_file())
    >>> hop_length = 1024
    >>> mfccs = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length)
    >>> recurrence = librosa.segment.recurrence_matrix(mfccs)
    >>> lag_pad = librosa.segment.recurrence_to_lag(recurrence, pad=True)
    >>> lag_nopad = librosa.segment.recurrence_to_lag(recurrence, pad=False)

    >>> import matplotlib.pyplot as plt
    >>> plt.figure(figsize=(8, 4))
    >>> plt.subplot(1, 2, 1)
    >>> librosa.display.specshow(lag_pad, x_axis='time', y_axis='lag',
    ...                          hop_length=hop_length)
    >>> plt.title('Lag (zero-padded)')
    >>> plt.subplot(1, 2, 2)
    >>> librosa.display.specshow(lag_nopad, x_axis='time', hop_length=hop_length)
    >>> plt.title('Lag (no padding)')
    >>> plt.tight_layout()
    >>> plt.show()
    '''

    # Fold negative axis values onto their positive counterpart (-1 -> 1)
    axis = np.abs(axis)

    if rec.ndim != 2 or rec.shape[0] != rec.shape[1]:
        raise ParameterError('non-square recurrence matrix shape: {}'.format(rec.shape))

    is_sparse = scipy.sparse.issparse(rec)

    if is_sparse:
        # Sparse slices keep two dimensions, so the roll needs an explicit axis
        roll_ax = 1 - axis

        # Remember the caller's storage format so it can be restored on return
        out_format = rec.format

        if axis == 0:
            rec = rec.tocsc()
        elif axis in (-1, 1):
            rec = rec.tocsr()
    else:
        roll_ax = None

    n_frames = rec.shape[axis]

    if is_sparse and pad:
        # Kronecker product with a [1, 0] pattern appends an all-zero copy of
        # `rec` along the lag axis
        expander = np.asarray([[1, 0]]).swapaxes(axis, 0)
        lag = scipy.sparse.kron(expander.astype(rec.dtype), rec, format='lil')
    elif is_sparse:
        lag = scipy.sparse.lil_matrix(rec)
    elif pad:
        # Append `n_frames` zeros at the end of the lag axis only
        pad_widths = [(0, 0), (0, 0)]
        pad_widths[(1-axis)] = (0, n_frames)
        lag = np.pad(rec, pad_widths, mode='constant')
    else:
        lag = rec.copy()

    selector = [slice(None)] * lag.ndim

    # Slice 0 needs no shift; every later slice `shift` is rolled by -shift
    for shift in range(1, n_frames):
        selector[axis] = shift
        key = tuple(selector)
        lag[key] = roll_sparse(lag[key], -shift, axis=roll_ax)

    if is_sparse:
        return lag.asformat(out_format)

    # Dense output is laid out so that its transpose is C-contiguous
    return np.ascontiguousarray(lag.T).T