Example #1
import numpy as np


def auto_correlogram(x, filter_bank, correlation_window=Milliseconds(30)):
    # Milliseconds and sliding_window are helpers from the surrounding
    # (zounds-style) codebase and are assumed to be in scope here.
    n_filters = filter_bank.shape[0]
    filter_size = filter_bank.shape[1]

    # number of samples covered by a single correlation window
    corr_win_samples = int(correlation_window / x.samplerate.frequency)

    # apply the filter bank: window the signal and take dot products with
    # every filter, yielding one filtered channel per filter
    windowed = sliding_window(x, filter_size, 1, flatten=False)
    filtered = np.dot(windowed, filter_bank.T)

    # slice the filtered signal into overlapping correlation windows
    corr = sliding_window(filtered,
                          ws=(corr_win_samples, n_filters),
                          ss=(1, n_filters),
                          flatten=False)

    # zero-pad each window to twice its length so the FFT-based
    # autocorrelation below is linear rather than circular
    padded_shape = list(corr.shape)
    padded_shape[2] = corr_win_samples * 2
    padded = np.zeros(padded_shape, dtype=np.float32)
    padded[:, :, :corr_win_samples, :] = corr

    # Wiener-Khinchin: the inverse FFT of the power spectrum is the
    # autocorrelation of each (zero-padded) window
    coeffs = np.fft.fft(padded, axis=2, norm='ortho')
    correlated = np.fft.ifft(np.abs(coeffs) ** 2, axis=2, norm='ortho')

    # swap the two halves of the lag axis so lags appear in natural order
    return np.concatenate([
        correlated[:, :, corr_win_samples:, :],
        correlated[:, :, :corr_win_samples, :],
    ], axis=2)
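
A minimal usage sketch for the function above, assuming it comes from a zounds-style codebase: the SineSynthesizer / SR11025 / Seconds calls follow zounds' documented pattern but should be treated as assumptions here, and the random filter bank is purely illustrative.

import numpy as np
import zounds

# two seconds of a simple harmonic tone (assumed zounds API)
synth = zounds.SineSynthesizer(zounds.SR11025())
samples = synth.synthesize(zounds.Seconds(2), [220., 440.])

# a hypothetical bank of 32 random FIR filters, 128 taps each
filter_bank = np.random.normal(0, 1, (32, 128)).astype(np.float32)

# one 30 ms autocorrelation per frame and per filter channel
correlogram = auto_correlogram(
    samples, filter_bank, correlation_window=zounds.Milliseconds(30))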
Example #2
import numpy as np
from scipy.signal import resample


def pitch_shift(x, semitones, frame_sample_rate=None):
    # time_stretch, sliding_window and ArrayWithUnits are helpers from the
    # surrounding (zounds-style) codebase and are assumed to be in scope.
    original_length = x.shape[1] if x.ndim == 2 else x.shape[0]

    # first, perform a time stretch so that the audio will have the desired
    # pitch
    factor = 2.0 ** (-float(semitones) / 12.0)
    stretched = time_stretch(x, factor, frame_sample_rate=frame_sample_rate)

    # hang on to the original dimensions
    dimensions = stretched.dimensions

    # window the audio using a power-of-2 frame size for more efficient FFT
    # computations
    batch_size = stretched.shape[0]
    window_size = 1024
    step = (1, window_size)
    new_window_shape = int(window_size * factor)
    padding = window_size - int(stretched.shape[-1] % window_size)
    stretched = np.pad(stretched, ((0, 0), (0, padding)), mode='constant')
    windowed = sliding_window(stretched, step, step, flatten=False).squeeze()

    # resample each window so that the audio regains its original duration
    rs = resample(windowed, new_window_shape, axis=-1)

    # flatten out the windowed, resampled audio
    rs = rs.reshape(batch_size, -1)

    # slice the audio to remove residual zeros resulting from the power-of-2
    # zero padding above
    rs = rs[:, :original_length]

    return ArrayWithUnits(rs, dimensions)
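
Again a hedged usage sketch, assuming pitch_shift as defined above together with a zounds-style environment that provides time_stretch and ArrayWithUnits; the synthesizer calls are the same assumption as in the previous example.

import zounds

# two seconds of a 440 Hz tone (assumed zounds API)
synth = zounds.SineSynthesizer(zounds.SR11025())
samples = synth.synthesize(zounds.Seconds(2), [440.])

# shift the signal by four semitones; the result is trimmed back to the
# original number of samples
shifted = pitch_shift(samples, semitones=4)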
Example #3
    def sliding_window(self, windowsize, stepsize=None):
        # translate the requested window and step sizes into per-dimension
        # spans; the step defaults to the window size
        ws = tuple(self._compute_span(windowsize))
        ss = tuple(self._compute_span(stepsize)) if stepsize else ws

        # the bare name resolves to the module-level sliding_window helper,
        # not to this method
        result = sliding_window(self, ws, ss)

        # try to carry the original dimensions over to the windowed result;
        # fall back to identity (unitless) dimensions if that fails
        try:
            new_dims = tuple(self._compute_new_dims(result, ws, ss))
        except ValueError:
            new_dims = [IdentityDimension()] * result.ndim

        return ArrayWithUnits(result, new_dims)