def auto_correlogram(x, filter_bank, correlation_window=Milliseconds(30)):
    """Compute a per-filter autocorrelation over sliding windows of `x`.

    The signal is cut into overlapping frames the size of one filter, each
    frame is projected onto every filter in `filter_bank`, and the resulting
    filter responses are autocorrelated over windows of `correlation_window`
    using an FFT.

    Args:
        x: Input signal. Must expose `samplerate.frequency`
            (NOTE(review): presumably a 1D audio-samples array -- confirm).
        filter_bank: 2D array whose rows are filters, i.e. shaped
            `(n_filters, filter_size)`.
        correlation_window: Length of the autocorrelation window; it is
            divided by `x.samplerate.frequency` to get a sample count.

    Returns:
        The inverse-FFT result (a complex array) whose axis 2 has length
        `2 * corr_win_samples`, with the two halves of that axis swapped so
        that what was index 0 ends up at index `corr_win_samples`.
    """
    n_filters = filter_bank.shape[0]
    filter_size = filter_bank.shape[1]
    corr_win_samples = int(correlation_window / x.samplerate.frequency)

    # One frame per sample position, each frame as long as a filter.
    windowed = sliding_window(x, filter_size, 1, flatten=False)

    # Project every frame onto every filter.
    filtered = np.dot(windowed, filter_bank.T)

    # Group the filter responses into overlapping correlation windows.
    corr = sliding_window(
        filtered,
        ws=(corr_win_samples, n_filters),
        ss=(1, n_filters),
        flatten=False)

    # Zero-pad axis 2 to twice the window length before transforming
    # (presumably so the FFT-based correlation does not wrap around).
    padded_shape = list(corr.shape)
    padded_shape[2] = corr_win_samples * 2
    padded = np.zeros(padded_shape, dtype=np.float32)
    padded[:, :, :corr_win_samples, :] = corr

    # Autocorrelation as the inverse transform of the power spectrum.
    coeffs = np.fft.fft(padded, axis=2, norm='ortho')
    correlated = np.fft.ifft(np.abs(coeffs) ** 2, axis=2, norm='ortho')

    # Swap the two halves of axis 2 so index 0 of the transform output lands
    # in the middle of the axis.
    return np.concatenate([
        correlated[:, :, corr_win_samples:, :],
        correlated[:, :, :corr_win_samples, :],
    ], axis=2)
def pitch_shift(x, semitones, frame_sample_rate=None):
    """Shift the pitch of `x` by `semitones`, keeping its original length.

    The audio is first time-stretched by `2 ** (-semitones / 12)`, then
    resampled frame by frame by that same factor, and finally trimmed to the
    input's length.

    Args:
        x: Input audio; the length is read from axis 1 when `x` is 2D and
            from axis 0 otherwise.
        semitones: Size of the shift in semitones (converted with `float`).
        frame_sample_rate: Passed straight through to `time_stretch`.

    Returns:
        An `ArrayWithUnits` of shape `(batch, original_length)` carrying the
        dimensions of the time-stretched intermediate.
    """
    # Number of samples the result is trimmed back to.
    target_length = x.shape[0] if x.ndim != 2 else x.shape[1]

    # Time stretch first, so that the later resampling yields the desired
    # pitch.
    ratio = 2.0 ** (-float(semitones) / 12.0)
    stretched = time_stretch(x, ratio, frame_sample_rate=frame_sample_rate)

    # Remember the dimensions and batch size before padding.
    dims = stretched.dimensions
    n_examples = stretched.shape[0]

    # Use a power-of-2 frame size for more efficient FFT computations.
    frame_size = 1024
    frame_shape = (1, frame_size)
    resampled_frame_size = int(frame_size * ratio)

    # Zero-pad the last axis up to a multiple of the frame size; this always
    # appends between 1 and `frame_size` zeros.
    remainder = int(stretched.shape[-1] % frame_size)
    pad_widths = ((0, 0), (0, frame_size - remainder))
    stretched = np.pad(stretched, pad_widths, mode='constant')

    # Cut into non-overlapping frames and drop singleton axes.
    frames = sliding_window(
        stretched, frame_shape, frame_shape, flatten=False).squeeze()

    # Resample every frame so the audio has the correct duration.
    resampled = resample(frames, resampled_frame_size, axis=-1)

    # Join the frames back together per example, then cut off the residual
    # zeros introduced by the padding above.
    flat = resampled.reshape(n_examples, -1)
    trimmed = flat[:, :target_length]
    return ArrayWithUnits(trimmed, dims)
def sliding_window(self, windowsize, stepsize=None):
    """Return sliding windows over this array as an `ArrayWithUnits`.

    Args:
        windowsize: Window size, converted with `self._compute_span`.
        stepsize: Step between windows, converted the same way. When falsy
            (including the default `None`) the window span is used as the
            step.

    Returns:
        An `ArrayWithUnits` wrapping the windowed data. Its dimensions come
        from `self._compute_new_dims`; if that raises `ValueError`, every
        axis gets an `IdentityDimension` instead.
    """
    window_span = tuple(self._compute_span(windowsize))
    if stepsize:
        step_span = tuple(self._compute_span(stepsize))
    else:
        step_span = window_span

    # Inside the method body this name refers to the module-level function.
    windows = sliding_window(self, window_span, step_span)

    try:
        dims = tuple(
            self._compute_new_dims(windows, window_span, step_span))
    except ValueError:
        # Dimensions could not be derived for the windowed result.
        dims = [IdentityDimension()] * windows.ndim

    return ArrayWithUnits(windows, dims)