def dft_bins(N, fs=44000, positive_only=True):
    """
    Compute the frequency bin centers for a DFT with `N` coefficients.

    Parameters
    ----------
    N : int
        The number of frequency bins in the DFT
    fs : int
        The sample rate/frequency of the signal (in Hz). Default is 44000.
    positive_only : bool
        Whether to only return the bins for the positive frequency
        terms. Default is True.

    Returns
    -------
    bins : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)` or `(N // 2 + 1,)` if `positive_only`
        The frequency bin centers associated with each coefficient in the
        DFT spectrum
    """
    if positive_only:
        freq_bins = np.linspace(0, fs / 2, 1 + N // 2, endpoint=True)
    else:
        l, r = (1 + (N - 1) / 2, (1 - N) / 2) if N % 2 else (N / 2, -N / 2)
        freq_bins = np.r_[np.arange(l), np.arange(r, 0)] * fs / N
    return freq_bins
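# A hedged sanity check (not library code): with a standard NumPy backend,
# `dft_bins` should agree with `np.fft.rfftfreq` / `np.fft.fftfreq`, which
# use the same bin conventions.
def _check_dft_bins(N=8, fs=44000):
    assert np.allclose(
        dft_bins(N, fs, positive_only=True), np.fft.rfftfreq(N, d=1 / fs)
    )
    assert np.allclose(
        dft_bins(N, fs, positive_only=False), np.fft.fftfreq(N, d=1 / fs)
    )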
def DFT(frame, positive_only=True):
    """
    A naive :math:`O(N^2)` implementation of the 1D discrete Fourier
    transform (DFT).

    Notes
    -----
    The Fourier transform decomposes a signal into a linear combination of
    sinusoids (i.e., basis elements in the space of continuous periodic
    functions). For a sequence :math:`\mathbf{x} = [x_1, \ldots, x_N]` of N
    evenly spaced samples, the `k` th DFT coefficient is given by:

    .. math::

        c_k = \sum_{n=0}^{N-1} x_n \exp(-2 \pi i k n / N)

    where `i` is the imaginary unit, `k` is an index ranging from `0, ...,
    N-1`, and :math:`c_k` is the complex coefficient representing the phase
    (imaginary part) and amplitude (real part) of the `k` th sinusoid in the
    DFT spectrum. The frequency of the `k` th sinusoid is :math:`(2 \pi k / N)`
    radians per sample.

    When applied to a real-valued input, the negative frequency terms are the
    complex conjugates of the positive-frequency terms and the overall
    spectrum is symmetric (excluding the first index, which contains the
    zero-frequency / intercept term).

    Parameters
    ----------
    frame : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A signal frame consisting of N samples
    positive_only : bool
        Whether to only return the coefficients for the positive frequency
        terms. Default is True.

    Returns
    -------
    spectrum : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)` or `(N // 2 + 1,)` if `positive_only`
        The coefficients of the frequency spectrum for `frame`, including
        imaginary components.
    """
    N = len(frame)  # window length

    # F[i,j] = coefficient for basis vector i, timestep j (i.e., k * n)
    F = np.arange(N).reshape(1, -1) * np.arange(N).reshape(-1, 1)
    F = np.exp(F * (-1j * 2 * np.pi / N))

    # project `frame` onto each basis vector. NB. `np.vdot` conjugates its
    # first argument, which would flip the sign of the exponent, so we use a
    # plain matrix-vector product instead
    spectrum = F @ frame
    return spectrum[:(N // 2) + 1] if positive_only else spectrum
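# A minimal sketch (assuming a standard NumPy backend) checking the naive
# DFT above against NumPy's FFT, which uses the same sign convention;
# `_check_dft` is an illustrative name, not part of the library.
def _check_dft(N=64):
    x = np.random.rand(N)
    assert np.allclose(DFT(x, positive_only=False), np.fft.fft(x))
    assert np.allclose(DFT(x, positive_only=True), np.fft.rfft(x))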
def cepstral_lifter(mfccs, D):
    """
    A simple sinusoidal filter applied in the Mel-frequency domain.

    Notes
    -----
    Cepstral lifting helps to smooth the spectral envelope and rescales the
    cepstral coefficients so they have a more uniform dynamic range, boosting
    the magnitude of the higher-order coefficients (which otherwise tend to
    be small) while leaving the zeroth coefficient unchanged. The filter
    function is:

    .. math::

        \\text{lifter}( x_n ) = x_n \left(1 + \\frac{D \sin(\pi n / D)}{2}\\right)

    Parameters
    ----------
    mfccs : :py:class:`ndarray <numpy.ndarray>` of shape `(G, C)`
        Matrix of Mel cepstral coefficients. Rows correspond to frames,
        columns to cepstral coefficients
    D : int in :math:`[0, +\infty)`
        The filter coefficient. 0 corresponds to no filtering, larger values
        correspond to greater amounts of smoothing

    Returns
    -------
    out : :py:class:`ndarray <numpy.ndarray>` of shape `(G, C)`
        The liftered MFCC coefficients
    """
    if D == 0:
        return mfccs
    n = np.arange(mfccs.shape[1])
    return mfccs * (1 + (D / 2) * np.sin(np.pi * n / D))
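# Illustrative usage with hypothetical values: D=22 is the conventional
# HTK-style lifter coefficient for 13-dimensional MFCC frames.
def _example_cepstral_lifter():
    mfccs = np.random.rand(100, 13)  # 100 frames, 13 coefficients each
    lifted = cepstral_lifter(mfccs, D=22)
    assert lifted.shape == mfccs.shape
    assert np.allclose(lifted[:, 0], mfccs[:, 0])  # n = 0 term is unchanged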
def _p_decreasing(self, loss_history, i):
    """
    Compute the probability that the slope of the OLS fit to the loss
    history is negative.

    Parameters
    ----------
    loss_history : numpy array of shape (N,)
        The sequence of loss values for the previous `N` minibatches.
    i : int
        Compute P(Slope < 0) beginning at index i in `loss_history`.

    Returns
    -------
    p_decreasing : float
        The probability that the slope of the OLS fit to loss_history is
        less than or equal to 0.
    """
    loss = loss_history[i:]
    N = len(loss)

    # perform OLS on the loss entries to estimate the mean of the slope
    X = np.c_[np.ones(N), np.arange(i, len(loss_history))]
    intercept, s_mean = np.linalg.inv(X.T @ X) @ X.T @ loss
    loss_pred = s_mean * X[:, 1] + intercept

    # compute the variance of our loss predictions and use this to compute
    # the (unbiased) estimate of the slope variance
    loss_var = 1 / (N - 2) * np.sum((loss - loss_pred) ** 2)
    s_var = (12 * loss_var) / (N ** 3 - N)

    # compute the probability that a random sample from a Gaussian
    # parameterized by s_mean and s_var is less than or equal to 0
    p_decreasing = gaussian_cdf(0, s_mean, s_var)
    return p_decreasing
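# A hedged standalone check (not library code) that the closed-form OLS fit
# in `_p_decreasing` matches `np.polyfit`; the loss values here are
# synthetic and purely illustrative.
def _check_ols_slope(N=100):
    rng = np.random.RandomState(0)
    loss = 1.0 - 0.002 * np.arange(N) + 0.01 * rng.randn(N)

    # normal-equation solution, as in `_p_decreasing` with i = 0
    X = np.c_[np.ones(N), np.arange(N)]
    intercept, s_mean = np.linalg.inv(X.T @ X) @ X.T @ loss

    # np.polyfit returns coefficients highest-degree first
    slope_ref, intercept_ref = np.polyfit(np.arange(N), loss, deg=1)
    assert np.allclose([s_mean, intercept], [slope_ref, intercept_ref])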
def transform(self, labels, categories=None):
    """
    Convert a collection of labels into a one-hot encoding.

    Parameters
    ----------
    labels : array of length `N`
        The category labels to encode. NB. the `asnumpy` / `item` calls
        below assume an mxnet-style ndarray rather than a plain Python list.
    categories : list of length `C`
        List of the unique category labels for the items to encode. Default
        is None.

    Returns
    -------
    Y : :py:class:`ndarray <numpy.ndarray>` of shape `(N, C)`
        The one-hot encoded labels. Each row corresponds to an example,
        with a single 1 in the column corresponding to the respective label.
    """
    if not self._is_fit:
        categories = set(labels) if categories is None else categories
        self.fit(categories)

    unknown = list(set(labels.asnumpy()) - set(self.cat2idx.keys()))
    assert len(unknown) == 0, "Unrecognized label(s): {}".format(unknown)

    N, C = len(labels), len(self.cat2idx)
    cols = np.array([self.cat2idx[c.item()] for c in labels])

    Y = np.zeros((N, C))
    Y[np.arange(N), cols] = 1
    return Y
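# A minimal plain-NumPy sketch of the mapping `transform` produces; the
# class machinery (`fit`, `cat2idx`) is replaced by inline equivalents, and
# the column ordering here is illustrative only.
def _one_hot_sketch():
    labels = ["cat", "dog", "cat", "bird"]
    cat2idx = {c: i for i, c in enumerate(sorted(set(labels)))}
    Y = np.zeros((len(labels), len(cat2idx)))
    Y[np.arange(len(labels)), [cat2idx[c] for c in labels]] = 1
    return Y  # one row per example, a single 1 in each row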
def __DCT2(frame):
    """
    A naive :math:`O(N^2)` implementation of the 1D type-II discrete cosine
    transform (DCT-II), using the same unnormalized convention as
    ``scipy.fftpack.dct``.
    """
    N = len(frame)  # window length
    k = np.arange(N, dtype=float)

    # FC[i, j] = cos(pi * i * (2j + 1) / (2N)) -- basis vector i, timestep j
    FC = np.cos(np.pi * k.reshape(-1, 1) * (2 * k.reshape(1, -1) + 1) / (2 * N))
    return 2 * (FC @ frame)
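# Hedged check of the DCT-II above against SciPy (assumes SciPy is
# available); `scipy.fftpack.dct` with `type=2, norm=None` computes the same
# unnormalized transform.
def _check_dct2(N=32):
    from scipy.fftpack import dct

    x = np.random.rand(N)
    assert np.allclose(__DCT2(x), dct(x, type=2, norm=None))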
def _im2col_indices(X_shape, fr, fc, p, s, d=0):
    """
    Helper function that computes the indices into X in preparation for
    columnization in :func:`im2col`.

    `fr` and `fc` are the filter's row and column dimensions, `p` is a
    4-tuple of (row, row, col, col) padding amounts, `s` is the stride, and
    `d` is the dilation amount.

    Code extended from Andrej Karpathy's `im2col.py`
    """
    pr1, pr2, pc1, pc2 = p
    n_ex, n_in, in_rows, in_cols = X_shape

    # adjust the effective filter size to account for dilation
    _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d
    out_rows = (in_rows + pr1 + pr2 - _fr) // s + 1
    out_cols = (in_cols + pc1 + pc2 - _fc) // s + 1

    if any([out_rows <= 0, out_cols <= 0]):
        raise ValueError(
            "Dimension mismatch during convolution: "
            "out_rows = {}, out_cols = {}".format(out_rows, out_cols)
        )

    # i1/j1 : row/col templates
    # i0/j0 : n. copies (len) and offsets (values) for row/col templates
    i0 = np.repeat(np.arange(fr), fc)
    i0 = np.tile(i0, n_in) * (d + 1)
    i1 = s * np.repeat(np.arange(out_rows), out_cols)
    j0 = np.tile(np.arange(fc), fr * n_in) * (d + 1)
    j1 = s * np.tile(np.arange(out_cols), out_rows)

    # i.shape = (fr * fc * n_in, out_height * out_width)
    # j.shape = (fr * fc * n_in, out_height * out_width)
    # k.shape = (fr * fc * n_in, 1)
    i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    k = np.repeat(np.arange(n_in), fr * fc).reshape(-1, 1)
    return k, i, j
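# Illustrative use of the helper (shapes assumed for the example): gather
# the patch indices for a 3x3 filter over a single-channel 4x4 input with
# unit stride, no padding, and no dilation.
def _im2col_indices_example():
    X = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
    k, i, j = _im2col_indices(X.shape, fr=3, fc=3, p=(0, 0, 0, 0), s=1, d=0)

    # with zero padding we can index `X` directly; in general the indices
    # apply to the padded input
    cols = X[:, k, i, j]
    assert cols.shape == (1, 9, 4)  # four 3x3 patches from the 4x4 input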
def dilate(X, d):
    """
    Dilate the 4D volume `X` by `d`.

    Notes
    -----
    For a visual depiction of a dilated convolution, see [1].

    References
    ----------
    .. [1] Dumoulin & Visin (2016). "A guide to convolution arithmetic for
       deep learning." https://arxiv.org/pdf/1603.07285v1.pdf

    Parameters
    ----------
    X : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, in_rows, in_cols, in_ch)`
        Input volume.
    d : int
        The number of rows/columns of zeros to insert between each pair of
        adjacent rows and columns in `X`.

    Returns
    -------
    Xd : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, out_rows, out_cols, in_ch)`
        The dilated array, where

        .. math::

            \\text{out_rows}  &=  \\text{in_rows} + d(\\text{in_rows} - 1) \\\\
            \\text{out_cols}  &=  \\text{in_cols} + d(\\text{in_cols} - 1)
    """
    n_ex, in_rows, in_cols, n_in = X.shape
    r_ix = np.repeat(np.arange(1, in_rows), d)
    c_ix = np.repeat(np.arange(1, in_cols), d)
    Xd = np.insert(X, r_ix, 0, axis=1)
    Xd = np.insert(Xd, c_ix, 0, axis=2)
    return Xd
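# Quick illustrative shape check: dilating a 2x2 spatial grid by d=1 inserts
# one row/column of zeros between adjacent entries, yielding a 3x3 grid.
def _check_dilate():
    X = np.ones((1, 2, 2, 1))
    Xd = dilate(X, d=1)
    assert Xd.shape == (1, 3, 3, 1)
    assert Xd[0, 1, 1, 0] == 0  # inserted positions are zero-filled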
def minibatch(X, batchsize=256, shuffle=True):
    """
    Compute the minibatch indices for a training dataset.

    Parameters
    ----------
    X : :py:class:`ndarray <numpy.ndarray>` of shape `(N, \*)`
        The dataset to divide into minibatches. Assumes the first dimension
        represents the number of training examples.
    batchsize : int
        The desired size of each minibatch. Note, however, that if
        ``X.shape[0] % batchsize > 0`` then the final batch will contain
        fewer than batchsize entries. Default is 256.
    shuffle : bool
        Whether to shuffle the entries in the dataset before dividing into
        minibatches. Default is True.

    Returns
    -------
    mb_generator : generator
        A generator which yields the indices into X for each batch
    n_batches : int
        The number of batches
    """
    N = X.shape[0]
    ix = np.arange(N)
    n_batches = int(np.ceil(N / batchsize))

    if shuffle:
        np.random.shuffle(ix)

    def mb_generator():
        for i in range(n_batches):
            yield ix[i * batchsize:(i + 1) * batchsize]

    return mb_generator(), n_batches
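# Illustrative training-loop usage; `update_step` is a hypothetical
# stand-in for an actual parameter update, not part of the library.
def _minibatch_example():
    X = np.random.rand(1000, 10)
    batch_ixs, n_batches = minibatch(X, batchsize=256, shuffle=True)
    print("Training on {} batches".format(n_batches))
    for ix in batch_ixs:
        X_batch = X[ix]  # at most 256 rows; the final batch may be smaller
        # update_step(model, X_batch)  # hypothetical training step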
def check_deepnp_indices_default_dtype(): assert np.arange(3, 7, 2).dtype == 'float32'
def check_np_indices_default_dtype(): assert np.arange(3, 7, 2).dtype == 'int64'
def plot_schedulers():
    fig, axes = plt.subplots(2, 2)
    schedulers = [
        (
            [ConstantScheduler(lr=0.01), "lr=1e-2"],
            [ConstantScheduler(lr=0.008), "lr=8e-3"],
            [ConstantScheduler(lr=0.006), "lr=6e-3"],
            [ConstantScheduler(lr=0.004), "lr=4e-3"],
            [ConstantScheduler(lr=0.002), "lr=2e-3"],
        ),
        (
            [
                ExponentialScheduler(lr=0.01, stage_length=250, staircase=False, decay=0.4),
                "lr=0.01, stage=250, stair=False, decay=0.4",
            ],
            [
                ExponentialScheduler(lr=0.01, stage_length=250, staircase=True, decay=0.4),
                "lr=0.01, stage=250, stair=True, decay=0.4",
            ],
            [
                ExponentialScheduler(lr=0.01, stage_length=125, staircase=True, decay=0.1),
                "lr=0.01, stage=125, stair=True, decay=0.1",
            ],
            [
                ExponentialScheduler(lr=0.001, stage_length=250, staircase=False, decay=0.1),
                "lr=0.001, stage=250, stair=False, decay=0.1",
            ],
            [
                ExponentialScheduler(lr=0.001, stage_length=125, staircase=False, decay=0.8),
                "lr=0.001, stage=125, stair=False, decay=0.8",
            ],
            [
                ExponentialScheduler(lr=0.01, stage_length=250, staircase=False, decay=0.01),
                "lr=0.01, stage=250, stair=False, decay=0.01",
            ],
        ),
        (
            [
                NoamScheduler(model_dim=512, scale_factor=1, warmup_steps=250),
                "dim=512, scale=1, warmup=250",
            ],
            [
                NoamScheduler(model_dim=256, scale_factor=1, warmup_steps=250),
                "dim=256, scale=1, warmup=250",
            ],
            [
                NoamScheduler(model_dim=512, scale_factor=1, warmup_steps=500),
                "dim=512, scale=1, warmup=500",
            ],
            [
                NoamScheduler(model_dim=256, scale_factor=1, warmup_steps=500),
                "dim=256, scale=1, warmup=500",
            ],
            [
                NoamScheduler(model_dim=512, scale_factor=2, warmup_steps=500),
                "dim=512, scale=2, warmup=500",
            ],
            [
                NoamScheduler(model_dim=512, scale_factor=0.5, warmup_steps=500),
                "dim=512, scale=0.5, warmup=500",
            ],
        ),
        (
            # [
            #     KingScheduler(initial_lr=0.01, patience=100, decay=0.1),
            #     "lr=0.01, patience=100, decay=0.1",
            # ],
            # [
            #     KingScheduler(initial_lr=0.01, patience=300, decay=0.999),
            #     "lr=0.01, patience=300, decay=0.999",
            # ],
            [
                KingScheduler(initial_lr=0.009, patience=150, decay=0.995),
                "lr=0.009, patience=150, decay=0.995",
            ],
            [
                KingScheduler(initial_lr=0.008, patience=100, decay=0.995),
                "lr=0.008, patience=100, decay=0.995",
            ],
            [
                KingScheduler(initial_lr=0.007, patience=50, decay=0.995),
                "lr=0.007, patience=50, decay=0.995",
            ],
            [
                KingScheduler(initial_lr=0.005, patience=25, decay=0.9),
                "lr=0.005, patience=25, decay=0.9",
            ],
        ),
    ]

    for ax, schs, title in zip(
        axes.flatten(), schedulers, ["Constant", "Exponential", "Noam", "King"]
    ):
        t0 = time.time()
        print("Running {} scheduler".format(title))
        X = np.arange(1, 1000)
        loss = np.array([king_loss_fn(x) for x in X])

        # scale the loss so it fits on the same axis as the learning rate
        scale = 0.01 / loss[0]
        loss *= scale

        if title == "King":
            ax.plot(X, loss, ls=":", label="Loss")

        for sc, lg in schs:
            Y = np.array([sc(x, ll) for x, ll in zip(X, loss)])
            ax.plot(X, Y, label=lg, alpha=0.6)

        ax.legend(fontsize=5)
        ax.set_xlabel("Steps")
        ax.set_ylabel("Learning rate")
        ax.set_title("{} scheduler".format(title))
        print(
            "Finished plotting {} runs of {} in {:.2f}s".format(
                len(schs), title, time.time() - t0
            )
        )

    plt.tight_layout()
    plt.savefig("plot.png", dpi=300)
    plt.close("all")