def loss(y, y_pred, t_mean, t_log_var):
    """
    Variational lower bound for a Bernoulli VAE.

    Parameters
    ----------
    y : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, N)`
        The original images.
    y_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, N)`
        The VAE reconstruction of the images.
    t_mean : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
        Mean of the variational distribution :math:`q(t \mid x)`.
    t_log_var : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
        Log of the variance vector of the variational distribution
        :math:`q(t \mid x)`.

    Returns
    -------
    loss : float
        The VLB, averaged across the batch.
    """
    # prevent nan on log(0)
    eps = np.finfo(float).eps
    y_pred = np.clip(y_pred, eps, 1 - eps)

    # reconstruction loss: binary cross-entropy
    rec_loss = -np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred), axis=1)

    # KL divergence between the variational distribution q and the prior p,
    # a unit gaussian
    kl_loss = -0.5 * np.sum(1 + t_log_var - t_mean ** 2 - np.exp(t_log_var), axis=1)
    loss = np.mean(kl_loss + rec_loss)
    return loss
def grad(y, y_pred, t_mean, t_log_var):
    """
    Compute the gradient of the VLB with regard to the network parameters.

    Parameters
    ----------
    y : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, N)`
        The original images.
    y_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, N)`
        The VAE reconstruction of the images.
    t_mean : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
        Mean of the variational distribution :math:`q(t | x)`.
    t_log_var : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
        Log of the variance vector of the variational distribution
        :math:`q(t | x)`.

    Returns
    -------
    dY_pred : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, N)`
        The gradient of the VLB with regard to `y_pred`.
    dLogVar : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
        The gradient of the VLB with regard to `t_log_var`.
    dMean : :py:class:`ndarray <numpy.ndarray>` of shape `(n_ex, T)`
        The gradient of the VLB with regard to `t_mean`.
    """
    N = y.shape[0]
    eps = np.finfo(float).eps
    y_pred = np.clip(y_pred, eps, 1 - eps)

    dY_pred = -y / (N * y_pred) - (y - 1) / (N - N * y_pred)
    dLogVar = (np.exp(t_log_var) - 1) / (2 * N)
    dMean = t_mean / N
    return dY_pred, dLogVar, dMean
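# ---------------------------------------------------------------------------
# Illustrative sanity check (a sketch, not part of the library): compare the
# analytic `grad` above against a centered finite-difference estimate of
# `loss` on random data. All array names and sizes below are made up for the
# example.
if __name__ == "__main__":
    np.random.seed(0)
    n_ex, n_feat, n_latent = 4, 6, 3

    y = (np.random.rand(n_ex, n_feat) > 0.5).astype(float)
    y_pred = np.clip(np.random.rand(n_ex, n_feat), 0.05, 0.95)
    t_mean = np.random.randn(n_ex, n_latent)
    t_log_var = np.random.randn(n_ex, n_latent)

    _, _, dMean = grad(y, y_pred, t_mean, t_log_var)

    # finite-difference estimate of d(loss)/d(t_mean[0, 0])
    h = 1e-6
    tm_hi, tm_lo = t_mean.copy(), t_mean.copy()
    tm_hi[0, 0] += h
    tm_lo[0, 0] -= h
    fd = (loss(y, y_pred, tm_hi, t_log_var) - loss(y, y_pred, tm_lo, t_log_var)) / (2 * h)

    print(np.isclose(fd, dMean[0, 0]))  # should print True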
def fn(self, z):
    """
    Evaluate the logistic sigmoid, :math:`\sigma`, on the elements of input `z`.

    .. math::

        \sigma(x_i) = \\frac{1}{1 + e^{-x_i}}
    """
    return 1 / (1 + np.exp(-z))
def fn(self, z):
    """
    Evaluate the softplus activation on the elements of input `z`.

    .. math::

        \\text{SoftPlus}(z_i) = \log(1 + e^{z_i})
    """
    return np.log(np.exp(z) + 1)
def logsumexp(log_probs, axis=None):
    """
    Numerically stable computation of ``log(sum(exp(log_probs)))`` along `axis`.
    Re-implements `scipy.special.logsumexp`.
    See: http://bayesjumping.net/log-sum-exp-trick/
    """
    # subtract the max before exponentiating to avoid overflow
    _max = np.max(log_probs)
    ds = log_probs - _max
    exp_sum = np.exp(ds).sum(axis=axis)
    return _max + np.log(exp_sum)
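# ---------------------------------------------------------------------------
# Illustrative example (a sketch, not part of the module): the log-sum-exp
# trick avoids the overflow a naive implementation hits on large
# log-probabilities.
if __name__ == "__main__":
    log_probs = np.array([1000.0, 1000.0])

    with np.errstate(over="ignore"):
        naive = np.log(np.sum(np.exp(log_probs)))  # overflows to inf

    stable = logsumexp(log_probs)  # 1000 + log(2) ~= 1000.693
    print(naive, stable)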
def grad2(self, x):
    """
    Evaluate the second derivative of the exponential activation on the
    elements of input `x`.

    .. math::

        \\frac{\partial^2 \\text{Exponential}}{\partial x_i^2} = e^{x_i}
    """
    return np.exp(x)
def grad(self, x):
    """
    Evaluate the first derivative of the softplus activation on the elements
    of input `x`.

    .. math::

        \\frac{\partial \\text{SoftPlus}}{\partial x_i} = \\frac{e^{x_i}}{1 + e^{x_i}}
    """
    exp_x = np.exp(x)
    return exp_x / (exp_x + 1)
def grad2(self, x):
    """
    Evaluate the second derivative of the softplus activation on the elements
    of input `x`.

    .. math::

        \\frac{\partial^2 \\text{SoftPlus}}{\partial x_i^2} = \\frac{e^{x_i}}{(1 + e^{x_i})^2}
    """
    exp_x = np.exp(x)
    return exp_x / ((exp_x + 1) ** 2)
def fn(self, z):
    """
    Evaluate the ELU activation on the elements of input `z`.

    .. math::

        \\text{ELU}(z_i)
            &=  z_i \\ \\ \\ \\ &&\\text{if }z_i > 0 \\\\
            &=  \\alpha (e^{z_i} - 1) \\ \\ \\ \\ &&\\text{otherwise}
    """
    # z if z > 0 else alpha * (e^z - 1)
    return np.where(z > 0, z, self.alpha * (np.exp(z) - 1))
def grad(self, x):
    """
    Evaluate the first derivative of the ELU activation on the elements of
    input `x`.

    .. math::

        \\frac{\partial \\text{ELU}}{\partial x_i}
            &=  1 \\ \\ \\ \\ &&\\text{if } x_i > 0 \\\\
            &=  \\alpha e^{x_i} \\ \\ \\ \\ &&\\text{otherwise}
    """
    # 1 if x > 0 else alpha * e^x
    return np.where(x > 0, np.ones_like(x), self.alpha * np.exp(x))
def logsumexp(log_probs, axis=None):
    """
    Numerically stable computation of ``log(sum(exp(log_probs)))`` along `axis`.
    Re-implements `scipy.special.logsumexp`.
    See: http://bayesjumping.net/log-sum-exp-trick/
    """
    # subtract the max before exponentiating to avoid overflow
    _max = np.max(log_probs)
    ds = log_probs - _max
    exp_sum = np.exp(ds).sum(axis=axis)
    return float(_max + np.log(exp_sum))
def grad2(self, x):
    """
    Evaluate the second derivative of the SELU activation on the elements of
    input `x`.

    .. math::

        \\frac{\partial^2 \\text{SELU}}{\partial x_i^2}
            &=  0 \\ \\ \\ \\ &&\\text{if } x_i > 0 \\\\
            &=  \\text{scale} \\times \\alpha e^{x_i} \\ \\ \\ \\ &&\\text{otherwise}
    """
    return np.where(
        x > 0, np.zeros_like(x), np.exp(x) * self.alpha * self.scale
    )
def grad2(self, x):
    """
    Evaluate the second derivative of the ELU activation on the elements of
    input `x`.

    .. math::

        \\frac{\partial^2 \\text{ELU}}{\partial x_i^2}
            &=  0 \\ \\ \\ \\ &&\\text{if } x_i > 0 \\\\
            &=  \\alpha e^{x_i} \\ \\ \\ \\ &&\\text{otherwise}
    """
    # 0 if x >= 0 else alpha * e^x
    return np.where(x >= 0, np.zeros_like(x), self.alpha * np.exp(x))
def DFT(frame, positive_only=True):
    """
    A naive :math:`O(N^2)` implementation of the 1D discrete Fourier transform
    (DFT).

    Notes
    -----
    The Fourier transform decomposes a signal into a linear combination of
    sinusoids (i.e., basis elements in the space of continuous periodic
    functions). For a sequence :math:`\mathbf{x} = [x_1, \ldots, x_N]` of N
    evenly spaced samples, the `k` th DFT coefficient is given by:

    .. math::

        c_k = \sum_{n=0}^{N-1} x_n \exp(-2 \pi i k n / N)

    where `i` is the imaginary unit, `k` is an index ranging from `0, ..., N-1`,
    and :math:`c_k` is the complex coefficient representing the phase
    (imaginary part) and amplitude (real part) of the `k` th sinusoid in the
    DFT spectrum. The frequency of the `k` th sinusoid is :math:`2 \pi k / N`
    radians per sample.

    When applied to a real-valued input, the negative frequency terms are the
    complex conjugates of the positive-frequency terms and the overall
    spectrum is symmetric (excluding the first index, which contains the
    zero-frequency / intercept term).

    Parameters
    ----------
    frame : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        A signal frame consisting of N samples
    positive_only : bool
        Whether to only return the coefficients for the positive frequency
        terms. Default is True.

    Returns
    -------
    spectrum : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)` or `(N // 2 + 1,)` if `positive_only`
        The coefficients of the frequency spectrum for `frame`, including
        imaginary components.
    """
    N = len(frame)  # window length

    # F[i,j] = coefficient for basis vector i, timestep j (i.e., k * n)
    F = np.arange(N).reshape(1, -1) * np.arange(N).reshape(-1, 1)
    F = np.exp(F * (-1j * 2 * np.pi / N))

    # vdot only operates on vectors (rather than ndarrays), so we have to
    # loop over each basis vector in F explicitly. vdot also conjugates its
    # first argument, so pass the conjugated basis vectors to recover the
    # exp(-2 pi i k n / N) convention from the docstring.
    spectrum = np.array([np.vdot(np.conj(f), frame) for f in F])
    return spectrum[: (N // 2) + 1] if positive_only else spectrum
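# ---------------------------------------------------------------------------
# Illustrative check (a sketch, not part of the module): on a real-valued
# signal, the naive DFT above should agree with NumPy's FFT on the
# positive-frequency terms.
if __name__ == "__main__":
    np.random.seed(0)
    x = np.random.randn(16)
    print(np.allclose(DFT(x), np.fft.rfft(x)))  # should print True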
def grad(self, x):
    """
    Evaluate the first derivative of the SELU activation on the elements of
    input `x`.

    .. math::

        \\frac{\partial \\text{SELU}}{\partial x_i}
            &=  \\text{scale} \\ \\ \\ \\ &&\\text{if } x_i > 0 \\\\
            &=  \\text{scale} \\times \\alpha e^{x_i} \\ \\ \\ \\ &&\\text{otherwise}
    """
    return np.where(
        x >= 0, np.ones_like(x) * self.scale, np.exp(x) * self.alpha * self.scale
    )
def _E_step(self):
    for i in range(self.N):
        x_i = self.X[i, :]

        denom_vals = []
        for c in range(self.C):
            pi_c = self.pi[c]
            mu_c = self.mu[c, :]
            sigma_c = self.sigma[c, :, :]

            log_pi_c = np.log(pi_c)
            log_p_x_i = log_gaussian_pdf(x_i, mu_c, sigma_c)

            # log N(X_i | mu_c, Sigma_c) + log pi_c
            denom_vals.append(log_p_x_i + log_pi_c)

        # log \sum_c exp{ log N(X_i | mu_c, Sigma_c) + log pi_c }
        log_denom = logsumexp(denom_vals)
        q_i = np.exp([num - log_denom for num in denom_vals])
        assert_allclose(np.sum(q_i), 1, err_msg="{}".format(np.sum(q_i)))

        self.Q[i, :] = q_i
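# ---------------------------------------------------------------------------
# Illustrative example (a sketch, not part of the class): responsibilities for
# a single point under a two-component mixture, computed in log space as in
# `_E_step` above. Uses scipy's multivariate normal in place of the module's
# `log_gaussian_pdf` helper; all numbers below are made up.
if __name__ == "__main__":
    from scipy.stats import multivariate_normal

    x_i = np.array([0.5, -0.25])
    pi = np.array([0.3, 0.7])                     # mixture weights
    mu = np.array([[0.0, 0.0], [1.0, -1.0]])      # component means
    sigma = np.array([np.eye(2), 2 * np.eye(2)])  # component covariances

    log_joint = np.array(
        [np.log(pi[c]) + multivariate_normal.logpdf(x_i, mu[c], sigma[c]) for c in range(2)]
    )
    q_i = np.exp(log_joint - logsumexp(log_joint))
    print(q_i, q_i.sum())  # responsibilities; should sum to 1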
def _maximize_phi(self):
    """
    Optimize the variational parameter phi:

        ϕ_{t, n} ∝ β_{t, w_n} e^( Ψ(γ_t) )
    """
    D = self.D
    N = self.N
    T = self.T

    phi = self.phi
    beta = self.beta
    gamma = self.gamma
    corpus = self.corpus

    for d in range(D):
        for n in range(N[d]):
            for t in range(T):
                w_n = int(corpus[d][n])
                phi[d][n, t] = beta[w_n, t] * np.exp(dg(gamma, d, t))

            # Normalize over topics
            phi[d][n, :] = phi[d][n, :] / np.sum(phi[d][n, :])
    return phi
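# ---------------------------------------------------------------------------
# Illustrative example (a sketch, not part of the class): the per-word phi
# update for a single document position, assuming `dg(gamma, d, t)` above
# evaluates the digamma term Ψ(γ_{d,t}). Any Ψ(Σ_t γ_{d,t}) contribution to
# the expected log topic proportion cancels under the normalization over
# topics. All numbers below are made up.
if __name__ == "__main__":
    from scipy.special import digamma

    beta_w = np.array([0.2, 0.5, 0.3])   # beta[w_n, :] for the word at position n
    gamma_d = np.array([1.0, 4.0, 2.0])  # variational Dirichlet parameters for doc d

    phi_n = beta_w * np.exp(digamma(gamma_d))
    phi_n = phi_n / phi_n.sum()          # normalize over topics, as in _maximize_phi
    print(phi_n, phi_n.sum())            # should sum to 1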
def _Mstep(self, gamma, xi, phi):
    """
    Run a single M-step update for the Baum-Welch/Forward-Backward algorithm.

    Parameters
    ----------
    gamma : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N, T)`
        The estimated state-occupancy count matrix.
    xi : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N, N, T)`
        The estimated state-state transition count matrix.
    phi : :py:class:`ndarray <numpy.ndarray>` of shape `(I, N)`
        The estimated starting count matrix for each latent state.

    Returns
    -------
    A : :py:class:`ndarray <numpy.ndarray>` of shape `(N, N)`
        The estimated transition matrix.
    B : :py:class:`ndarray <numpy.ndarray>` of shape `(N, V)`
        The estimated emission matrix.
    pi : :py:class:`ndarray <numpy.ndarray>` of shape `(N,)`
        The estimated prior probabilities for each latent state.
    """
    eps = self.eps

    # initialize the estimated transition (A) and emission (B) matrices
    A = np.zeros((self.N, self.N))
    B = np.zeros((self.N, self.V))
    pi = np.zeros(self.N)

    count_gamma = np.zeros((self.I, self.N, self.V))
    count_xi = np.zeros((self.I, self.N, self.N))

    for i in range(self.I):
        Obs = self.O[i, :]
        for si in range(self.N):
            for vk in range(self.V):
                if not (Obs == vk).any():
                    # symbol vk never appears in this sequence; assign a
                    # negligible (log eps) count rather than -inf
                    count_gamma[i, si, vk] = np.log(eps)
                else:
                    count_gamma[i, si, vk] = logsumexp(gamma[i, si, Obs == vk])

            for sj in range(self.N):
                count_xi[i, si, sj] = logsumexp(xi[i, si, sj, :])

    pi = logsumexp(phi, axis=0) - np.log(self.I + eps)
    np.testing.assert_almost_equal(np.exp(pi).sum(), 1)

    for si in range(self.N):
        for vk in range(self.V):
            B[si, vk] = logsumexp(count_gamma[:, si, vk]) - logsumexp(
                count_gamma[:, si, :]
            )

        for sj in range(self.N):
            A[si, sj] = logsumexp(count_xi[:, si, sj]) - logsumexp(
                count_xi[:, si, :]
            )

        np.testing.assert_almost_equal(np.exp(A[si, :]).sum(), 1)
        np.testing.assert_almost_equal(np.exp(B[si, :]).sum(), 1)
    return np.exp(A), np.exp(B), np.exp(pi)
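# ---------------------------------------------------------------------------
# Illustrative example (a sketch, not part of the class): normalizing
# log-space expected transition counts with `logsumexp`, as in `_Mstep` above,
# yields a row-stochastic transition matrix. The counts below are made up.
if __name__ == "__main__":
    log_counts = np.log(np.array([[6.0, 2.0], [1.0, 3.0]]))  # expected transition counts
    A = np.exp(np.array([row - logsumexp(row) for row in log_counts]))
    print(A, A.sum(axis=1))  # each row sums to 1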
def fn(self, z):
    """Evaluate the activation function :math:`\\text{Exponential}(z_i) = e^{z_i}`."""
    return np.exp(z)