def grad_tlogpdf_df(x, df, loc, scale):
    """Gradient of the Student-t log-pdf with respect to the degrees of freedom.

    NOTE(review): an identical definition of this function appears again later
    in the file; at import time the later one wins.
    """
    z = (x - loc) / scale
    z2 = z ** 2
    rational = z2 * (df + 1) / (df * (z2 + df))
    log_term = np.log(z2 / df + 1)
    digamma_term = psi((df + 1) / 2.0) - psi(df / 2.0)
    return 0.5 * (rational - log_term - 1.0 / df + digamma_term)
def dirichlet_expectation(alpha):
    """Dirichlet expectation computation.

    Returns \\Psi(\\alpha) - \\Psi(\\sum_{i=1}^{K}(\\alpha_{i})) for a single
    parameter vector, or row-wise for a batch of vectors (one distribution per
    row). A float32 epsilon keeps psi away from an exact-zero argument.
    """
    eps = agnp.finfo(agnp.float32).eps
    if alpha.ndim == 1:
        return agscipy.psi(alpha + eps) - agscipy.psi(agnp.sum(alpha))
    row_totals = agnp.sum(alpha, 1)
    return agscipy.psi(alpha + eps) - agscipy.psi(row_totals)[:, agnp.newaxis]
def elbo(params):
    """ELBO computation for the variational Gaussian-mixture posterior.

    Args:
        params: tuple ``(lambda_pi, lambda_phi, lambda_m, lambda_beta,
            lambda_nu, lambda_w)`` of variational parameters.

    Returns:
        Scalar evidence lower bound.

    Fix: the original used Python-2-only tuple-parameter unpacking in the
    signature (``def elbo((a, b, ...))``), a SyntaxError on Python 3. Callers
    still pass a single tuple, so this change is call-compatible.

    NOTE(review): relies on module-level globals (alpha_o, beta_o, nu_o, m_o,
    w_o, K, N, xn, log_beta_function, log_) defined elsewhere in the file.
    """
    lambda_pi, lambda_phi, lambda_m, lambda_beta, lambda_nu, lambda_w = params
    e3 = e2 = h2 = 0
    # Dirichlet prior term and its entropy.
    e1 = - log_beta_function(alpha_o) \
         + agnp.dot((alpha_o - agnp.ones(K)), dirichlet_expectation(lambda_pi))
    h1 = log_beta_function(lambda_pi) \
         - agnp.dot((lambda_pi - agnp.ones(K)), dirichlet_expectation(lambda_pi))
    # log|W_k| and E[log det Lambda_k] for each component.
    logdet = agnp.log(
        agnp.array([agnp.linalg.det(lambda_w[k, :, :]) for k in range(K)]))
    logDeltak = agscipy.psi(lambda_nu / 2.) \
        + agscipy.psi((lambda_nu - 1.) / 2.) + 2. * agnp.log(2.) + logdet
    for n in range(N):
        # Expected assignment log-prior and responsibilities' entropy.
        e2 += agnp.dot(lambda_phi[n, :], dirichlet_expectation(lambda_pi))
        h2 += -agnp.dot(lambda_phi[n, :], log_(lambda_phi[n, :]))
        # Mahalanobis-style quadratic form per component for observation n.
        product = agnp.array([
            agnp.dot(agnp.dot(xn[n, :] - lambda_m[k, :], lambda_w[k, :, :]),
                     (xn[n, :] - lambda_m[k, :]).T) for k in range(K)
        ])
        e3 += 1. / 2 * agnp.dot(
            lambda_phi[n, :],
            (logDeltak - 2. * agnp.log(2 * agnp.pi) - lambda_nu * product
             - 2. / lambda_beta).T)
    # Quadratic form of the variational means against the prior mean.
    product = agnp.array([
        agnp.dot(agnp.dot(lambda_m[k, :] - m_o, lambda_w[k, :, :]),
                 (lambda_m[k, :] - m_o).T) for k in range(K)
    ])
    traces = agnp.array([
        agnp.trace(agnp.dot(agnp.linalg.inv(w_o), lambda_w[k, :, :]))
        for k in range(K)
    ])
    # Gaussian and Wishart entropies of the variational posterior.
    h4 = agnp.sum((1. + agnp.log(2. * agnp.pi)
                   - 1. / 2 * (agnp.log(lambda_beta) + logdet)))
    logB = lambda_nu / 2. * logdet + lambda_nu * agnp.log(2.) \
        + 1. / 2 * agnp.log(agnp.pi) \
        + agscipy.gammaln(lambda_nu / 2.) \
        + agscipy.gammaln((lambda_nu - 1) / 2.)
    h5 = agnp.sum((logB - (lambda_nu - 3.) / 2. * logDeltak + lambda_nu))
    # Expected Gaussian-Wishart prior terms.
    e4 = agnp.sum((1. / 2 * (agnp.log(beta_o) + logDeltak
                             - 2 * agnp.log(2. * agnp.pi)
                             - beta_o * lambda_nu * product
                             - 2. * beta_o / lambda_beta)))
    logB = nu_o / 2. * agnp.log(agnp.linalg.det(w_o)) + nu_o * agnp.log(2.) \
        + 1. / 2 * agnp.log(agnp.pi) + agscipy.gammaln(nu_o / 2.) \
        + agscipy.gammaln((nu_o - 1) / 2.)
    e5 = agnp.sum((-logB + (nu_o - 3.) / 2. * logDeltak
                   - lambda_nu / 2. * traces))
    return e1 + e2 + e3 + e4 + e5 + h1 + h2 + h4 + h5
def EPtaulambda(self, tau_mu, tau_sigma, tau_a_prior, lambda_a_prior, lambda_b_prior, lambda_a_hat, lambda_b_hat): """ E[ln p(\tau | \lambda)] + E[ln p(\lambda)]""" etau_given_lambda = -gammaln(tau_a_prior) - tau_a_prior * ( np.log(lambda_b_hat) - psi(lambda_a_hat)) + (-tau_a_prior - 1.) * tau_mu - np.exp( -tau_mu + 0.5 * tau_sigma**2) * (lambda_a_hat / lambda_b_hat) elambda = -gammaln(lambda_a_prior) - 2 * lambda_a_prior * np.log( lambda_b_prior) + (-lambda_a_prior - 1.) * ( np.log(lambda_b_hat) - psi(lambda_a_hat)) - ( 1. / lambda_b_prior**2) * (lambda_a_hat / lambda_b_hat) return np.sum(etau_given_lambda) + np.sum(elambda)
def unpack_params(self, params):
    """Split the flat optimization vector `params` into the model's
    variational parameters.

    Layout (in order): network weight means, weight std-devs (softplus'd),
    per-output tau means, per-output tau std-devs, per-hidden-layer global
    tau means/std-devs, then the output-layer tau mean and the remaining
    tail for the output-layer tau std-dev (plus, in the regression case,
    noise-Gamma parameters — see note below).
    """
    # unpack params
    w_vect = params[:self.n_weights]
    num_std = 2 * self.n_weights
    # softplus transform log(1 + exp(.)) keeps std-devs positive.
    sigma = np.log(1 + np.exp(params[self.n_weights:num_std]))
    tau_mu = params[num_std:num_std + self.tot_outputs]
    tau_sigma = np.log(1 + np.exp(params[num_std + self.tot_outputs:num_std +
                                         2 * self.tot_outputs]))
    tau_mu_global = params[num_std + 2 * self.tot_outputs:num_std +
                           2 * self.tot_outputs + self.num_hidden_layers]
    tau_sigma_global = np.log(
        1 + np.exp(params[num_std + 2 * self.tot_outputs +
                          self.num_hidden_layers:num_std +
                          2 * self.tot_outputs +
                          2 * self.num_hidden_layers]))
    tau_mu_oplayer = params[num_std + 2 * self.tot_outputs +
                            2 * self.num_hidden_layers:num_std +
                            2 * self.tot_outputs +
                            2 * self.num_hidden_layers + 1]
    # Everything after the output-layer tau mean, softplus-transformed.
    tau_sigma_oplayer = np.log(
        1 + np.exp(params[num_std + 2 * self.tot_outputs +
                          2 * self.num_hidden_layers + 1:]))
    if not self.classification:
        # NOTE(review): in regression the tail apparently packs
        # [tau_sigma_oplayer, a, b] where (a, b) parameterize an
        # Inverse-Gamma posterior over the observation noise — the softplus
        # above is therefore also applied to a and b. Verify against the
        # code that builds `params`.
        a = tau_sigma_oplayer[1]
        b = tau_sigma_oplayer[2]
        tau_sigma_oplayer = tau_sigma_oplayer[0]
        egamma = a / b  # E[gamma] for Gamma(a, b) precision
        elog_gamma = psi(a) - np.log(b)  # E[log gamma]
        self.noise_entropy = inv_gamma_entropy(a, b)
        # we will just use a point estimate of noise_var b/a+1 (noise_var ~ IGamma) for computing predictive ll
        self.noisevar = (b / (a + 1)) * self.train_stats['sigma']**2
        return w_vect, sigma, tau_mu, tau_sigma, tau_mu_global, tau_sigma_global, tau_mu_oplayer, \
            tau_sigma_oplayer, elog_gamma, egamma
    else:
        # Classification: no observation-noise model, so no (a, b) terms.
        return w_vect, sigma, tau_mu, tau_sigma, tau_mu_global, tau_sigma_global, tau_mu_oplayer, tau_sigma_oplayer
def grad_tlogpdf_df(x, df, loc, scale):
    """Derivative of the Student-t log density with respect to df.

    NOTE(review): this duplicates an earlier definition in the file.
    """
    u = (x - loc) / scale
    q = u ** 2
    return 0.5 * (
        q * (df + 1) / (df * (q + df))
        - np.log(q / df + 1)
        - 1.0 / df
        - psi(df / 2.0)
        + psi((df + 1) / 2.0))
def inv_gamma_entropy(a, b): return np.sum(a + np.log(b) + gammaln(a) - (1 + a) * psi(a))
def grad_beta_logpdf_arg2(x, a, b): return np.log1p(-x) - psi(b) + psi(a + b)
def grad_beta_logpdf_arg1(x, a, b): return np.log(x) - psi(a) + psi(a + b)
from __future__ import absolute_import
import autograd.numpy as np
import autograd.scipy.special as sp

# Forward-mode (JVP) derivative registrations for autograd's wrappers of
# scipy.special. Each rule maps an incoming tangent `g` at input `x` to the
# tangent of the output. Executed for their side effect on the sp functions.

### Gamma functions ###
# d/dx polygamma(n, x) = polygamma(n + 1, x); argnum=1 differentiates w.r.t. x.
sp.polygamma.defjvp(lambda g, ans, gvs, vs, n, x: g * sp.polygamma(n + 1, x),
                    argnum=1)
# psi and digamma are the same function; psi' = polygamma(1, .).
sp.psi.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.digamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
# Gamma'(x) = Gamma(x) * psi(x); `ans` is the already-computed Gamma(x).
sp.gamma.defjvp(lambda g, ans, gvs, vs, x: g * ans * sp.psi(x))
# d/dx log Gamma(x) = psi(x).
sp.gammaln.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x))
# d/dx (1 / Gamma(x)) = -psi(x) / Gamma(x).
sp.rgamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x) / -sp.gamma(x))
# d/da multigammaln(a, d) = sum_{j=0}^{d-1} digamma(a - j/2).
sp.multigammaln.defjvp(lambda g, ans, gvs, vs, a, d: g * np.sum(
    sp.digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))

### Bessel functions ###
# Standard Bessel recurrences: J0' = -J1, Y0' = -Y1,
# and C_n' = (C_{n-1} - C_{n+1}) / 2 for both kinds.
sp.j0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.j1(x))
sp.y0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.y1(x))
sp.j1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.j0(x) - sp.jn(2, x)) / 2.0)
sp.y1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.y0(x) - sp.yn(2, x)) / 2.0)
sp.jn.defjvp(lambda g, ans, gvs, vs, n, x: g * (sp.jn(n - 1, x) - sp.jn(
    n + 1, x)) / 2.0,
             argnum=1)
sp.yn.defjvp(lambda g, ans, gvs, vs, n, x: g * (sp.yn(n - 1, x) - sp.yn(
    n + 1, x)) / 2.0,
             argnum=1)

### Error Function ###
# erf'(x) = (2 / sqrt(pi)) * exp(-x^2); sp.inv_root_pi is 1/sqrt(pi).
sp.erf.defjvp(
    lambda g, ans, gvs, vs, x: 2. * g * sp.inv_root_pi * np.exp(-x**2))
def gamma_entropy(theta):
    """Entropy of a Gamma distribution with packed parameters `theta`."""
    # `unwrap` is a project helper that splits theta into (alpha, beta).
    alpha, beta = unwrap(theta)
    log_rate = np.log(beta)
    return alpha - log_rate + gammaln(alpha) + (1 - alpha) * psi(alpha)
def grad_gamma_logpdf_arg1(x, a): return np.log(x) - psi(a)