def student(theta, df, prod=True):
    """Density of the Student t distribution with df degrees of freedom.

    Parameters
    ----------
    theta : np.ndarray
        Points at which the density is evaluated.
    df : float
        Degrees of freedom, > 0.
    prod : bool
        If True, return the joint density of the sample, i.e. the product
        of the individual densities. If False, return the individual
        densities.

    Returns
    -------
    float if prod
    np.ndarray if not prod
    """
    individual = gamma((df + 1.) / 2.) * (1 + theta**2 / df)**(-(df + 1) / 2) \
        / (gamma(df / 2.) * np.sqrt(df * pi))
    if prod:
        return np.prod(individual)
    else:
        return individual
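# A minimal sanity check for `student`, assuming its module-level imports
# (np, and gamma/pi) are in scope: the individual densities should match
# scipy.stats.t.pdf, and prod=True should return their product.
import numpy as np
from scipy.stats import t as student_t

theta = np.array([-1.3, 0.0, 2.1])
df = 4.0
np.testing.assert_allclose(student(theta, df, prod=False),
                           student_t.pdf(theta, df))
np.testing.assert_allclose(student(theta, df, prod=True),
                           np.prod(student_t.pdf(theta, df)))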
def gamma_(theta, alpha, beta, prod=True):
    """Gamma distribution with shape alpha and rate beta.

    Parameters
    ----------
    theta : np.ndarray
        Points at which the density is evaluated.
    alpha : float
        shape of the gamma distribution, > 0
    beta : float
        rate of the distribution, > 0
    prod : bool
        If True, return the joint density of the sample, i.e. the product
        of the individual densities. If False, return the individual
        densities.

    Returns
    -------
    float if prod
    np.ndarray if not prod
    """
    # indicator of the support (0, inf); zeroes the density elsewhere
    x = indicator_positive(theta)
    individual = beta**alpha * theta**(alpha - 1) * np.exp(-beta * theta) \
        / gamma(alpha) * x
    if prod:
        return np.prod(individual)
    else:
        return individual
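# A similar sketch for `gamma_`, assuming indicator_positive is defined as in
# the surrounding module: with rate beta, the density should match
# scipy.stats.gamma with scale = 1 / beta.
import numpy as np
from scipy.stats import gamma as gamma_dist

theta = np.array([0.5, 1.0, 2.5])
alpha, beta_rate = 2.0, 3.0
np.testing.assert_allclose(gamma_(theta, alpha, beta_rate, prod=False),
                           gamma_dist.pdf(theta, a=alpha, scale=1.0 / beta_rate))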
def logQ(sample, alpha, m):
    """Evaluate the log of the variational approximation, vectorized.

    This is the log density of a gamma distribution with shape alpha and
    rate alpha / m (i.e. mean m).
    """
    temp = alpha * (np.log(alpha) - np.log(m))
    temp += (alpha - 1.) * np.log(sample)
    temp -= alpha * sample / m
    temp -= np.log(sp.gamma(alpha))
    return temp
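# Cross-check of the identification above (a sketch): logQ should agree with
# scipy.stats.gamma.logpdf with shape alpha and scale m / alpha.
import numpy as np
from scipy.stats import gamma as gamma_dist

sample = np.array([0.3, 1.0, 4.2])
alpha, m = 2.5, 1.7
np.testing.assert_allclose(logQ(sample, alpha, m),
                           gamma_dist.logpdf(sample, a=alpha, scale=m / alpha))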
def probability_of_n_purchases_up_to_time(self, t, n):
    r"""
    Compute the probability of n purchases up to time t.

    .. math::

        P( N(t) = n | \text{model} )

    where N(t) is the number of repeat purchases a customer makes in t
    units of time.

    Parameters
    ----------
    t: float
        number of units of time
    n: int
        number of purchases

    Returns
    -------
    float
        Probability of having n purchases up to t units of time

    """
    r, alpha, a, b = self._unload_params("r", "alpha", "a", "b")

    _j = np.arange(0, n)

    first_term = (
        beta(a, b + n + 1) / beta(a, b)
        * gamma(r + n) / gamma(r) / gamma(n + 1)
        * (alpha / (alpha + t)) ** r
        * (t / (alpha + t)) ** n
    )
    finite_sum = (gamma(r + _j) / gamma(r) / gamma(_j + 1)
                  * (t / (alpha + t)) ** _j).sum()
    second_term = beta(a + 1, b + n) / beta(a, b) \
        * (1 - (alpha / (alpha + t)) ** r * finite_sum)

    return first_term + second_term
def multinomial(theta, p):
    """Multinomial probability mass function, using the gamma function in
    place of the factorial (n! = gamma(n + 1)) as in scipy.special.

    Parameters
    ----------
    theta : np.ndarray of integers of size d
        Input following the distribution; should contain the number of
        examples falling into each class.
    p : np.ndarray of size d
        Probabilities of falling into each class; should sum up to 1.

    Returns
    -------
    float
        Probability mass function evaluated at theta.
    """
    n = np.sum(theta)
    # multinomial coefficient n! / (theta_1! * ... * theta_d!)
    ratio = gamma(n + 1) / np.prod(gamma(theta + 1))
    return ratio * np.prod(p**theta)
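# Sanity check against scipy's reference implementation (a minimal sketch):
# with the gamma-based coefficient, `multinomial` should match
# scipy.stats.multinomial.pmf.
import numpy as np
from scipy.stats import multinomial as sp_multinomial

theta = np.array([2, 3, 5])
p = np.array([0.2, 0.3, 0.5])
np.testing.assert_allclose(multinomial(theta, p),
                           sp_multinomial.pmf(theta, n=theta.sum(), p=p))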
def probability_of_n_purchases_up_to_time(self, t, n):
    r"""
    Compute the probability of n purchases.

    .. math::

        P( N(t) = n | \text{model} )

    where N(t) is the number of repeat purchases a customer makes in t
    units of time. Comes from equation (8) of [2]_.

    Parameters
    ----------
    t: float
        number of units of time
    n: int
        number of purchases

    Returns
    -------
    float
        Probability of having n purchases up to t units of time

    References
    ----------
    .. [2] Fader, Peter S., Bruce G.S. Hardie, and Ka Lok Lee (2005a),
       "Counting Your Customers the Easy Way: An Alternative to the
       Pareto/NBD Model," Marketing Science, 24 (2), 275-84.
    """
    r, alpha, a, b = self._unload_params("r", "alpha", "a", "b")

    first_term = (
        beta(a, b + n) / beta(a, b)
        * gamma(r + n) / gamma(r) / gamma(n + 1)
        * (alpha / (alpha + t)) ** r
        * (t / (alpha + t)) ** n
    )

    if n > 0:
        j = np.arange(0, n)
        finite_sum = (gamma(r + j) / gamma(r) / gamma(j + 1)
                      * (t / (alpha + t)) ** j).sum()
        second_term = beta(a + 1, b + n - 1) / beta(a, b) \
            * (1 - (alpha / (alpha + t)) ** r * finite_sum)
    else:
        second_term = 0

    return first_term + second_term
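# Hedged usage sketch for either variant above, assuming `model` is an
# already-fitted lifetimes-style fitter exposing this method: the
# P(N(t) = n) values form a probability distribution over n, so they should
# sum to approximately 1 once n is taken large enough for the fitted tail.
probs = [model.probability_of_n_purchases_up_to_time(t=39.0, n=n)
         for n in range(200)]
assert abs(sum(probs) - 1.0) < 1e-4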
def test_a_special_case_of_the_derivative():
    """
    We know a specific value to test against:

        dUIG(s, x) / ds at (s=1, x) = ln(x) * UIG(1, x) + E_1(x)

    where UIG is the upper incomplete gamma function and E_1(x) is the
    exponential integral, E_1(x) = -Ei(-x).
    """
    # upper incomplete gamma: Gamma(s, x) = gammaincc(s, x) * gamma(s)
    IUG = lambda s, x: gammaincc(s, x) * gamma(s)

    def analytical_derivative(x):
        dIUG = np.log(x) * IUG(1., x) - expi(-x)
        return dIUG

    def approx_derivative(x):
        return jacobian(IUG, argnum=0)(1., x)

    x = np.linspace(1, 12)
    npt.assert_allclose(analytical_derivative(x), approx_derivative(x))

    x = np.logspace(-25, 25, 100)
    npt.assert_allclose(analytical_derivative(x), approx_derivative(x))
def kernel_matern(x, xp):
    # Matern kernel with amplitude sd, length scale rho and smoothness eta;
    # L1_norm is assumed defined elsewhere in the module.
    sd, rho, eta = 1, 1, 1
    d = L1_norm(x, xp) * np.sqrt(2 * eta) / rho
    K = sd**2 * (2**(1 - eta) / gamma(eta))
    # NB: the textbook Matern kernel uses the modified Bessel function of the
    # second kind K_eta (scipy.special.kv); yn (the Bessel function of the
    # second kind Y_n) is used here, presumably because autograd exposes yn
    # but not kv.
    return K * yn(eta, d) * d**eta
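# For comparison, a textbook Matern kernel written with the modified Bessel
# function of the second kind, scipy.special.kv (a sketch for scalar or
# element-wise inputs with x != xp; it is NOT expected to agree with
# kernel_matern above, which uses yn).
import numpy as np
from scipy.special import kv, gamma as gamma_fn

def matern_reference(x, xp, sd=1.0, rho=1.0, eta=1.0):
    d = np.abs(x - xp) * np.sqrt(2 * eta) / rho
    return sd**2 * (2**(1 - eta) / gamma_fn(eta)) * d**eta * kv(eta, d)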
def inverse_gamma_log_pdf(x, alpha, beta):
    '''Inverse gamma log pdf with shape alpha and scale beta.'''
    return alpha * np.log(beta) - (alpha + 1.0) * np.log(x) \
        - beta / x - np.log(gamma(alpha))
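# Quick check (a sketch): this matches scipy.stats.invgamma.logpdf with
# shape alpha and scale beta.
import numpy as np
from scipy.stats import invgamma

x = np.array([0.4, 1.0, 3.0])
np.testing.assert_allclose(inverse_gamma_log_pdf(x, 3.0, 2.0),
                           invgamma.logpdf(x, a=3.0, scale=2.0))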
from __future__ import absolute_import, division

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f
from autograd.scipy.special import gamma

cdf = primitive(scipy.stats.chi2.cdf)
logpdf = primitive(scipy.stats.chi2.logpdf)
pdf = primitive(scipy.stats.chi2.pdf)


def grad_chi2_logpdf(x, df):
    # d/dx log chi2.pdf(x, df) = (df/2 - 1)/x - 1/2 = (df - x - 2) / (2x)
    return np.where(df % 1 == 0, (df - x - 2) / (2 * x), 0)


# d/dx chi2.cdf(x, df) is the chi2 density itself.
defvjp(cdf,
       lambda ans, x, df: unbroadcast_f(
           x, lambda g: g * np.power(2., -df / 2) * np.exp(-x / 2)
           * np.power(x, df / 2 - 1) / gamma(df / 2)),
       argnums=[0])
defvjp(logpdf,
       lambda ans, x, df: unbroadcast_f(
           x, lambda g: g * grad_chi2_logpdf(x, df)),
       argnums=[0])
defvjp(pdf,
       lambda ans, x, df: unbroadcast_f(
           x, lambda g: g * ans * grad_chi2_logpdf(x, df)),
       argnums=[0])
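# Gradient check for the cdf vjp above (a sketch, assuming autograd is
# installed): d/dx chi2.cdf(x, df) is the chi2 density, so autograd's grad
# should reproduce scipy.stats.chi2.pdf.
import autograd
import numpy.testing as npt

x0, df0 = 3.0, 5.0
npt.assert_allclose(autograd.grad(cdf)(x0, df0), scipy.stats.chi2.pdf(x0, df0))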
# Presumed opening: gammainc wraps scipy's regularized lower incomplete
# gamma function as an autograd primitive (the def and docstring below are
# a reconstruction of the truncated original).
@primitive
def gammainc(k, x):
    ''' Regularized lower incomplete gamma function. '''
    return _scipy_gammainc(k, x)


# The derivative w.r.t. the shape parameter a has no simple closed form, so
# it is approximated with a fourth-order central finite difference; the
# derivative w.r.t. x is analytic:
# d/dx P(a, x) = exp(-x) * x**(a - 1) / gamma(a).
delta = 1e-6

defvjp(
    gammainc,
    lambda ans, a, x: unbroadcast_f(
        a,
        lambda g: g * (-gammainc(a + 2 * delta, x)
                       + 8 * gammainc(a + delta, x)
                       - 8 * gammainc(a - delta, x)
                       + gammainc(a - 2 * delta, x)) / (12 * delta),
    ),
    lambda ans, a, x: unbroadcast_f(
        x, lambda g: g * np.exp(-x) * np.power(x, a - 1) / gamma(a)),
)

gammaincc = primitive(_scipy_gammaincc)
defvjp(
    gammaincc,
    lambda ans, a, x: unbroadcast_f(
        a,
        lambda g: g * (-gammaincc(a + 2 * delta, x)
                       + 8 * gammaincc(a + delta, x)
                       - 8 * gammaincc(a - delta, x)
                       + gammaincc(a - 2 * delta, x)) / (12 * delta),
    ),
    # gammaincc = 1 - gammainc, hence the sign flip on the x-derivative
    lambda ans, a, x: unbroadcast_f(
        x, lambda g: -g * np.exp(-x) * np.power(x, a - 1) / gamma(a)),
)
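# A numerical spot-check of the analytic x-gradient of gammainc (a sketch):
# autograd's grad w.r.t. x should match a central difference of the scipy
# function (_scipy_gammainc, as imported by this module).
import autograd
import numpy.testing as npt

a0, x0, h = 2.5, 1.3, 1e-6
num = (_scipy_gammainc(a0, x0 + h) - _scipy_gammainc(a0, x0 - h)) / (2 * h)
npt.assert_allclose(autograd.grad(gammainc, 1)(a0, x0), num, rtol=1e-5)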
from __future__ import absolute_import
import autograd.numpy as np
import autograd.scipy.special as sp

### Gamma functions ###
sp.polygamma.defjvp(lambda g, ans, gvs, vs, n, x: g * sp.polygamma(n + 1, x),
                    argnum=1)
sp.psi.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.digamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.gamma.defjvp(lambda g, ans, gvs, vs, x: g * ans * sp.psi(x))
sp.gammaln.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x))
sp.rgamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x) / -sp.gamma(x))
sp.multigammaln.defjvp(lambda g, ans, gvs, vs, a, d: g * np.sum(
    sp.digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))

### Bessel functions ###
sp.j0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.j1(x))
sp.y0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.y1(x))
sp.j1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.j0(x) - sp.jn(2, x)) / 2.0)
sp.y1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.y0(x) - sp.yn(2, x)) / 2.0)
sp.jn.defjvp(lambda g, ans, gvs, vs, n, x:
             g * (sp.jn(n - 1, x) - sp.jn(n + 1, x)) / 2.0, argnum=1)
sp.yn.defjvp(lambda g, ans, gvs, vs, n, x:
             g * (sp.yn(n - 1, x) - sp.yn(n + 1, x)) / 2.0, argnum=1)

### Error Function ###
sp.erf.defjvp(lambda g, ans, gvs, vs, x: 2. * g * sp.inv_root_pi * np.exp(-x**2))
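# Independent spot-check of one of the Bessel identities used above:
# J1'(x) = (J0(x) - J2(x)) / 2, verified with a central difference on scipy.
import numpy.testing as npt
from scipy.special import j0, j1, jn

xs = np.linspace(0.1, 10, 50)
h = 1e-6
num = (j1(xs + h) - j1(xs - h)) / (2 * h)
npt.assert_allclose((j0(xs) - jn(2, xs)) / 2.0, num, rtol=1e-5, atol=1e-8)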
def ELBO_terms(param, prior, X, S, Ncon, G, M, K):
    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,
     mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta (the softmax responsibilities) given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)          # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)           # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)   # len(M)
    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)             # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)     # len(G)
    E_C = phi                                               # shape(M, G)
    E_W = mu_w                                              # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)                # shape(G, K)

    E_logP_Alpha = (prior['tau_a1'] - 1) * E_log_Alpha + \
        (prior['tau_a2'] - 1) * E_log_OneMinusAlpha - \
        gammaln(prior['tau_a1'] + eps) - \
        gammaln(prior['tau_a2'] + eps) + \
        gammaln(prior['tau_a1'] + prior['tau_a2'] + eps)
    E_logP_Beta = (prior['tau_b1'] - 1) * E_log_Beta + \
        (prior['tau_b2'] - 1) * E_log_OneMinusBeta - \
        gammaln(prior['tau_b1'] + eps) - \
        gammaln(prior['tau_b2'] + eps) + \
        gammaln(prior['tau_b1'] + prior['tau_b2'] + eps)
    E_logQ_Alpha = (tau_a1 - 1) * E_log_Alpha + \
        (tau_a2 - 1) * E_log_OneMinusAlpha - \
        gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
        gammaln(tau_a1 + tau_a2 + eps)
    E_logQ_Beta = (tau_b1 - 1) * E_log_Beta + \
        (tau_b2 - 1) * E_log_OneMinusBeta - \
        gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
        gammaln(tau_b1 + tau_b2 + eps)
    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]

    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]
        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)
        L_1 = L_1 + Ncon[m] * E_log_Beta[m] + np.sum(tp_con) * \
            (E_log_OneMinusBeta[m] - E_log_Beta[m]) + \
            tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] -
                        E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
            tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])
        # stick-breaking weight: sum E[log(1 - nu_l)] over l < g,
        # i.e. indices 0..g-1
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g])
        # list(...) so np.sum also works on Python 3, where map is lazy
        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
            np.dot(phi[m], E_log_Nu) + np.sum(list(map(fg, np.arange(G))))

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma'] - 1) * E_log_OneMinusNu[g] + \
            np.log(prior['gamma'] + eps)
        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)
        t5 = -np.log(np.sqrt(2 * np.pi) * prior['sigma_w']) - \
            0.5 / (prior['sigma_w']**2) * (sigma_w[g]**2 +
                                           (mu_w[g] - prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2 * np.pi) * prior['sigma_b'] + eps) - \
            0.5 / (prior['sigma_b']**2) * (sigma_b[g]**2 +
                                           (mu_b[g] - prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g] - 1) * E_log_Nu[g] + \
            (tau_v2[g] - 1) * E_log_OneMinusNu[g] - \
            np.log(gamma(tau_v1[g]) + eps) - np.log(gamma(tau_v2[g]) + eps) + \
            np.log(gamma(tau_v1[g] + tau_v2[g]) + eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    return (L_1, L_2, L_3)
def NegELBO(param, prior, X, S, Ncon, G, M, K):
    """
    Parameters
    ----------
    param: length (2M + 2M + MG + 2G + GNK + GDK + GDK + GK + GK)
        variational parameters, including:
        1) tau_a1: len(M), first parameter of q(alpha_m)
        2) tau_a2: len(M), second parameter of q(alpha_m)
        3) tau_b1: len(M), first parameter of q(beta_m)
        4) tau_b2: len(M), second parameter of q(beta_m)
        5) phi: shape(M, G), phi[m,:] is the parameter vector of q(c_m)
        6) tau_v1: len(G), first parameter of q(nu_g)
        7) tau_v2: len(G), second parameter of q(nu_g)
        8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of
           q(W^g_{dk})
        9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of
           q(W^g_{dk})
        10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k)
        11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of
            q(b^g_k)

    prior: dictionary
        the naming of keys follows that in param, {'tau_a1': val1, ...}

    X: shape(N, D)
        each row represents a sample and each column represents a feature

    S: shape(n_con, 4)
        each row represents an observed constraint
        (expert_id, sample1_id, sample2_id, constraint_type), where
        1) expert_id: varies between [0, M-1]
        2) sample1_id: varies between [0, N-1]
        3) sample2_id: varies between [0, N-1]
        4) constraint_type: 1 means must-link and 0 means cannot-link

    Ncon: shape(M, 1)
        number of constraints provided by each expert

    G: int
        number of local consensus solutions in the truncated Dirichlet
        process posterior

    M: int
        number of experts

    K: int
        maximal number of clusters among the different solutions; due to the
        use of discriminative clustering, some local solutions might have
        empty clusters

    Returns
    -------
    float
        the negative evidence lower bound, -(L_1 + L_2 - L_3)
    """
    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,
     mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta (the softmax responsibilities) given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)          # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)           # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)   # len(M)
    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)             # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)     # len(G)
    E_C = phi                                               # shape(M, G)
    E_W = mu_w                                              # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)                # shape(G, K)

    E_logP_Alpha = (prior['tau_a1'] - 1) * E_log_Alpha + \
        (prior['tau_a2'] - 1) * E_log_OneMinusAlpha - \
        gammaln(prior['tau_a1'] + eps) - \
        gammaln(prior['tau_a2'] + eps) + \
        gammaln(prior['tau_a1'] + prior['tau_a2'] + eps)
    E_logP_Beta = (prior['tau_b1'] - 1) * E_log_Beta + \
        (prior['tau_b2'] - 1) * E_log_OneMinusBeta - \
        gammaln(prior['tau_b1'] + eps) - \
        gammaln(prior['tau_b2'] + eps) + \
        gammaln(prior['tau_b1'] + prior['tau_b2'] + eps)
    E_logQ_Alpha = (tau_a1 - 1) * E_log_Alpha + \
        (tau_a2 - 1) * E_log_OneMinusAlpha - \
        gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
        gammaln(tau_a1 + tau_a2 + eps)
    E_logQ_Beta = (tau_b1 - 1) * E_log_Beta + \
        (tau_b2 - 1) * E_log_OneMinusBeta - \
        gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
        gammaln(tau_b1 + tau_b2 + eps)
    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]

    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]
        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)
        L_1 = L_1 + Ncon[m] * E_log_Beta[m] + np.sum(tp_con) * \
            (E_log_OneMinusBeta[m] - E_log_Beta[m]) + \
            tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] -
                        E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
            tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])
        # stick-breaking weight: sum E[log(1 - nu_l)] over l < g,
        # i.e. indices 0..g-1
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g])
        # list(...) so np.sum also works on Python 3, where map is lazy
        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
            np.dot(phi[m], E_log_Nu) + np.sum(list(map(fg, np.arange(G))))

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma'] - 1) * E_log_OneMinusNu[g] + \
            np.log(prior['gamma'] + eps)
        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)
        t5 = -np.log(np.sqrt(2 * np.pi) * prior['sigma_w']) - \
            0.5 / (prior['sigma_w']**2) * (sigma_w[g]**2 +
                                           (mu_w[g] - prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2 * np.pi) * prior['sigma_b'] + eps) - \
            0.5 / (prior['sigma_b']**2) * (sigma_b[g]**2 +
                                           (mu_b[g] - prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g] - 1) * E_log_Nu[g] + \
            (tau_v2[g] - 1) * E_log_OneMinusNu[g] - \
            np.log(gamma(tau_v1[g]) + eps) - np.log(gamma(tau_v2[g]) + eps) + \
            np.log(gamma(tau_v1[g] + tau_v2[g]) + eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    # note the third term enters with a minus sign
    ELBO = L_1 + L_2 - L_3
    return -ELBO
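# Consistency sketch tying the two functions together (the inputs are
# hypothetical, prepared per the docstring above): NegELBO should equal the
# negative of L_1 + L_2 - L_3 as returned by ELBO_terms on the same inputs.
L_1, L_2, L_3 = ELBO_terms(param, prior, X, S, Ncon, G, M, K)
assert np.isclose(NegELBO(param, prior, X, S, Ncon, G, M, K),
                  -(L_1 + L_2 - L_3))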
from __future__ import absolute_import

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f
from autograd.scipy.special import gamma, psi

cdf = primitive(scipy.stats.gamma.cdf)
logpdf = primitive(scipy.stats.gamma.logpdf)
pdf = primitive(scipy.stats.gamma.pdf)


def grad_gamma_logpdf_arg0(x, a):
    # d/dx log gamma.pdf(x, a) = (a - 1)/x - 1
    return (a - x - 1) / x


def grad_gamma_logpdf_arg1(x, a):
    # d/da log gamma.pdf(x, a) = log(x) - psi(a)
    return np.log(x) - psi(a)


# d/dx gamma.cdf(x, a) is the gamma density x**(a-1) * exp(-x) / gamma(a).
defvjp(cdf,
       lambda ans, x, a: unbroadcast_f(
           x, lambda g: g * np.exp(-x) * np.power(x, a - 1) / gamma(a)),
       argnums=[0])
defvjp(logpdf,
       lambda ans, x, a: unbroadcast_f(
           x, lambda g: g * grad_gamma_logpdf_arg0(x, a)),
       lambda ans, x, a: unbroadcast_f(
           a, lambda g: g * grad_gamma_logpdf_arg1(x, a)))
defvjp(pdf,
       lambda ans, x, a: unbroadcast_f(
           x, lambda g: g * ans * grad_gamma_logpdf_arg0(x, a)),
       lambda ans, x, a: unbroadcast_f(
           a, lambda g: g * ans * grad_gamma_logpdf_arg1(x, a)))
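# Gradient check for the shape parameter (a sketch, assuming autograd is
# installed): d/da logpdf(x, a) = log(x) - psi(a) should match a central
# difference of scipy's logpdf.
import autograd
import numpy.testing as npt

x0, a0, h = 2.0, 3.5, 1e-6
num = (scipy.stats.gamma.logpdf(x0, a0 + h)
       - scipy.stats.gamma.logpdf(x0, a0 - h)) / (2 * h)
npt.assert_allclose(autograd.grad(logpdf, 1)(x0, a0), num, rtol=1e-5)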