def student(theta, df, prod=True):
    """Implementation of the student t distribution with df degrees of freedom

    Parameters
    ----------
    theta : type
        Description of parameter `theta`.
    df : type
        Description of parameter `df`.
    prod : bool
        If true return the density of the sample
        If False, return the joint distribution

    Returns
    -------
    type
        float if prod
        np.ndarray if not prod
    """
    individual = gamma((df + 1.) / 2.) * (1 + theta**2 / df)**(-(df + 1) / 2) \
                / (gamma(df / 2.) * np.sqrt(df * np.pi))
    if prod:
        return np.prod(individual)
    else:
        return individual
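
# A quick check of the density against scipy.stats (a sketch, not part of
# the original snippet; `gamma` is assumed to come from scipy.special):
import numpy as np
import scipy.stats
from scipy.special import gamma

sample = np.array([-0.3, 0.1, 1.2])
ref = scipy.stats.t.pdf(sample, df=3.0)
assert np.allclose(student(sample, df=3.0, prod=False), ref)
assert np.isclose(student(sample, df=3.0), np.prod(ref))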

def gamma_(theta, alpha, beta, prod=True):
    """Gamma density with shape alpha and rate beta.

    Parameters
    ----------
    theta : np.ndarray
    alpha : float
        shape of the gamma distribution, > 0
    beta : float
        rate of the distribution, > 0
    prod : bool
        If True, return the joint density of the sample
        (the product of the individual densities).
        If False, return the array of individual densities.

    Returns
    -------
    float if prod is True, np.ndarray otherwise

    """
    x = indicator_positive(theta)
    individual = beta**alpha * theta**(alpha - 1) * np.exp(
        -beta * theta) / gamma(alpha) * x

    if prod:
        return np.prod(individual)
    else:
        return individual
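
# The helper indicator_positive is not defined in this snippet; a minimal
# sketch, assuming it simply masks the positive support, with a check
# against scipy.stats:
import numpy as np
import scipy.stats
from scipy.special import gamma

def indicator_positive(theta):
    # assumed behavior: 1.0 where theta > 0, else 0.0
    return (theta > 0).astype(float)

theta = np.array([0.5, 1.0, 2.5])
ours = gamma_(theta, alpha=2.0, beta=3.0, prod=False)
ref = scipy.stats.gamma.pdf(theta, a=2.0, scale=1.0 / 3.0)
assert np.allclose(ours, ref)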
Example #3
def logQ(sample, alpha, m):
    """
    Evaluates log of variational approximation, vectorized.
    """
    temp = alpha * (np.log(alpha) - np.log(m))
    temp += (alpha - 1.) * np.log(sample)
    temp -= alpha * sample / m
    temp -= np.log(sp.gamma(alpha))
    return temp
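
# temp is exactly the log pdf of a Gamma(shape=alpha, rate=alpha / m)
# distribution, so it can be checked against scipy.stats (a sketch;
# `sp` is assumed to be scipy.special as in the surrounding snippet):
import numpy as np
import scipy.special as sp
import scipy.stats

sample = np.array([0.5, 1.0, 2.0])
alpha, m = 3.0, 1.5
ref = scipy.stats.gamma.logpdf(sample, a=alpha, scale=m / alpha)
assert np.allclose(logQ(sample, alpha, m), ref)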
Example #4
    def probability_of_n_purchases_up_to_time(self, t, n):
        r"""
        Compute the probability of n purchases up to time t.

        .. math::  P( N(t) = n | \text{model} )

        where N(t) is the number of repeat purchases a customer makes in t
        units of time.

        Parameters
        ----------
        t: float
            number of units of time
        n: int
            number of purchases

        Returns
        -------
        float:
            probability of observing n purchases up to t units of time

        """
        r, alpha, a, b = self._unload_params("r", "alpha", "a", "b")
        _j = np.arange(0, n)

        first_term = (beta(a, b + n + 1) / beta(a, b) * gamma(r + n) /
                      gamma(r) / gamma(n + 1) * (alpha / (alpha + t))**r *
                      (t / (alpha + t))**n)
        finite_sum = (gamma(r + _j) / gamma(r) / gamma(_j + 1) *
                      (t / (alpha + t))**_j).sum()
        second_term = beta(a + 1, b + n) / beta(
            a, b) * (1 - (alpha / (alpha + t))**r * finite_sum)

        return first_term + second_term
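
# As a sanity check, P(N(t) = n) should sum to ~1 over n; a hedged sketch,
# assuming `model` is a fitted instance exposing this method:
import numpy as np

def check_purchase_distribution(model, t, n_max=200):
    # sum the pmf over n; truncation at n_max leaves a small tail
    total = sum(model.probability_of_n_purchases_up_to_time(t, n)
                for n in range(n_max))
    return np.isclose(total, 1.0, atol=1e-3)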
Example #5
def multinomial(theta, p):
    """implementation of the multinomial distribution, using the gamma function
    approximation of the factorial as in scipy.special

    Parameters
    ----------
    theta : np.ndarray of integers of size d
        input following the distribution; should contain the number of
        examples falling into each class
    p : np.ndarray of size d
        probability of falling into each class; should sum to 1


    Returns
    -------
    float
        probability mass function evaluated in theta

    """

    n = np.sum(theta)
    # n! = gamma(n + 1), theta_i! = gamma(theta_i + 1)
    ratio = gamma(n + 1) / np.prod(gamma(theta + 1))
    return ratio * np.prod(p**theta)
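
# With the factorials written as gamma(n + 1), the result can be checked
# against scipy.stats.multinomial (a sketch, with the imports the original
# snippet leaves implicit):
import numpy as np
import scipy.stats
from scipy.special import gamma

theta = np.array([2, 3, 5])
p = np.array([0.2, 0.3, 0.5])
ref = scipy.stats.multinomial.pmf(theta, n=theta.sum(), p=p)
assert np.isclose(multinomial(theta, p), ref)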
Example #6
    def probability_of_n_purchases_up_to_time(self, t, n):
        r"""
        Compute the probability of n purchases up to time t.

        .. math::  P( N(t) = n | \text{model} )

        where N(t) is the number of repeat purchases a customer makes in t
        units of time.

        Comes from equation (8) of [2]_.

        Parameters
        ----------
        t: float
            number of units of time
        n: int
            number of purchases

        Returns
        -------
        float:
            probability of observing n purchases up to t units of time

        References
        ----------
        .. [2] Fader, Peter S., Bruce G.S. Hardie, and Ka Lok Lee (2005a),
           "Counting Your Customers the Easy Way: An Alternative to the
           Pareto/NBD Model," Marketing Science, 24 (2), 275-84.
        """

        r, alpha, a, b = self._unload_params("r", "alpha", "a", "b")

        first_term = (
            beta(a, b + n)
            / beta(a, b)
            * gamma(r + n)
            / gamma(r)
            / gamma(n + 1)
            * (alpha / (alpha + t)) ** r
            * (t / (alpha + t)) ** n
        )

        if n > 0:
            j = np.arange(0, n)
            finite_sum = (gamma(r + j) / gamma(r) / gamma(j + 1) * (t / (alpha + t)) ** j).sum()
            second_term = beta(a + 1, b + n - 1) / beta(a, b) * (1 - (alpha / (alpha + t)) ** r * finite_sum)
        else:
            second_term = 0

        return first_term + second_term
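
# This method appears in the lifetimes library's BG/NBD model; a usage
# sketch, assuming the lifetimes package and its bundled CDNOW dataset
# are available:
from lifetimes import BetaGeoFitter
from lifetimes.datasets import load_cdnow_summary

data = load_cdnow_summary()
bgf = BetaGeoFitter()
bgf.fit(data["frequency"], data["recency"], data["T"])
# probability a customer makes exactly 2 repeat purchases in 40 units of time
print(bgf.probability_of_n_purchases_up_to_time(40, 2))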
Example #7
def test_a_special_case_of_the_derivative():
    """
    We know a specific to test against:

    dUIG(s, x) / ds at (s=1, x) = ln(x) * UIG(1, x) + E_1(x)

    where E_1(x) is the exponential integral
    """

    # upper incomplete gamma: UIG(s, x) = gammaincc(s, x) * gamma(s)
    UIG = lambda s, x: gammaincc(s, x) * gamma(s)

    def analytical_derivative(x):
        # note E_1(x) = -expi(-x) for x > 0
        dUIG = np.log(x) * UIG(1., x) - expi(-x)
        return dUIG

    def approx_derivative(x):
        return jacobian(UIG, argnum=0)(1., x)

    x = np.linspace(1, 12)
    npt.assert_allclose(analytical_derivative(x), approx_derivative(x))

    x = np.logspace(-25, 25, 100)
    npt.assert_allclose(analytical_derivative(x), approx_derivative(x))
Example #8
def kernel_matern(x, xp):
    # Matern covariance with signal sd, length-scale rho and smoothness eta,
    # all fixed to 1 here.
    sd, rho, eta = 1, 1, 1
    d = L1_norm(x, xp) * np.sqrt(2 * eta) / rho
    K = sd**2 * (2**(1 - eta) / gamma(eta))
    # Note: the textbook Matern kernel uses the modified Bessel function of
    # the second kind (scipy.special.kv); this snippet uses yn (Bessel
    # function of the second kind), as written in the original source.
    return K * yn(eta, d) * d**eta
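
# L1_norm is not defined in this snippet; a minimal sketch under the
# assumption that it is the pairwise absolute distance between the inputs:
import autograd.numpy as np

def L1_norm(x, xp):
    # assumed behavior: |x_i - xp_j| for all pairs, via broadcasting
    return np.abs(x[:, None] - xp[None, :])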
Example #9
def inverse_gamma_log_pdf(x, alpha, beta):
    ''' Log pdf of the inverse gamma distribution with shape alpha and scale beta '''
    return alpha * np.log(beta) - (
        alpha + 1.0) * np.log(x) - beta / x - np.log(gamma(alpha))
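
# A check against scipy.stats, where beta plays the role of the scale
# parameter (a sketch, with the imports the original snippet leaves implicit):
import numpy as np
import scipy.stats
from scipy.special import gamma

x = np.array([0.5, 1.0, 3.0])
ref = scipy.stats.invgamma.logpdf(x, a=2.0, scale=1.5)
assert np.allclose(inverse_gamma_log_pdf(x, alpha=2.0, beta=1.5), ref)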
Example #10
from __future__ import absolute_import, division

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f
from autograd.scipy.special import gamma

cdf = primitive(scipy.stats.chi2.cdf)
logpdf = primitive(scipy.stats.chi2.logpdf)
pdf = primitive(scipy.stats.chi2.pdf)


def grad_chi2_logpdf(x, df):
    # d/dx log chi2.pdf(x, df) = (df/2 - 1)/x - 1/2 = (df - x - 2) / (2x)
    return np.where(df % 1 == 0, (df - x - 2) / (2 * x), 0)


defvjp(cdf,
       lambda ans, x, df: unbroadcast_f(
           x, lambda g: g * np.power(2., -df / 2) * np.exp(-x / 2) * np.power(
               x, df / 2 - 1) / gamma(df / 2)),
       argnums=[0])
defvjp(
    logpdf,
    lambda ans, x, df: unbroadcast_f(x, lambda g: g * grad_chi2_logpdf(x, df)),
    argnums=[0])
defvjp(pdf,
       lambda ans, x, df: unbroadcast_f(
           x, lambda g: g * ans * grad_chi2_logpdf(x, df)),
       argnums=[0])
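
# The hand-written gradient can be checked by finite differences against
# scipy (a small sketch):
x, df, h = 3.0, 5, 1e-6
fd = (scipy.stats.chi2.logpdf(x + h, df) - scipy.stats.chi2.logpdf(x - h, df)) / (2 * h)
assert np.isclose(grad_chi2_logpdf(x, df), fd)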
Example #11
@primitive
def gammainc(k, x):
    ''' Lower regularized incomplete gamma function.
    '''
    return _scipy_gammainc(k, x)


delta = 1e-6

# d/da has no simple closed form, so approximate it with a fourth-order
# central difference; d/dx is the integrand x**(a - 1) * exp(-x) / gamma(a).
defvjp(
    gammainc,
    lambda ans, a, x: unbroadcast_f(
        a,
        lambda g: g *
        (-gammainc(a + 2 * delta, x) + 8 * gammainc(a + delta, x) - 8 *
         gammainc(a - delta, x) + gammainc(a - 2 * delta, x)) / (12 * delta),
    ),
    lambda ans, a, x: unbroadcast_f(
        x, lambda g: g * np.exp(-x) * np.power(x, a - 1) / gamma(a)),
)

gammaincc = primitive(_scipy_gammaincc)

defvjp(
    gammaincc,
    lambda ans, a, x: unbroadcast_f(
        a,
        lambda g: g *
        (-gammaincc(a + 2 * delta, x) + 8 * gammaincc(a + delta, x) - 8 *
         gammaincc(a - delta, x) + gammaincc(a - 2 * delta, x)) / (12 * delta),
    ),
    lambda ans, a, x: unbroadcast_f(
        x, lambda g: -g * np.exp(-x) * np.power(x, a - 1) / gamma(a)),
)
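
# The x-gradient used above is the integrand of the regularized lower
# incomplete gamma function; a quick finite-difference check with scipy:
import numpy
from scipy.special import gammainc as P, gamma as G

a, x, h = 2.5, 1.3, 1e-6
fd = (P(a, x + h) - P(a, x - h)) / (2 * h)
assert numpy.isclose(fd, numpy.exp(-x) * x**(a - 1) / G(a))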
Example #12
from __future__ import absolute_import

import autograd.numpy as np
import autograd.scipy.special as sp

### Gamma functions ###
sp.polygamma.defjvp(lambda g, ans, gvs, vs, n, x: g * sp.polygamma(n + 1, x),
                    argnum=1)
sp.psi.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.digamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.polygamma(1, x))
sp.gamma.defjvp(lambda g, ans, gvs, vs, x: g * ans * sp.psi(x))
sp.gammaln.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x))
sp.rgamma.defjvp(lambda g, ans, gvs, vs, x: g * sp.psi(x) / -sp.gamma(x))
sp.multigammaln.defjvp(lambda g, ans, gvs, vs, a, d: g * np.sum(
    sp.digamma(np.expand_dims(a, -1) - np.arange(d) / 2.), -1))

### Bessel functions ###
sp.j0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.j1(x))
sp.y0.defjvp(lambda g, ans, gvs, vs, x: -g * sp.y1(x))
sp.j1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.j0(x) - sp.jn(2, x)) / 2.0)
sp.y1.defjvp(lambda g, ans, gvs, vs, x: g * (sp.y0(x) - sp.yn(2, x)) / 2.0)
sp.jn.defjvp(lambda g, ans, gvs, vs, n, x: g *
             (sp.jn(n - 1, x) - sp.jn(n + 1, x)) / 2.0,
             argnum=1)
sp.yn.defjvp(lambda g, ans, gvs, vs, n, x: g *
             (sp.yn(n - 1, x) - sp.yn(n + 1, x)) / 2.0,
             argnum=1)

### Error Function ###
sp.erf.defjvp(
    lambda g, ans, gvs, vs, x: 2. * g * sp.inv_root_pi * np.exp(-x**2))
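
# The Bessel rules mirror standard identities, e.g. d/dx J0(x) = -J1(x);
# a quick finite-difference check with scipy:
from scipy.special import j0, j1

x, h = 1.7, 1e-6
fd = (j0(x + h) - j0(x - h)) / (2 * h)
assert abs(fd - (-j1(x))) < 1e-6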
Example #13
def ELBO_terms(param, prior, X, S, Ncon, G, M, K):
    """Return the three ELBO terms (L_1, L_2, L_3); the arguments are as
    documented for NegELBO below."""
    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)

    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
            (prior['tau_a2']-1) * E_log_OneMinusAlpha -  \
            gammaln(prior['tau_a1']+eps) - \
            gammaln(prior['tau_a2']+eps) + \
            gammaln(prior['tau_a1']+prior['tau_a2']+eps)

    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
            (prior['tau_b2']-1) * E_log_OneMinusBeta - \
            gammaln(prior['tau_b1']+eps) - \
            gammaln(prior['tau_b2']+eps) + \
            gammaln(prior['tau_b1']+prior['tau_b2']+eps)

    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
            gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
            gammaln(tau_a1+tau_a2 + eps)

    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
            gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
            gammaln(tau_b1+tau_b2 + eps)

    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]
    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]

        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)

        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
                (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
                tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
                E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
                tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])

        # stick-breaking: E[log pi_g] = E[log nu_g] + sum over l < g of E[log(1 - nu_l)]
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[:g])

        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
                np.dot(phi[m], E_log_Nu) + np.sum([fg(g) for g in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
                np.log(prior['gamma']+eps)

        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)

        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
                0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
                (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
                0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
                (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\
                np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
                np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    return (L_1, L_2, L_3)
Example #14
def NegELBO(param, prior, X, S, Ncon, G, M, K):
    """
    Parameters
    ----------
    param: length 4M + MG + 2G + 2GDK + 2GK (items 1-11 below, concatenated)
        variational parameters, including:
        1) tau_a1: len(M), first parameter of q(alpha_m)
        2) tau_a2: len(M), second parameter of q(alpha_m)
        3) tau_b1: len(M), first parameter of q(beta_m)
        4) tau_b2: len(M), second parameter of q(beta_m)
        5) phi: shape(M, G), phi[m,:] is the parameter vector of q(c_m)
        6) tau_v1: len(G), first parameter of q(nu_g)
        7) tau_v2: len(G), second parameter of q(nu_g)
        8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of 
            q(W^g_{dk})
        9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of 
            q(W^g_{dk})
        10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k)
        11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of q(b^g_k)

    prior: dictionary
        keys are named as in param, e.g. {'tau_a1': val1, ...}

    X: shape(N, D)
        each row represents a sample and each column represents a feature

    S: shape(n_con, 4)
        each row represents an observed constraint (expert_id, sample1_id,
        sample2_id, constraint_type), where
        1) expert_id: varies between [0, M-1]
        2) sample1 id: varies between [0, N-1]
        3) sample2 id: varies between [0, N-1]
        4) constraint_type: 1 means must-link and 0 means cannot-link

    Ncon: shape(M, 1)
        number of constraints provided by each expert

    G: int
        number of local consensus solutions in the truncated Dirichlet
        Process posterior

    M: int
        number of experts

    K: int
        maximal number of clusters among the different solutions; due to the
        use of discriminative clustering, some local solutions might have
        empty clusters

    Returns
    -------
    float
        the negative ELBO, -(L_1 + L_2 - L_3)
    """

    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)

    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
            (prior['tau_a2']-1) * E_log_OneMinusAlpha -  \
            gammaln(prior['tau_a1']+eps) - \
            gammaln(prior['tau_a2']+eps) + \
            gammaln(prior['tau_a1']+prior['tau_a2']+eps)

    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
            (prior['tau_b2']-1) * E_log_OneMinusBeta - \
            gammaln(prior['tau_b1']+eps) - \
            gammaln(prior['tau_b2']+eps) + \
            gammaln(prior['tau_b1']+prior['tau_b2']+eps)

    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
            gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
            gammaln(tau_a1+tau_a2 + eps)

    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
            gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
            gammaln(tau_b1+tau_b2 + eps)

    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]
    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]

        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)

        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
                (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
                tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
                E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
                tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])

        # stick-breaking: E[log pi_g] = E[log nu_g] + sum over l < g of E[log(1 - nu_l)]
        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[:g])

        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
                np.dot(phi[m], E_log_Nu) + np.sum([fg(g) for g in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
                np.log(prior['gamma']+eps)

        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)

        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
                0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
                (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
                0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
                (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\
                np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
                np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    # the third term enters with a minus sign
    ELBO = L_1 + L_2 - L_3

    return -ELBO
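
# A sketch of how NegELBO would typically be driven, assuming the flat
# parameter layout matches unpackParam (M, G, D, K, prior, X, S, Ncon
# defined as in the docstring above):
import numpy as np
from scipy.optimize import minimize

dim = 4 * M + M * G + 2 * G + 2 * G * D * K + 2 * G * K  # assumed layout
param0 = np.random.rand(dim)
result = minimize(NegELBO, param0, args=(prior, X, S, Ncon, G, M, K),
                  method="L-BFGS-B")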
Example #15
File: chi2.py  Project: HIPS/autograd
from __future__ import absolute_import, division

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f
from autograd.scipy.special import gamma

cdf = primitive(scipy.stats.chi2.cdf)
logpdf = primitive(scipy.stats.chi2.logpdf)
pdf = primitive(scipy.stats.chi2.pdf)

def grad_chi2_logpdf(x, df):
    # d/dx log chi2.pdf(x, df) = (df/2 - 1)/x - 1/2 = (df - x - 2) / (2x)
    return np.where(df % 1 == 0, (df - x - 2) / (2 * x), 0)

defvjp(cdf, lambda ans, x, df: unbroadcast_f(x, lambda g: g * np.power(2., -df/2) * np.exp(-x/2) * np.power(x, df/2 - 1) / gamma(df/2)), argnums=[0])
defvjp(logpdf, lambda ans, x, df: unbroadcast_f(x, lambda g: g * grad_chi2_logpdf(x, df)), argnums=[0])
defvjp(pdf, lambda ans, x, df: unbroadcast_f(x, lambda g: g * ans * grad_chi2_logpdf(x, df)), argnums=[0])
Example #16
from __future__ import absolute_import

import autograd.numpy as np
import scipy.stats
from autograd.extend import primitive, defvjp
from autograd.numpy.numpy_vjps import unbroadcast_f
from autograd.scipy.special import gamma, psi

cdf = primitive(scipy.stats.gamma.cdf)
logpdf = primitive(scipy.stats.gamma.logpdf)
pdf = primitive(scipy.stats.gamma.pdf)

def grad_gamma_logpdf_arg0(x, a):
    # d/dx log gamma.pdf(x, a) = (a - 1)/x - 1
    return (a - x - 1) / x

def grad_gamma_logpdf_arg1(x, a):
    # d/da log gamma.pdf(x, a) = log(x) - digamma(a)
    return np.log(x) - psi(a)

defvjp(cdf, lambda ans, x, a: unbroadcast_f(x, lambda g: g * np.exp(-x) * np.power(x, a-1) / gamma(a)), argnums=[0])
defvjp(logpdf,
       lambda ans, x, a: unbroadcast_f(x, lambda g: g * grad_gamma_logpdf_arg0(x, a)),
       lambda ans, x, a: unbroadcast_f(a, lambda g: g * grad_gamma_logpdf_arg1(x, a)))
defvjp(pdf,
       lambda ans, x, a: unbroadcast_f(x, lambda g: g * ans * grad_gamma_logpdf_arg0(x, a)),
       lambda ans, x, a: unbroadcast_f(a, lambda g: g * ans * grad_gamma_logpdf_arg1(x, a)))
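
# Both hand-written gradients can be verified by finite differences
# (a small sketch):
x, a, h = 2.0, 3.5, 1e-6
fd_x = (scipy.stats.gamma.logpdf(x + h, a) - scipy.stats.gamma.logpdf(x - h, a)) / (2 * h)
fd_a = (scipy.stats.gamma.logpdf(x, a + h) - scipy.stats.gamma.logpdf(x, a - h)) / (2 * h)
assert np.isclose(fd_x, grad_gamma_logpdf_arg0(x, a))
assert np.isclose(fd_a, grad_gamma_logpdf_arg1(x, a))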