Example #1
def LogJ(m_params, v_params, theano_observed_matrix):
    m_w, s_w, m_r, s_r, m_gamma, s_gamma, m_gamma0, s_gamma0, m_c0, s_c0, msigma, ssigma = v_params
    w, r, gamma, gamma0, c0, sigma = m_params
    is_observed_matrix_numpy = ~np.isnan(observed_matrix)
    is_observed_matrix.set_value(is_observed_matrix_numpy.astype(np.float64))
    theano_observed_matrix.set_value((np.nan_to_num(
        is_observed_matrix_numpy * observed_matrix)).astype(np.float64))
    log_j_t0 = time.clock()
    results, updates = theano.scan(
        fn=LogJointScanFn,
        sequences=[dict(input=np.arange(N), taps=[-1])],
        outputs_info=[dict(initial=np.float64(0), taps=[-1])],
        non_sequences=[is_observed_matrix, theano_observed_matrix])
    log_joint = results[-1]
    log_joint2 = (((D * gamma * T.log(gamma))[0] * r).sum() -
                  (D * T.gammaln(gamma[0] * r)).sum() +
                  ((gamma[0] * r - 1) * T.log(w)).sum() -
                  (gamma[0] * w).sum() +
                  (gamma0 * T.log(c0) -
                   THRESHOLD_RANK * T.gammaln(gamma0 / THRESHOLD_RANK) +
                   (gamma0 / THRESHOLD_RANK - 1)[0] * (T.log(r)).sum() -
                   (c0[0] * r).sum() - gamma - gamma0 - c0)[0])
    log_joint += log_joint2

    return (log_joint)
Example #2
    def shanon_Entropy_studentt(self, log_cov, freedom):
        Nrff, dout = log_cov.shape
        const = T.log(
            ((freedom - 2) * np.pi)**(dout / 2)
        ) + T.gammaln(freedom / 2) - T.gammaln((freedom + dout) / 2) + (T.psi(
            (freedom + dout) / 2) - T.psi(freedom / 2)) * (freedom + dout) / 2

        return 0.5 * T.sum(log_cov) + Nrff * const
Example #3
    def entropy_pi(self):
        log_gamma_term = T.sum( T.gammaln(self.tau_IBP[:,0]) + T.gammaln(self.tau_IBP[:,1]) \
                       - T.gammaln(self.tau_IBP[:,0] + self.tau_IBP[:,1]) )
        digamma_term = T.sum( (1.0-self.tau_IBP[:,0])*T.psi(self.tau_IBP[:,0])
                     + (1.0-self.tau_IBP[:,1])*T.psi(self.tau_IBP[:,1])
                     + (self.tau_IBP[:,0]+self.tau_IBP[:,1]-2.0)*T.psi(self.tau_IBP[:,0]+self.tau_IBP[:,1]) )

        return log_gamma_term + digamma_term
Example #4
 def calc_kl_divergence(self, prior_alpha, prior_beta):
     # use a Taylor approximation for the digamma function
     psi_a_taylor_approx = T.log(
         self.a) - 1. / (2 * self.a) - 1. / (12 * self.a**2)
     kl = (self.a - prior_alpha) * psi_a_taylor_approx
     kl += -T.gammaln(self.a) + T.gammaln(prior_alpha) + prior_alpha * (
         T.log(self.b) - T.log(prior_beta)) + (
             (self.a * (prior_beta - self.b)) / self.b)
     return kl.sum(axis=1)
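A quick way to sanity-check the Taylor expansion used above: log(a) - 1/(2a) - 1/(12a^2) approaches scipy.special.psi(a) as a grows. A minimal NumPy/SciPy sketch (not part of the original snippet; the values of a are arbitrary):

import numpy as np
from scipy.special import psi  # digamma

a = np.array([0.5, 1.0, 5.0, 50.0])
approx = np.log(a) - 1.0 / (2 * a) - 1.0 / (12 * a**2)
print(np.abs(approx - psi(a)))  # the error shrinks rapidly as a grows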
Example #5
def _log_partition_symfunc():
    natural_params = T.vector()
    log_Z = T.sum(T.gammaln(natural_params + 1.)) -\
        T.gammaln(T.sum(natural_params + 1))

    func = theano.function([natural_params], log_Z)
    grad_func = theano.function([natural_params],
                                T.grad(T.sum(log_Z), natural_params))
    return func, grad_func
Example #6
def loglik_primary_f(k, y, theta, lower_n):
    logit_p = theta[0]
    logn = theta[1]
    n = lower_n + T.exp(logn)
    k = k[:, 0]

    p = T.nnet.nnet.sigmoid(logit_p)

    combiln = T.gammaln(n + 1) - (T.gammaln(k + 1) + T.gammaln(n - k + 1))
    # add y to stop theano from complaining
    #loglik = combiln + k * T.log(p) + (n - k) * T.log1p(-p) + 0.0 * T.sum(y)
    loglik = combiln + k * T.log(p) + (n - k) * T.log(1.0 - p) + 0.0 * T.sum(y)
    return loglik
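The combiln term above is the log binomial coefficient log C(n, k); a small check against scipy.special with hypothetical n and k (a sketch, not from the original repo):

import numpy as np
from scipy.special import gammaln, comb

n, k = 20.0, np.array([0.0, 3.0, 7.0, 20.0])
combiln = gammaln(n + 1) - (gammaln(k + 1) + gammaln(n - k + 1))
print(np.allclose(combiln, np.log(comb(n, k))))  # True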
Example #7
    def __init__(self, mu=0.0, beta=None, cov=None, *args, **kwargs):
        super(GeneralizedGaussian, self).__init__(*args, **kwargs)
        # assert(mu.shape[0] == cov.shape[0] == cov.shape[1])
        dim = mu.shape[0]

        self.mu = mu
        self.beta = beta
        self.prec = tt.nlinalg.pinv(cov)
        # self.k = (dim * tt.gamma(dim / 2.0)) / \
        #          ((np.pi**(dim / 2.0)) * tt.gamma(1 + dim / (2 * beta)) * (2**(1 + dim / (2 * beta))))
        self.logk = tt.log(dim) + tt.gammaln(dim / 2.0) - \
                    (dim / 2.0) * tt.log(np.pi) - \
                    tt.gammaln(1 + dim / (2 * beta)) - \
                    (1 + dim / (2 * beta)) * tt.log(2.0)
Example #8
 def _negCLL(self, z, X):#, validation = False):
     """Estimate -log p[x|z]"""
     if self.params['data_type']=='binary':
         p_x_z    = self._conditionalXgivenZ(z)
         negCLL_m = T.nnet.binary_crossentropy(p_x_z,X)
     elif self.params['data_type'] =='bow':
         #Likelihood under a multinomial distribution
         if self.params['likelihood'] == 'mult':
             lsf      = self._conditionalXgivenZ(z)
             p_x_z    = T.exp(lsf) 
             negCLL_m = -1*(X*lsf)
         elif self.params['likelihood'] =='poisson':
             loglambda_p = self._conditionalXgivenZ(z)
             p_x_z       = T.exp(loglambda_p)
             negCLL_m    = -X*loglambda_p+T.exp(loglambda_p)+T.gammaln(X+1)
         else:
             raise ValueError('Invalid choice for likelihood: ' + self.params['likelihood'])
     elif self.params['data_type']=='real':
         params   = self._conditionalXgivenZ(z)
         mu,logvar= params[0], params[1]
         p_x_z    = mu  
         negCLL_m = 0.5 * np.log(2 * np.pi) + 0.5*logvar + 0.5 * ((X - mu)**2)/T.exp(logvar)
     else:
         assert False,'Bad data_type: '+str(self.params['data_type'])
     return p_x_z, negCLL_m.sum(1,keepdims=True)
Example #9
    def compute_LogDensity_Yterms(self,
                                  Y=None,
                                  X=None,
                                  padleft=False,
                                  persamp=False):
        """
        
        TODO: The persamp option allows this function to return a list of the costs
        computed for each sample. This is useful for implementing more
        sophisticated optimization procedures such as NVIL. TO BE IMPLEMENTED...
         
        NOTE: Please accompany a compute function with an eval function that
        allows evaluation from an external program. Compute functions assume by
        default that the 0th dimension of the data arrays is the trial
        dimension. If you deal with a single trial and the trial dimension is
        omitted, set padleft to True so that it is padded on the left.
        """
        if Y is None: Y = self.Y
        if X is None: X = self.X

        if padleft: Y = T.shape_padleft(Y, 1)

        Yprime = theano.clone(self.Rate, replace={self.X: X})
        Density = T.sum(Y * T.log(Yprime) - Yprime - T.gammaln(Y + 1))

        return Density
Example #10
    def get_viewed_cost(self, v0, vk_stat):
        # Binary cross-entropy
        cost = 0
        if self.input_type == InputType.binary:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat) + (1 - v0) * T.log(1 - clip_vk_stat), axis=1)

        # Sum square error
        elif self.input_type == InputType.gaussian:
            cost = T.sum((v0 - vk_stat) ** 2, axis=1)

        # Categorical cross-entropy
        elif self.input_type == InputType.categorical:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.float32(0.999999))
            cost = -T.sum(v0 * T.log(clip_vk_stat), axis=1)

        elif self.input_type == InputType.poisson:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
            cost = -T.sum(-vk_stat + v0 * T.log(clip_vk_stat) - T.gammaln(1 + v0), axis=1)

        if self.input_type == InputType.replicated_softmax:
            clip_vk_stat = T.clip(vk_stat, np.float32(0.000001), np.inf)
            cost = -T.sum((v0 / self.total_count) * T.log(clip_vk_stat), axis=1)

        return cost
Example #11
 def likelihood(xs):
     return tt.sum(
         tt.log(beta) -
         tt.log(2.0 * std *
                tt.sqrt(tt.gamma(1. / beta) / tt.gamma(3. / beta))) -
         tt.gammaln(1.0 / beta) + -tt.power(
             tt.abs_(xs - mu) / std *
             tt.sqrt(tt.gamma(1. / beta) / tt.gamma(3. / beta)), beta))
Example #12
 def likelihood(xs):
     return T.sum(
         T.log(beta) -
         T.log(2.0 * std *
               T.sqrt(T.gamma(1. / beta) / T.gamma(3. / beta))) -
         T.gammaln(1.0 / beta) + -T.power(
             T.abs_(xs - mu) / std *
             T.sqrt(T.gamma(1. / beta) / T.gamma(3. / beta)), beta))
Example #13
def log_joint_fn(N, D, K, m_params, y, cov, mask):

    w, r, gamma, gamma0, c0, sigma = m_params

    results, updates = theano.scan(fn=log_joint_scan_fn,
                                   sequences=np.arange(N),
                                   outputs_info=[dict(initial=np.float64(0), taps=[-1])],
                                   non_sequences=[y, cov, mask])

    log_joint = results[-1]

    log_joint += (((D * gamma * T.log(gamma))[0] * r).sum() -
                  (D * T.gammaln(gamma[0] * r)).sum() +
                  ((gamma[0] * r - 1) * T.log(w)).sum() -
                  (gamma[0] * w).sum() +
                  (gamma0 * T.log(c0) -
                   K * T.gammaln(gamma0 / K) +
                   (gamma0 / K - 1)[0] * (T.log(r)).sum() -
                   (c0[0] * r).sum() - gamma - gamma0 - c0)[0])

    return log_joint
Example #14
def log_negative_binomial(x, p, log_r, eps=0.0):
    """
    Compute the log pdf of a negative binomial distribution with success probability p and number of failures r (at which the experiment is stopped), evaluated at values x.
    
    A simple variation of Stirling's approximation is used: log x! = x log x - x.
    """

    x = T.clip(x, eps, x)

    p = T.clip(p, eps, 1.0 - eps)

    r = T.exp(log_r)
    r = T.clip(r, eps, r)

    y = T.gammaln(x + r) - T.gammaln(x + 1) - T.gammaln(r) \
        + x * T.log(p) + r * T.log(1 - p)

    return y
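Swapping the Theano ops for NumPy/SciPy equivalents (a sketch assuming eps=0 and made-up p and r), the formula above agrees with scipy.stats.nbinom once the success/failure convention is flipped:

import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

x = np.arange(0, 6, dtype=float)
p, r = 0.3, 4.0
y = gammaln(x + r) - gammaln(x + 1) - gammaln(r) + x * np.log(p) + r * np.log(1 - p)
# scipy parameterizes nbinom by the complementary probability under this convention
print(np.allclose(y, nbinom.logpmf(x, r, 1 - p)))  # True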
Example #15
def log_poisson(x, log_lambda, eps=0.0):

    x = T.clip(x, eps, x)

    lambda_ = T.exp(log_lambda)
    lambda_ = T.clip(lambda_, eps, lambda_)

    y = x * log_lambda - lambda_ - T.gammaln(x + 1)

    return y
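With NumPy/SciPy in place of Theano and eps=0 (a sketch with an arbitrary rate), this is the Poisson log-pmf:

import numpy as np
from scipy.special import gammaln
from scipy.stats import poisson

x = np.arange(0, 6, dtype=float)
lam = 2.5
y = x * np.log(lam) - lam - gammaln(x + 1)
print(np.allclose(y, poisson.logpmf(x, lam)))  # True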
Example #16
def log_gumbel_softmax(y, logits, tau=1):
    shape = logits.shape
    k = shape[-1]
    logits_flat = logits.reshape((-1, k))
    p_flat = T.nnet.softmax(logits_flat)
    p = p_flat.reshape(shape)
    log_gamma = T.gammaln(k)
    logsum = T.log(T.sum(p / (y**tau), axis=-1))
    sumlog = T.sum(T.log(p / (y**(tau + 1))), axis=-1)
    return log_gamma + (k - 1) * T.log(tau) - k * logsum + sumlog
Example #17
def log_gumbel_softmax(x, mu, tau=1.0, eps=1e-6):
    """
    Compute logpdf of a Gumbel Softmax distribution with parameters p, at values x.
        .. See Appendix B.[1:2] https://arxiv.org/pdf/1611.01144v2.pdf
    """
    k = mu.shape[-1]
    logpdf = T.gammaln(k) + (k - 1) * T.log(tau + eps) \
        - k * T.log(T.sum(T.exp(mu) / T.power(x, tau), axis=2) + eps) \
        + T.sum(mu - (tau + 1) * T.log(x + eps), axis=2)
    return logpdf
Example #18
    def eval_prior(self, buffers):
        """
        Evaluates prior on the latent variables
        """
        zsamp = self.S[:, buffers[0]:-buffers[1]]
        n_samples = zsamp.shape[0] // self.batch_size
        zsamp = zsamp.reshape((self.batch_size, n_samples, -1))
        ks = zsamp.sum(axis=-1)
        ns = zsamp.shape[-1].astype(config.floatX) * T.ones_like(ks)
        log_nok = T.gammaln(ns + 1) - T.gammaln(ks + 1) - T.gammaln(ns - ks +
                                                                    1)
        log_p = 0
        if self.n_genparams == 1:
            log_p = -0.5 * (T.log(2 * np.pi) + 2 * T.log(self.p_sigma) +
                            ((self.P / self.p_sigma)**2).sum(axis=-1))
            log_p = log_p.reshape((self.batch_size, n_samples))

        return log_nok + ks * T.log(
            self.pz) + (ns - ks) * T.log(1 - self.pz) + log_p
Example #19
 def evaluateLogDensity(self,X,Y):
     # This is the log density of the generative model (*not* negated)
     Ypred = theano.clone(self.rate,replace={self.Xsamp: X})
     resY  = Y-Ypred
     resX  = X[1:]-T.dot(X[:-1],self.A.T)
     resX0 = X[0]-self.x0
     LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     #LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     PoisDensity = T.sum(Y * T.log(Ypred)  - Ypred - T.gammaln(Y + 1))
     LogDensity = LatentDensity + PoisDensity
     return LogDensity
Example #20
 def evaluateLogDensity(self,X,Y):
     # This is the log density of the generative model (*not* negated)
     Ypred = theano.clone(self.rate,replace={self.Xsamp: X})
     resY  = Y-Ypred
     resX  = X[1:]-T.dot(X[:-1],self.A.T)
     resX0 = X[0]-self.x0
     LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     #LatentDensity = - 0.5*T.dot(T.dot(resX0,self.Lambda0),resX0.T) - 0.5*(resX*T.dot(resX,self.Lambda)).sum() + 0.5*T.log(Tla.det(self.Lambda))*(Y.shape[0]-1) + 0.5*T.log(Tla.det(self.Lambda0)) - 0.5*(self.xDim)*np.log(2*np.pi)*Y.shape[0]
     PoisDensity = T.sum(Y * T.log(Ypred)  - Ypred - T.gammaln(Y + 1))
     LogDensity = LatentDensity + PoisDensity
     return LogDensity
Example #21
    def _get_log_partition_func(dim, nparams):
        np1, np2, np3, np4 = nparams
        idxs = np.arange(dim) + 1
        W = T.nlinalg.matrix_inverse(np1 - (1. / np3) * T.outer(np2, np2))
        log_Z = .5 * (np4 + dim) * T.log(T.nlinalg.det(W))
        log_Z += .5 * (np4 + dim) * dim * np.log(2)
        log_Z += .5 * dim * (dim - 4)
        log_Z += T.sum(T.gammaln(.5 * (np4 + dim + 1 - idxs)))
        log_Z += -.5 * dim * T.log(np3)

        return log_Z, theano.function([], log_Z)
Example #22
    def liklihood_studnet_t(self, target, free_param):
        self.beta = T.exp(self.ls)
        Covariance = self.beta
        LL = self.log_mvns(target, self.output,
                           Covariance)  # - 0.5*T.sum(T.dot(betaI,Ktilda)))

        N, n_out = target.shape

        CH_const = T.gammaln((n_out + n_out + free_param) / 2) - T.log(
            ((free_param + n_out) * np.pi)**(n_out / 2)) - T.gammaln(
                (free_param + n_out) / 2)

        ch_mc, updates = theano.scan(
            fn=lambda a: T.sum(T.log(1 + T.sum(a * a, -1) / free_param)),
            sequences=[W_samples])

        CH_MC = T.mean(ch_mc)

        CH = CH_const * num_FF - CH_MC * (free_param + n_out) / 2

        return LL
Example #23
def mvt_logpdf_theano(x, mu, Li, df):
    import theano.tensor as T
    dim = Li.shape[0]
    Ki = Li.T.dot(Li)
    #determinant is just multiplication of diagonal elements of cholesky
    logdet = 2*T.log(1./T.diag(Li)).sum()
    lpdf_const = (T.gammaln((df + dim) / 2)
                       -(T.gammaln(df/2)
                         + (T.log(df)+T.log(np.pi)) * dim*0.5
                         + logdet * 0.5)
                       )

    d = (x - mu.reshape((1 ,mu.size))).T
    Ki_d_scal = T.dot(Ki, d) / df          #vector
    d_Ki_d_scal_1 = diag_dot(d.T, Ki_d_scal) + 1. #scalar
    
    res_pdf = (lpdf_const 
               - 0.5 * (df+dim) * T.log(d_Ki_d_scal_1)).flatten() 
    if res_pdf.size == 1:
        res_pdf = T.float(res_pdf)
    return res_pdf 
Example #24
def _log_partition_symfunc():
    natural_params = T.vector()
    size = natural_params.shape[0] // 4
    np1, np2, np3, np4 = T.split(natural_params, 4 * [size], 4)

    log_Z = T.sum(T.gammaln(.5 * (np4 + 1)))
    log_Z += T.sum(- .5 * (np4 + 1) * T.log(.5 * (np1 - (np2 ** 2) / np3)))
    log_Z += T.sum(-.5 * T.log(np3))

    func = theano.function([natural_params], log_Z)
    grad_func = theano.function([natural_params],
                                T.grad(T.sum(log_Z), natural_params))
    return func, grad_func
Example #25
def log_partf(b, s, C, v, logdet=None):
    D = b.size

    # multivariate log-gamma function
    g = tt.sum(tt.gammaln((v + 1. - tt.arange(1, D + 1)) /
                          2.)) + D * (D - 1) / 4. * np.log(np.pi)

    # log-partition function
    if logdet is None:
        return -v / 2. * tt.log(tl.det(C - tt.dot(b, b.T) / (4 * s))) \
         + v * np.log(2.) + g - D / 2. * tt.log(s)
    else:
        return -v / 2. * logdet + v * np.log(2.) + g - D / 2. * tt.log(s)
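The g term above is the multivariate log-gamma function Gamma_D(v/2), which SciPy exposes directly; a short NumPy check with hypothetical D and v (a sketch, not from the original code):

import numpy as np
from scipy.special import gammaln, multigammaln

D, v = 4, 7.5
g = np.sum(gammaln((v + 1. - np.arange(1, D + 1)) / 2.)) + D * (D - 1) / 4. * np.log(np.pi)
print(np.isclose(g, multigammaln(v / 2., D)))  # True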
Example #26
    def logp_cho(cls, value, mu, cho, freedom, mapping):
        delta = mapping.inv(value) - mu

        lcho = tsl.solve_lower_triangular(cho, delta)
        beta = lcho.T.dot(lcho)
        n = cho.shape[0].astype(th.config.floatX)

        np5 = np.float32(0.5)
        np2 = np.float32(2.0)
        npi = np.float32(np.pi)

        r1 = -np5 * (freedom + n) * tt.log1p(beta / (freedom - np2))
        r2 = ifelse(
            tt.le(np.float32(1e6), freedom), -n * np5 * np.log(np2 * npi),
            tt.gammaln((freedom + n) * np5) - tt.gammaln(freedom * np5) -
            np5 * n * tt.log((freedom - np2) * npi))
        r3 = -tt.sum(tt.log(tnl.diag(cho)))
        det_m = mapping.logdet_dinv(value)

        r1 = debug(r1, name='r1', force=True)
        r2 = debug(r2, name='r2', force=True)
        r3 = debug(r3, name='r3', force=True)
        det_m = debug(det_m, name='det_m', force=True)

        r = r1 + r2 + r3 + det_m

        cond1 = tt.or_(tt.any(tt.isinf_(delta)), tt.any(tt.isnan_(delta)))
        cond2 = tt.or_(tt.any(tt.isinf_(det_m)), tt.any(tt.isnan_(det_m)))
        cond3 = tt.or_(tt.any(tt.isinf_(cho)), tt.any(tt.isnan_(cho)))
        cond4 = tt.or_(tt.any(tt.isinf_(lcho)), tt.any(tt.isnan_(lcho)))
        return ifelse(
            cond1, np.float32(-1e30),
            ifelse(
                cond2, np.float32(-1e30),
                ifelse(cond3, np.float32(-1e30),
                       ifelse(cond4, np.float32(-1e30), r))))
Example #27
 def _loglikelihood_step(self, Y_t, L_t, ll_t, W_t, M_t):
     import theano
     import theano.tensor as T
     sum_log_poisson = T.tensordot(Y_t, T.log(W_t), axes=[0,1]) \
         - T.sum(W_t, axis=1) - T.sum(T.gammaln(Y_t+1))
     M_nlc = theano.ifelse.ifelse(T.eq(L_t, -1), T.sum(M_t, axis=0),
                                  M_t[L_t])
     # for numerics: only account for values where M_nlc is not zero
     a = T.switch(T.eq(M_nlc, 0.), T.min(sum_log_poisson), sum_log_poisson)
     a = T.max(a, keepdims=True)
     logarg = T.switch(
         T.eq(M_nlc, 0.), 0.,
         T.exp(sum_log_poisson - a) * M_nlc /
         T.cast(M_t.shape[0], dtype='float32'))
     logarg = T.sum(logarg)
     return ll_t + a[0] + T.log(logarg)
Example #28
    def logp(self, x):
        alpha = self.alpha
        n = tt.sum(x, axis=-1)
        sum_alpha = tt.sum(alpha, axis=-1)

        const = (tt.gammaln(n + 1) +
                 tt.gammaln(sum_alpha)) - tt.gammaln(n + sum_alpha)
        series = tt.gammaln(x + alpha) - (tt.gammaln(x + 1) +
                                          tt.gammaln(alpha))
        result = const + tt.sum(series, axis=-1)
        return result
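One way to sanity-check this Dirichlet-multinomial log-probability is to enumerate every count vector for a small n and K and confirm the probabilities sum to one. A NumPy sketch of the same formula (alpha and n are hypothetical values, not from the original model):

import itertools
import numpy as np
from scipy.special import gammaln

def dm_logp(x, alpha):
    n = x.sum(axis=-1)
    sum_alpha = alpha.sum(axis=-1)
    const = gammaln(n + 1) + gammaln(sum_alpha) - gammaln(n + sum_alpha)
    series = gammaln(x + alpha) - (gammaln(x + 1) + gammaln(alpha))
    return const + series.sum(axis=-1)

alpha = np.array([0.7, 1.3, 2.0])
n = 4
counts = np.array([c for c in itertools.product(range(n + 1), repeat=3)
                   if sum(c) == n], dtype=float)
print(np.isclose(np.exp(dm_logp(counts, alpha)).sum(), 1.0))  # True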
Example #29
 def liklihood_studnet_t(self,target,free_param):            
     self.beta = T.exp(self.ls)
     Covariance = self.beta
     LL = self.log_mvns(target, self.output, Covariance)# - 0.5*T.sum(T.dot(betaI,Ktilda)))  
     
     N,n_out=target.shape    
     
     CH_const=T.gammaln((n_out+n_out+free_param)/2)-T.log(((free_param+n_out)*np.pi)**(n_out/2))-T.gammaln((free_param+n_out)/2) 
      
     ch_mc,updates=theano.scan(fn=lambda a: T.sum(T.log(1+T.sum(a*a,-1)/free_param)),
                           sequences=[W_samples])
      
     CH_MC=T.mean(ch_mc)
      
     CH=CH_const*num_FF-CH_MC*(free_param+n_out)/2    
     
 
     return LL
Example #30
def LogJ(m_params,v_params,theano_observed_matrix):
    m_matrix_estimate, s_matrix_estimate, m_w, s_w, m_r, s_r, m_gamma, s_gamma, m_gamma0, s_gamma0, m_c0, s_c0, msigma, ssigma=v_params
    matrix_estimate, w, r, gamma, gamma0, c0, sigma= m_params
    is_observed_matrix_numpy=~np.isnan(observed_matrix)
    is_observed_matrix.set_value(is_observed_matrix_numpy.astype(np.float64))
    theano_observed_matrix.set_value((np.nan_to_num(is_observed_matrix_numpy*observed_matrix)).astype(np.float64))
    log_j_t0=time.clock()
    log_joint=0
    for n in range(observed_count):
        log_joint+= np.power((observation_record[0,n]-matrix_estimate[int(observation_record_numpy[1,n]), int(observation_record_numpy[2,n])]),2)
    log_joint=(-1/(2*ERROR[0]*ERROR[0]))*log_joint
    print("first result")
    print(log_joint.eval())
    log_joint += -(N/2.0)*T.nlinalg.Det()(covariance_matrix) - (1/2.0)*T.nlinalg.trace(T.dot(T.dot(matrix_estimate, matrix_estimate.T), T.nlinalg.MatrixInverse()(covariance_matrix)))
    log_joint2= (((D*gamma*T.log(gamma))[0]*r).sum()-(D*T.gammaln(gamma[0]*r)).sum()+((gamma[0]*r-1)*T.log(w)).sum()-(gamma[0]*w).sum() + (gamma0*T.log(c0)-THRESHOLD_RANK*T.gammaln(gamma0/THRESHOLD_RANK)+(gamma0/THRESHOLD_RANK-1)[0]*(T.log(r)).sum()-(c0[0]*r).sum()-gamma-gamma0-c0)[0])
    log_joint += log_joint2
 
    return(log_joint)
Example #31
    def logp(self, value):
        topology = self.topology
        taxon_count = tt.as_tensor_variable(topology.get_taxon_count())
        root_index = topology.get_root_index()

        r = self.r
        a = self.a
        rho = self.rho

        log_coeff = (taxon_count - 1) * tt.log(2.0) - tt.gammaln(taxon_count)
        tree_logp = log_coeff + (taxon_count - 1) * tt.log(
            r * rho) + taxon_count * tt.log(1 - a)

        mrhs = -r * value
        zs = tt.log(rho + ((1 - rho) - a) * tt.exp(mrhs))
        ls = -2 * zs + mrhs
        root_term = mrhs[root_index] - zs[root_index]

        return tree_logp + tt.sum(ls) + root_term
Example #32
    def likelihood(self, z, y):
        η = z.flatten(min(2, z.ndim)) + self.bias
        Δ = self.binsize
        # 1st part of the likelihood
        L1 = tt.dot(y, η)
        if z.ndim > 1:
            ndim = z.ndim - 1
            shp_z = z.shape[-ndim:]
            L1 = L1.reshape(shp_z, ndim=ndim)
        # 2nd part of the likelihood
        λ = self.invlink(z + self.bias)
        L2 = Δ * tt.sum(λ, axis=0)
        # constant factors
        c1 = tt.sum(y) * tt.log(Δ)
        c2 = -tt.sum(tt.where(y > 1, tt.gammaln(y + 1), 0.0))
        const = c1 - c2

        L = L1 - L2 + const
        return as_tensor_variable(L, name='logL')
Example #33
 def _loglikelihood_step(self, Y_t, L_t, ll_t, W_t, M_t):
     import theano
     import theano.tensor as T
     sum_log_poisson = T.tensordot(Y_t, T.log(W_t), axes=[0,1]) \
         - T.sum(W_t, axis=1) - T.sum(T.gammaln(Y_t+1))
     M_nlc = theano.ifelse.ifelse(
         T.eq(L_t, -1),
         T.sum(M_t, axis=0),
         M_t[L_t]
         )
     # for numerics: only account for values where M_nlc is not zero
     a = T.switch(
         T.eq(M_nlc, 0.),
         T.min(sum_log_poisson),
         sum_log_poisson
         )
     a = T.max(a, keepdims=True)
     logarg = T.switch(
         T.eq(M_nlc, 0.),
         0.,
         T.exp(sum_log_poisson - a)*M_nlc/T.cast(M_t.shape[0], dtype='float32')
         )
     logarg = T.sum(logarg)
     return ll_t + a[0] + T.log(logarg)
Example #34
 def logprob(self, y_target, n, p):
     coeff = T.gammaln(n + y_target) - T.gammaln(y_target + 1) - T.gammaln(n)
     return - (coeff + n * T.log(p) + y_target * T.log(1-p))
Example #35
def kldiv_gamma(a1, b1, a0=a0, b0=b0):
    return T.sum((a1 - a0)*nnu.Psi()(a1) - T.gammaln(a1) + T.gammaln(a0) + a0*(T.log(b1) - T.log(b0)) + a1*((b0 - b1)/b1))
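The closed form above is the KL divergence between two Gamma distributions in shape/rate parameterization; it can be checked by Monte Carlo with scipy.stats.gamma. The shape/rate values below are assumptions for the sketch, not the model's a0, b0:

import numpy as np
from scipy.special import gammaln, psi
from scipy.stats import gamma

a1, b1, a0, b0 = 3.0, 2.0, 1.5, 1.0  # q = Gamma(a1, b1), p = Gamma(a0, b0)
kl_closed = ((a1 - a0) * psi(a1) - gammaln(a1) + gammaln(a0)
             + a0 * (np.log(b1) - np.log(b0)) + a1 * ((b0 - b1) / b1))

x = gamma.rvs(a1, scale=1.0 / b1, size=200000, random_state=0)
kl_mc = np.mean(gamma.logpdf(x, a1, scale=1.0 / b1) - gamma.logpdf(x, a0, scale=1.0 / b0))
print(kl_closed, kl_mc)  # the two estimates should agree to ~2 decimal places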
Example #36
    def __init__(self, rng, input, n_in, n_out, num_MC,num_FF,n_tot,free_param,Domain_number=None,number="1",Domain_consideration=True):
        # input is also expected to come in as 100*N*D.
        self.DATA=input
        #N=DATA.shape[1]
        #n_in_D=DATA.shape[2]
        srng = RandomStreams(seed=234)
        self.num_rff=num_FF

        
        #Define hyperparameters
        lhyp_values = np.zeros(n_in+1,dtype=theano.config.floatX)+np.log(0.1,dtype=theano.config.floatX)
        #lhyp_values = np.zeros(n_in+1,dtype=theano.config.floatX)+np.log(1.,dtype=theano.config.floatX)
        self.lhyp = theano.shared(value=lhyp_values, name='lhyp'+number, borrow=True)
        self.sf2,self.l = T.exp(self.lhyp[0]), T.exp(self.lhyp[1:1+n_in])
        
        if Domain_consideration:  # previous work did well with 0.1
            ls_value=np.zeros(Domain_number,dtype=theano.config.floatX)+np.log(0.1,dtype=theano.config.floatX)
        else:
            ls_value=np.zeros(1,dtype=theano.config.floatX)+np.log(0.1,dtype=theano.config.floatX)
        
        self.ls = theano.shared(value=ls_value, name='ls'+number, borrow=True)
        
        
        
        #Define prior omega
        #prior_mean_Omega.append(tf.zeros([self.d_in[i],1]))
        self.log_prior_var_Omega=T.tile(1/(self.l)**0.5,(num_FF,1)).T
        
        #Define posterior omega
        
        #get samples from  omega
        
        sample_value = np.random.randn(1,n_in,num_FF)
        
        self.sample_Omega_epsilon_0 = theano.shared(value=sample_value, name='sample_Omega'+number)
        #self.sample_Omega_epsilon_0 = srng.normal((1,n_in,num_FF))
        Omega_sample=self.sample_Omega_epsilon_0*self.log_prior_var_Omega[None,:,:]
        Omega_samples=T.tile(Omega_sample,(num_MC,1,1))
        
        self.samples=Omega_samples
        #Define prior W
        #prior_mean_W = T.zeros(2*num_FF)
        
        #log_prior_var_W = T.ones(2*num_FF)        
        
        #Define posterior W
        
        
        mean_mu_value = np.random.randn(2*num_FF,n_out)#* 1e-2 
        self.mean_mu = theano.shared(value=mean_mu_value, name='mean_mu'+number, borrow=True)
        
        log_var_value = np.zeros((2*num_FF,n_out))
        self.log_var_W = theano.shared(value=log_var_value, name='q_W'+number, borrow=True)
        
        #get samples from W
        sample_Omega_epsilon = srng.normal((num_MC,2*num_FF,n_out))
        f2 = T.cast(free_param, 'int64')
        N=srng.uniform(size=(f2+n_tot,num_MC), low=1e-10,high=1.0)
        gamma_factor=T.sum(T.log(N),0)*(-1)
        
        #gamma_factor=self.gamma_dist(free_param+n_tot,1,num_MC)
        
        
        sample_Omega_epsilon_gamma=((free_param+n_tot)/gamma_factor)[:,None,None]*sample_Omega_epsilon
        #MC*Nrff*dout
        W_samples = sample_Omega_epsilon_gamma * (T.exp(self.log_var_W)**0.5)[None,:,:] + self.mean_mu[None,:,:]
        

        
        # calculate lyaer N_MC*N*D_out
        F_next, updates = theano.scan(fn=lambda a,b,c: self.passage(a,b,c,num_FF),
                              sequences=[input,Omega_samples,W_samples])
        
        #output
        self.output = F_next
        
        #KL-divergence
         #Omega
         
         
         #W
         #cross-entropy-term
        #self.KL_W=self.DKL_gaussian(self.mean_mu, self.log_var_W, prior_mean_W, log_prior_var_W)
        CH_const=T.gammaln((n_out+free_param)/2)-T.log(((free_param-2)*np.pi)**(n_out/2))-T.gammaln(free_param/2) 
         
        ch_mc,updates=theano.scan(fn=lambda a: (T.log(1+T.sum(a*a,-1)/(free_param-2))),
                              sequences=[W_samples])
         
        CH_MC=T.mean(T.sum(ch_mc,-1))
         
        CH=CH_const*num_FF-CH_MC*(free_param+n_out)/2
        
         #entropy-term
        HF = self.shanon_Entropy_studentt(self.log_var_W,free_param+n_tot)
        
        self.KL_W=-HF-CH
        
        #parameter_setting
        self.all_params=[self.lhyp,self.ls,self.mean_mu,self.log_var_W]
        self.hyp_params=[self.lhyp,self.ls]
        self.variational_params=[self.mean_mu,self.log_var_W]
Example #37
 def in_loop(i,prev_res):
     j=i+1
     res = prev_res + T.gammaln(a + 0.5*(1 - j))
     return res
Example #38
 def kldiv_r(self, a1, b1):
     return - ((a1 - self.a0)*nnu.Psi()(a1) - T.gammaln(a1) + T.gammaln(self.a0) +
               self.a0*(T.log(b1) - T.log(self.b0)) + a1*((self.b0 - b1)/b1))[0]
Example #39
 def shanon_Entropy_studentt(self,log_cov,freedom):
     Nrff,dout=log_cov.shape
     const=T.log(((freedom-2)*np.pi)**(dout/2))+T.gammaln(freedom/2)-T.gammaln((freedom+dout)/2)  +   (T.psi((freedom+dout)/2 ) - T.psi(freedom/2))*(freedom+dout)/2 
     
     return 0.5*T.sum(log_cov) + Nrff*const
Example #40
def Beta_fn(a, b):
    return T.exp(T.gammaln(a) + T.gammaln(b) - T.gammaln(a+b))
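Beta_fn is the Euler Beta function computed via gammaln; a one-line NumPy check against scipy.special.beta (a sketch with arbitrary a, b):

import numpy as np
from scipy.special import gammaln, beta

a, b = 2.5, 4.0
print(np.isclose(np.exp(gammaln(a) + gammaln(b) - gammaln(a + b)), beta(a, b)))  # True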
Example #41
 def _log_beta_vec_func(self, alpha):
     output = 0
     for _k in range(self.k):
         output += T.gammaln(self._slice_last(alpha, _k))
     output -= T.gammaln(T.sum(alpha, axis=-1))
     return output
Example #42
 def _log_beta_func(self, alpha, beta):
     return T.gammaln(alpha) + T.gammaln(beta) - T.gammaln(alpha + beta)
Example #43
    def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
        if not self.run_already:
            from theano import tensor as t
            import theano
            y = t.matrix(name='y')
            f = t.matrix(name='f')
            g = t.matrix(name='g')

            def theano_logaddexp(x,y):
                #Implementation of logaddexp from numpy, but in theano
                tmp = x - y
                return t.where(tmp > 0, x + t.log1p(t.exp(-tmp)), y + t.log1p(t.exp(tmp)))

            # Full log likelihood before expectations
            logpy_t = -(t.exp(f) + t.exp(g)) + y*theano_logaddexp(f, g) - t.gammaln(y+1)
            logpy_sum_t = t.sum(logpy_t)

            dF_df_t = theano.grad(logpy_sum_t, f)
            d2F_df2_t = 0.5*theano.grad(t.sum(dF_df_t), f)  # This right?
            dF_dg_t = theano.grad(logpy_sum_t, g)
            d2F_dg2_t = 0.5*theano.grad(t.sum(dF_dg_t), g)  # This right?

            self.logpy_func = theano.function([f,g,y],logpy_t)
            self.dF_df_func = theano.function([f,g,y],dF_df_t)  # , mode='DebugMode')
            self.d2F_df2_func = theano.function([f,g,y],d2F_df2_t)
            self.dF_dg_func = theano.function([f,g,y],dF_dg_t)
            self.d2F_dg2_func = theano.function([f,g,y],d2F_dg2_t)
            self.run_already = True

        funcs = [self.logpy_func, self.dF_df_func, self.d2F_df2_func, self.dF_dg_func, self.d2F_dg2_func]

        D = Y.shape[1]
        mf, mg = m[:, :D], m[:, D:]
        vf, vg = v[:, :D], v[:, D:]

        F = 0  # Could do analytical components here

        T = self.T
        # Need to get these now to duplicate the censored inputs for quadrature
        gh_x, gh_w = self._gh_points(T)

        (F_quad, dF_dmf, dF_dvf, dF_dmg, dF_dvg) = self.quad2d(funcs=funcs, Y=Y, mf=mf, vf=vf, mg=mg, vg=vg,
                                                               gh_points=gh_points, exp_f=False, exp_g=False)

        F += F_quad
        # gprec = safe_exp(mg - 0.5*vg)
        dF_dmf += 0
        dF_dmg += 0
        dF_dvf += 0
        dF_dvg += 0

        dF_dm = np.hstack((dF_dmf, dF_dmg))
        dF_dv = np.hstack((dF_dvf, dF_dvg))

        if np.any(np.isnan(F_quad)):
            raise ValueError("Nan <log p(y|f,g)>_qf_qg")
        if np.any(np.isnan(dF_dmf)):
            raise ValueError("Nan gradients <log p(y|f,g)>_qf_qg wrt to qf mean")
        if np.any(np.isnan(dF_dmg)):
            raise ValueError("Nan gradients <log p(y|f,g)>_qf_qg wrt to qg mean")

        test_integration = False
        if test_integration:
            # Some little code to check the result numerically using quadrature
            from scipy import integrate
            i = 6  # datapoint index

            def quad_func(fi, gi, yi, mgi, vgi, mfi, vfi):
                #link_fi = np.exp(fi)
                #link_gi = np.exp(gi)
                #logpy_fg = -(link_fi + link_gi) + yi*np.logaddexp(fi, gi) - sp.special.gammaln(yi+1)
                logpy_fg = self.logpdf(np.atleast_2d(np.hstack([fi,gi])), np.atleast_2d(yi))
                return (logpy_fg  # log p(y|f,g)
                        * np.exp(-0.5*np.log(2*np.pi*vgi) - 0.5*((gi - mgi)**2)/vgi)  # q(g)
                        * np.exp(-0.5*np.log(2*np.pi*vfi) - 0.5*((fi - mfi)**2)/vfi)  # q(f)
                        )
            quad_func_l = partial(quad_func, yi=Y[i], mgi=mg[i], vgi=vg[i], mfi=mf[i], vfi=vf[i])

            def integrl(gi):
                return integrate.quad(quad_func_l, -70, 70, args=(gi))[0]

            print "These should match"

            print "Numeric scipy F quad"
            print integrate.quad(lambda fi: integrl(fi), -70, 70)

            print "2d quad F quad"
            print F[i]

        return F, dF_dm, dF_dv, None
Example #44
 def log_likelihood(self, samples, alpha, beta):
     output = alpha * T.log(beta + epsilon()) - T.gammaln(alpha)
     output += (alpha - 1) * T.log(samples + epsilon())
     output += -beta * samples
     return mean_sum_samples(output)
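Dropping the epsilon guards and the mean_sum_samples reduction, the per-sample term above is the Gamma log-pdf with shape alpha and rate beta; a NumPy/SciPy sketch with made-up values:

import numpy as np
from scipy.special import gammaln
from scipy.stats import gamma

samples = np.array([0.3, 1.0, 2.7])
alpha, beta_ = 2.0, 1.5
ll = alpha * np.log(beta_) - gammaln(alpha) + (alpha - 1) * np.log(samples) - beta_ * samples
print(np.allclose(ll, gamma.logpdf(samples, alpha, scale=1.0 / beta_)))  # True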
Example #45
    term3 = map(s.special.gammaln, (degMatrix==0).sum(axis= 0) + alphaNull[0]) - s.special.gammaln(alphaNull[0])
    return  np.array(term1 + term2 + term3)
        
        
###################################################################

###  The following are Theano representations of certain functions
# sum matrix ops

m1 = T.fmatrix()
m2 = T.fmatrix()
add = function([m1, m2], m1 + m2, allow_input_downcast=True)

# declare a function that calculates gammaln on a shared variable on GPU
aMatrix = shared(np.zeros((65536, 8192)), config.floatX, borrow=True)
gamma_ln = function([ ], T.gammaln(aMatrix))
theanoExp = function([ ], T.exp(aMatrix))
    
alpha = T.fscalar()
gamma_ln_scalar = function([alpha], T.gammaln(alpha), allow_input_downcast=True)

# now compute the second part of the F-score, which is the covariance of mut and deg
mutMatrix = shared(np.ones((32768, 4096)), config.floatX, borrow=True )  
expMatrix = shared(np.ones((8192, 4096)), config.floatX, borrow=True)
mDotE = function([], T.dot(mutMatrix, expMatrix))

nijk_11 = shared(np.zeros((32768, 4096)), config.floatX)
nijk_01 = shared(np.zeros((32768, 4096)), config.floatX)

fscore = shared(np.zeros((32768, 4096)), config.floatX)
tmpLnMatrix = shared(np.zeros((32768, 4096)), config.floatX, borrow=True)
Example #46
 def free_energy(self, v_sample):
     tmp, h = self.propup(v_sample)
     return -T.dot(v_sample, self.vbias) - T.dot(h, self.hbias) + \
            T.sum((-T.dot(v_sample, self.W) * h + T.gammaln(v_sample + 1)), axis=1)
Example #47
 def free_energy(v,h):
     #return -T.dot(v_sample, self.vbias) - T.dot(h, self.hbias) + \
          #  T.sum((-T.dot(v_sample, self.W) * h + T.gammaln(v_sample + 1)), axis=1)
     return -(v * bv).sum() - (h * bh).sum() + T.sum((- T.dot(v, W) * h + T.gammaln(v + 1)))
Example #48
def betaln(alpha, beta):
    return T.gammaln(alpha)+T.gammaln(beta) - T.gammaln(alpha+beta)
Example #49
    def loglikelihood(self, data=None, mode='unsupervised'):
        """
        Calculate the log-likelihood of the given data under the model
        parameters.

        Keyword arguments:
        data: nparray (data,label) or 'None' for input data
        mode=['unsupervised','supervised']: calculate supervised or
              unsupervised loglikelihood
        """
        # To avoid numerical problems the log-likelihood has to be
        # calculated in a more costly way, using intermediate
        # logarithmic functions

        # input data
        if data is None:
            Y = self.MultiLayer[0].Layer[0].get_input_data().astype('float32')
        else:
            Y = np.asarray(
                [self.MultiLayer[0].Layer[0].output(y) for y in data[0]],
                dtype='float32')

        # labels
        if (mode == 'supervised'):
            if data is None:
                L = self.MultiLayer[0].Layer[0].get_input_label()
            else:
                L = data[1]
        elif (mode == 'unsupervised'):
            L = (-1)*np.ones(Y.shape[0])
        
        # weights & dimensions
        W = self.MultiLayer[0].Layer[1].get_weights().astype('float32')
        N = Y.shape[0]
        C = W.shape[0]
        D = W.shape[1]
        if (self.number_of_multilayers() == 2):
            try:
                M = self.MultiLayer[1].Layer[1].get_weights()
            except:
                M = None
        elif ((self.number_of_multilayers() == 1) and
              (self.MultiLayer[0].number_of_layers() == 3)):
            M = self.MultiLayer[0].Layer[2].get_weights()
        else:
            M = None
        try:
            K = M.shape[0]
        except:
            K = None

        if not self._theano:
            if M is None:
                ones = np.ones(shape=(C,D), dtype=float)
                log_likelihood = np.empty(N, dtype=float)
                for ninput in xrange(N):
                    sum_log_poisson = np.sum(
                        log_poisson_function(ones*Y[ninput,:], W), axis=1)
                    a = np.max(sum_log_poisson)
                    log_likelihood[ninput] = -np.log(C) + a + \
                        np.log(np.sum(np.exp(sum_log_poisson - a)))
            else:
                ones = np.ones(shape=(C,D), dtype=float)
                log_likelihood = np.empty(N, dtype=float)
                for ninput in xrange(N):
                    sum_log_poisson = np.sum(
                        log_poisson_function(ones*Y[ninput,:], W), axis=1)
                    a = np.max(sum_log_poisson)
                    if (L[ninput] == -1):
                        log_likelihood[ninput] = a + np.log(np.sum(
                            np.exp(sum_log_poisson-a)*np.sum(M,axis=0)/float(K)))
                    else:
                        log_likelihood[ninput] = a + np.log(np.sum(
                            np.exp(sum_log_poisson - a)*
                            M[L[ninput],:]/float(K)))
            mean_log_likelihood = np.mean(log_likelihood)
            sum_log_likelihood = np.zeros_like(mean_log_likelihood)
            MPI.COMM_WORLD.Allreduce(mean_log_likelihood, sum_log_likelihood, op=MPI.SUM)
            mean_log_likelihood = sum_log_likelihood/float(MPI.COMM_WORLD.Get_size())
        else:
            import theano
            import theano.tensor as T
            ml = self.MultiLayer[0]
            if ml._scan_batch_size is None:
                nbatches = 1
                scan_batch_size = ml.Layer[0].get_input_data().shape[0]
            else:
                nbatches = int(np.ceil(
                    ml.Layer[0].get_input_data().shape[0]
                    /float(ml._scan_batch_size)))
                scan_batch_size = ml._scan_batch_size
            batch_log_likelihood = np.zeros(nbatches, dtype='float32')
            if M is None:
                if (self._t_sum_log_likelihood_W is None):
                    Y_t = T.matrix('Y', dtype='float32')
                    L_t = T.vector('L', dtype='int32')
                    W_t = T.matrix('W', dtype='float32')
                    sum_log_poisson = T.tensordot(Y_t,T.log(W_t), axes=[1,1]) \
                        - T.sum(W_t, axis=1) \
                        - T.sum(T.gammaln(Y_t+1), axis=1, keepdims=True)
                    a = T.max(sum_log_poisson, axis=1, keepdims=True)
                    logarg = T.sum(T.exp(sum_log_poisson-a), axis=1)
                    log_likelihood = -T.log(C) + a[:,0] + T.log(logarg)
                    # Compile theano function
                    self._t_sum_log_likelihood_W = theano.function(
                        [Y_t,L_t,W_t],
                        T.sum(log_likelihood),
                        on_unused_input='ignore')
                for nbatch in xrange(nbatches):
                    batch_log_likelihood[nbatch] = self._t_sum_log_likelihood_W(
                        Y[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('float32'),
                        L[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('int32'),
                        W.astype('float32'))
            else:
                if (self._t_sum_log_likelihood is None):
                    Y_t = T.matrix('Y', dtype='float32')
                    L_t = T.vector('L', dtype='int32')
                    W_t = T.matrix('W', dtype='float32')
                    M_t = T.matrix('M', dtype='float32')
                    sum_log_poisson = T.tensordot(Y_t,T.log(W_t), axes=[1,1]) \
                        - T.sum(W_t, axis=1) \
                        - T.sum(T.gammaln(Y_t+1), axis=1, keepdims=True)
                    M_nlc = M_t[L_t]
                    L_index = T.eq(L_t,-1).nonzero()
                    M_nlc = T.set_subtensor(M_nlc[L_index], T.sum(M_t, axis=0))
                    # for numerics: only account for values where M_nlc is
                    # not zero
                    a = T.switch(
                        T.eq(M_nlc, 0.),
                        T.cast(T.min(sum_log_poisson), dtype = 'int32'),
                        T.cast(sum_log_poisson, dtype = 'int32'))
                    a = T.max(a, axis=1, keepdims=True)
                    # logarg = T.switch(
                    #     T.eq(M_nlc, 0.),
                    #     0.,
                    #     T.exp(sum_log_poisson-a).astype('float32')*M_nlc\
                    #         /M_t.shape[0].astype('float32'))
                    logarg = T.switch(
                        T.eq(M_nlc, 0.),
                        0.,
                        T.exp(sum_log_poisson-a.astype('float32'))
                    )
                    logarg = T.sum(logarg, axis=1)
                    log_likelihood = a[:,0].astype('float32') + T.log(logarg)
                    # Compile theano function
                    self._t_sum_log_likelihood = theano.function(
                        [Y_t,L_t,W_t,M_t],
                        T.sum(log_likelihood),
                        on_unused_input='ignore')
                    """
                    # LL_scan:
                    ll_t = T.scalar('loglikelihood', dtype='float32')
                    sequences = [Y_t, L_t]
                    outputs_info = [ll_t]
                    non_sequences = [W_t, M_t]
                    likelihood, updates = theano.scan(
                        fn=self._loglikelihood_step,
                        sequences=sequences,
                        outputs_info=outputs_info,
                        non_sequences=non_sequences)
                    result = likelihood[-1]
                    # Compile function
                    self._loglikelihood_scan = theano.function(
                        inputs=sequences + outputs_info + non_sequences,
                        outputs=result,
                        name='loglikelihood')
                    """
                for nbatch in xrange(nbatches):
                    batch_log_likelihood[nbatch] = self._t_sum_log_likelihood(
                        Y[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('float32'),
                        L[nbatch*scan_batch_size:
                            (nbatch+1)*scan_batch_size].astype('int32'),
                        W.astype('float32'),
                        M.astype('float32'))
            mean_log_likelihood = np.sum(batch_log_likelihood)/float(N)
        return mean_log_likelihood
Example #50
 def __call__(self, X):
     A = T.dot(X[:-1], self.w_trans)
     A = T.exp(T.concatenate([w_init, A], axis=0))
     B = T.sum(T.gammaln(A), axis=-1) - T.gammaln(T.sum(A, axis=-1))
     L = T.dot(A-1, X.dimshuffle(0, 2, 1)) - B
Example #51
def analytical_kl(q1, q2, given, deterministic=False):
    try:
        [x1, x2] = given
    except:
        raise ValueError("The length of given list must be 2, "
                         "got %d" % len(given))

    q1_class = q1.__class__.__name__
    q2_class = q2.__class__.__name__
    if q1_class == "Gaussian" and q2_class == "UnitGaussianSample":
        mean, var = q1.fprop(x1, deterministic=deterministic)
        return gauss_unitgauss_kl(mean, var)

    elif q1_class == "Gaussian" and q2_class == "Gaussian":
        mean1, var1 = q1.fprop(x1, deterministic=deterministic)
        mean2, var2 = q2.fprop(x2, deterministic=deterministic)
        return gauss_gauss_kl(mean1, var1, mean2, var2)

    elif q1_class == "Bernoulli" and q2_class == "UnitBernoulliSample":
        mean = q1.fprop(x1, deterministic=deterministic)
        output = mean * (T.log(mean + epsilon()) + T.log(2)) +\
            (1 - mean) * (T.log(1 - mean + epsilon()) + T.log(2))
        return T.sum(output, axis=1)

    elif q1_class == "Categorical" and q2_class == "UnitCategoricalSample":
        mean = q1.fprop(x1, deterministic=deterministic)
        output = mean * (T.log(mean + epsilon()) + T.log(q1.k))
        return T.sum(output, axis=1)

    elif q1_class == "Kumaraswamy" and q2_class == "UnitBetaSample":
        """
        [Naelisnick+ 2016]
        Deep Generative Models with Stick-Breaking Priors
        """
        M = 10
        euler_gamma = 0.57721

        a, b = q1.fprop(x1, deterministic=deterministic)

        def taylor(m, a, b):
            return 1. / (m + a * b) * q2._beta_func(m / a, b)
        kl, _ = theano.scan(fn=taylor,
                            sequences=T.arange(1, M + 1),
                            non_sequences=[a, b])
        kl = T.sum(kl, axis=0)
        kl *= (q2.beta - 1) * b

        kl += ((a - q2.alpha) / a + epsilon()) *\
              (-euler_gamma - psi(b) - 1. / (b + epsilon()))
        kl += T.log(a * b + epsilon()) +\
            T.log(q2._beta_func(q2.alpha, q2.beta) + epsilon())
        kl += -(b - 1) / (b + epsilon())

        return T.sum(kl, axis=1)

    elif q1_class == "Gamma" and q2_class == "UnitGammaSample":
        """
        https://arxiv.org/pdf/1611.01437.pdf
        """
        alpha1, beta1 = q1.fprop(x1, deterministic=deterministic)
        alpha2 = T.ones_like(alpha1)
        beta2 = T.ones_like(beta1)

        output = (alpha1 - alpha2) * psi(alpha1)
        output += -T.gammaln(alpha1) + T.gammaln(alpha2)
        output += alpha2 * (T.log(beta1 + epsilon()) -
                            T.log(beta2 + epsilon()))
        output += alpha1 * (beta2 - beta1) / (beta1 + epsilon())

        return T.sum(output, axis=1)

    elif q1_class == "Beta" and q2_class == "UnitBetaSample":
        """
        http://bariskurt.com/kullback-leibler-divergence\
        -between-two-dirichlet-and-beta-distributions/
        """
        alpha1, beta1 = q1.fprop(x1, deterministic=deterministic)
        alpha2 = T.ones_like(alpha1) * q2.alpha
        beta2 = T.ones_like(beta1) * q2.beta

        output = T.gammaln(alpha1 + beta1) -\
            T.gammaln(alpha2 + beta2) -\
            (T.gammaln(alpha1) + T.gammaln(beta1)) +\
            (T.gammaln(alpha2) + T.gammaln(beta2)) +\
            (alpha1 - alpha2) * (psi(alpha1) - psi(alpha1 + beta1)) +\
            (beta1 - beta2) * (psi(beta1) - psi(alpha1 + beta1))

        return T.sum(output, axis=1)

    elif q1_class == "Dirichlet" and q2_class == "UnitDirichletSample":
        """
        http://bariskurt.com/kullback-leibler-divergence\
        -between-two-dirichlet-and-beta-distributions/
        """
        alpha1 = q1.fprop(x1, deterministic=deterministic)
        alpha1 = alpha1.reshape((alpha1.shape[0], alpha1.shape[1] / q1.k,
                                 q1.k))

        alpha2 = T.ones_like(alpha1) * q2.alpha

        output = T.gammaln(T.sum(alpha1, axis=-1)) -\
            T.gammaln(T.sum(alpha2, axis=-1)) -\
            T.sum(T.gammaln(alpha1), axis=-1) +\
            T.sum(T.gammaln(alpha2), axis=-1) +\
            T.sum((alpha1 - alpha2) *
                  (psi(alpha1) -
                   psi(T.sum(alpha1, axis=-1,
                             keepdims=True))), axis=-1)

        return T.sum(output, axis=1)

    elif (q1_class == "MultiDistributions") and (
            q2_class == "MultiPriorDistributions"):
        """
        PixelVAE
        https://arxiv.org/abs/1611.05013
        """
        all_kl = 0
        for i, q, p in zip(range(len(q1.distributions[:-1])),
                           q1.distributions[:-1],
                           reversed(q2.distributions)):
            if i == 0:
                _x = x1
            else:
                _x = q1.sample_mean_given_x(x1, layer_id=i - 1)[-1]
            z = q1.sample_given_x(x1, layer_id=i + 1)[-1]
            kl = analytical_kl(q, p, given=[tolist(_x), tolist(z)])
            all_kl += kl

        _x = q1.sample_mean_given_x(x1, layer_id=-2)[-1]
        kl = analytical_kl(
            q1.distributions[-1], q2.prior, given=[tolist(_x), None])
        all_kl += kl

        return all_kl

    elif q1_class == "MultiDistributions":
        if len(q1.distributions) >= 2:
            _x1 = q1.sample_given_x(x1, layer_id=-2)[-1]
        else:
            _x1 = x1
        return analytical_kl(q1.distributions[-1], q2,
                             given=[tolist(_x1), x2],
                             deterministic=deterministic)

    raise Exception("You cannot use this distribution as q or prior, "
                    "got %s and %s." % (q1_class, q2_class))
Example #52
def multinomial_coefficient(Obs, K, num_Obs):
    Ns_p1 = T.dot(Obs,T.ones((K,1))) + T.ones((num_Obs,1))   
    Obs_p1 = Obs + T.ones((num_Obs, K))
    lnmlti = T.gammaln(Ns_p1) - T.dot(T.gammaln(Obs_p1),T.ones((K,1)))
    return T.exp(lnmlti)
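For concrete counts the same gammaln identity reproduces the multinomial coefficient, e.g. 9!/(2!*3!*4!) = 1260; a NumPy sketch (not part of the original function):

import numpy as np
from scipy.special import gammaln

obs = np.array([2.0, 3.0, 4.0])
lnmlti = gammaln(obs.sum() + 1) - gammaln(obs + 1).sum()
print(np.exp(lnmlti))  # ~1260.0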