Example No. 1
def expected_entropy_from_alphas_ref(alphas):
    """Compute expectation of entropy (in nats), given alphas.  Eq (9) of AMP"""
    if sum(alphas) == 0:
        return 0
    kappa = float(sum(alphas))
    return (polygamma(0, kappa + 1) - sum(a / kappa * polygamma(0, a + 1)
                                          for a in alphas))
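A minimal usage sketch (not part of the original snippet; it assumes polygamma is imported from scipy.special and that the function above is in scope):

from scipy.special import polygamma

# Expected entropy (in nats) of a categorical distribution whose probability
# vector is drawn from Dirichlet(alphas).
alphas = [2.0, 3.0, 5.0]
print(expected_entropy_from_alphas_ref(alphas))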
Example No. 2
    def gradLikelihood(self, state):
        # State must be a VI object -- we expect it to have memoized
        # Determinant and Inverse lookup functions.
        if not issubclass(type(state), VI):
            raise StateError('State must be given in terms of a VI object, not %s.' % type(state).__name__)

        # Expected Bayesian network variables include b0, gamma, and c.
        reqKeys = ['b0','gamma','c']
        self.check_BNVs(state,reqKeys)

        gamma = state.bnv['gamma'].val_getter()
        b0 = state.bnv['b0'].val_getter()
        a0 = self.val_getter()
        c  = state.bnv['c'].val_getter()
        n = state.n
        diffProj = state.memoizer.FDifferenceProjection(gamma,c)
        

        # Note: scipy's polygamma(n, x) is the nth derivative of the digamma function, so polygamma(0, x) = psi(x).
        gradL = funcs.polygamma(0, n * 0.5 + a0) \
                 - funcs.polygamma(0, a0) \
                 + np.log(b0) - np.log(b0 + diffProj) \
                 + 0.5 * (funcs.polygamma(1,a0) + a0 * funcs.polygamma(2,a0)) \
                 / (a0 * funcs.polygamma(1,a0) - 1.0)

        if math.isnan(gradL):
            print 'NAN IN when a0 = %f'%a0
            print "b0: %f"%b0
            print "diffProj: %f"%diffProj

        return gradL
Example No. 3
def gnmf_solvebynewton(c, a0):
    """
	routine to solve C=Log(A)-Psi(A)+1 function by newtons method
	"""
    M, N = a0.shape
    Mc, Nc = c.shape

    if M == Mc and N == Nc:
        a = a0
        cond = 1
    else:
        a = a0[0, 0]
        cond = 4
    stop = False
    for i in range(10):
        delta = (log(a) - polygamma(0, a) + 1 - c) / (
            (1 / a) - polygamma(1, a))
        #print(delta.shape)
        count = 0
        while (delta > a).any():
            delta = delta / 2
            if count > 10:
                stop = True
                break
            count += 1
        if stop:
            break
        if (delta < 0).any():
            delta = 0
        a = a - delta
    if cond == 4:
        a = a * np.ones((M, N))

    return a
Example No. 4
    def f(self,x,t):
        N = len(x)/2
        xdot = pl.array([])

        # modulus the x for periodicity.
        x[N:2*N]= x[N:2*N]%self.d
        # HERE ---->> 1Dify
        for i in range(N):
            temp = 0.0
            for j in range(N):
                if i == j:
                    continue
                #repulsive x interparticle force of j on i
                temp += self.qq*(x[N+i]-x[N+j])/(pl.sqrt((x[N+i]-x[N+j])**2)**3)
                # All of the forces coming from the 'same' particle but from other 'cells' due to the
                # periodic constraints can be wrapped up in a sum that converges to an answer that can
                # be expressed in terms of polygamma functions (see pg 92 of notebook).
                # Note on the sign (xi-xj or xj-xi). Changing the sign of the xi-xj term (i.e. which
                # particle we are considering forces on) changes the direction of the force
                # appropriately.
                temp += self.qq*(polygamma(1,(self.d+x[N+i]-x[N+j])/self.d)-polygamma(1,1.0-((x[N+i]-x[N+j])/self.d)))/(self.d**2)
            # periodic force on particle i
            temp += self.As[i]*pl.sin(x[N+i])*pl.cos(t)
            temp -= self.beta*x[i]
            xdot = pl.append(xdot,temp)
        for i in range(N):
            xdot = pl.append(xdot,x[i])
        return xdot
Example No. 5
    def f(self,x,t):
        N = len(x)/2
        xdot = pl.array([])

        # modulus the x for periodicity.
        x[N:2*N]= x[N:2*N]%self.d
        # HERE ---->> 1Dify
        for i in range(N):
            temp = 0.0
            for j in range(N):
                if i == j:
                    continue
                #repulsive x interparticle force of j on i
                temp += self.qq*(x[N+i]-x[N+j])/(pl.sqrt((x[N+i]-x[N+j])**2)**3)
                # All of the forces coming from the 'same' particle but from other 'cells' due to the
                # periodic constraints can be wrapped up in a sum that converges to an answer that can
                # be expressed in terms of polygamma functions (see pg 92 of notebook).
                temp += self.qq*(polygamma(1,(self.d+x[N+i]-x[N+j])/self.d)-polygamma(1,1.0-((x[N+i]-x[N+j])/self.d)))/(self.d**2)
            # EC x force on particle i
            for a in range(2):
                temp+=self.As[i]*pl.sin(x[N+i]-a*pl.pi)*pl.cos(t-a*pl.pi)/(pl.cosh(1.0)-pl.cos(x[N+i]-a*pl.pi)) 
            temp -= self.beta*x[i]
            xdot = pl.append(xdot,temp)

        for i in range(N):
            xdot = pl.append(xdot,x[i])

    
        return xdot
Example No. 6
def objectiveGradient(lambda_k, nu, tau, Elog_eta_k, nDoc):
  ''' Calculate gradient of objectiveFunc, the objective for HDP variational inference.
      Returns
      -------
        gvec : 2*K length vector,
              where each entry gives partial derivative with respect to
                  the corresponding entry of Cvec
  '''
  # lvec is the derivative of log(lambda_k) via chain rule
  lvec = 1/(lambda_k)
  W = lvec.size
  
  # Derivative of log eta
  digammaAll = digamma(np.sum(lambda_k))
  Elog_lambda_k = digamma(lambda_k) - digammaAll

  # Derivative of Elog_phi_k and E_phi_k
  polygammaAll = polygamma(1,np.sum(lambda_k))
  dElog_phi_k = polygamma(1,lambda_k) - polygammaAll
  lambda_k_sum = np.sum(lambda_k)
  dE_phi_k = (lambda_k_sum - lambda_k) / np.power(lambda_k_sum,2)

  gvec = dElog_phi_k * (N + tau - lambda_k) \
       + dE_phi_k * nu * Elog_eta_k
  gvec = -1 * gvec

  # Apply chain rule!
  gvecC = lvec * gvec
  return gvecC
Example No. 7
    def update_alpha(self, gammat, rho):
        """
        Update parameters for the Dirichlet prior on the per-document
        topic weights `alpha` given the last `gammat`.

        Uses Newton's method, described in **Huang: Maximum Likelihood Estimation of Dirichlet Distribution Parameters.** (http://www.stanford.edu/~jhuang11/research/dirichlet/dirichlet.pdf)

        """
        N = float(len(gammat))
        logphat = sum(dirichlet_expectation(gamma) for gamma in gammat) / N
        dalpha = numpy.copy(self.alpha)
        gradf = N * (psi(numpy.sum(self.alpha)) - psi(self.alpha) + logphat)

        c = N * polygamma(1, numpy.sum(self.alpha))
        q = -N * polygamma(1, self.alpha)

        b = numpy.sum(gradf / q) / ( 1 / c + numpy.sum(1 / q))

        dalpha = -(gradf - b) / q

        if all(rho() * dalpha + self.alpha > 0):
            self.alpha += rho() * dalpha
        else:
            logger.warning("updated alpha not positive")
        logger.info("optimized alpha %s" % list(self.alpha))

        return self.alpha
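The Newton step above never builds or inverts the full Hessian. A hedged reading of the code (the same trick reappears in several later examples): for the Dirichlet log-likelihood the Hessian has the form H = diag(q) + c·11ᵀ, so Sherman-Morrison gives a closed-form, linear-time update,

$$
q_k = -N\,\psi_1(\alpha_k), \qquad
c = N\,\psi_1\!\Big(\sum_j \alpha_j\Big), \qquad
b = \frac{\sum_j g_j/q_j}{1/c + \sum_j 1/q_j}, \qquad
\Delta\alpha_k = -\frac{g_k - b}{q_k},
$$

where g is the gradient `gradf` and ψ₁ is the trigamma function; this is exactly `dalpha = -(gradf - b) / q` in the code, with the step then damped by `rho()`.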
Example No. 8
File: lda.py  Project: laiguokun/LDA
def Mstep(max_iter):
	global alpha,beta,Gamma,Phi,doc,doc_cnt;
	#update beta
	for i in range(K):
		for v in range(voca_size):
			beta[i][v] = 0;
		for d in range(doc_size):
			for n in range(len(doc[d])):
				beta[i][doc[d][n]] += doc_cnt[d][n] * Phi[d][n][i];
	beta_sum = sum_matrix(beta, 0);
	for k in range(K):
		for i in range(voca_size):
			beta[k][i] = beta[k][i]/beta_sum[k];
	#update alpha
	last = 0;
	iter_num = 0;
	const = 0;
	for d in range(doc_size):
		gamma_sum = sum_vector(Gamma[d]);
		for i in range(K):
			const += (sp.psi(Gamma[d][i]) - sp.psi(gamma_sum));	
	now = -compute_alpha_mle(alpha);
	origin = now;
	while (abs(last - now) > 1e-9 and iter_num < max_iter):
		da = K * (doc_size * (sp.psi(alpha * K) - sp.psi(alpha))) + const;
		dda = K * (doc_size * (K * sp.polygamma(1, alpha * K) - sp.polygamma(1, alpha)));
		dx = -da/dda;
		alpha = backtrack(alpha,dx,da,0.01,0.5);
		last = now;
		now = -compute_alpha_mle(alpha);
		iter_num += 1;
	if (now < origin):
		print('error alpha');
Example No. 9
    def estimate(self,dat):
        '''

        Estimates the parameters from the data in dat. It is possible to only selectively fit parameters of the distribution by setting the primary array accordingly (see :doc:`Tutorial on the Distributions module <tutorial_Distributions>`).

        Estimate uses the algorithm by [Minka2002]_ to fit the parameters.

        :param dat: Data points on which the Gamma distribution will be estimated.
        :type dat: natter.DataModule.Data
        '''

        logmean = log(mean(dat.X))
        meanlog = mean(log(dat.X))
        u=2.0
        
        if 'u' in self.primary: # if we want to estimate u
            for k in range(self.maxCount):
                u = max(u,1e-08)
                unew= 1/u + (meanlog - logmean + log(u) - float(polygamma(0,u)))/ \
                      (u**2  * (1/u - float(polygamma(1,u))))
                unew = 1/unew
                if (unew-u)**2 < self.Tol:
                    u=unew
                    break
                u=unew
            
            self.param['u'] = unew;

        if 's' in self.primary:
            self.param['s'] = exp(logmean)/self.param['u'];
Example No. 10
def M_step(Phi, gamma, alpha, corpus, voc, k, M):
    V = len(voc)

    # 1 update Beta
    Beta = np.zeros([k, V])
    for d in range(0, M):
        words = np.array(corpus[d])
        voc_pos = np.array(list(map(lambda x: np.in1d(words, x), voc)))
        Beta += np.dot(voc_pos, Phi[d]).transpose()
    Beta = Beta / Beta.sum(axis=1).reshape(k, 1)

    # 2 update alpha
    for i in range(1000):

        old_alpha = alpha
        # Calculate the gradient
        g = M * (digamma(np.sum(alpha)) - digamma(alpha)) + np.sum(
            digamma(gamma) - np.tile(digamma(np.sum(gamma, axis=1)), (k, 1)).T,
            axis=0)

        # Calculate Hessian
        h = -M * polygamma(1, alpha)
        z = M * polygamma(1, np.sum(alpha))
        # Calculate parameter
        c = np.sum(g / h) / (1 / z + np.sum(1 / h))
        # Update alpha
        alpha -= (g - c) / h

        if np.sqrt(np.mean(np.square(alpha - old_alpha))) < 1e-4:
            break

    return alpha, Beta
Example No. 11
def update_one_alpha(alpha, theta, stepsize=.01, tol=1e-14):
# Newton method in [Minka00]
# NOTE: Need a small stepsize to prevent negative valued alpha
# (I haven't thought about why yet... isn't the log likelihood convex?)
    D, K = np.shape(theta)

    log_p = 1.0 / D * np.sum(np.log(theta), 1)
    while True:
        oldnorm = np.linalg.norm(alpha)
        g = D*psi(np.sum(alpha)) - D*psi(alpha) + D*log_p
#        print log_p.shape
        # Diagonal
        q = -D * polygamma(1, alpha)
        z = D * polygamma(1, np.sum(alpha))
        b = np.sum(g / q) / (1.0 / z + np.sum(1.0 / q))
#        print 'g = %s, q = %s, z = %s, b = %s'%(g, q, z, b)
#        print 'g - b = %s', (g - b)
#        print '(g - b) / q = %s', (g - b) / q 
#
#        print np.shape(stepsize)
#        print "%s - %s"%(alpha, stepsize * (g - b) / q)
        alpha -= stepsize * ((g - b) / q)

        if abs(np.linalg.norm(alpha) - oldnorm) < tol:
            break

    assert(np.all(alpha > 0))
    return alpha
Example No. 12
def test_hessian_h_and_z():
    h, z = hessian_h_and_z(M, alpha)
    for i in xrange(alpha.size):
        actual = h[i]
        expected = - M * polygamma(1, alpha[i])
        assert_array_almost_equal(actual, expected)
    assert_array_almost_equal(z, M * polygamma(1, alpha.sum()))
Example No. 13
def estimate_dirichlet_param(samples, param):
    """
    Uses a Newton-Raphson scheme to estimating the parameter of a
    K-dimensional Dirichlet distribution

    :param samples: an NxK matrix of K-dimensional vectors drawn from
    a Dirichlet distribution
    :param param: the old value of the paramter. This is overwritten
    :return: a K-dimensional vector which is the new
    """

    N, K = samples.shape
    p = np.sum(np.log(samples), axis=0)

    for _ in range(60):
        g = -N * fns.digamma(param)
        g += N * fns.digamma(param.sum())
        g += p

        q = -N * fns.polygamma(1, param)
        np.reciprocal(q, out=q)

        z = N * fns.polygamma(1, param.sum())

        b = np.sum(g * q)
        b /= 1 / z + q.sum()

        param -= (g - b) * q

        print("%.2f" % param.mean(), end=" --> ")
    print()

    return param
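A hedged usage sketch (data and names are illustrative, not from the source project); it assumes fns is scipy.special, as the snippet implies, and that param is a float array, since the routine updates it in place:

import numpy as np
import scipy.special as fns

true_alpha = np.array([2.0, 3.0, 1.5])
samples = np.random.dirichlet(true_alpha, size=5000)  # N x K draws
param = np.ones(3)                                    # starting point, refined in place
estimate_dirichlet_param(samples, param)
print(param)   # should land reasonably close to true_alpha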
Example No. 14
def e_step_one_iter(alpha, beta, docs, phi, ips):
    M, K = docs.size, alpha.size


    for m in xrange(M):
        N_m = docs[m].size
        psi_sum_ips = psi(ips[m, :].sum())
        for n in xrange(N_m):
            for i in xrange(K):
                E_q = psi(ips[m, i]) - psi_sum_ips
                phi[m][n, i] = (beta[i, docs[m][n]] *
                                np.exp(E_q))
        phi[m] /= phi[m].sum(axis=1)[:, None]  # normalize phi
        ips[m] = alpha + phi[m].sum(axis=0)


    # gradient computation
    grad_ips = np.zeros(ips.shape, dtype=np.float64)
    for m in xrange(M):
        for i in xrange(K):
            grad_ips[m, i]\
                = (polygamma(1, ips[m, i]) * (alpha[i] + phi[m][:, i].sum() - ips[m, i]) -
                   polygamma(1, ips[m, :].sum()) * (alpha.sum() + phi[m].sum() - ips[m, :].sum()))

    return (phi, ips, grad_ips)
Example No. 15
def objectiveGradient(lambda_k, nu, tau, Elog_eta_k, nDoc):
    ''' Calculate gradient of objectiveFunc, the objective for HDP variational inference.
      Returns
      -------
        gvec : 2*K length vector,
              where each entry gives partial derivative with respect to
                  the corresponding entry of Cvec
  '''
    # lvec is the derivative of log(lambda_k) via chain rule
    lvec = 1 / (lambda_k)
    W = lvec.size

    # Derivative of log eta
    digammaAll = digamma(np.sum(lambda_k))
    Elog_lambda_k = digamma(lambda_k) - digammaAll

    # Derivative of Elog_phi_k and E_phi_k
    polygammaAll = polygamma(1, np.sum(lambda_k))
    dElog_phi_k = polygamma(1, lambda_k) - polygammaAll
    lambda_k_sum = np.sum(lambda_k)
    dE_phi_k = (lambda_k_sum - lambda_k) / np.power(lambda_k_sum, 2)

    gvec = dElog_phi_k * (N + tau - lambda_k) \
         + dE_phi_k * nu * Elog_eta_k
    gvec = -1 * gvec

    # Apply chain rule!
    gvecC = lvec * gvec
    return gvecC
Example No. 16
    def J(self,t):
        # the -1 in the lines below is for the right rounding with int()
        # x1 is position of p1 (particle 1) 
        x1 = self.sol[int(t/self.dt)-1,2]
        # x2 is velocity of p1
        x2 = self.sol[int(t/self.dt)-1,0]
        # x3 is position of p2
        x3 = self.sol[int(t/self.dt)-1,3]
        # x4 is velocity of p2
        x4 = self.sol[int(t/self.dt)-1,1]

        # These are the differentials of the forces on the particles. Written like this to make the
        # matrix below easier to read: f13 is the force of p2 on p1, and _dx1 means the derivative with respect to x1.
        # Note on the 1/r^2 part -> it becomes cubic here, so it always retains its sign.
        df13_dx1 = -2.0/(x1-x3)**3 + (polygamma(2,1.0+(x1-x3)/self.d)+polygamma(2,1.0-(x1-x3)/self.d))/self.d**3
        # the final derivative of -x3 just gives you the negative of everything above
        df13_dx3 = -df13_dx1
        df31_dx1 = 2.0/(x3-x1)**3 - (polygamma(2,1.0-(x3-x1)/self.d)+polygamma(2,1.0+(x3-x1)/self.d))/self.d**3
        df31_dx3 = -df31_dx1


        # define the matrix elements of the time dependent jacobian
        jacobian = pl.array([ \
        [0.0                                   , 1.0       , 0.0                                   , 0.0],
        [self.A*pl.cos(x1)*pl.cos(t)+df13_dx1, -self.beta, df13_dx3                              , 0.0],
        [0.0                                   , 0.0       , 0.0                                   , 1.0],
        [df31_dx1                              , 0.0       , self.A*pl.cos(x3)*pl.cos(t)+df31_dx3, -self.beta]\
        ])

        return jacobian
Example No. 17
def frobenius_norm(counts):
    n = len(counts)
    pgsum = polygamma(1, counts.sum())
    A = (n**2 - n) * pgsum**2
    B = polygamma(1, counts) - polygamma(1, counts.sum())
    B = (B**2).sum()
    return np.sqrt(A + B)
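One way to read this helper (an interpretation, not stated in the source): for a Dirichlet with parameter vector `counts`, the covariance of the log-probabilities is Cov(log p_i, log p_j) = ψ₁(counts_i)·δ_ij − ψ₁(Σ counts), and the returned value is the Frobenius norm of that covariance matrix (the same matrix that cov_T builds in a later example). A small cross-check sketch:

import numpy as np
from scipy.special import polygamma

counts = np.array([3.0, 1.0, 5.0])
# Explicit covariance matrix of log p under Dirichlet(counts)
C = np.diag(polygamma(1, counts)) - polygamma(1, counts.sum())
print(frobenius_norm(counts), np.linalg.norm(C))   # the two values should agree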
Example No. 18
    def Newton(self):
        print "1, updating alpha------------------"
        ratio = len(self.docs)

        veck = copy.deepcopy(self.alpha)

        t = 0
        while True:
            print "updating the %d times" % t
            print "x%d" % t, veck[0:10]

            gk = self.grad()
            print "gk%d" % t, gk[0:10]
            if self.normof(gk) < self.rho:
                print "after udating:", veck[0:10]
                print ""

                self.alpha = veck
                return

            Hk = [[ratio * polygamma(1, sum(veck))] * len(veck)] * len(veck)
            duijiao = [ratio * polygamma(1, vecki) for vecki in veck]
            Hk = np.mat(Hk) - np.mat(np.diag(duijiao))
            #print "Hk%d"%t,Hk[0]

            pk = (-1 * (Hk.I) * (np.mat(gk).T)).T.tolist()[0]
            print "pk%d" % t, pk[0:10]
            break
            for i in range(len(veck)):
                veck[i] += pk[i]
            t += 1
Example No. 19
def H(params,n,k):
    alpha = params[0]
    beta = params[1]
    H=np.zeros(2)
    H[0]=k*special.polygamma(1,alpha)-n*special.polygamma(1,alpha+beta)
    H[1]=(n-k)*special.polygamma(1,beta)-n*special.polygamma(1,alpha+beta)
    return H
Example No. 20
def gradient_log_recognition(params,theta,i):
    alpha = params[0]
    beta = params[1]
    if i==0:
        return np.log(theta)-special.polygamma(0,alpha)+special.polygamma(0,alpha+beta)
    if i==1:
        return np.log(1-theta)-special.polygamma(0,beta)+special.polygamma(0,alpha+beta)
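A quick finite-difference sanity check for the gradient above (illustrative only; it assumes `special` is scipy.special and `np` is numpy, as in the snippet): the two partials should match the derivatives of log Beta(theta; alpha, beta) with respect to alpha and beta.

import numpy as np
from scipy import stats

params, theta, eps = np.array([2.0, 3.0]), 0.4, 1e-6
for i in range(2):
    bump = np.zeros(2)
    bump[i] = eps
    # central finite difference of the log Beta density in parameter i
    numeric = (stats.beta.logpdf(theta, *(params + bump))
               - stats.beta.logpdf(theta, *(params - bump))) / (2 * eps)
    print(gradient_log_recognition(params, theta, i), numeric)   # should agree to several digits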
Example No. 21
  def estimate_abundances(self):
    """
    Compute expectations and variances of the log relative abundances (log rho)
    of each target. Use these to compute 95% confidence intervals of the relative
    abundances themselves.
    """
    log_theta = np.zeros(self.ntargs)
    sd_log_theta = np.zeros(self.ntargs)
    for t in xrange(self.ntargs):
      log_theta[t] = psi(self.alpha[t]) - psi(self.alpha[t]+self.beta[t])
      var_log_theta = polygamma(1,self.alpha[t]) - polygamma(1,
         self.alpha[t]+self.beta[t])
      for j in xrange(t):
        log_theta[t] += psi(self.beta[j]) - psi(self.alpha[j]+self.beta[j])
        var_log_theta += polygamma(1,self.beta[j]) - polygamma(1,
           self.alpha[j]+self.beta[j])
      sd_log_theta[t] = sqrt(var_log_theta)
    self.log_theta = log_theta
    self.sd_log_theta = sd_log_theta
    theta_ci_low = np.zeros(self.ntargs)
    theta_ci_hi = np.zeros(self.ntargs)
    for t in xrange(self.ntargs):
      self.targ_samp_prob[t] = exp(log_theta[t])
      theta_ci_low[t] = exp(log_theta[t] - ci95sd * sd_log_theta[t])
      theta_ci_hi[t] = exp(log_theta[t] + ci95sd * sd_log_theta[t])

    # Compute relative abundances and confidence limits
    w = self.targ_samp_prob / self.eff_len
    self.rho = w / sum(w)
    w_low = theta_ci_low / self.eff_len
    self.rho_ci_low = w_low / sum(w_low)
    w_hi = theta_ci_hi / self.eff_len
    self.rho_ci_hi = w_hi / sum(w_hi)
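For reference, the identities this loop appears to rely on (a hedged reading, since the surrounding class is not shown): with a stick-breaking construction θ_t = v_t ∏_{j<t}(1 − v_j) and v_j ~ Beta(α_j, β_j),

$$
\mathbb{E}[\log v_j] = \psi(\alpha_j) - \psi(\alpha_j+\beta_j), \qquad
\operatorname{Var}[\log v_j] = \psi_1(\alpha_j) - \psi_1(\alpha_j+\beta_j),
$$
$$
\mathbb{E}[\log(1-v_j)] = \psi(\beta_j) - \psi(\alpha_j+\beta_j), \qquad
\operatorname{Var}[\log(1-v_j)] = \psi_1(\beta_j) - \psi_1(\alpha_j+\beta_j),
$$

so summing these terms over the sticks, treated as independent, is what fills `log_theta[t]` and `var_log_theta`.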
Example No. 22
def idML_dfdnu(nu, N, K, sum_inv_iws, sum_log_det_iws):
    """
    deriv of pdf of inv wishart-distributed variables wrt deg of freedom
    """
    hnu = nu * 0.5
    return N * K / nu - 0.5 * N * (
        _ssp.polygamma(1, hnu) + _ssp.polygamma(1, hnu - 0.5) +
        _ssp.polygamma(1, hnu - 1) + _ssp.polygamma(1, hnu - 1.5))
Example No. 23
        def hess(alpha):
            temp = np.zeros(self.L_h)
            temp = -1 / self.var_h * np.convolve(
                self.e_2u, np.square(self.g), mode='valid') / self.beta
            temp += -polygamma(1, alpha) + (self.lamb * np.square(self.v) -
                                            alpha) * polygamma(2, alpha)

            return temp
Example No. 24
 def hess_ll_nb(self, X, params):
     hess = np.zeros((2, 2))
     hess[0, 0] = np.sum(polygamma(
         1, X + params[0])) - X.size * polygamma(1, params[0])
     hess[0, 1] = hess[1, 0] = -X.size / (1 - params[1] + 1e-8)
     hess[1, 1] = -X.size * params[0] / (
         (1 - params[1])**2 + 1e-8) - X.sum() / (params[1]**2 + 1e-8)
     return hess
Example No. 25
def computeHessian(alpha, P, N, m):
    sumAlpha = np.sum(alpha)
    tempHessians = np.zeros((m))
    for i in range(m):
        tempHessians[i] = -1 * N * (polygamma(1, alpha[i]))
    c = N * polygamma(1, sumAlpha)
    Q = np.diag(tempHessians)
    return (Q, c)
Example No. 26
 def calc_gradient_rel_alpha(self, docs):
     g = numpy.array([0.0] * self.topicNum)
     for doc in docs:
         g += polygamma(0, doc.gamma)
         g -= polygamma(0, sum(doc.gamma))
     g += len(docs) * polygamma(0, sum(self.alpha))
     g -= len(docs) * polygamma(0, self.alpha)
     return g
Example No. 27
 def calc_gradient_rel_alpha(self,docs):
     g=numpy.array([0.0]*self.topicNum);
     for doc in docs:
         g+=polygamma(0,doc.gamma);
         g-=polygamma(0,sum(doc.gamma));
     g+=len(docs)*polygamma(0,sum(self.alpha));
     g-=len(docs)*polygamma(0,self.alpha);
     return g;
Example No. 28
def hes_nb_glm_disp_block(
        x: np.ndarray,
        mu: np.ndarray,
        disp: np.ndarray,
        design_loc: np.ndarray,
        design_scale: np.ndarray,
        i: int,
        j: int
):
    """ Compute entry of hessian in dispersion model block for a given gene.

    Sum the following across cells:
    $$
    h_{ij} =
        disp * x^{m_i} * x^{m_j} * [psi_0(disp+x)
        + psi_0(disp)
        - mu/(disp+mu)^2 * (disp+x)
        +(mu-disp) / (disp+mu)
        + log(disp)
        + 1 - log(disp+mu)]
        + disp * psi_1(disp+x)
        + disp * psi_1(disp)
    $$
    
    Make sure that only element wise operations happen here!
    Do not simplify design matrix elements: they are only 0 or 1 for discrete
    groups but continuous if space, time, pseudotime or spline basis covariates
    are supplied!
    
    :param x: np.ndarray (cells,)
        Observations for a given gene.
    :param mu: np.ndarray (cells,)
        Estimated mean parameters across cells for a given gene.
    :param disp: np.ndarray (cells,)
        Estimated dispersion parameters across cells for a given gene.
    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
        Design matrix of location model.
    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
        Design matrix of shape model.
    :param i: int
        Index of first dimension in fisher information matrix which is to be computed.
    :param j: int
        Index of second dimension in fisher information matrix which is to be computed

    :return: float
        Entry of fisher information matrix in dispersion model block at position (i,j)
    """
    h_ij = (
            disp * np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * polygamma(n=0, x=disp + x)
            + polygamma(n=0, x=disp)
            - mu / np.square(disp + mu) * (disp + x)
            + (mu - disp) / (disp + mu)
            + np.log(disp)
            + 1 - np.log(disp + mu)
            + disp * polygamma(n=1, x=disp + x)
            + disp * polygamma(n=1, x=disp)
    )
    return np.sum(h_ij)
Example No. 29
def glda_alpha_hess(alpha, pg, sym=True):
    ''' the Hessian of glda-alpha
    '''

    M, K = pg.shape
    if sym:
        return -M * (polygamma(1, K * alpha) * K * K - polygamma(1, alpha) * K)
    else:
        return -M * (polygamma(1, alpha.sum()) - diag(polygamma(1, alpha)))
Example No. 30
 def lowerbound_likelihood_rel_alpha(self,docs):
     m=len(docs);
     obj=m*gammaln(sum(self.alpha));
     obj-=m*gammaln(self.alpha).sum();
     for doc in docs:
         c=polygamma(0,sum(doc.gamma));
         for i in xrange(self.topicNum):
             obj+=(self.alpha[i]-1)*(polygamma(0,doc.gamma[i])-c);
     return obj;
Example No. 31
    def Compute_S_star(self, eta):
        """ Compute sufficient statistics S given the parameters eta.
        """
        eta1 = eta[0]
        eta2 = eta[1]
        S1 = eta1
        S2 = polygamma(0, eta2) - polygamma(0, eta2.sum())

        return (S1, S2)
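A short note on S2 (a standard Dirichlet fact offered as context, not taken from the source): if p ~ Dirichlet(η₂), then

$$
\mathbb{E}[\log p_k] = \psi(\eta_{2,k}) - \psi\Big(\sum_j \eta_{2,j}\Big),
$$

which is what `polygamma(0, eta2) - polygamma(0, eta2.sum())` computes componentwise.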
Example No. 32
 def lowerbound_likelihood_rel_alpha(self, docs):
     m = len(docs)
     obj = m * gammaln(sum(self.alpha))
     obj -= m * gammaln(self.alpha).sum()
     for doc in docs:
         c = polygamma(0, sum(doc.gamma))
         for i in xrange(self.topicNum):
             obj += (self.alpha[i] - 1) * (polygamma(0, doc.gamma[i]) - c)
     return obj
Example No. 33
def alpha_newton(alpha_t, gamma):
    h = D * (polygamma(1, np.sum(alpha_t)) - polygamma(1, alpha_t))
    z = D * polygamma(1, np.sum(alpha_t))
    g_at = D * (digamma(np.sum(alpha_t)) - digamma(alpha_t)) + np.sum(
        digamma(gamma), axis=0) - np.sum(digamma(np.sum(gamma, axis=1)),
                                         axis=0)
    c = np.sum(g_at / h) / (1 / z + np.sum(1 / h))
    U_at = (g_at - c) / h
    return alpha_t - U_at
Example No. 34
 def cov_T(self, eta):
     """
     @arg eta: The natural parameters.
     The covariance of T_i, T_j, the sufficient statistics, given
     eta.
     """
     theta = self.theta(eta)
     assert (self.dimension,) == theta.shape
     return diag(polygamma(1, theta)) - polygamma(1, theta.sum())
Example No. 35
def idML_f(nu, N, K, sum_inv_iws, sum_log_det_iws):
    """
    pdf of inv wishart-distributed variables
    """
    hnu = nu * 0.5
    return N * K * (_N.log(nu) - _N.log(2)) - N * _N.log(
        _N.linalg.det(sum_inv_iws / N)) - N * (_ssp.polygamma(
            0, hnu) + _ssp.polygamma(0, hnu - 0.5) + _ssp.polygamma(
                0, hnu - 1) + _ssp.polygamma(0, hnu - 1.5)) - sum_log_det_iws
Example No. 36
def glda_alpha_hess(alpha, pg, sym = True):
    ''' the Hessian of glda-alpha
    '''

    M, K = pg.shape
    if sym:
        return -M*(polygamma(1, K*alpha)*K*K - polygamma(1, alpha)*K)
    else:
        return -M*(polygamma(1, alpha.sum()) - diag(polygamma(1, alpha)))
Example No. 37
def estimateGGDCovShapeIn(X,p_init):

    N = X.shape[1]
    R = np.cov(X)

    #start at Gaussian
    bestC       = p_init
    c           = bestC

    Rold        = np.zeros((2,2),dtype=np.complex)

    xRxC        = 0
    dirXRX      = 0
    dirXRX2     = 0

    for n in xrange(N):
        temp    = X[:,n].conj().T.dot(inv(R)).dot(X[:,n])
        xRxC    += (temp**c).real
        dirXRX  += (log(temp)*temp**c).real
        dirXRX2 += (log(temp)**2*temp**c).real

    c2  = gamma(2*1/c)/(2*gamma(1/c))

    c2p = log(c2) - (1/c) * 2*psi(2*1/c) - psi(1/c)

    gc  = N * ( (1/c) - (1/c**2) * 2*psi(2*1/c) + \
          (1/c**2) * 2*psi(1/c) ) - \
          (c2**c) * (c2p*xRxC + dirXRX)

    ##Second dir
    A   = N * ( (4*psi(2*1/c)/c**3) + \
          (4*polygamma(1,2*1/c)/c**4) - \
          (1/c**2) - (4*psi(1/c)/c**3) - \
          (2*polygamma(1,1/c)/c**4) )

    #Dir c2**c
    dc2C = log(c2)*(c2**c) - \
           c*(c2**(c-1))*(c2*2*psi(2*1/c)/c**2 - \
           c2*psi(1/c)/c**2)

    dc2p = -((psi(1/c) - 2*psi(2*1/c))/c**2) - \
            ((polygamma(1,1/c) - 4 * polygamma(1,2*1/c))/c**3)-\
            ((2*psi(2*1/c)/c**2) - psi(1/c)/c**2)

    B = dc2C*c2p*xRxC + c2**c * (dc2p*xRxC + c2p*dirXRX)

    C = dc2C*dirXRX + c2**c * dirXRX2

    ggc     = A-B-C
    cold    = c
    cn      = c - (1/ggc) * gc

    #Newton update with no negatives
    c       = np.minimum(4,np.maximum(.05,cn))

    return c
Example No. 38
def dda_expected_entropy(qs):
    """return d/da[E[H|a*qs]], a function of alpha"""
    # Agrees with test_diff!
    sum_qs = float(sum(qs))
    h_inf = h(normalize(qs),units='nats')
    h_0 = 0 #expected_entropy_from_alphas([0 for q in qs])
    Z = h_inf#*log(2) # in nats
    return lambda alpha: ((sum_qs*polygamma(1,alpha*sum_qs+1) -
                           sum(qj**2/sum_qs*polygamma(1,alpha*qj+1) for qj in qs))/
                          (Z))
Example No. 39
def computeFPrime(alpha, P, N, m):
    Fprime = np.zeros((m))
    sumAlpha = np.sum(alpha)
    A = polygamma(0, sumAlpha)
    for k in range(m):
        C = 0.0
        for j in range(N):
            C += log(P[j][k])
        Fprime[k] = N * (A - polygamma(0, alpha[k]) + (1.0/N)*C)
    return Fprime
Example No. 40
def dirichlet_mle_newton(e_p, e_p2, e_logp, maxiters = 20, thr = 1e-4, silent = False):
    """
    Finds the MLE for the K-dimensional Dirichlet distribution from observed data,
    i.e. the solution alpha_1, ..., alpha_K > 0 to the moment-matching equations
        psi(alpha_k) - psi(sum(alpha)) = E[log p_k]
    where the expectation on the right hand side is with respect to the empirical
    distribution.

    Input: e_p, a vector of length K containing the empirical expectations E[p_k], i.e. e_p.ndim == 1 and len(e_p) == K
           e_p2, the empirical expectations E[p_k^2], the same format as e_p
           e_logp, the empirical expectations E[log p_k], the same format as e_p
           maxiters, the maximum number of Newton-Raphson iterations
           thr, the threshold for convergence 
    Output: alpha, a vector of length K containing the parameters alpha_1, ..., alpha_K

    This method uses the first and second empirical moments e_p and e_p2 to initialize
    the alpha values (by approximately matching the first and second moments), and then
    uses Newton-Raphson method to refine the estimates.

    This method is based on the first section of Minka's paper:
    http://research.microsoft.com/en-us/um/people/minka/papers/dirichlet/minka-dirichlet.pdf
    """

    # For initialization: First compute the approximate sum(alpha)
    alpha0 = (sum(e_p - e_p2)) / (sum(e_p2 - e_p ** 2))

    # Then compute the initial alpha
    alpha = alpha0 * e_p

    # Do Newton-Raphson iterations
    for iteration in range(0, maxiters):
        sum_alpha = sum(alpha)
        g = psi(alpha) - psi(sum_alpha) - e_logp
        z = polygamma(1, sum_alpha)  # polygamma(1,z) is the trigamma function psi_1(z)
        q = polygamma(1, alpha)
        b = sum(g / q) / (1 / z - sum(1 / q))
        alpha_new = alpha - (g + b) / q

        # this is a hack, but if some of alpha_new's components are negative, make them positive
        alpha_new[alpha_new < 0] = alpha[alpha_new < 0] / 5  # / 5 is arbitrary, as long as the end result is positive

        # Update alpha and check for convergence
        delta = max(abs(alpha - alpha_new))
        alpha = alpha_new
        if delta < thr:
            # cur_gap = psi(alpha) - psi(sum(alpha)) - e_logp
            # if not silent:
            #     print "Dirichlet-MLE-Newton converged in " + str(iteration) + " iterations, gap = " + str(cur_gap)
            break
        if iteration >= maxiters - 1:
            cur_gap = psi(alpha) - psi(sum(alpha)) - e_logp
            if not silent:
                print "Dirichlet-MLE-Newton did not converge after " + str(iteration) + " iterations, gap = " + str(cur_gap)
    return alpha
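A hedged usage sketch (synthetic data, not from the source; it assumes psi and polygamma are imported from scipy.special as the snippet implies): draw samples from a known Dirichlet, pass in the three empirical moment vectors, and check that the recovered parameters are close.

import numpy as np
from scipy.special import psi, polygamma

true_alpha = np.array([2.0, 5.0, 3.0])
P = np.random.dirichlet(true_alpha, size=20000)
alpha_hat = dirichlet_mle_newton(P.mean(axis=0),          # e_p
                                 (P ** 2).mean(axis=0),    # e_p2
                                 np.log(P).mean(axis=0))   # e_logp
print(alpha_hat)   # should land near true_alpha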
Example No. 41
def loglike(a, X):
    N = len(X)
    t = np.mean(np.log(X), 0)
    eta = a - 1
    A = -N*gammaln(np.sum(a)) + N*np.sum(gammaln(a))
    J = N*eta.dot(t) - A
    dJ = N*(t + polygamma(0, np.sum(a)) - polygamma(0, a))
    q = -1/polygamma(1,a)
    c = polygamma(1, np.sum(a))
    H_inv = (np.diag(q) - np.outer(q,q)*c/(1 + c*np.sum(q)))/N
    return J, dJ, H_inv
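A small numerical cross-check (illustrative; it assumes numpy, gammaln, and polygamma are bound as in the snippet): the Sherman-Morrison style expression for H_inv should equal the explicit inverse of the Hessian of J, which is N·(ψ₁(Σa)·11ᵀ − diag(ψ₁(a))).

import numpy as np
from scipy.special import polygamma, gammaln

a = np.array([2.0, 3.0, 4.0])
X = np.random.dirichlet(a, size=100)
J, dJ, H_inv = loglike(a, X)
# Explicit Hessian of the Dirichlet log-likelihood in a
H = len(X) * (polygamma(1, a.sum()) - np.diag(polygamma(1, a)))
print(np.allclose(H_inv, np.linalg.inv(H)))   # expect True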
Example No. 42
def NBRS(counts):
    N = float(sum(counts))
    freqs = [c for c in counts if c > 0]
    f1 = sum([x for x in freqs if x == 1])
    Delt = N - f1
    if Delt > 0.0:  # requires repeated outcomes (coincidences); otherwise psi(Delt) diverges
        S = Euler - log(2.0) + 2.0 * log(N) - polygamma(0, Delt)
        dS = sqrt(polygamma(1, Delt))
        return (S, dS)
    else:  # defaults back to ML
        return EntropyML(counts)
Example No. 43
 def update_alpha(self, gammat, rho):
     N = float(len(gammat))
     logphat = sum(dirichlet_expectation(gamma) for gamma in gammat) / N
     dalpha = numpy.copy(self.alpha)
     gradf = N * (psi(numpy.sum(self.alpha)) - psi(self.alpha) + logphat)
     c = N * polygamma(1, numpy.sum(self.alpha))
     q = -N * polygamma(1, self.alpha)
     b = numpy.sum(gradf / q) / ( 1 / c + numpy.sum(1 / q))
     dalpha = -(gradf - b) / q
     if all(rho() * dalpha + self.alpha > 0):
         self.alpha += rho() * dalpha
     return self.alpha
Example No. 44
 def backward(self, delta):
     a = self.shape.value
     psia = sp.digamma(a)
     psi1a = sp.polygamma(1, a)
     sqrtpsi1a = np.sqrt(psi1a)
     psi2a = sp.polygamma(2, a)
     b = self.rate.value
     eps = (np.log(self.output) - psia + np.log(b)) / sqrtpsi1a
     dshape = self.output * (0.5 * eps * psi2a / sqrtpsi1a + psi1a) * delta
     drate = -delta * self.output / b
     self.shape.backward(dshape)
     self.rate.backward(drate)
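One way to read this backward pass (an interpretation, not a claim about the source project): the sample is treated as x = exp(ψ(a) − log b + ε·√ψ₁(a)) with the standardized noise ε held fixed, which yields the pathwise derivatives implemented as `dshape` and `drate`:

$$
\varepsilon = \frac{\log x - \psi(a) + \log b}{\sqrt{\psi_1(a)}}, \qquad
\frac{\partial x}{\partial a} = x\left(\psi_1(a) + \frac{\varepsilon\,\psi_2(a)}{2\sqrt{\psi_1(a)}}\right), \qquad
\frac{\partial x}{\partial b} = -\frac{x}{b}.
$$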
Example No. 45
 def doc_lowerbound_likelihood(self,doc):
     obj=0.0;
     sum_digamma=polygamma(0,sum(doc.gamma));
     digamma=polygamma(0,doc.gamma);
     for i in xrange(self.topicNum):
         obj+=(self.alpha[i]-1)*(digamma[i]-sum_digamma);
         for j in doc.get_term_id_list():
             obj+=doc.phi[(i,j)]*(digamma[i]-sum_digamma+log(self.beta[(i,j)]));
             obj-=doc.phi[(i,j)]*log(doc.phi[(i,j)]);
         obj-=(doc.gamma[i]-1)*(digamma[i]-sum_digamma);
         obj+=gammaln(doc.gamma).sum()-gammaln(sum(doc.gamma));
     return obj;