def expected_entropy_from_alphas_ref(alphas):
    """Compute expectation of entropy (in nats), given alphas. Eq (9) of AMP"""
    if sum(alphas) == 0:
        return 0
    kappa = float(sum(alphas))
    return (polygamma(0, kappa + 1)
            - sum(a / kappa * polygamma(0, a + 1) for a in alphas))

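# A minimal Monte Carlo sanity check for expected_entropy_from_alphas_ref
# above (my addition, not part of the original snippet): the closed form
# E[H] = psi(kappa + 1) - sum_k (a_k / kappa) * psi(a_k + 1) for a
# Dirichlet(alphas) prior should agree with the average entropy of
# distributions sampled from that Dirichlet.
import numpy as np
from scipy.special import polygamma

def mc_expected_entropy(alphas, n_samples=100000, seed=0):
    rng = np.random.default_rng(seed)
    ps = rng.dirichlet(alphas, size=n_samples)
    # Entropy in nats of each sampled distribution, averaged over samples.
    return -(ps * np.log(ps)).sum(axis=1).mean()

alphas = [1.0, 2.0, 3.0]
print(expected_entropy_from_alphas_ref(alphas))  # closed form
print(mc_expected_entropy(alphas))               # should agree to ~3 decimals
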
def gradLikelihood(self, state):
    # State must be a VI object -- we expect it to have memoized
    # determinant and inverse lookup functions.
    if not issubclass(type(state), VI):
        raise StateError('State must be given in terms of a VI object, not %s.'
                         % type(state).__name__)
    # Expected Bayesian network variables include b0, gamma and c.
    reqKeys = ['b0', 'gamma', 'c']
    self.check_BNVs(state, reqKeys)
    gamma = state.bnv['gamma'].val_getter()
    b0 = state.bnv['b0'].val_getter()
    a0 = self.val_getter()
    c = state.bnv['c'].val_getter()
    n = state.n
    diffProj = state.memoizer.FDifferenceProjection(gamma, c)
    # scipy's polygamma(0, x) is the digamma function itself; polygamma(n, x)
    # is its nth derivative.
    gradL = funcs.polygamma(0, n * 0.5 + a0) \
        - funcs.polygamma(0, a0) \
        + np.log(b0) - np.log(b0 + diffProj) \
        + 0.5 * (funcs.polygamma(1, a0) + a0 * funcs.polygamma(2, a0)) \
        / (a0 * funcs.polygamma(1, a0) - 1.0)
    if math.isnan(gradL):
        print('NaN in gradLikelihood when a0 = %f' % a0)
        print('b0: %f' % b0)
        print('diffProj: %f' % diffProj)
    return gradL

def gnmf_solvebynewton(c, a0):
    """Solve c = log(a) - psi(a) + 1 for a by Newton's method."""
    M, N = a0.shape
    Mc, Nc = c.shape
    if M == Mc and N == Nc:
        a = a0
        cond = 1
    else:
        a = a0[0, 0]
        cond = 4
    stop = False
    for i in range(10):
        delta = (log(a) - polygamma(0, a) + 1 - c) / ((1 / a) - polygamma(1, a))
        # Halve the step until it no longer overshoots a.
        count = 0
        while (delta > a).any():
            delta = delta / 2
            if count > 10:
                stop = True
                break
            count += 1
        if stop:
            break
        if (delta < 0).any():
            delta = 0
        a = a - delta
    if cond == 4:
        a = a * np.ones((M, N))
    return a

def f(self, x, t):
    N = len(x) // 2
    xdot = pl.array([])
    # Modulus the x for periodicity.
    x[N:2 * N] = x[N:2 * N] % self.d
    # HERE ---->> 1Dify
    for i in range(N):
        temp = 0.0
        for j in range(N):
            if i == j:
                continue
            # Repulsive x interparticle force of j on i.
            temp += self.qq * (x[N + i] - x[N + j]) / (pl.sqrt((x[N + i] - x[N + j]) ** 2) ** 3)
            # All of the forces coming from the 'same' particle but from other 'cells' due to
            # the periodic constraints can be wrapped up in a sum that converges to an answer
            # that can be expressed in terms of polygamma functions (see pg 92 of notebook).
            # Note on the sign (xi-xj or xj-xi): changing the sign of the xi-xj term (i.e. which
            # particle we are considering forces on) changes the direction of the force
            # appropriately.
            temp += self.qq * (polygamma(1, (self.d + x[N + i] - x[N + j]) / self.d)
                               - polygamma(1, 1.0 - ((x[N + i] - x[N + j]) / self.d))) / (self.d ** 2)
        # Periodic force on particle i.
        temp += self.As[i] * pl.sin(x[N + i]) * pl.cos(t)
        temp -= self.beta * x[i]
        xdot = pl.append(xdot, temp)
    for i in range(N):
        xdot = pl.append(xdot, x[i])
    return xdot

def f(self, x, t):
    N = len(x) // 2
    xdot = pl.array([])
    # Modulus the x for periodicity.
    x[N:2 * N] = x[N:2 * N] % self.d
    # HERE ---->> 1Dify
    for i in range(N):
        temp = 0.0
        for j in range(N):
            if i == j:
                continue
            # Repulsive x interparticle force of j on i.
            temp += self.qq * (x[N + i] - x[N + j]) / (pl.sqrt((x[N + i] - x[N + j]) ** 2) ** 3)
            # All of the forces coming from the 'same' particle but from other 'cells' due to
            # the periodic constraints can be wrapped up in a sum that converges to an answer
            # that can be expressed in terms of polygamma functions (see pg 92 of notebook).
            temp += self.qq * (polygamma(1, (self.d + x[N + i] - x[N + j]) / self.d)
                               - polygamma(1, 1.0 - ((x[N + i] - x[N + j]) / self.d))) / (self.d ** 2)
        # EC x force on particle i.
        for a in range(2):
            temp += self.As[i] * pl.sin(x[N + i] - a * pl.pi) * pl.cos(t - a * pl.pi) \
                / (pl.cosh(1.0) - pl.cos(x[N + i] - a * pl.pi))
        temp -= self.beta * x[i]
        xdot = pl.append(xdot, temp)
    for i in range(N):
        xdot = pl.append(xdot, x[i])
    return xdot

def objectiveGradient(lambda_k, nu, tau, Elog_eta_k, nDoc):
    '''Calculate gradient of objectiveFunc, objective for HDP variational

    Returns
    -------
    gvec : 2*K length vector,
        where each entry gives partial derivative with respect to
        the corresponding entry of Cvec
    '''
    # lvec is the derivative of log(lambda_k) via the chain rule.
    lvec = 1 / lambda_k
    W = lvec.size
    # Derivative of log eta.
    digammaAll = digamma(np.sum(lambda_k))
    Elog_lambda_k = digamma(lambda_k) - digammaAll
    # Derivatives of Elog_phi_k and E_phi_k.
    polygammaAll = polygamma(1, np.sum(lambda_k))
    dElog_phi_k = polygamma(1, lambda_k) - polygammaAll
    lambda_k_sum = np.sum(lambda_k)
    dE_phi_k = (lambda_k_sum - lambda_k) / np.power(lambda_k_sum, 2)
    # NB: the original referenced an undefined `N` here; `nDoc` is assumed.
    gvec = dElog_phi_k * (nDoc + tau - lambda_k) \
        + dE_phi_k * nu * Elog_eta_k
    gvec = -1 * gvec
    # Apply the chain rule!
    gvecC = lvec * gvec
    return gvecC

def update_alpha(self, gammat, rho):
    """
    Update parameters for the Dirichlet prior on the per-document topic
    weights `alpha`, given the last `gammat`.

    Uses Newton's method, described in
    **Huang: Maximum Likelihood Estimation of Dirichlet Distribution Parameters.**
    (http://www.stanford.edu/~jhuang11/research/dirichlet/dirichlet.pdf)
    """
    N = float(len(gammat))
    logphat = sum(dirichlet_expectation(gamma) for gamma in gammat) / N
    dalpha = numpy.copy(self.alpha)
    gradf = N * (psi(numpy.sum(self.alpha)) - psi(self.alpha) + logphat)

    c = N * polygamma(1, numpy.sum(self.alpha))
    q = -N * polygamma(1, self.alpha)

    b = numpy.sum(gradf / q) / (1 / c + numpy.sum(1 / q))

    dalpha = -(gradf - b) / q

    if all(rho() * dalpha + self.alpha > 0):
        self.alpha += rho() * dalpha
    else:
        logger.warning("updated alpha not positive")
    logger.info("optimized alpha %s" % list(self.alpha))

    return self.alpha

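# A small verification sketch (my addition, not from the original source):
# the `b` trick in update_alpha above inverts the Dirichlet Hessian
# H = diag(q) + c * 11^T in O(K) via the Sherman-Morrison formula, so the
# resulting Newton direction should match a dense linear solve.
import numpy as np
from scipy.special import polygamma

K = 6
rng = np.random.default_rng(3)
alpha = rng.gamma(2.0, 1.0, size=K)
N = 50.0
gradf = rng.normal(size=K)

c = N * polygamma(1, alpha.sum())
q = -N * polygamma(1, alpha)
b = np.sum(gradf / q) / (1 / c + np.sum(1 / q))
step_fast = (gradf - b) / q                    # O(K) inversion

H = np.diag(q) + c * np.ones((K, K))           # dense Hessian
step_dense = np.linalg.solve(H, gradf)
print(np.allclose(step_fast, step_dense))      # True
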
def Mstep(max_iter):
    global alpha, beta, Gamma, Phi, doc, doc_cnt
    # Update beta.
    for i in range(K):
        for v in range(voca_size):
            beta[i][v] = 0
        for d in range(doc_size):
            for n in range(len(doc[d])):
                beta[i][doc[d][n]] += doc_cnt[d][n] * Phi[d][n][i]
    beta_sum = sum_matrix(beta, 0)
    for k in range(K):
        for i in range(voca_size):
            beta[k][i] = beta[k][i] / beta_sum[k]
    # Update alpha.
    last = 0
    iter_num = 0
    const = 0
    for d in range(doc_size):
        gamma_sum = sum_vector(Gamma[d])
        for i in range(K):
            const += (sp.psi(Gamma[d][i]) - sp.psi(gamma_sum))
    now = -compute_alpha_mle(alpha)
    origin = now
    while (abs(last - now) > 1e-9 and iter_num < max_iter):
        da = K * (doc_size * (sp.psi(alpha * K) - sp.psi(alpha))) + const
        dda = K * (doc_size * (K * sp.polygamma(1, alpha * K) - sp.polygamma(1, alpha)))
        dx = -da / dda
        alpha = backtrack(alpha, dx, da, 0.01, 0.5)
        last = now
        now = -compute_alpha_mle(alpha)
        iter_num += 1
    if (now < origin):
        print('error alpha')

def estimate(self, dat):
    '''
    Estimates the parameters from the data in dat.

    It is possible to fit only selected parameters of the distribution by
    setting the primary array accordingly (see :doc:`Tutorial on the
    Distributions module <tutorial_Distributions>`).

    Estimate uses the algorithm by [Minka2002]_ to fit the parameters.

    :param dat: Data points on which the Gamma distribution will be estimated.
    :type dat: natter.DataModule.Data
    '''
    logmean = log(mean(dat.X))
    meanlog = mean(log(dat.X))
    u = 2.0

    if 'u' in self.primary:  # if we want to estimate u
        for k in range(self.maxCount):
            u = max(u, 1e-08)
            unew = 1 / u + (meanlog - logmean + log(u) - float(polygamma(0, u))) / \
                (u ** 2 * (1 / u - float(polygamma(1, u))))
            unew = 1 / unew
            if (unew - u) ** 2 < self.Tol:
                u = unew
                break
            u = unew
        self.param['u'] = unew
    if 's' in self.primary:
        self.param['s'] = exp(logmean) / self.param['u']

def M_step(Phi, gamma, alpha, corpus, voc, k, M):
    V = len(voc)
    # 1. Update Beta.
    Beta = np.zeros([k, V])
    for d in range(0, M):
        words = np.array(corpus[d])
        voc_pos = np.array(list(map(lambda x: np.in1d(words, x), voc)))
        Beta += np.dot(voc_pos, Phi[d]).transpose()
    Beta = Beta / Beta.sum(axis=1).reshape(k, 1)
    # 2. Update alpha via Newton's method.
    for i in range(1000):
        old_alpha = alpha.copy()  # copy, not alias: alpha is updated in place below
        # Calculate the gradient.
        g = M * (digamma(np.sum(alpha)) - digamma(alpha)) + np.sum(
            digamma(gamma) - np.tile(digamma(np.sum(gamma, axis=1)), (k, 1)).T, axis=0)
        # Calculate the Hessian (diagonal plus constant part).
        h = -M * polygamma(1, alpha)
        z = M * polygamma(1, np.sum(alpha))
        # Calculate parameter c.
        c = np.sum(g / h) / (1 / z + np.sum(1 / h))
        # Update alpha.
        alpha -= (g - c) / h
        if np.sqrt(np.mean(np.square(alpha - old_alpha))) < 1e-4:
            break
    return alpha, Beta

def update_one_alpha(alpha, theta, stepsize=.01, tol=1e-14):
    # Newton's method from [Minka00].
    # NOTE: a small stepsize is needed to prevent negative-valued alpha; the
    # log likelihood is concave, but a full Newton step can overshoot the
    # boundary alpha > 0.
    D, K = np.shape(theta)
    log_p = 1.0 / D * np.sum(np.log(theta), 1)
    while True:
        oldnorm = np.linalg.norm(alpha)
        g = D * psi(np.sum(alpha)) - D * psi(alpha) + D * log_p
        # Diagonal of the Hessian, plus the rank-one term z.
        q = -D * polygamma(1, alpha)
        z = D * polygamma(1, np.sum(alpha))
        b = np.sum(g / q) / (1.0 / z + np.sum(1.0 / q))
        alpha -= stepsize * ((g - b) / q)
        if abs(np.linalg.norm(alpha) - oldnorm) < tol:
            break
    assert np.all(alpha > 0)
    return alpha

def test_hessian_h_and_z():
    h, z = hessian_h_and_z(M, alpha)
    for i in range(alpha.size):
        actual = h[i]
        expected = -M * polygamma(1, alpha[i])
        assert_array_almost_equal(actual, expected)
    assert_array_almost_equal(z, M * polygamma(1, alpha.sum()))

def estimate_dirichlet_param(samples, param):
    """
    Uses a Newton-Raphson scheme to estimate the parameter of a K-dimensional
    Dirichlet distribution.

    :param samples: an NxK matrix of K-dimensional vectors drawn from a
        Dirichlet distribution
    :param param: the old value of the parameter. This is overwritten.
    :return: a K-dimensional vector which is the new parameter estimate
    """
    N, K = samples.shape
    p = np.sum(np.log(samples), axis=0)

    for _ in range(60):
        g = -N * fns.digamma(param)
        g += N * fns.digamma(param.sum())
        g += p
        q = -N * fns.polygamma(1, param)
        np.reciprocal(q, out=q)
        z = N * fns.polygamma(1, param.sum())
        b = np.sum(g * q)
        b /= 1 / z + q.sum()
        param -= (g - b) * q
        print("%.2f" % param.mean(), end=" --> ")
    print()
    return param

def e_step_one_iter(alpha, beta, docs, phi, ips):
    M, K = docs.size, alpha.size
    for m in range(M):
        N_m = docs[m].size
        psi_sum_ips = psi(ips[m, :].sum())
        for n in range(N_m):
            for i in range(K):
                E_q = psi(ips[m, i]) - psi_sum_ips
                phi[m][n, i] = (beta[i, docs[m][n]] * np.exp(E_q))
        phi[m] /= phi[m].sum(axis=1)[:, None]  # normalize phi
        ips[m] = alpha + phi[m].sum(axis=0)
    # Gradient computation.
    grad_ips = np.zeros(ips.shape, dtype=np.float64)
    for m in range(M):
        for i in range(K):
            grad_ips[m, i] \
                = (polygamma(1, ips[m, i])
                   * (alpha[i] + phi[m][:, i].sum() - ips[m, i])
                   - polygamma(1, ips[m, :].sum())
                   * (alpha.sum() + phi[m].sum() - ips[m, :].sum()))
    return (phi, ips, grad_ips)

def J(self, t):
    # The -1 in the lines below is for the right rounding with int().
    # x1 is the position of p1 (particle 1).
    x1 = self.sol[int(t / self.dt) - 1, 2]
    # x2 is the velocity of p1.
    x2 = self.sol[int(t / self.dt) - 1, 0]
    # x3 is the position of p2.
    x3 = self.sol[int(t / self.dt) - 1, 3]
    # x4 is the velocity of p2.
    x4 = self.sol[int(t / self.dt) - 1, 1]
    # Differentials of the interparticle forces, written this way to make the
    # matrix below easier to read: df13_dx1 is the derivative, with respect to
    # x1, of the force of p2 on p1.
    # Note on the 1/r^2 part: it goes to a cubic, so it always retains its sign.
    df13_dx1 = -2.0 / (x1 - x3) ** 3 + (polygamma(2, 1.0 + (x1 - x3) / self.d)
                                        + polygamma(2, 1.0 - (x1 - x3) / self.d)) / self.d ** 3
    # The derivative with respect to x3 is just the negative of the one above.
    df13_dx3 = -df13_dx1
    df31_dx1 = 2.0 / (x3 - x1) ** 3 - (polygamma(2, 1.0 - (x3 - x1) / self.d)
                                       + polygamma(2, 1.0 + (x3 - x1) / self.d)) / self.d ** 3
    df31_dx3 = -df31_dx1
    # Matrix elements of the time-dependent Jacobian.
    jacobian = pl.array([
        [0.0, 1.0, 0.0, 0.0],
        [self.A * pl.cos(x1) * pl.cos(t) + df13_dx1, -self.beta, df13_dx3, 0.0],
        [0.0, 0.0, 0.0, 1.0],
        [df31_dx1, 0.0, self.A * pl.cos(x3) * pl.cos(t) + df31_dx3, -self.beta]])
    return jacobian

def frobenius_norm(counts):
    n = len(counts)
    pgsum = polygamma(1, counts.sum())
    A = (n ** 2 - n) * pgsum ** 2
    B = polygamma(1, counts) - polygamma(1, counts.sum())
    B = (B ** 2).sum()
    return np.sqrt(A + B)

def Newton(self):
    print("1, updating alpha------------------")
    ratio = len(self.docs)
    veck = copy.deepcopy(self.alpha)
    t = 0
    while True:
        print("updating, iteration %d" % t)
        print("x%d" % t, veck[0:10])
        gk = self.grad()
        print("gk%d" % t, gk[0:10])
        if self.normof(gk) < self.rho:
            print("after updating:", veck[0:10])
            print("")
            self.alpha = veck
            return
        Hk = [[ratio * polygamma(1, sum(veck))] * len(veck)] * len(veck)
        # 'duijiao' is pinyin for 'diagonal': the diagonal entries of the Hessian.
        duijiao = [ratio * polygamma(1, vecki) for vecki in veck]
        Hk = np.mat(Hk) - np.mat(np.diag(duijiao))
        pk = (-1 * (Hk.I) * (np.mat(gk).T)).T.tolist()[0]
        print("pk%d" % t, pk[0:10])
        for i in range(len(veck)):
            veck[i] += pk[i]
        t += 1

def H(params, n, k):
    alpha = params[0]
    beta = params[1]
    H = np.zeros(2)
    H[0] = k * special.polygamma(1, alpha) - n * special.polygamma(1, alpha + beta)
    H[1] = (n - k) * special.polygamma(1, beta) - n * special.polygamma(1, alpha + beta)
    return H

def gradient_log_recognition(params, theta, i):
    alpha = params[0]
    beta = params[1]
    if i == 0:
        return np.log(theta) - special.polygamma(0, alpha) + special.polygamma(0, alpha + beta)
    if i == 1:
        return np.log(1 - theta) - special.polygamma(0, beta) + special.polygamma(0, alpha + beta)

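# A finite-difference check (my addition): the closed-form gradients above
# should match numerical derivatives of the Beta log-density
# log p(theta) = (alpha-1) log(theta) + (beta-1) log(1-theta) - log B(alpha, beta).
import numpy as np
from scipy import special

def log_beta_pdf(params, theta):
    a, b = params
    return (a - 1) * np.log(theta) + (b - 1) * np.log(1 - theta) - special.betaln(a, b)

params, theta, h = np.array([2.0, 3.0]), 0.4, 1e-6
for i in range(2):
    e = np.zeros(2)
    e[i] = h
    numeric = (log_beta_pdf(params + e, theta) - log_beta_pdf(params - e, theta)) / (2 * h)
    print(numeric, gradient_log_recognition(params, theta, i))  # should agree
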
def estimate_abundances(self):
    """
    Compute expectations and variances of the log relative abundances
    (log rho) of each target. Use these to compute 95% confidence intervals
    of the relative abundances themselves.
    """
    log_theta = np.zeros(self.ntargs)
    sd_log_theta = np.zeros(self.ntargs)
    for t in range(self.ntargs):
        log_theta[t] = psi(self.alpha[t]) - psi(self.alpha[t] + self.beta[t])
        var_log_theta = polygamma(1, self.alpha[t]) - polygamma(1, self.alpha[t] + self.beta[t])
        for j in range(t):
            log_theta[t] += psi(self.beta[j]) - psi(self.alpha[j] + self.beta[j])
            var_log_theta += polygamma(1, self.beta[j]) - polygamma(1, self.alpha[j] + self.beta[j])
        sd_log_theta[t] = sqrt(var_log_theta)
    self.log_theta = log_theta
    self.sd_log_theta = sd_log_theta
    theta_ci_low = np.zeros(self.ntargs)
    theta_ci_hi = np.zeros(self.ntargs)
    for t in range(self.ntargs):
        self.targ_samp_prob[t] = exp(log_theta[t])
        theta_ci_low[t] = exp(log_theta[t] - ci95sd * sd_log_theta[t])
        theta_ci_hi[t] = exp(log_theta[t] + ci95sd * sd_log_theta[t])
    # Compute relative abundances and confidence limits.
    w = self.targ_samp_prob / self.eff_len
    self.rho = w / sum(w)
    w_low = theta_ci_low / self.eff_len
    self.rho_ci_low = w_low / sum(w_low)
    w_hi = theta_ci_hi / self.eff_len
    self.rho_ci_hi = w_hi / sum(w_hi)

def idML_dfdnu(nu, N, K, sum_inv_iws, sum_log_det_iws):
    """Derivative of the pdf of inverse-Wishart-distributed variables with
    respect to the degrees of freedom."""
    hnu = nu * 0.5
    return N * K / nu - 0.5 * N * (
        _ssp.polygamma(1, hnu) + _ssp.polygamma(1, hnu - 0.5)
        + _ssp.polygamma(1, hnu - 1) + _ssp.polygamma(1, hnu - 1.5))

def hess(alpha):
    temp = -1 / self.var_h * np.convolve(
        self.e_2u, np.square(self.g), mode='valid') / self.beta
    temp += -polygamma(1, alpha) + (self.lamb * np.square(self.v) - alpha) * polygamma(2, alpha)
    return temp

def hess_ll_nb(self, X, params):
    hess = np.zeros((2, 2))
    hess[0, 0] = np.sum(polygamma(1, X + params[0])) - X.size * polygamma(1, params[0])
    hess[0, 1] = hess[1, 0] = -X.size / (1 - params[1] + 1e-8)
    hess[1, 1] = -X.size * params[0] / ((1 - params[1]) ** 2 + 1e-8) \
        - X.sum() / (params[1] ** 2 + 1e-8)
    return hess

def computeHessian(alpha, P, N, m):
    sumAlpha = np.sum(alpha)
    tempHessians = np.zeros((m))
    for i in range(m):
        tempHessians[i] = -1 * N * (polygamma(1, alpha[i]))
    c = N * polygamma(1, sumAlpha)
    Q = np.diag(tempHessians)
    return (Q, c)

def calc_gradient_rel_alpha(self, docs):
    g = numpy.array([0.0] * self.topicNum)
    for doc in docs:
        g += polygamma(0, doc.gamma)
        g -= polygamma(0, sum(doc.gamma))
    g += len(docs) * polygamma(0, sum(self.alpha))
    g -= len(docs) * polygamma(0, self.alpha)
    return g

def hes_nb_glm_disp_block(
        x: np.ndarray,
        mu: np.ndarray,
        disp: np.ndarray,
        design_loc: np.ndarray,
        design_scale: np.ndarray,
        i: int,
        j: int
):
    """
    Compute an entry of the hessian in the dispersion model block for a given gene.

    Sum the following across cells:
    $$
    h_{ij} = disp * x^{m_i} * x^{m_j} * [psi_0(disp+x) + psi_0(disp)
             - mu / (disp+mu)^2 * (disp+x) + (mu-disp) / (disp+mu)
             + log(disp) + 1 - log(disp+mu)]
           + disp * psi_1(disp+x) + disp * psi_1(disp)
    $$

    Make sure that only element-wise operations happen here! Do not simplify
    design matrix elements: they are only 0 or 1 for discrete groups but
    continuous if space, time, pseudotime or spline basis covariates are
    supplied!

    :param x: np.ndarray (cells,)
        Observations for a given gene.
    :param mu: np.ndarray (cells,)
        Estimated mean parameters across cells for a given gene.
    :param disp: np.ndarray (cells,)
        Estimated dispersion parameters across cells for a given gene.
    :param design_loc: np.ndarray, matrix (cells, #parameters location model)
        Design matrix of location model.
    :param design_scale: np.ndarray, matrix (cells, #parameters shape model)
        Design matrix of shape model.
    :param i: int
        Index of the first dimension of the fisher information matrix to compute.
    :param j: int
        Index of the second dimension of the fisher information matrix to compute.
    :return: float
        Entry of the fisher information matrix in the dispersion model block
        at position (i, j).
    """
    # Grouping follows the bracketed docstring formula.
    h_ij = (
        disp * np.asarray(design_loc[:, i]) * np.asarray(design_loc[:, j]) * (
            polygamma(n=0, x=disp + x)
            + polygamma(n=0, x=disp)
            - mu / np.square(disp + mu) * (disp + x)
            + (mu - disp) / (disp + mu)
            + np.log(disp) + 1 - np.log(disp + mu)
        )
        + disp * polygamma(n=1, x=disp + x)
        + disp * polygamma(n=1, x=disp)
    )
    return np.sum(h_ij)

def glda_alpha_hess(alpha, pg, sym=True):
    '''The Hessian of the glda-alpha objective.'''
    M, K = pg.shape
    if sym:
        return -M * (polygamma(1, K * alpha) * K * K - polygamma(1, alpha) * K)
    else:
        return -M * (polygamma(1, alpha.sum()) - diag(polygamma(1, alpha)))

def Compute_S_star(self, eta):
    """Compute the sufficient statistics S given the parameters eta."""
    eta1 = eta[0]
    eta2 = eta[1]
    S1 = eta1
    S2 = polygamma(0, eta2) - polygamma(0, eta2.sum())
    return (S1, S2)

def lowerbound_likelihood_rel_alpha(self, docs):
    m = len(docs)
    obj = m * gammaln(sum(self.alpha))
    obj -= m * gammaln(self.alpha).sum()
    for doc in docs:
        c = polygamma(0, sum(doc.gamma))
        for i in range(self.topicNum):
            obj += (self.alpha[i] - 1) * (polygamma(0, doc.gamma[i]) - c)
    return obj

def alpha_newton(alpha_t, gamma):
    h = D * (polygamma(1, np.sum(alpha_t)) - polygamma(1, alpha_t))
    z = D * polygamma(1, np.sum(alpha_t))
    g_at = D * (digamma(np.sum(alpha_t)) - digamma(alpha_t)) + np.sum(
        digamma(gamma), axis=0) - np.sum(digamma(np.sum(gamma, axis=1)), axis=0)
    c = np.sum(g_at / h) / (1 / z + np.sum(1 / h))
    U_at = (g_at - c) / h
    return alpha_t - U_at

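# Hedged driver sketch for alpha_newton above (my addition). `D`, `digamma`
# and `polygamma` are assumed module-level names in the original; stand-ins
# are provided here so the loop runs end to end. NB: production code usually
# also clamps alpha to stay positive.
import numpy as np
from scipy.special import digamma, polygamma

D, K = 100, 5
gamma = np.random.default_rng(0).gamma(2.0, 1.0, size=(D, K)) + 1.0
alpha = np.ones(K)
for _ in range(100):
    alpha_next = alpha_newton(alpha, gamma)
    if np.max(np.abs(alpha_next - alpha)) < 1e-10:
        alpha = alpha_next
        break
    alpha = alpha_next
print(alpha)  # fitted Dirichlet parameters
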
def cov_T(self, eta):
    """
    @arg eta: The natural parameters.

    The covariance of T_i, T_j, the sufficient statistics, given eta.
    """
    theta = self.theta(eta)
    assert (self.dimension,) == theta.shape
    return diag(polygamma(1, theta)) - polygamma(1, theta.sum())

def idML_f(nu, N, K, sum_inv_iws, sum_log_det_iws):
    """Pdf of inverse-Wishart-distributed variables."""
    hnu = nu * 0.5
    return N * K * (_N.log(nu) - _N.log(2)) \
        - N * _N.log(_N.linalg.det(sum_inv_iws / N)) \
        - N * (_ssp.polygamma(0, hnu) + _ssp.polygamma(0, hnu - 0.5)
               + _ssp.polygamma(0, hnu - 1) + _ssp.polygamma(0, hnu - 1.5)) \
        - sum_log_det_iws

def estimateGGDCovShapeIn(X, p_init):
    N = X.shape[1]
    R = np.cov(X)
    # Start at the Gaussian shape.
    bestC = p_init
    c = bestC
    Rold = np.zeros((2, 2), dtype=complex)
    xRxC = 0
    dirXRX = 0
    dirXRX2 = 0
    for n in range(N):
        temp = X[:, n].conj().T.dot(inv(R)).dot(X[:, n])
        xRxC += (temp ** c).real
        dirXRX += (log(temp) * temp ** c).real
        dirXRX2 += (log(temp) ** 2 * temp ** c).real
    c2 = gamma(2 / c) / (2 * gamma(1 / c))
    c2p = log(c2) - (1 / c) * 2 * psi(2 / c) - psi(1 / c)
    # First derivative.
    gc = N * ((1 / c) - (1 / c ** 2) * 2 * psi(2 / c)
              + (1 / c ** 2) * 2 * psi(1 / c)) \
        - (c2 ** c) * (c2p * xRxC + dirXRX)
    # Second derivative.
    A = N * ((4 * psi(2 / c) / c ** 3)
             + (4 * polygamma(1, 2 / c) / c ** 4)
             - (1 / c ** 2) - (4 * psi(1 / c) / c ** 3)
             - (2 * polygamma(1, 1 / c) / c ** 4))
    # Derivative of c2**c.
    dc2C = log(c2) * (c2 ** c) \
        - c * (c2 ** (c - 1)) * (c2 * 2 * psi(2 / c) / c ** 2
                                 - c2 * psi(1 / c) / c ** 2)
    dc2p = -((psi(1 / c) - 2 * psi(2 / c)) / c ** 2) \
        - ((polygamma(1, 1 / c) - 4 * polygamma(1, 2 / c)) / c ** 3) \
        - ((2 * psi(2 / c) / c ** 2) - psi(1 / c) / c ** 2)
    B = dc2C * c2p * xRxC + c2 ** c * (dc2p * xRxC + c2p * dirXRX)
    C = dc2C * dirXRX + c2 ** c * dirXRX2
    ggc = A - B - C
    cold = c
    cn = c - (1 / ggc) * gc
    # Newton update, clipped to [0.05, 4] to avoid invalid shape values.
    c = np.minimum(4, np.maximum(.05, cn))
    return c

def dda_expected_entropy(qs):
    """Return d/da[E[H|a*qs]] as a function of alpha."""
    # Agrees with test_diff!
    sum_qs = float(sum(qs))
    h_inf = h(normalize(qs), units='nats')
    h_0 = 0  # expected_entropy_from_alphas([0 for q in qs])
    Z = h_inf  # already in nats, so no log(2) factor
    return lambda alpha: ((sum_qs * polygamma(1, alpha * sum_qs + 1)
                           - sum(qj ** 2 / sum_qs * polygamma(1, alpha * qj + 1)
                                 for qj in qs)) / Z)

def computeFPrime(alpha, P, N, m):
    Fprime = np.zeros((m))
    sumAlpha = np.sum(alpha)
    A = polygamma(0, sumAlpha)
    for k in range(m):
        C = 0.0
        for j in range(N):
            C += log(P[j][k])
        Fprime[k] = N * (A - polygamma(0, alpha[k]) + (1.0 / N) * C)
    return Fprime

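# Hedged sketch (my addition) combining computeFPrime and computeHessian
# from earlier in this section into a single Newton step, using the standard
# O(m) inversion of the diagonal-plus-rank-one Hessian H = Q + c * 11^T.
# Both functions and their numpy/scipy imports are assumed to be in scope.
import numpy as np

def newton_step(alpha, P, N, m):
    Fprime = computeFPrime(alpha, P, N, m)
    Q, c = computeHessian(alpha, P, N, m)
    q = np.diag(Q)  # diagonal part of the Hessian
    b = np.sum(Fprime / q) / (1.0 / c + np.sum(1.0 / q))
    return alpha - (Fprime - b) / q
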
def dirichlet_mle_newton(e_p, e_p2, e_logp, maxiters=20, thr=1e-4, silent=False):
    """
    Finds the MLE for the K-dimensional Dirichlet distribution from observed
    data, i.e. the solution alpha_1, ..., alpha_K > 0 to the moment-matching
    equations

        psi(alpha_k) - psi(sum(alpha)) = E[log p_k]

    where the expectation on the right-hand side is with respect to the
    empirical distribution.

    Input:
        e_p, a vector of length K containing the empirical expectations
            E[p_k], i.e. e_p.ndim == 1 and len(e_p) == K
        e_p2, the empirical expectations E[p_k^2], in the same format as e_p
        e_logp, the empirical expectations E[log p_k], in the same format as e_p
        maxiters, the maximum number of Newton-Raphson iterations
        thr, the threshold for convergence

    Output:
        alpha, a vector of length K containing the parameters alpha_1, ..., alpha_K

    This method uses the first and second empirical moments e_p and e_p2 to
    initialize the alpha values (by approximately matching the first and
    second moments), and then uses the Newton-Raphson method to refine the
    estimates. It is based on the first section of Minka's paper:
    http://research.microsoft.com/en-us/um/people/minka/papers/dirichlet/minka-dirichlet.pdf
    """
    # Initialization: first compute the approximate sum(alpha) ...
    alpha0 = (sum(e_p - e_p2)) / (sum(e_p2 - e_p ** 2))
    # ... then the initial alpha.
    alpha = alpha0 * e_p
    # Newton-Raphson iterations.
    for iteration in range(0, maxiters):
        sum_alpha = sum(alpha)
        g = psi(alpha) - psi(sum_alpha) - e_logp
        z = polygamma(1, sum_alpha)  # polygamma(1, z) is the trigamma function psi_1(z)
        q = polygamma(1, alpha)
        b = sum(g / q) / (1 / z - sum(1 / q))
        alpha_new = alpha - (g + b) / q
        # This is a hack: if some of alpha_new's components are negative, make them positive.
        alpha_new[alpha_new < 0] = alpha[alpha_new < 0] / 5  # / 5 is arbitrary, as long as the result is positive
        # Update alpha and check for convergence.
        delta = max(abs(alpha - alpha_new))
        alpha = alpha_new
        if delta < thr:
            break
    if iteration >= maxiters - 1:
        cur_gap = psi(alpha) - psi(sum(alpha)) - e_logp
        if not silent:
            print("Dirichlet-MLE-Newton did not converge after " + str(iteration)
                  + " iterations, gap = " + str(cur_gap))
    return alpha

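# Hedged usage sketch for dirichlet_mle_newton above (my addition): recover
# parameters from synthetic Dirichlet draws. The function body assumes `psi`
# and `polygamma` from scipy.special, imported here so the sketch is
# self-contained.
import numpy as np
from scipy.special import psi, polygamma

true_alpha = np.array([2.0, 5.0, 1.0])
samples = np.random.default_rng(1).dirichlet(true_alpha, size=5000)
e_p = samples.mean(axis=0)
e_p2 = (samples ** 2).mean(axis=0)
e_logp = np.log(samples).mean(axis=0)
alpha_hat = dirichlet_mle_newton(e_p, e_p2, e_logp)
print(true_alpha, alpha_hat)  # the estimate should be close to the truth
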
def loglike(a, X):
    N = len(X)
    t = np.mean(np.log(X), 0)
    eta = a - 1  # the original referenced an undefined `alpha`; `a` is assumed
    A = -N * gammaln(np.sum(a)) + N * np.sum(gammaln(a))
    J = N * eta.dot(t) - A
    dJ = N * (t + polygamma(0, np.sum(a)) - polygamma(0, a))
    q = -1 / polygamma(1, a)
    c = polygamma(1, np.sum(a))
    H_inv = (np.diag(q) - np.outer(q, q) * c / (1 + c * np.sum(q))) / N
    return J, dJ, H_inv

def NBRS(counts):
    N = float(sum(counts))
    freqs = [c for c in counts if c > 0]
    f1 = sum([x for x in freqs if x == 1])
    Delt = N - f1
    if Delt > 0.0:
        # Only possible when there are repetitions; otherwise psi(Delta) diverges.
        S = Euler - log(2.0) + 2.0 * log(N) - polygamma(0, Delt)
        dS = sqrt(polygamma(1, Delt))
        return (S, dS)
    else:
        # Default back to the ML estimator.
        return EntropyML(counts)

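# Hedged usage sketch for NBRS above (my addition). `Euler` (the
# Euler-Mascheroni constant) and `EntropyML` (the maximum-likelihood plug-in
# entropy estimator) are module-level names in the original; minimal
# stand-ins are provided here so the call runs.
import numpy as np
from numpy import log, sqrt
from scipy.special import polygamma

Euler = 0.5772156649015329

def EntropyML(counts):
    p = np.array([c for c in counts if c > 0], dtype=float)
    p /= p.sum()
    return (-np.sum(p * np.log(p)), 0.0)

counts = [5, 3, 1, 1, 2]  # N = 12 with two singletons, so Delta = 10 > 0
print(NBRS(counts))       # (entropy estimate in nats, standard error)
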
def update_alpha(self, gammat, rho):
    N = float(len(gammat))
    logphat = sum(dirichlet_expectation(gamma) for gamma in gammat) / N
    dalpha = numpy.copy(self.alpha)
    gradf = N * (psi(numpy.sum(self.alpha)) - psi(self.alpha) + logphat)
    c = N * polygamma(1, numpy.sum(self.alpha))
    q = -N * polygamma(1, self.alpha)
    b = numpy.sum(gradf / q) / (1 / c + numpy.sum(1 / q))
    dalpha = -(gradf - b) / q
    if all(rho() * dalpha + self.alpha > 0):
        self.alpha += rho() * dalpha
    return self.alpha

def backward(self, delta):
    a = self.shape.value
    psia = sp.digamma(a)
    psi1a = sp.polygamma(1, a)
    sqrtpsi1a = np.sqrt(psi1a)
    psi2a = sp.polygamma(2, a)
    b = self.rate.value
    eps = (np.log(self.output) - psia + np.log(b)) / sqrtpsi1a
    dshape = self.output * (0.5 * eps * psi2a / sqrtpsi1a + psi1a) * delta
    drate = -delta * self.output / b
    self.shape.backward(dshape)
    self.rate.backward(drate)

def doc_lowerbound_likelihood(self, doc):
    obj = 0.0
    sum_digamma = polygamma(0, sum(doc.gamma))
    digamma = polygamma(0, doc.gamma)
    for i in range(self.topicNum):
        obj += (self.alpha[i] - 1) * (digamma[i] - sum_digamma)
        for j in doc.get_term_id_list():
            obj += doc.phi[(i, j)] * (digamma[i] - sum_digamma + log(self.beta[(i, j)]))
            obj -= doc.phi[(i, j)] * log(doc.phi[(i, j)])
        obj -= (doc.gamma[i] - 1) * (digamma[i] - sum_digamma)
    obj += gammaln(doc.gamma).sum() - gammaln(sum(doc.gamma))
    return obj