def entropy(X, k=1):
    '''Returns the entropy of X, given as array X = array(n, dx)
    where
        n = number of samples
        dx = number of dimensions
    Optionally:
        k = number of nearest neighbors for density estimation
    '''
    # Distance to kth nearest neighbor
    r = nearest_distances(X, k)  # squared distances
    n, d = X.shape
    volume_unit_ball = (pi ** (.5 * d)) / gamma(.5 * d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.

    return .5*d*mean(log(r)) + log(volume_unit_ball) + log(n-1) - log(k)
    '''
    '''
    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and: Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''
    return .5 * d * mean(log(r)) + log(volume_unit_ball) + psi(n) - psi(k)
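# The function above depends on a nearest_distances() helper that is not shown
# here. Below is a minimal, self-contained sketch of the same Kozachenko-Leonenko
# estimator, using sklearn's NearestNeighbors in its place (an assumption, not
# the original implementation) and plain, non-squared distances.
import numpy as np
from scipy.special import gamma as gamma_fn, psi
from sklearn.neighbors import NearestNeighbors


def kl_entropy_sketch(X, k=1):
    """k-NN (Kozachenko-Leonenko) estimate of differential entropy in nats."""
    n, d = X.shape
    # distance to the k-th nearest neighbour, excluding the point itself
    dist, _ = NearestNeighbors(n_neighbors=k + 1).fit(X).kneighbors(X)
    r = dist[:, -1]
    volume_unit_ball = np.pi ** (0.5 * d) / gamma_fn(0.5 * d + 1)
    return d * np.mean(np.log(r + 1e-12)) + np.log(volume_unit_ball) + psi(n) - psi(k)


# Sanity check: a 1-D standard normal has entropy 0.5*log(2*pi*e) ~= 1.419 nats.
rng = np.random.default_rng(0)
print(kl_entropy_sketch(rng.standard_normal((5000, 1)), k=3))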
def compute_elbo(self, doc_ids, doc_cnt, doc_links): """ compute evidence lower bound for trained model """ elbo = 0 e_log_theta = psi(self.gamma) - psi(np.sum(self.gamma, 1))[:, np.newaxis] # D x K log_beta = np.log(self.beta + eps) for di in xrange(self.n_doc): words = doc_ids[di] cnt = doc_cnt[di] elbo += np.sum(cnt * (self.phi[di] * log_beta[:, words])) # E_q[log p(w_{d,n}|\beta,z_{d,n})] elbo += np.sum((self.alpha - 1.0) * e_log_theta[di, :]) # E_q[log p(\theta_d | alpha)] elbo += np.sum(self.phi[di].T * e_log_theta[di, :]) # E_q[log p(z_{d,n}|\theta_d)] elbo += ( -gammaln(np.sum(self.gamma[di, :])) + np.sum(gammaln(self.gamma[di, :])) - np.sum((self.gamma[di, :] - 1.0) * (e_log_theta[di, :])) ) # - E_q[log q(theta|gamma)] elbo += -np.sum(cnt * self.phi[di] * np.log(self.phi[di])) # - E_q[log q(z|phi)] for adi in doc_links[di]: elbo += ( np.dot(self.eta, self.pi[di] * self.pi[adi]) + self.nu ) # E_q[log p(y_{d1,d2}|z_{d1},z_{d2},\eta,\nu)] return elbo
def estimate_alpha_from_counts(D, K, initial_alpha, counts, n_iter=1000):
    """Estimate the posterior alpha of a Dirichlet multinomial from samples of
    the multinomial counts.

    This implements the fixed-point update described in
    Minka, T. P. Estimating a Dirichlet distribution (technical report).
    """
    counts = counts.astype(float)
    sdata = np.sum(counts, axis=1)

    # initialise old and new alphas before iteration
    alpha_old = np.ones(K) * initial_alpha
    for i in range(n_iter):
        sa = np.sum(alpha_old)
        temp = np.tile(alpha_old, (D, 1))
        g = np.sum(psi(counts + temp), axis=0) - D * psi(alpha_old)
        h = np.sum(psi(sdata + sa)) - D * psi(sa)
        alpha_new = alpha_old * (g / h)
        if np.max(np.abs(alpha_new - alpha_old)) < 1e-6:
            break
        if np.isnan(np.min(alpha_new)):
            # prevent NaN from propagating
            return alpha_old
        # set alpha_new to alpha_old for the next iteration update
        alpha_old = alpha_new
    return alpha_new
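# A quick, hypothetical way to exercise the fixed-point update above: draw
# Dirichlet-multinomial counts with a known alpha and check that the estimate
# lands nearby. Assumes estimate_alpha_from_counts() is in scope; the other
# names here are illustrative only.
import numpy as np

rng = np.random.default_rng(42)
true_alpha = np.array([2.0, 5.0, 1.0])
D, K, n_draws = 500, 3, 200

# one multinomial count vector per "document"
theta = rng.dirichlet(true_alpha, size=D)
counts = np.vstack([rng.multinomial(n_draws, p) for p in theta])

alpha_hat = estimate_alpha_from_counts(D, K, initial_alpha=1.0, counts=counts)
print(alpha_hat)  # should be roughly in the neighbourhood of true_alpha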
def loglik(self, obs=None):
    dim = self.__dim__
    if obs is None:
        obs = numpy.arange(dim)
    else:
        assert numpy.ndim(obs) == 1
    pi = self.__param__.pi
    alpha = self.__param__.alpha
    if numpy.isfinite(alpha):
        val = numpy.zeros(numpy.size(obs))
        val[:] = -numpy.inf
        ind, = numpy.where(pi[obs] > 0.0)
        # Evaluate the expected log-likelihood.
        val[ind] = special.psi(alpha * pi[obs[ind]]) - special.psi(alpha)
        return val
    else:
        return numpy.log(pi[obs])
def compute_moments_and_cgf(self, phi, mask=True):
    r"""
    Compute the moments and :math:`g(\phi)`.

    .. math::

        \overline{\mathbf{u}} (\boldsymbol{\phi})
        &=
        \begin{bmatrix}
          \psi(\phi_1) - \psi(\sum_d \phi_{1,d})
        \end{bmatrix}
        \\
        g_{\boldsymbol{\phi}} (\boldsymbol{\phi})
        &=
        TODO
    """
    if np.any(np.asanyarray(phi) <= 0):
        raise ValueError("Natural parameters should be positive")

    sum_gammaln = np.sum(special.gammaln(phi[0]), axis=-1)
    gammaln_sum = special.gammaln(np.sum(phi[0], axis=-1))
    psi_sum = special.psi(np.sum(phi[0], axis=-1, keepdims=True))

    # Moments <log x>
    u0 = special.psi(phi[0]) - psi_sum
    u = [u0]

    # G
    g = gammaln_sum - sum_gammaln

    return (u, g)
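# Illustrative Monte Carlo check of the moment formula above (not part of the
# original class): under Dirichlet(alpha), <log x_d> = psi(alpha_d) - psi(sum(alpha)).
import numpy as np
from scipy.special import psi

alpha = np.array([1.5, 2.0, 4.0])
analytic = psi(alpha) - psi(alpha.sum())

rng = np.random.default_rng(0)
samples = rng.dirichlet(alpha, size=200_000)
print(analytic)
print(np.log(samples).mean(axis=0))  # agrees to ~2-3 decimals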
def localVB(doc, alpha, k, expElogBeta): # Global Variables VAR_MAX_ITER = 10 VAR_CONVERGED = 0.01 isConverged = 0 # Initialization ids = [id for id,_ in doc] cts = np.array([cnt for _,cnt in doc]) gamma = np.asarray([1.0 / k for i in xrange(k)]) # 1*k expElogTheta = np.exp(psi(gamma)) # 1*k expElogBetaD = expElogBeta_k_by_d(expElogBeta,k,ids) # k*d phinorm = np.dot(expElogTheta,expElogBetaD) + 1e-100 # 1 * d for round in xrange(VAR_MAX_ITER): lastgamma = gamma gamma = alpha + expElogTheta * np.dot(cts/phinorm, expElogBetaD.T) expElogTheta = np.exp(psi(gamma)) phinorm = np.dot(expElogTheta,expElogBetaD) + 1e-100 # 1 * d # isConverged ? meanchange = np.mean(abs(gamma - lastgamma)) if (meanchange < VAR_CONVERGED): isConverged = 1 break # calculate phi phi_cts = np.dot( np.dot( np.diag(expElogTheta), expElogBetaD), np.diag(cts/phinorm)) return phi_cts,ids,gamma,isConverged
def Mstep(max_iter): global alpha,beta,Gamma,Phi,doc,doc_cnt; #update beta for i in range(K): for v in range(voca_size): beta[i][v] = 0; for d in range(doc_size): for n in range(len(doc[d])): beta[i][doc[d][n]] += doc_cnt[d][n] * Phi[d][n][i]; beta_sum = sum_matrix(beta, 0); for k in range(K): for i in range(voca_size): beta[k][i] = beta[k][i]/beta_sum[k]; #update alpha last = 0; iter_num = 0; const = 0; for d in range(doc_size): gamma_sum = sum_vector(Gamma[d]); for i in range(K): const += (sp.psi(Gamma[d][i]) - sp.psi(gamma_sum)); now = -compute_alpha_mle(alpha); origin = now; while (abs(last - now) > 1e-9 and iter_num < max_iter): da = K * (doc_size * (sp.psi(alpha * K) - sp.psi(alpha))) + const; dda = K * (doc_size * (K * sp.polygamma(1, alpha * K) - sp.polygamma(1, alpha))); dx = -da/dda; alpha = backtrack(alpha,dx,da,0.01,0.5); last = now; now = -compute_alpha_mle(alpha); iter_num += 1; if (now < origin): print('error alpha');
def expected_log_m(self):
    """
    Compute the expected log probability of each block
    :return:
    """
    E_log_m = psi(self.mf_pi) - psi(self.mf_pi.sum())
    return E_log_m
def run_e_step(self): """ compute variational expectations """ ll = 0. for p in xrange(self.N): for q in xrange(self.N): new_phi = np.zeros(self.K) for g in xrange(self.K): new_phi[g] = np.exp(psi(self.gamma[p,g])-psi(np.sum(self.gamma[p,:]))) * np.prod(( (self.B[g,:]**self.Y[p,q]) * ((1.-self.B[g,:])**(1.-self.Y[p,q])) ) ** self.phi[q,p,:] ) self.phi[p,q,:] = new_phi/np.sum(new_phi) new_phi = np.zeros(self.K) for h in xrange(self.K): new_phi[h] = np.exp(psi(self.gamma[q,h])-psi(np.sum(self.gamma[q,:]))) * np.prod(( (self.B[:,h]**self.Y[p,q]) * ((1.-self.B[:,h])**(1.-self.Y[p,q])) ) ** self.phi[p,q,:] ) self.phi[q,p,:] = new_phi/np.sum(new_phi) for k in xrange(self.K): self.gamma[p,k] = self.alpha[k] + np.sum(self.phi[p,:,k]) + np.sum(self.phi[:,p,k]) self.gamma[q,k] = self.alpha[k] + np.sum(self.phi[q,:,k]) + np.sum(self.phi[:,q,k]) return ll
def maximization(self): sNo = self.p.sNo for i in range(sNo): self.p.avgPi[i] = (self.p.uPiArr[i] + self.p.gmMat[0][i]) / (self.p.sumUPi + 1.0) self.p.avgLnPi[i] = spsp.psi(self.p.uPiArr[i] + self.p.gmMat[0][i]) self.p.avgLnPi[i] -= spsp.psi(self.p.sumUPi + 1.0) for j in range(sNo): self.p.avgA[i][j] = (self.p.uAMat[i][j] + self.p.Nij[i][j]) / (self.p.sumUAArr[i] + self.p.Nii[i]) self.p.avgLnA[i][j] = spsp.psi(self.p.uAMat[i][j] + self.p.Nij[i][j]) self.p.avgLnA[i][j] -= spsp.psi(self.p.sumUAArr[i] + self.p.Nii[i]) self.p.btMu[i] = self.p.uBtArr[i] + self.p.Ni[i] self.p.mu0[i] = (self.p.uBtArr[i] * self.p.uMuArr[i] + self.p.Ni[i] * self.p.barX[i]) / self.p.btMu[i] self.p.aLm[i] = self.p.uAArr[i] + self.p.Ni[i] / 2.0 self.p.bLm[i] = self.p.uBArr[i] + (self.p.NiSi[i] / 2.0) self.p.bLm[i] += ( self.p.uBtArr[i] * self.p.Ni[i] * (self.p.barX[i] - self.p.uMuArr[i]) ** 2.0 / 2.0 / (self.p.uBtArr[i] + self.p.Ni[i]) ) self.p.avgMu[i] = self.p.mu0[i] self.p.avgLm[i] = self.p.aLm[i] / self.p.bLm[i] self.p.avgLnLm[i] = spsp.psi(self.p.aLm[i]) - math.log(self.p.bLm[i])
def update_V(self, corpus): lb = 0 sumLnZ = np.sum(psi(corpus.A) - np.log(corpus.B), 0) # K dim tmp = np.dot(corpus.R, corpus.w) # M x K sum_r_w = np.sum(tmp, 0) assert len(sum_r_w) == self.K for i in xrange(self.c_a_max_step): one_V = 1-self.V stickLeft = self.getStickLeft(self.V) # prod(1-V_(dim-1)) p = self.V * stickLeft psiV = psi(self.beta * p) vVec = - self.beta*stickLeft*sum_r_w + self.beta*stickLeft*sumLnZ - corpus.M*self.beta*stickLeft*psiV; for k in xrange(self.K): tmp1 = self.beta*sum(sum_r_w[k+1:]*p[k+1:]/one_V[k]); tmp2 = self.beta*sum(sumLnZ[k+1:]*p[k+1:]/one_V[k]); tmp3 = corpus.M*self.beta*sum(psiV[k+1:]*p[k+1:]/one_V[k]); vVec[k] = vVec[k] + tmp1 - tmp2; vVec[k] = vVec[k] + tmp3; vVec[k] = vVec[k] vVec[:self.K-2] -= (self.alpha-1)/one_V[:self.K-2]; vVec[self.K-1] = 0; step_stick = self.getstepSTICK(self.V,vVec,sum_r_w,sumLnZ,self.beta,self.alpha,corpus.M); self.V = self.V + step_stick*vVec; self.p = self.getP(self.V) lb += self.K*gammaln(self.alpha+1) - self.K*gammaln(self.alpha) + np.sum((self.alpha-1)*np.log(1-self.V[:self.K-1])) if self.is_verbose: print 'p(V)-q(V) %f' % lb return lb
def update_alpha(self, gammat, rho):
    """
    Update parameters for the Dirichlet prior on the per-document
    topic weights `alpha` given the last `gammat`.

    Uses Newton's method, described in
    **Huang: Maximum Likelihood Estimation of Dirichlet Distribution Parameters.**
    (http://www.stanford.edu/~jhuang11/research/dirichlet/dirichlet.pdf)
    """
    N = float(len(gammat))
    logphat = sum(dirichlet_expectation(gamma) for gamma in gammat) / N
    dalpha = numpy.copy(self.alpha)
    gradf = N * (psi(numpy.sum(self.alpha)) - psi(self.alpha) + logphat)

    c = N * polygamma(1, numpy.sum(self.alpha))
    q = -N * polygamma(1, self.alpha)

    b = numpy.sum(gradf / q) / (1 / c + numpy.sum(1 / q))

    dalpha = -(gradf - b) / q

    if all(rho() * dalpha + self.alpha > 0):
        self.alpha += rho() * dalpha
    else:
        logger.warning("updated alpha not positive")
    logger.info("optimized alpha %s" % list(self.alpha))

    return self.alpha
def variation_update(self): #update phi, gamma e_log_theta = psi(self.gamma) - psi(np.sum(self.gamma, 1))[:,np.newaxis] new_beta = np.zeros([self.K, self.V]) for di in xrange(self.D): words = self.doc_ids[di] cnt = self.doc_cnt[di] doc_len = np.sum(cnt) new_phi = np.log(self.beta[:,words]+eps) + e_log_theta[di,:][:,np.newaxis] gradient = np.zeros(self.K) for adi in self.doc_links[di]: gradient += self.eta * self.pi[adi,:] / doc_len new_phi += gradient[:,np.newaxis] new_phi = np.exp(new_phi) new_phi = new_phi/np.sum(new_phi,0) self.phi[di] = new_phi self.pi[di,:] = np.sum(cnt * self.phi[di],1)/np.sum(cnt * self.phi[di]) self.gamma[di,:] = np.sum(cnt * self.phi[di], 1) + self.alpha new_beta[:, words] += (cnt * self.phi[di]) self.beta = new_beta / np.sum(new_beta, 1)[:,np.newaxis]
def _fit_s(D, a0, logp, tol=1e-7, maxiter=1000):
    '''Assuming a fixed mean for the Dirichlet distribution, maximize the
    likelihood for the precision a.k.a. s'''
    N, K = D.shape
    s1 = a0.sum()
    m = a0 / s1
    mlogp = (m * logp).sum()
    for i in xrange(maxiter):
        s0 = s1
        g = psi(s1) - (m * psi(s1 * m)).sum() + mlogp
        h = _trigamma(s1) - ((m ** 2) * _trigamma(s1 * m)).sum()

        if g + s1 * h < 0:
            s1 = 1 / (1 / s0 + g / h / (s0 ** 2))
        if s1 <= 0:
            s1 = s0 * exp(-g / (s0 * h + g))  # Newton on log s
        if s1 <= 0:
            s1 = 1 / (1 / s0 + g / ((s0 ** 2) * h + 2 * s0 * g))  # Newton on 1/s
        if s1 <= 0:
            s1 = s0 - g / h  # Newton
        if s1 <= 0:
            raise Exception('Unable to update s from {}'.format(s0))

        a = s1 * m
        if abs(s1 - s0) < tol:
            return a

    raise Exception('Failed to converge after {} iterations, s is {}'
                    .format(maxiter, s1))
def rice_homomorf_est(image, SNR=0, LPF=4.8, mode=2, config=build_default()):
    window_size = config['ex_window_size']
    (M2, Sigma_n) = em_ml_rice2D(image, config['ex_iterations'],
                                 [window_size, window_size])
    Sigma_n2 = lpf(Sigma_n, config['lpf_f_SNR'])
    M1 = filter2B(image, numpy.ones((5, 5)) / 25)
    # estimate the SNR map when it was not supplied (the default 0 means "estimate")
    if numpy.size(SNR) == 1 and numpy.all(SNR == 0):
        SNR = M2 / Sigma_n

    Rn = abs(image - M1)
    lRn = numpy.log(Rn * (Rn != 0) + 0.001 * (Rn == 0))
    LPF2 = lpf(lRn, LPF)
    Mapa2 = numpy.exp(LPF2)
    MapaG = Mapa2 * 2 / numpy.sqrt(2) * numpy.exp(-special.psi(1) / 2.)

    LocalMean = 0
    if mode == 1:
        LocalMean = M1
    elif mode == 2:
        LocalMean = M2

    Rn = numpy.abs(image - LocalMean)
    lRn = numpy.log(Rn * (Rn != 0) + 0.001 * (Rn == 0))
    LPF2 = lpf(lRn, LPF)
    Fc1 = correct_rice_gauss(SNR)
    LPF1 = LPF2 - Fc1
    LPF1 = lpf(LPF1, config['lpf_f_Rice'], 2.0)
    Mapa1 = numpy.exp(LPF1)
    MapaR = Mapa1 * 2 / numpy.sqrt(2) * numpy.exp(-special.psi(1) / 2.)
    return MapaR, MapaG
def chaowangjost(counts):
    """Entropy calculation using the Chao, Wang, Jost correction.
    doi: 10.1111/2041-210X.12108

    Parameters
    ----------
    counts : list
        bin counts

    Returns
    -------
    entropy : float
    """
    n_samples = npsum(counts)
    bcbc = bincount(counts.astype(int))
    if len(bcbc) < 3:
        return grassberger(counts)
    if bcbc[2] == 0:
        if bcbc[1] == 0:
            A = 1.
        else:
            A = 2. / ((n_samples - 1.) * (bcbc[1] - 1.) + 2.)
    else:
        A = 2. * bcbc[2] / ((n_samples - 1.) * (bcbc[1] - 1.) + 2. * bcbc[2])
    pr = arange(1, int(n_samples))
    pr = 1. / pr * (1. - A) ** pr
    entropy = npsum(counts / n_samples * (psi(n_samples) -
                                          nan_to_num(psi(counts))))

    if bcbc[1] > 0 and A != 1.:
        entropy += nan_to_num(bcbc[1] / n_samples *
                              (1 - A) ** (1 - n_samples *
                                          (-log(A) - npsum(pr))))
    return entropy
def entropy(X, k=1):
    '''Returns the entropy of X.

    Parameters
    ===========
    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed
    k : int, optional
        number of nearest neighbors for density estimation

    Notes
    ======
    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and: Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''
    # Distance to kth nearest neighbor
    r = nearest_distances(X, k)  # squared distances
    n, d = X.shape
    volume_unit_ball = (pi ** (.5 * d)) / gamma(.5 * d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.

    return d*mean(log(r)) + log(volume_unit_ball) + log(n-1) - log(k)
    '''
    return (d * np.mean(np.log(r + np.finfo(X.dtype).eps))
            + np.log(volume_unit_ball) + psi(n) - psi(k))
def knn_mutinf(x, y, k=None, boxsize=None):
    """k-nearest-neighbor (Kraskov) mutual information estimate.

    Parameters
    ----------
    x : array_like, shape = (n_samples, n_dim)
        Independent variable
    y : array_like, shape = (n_samples, n_dim)
        Independent variable
    k : int
        Number of nearest neighbors.
    boxsize : float (or None)
        Wrap space between [0., boxsize)

    Returns
    -------
    mi : float
    """
    data = hstack((x, y))

    k = k if k else max(3, int(data.shape[0] * 0.01))

    # Find nearest neighbors in joint space, p=inf means max-norm
    dvec = nearest_distances(data, k=k)

    a, b, c, d = (
        avgdigamma(atleast_2d(x).reshape(data.shape[0], -1), dvec),
        avgdigamma(atleast_2d(y).reshape(data.shape[0], -1), dvec),
        psi(k),
        psi(data.shape[0]),
    )
    return -a - b + c + d
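# The snippet above relies on nearest_distances() and avgdigamma() helpers that
# are not shown. This is a self-contained sketch of the same KSG (Kraskov)
# estimator for two 1-D variables, using scipy's cKDTree instead (an assumption);
# correlated Gaussians give an analytic reference, MI = -0.5*log(1 - rho**2).
import numpy as np
from scipy.spatial import cKDTree
from scipy.special import psi


def ksg_mi_sketch(x, y, k=3):
    x = x.reshape(-1, 1)
    y = y.reshape(-1, 1)
    data = np.hstack((x, y))
    n = data.shape[0]
    # distance to the k-th neighbour in the joint space, max-norm
    d_joint = cKDTree(data).query(data, k=k + 1, p=np.inf)[0][:, -1]
    # neighbours strictly inside that radius in each marginal space
    nx = cKDTree(x).query_ball_point(x, d_joint - 1e-12, p=np.inf, return_length=True) - 1
    ny = cKDTree(y).query_ball_point(y, d_joint - 1e-12, p=np.inf, return_length=True) - 1
    return psi(k) + psi(n) - np.mean(psi(nx + 1) + psi(ny + 1))


rng = np.random.default_rng(1)
rho = 0.8
x, y = rng.multivariate_normal([0, 0], [[1, rho], [rho, 1]], size=5000).T
print(ksg_mi_sketch(x, y), -0.5 * np.log(1 - rho ** 2))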
def _e_log_beta(c0, d0, c, d):
    '''Calculates the expectation of the log pdf of a beta-distributed parameter'''
    log_C = gammaln(c0 + d0) - gammaln(c0) - gammaln(d0)
    psi_cd = psi(c + d)
    log_mu = (c0 - 1) * (psi(c) - psi_cd)
    log_i_mu = (d0 - 1) * (psi(d) - psi_cd)
    return np.sum(log_C + log_mu + log_i_mu)
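# Illustrative Monte Carlo check of the closed form above, for scalar parameters:
# E_{mu ~ Beta(c, d)}[log Beta(mu; c0, d0)] (names reused from the function).
import numpy as np
from scipy.special import gammaln, psi
from scipy.stats import beta as beta_dist

c0, d0, c, d = 2.0, 3.0, 5.0, 4.0
closed_form = (gammaln(c0 + d0) - gammaln(c0) - gammaln(d0)
               + (c0 - 1) * (psi(c) - psi(c + d))
               + (d0 - 1) * (psi(d) - psi(c + d)))

rng = np.random.default_rng(0)
mu = rng.beta(c, d, size=500_000)
print(closed_form, beta_dist.logpdf(mu, c0, d0).mean())  # agree to ~3 decimals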
def _update_resps(self, X, alphaK, *args):
    '''Updates the distribution of the latent variable with a Dirichlet prior'''
    e_log_weights = psi(alphaK) - psi(np.sum(alphaK))
    return self._update_resps_parametric(X, e_log_weights, self.n_components,
                                         *args)
def compute_mle(d): global alpha,beta,Gamma,Phi,doc,timer,doc_cnt; res = 0; res += sp.gammaln(K * alpha); res -= K * sp.gammaln(alpha); gamma_sum = sum_vector(Gamma[d]); length = len(doc[d]); psi = []; for i in range(K): psi.append(sp.psi(Gamma[d][i]) - sp.psi(gamma_sum)); for i in range(K): res += (alpha - 1) * psi[i]; res += sp.gammaln(Gamma[d][i]); res -= (Gamma[d][i] - 1) * psi[i]; now = time.time(); for n in range(length): for i in range(K): res += doc_cnt[d][n] * Phi[d][n][i] * psi[i]; # res -= doc_cnt[d][n] * Phi[d][n][i] * math.log(Phi[d][n][i]); res += doc_cnt[d][n] * Phi[d][n][i] * math.log(beta[i][doc[d][n]]/Phi[d][n][i]); timer += time.time() - now; res -= sp.gammaln(gamma_sum); return res;
def update_Z(self, corpus, iter): lb = 0 bp = self.beta * self.p corpus.A = bp + corpus.phi_doc # taylor approximation on E[\sum lnZ] xi = np.sum(corpus.A / corpus.B, 1) E_inv_w = np.zeros([corpus.M, corpus.K]) ln_E_w = np.zeros([corpus.M, corpus.K]) for mi in xrange(corpus.M): E_inv_w[mi, :] = np.prod((corpus.w_A / corpus.w_B)[corpus.R[mi, :] == 1, :], 0) ln_E_w[mi, :] = np.sum((np.log(corpus.w_B) - psi(corpus.w_A)) * corpus.R[mi, :][:, np.newaxis], 0) if iter < self.hdp_init_step: corpus.B = 1.0 + (corpus.Nm / xi)[:, np.newaxis] else: corpus.B = E_inv_w + (corpus.Nm / xi)[:, np.newaxis] # expectation of p(Z) lb += np.sum( -bp * ln_E_w + (bp - 1) * (psi(corpus.A) - np.log(corpus.B)) - E_inv_w * (corpus.A / corpus.B) - gammaln(bp) ) # entropy of q(Z) lb -= np.sum( corpus.A * np.log(corpus.B) + (corpus.A - 1) * (psi(corpus.A) - np.log(corpus.B)) - corpus.A - gammaln(corpus.A) ) if self.is_verbose: print "p(z)-q(z) %f" % lb return lb
def knn_entropy(*args, k=None, boxsize=None):
    """k-nearest-neighbor entropy estimate.

    Parameters
    ----------
    args : numpy.ndarray, shape = (n_samples, ) or (n_samples, n_dims)
        Data of which to calculate entropy. Each array must have the same
        number of samples.
    k : int
        Number of nearest neighbors.
    boxsize : float (or None)
        Wrap space between [0., boxsize)

    Returns
    -------
    entropy : float
    """
    data = vstack((args)).T
    n_samples = data.shape[0]
    k = k if k else max(3, int(data.shape[0] * 0.01))
    n_dims = data.shape[1]

    nneighbor = nearest_distances(data, k=k)
    const = psi(n_samples) - psi(k) + n_dims * log(2)
    return (const + n_dims * log(nneighbor).mean())
def update_beta(self, corpus): ElogW = np.log(corpus.w_B) - psi(corpus.w_A) lnZ = psi(corpus.A) - np.log(corpus.B) first = np.zeros([corpus.M, self.K]) for mi in xrange(corpus.M): first[mi, :] = -self.p * np.sum(ElogW[corpus.R[mi, :] == 1, :], 0) # first_sum = np.sum(first) # second = np.sum(lnZ * self.p) # for i in xrange(1): # last = - corpus.M * np.sum(self.p*psi(self.beta * self.p)) # gradient = first_sum + second + last # gradient /= corpus.M * np.sum(self.p * self.p * psi(self.beta*self.p)) # step = self.getstepBeta(gradient, self.beta, first, lnZ, self.p, corpus) # self.beta += step*gradient # since beta does not change a lot, this way is more efficient candidate = np.linspace(-1, 1, 31) f = np.zeros(len(candidate)) for i in xrange(len(candidate)): step = candidate[i] new_beta = self.beta + self.beta * step if new_beta < 0: f[i] = -np.inf else: bp = new_beta * self.p f[i] = np.sum(new_beta * first) + np.sum(bp * lnZ) - np.sum(corpus.M * gammaln(bp)) best_idx = f.argsort()[-1] maxstep = candidate[best_idx] self.beta += self.beta * maxstep if self.is_verbose: print "new beta = %.2f, %.2f" % (self.beta, candidate[best_idx])
def e_step_one_iter(alpha, beta, docs, phi, ips): M, K = docs.size, alpha.size for m in xrange(M): N_m = docs[m].size psi_sum_ips = psi(ips[m, :].sum()) for n in xrange(N_m): for i in xrange(K): E_q = psi(ips[m, i]) - psi_sum_ips phi[m][n, i] = (beta[i, docs[m][n]] * np.exp(E_q)) phi[m] /= phi[m].sum(axis=1)[:, None] # normalize phi ips[m] = alpha + phi[m].sum(axis=0) # gradient computation grad_ips = np.zeros(ips.shape, dtype=np.float64) for m in xrange(M): for i in xrange(K): grad_ips[m, i]\ = (polygamma(1, ips[m, i]) * (alpha[i] + phi[m][:, i].sum() - ips[m, i]) - polygamma(1, ips[m, :].sum()) * (alpha.sum() + phi[m].sum() - ips[m, :].sum())) return (phi, ips, grad_ips)
def dkl_wishart(a1, B1, a2, B2):
    """
    returns the KL divergence between two Wishart distributions
    of parameters (a1, B1) and (a2, B2),
    where a1 and a2 are degrees of freedom
    and B1 and B2 are scale matrices
    """
    from scipy.special import psi, gammaln
    from numpy.linalg import det, pinv
    tiny = 1.e-15
    # fixme: check size
    dim = B1.shape[0]
    d1 = max(det(B1), tiny)
    d2 = max(det(B2), tiny)
    lgc = dim * (dim - 1) * np.log(np.pi) / 4
    lg1 = lgc
    lg2 = lgc
    lw1 = -np.log(d1) + dim * np.log(2)
    lw2 = -np.log(d2) + dim * np.log(2)
    for i in range(dim):
        lg1 += gammaln((a1 - i) / 2)
        lg2 += gammaln((a2 - i) / 2)
        lw1 += psi((a1 - i) / 2)
        lw2 += psi((a2 - i) / 2)
    lz1 = 0.5 * a1 * dim * np.log(2) - 0.5 * a1 * np.log(d1) + lg1
    lz2 = 0.5 * a2 * dim * np.log(2) - 0.5 * a2 * np.log(d2) + lg2
    dkl = (a1 - dim - 1) * lw1 - (a2 - dim - 1) * lw2 - a1 * dim
    dkl += a1 * np.trace(np.dot(B2, pinv(B1)))
    dkl /= 2
    dkl += (lz2 - lz1)
    return dkl
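# Hedged sanity check for dkl_wishart above (assuming it and numpy-as-np are in
# scope): the divergence of a Wishart distribution from itself should be zero.
B = np.array([[2.0, 0.3],
              [0.3, 1.0]])
print(dkl_wishart(5.0, B, 5.0, B))  # ~0.0 up to floating-point error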
def get_vlb(self): vlb = 0 # Get the VLB of the expected class assignments E_ln_m = self.mf_expected_log_m() for n in xrange(self.N): # Add the cross entropy of p(c | m) vlb += Discrete().negentropy(E_x=self.mf_m[n,:], E_ln_p=E_ln_m) # Subtract the negative entropy of q(c) vlb -= Discrete(self.mf_m[n,:]).negentropy() # Get the VLB of the connection probability matrix # Add the cross entropy of p(p | tau1, tau0) vlb += Beta(self.tau1, self.tau0).\ negentropy(E_ln_p=(psi(self.mf_tau1) - psi(self.mf_tau0 + self.mf_tau1)), E_ln_notp=(psi(self.mf_tau0) - psi(self.mf_tau0 + self.mf_tau1))).sum() # Subtract the negative entropy of q(p) vlb -= Beta(self.mf_tau1, self.mf_tau0).negentropy().sum() # Get the VLB of the block probability vector, m # Add the cross entropy of p(m | pi) vlb += Dirichlet(self.pi).negentropy(E_ln_g=self.mf_expected_log_m()) # Subtract the negative entropy of q(m) vlb -= Dirichlet(self.mf_pi).negentropy() for c1 in xrange(self.C): for c2 in xrange(self.C): vlb += self.weight_models[c1][c2].get_vlb() return vlb
def compute_moments_and_cgf(self, phi, mask=True): r""" Compute the moments and :math:`g(\phi)`. .. math:: \overline{\mathbf{u}} (\boldsymbol{\phi}) &= \begin{bmatrix} \psi(\phi_1) - \psi(\sum_d \phi_{1,d}) \end{bmatrix} \\ g_{\boldsymbol{\phi}} (\boldsymbol{\phi}) &= TODO """ sum_gammaln = np.sum(special.gammaln(phi[0]), axis=-1) gammaln_sum = special.gammaln(np.sum(phi[0], axis=-1)) psi_sum = special.psi(np.sum(phi[0], axis=-1, keepdims=True)) # Moments <log x> u0 = special.psi(phi[0]) - psi_sum u = [u0] # G g = gammaln_sum - sum_gammaln return (u, g)
def estimate_abundances(self): """ Compute expectations and variances of the log relative abundances (log rho) of each target. Use these to compute 95% confidence intervals of the relative abundances themselves. """ log_theta = np.zeros(self.ntargs) sd_log_theta = np.zeros(self.ntargs) for t in xrange(self.ntargs): log_theta[t] = psi(self.alpha[t]) - psi(self.alpha[t]+self.beta[t]) var_log_theta = polygamma(1,self.alpha[t]) - polygamma(1, self.alpha[t]+self.beta[t]) for j in xrange(t): log_theta[t] += psi(self.beta[j]) - psi(self.alpha[j]+self.beta[j]) var_log_theta += polygamma(1,self.beta[j]) - polygamma(1, self.alpha[j]+self.beta[j]) sd_log_theta[t] = sqrt(var_log_theta) self.log_theta = log_theta self.sd_log_theta = sd_log_theta theta_ci_low = np.zeros(self.ntargs) theta_ci_hi = np.zeros(self.ntargs) for t in xrange(self.ntargs): self.targ_samp_prob[t] = exp(log_theta[t]) theta_ci_low[t] = exp(log_theta[t] - ci95sd * sd_log_theta[t]) theta_ci_hi[t] = exp(log_theta[t] + ci95sd * sd_log_theta[t]) # Compute relative abundances and confidence limits w = self.targ_samp_prob / self.eff_len self.rho = w / sum(w) w_low = theta_ci_low / self.eff_len self.rho_ci_low = w_low / sum(w_low) w_hi = theta_ci_hi / self.eff_len self.rho_ci_hi = w_hi / sum(w_hi)
def dirichlet_expectation(alpha):
    """
    For a vector theta ~ Dir(alpha), computes E[log(theta)] given alpha.
    """
    if (len(alpha.shape) == 1):
        return(psi(alpha) - psi(n.sum(alpha)))
    return(psi(alpha) - psi(n.sum(alpha, 1))[:, n.newaxis])
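# Small usage sketch, assuming dirichlet_expectation() above is in scope; note
# that it relies on numpy being imported as `n`, matching the original snippet.
import numpy as n
from scipy.special import psi

alpha = n.array([0.1, 0.1, 0.1, 5.0])
print(n.exp(dirichlet_expectation(alpha)))  # concentrates on the last component

gamma = n.array([[1.0, 2.0, 3.0], [10.0, 1.0, 1.0]])
print(dirichlet_expectation(gamma))  # row-wise E[log theta], one row per document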
def update_alpha_fp(alpha, theta, sentence_subj, tol=1e-12):
    # Fixed point method in [Minka00]
    K = np.size(alpha, 0)
    M, S = np.shape(theta)

    for k in xrange(K):
        theta_k = theta[sentence_subj == k, :]
        log_p = 1.0 / M * np.sum(np.log(theta_k), 0)
        print log_p
        while True:
            oldnorm = np.linalg.norm(alpha[k])
            alpha[k] = inversepsi(psi(np.sum(alpha[k])) + log_p)
            if abs(np.linalg.norm(alpha[k]) - oldnorm) < tol:
                break

    return alpha
def findAlphaBeta(self):
    # ADJUST ALPHA AND BETA BY USING MINKA'S FIXED-POINT ITERATION
    numerator = 0
    denominator = 0
    for d in range(self.DOCS):
        numerator += psi(self.cntDT[d] + self.alpha) - psi(self.alpha)
        denominator += psi(np.sum(self.cntDT[d] + self.alpha)) - psi(
            np.sum(self.alpha))
    self.alpha *= numerator / denominator  # UPDATE ALPHA

    numerator = 0
    denominator = 0
    for z in range(self.TOPICS):
        numerator += np.sum(
            psi(self.cntTW[z] + self.beta) - psi(self.beta))
        denominator += psi(np.sum(self.cntTW[z] + self.beta)) - psi(
            self.VOCABS * self.beta)
    self.beta = (self.beta * numerator) / (self.VOCABS * denominator)  # UPDATE BETA
def _expec_s(self): if not self.use_svi: return super(GPClassifierSVI, self)._expec_s() self.old_s = self.s invK_mm_expecFF = self.invK_mm.dot( self.uS + self.um_minus_mu0.dot(self.um_minus_mu0.T)) self.rate_s = self.rate_s0 + 0.5 * np.trace(invK_mm_expecFF) # Update expectation of s. See approximations for Binary Gaussian Process Classification, Hannes Nickisch self.s = self.shape_s / self.rate_s self.Elns = psi(self.shape_s) - np.log(self.rate_s) if self.verbose: logging.debug("Updated inverse output scale: " + str(self.s)) self.Ks_mm = self.K_mm / self.s self.invKs_mm = self.invK_mm * self.s self.Ks_nm = self.K_nm / self.s
def _init_component(self, m, dim):
    assert self.mode_dims[m] == dim
    K = self.n_components
    s = self.smoothness
    if not self.debug:
        gamma_DK = s * rn.gamma(s, 1. / s, size=(dim, K))
        delta_DK = s * rn.gamma(s, 1. / s, size=(dim, K))
    else:
        gamma_DK = s * np.ones((dim, K))
        delta_DK = s * np.ones((dim, K))
    self.gamma_DK_M[m] = gamma_DK
    self.delta_DK_M[m] = delta_DK
    self.E_DK_M[m] = gamma_DK / delta_DK
    self.sumE_MK[m, :] = self.E_DK_M[m].sum(axis=0)
    self.G_DK_M[m] = np.exp(sp.psi(gamma_DK) - np.log(delta_DK))
    if m == 0 or not self.debug:
        self.beta_M[m] = 1. / self.E_DK_M[m].mean()
def update(self, X): Y = np.zeros(self.K) XY = np.zeros((self.K, self.N)) for x in X: L = np.array([ psi(self.phi[k]) - self.tau[k]**(-1) - ((x - self.mu[k])**2).sum() / 2 for k in range(self.K) ]) y = np.exp(L) / np.exp(L).sum() Y += y XY += np.array([x * y[k] for k in range(self.K)]) self.phi = self.phi + Y self.mu = np.array([ (self.tau[k] * self.mu[k] + XY[k]) / (self.tau[k] + Y[k]) for k in range(self.K) ]) self.tau = self.tau + Y
def compute_likelihood(self, u, gamma, digamma_gamma, gammaSum, phiO, phiD, phiT, betaO, betaD, betaT, docs, idx_corpus_o, idx_corpus_d, idx_corpus_t): J = self.J K = self.K L = self.L alpha = self.alpha likelihood = 0 digsum = psi(gammaSum) likelihood = loggamma( alpha * J * K * L) - J * K * L * loggamma(alpha) - ( loggamma(gammaSum)) # 1.1, 1.2, 1.3 for j in range(J): for k in range(K): for l in range(L): likelihood += (alpha - 1) * ( digamma_gamma[j, k, l] - digsum) + loggamma( gamma[u, j, k, l]) - (gamma[u, j, k, l] - 1) * ( digamma_gamma[j, k, l] - digsum ) # 2.1, 2.2, 2.3 for w in range(len(idx_corpus_o[u]) ): # int(docs.iloc[u]['wordcount']) if phiO[w, j] > 0 and phiD[w, k] > 0 and phiT[w, l] > 0: likelihood += phiO[w, j] * phiD[w, k] * phiT[w, l] * ( digamma_gamma[j, k, l] - digsum) # 3.1 for j in range(self.J): for wo in range(len(idx_corpus_o[u])): if phiO[wo, j] > 0: likelihood += -phiO[wo, j] * math.log(phiO[wo, j]) + phiO[ wo, j] * betaO[j, idx_corpus_o[u][wo]] # 3.2 O; 3.3 O for k in range(self.K): for wd in range(len(idx_corpus_d[u])): if phiD[wd, k] > 0: likelihood += -phiD[wd, k] * math.log(phiD[wd, k]) + phiD[ wd, k] * betaD[k, idx_corpus_d[u][wd]] # 3.2 D; 3.3 D for l in range(self.L): for wt in range(len(idx_corpus_t[u])): if phiT[wt, l] > 0: likelihood += -phiT[wt, l] * math.log(phiT[wt, l]) + phiT[ wt, l] * betaT[l, idx_corpus_t[u][wt]] # 3.2 T; 3.3 T return likelihood
def wishpart(self, k):
    part1 = sum(
        [psi((self.Vr[k] + 1 - d) / 2) for d in range(1, self.D + 1)])
    part1 += self.D * np.log(2) + np.log(np.linalg.det(self.VW[k]))
    part1 *= (self.r - self.Vr[k]) / 2

    part2 = np.linalg.inv(self.VW[k]) - np.linalg.inv(self.W)
    part2 = np.dot(part2, self.Vr[k] * self.VW[k])
    part2 = 0.5 * np.trace(part2)

    part3 = -(self.r / 2) * np.log(np.linalg.det(self.W))
    part4 = (self.Vr[k] / 2) * np.log(np.linalg.det(self.VW[k]))
    part5 = (self.Vr[k] - self.r) * (self.D / 2) * np.log(2)

    part6 = sum(
        [loggamma((self.Vr[k] + 1 - d) / 2) for d in range(1, self.D + 1)])
    part6 -= sum(
        [loggamma((self.r + 1 - d) / 2) for d in range(1, self.D + 1)])

    res = part1 + part2 + part3 + part4 + part5 + part6
    return res
def _fixedpoint(D, tol=1e-7, maxiter=None):
    '''Simple fixed point iteration method for MLE of Dirichlet distribution'''
    N, K = D.shape
    logp = log(D).mean(axis=0)
    a0 = _init_a(D)

    # Start updating
    if maxiter is None:
        maxiter = sys.maxint
    for i in xrange(maxiter):
        a1 = _ipsi(psi(a0.sum()) + logp)
        # if norm(a1-a0) < tol:
        if abs(loglikelihood(D, a1) - loglikelihood(D, a0)) < tol:  # much faster
            return a1
        a0 = a1
    raise Exception('Failed to converge after {} iterations, values are {}.'
                    .format(maxiter, a1))
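# The function above uses package-internal helpers (_init_a, _ipsi, loglikelihood)
# that are not shown. Below is a minimal self-contained rendering of the same
# Minka fixed-point update, with a hand-rolled inverse digamma; the names are
# illustrative, not the package's API.
import numpy as np
from scipy.special import psi, polygamma


def invpsi_sketch(y, n_iter=5):
    # Minka's asymptotic initialisation followed by Newton refinement
    x = np.where(y >= -2.22, np.exp(y) + 0.5, -1.0 / (y - psi(1.0)))
    for _ in range(n_iter):
        x -= (psi(x) - y) / polygamma(1, x)
    return x


def dirichlet_mle_sketch(P, tol=1e-7, maxiter=1000):
    logp = np.log(P).mean(axis=0)
    a = np.ones(P.shape[1])  # crude initialisation
    for _ in range(maxiter):
        a_new = invpsi_sketch(psi(a.sum()) + logp)
        if np.linalg.norm(a_new - a) < tol:
            return a_new
        a = a_new
    return a


rng = np.random.default_rng(0)
P = rng.dirichlet([3.0, 7.0, 1.5], size=20_000)
print(dirichlet_mle_sketch(P))  # roughly recovers [3, 7, 1.5]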
def predictFactor(self):
    """Predict expected factor values from prior parameters"""
    for conditioner in self.conditionerRanges:
        nu = self.pseudoCounts[conditioner]
        fsum = 0.0
        ccond = tuple()
        if conditioner != (None,):
            ccond = conditioner
        for condrv in self.conditionedRanges:
            self.factor[condrv + ccond] = np.exp(
                spf.psi(nu * self.naturalParams[(condrv, conditioner)]) -
                spf.psi(nu))
            fsum += self.factor[condrv + ccond]
        for condrv in self.conditionedRanges:
            self.factor[condrv + ccond] /= fsum
def invpsi(x):
    r"""
    Inverse digamma (psi) function.

    The digamma function is the derivative of the log gamma function.
    This calculates the value Y > 0 for a value X such that digamma(Y) = X.

    See: http://www4.ncsu.edu/~pfackler/
    """
    L = 1.0
    y = np.exp(x)
    while (L > 1e-10):
        y += L * np.sign(x - special.psi(y))
        L /= 2

    # Ad hoc by Jaakko
    y[x < -10] = -1 / x[x < -10]

    return y
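# Round-trip check, assuming invpsi() above is in scope: digamma(invpsi(x)) should
# give back x. Inputs stay above -10 because the final ad-hoc branch replaces very
# negative arguments with a coarser approximation.
import numpy as np
from scipy import special

x = np.array([-8.0, -3.0, -0.5, 0.0, 1.0, 4.0])
print(np.max(np.abs(special.psi(invpsi(x)) - x)))  # small residual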
def update_alpha_beta(self):
    # Update Beta
    x = 0
    y = 0
    for z in range(self.TOPICS):
        x += np.sum(psi(self.cntTW[z] + self.beta) - psi(self.beta))
        y += psi(np.sum(self.cntTW[z] + self.beta)) - psi(
            self.VOCABS * self.beta)
    self.beta = (self.beta * x) / (self.VOCABS * y)  # UPDATE BETA

    # Update Alpha
    x = 0
    y = 0
    for d in range(self.DOCS):
        y += psi(np.sum(self.cntDT[d] + self.alpha)) - psi(
            np.sum(self.alpha))
        x += psi(self.cntDT[d] + self.alpha) - psi(self.alpha)
    self.alpha *= x / y  # UPDATE ALPHA
def _expec_lnPi(self, posterior=True): self.expec_responsibilities() self.expec_weights() # check if E_t has been initialised. Only update alpha if it has. Otherwise E[lnPi] is given by the prior if np.any(self.E_t) and posterior: self._post_Alpha() sumAlpha = np.sum(self.alpha, 1) psiSumAlpha = psi(sumAlpha) for j in range(self.nclasses): for s in range(self.nscores): self.lnPi[:, s, :] = (psi(self.alpha[:, s, :]) - psiSumAlpha)[np.newaxis, :] #.dot(self.r) # need to update the cluster pseudo-count distributions first to get new expected eta and beta #translate \eta and \beta to \alpha. worker_counts = self.alpha - self.alpha_tr #the counts for each worker self.a = np.zeros((self.nclasses, self.nscores, self.nclusters)) self.b = np.zeros((self.nclasses, self.nscores, self.nclusters)) for j in range(self.nclasses): # v_j^(k) ~ Beta( \beta_j^{q_k} ), where q_k is the cluster ID of worker k logv_j = psi(self.beta[j, :, :].dot(self.r.T)) - psi( self.beta[j, :, :].dot(self.r.T) + np.sum(worker_counts[j, :, :], axis=0)[np.newaxis, :]) # s^(k)_{j, l} ~ Antoniak( n^(k)_{j, l}, \beta_j^{q_k} \eta_{j, l}^{q_k} ) #The exact computation of the expected number of tables is given in: # A Note on the Implementation of Hierarchical Dirichlet Processes, Phil Blunsom et al. #The antoniak distribution is explained in: Distributed Algorithms for Topic Models, David Newman et al. s_j = np.zeros((self.nscores, self.nclusters)) for l in range(self.nscores): counts = worker_counts[j, l, :][:, np.newaxis] conc = (self.beta[j, 0, :] * self.eta[j, l, :])[np.newaxis, :] # For the updates to eta and beta, we take an expectation of ln p(s^(k)_{j, l}) over cluster membership of k by # computing s^(k) using a weighted sum with weights p(q_k = m) # -- this follows from the equations in Moreno and Teh s_jl = conc * (psi(conc + counts) - psi(conc)) # nclusters x K s_j[l, :] = np.sum(s_jl * self.r, axis=0) # \eta_j^(m) ~ Dir( sum_{ k where q_k=m } s^(k)_{j, .} + \phi_j \gamma_j ) # We need to determine expectation of \eta self.phigamma[ j, :, :] = s_j + self.phi0[j, :, :] * self.gamma0[j, :, :] self.eta[j, :, :] = self.phigamma[j, :, :] / np.sum( self.phigamma[j, :, :], axis=0)[np.newaxis, :] # \beta_j^(k) ~ Gamma( sum_{k where q_k=m} sum_{l} s_{j, l}^(k) + a_j, b_j - sum_{k where q_k=m} log(v_{j}^(k) ) ) # we need expectation of beta self.a[j, :, :] = np.sum(s_j, axis=0) + self.a0[j] self.b[j, :, :] = self.b0[j] - logv_j.dot(self.r) self.beta = self.a / self.b
def partialLogL_alt(self, problem, allpars, fitIndex):
    """
    Return the partial derivative of log( likelihood ) to the parameters.

    Parameters
    ----------
    problem : Problem
        to be solved
    allpars : array_like
        parameters of the problem
    fitIndex : array_like
        indices of parameters to be fitted

    """
    self.ncalls += 1

    scale = allpars[-2]
    power = allpars[-1]
    res = problem.residuals(allpars[:-2])
    ars = numpy.abs(res / scale)
    rsp = numpy.power(ars, power)
    if problem.weights is not None:
        rsp = rsp * problem.weights

    dLdm = power * rsp / res
    dM = problem.partial(allpars[:-2])

    dL = numpy.zeros(len(fitIndex), dtype=float)
    i = 0
    for k in fitIndex:
        if k >= 0:
            dL[i] = numpy.sum(dLdm * dM[:, k])
            i += 1
        elif k == -2:
            dL[-2] = -problem.sumweight / scale + power * numpy.sum(rsp) / scale
        else:
            # special.psi( x ) is the digamma function, special.polygamma( 0, x )
            dldp = problem.sumweight * (power + special.psi(1.0 / power))
            dldp /= (power * power)
            dldp -= (numpy.sum(rsp * numpy.log(ars)))
            dL[-1] = dldp

    return dL
def nextPartialData(self, problem, allpars, fitIndex, mockdata=None):
    """
    Return the partial derivative of all elements of the log( likelihood )
    to the parameters.

    Parameters
    ----------
    problem : Problem
        to be solved
    allpars : array_like
        parameters of the problem
    fitIndex : array_like
        indices of parameters to be fitted
    mockdata : array_like
        as calculated by the model

    """
    param = allpars[:-2]
    res = problem.residuals(param, mockdata=mockdata)

    scale = allpars[-2]
    power = allpars[-1]
    ars = numpy.abs(res / scale)
    rsp = numpy.power(ars, power)
    if problem.weights is not None:
        rsp = rsp * problem.weights
        wgt = problem.weights
    else:
        wgt = 1.0
    dLdm = power * rsp / res
    dM = problem.partial(param)
    ## TBD import mockdata into partial
    # dM = problem.partial( param, mockdata=mockdata )

    # special.psi( x ) is the digamma function, special.polygamma( 0, x )
    dlp = wgt * (power + special.psi(1.0 / power)) / (power * power)

    for k in fitIndex:
        if k >= 0:
            yield (dLdm * dM[:, k])
        elif k == -2:
            yield (power * rsp - wgt) / scale
        else:
            yield dlp - rsp * numpy.log(ars)
def _update_em_full(X, L, F, a, fix): fix_l, fix_f, fix_a = fix e = sys.float_info.min ## update a if not fix_a: LFt = L @ F.T start = time.time() # res = minimize(_obj_a, 1, method='nelder-mead',args = (X,LFt, a), # options={'xtol': 1e-5, 'disp': False, 'maxiter':10}) I, J = X.shape C1 = np.sum(psi(X + a.reshape(1, -1)) - log(LFt + a.reshape(1, -1)), axis=0) C2 = np.sum((X + a.reshape(1, -1)) / (LFt + a.reshape(1, -1)), axis=0) params = [I, J, C1, C2] res = minimize(_obj_a, a, method='nelder-mead', args=(params), options={ 'xtol': 1e-5, 'disp': False, 'maxiter': 50 }) # res = minimize(_obj_a, 1, method='Newton-CG', jac=_obj_a_der,args = (params), # options={'xtol': 1e-5, 'disp': False, 'maxiter':10}) runtime = time.time() - start a = res.x ## update L if not fix_l: #LFt = L @ F.T M1 = (X / LFt) @ F M2 = ((X + a.reshape(1, -1)) / (LFt + a.reshape(1, -1))) @ F L = L * (M1 / M2) L = np.clip(L, a_min=e, a_max=None) ## update F if not fix_f: LFt = L @ F.T N1 = (X / LFt).T @ L N2 = ((X + a.reshape(1, -1)) / (LFt + a.reshape(1, -1))).T @ L F = F * (N1 / N2) F = np.clip(F, a_min=e, a_max=None) return L, F, a
def predict_features(self, features): # get the expected log word likelihoods of each token self.features = np.array(features) # compute ERho ElnRho = [] for j in range(self.L): ElnL = np.sum(psi( (self.nu[j] + 1 + np.arange(1, self.D + 1)) / 2)) + self.D * np.log(2.) + np.log(np.linalg.det(self.W[j])) Ecov = self.D / self.beta[j] + self.nu[j] * ( self.features - self.m[j][None, :]) @ self.W[j] @ ( self.features - self.m[j][None, :]).T ElnRho.append(ElnL - self.D / 2.0 * np.log(2 * np.pi) - 0.5 * Ecov) lnptext_given_t = np.array(ElnRho).T lnptext_given_t -= logsumexp(lnptext_given_t, axis=1)[:, None] self.ElnRho = lnptext_given_t return lnptext_given_t # N x nclasses where N is number of tokens/data points
def uniform_divergence(x, tx, m=2):
    x = normalize(x, tx)
    cx = Counter(x)
    xk = np.array(list(cx.keys()), dtype=float)
    xk.sort()
    delta = np.zeros(len(xk))
    if len(xk) > 1:
        delta[0] = xk[1] - xk[0]
        delta[1:-1] = (xk[m:] - xk[:-m]) / m
        delta[-1] = xk[-1] - xk[-2]
    else:
        delta = np.array(np.sqrt(12))
    counter = np.array([cx[i] for i in xk], dtype=float)
    delta = delta / np.sum(delta)
    hx = np.sum(counter * np.log(counter / delta)) / len(x)
    hx -= np.log(len(x))
    hx += (psi(m) - np.log(m))
    return hx
def estimation(self, y):
    """ Estimate Shannon entropy.

    Parameters
    ----------
    y : (number of samples, dimension)-ndarray
        One row of y corresponds to one sample.

    Returns
    -------
    h : float
        Estimated Shannon entropy.

    References
    ----------
    M. N. Goria, Nikolai N. Leonenko, V. V. Mergel, and P. L. Novi
    Inverardi. A new class of random vector entropy estimators and its
    applications in testing statistical hypotheses. Journal of
    Nonparametric Statistics, 17: 277-297, 2005. (S={k})

    Harshinder Singh, Neeraj Misra, Vladimir Hnizdo, Adam Fedorowicz and
    Eugene Demchuk. Nearest neighbor estimates of entropy. American
    Journal of Mathematical and Management Sciences, 23, 301-321, 2003.
    (S={k})

    L. F. Kozachenko and Nikolai N. Leonenko. A statistical estimate for
    the entropy of a random vector. Problems of Information Transmission,
    23:9-16, 1987. (S={1})

    Examples
    --------
    h = co.estimation(y)
    """
    num_of_samples, dim = y.shape
    distances_yy = knn_distances(y, y, True, self.knn_method, self.k,
                                 self.eps, 2)[0]
    v = volume_of_the_unit_ball(dim)
    distances_yy[:, self.k - 1][distances_yy[:, self.k - 1] == 0] = 1e-6
    h = log(num_of_samples - 1) - psi(self.k) + log(v) + \
        dim * sum(log(distances_yy[:, self.k - 1])) / num_of_samples

    return h
def partialLogL(self, model, parlist, fitIndex):
    """
    Return the partial derivative of log( likelihood ) to the parameters.

    Parameters
    ----------
    model : Model
        model to calculate mock data
    parlist : array_like
        parameters of the problem
    fitIndex : array_like
        indices of the parameters to be fitted

    """
    self.ncalls += 1
    np = model.npchain
    scale = parlist[np]
    power = parlist[np + 1]
    res = self.getResiduals(model, parlist[:np])
    ars = numpy.abs(res / scale)
    rsp = numpy.power(ars, power)
    if self.weights is not None:
        rsp = rsp * self.weights

    dLdm = power * rsp / res
    dM = model.partial(self.xdata, parlist[:np])

    dL = numpy.zeros(len(fitIndex), dtype=float)
    i = 0
    for k in fitIndex:
        if k < np:
            dL[i] = numpy.sum(dLdm * dM[:, k])
        elif k == np:
            dL[i] = -self.sumweight / scale + power * numpy.sum(rsp) / scale
        else:
            # special.psi( x ) is the digamma function, special.polygamma( 0, x )
            dL[i] = self.sumweight * (power + special.psi(1.0 / power))
            dL[i] /= (power * power)
            dL[i] -= (numpy.sum(rsp * numpy.log(ars)))
        i += 1

    return dL
def klgamma(self, pa, pb, qa, qb):
    ## The KL divergence for the gamma distribution. It is not used here,
    ## but was ported from the MATLAB code.
    n = max([pb.shape[1], pa.shape[1]])

    if pa.shape[1] == 1:
        pa = pa * np.ones((1, n))
    if pb.shape[1] == 1:
        pb = pb * np.ones((1, n))
    qa = qa * np.ones((1, n))
    qb = qb * np.ones((1, n))

    kl = sum(pa * np.log(pb) - gammaln(pa) - qa * np.log(qb) + gammaln(qa)
             + (pa - qa) * (psi(pa) - np.log(pb)) - (pb - qb) * pa / pb)

    return kl
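# Independent Monte Carlo check of the same closed form for scalar shape/rate
# parameters (names here are illustrative and unrelated to the surrounding class):
# the expression is KL( Gamma(pa, rate=pb) || Gamma(qa, rate=qb) ).
import numpy as np
from scipy.special import gammaln, psi
from scipy.stats import gamma as gamma_dist

pa, pb = 2.0, 3.0
qa, qb = 5.0, 1.0
closed_form = (pa * np.log(pb) - gammaln(pa) - qa * np.log(qb) + gammaln(qa)
               + (pa - qa) * (psi(pa) - np.log(pb)) - (pb - qb) * pa / pb)

rng = np.random.default_rng(0)
x = rng.gamma(shape=pa, scale=1.0 / pb, size=1_000_000)
mc = np.mean(gamma_dist.logpdf(x, a=pa, scale=1.0 / pb)
             - gamma_dist.logpdf(x, a=qa, scale=1.0 / qb))
print(closed_form, mc)  # agree to ~2 decimals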
def update_resp(self, Xf):
    """Updates the responsibilities matrix, based on the current
    goodness-of-fit of the classifiers, and the current gating weight
    vectors. Xf is the gating feature matrix.
    """
    R, cls = self.R, self.cls
    Dy = float(cls[0].W.shape[0])
    # fill R with goodness-of-fit data from classifiers
    for k in xrange(R.shape[1]):
        cl = cls[k]
        tau_ak, tau_bk = cl.tau_ak, cl.tau_bk
        # k'th column is exp( Dy/2 E[ln Tk] - 1/2 (E[Tk] res + Dy var) )
        R[:, k] = exp(0.5 * (Dy * (psi(tau_ak) - log(tau_bk)) -
                             (tau_ak / tau_bk) * cl.res + Dy * cl.var))
    # multiply with current gating
    R *= self.gating_matrix(Xf)
    # normalise row vectors
    R /= sum(R, 1).reshape(R.shape[0], 1)
def _estimate_alpha_beta(self): # ADJUST ALPHA AND BETA BY USING MINKA'S FIXED-POINT ITERATION numerator = 0 denominator = 0 previous_min = np.min(self.alpha) for r in range(self.nb_records): numerator += psi(self.cnt_rk[r] + self.alpha) - psi(self.alpha) denominator += psi(np.sum(self.cnt_rk[r] + self.alpha)) - psi( np.sum(self.alpha)) self.alpha = self.alpha * (numerator / denominator) # UPDATE ALPHA if 0 in self.alpha: #THIS CASE IS VERY RARE AND HAPPENS WHERE A HIDDEN CLASS K HAS RECEIVED 0 ASSIGNMENTS print( "|----WARNING: alpha = 0 encountered" ) #FORCE THE 0 ALPHAS TO THE MINIMUM BETWEEN (THE SMALLEST NON NULL APLHA, 1/NB_RECORDS, self.alpha[self.alpha == 0] = min( previous_min, np.min(self.alpha[self.alpha > 0]), 1.0 / self.nb_records) #AND THE SMALLEST APLHA BEFORE UPDATE for f in range(self.nb_features): numerator = 0 denominator = 0 previous_min = np.min(self.beta[f]) for k in range(self.nb_hclass): numerator += psi(self.cnt_kv[f][k] + self.beta[f]) - psi( self.beta[f]) denominator += psi( np.sum(self.cnt_kv[f][k] + self.beta[f])) - psi( np.sum(self.beta[f])) self.beta[f] = self.beta[f] * (numerator / denominator ) # UPDATE BETA if 0 in self.beta[ f]: #THIS CASE IS VERY RARE AND HAPPENS WHERE A VALUE DO NOT HAVE ANY OCCURENCE IN THE CORPUS print( "|----WARNING: beta = 0 encountered" ) #FORCE THE 0 BETAS TO THE MINIMUM BETWEEN (THE SMALLEST NON NULL APLHA, 1/NB_VALUES, self.beta[f][self.beta[f] == 0] = min( previous_min, np.min(self.beta[f][self.beta[f] > 0]), 1.0 / self.vocab_size[f]) #AND THE SMALLEST APLHA BEFORE UPDATE