def _check_multigammaln_array_result(a, d):
    # Test that the shape of the array returned by multigammaln
    # matches the input shape, and that all the values match
    # the value computed when multigammaln is called with a scalar.
    result = multigammaln(a, d)
    assert_array_equal(a.shape, result.shape)
    a1 = a.ravel()
    result1 = result.ravel()
    for i in range(a.size):
        assert_array_almost_equal_nulp(result1[i], multigammaln(a1[i], d))
def data_prob(hp, ss):
    """
    Murphy, Eq. 266
    """
    z = _intermediates(hp, ss)
    d = len(ss.sum)
    return (
        -0.5 * ss.n * d * 1.1447298858493991  # the constant is log(pi)
        + multigammaln(0.5 * z.nu, d)
        - multigammaln(0.5 * hp.nu, d)
        + 0.5 * hp.nu * log(linalg.det(hp.Lambda))
        - 0.5 * z.nu * log(linalg.det(z.Lambda))
        + 0.5 * d * log(hp.kappa / z.kappa)
    )
def logMarginalLikelihood(s, x):
    d = s.delta.shape[0]
    n = x.shape[1]
    if not x.shape[0] == d:
        raise ValueError
    scatter = np.dot(x, x.T)
    (sign, logdetD) = np.linalg.slogdet(s.delta)
    (sign, logdetDS) = np.linalg.slogdet(s.delta + scatter)
    logP = multigammaln((s.nu + n) * .5, d) - multigammaln(s.nu * .5, d)
    logP -= 0.5 * (n * d) * np.log(np.pi)
    logP += s.nu * 0.5 * logdetD
    logP -= (s.nu + n) * 0.5 * logdetDS
    return logP
def score_data(self, shared):
    r"""
    \cite{murphy2007conjugate}, Eq. 266
    """
    kappa0, psi0, nu0 = shared.kappa, shared.psi, shared.nu
    post = shared.plus_group(self)
    kappa_n, psi_n, nu_n = post.kappa, post.psi, post.nu
    n = self.count
    D = shared.dim()
    return (
        multigammaln(nu_n / 2., D)
        + nu0 / 2. * np.log(np.linalg.det(psi0))
        - (n * D / 2.) * np.log(math.pi)
        - multigammaln(nu0 / 2., D)
        - nu_n / 2. * np.log(np.linalg.det(psi_n))
        + D / 2. * np.log(kappa0 / kappa_n))
def _entropy(self, dim, df, log_det_scale):
    """
    Parameters
    ----------
    dim : int
        Dimension of the scale matrix
    df : int
        Degrees of freedom
    log_det_scale : float
        Logarithm of the determinant of the scale matrix

    Notes
    -----
    As this function does no argument checking, it should not be
    called directly; use 'entropy' instead.
    """
    return (
        0.5 * (dim + 1) * log_det_scale +
        0.5 * dim * (dim + 1) * _LOG_2 +
        multigammaln(0.5 * df, dim) -
        0.5 * (df - dim - 1) * np.sum(
            [psi(0.5 * (df + 1 - (i + 1))) for i in range(dim)]
        ) +
        0.5 * df * dim
    )
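For context, a private helper like this one backs SciPy's public entropy method, which validates its arguments before dispatching; a minimal usage sketch of the public entry point:

import numpy as np
from scipy.stats import wishart

# Entropy of a Wishart distribution with 5 degrees of freedom and a
# 3x3 identity scale matrix; the frozen distribution's entropy()
# delegates to a private _entropy helper like the one above.
print(wishart(df=5, scale=np.eye(3)).entropy())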
def test2(self):
    # A test of the identity
    #     Gamma_2(a) = sqrt(pi) * Gamma(a) * Gamma(a - 0.5)
    a = np.array([2.5, 10.0])
    result = multigammaln(a, 2)
    expected = np.log(np.sqrt(np.pi)) + gammaln(a) + gammaln(a - 0.5)
    assert_almost_equal(result, expected)
def test_ararg(self):
    d = 5
    a = np.abs(np.random.randn(3, 2)) + d

    tr = multigammaln(a, d)
    assert_array_equal(tr.shape, a.shape)
    for i in range(a.size):
        assert_array_equal(tr.ravel()[i], multigammaln(a.ravel()[i], d))

    d = 5
    a = np.abs(np.random.randn(1, 2)) + d

    tr = multigammaln(a, d)
    assert_array_equal(tr.shape, a.shape)
    for i in range(a.size):
        assert_array_equal(tr.ravel()[i], multigammaln(a.ravel()[i], d))
def invwishart_logpdf(X, S, nu):
    """Compute logpdf of inverse-Wishart distribution

    Args:
        X (p x p matrix): rv value for which to compute logpdf
        S (p x p matrix): scale matrix parameter of inverse-Wishart
            distribution (psd matrix)
        nu: degrees of freedom, nu > p - 1 where p is the dimension of S

    Returns:
        float: logpdf of X given parameters S and nu
    """
    # pdf is |S|^(nu/2) / (2^(nu*p/2) Gamma_p(nu/2))
    #        * |X|^(-(nu+p+1)/2) * exp(-tr(S inv(X)) / 2)
    p = S.shape[0]
    assert len(S.shape) == 2 and S.shape[0] == S.shape[1] and nu > p - 1
    if (len(X.shape) != 2 or X.shape[0] != X.shape[1] or
            X.shape[0] != S.shape[0]):
        return -np.inf
    nu_h = nu / 2
    log_S_term = (nu_h * (log(np.linalg.det(S)) - p * log(2))
                  - multigammaln(nu_h, p))
    log_X_term = -(nu + p + 1) / 2 * log(np.linalg.det(X))
    # using an efficient formula for trace(dot(S, inv(X)))
    log_e_term = -np.dot(S.T.flat, inv(X).flat) / 2
    return log_S_term + log_X_term + log_e_term
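A quick way to sanity-check this helper is against SciPy's reference implementation; a minimal sketch, assuming invwishart_logpdf and its module-level imports are in scope, with an arbitrary positive-definite test matrix:

import numpy as np
from scipy.stats import invwishart

p, nu = 3, 5
S = 2.0 * np.eye(p)           # scale matrix
X = np.diag([1.0, 2.0, 3.0])  # evaluation point
# The two values should agree up to floating-point error.
print(invwishart(df=nu, scale=S).logpdf(X))
print(invwishart_logpdf(X, S, nu))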
def generalized_ln_poisson(data, expectation):
    """
    When the data set is not integer based, we need a different way to
    calculate the Poisson likelihood, so we'll use this version, which
    is appropriate for float data types (using the continuous version
    of the Poisson pmf) as well as the standard integer data type for
    the discrete Poisson pmf.

    Returns: the natural logarithm of the value of the continuous form
    of the Poisson probability mass function, given detected counts
    'data' from expected counts 'expectation'.
    """
    if not np.alltrue(data >= 0.0):
        raise ValueError(
            "Template must have all bins >= 0.0! Template generation bug?")

    if bool(re.match('^int', data.dtype.name)):
        return np.log(poisson.pmf(data, expectation))
    elif bool(re.match('^float', data.dtype.name)):
        return (data * np.log(expectation) - expectation
                - multigammaln(data + 1.0, 1))
    else:
        raise ValueError(
            "Unknown data dtype: %s. Must be float or int!" % data.dtype)
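Worth noting: for d = 1 the multivariate log-gamma reduces to the ordinary log-gamma, so the float branch above is just the continuous extension of the Poisson log-pmf, data*log(mu) - mu - gammaln(data + 1). A minimal check of that identity:

import numpy as np
from scipy.special import gammaln, multigammaln

# Gamma_1(x) = Gamma(x), so multigammaln(x, 1) == gammaln(x).
x = 4.7
assert np.isclose(multigammaln(x, 1), gammaln(x))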
def wishart_pdf(X, S, v, d, chol=False, log_form=False):
    '''Wishart probability density with possible use of the Cholesky
    decomposition of S.

    Returns the same output as scipy.stats.wishart(df=v, scale=S).pdf(X).

    The equation is (Wikipedia or Kevin P. Murphy, 2007):

        {|X|**[0.5(v-d-1)] exp[-0.5tr(inv(S)X)]} /
        {2**[0.5vd] |S|**[0.5v] [multivariate_gamma_function(0.5v, d)]}

    Thomas Minka (1998) has a different form for the equation, but both
    are equivalent for the same inputs:

        {1}/{[multivariate_gamma_function(0.5v, d)] |X|**(0.5(d+1))}
        {|0.5X inv(S)|**(0.5v)} {exp[-0.5tr(inv(S)X)]}

    Parameters
    ----------
    X : array-like
        Positive definite dxd matrix for which the probability function
        is to be estimated. If chol, this must be the matrix L instead,
        where L is a lower triangular decomposition of X such that
        X = LL'.
    S : array-like
        Positive definite dxd scale matrix. If chol, this must be the
        matrix L2 instead, where L2 is a lower triangular decomposition
        of S such that S = L2L2'.
    v : int or float
        Degrees of freedom for the distribution; v must be > d.
    d : int
        Dimension of each row or column of X.

    Outputs
    -------
    If log_form, returns the logpdf estimate of X; else it returns the
    pdf estimate of X.
    '''
    # constants
    if chol:
        det_X = chol_log_determinant(X)
        det_S = chol_log_determinant(S)
        iS = lpack.dtrtri(S, lower=1)[0]
        trace = np.einsum('ij,ji', iS.T.dot(iS), X.dot(X.T))
    else:
        det_X = np.linalg.slogdet(X)[1]
        det_S = np.linalg.slogdet(S)[1]
        trace = np.trace(np.linalg.inv(S).dot(X))

    p1 = 0.5 * (v - d - 1) * det_X
    p2 = -0.5 * trace
    p3 = -0.5 * (v * d) * math.log(2)
    p4 = -0.5 * v * det_S
    p5 = -spe.multigammaln(0.5 * v, d)
    if log_form:
        return p1 + p2 + p3 + p4 + p5
    else:
        return math.exp(p1 + p2 + p3 + p4 + p5)
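The docstring's claim can be verified directly; a minimal sketch, assuming wishart_pdf and its module-level imports (math, np, spe) are in scope:

import numpy as np
from scipy.stats import wishart

d, v = 3, 5
S = np.eye(d)
X = np.diag([0.5, 1.0, 1.5])
# Both lines should print the same density, up to floating-point error.
print(wishart(df=v, scale=S).pdf(X))
print(wishart_pdf(X, S, v, d))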
def calc_log_z(_mu, _lambda, _kappa, _nu):
    d = len(_mu)
    sign, detr = slogdet(_lambda)
    log_z = (LOG2 * (_nu * d / 2.0)
             + (d / 2.0) * math.log(2 * math.pi / _kappa)
             + multigammaln(_nu / 2.0, d)
             - (_nu / 2.0) * detr)
    return log_z
def logpbeta(beta, k=1, s=1, w=1, nd=1, logdetw=1, temp=1):
    """The log of the second part of Eq. 9 in Rasmussen (2000)"""
    return -1.5 * np.log(beta - nd + 1.0) \
           - 0.5 * nd / (beta - nd + 1.0) \
           + 0.5 * beta * k * nd * np.log(0.5 * beta) \
           + 0.5 * beta * k * logdetw \
           + 0.5 * beta * temp \
           - k * spec.multigammaln(0.5 * beta, nd)
def mniw_log_partitionfunction(nu, S, M, K):
    n = M.shape[0]
    return (
        n * nu / 2 * np.log(2)
        + special.multigammaln(nu / 2.0, n)
        - nu / 2 * np.linalg.slogdet(S)[1]
        - n / 2 * np.linalg.slogdet(K)[1]
    )
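A hedged usage sketch; the argument semantics here are an assumption based on the usual matrix-normal inverse-Wishart conjugate prior (S an n x n inverse-Wishart scale, M an n x m mean matrix, K an m x m column-precision matrix), not something the snippet itself states:

import numpy as np
from scipy import special

n, m = 2, 3
nu = 4.0              # degrees of freedom, nu > n - 1
S = np.eye(n)         # inverse-Wishart scale
M = np.zeros((n, m))  # matrix-normal mean
K = np.eye(m)         # column precision of the matrix normal
print(mniw_log_partitionfunction(nu, S, M, K))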
def __call__(self, x):
    # Memoize multigammaln evaluations, keyed on the argument x.
    if x not in self.res:
        res = sps.multigammaln(x, self.d)
        self.res[x] = res
    else:
        res = self.res[x]
    return res
def invwishart_pdf(X, S, v, d, chol=False, log_form=False):
    '''Inverse Wishart probability density with possible use of the
    Cholesky decomposition of S and X.

    Returns output comparable to
    scipy.stats.invwishart(df=v, scale=S).pdf(X).

    The equation is (Wikipedia or Kevin P. Murphy, 2007):

        {|S|**[0.5v] |X|**[-0.5(v+d+1)] exp[-0.5tr(S inv(X))]} /
        {2**[0.5vd] [multivariate_gamma_function(0.5v, d)]}

    Parameters
    ----------
    X : array-like
        Positive definite dxd matrix for which the probability function
        is to be estimated. If chol, this must be the matrix L instead,
        where L is a lower triangular decomposition of X such that
        X = LL'.
    S : array-like
        Positive definite dxd scale matrix. If chol, this must be the
        matrix L2 instead, where L2 is a lower triangular decomposition
        of S such that S = L2L2'.
    v : int or float
        Degrees of freedom for the distribution; v must be > d.
    d : int
        Dimension of each row or column of X.

    Outputs
    -------
    If log_form, returns the logpdf estimate of X; else it returns the
    pdf estimate of X.
    '''
    # constants
    if chol:
        det_X = chol_log_determinant(X)
        det_S = chol_log_determinant(S)
        iX = lpack.dtrtri(X, lower=1)[0]
        trace = np.einsum('ij,ji', S.dot(S.T), iX.T.dot(iX))
    else:
        det_X = np.linalg.slogdet(X)[1]
        det_S = np.linalg.slogdet(S)[1]
        trace = np.trace(S.dot(np.linalg.inv(X)))

    p1 = -0.5 * (v * d) * math.log(2)
    p2 = -spe.multigammaln(0.5 * v, d)
    p3 = 0.5 * v * det_S
    p4 = -0.5 * (v + d + 1) * det_X
    p5 = -0.5 * trace
    if log_form:
        return p1 + p2 + p3 + p4 + p5
    else:
        return math.exp(p1 + p2 + p3 + p4 + p5)
def logPdf(s, S):
    d = s.delta.shape[0]
    logPdf = -.5 * np.trace(s.delta.dot(inv(S)))
    (sign, logdetDelta) = np.linalg.slogdet(s.delta)
    (sign, logdetS) = np.linalg.slogdet(S)
    logPdf += logdetS * (-.5 * (s.nu + 1. + d))
    logPdf += logdetDelta * (.5 * s.nu)
    logPdf -= np.log(2.0) * 0.5 * s.nu * d
    logPdf -= multigammaln(0.5 * s.nu, d)
    return logPdf
def fullcov_obs_log_likelihood(data, t, s):
    '''Full covariance model from Xuan et al.'''
    s += 1
    n = s - t
    x = data[t:s]
    if len(x.shape) == 2:
        dim = x.shape[1]
    else:
        dim = 1
        x = np.atleast_2d(x).T

    N0 = dim  # weakest prior we can use to retain proper prior
    V0 = np.var(x) * np.eye(dim)
    Vn = V0 + np.array([np.outer(x[i], x[i].T)
                        for i in range(x.shape[0])]).sum(0)

    # section 3.2 from Xuan paper:
    return -(dim * n / 2) * np.log(np.pi) \
        + (N0 / 2) * np.linalg.slogdet(V0)[1] \
        - multigammaln(N0 / 2, dim) \
        + multigammaln((N0 + n) / 2, dim) \
        - ((N0 + n) / 2) * np.linalg.slogdet(Vn)[1]
def _compute_moments_and_cgf(phi, mask=True):
    U = utils.m_chol(-phi[0])
    k = np.shape(phi[0])[-1]
    #k = self.dims[0][0]
    logdet_phi0 = utils.m_chol_logdet(U)
    u0 = phi[1][..., np.newaxis, np.newaxis] * utils.m_chol_inv(U)
    u1 = -logdet_phi0 + utils.m_digamma(phi[1], k)
    u = [u0, u1]
    g = phi[1] * logdet_phi0 - special.multigammaln(phi[1], k)
    return (u, g)
def __init__(self, W, v):
    self.d = W.shape[0]
    if W.shape != (self.d, self.d):
        raise TypeError('W must be a square matrix!')
    self.W = W
    self.v = v
    self.logC = -(0.5 * self.v * self.d * np.log(2)
                  + 0.5 * self.v * np.log(np.linalg.det(self.W))
                  + multigammaln(0.5 * v, self.d))
    self.Winv = np.linalg.inv(W)
def singular_wishart_density(val, vec, P):
    """
    Density of a singular Wishart distribution at X = vec*diag(val)*vec.T
    """
    d, r = vec.shape
    norm = 0.5 * r * (r - d) * np.log(np.pi) \
        - 0.5 * r * d * np.log(2.0) \
        - special.multigammaln(r / 2, r) \
        - 0.5 * r * np.log(la.det(P))
    pptn = 0.5 * (r - d - 1) * np.sum(np.log(val)) \
        - 0.5 * np.trace(np.dot(np.dot(vec.T, la.solve(P, vec)),
                                np.diag(val)))
    pdf = norm + pptn
    return pdf
def analytic_postparam_logevidence_mvnorm_unknown_K_li(D, mu_pr, prec_pr,
                                                       kappa_pr, nu_pr):
    D_mean = np.mean(D, 0)
    (n, dim) = D.shape
    (kappa_post, nu_post) = (kappa_pr + n, nu_pr + n)
    mu_post = (mu_pr * kappa_pr + D_mean * n) / (kappa_pr + n)

    scatter = scatter_matr(D)
    m_mu_pr = (D_mean - mu_pr)
    m_mu_pr.shape = (1, np.prod(m_mu_pr.shape))
    prec_post = (prec_pr + scatter
                 + kappa_pr * n / (kappa_pr + n) * m_mu_pr.T.dot(m_mu_pr))

    (sign, ldet_pr) = np.linalg.slogdet(prec_pr)
    (sign, ldet_post) = np.linalg.slogdet(prec_post)
    evid = (-(log(np.pi) * n * dim / 2)
            + multigammaln(nu_post / 2, dim)
            - multigammaln(nu_pr / 2, dim)
            + ldet_pr * nu_pr / 2
            - ldet_post * nu_post / 2
            + dim / 2 * (log(kappa_pr) - log(kappa_post)))
    return ((mu_post, prec_post, kappa_post, nu_post), evid)
def test_speed(d, sim):
    xs = np.ceil(np.random.rand(sim) * 100) + d
    lg = ln_gamma_d(d=d)

    t0 = time.time()
    y = [lg(x) for x in xs]  # @UnusedVariable
    t1 = time.time()
    string = "lookup = %.4f msec/sim (sim, d) = (%d, %d)" % (
        1000 * np.double(t1 - t0) / sim, sim, d)
    print(string)

    t0 = time.time()
    yd = [sps.multigammaln(x, d) for x in xs]  # @UnusedVariable
    t1 = time.time()
    string = "sps.multigammaln = %.4f msec/sim (sim, d) = (%d, %d)" % (
        1000 * np.double(t1 - t0) / sim, sim, d)
    print(string)
def fullcov_obs_log_likelihood(data, t, s):
    '''Full covariance model from Xuan et al.'''
    s += 1
    n = s - t
    x = data[t:s]
    if len(x.shape) == 2:
        dim = x.shape[1]
    else:
        dim = 1
        x = np.atleast_2d(x).T

    N0 = dim  # weakest prior we can use to retain proper prior
    V0 = np.var(x) * np.eye(dim)

    # Improvement over np.outer
    # http://stackoverflow.com/questions/17437523/python-fast-way-to-sum-outer-products
    # Vn = V0 + np.array([np.outer(x[i], x[i].T)
    #                     for i in range(x.shape[0])]).sum(0)
    Vn = V0 + np.einsum('ij,ik->jk', x, x)

    # section 3.2 from Xuan paper:
    return -(dim * n / 2) * np.log(np.pi) \
        + (N0 / 2) * np.linalg.slogdet(V0)[1] \
        - multigammaln(N0 / 2, dim) \
        + multigammaln((N0 + n) / 2, dim) \
        - ((N0 + n) / 2) * np.linalg.slogdet(Vn)[1]
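A minimal usage sketch for this marginal likelihood, assuming the function above and its imports are in scope, and assuming data is a (T, dim) observation array with (t, s) indexing a candidate segment as in the Xuan et al. changepoint setting:

import numpy as np

rng = np.random.default_rng(0)
# 50 draws from a correlated 2-D Gaussian as a hypothetical segment source.
data = rng.multivariate_normal([0.0, 0.0], [[1.0, 0.6], [0.6, 1.0]], size=50)

# Log marginal likelihood of the segment data[10:31] (t=10, s=30 inclusive).
print(fullcov_obs_log_likelihood(data, 10, 30))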
def _logpdf(self, x, dim, df, scale, log_det_scale):
    """
    Parameters
    ----------
    x : ndarray
        Points at which to evaluate the log of the probability
        density function.
    dim : int
        Dimension of the scale matrix
    df : int
        Degrees of freedom
    scale : ndarray
        Scale matrix
    log_det_scale : float
        Logarithm of the determinant of the scale matrix

    Notes
    -----
    As this function does no argument checking, it should not be
    called directly; use 'logpdf' instead.
    """
    log_det_x = np.zeros(x.shape[-1])
    #scale_x_inv = np.zeros(x.shape)
    x_inv = np.copy(x).T
    if dim > 1:
        _cho_inv_batch(x_inv)  # works in-place
    else:
        x_inv = 1. / x_inv
    tr_scale_x_inv = np.zeros(x.shape[-1])

    for i in range(x.shape[-1]):
        C, lower = scipy.linalg.cho_factor(x[:, :, i], lower=True)

        log_det_x[i] = 2 * np.sum(np.log(C.diagonal()))

        #scale_x_inv[:, :, i] = scipy.linalg.cho_solve((C, True), scale).T
        tr_scale_x_inv[i] = np.dot(scale, x_inv[i]).trace()

    # Log PDF
    out = ((0.5 * df * log_det_scale - 0.5 * tr_scale_x_inv) -
           (0.5 * df * dim * _LOG_2 + 0.5 * (df + dim + 1) * log_det_x) -
           multigammaln(0.5 * df, dim))

    return out
def _logpdf(self, x, dim, df, scale, log_det_scale, C):
    """
    Parameters
    ----------
    x : ndarray
        Points at which to evaluate the log of the probability
        density function
    dim : int
        Dimension of the scale matrix
    df : int
        Degrees of freedom
    scale : ndarray
        Scale matrix
    log_det_scale : float
        Logarithm of the determinant of the scale matrix
    C : ndarray
        Cholesky factorization of the scale matrix, lower triangular.

    Notes
    -----
    As this function does no argument checking, it should not be
    called directly; use 'logpdf' instead.
    """
    # log determinant of x
    # Note: x has components along the last axis, so that x.T has
    # components along the 0-th axis. Then since det(A) = det(A'), this
    # gives us a 1-dim vector of determinants

    # TODO: slogdet is unavailable as long as Numpy 1.5.x is supported
    # s, log_det_x = np.linalg.slogdet(x.T)

    # Retrieve tr(scale^{-1} x)
    log_det_x = np.zeros(x.shape[-1])
    scale_inv_x = np.zeros(x.shape)
    tr_scale_inv_x = np.zeros(x.shape[-1])
    for i in range(x.shape[-1]):
        log_det_x[i] = np.log(np.linalg.det(x[:, :, i]))
        scale_inv_x[:, :, i] = scipy.linalg.cho_solve((C, True), x[:, :, i])
        tr_scale_inv_x[i] = scale_inv_x[:, :, i].trace()

    # Log PDF
    out = ((0.5 * (df - dim - 1) * log_det_x - 0.5 * tr_scale_inv_x) -
           (0.5 * df * dim * _LOG_2 + 0.5 * df * log_det_scale +
            multigammaln(0.5 * df, dim)))

    return out
def singular_inverse_wishart_density(val, vec, P):
    """
    Density of a singular inverse Wishart distribution at
    X = vec*diag(val)*vec.T
    """
    d, r = vec.shape
    norm = 0.5 * r * np.log(la.det(P)) \
        - 0.5 * r * d * np.log(2.0) \
        - 0.5 * r * (d - r) * np.log(np.pi) \
        - special.multigammaln(r / 2, r)
    pptn = -0.5 * (3 * d - r + 1) * np.sum(np.log(val)) \
        - 0.5 * np.trace(np.dot(np.dot(vec.T, np.dot(P, vec)),
                                np.diag(1 / val)))
    pdf = norm + pptn
    return pdf
def compute_moments_and_cgf(self, phi, mask=True):
    r"""
    Return moments and cgf for given natural parameters

    .. math::

        \langle u \rangle =
        \begin{bmatrix}
          \phi_2 (-\phi_1)^{-1}
          \\
          -\log|-\phi_1| + \psi_k(\phi_2)
        \end{bmatrix}
        \\
        g(\phi) = \phi_2 \log|-\phi_1| - \log \Gamma_k(\phi_2)
    """
    U = linalg.chol(-phi[0])
    k = np.shape(phi[0])[-1]
    #k = self.dims[0][0]
    logdet_phi0 = linalg.chol_logdet(U)
    u0 = phi[1][..., np.newaxis, np.newaxis] * linalg.chol_inv(U)
    u1 = -logdet_phi0 + misc.multidigamma(phi[1], k)
    u = [u0, u1]
    g = phi[1] * logdet_phi0 - special.multigammaln(phi[1], k)
    return (u, g)
def wishart_logpdf(X, S, nu):
    """Compute logpdf of Wishart distribution

    Args:
        X (p x p matrix): rv value for which to compute logpdf
        S (p x p matrix): scale matrix parameter of Wishart
            distribution (psd matrix)
        nu: degrees of freedom, nu > p - 1 where p is the dimension of S

    Returns:
        float: logpdf of X given parameters S and nu
    """
    p = S.shape[0]
    assert len(S.shape) == 2 and S.shape[0] == S.shape[1] and nu > p - 1
    if (len(X.shape) != 2 or X.shape[0] != X.shape[1] or
            X.shape[0] != S.shape[0]):
        return -np.inf
    nu_h = nu / 2
    log_S_term = (nu_h * (log(np.linalg.det(S)) + p * log(2))
                  + multigammaln(nu_h, p))
    log_X_term = (nu - p - 1) / 2 * log(np.linalg.det(X))
    # using an efficient formula for trace(dot(inv(S), X))
    log_e_term = -np.dot(inv(S).T.flat, X.flat) / 2
    return log_X_term + log_e_term - log_S_term
def test_bararg(self):
    try:
        multigammaln(0.5, 1.2)
        raise Exception("Expected this call to fail")
    except ValueError:
        pass
def _log_partition_function(self, mu, sigma, kappa, nu):
    D = len(mu)
    chol = np.linalg.cholesky(sigma)
    return nu * D / 2 * np.log(2) \
        + special.multigammaln(nu / 2, D) \
        + D / 2 * np.log(2 * np.pi / kappa) \
        - nu * np.log(chol.diagonal()).sum()
def compute_fixed_moments(n):
    """
    Compute moments for fixed x.
    """
    u0 = n
    u1 = special.multigammaln(0.5 * n, k)
    return [u0, u1]
def log_B_func(W, dof):
    """ Bishop's book Eq. B.79 """
    D = W.shape[-1]
    log_part1 = -dof / 2. * np.linalg.slogdet(W)[1]
    log_part2 = -(dof * D / 2.) * np.log(2.) - multigammaln(dof * 0.5, D)
    return log_part1 + log_part2
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but
    are reset afterwards to their initial values. So it's safe to call
    in a serial manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W = data.words
    D, _ = W.shape
    means, expMeans, varcs, docLens = \
        queryState.means, queryState.expMeans, queryState.varcs, \
        queryState.docLens
    K, topicMean, sigT, vocab, vocabPrior, A = \
        modelState.K, modelState.topicMean, modelState.sigT, \
        modelState.vocab, modelState.vocabPrior, modelState.A

    # Calculate some implicit variables
    isigT = la.inv(sigT)

    bound = 0

    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(sigT)
        bound += 0.5 * NIW_PSI * np.trace(isigT)

        # and its entropy
        # is a constant which we skip

        # distribution over means
        bound -= 0.5 * K * log(1. / pseudoObsMeans) * safe_log_det(sigT)
        bound -= 0.5 / pseudoObsMeans * (topicMean).T.dot(isigT).dot(topicMean)

        # and its entropy
        bound += 0.5 * safe_log_det(sigT)  # + a constant

    # Distribution over document topics
    bound -= (D * K) / 2. * LN_OF_2_PI
    bound -= D / 2. * la.det(sigT)
    diff = means - topicMean[np.newaxis, :]
    bound -= 0.5 * np.sum(diff.dot(isigT) * diff)
    # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * np.sum(varcs * np.diag(isigT)[np.newaxis, :])

    # And its entropy
    # bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments and words and the former's
    # entropy. This is somewhat jumbled to avoid repeatedly taking the
    # exp and log of the means
    expMeans = np.exp(means - means.max(axis=1)[:, np.newaxis], out=expMeans)
    # D x V [W / TB] is the quotient of the original over the
    # reconstructed doc-term matrix
    R = sparseScalarQuotientOfDot(W, expMeans, vocab)
    V = expMeans * (R.dot(vocab.T))  # D x K

    bound += np.sum(docLens * np.log(np.sum(expMeans, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeans, vocab).data)

    bound += np.sum(means * V)
    bound += np.sum(2 * ssp.diags(docLens, 0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:, np.newaxis] * V
                          * (np.diag(A))[np.newaxis, :])

    bound -= np.sum(means * V)

    return bound
def Gibbs(X, T):
    np.random.seed(1)
    N, d = X.shape

    # Initialize parameters
    m0 = np.mean(X, 0)
    c0 = 0.1
    a0 = d
    A0 = np.cov(X.T)
    B0 = c0 * d * A0
    alpha0 = 1
    K = 20
    X_c = np.zeros(N).astype(int)
    mu = np.zeros((K, d))
    lamb = np.zeros((K, d, d))
    mu[0, :], lamb[0, :, :] = update_param(X, m0, c0, a0, B0, X_c, 0)
    top_clusters = np.zeros((T, 6))
    num_clusters = np.zeros(T)

    for t in range(T):
        phi = np.zeros((N, K))
        for i in range(N):
            all_clusters = np.array(
                [len(np.where(np.delete(X_c, i) == k)[0]) for k in range(K)])
            clusters = np.where(all_clusters > 0)[0]
            for j in range(len(clusters)):
                if j != clusters[j]:
                    X_c[np.where(X_c == clusters[j])] = j
                    mu[j], mu[clusters[j]] = mu[clusters[j]], mu[j]
                    lamb[j], lamb[clusters[j]] = lamb[clusters[j]], lamb[j]
                phi[i, j] = stats.multivariate_normal.pdf(
                    X[i, :], mean=mu[j], cov=inv(lamb[j])) \
                    * all_clusters[clusters[j]] / (alpha0 + N - 1)
            j_max = int(max(X_c) + 1)
            xminusm = (X[i, :] - m0).reshape(d, 1)
            marginal = (
                (c0 / ((c0 + 1) * np.pi))**(d / 2)
                * det(B0 + c0 / (1 + c0)
                      * xminusm.dot(xminusm.T))**(-0.5 * (a0 + 1))
                / det(B0)**(-0.5 * a0)
                * np.exp(multigammaln((a0 + 1) / 2, d))
                / np.exp(multigammaln(a0 / 2, d)))
            phi[i, j_max] = alpha0 / (alpha0 + N - 1) * marginal
            phi[i] = phi[i] / np.sum(phi[i])
            cluster_list = np.where(phi[i] > 0)[0]
            discrete_dist = stats.rv_discrete(
                values=(range(len(cluster_list)), phi[i][cluster_list]))
            X_c[i] = discrete_dist.rvs(size=1)[0]
            if X_c[i] == j_max:
                mu[j_max], lamb[j_max] = update_param(
                    X, m0, c0, a0, B0, X_c, j_max)
        for j in np.unique(X_c):
            mu[j], lamb[j] = update_param(X, m0, c0, a0, B0, X_c, j)
        top_clusters[t, :] = np.array(sorted(
            [len(np.where(X_c == j)[0]) for j in range(K)],
            reverse=True))[0:6]
        num_clusters[t] = len(np.unique(X_c))
    return top_clusters, num_clusters
def log_gamma_distrib(a, p):
    return special.multigammaln(a, p)
def var_bound(data, modelState, queryState):
    '''
    Determines the variational bounds. Values are mutated in place, but
    are reset afterwards to their initial values. So it's safe to call
    in a serial manner.
    '''
    # Unpack the structs, for ease of access and efficiency
    W, L, X = data.words, data.links, data.feats
    D, _ = W.shape
    means, varcs, docLens = \
        queryState.means, queryState.varcs, queryState.docLens
    K, topicMean, topicCov, vocab, A = \
        modelState.K, modelState.topicMean, modelState.topicCov, \
        modelState.vocab, modelState.A

    # Calculate some implicit variables
    itopicCov = la.inv(topicCov)

    bound = 0

    expMeansOut = np.exp(means - means.max(axis=1)[:, np.newaxis])
    expMeansIn = np.exp(means - means.max(axis=0)[np.newaxis, :])
    lse_at_k = expMeansIn.sum(axis=0)

    if USE_NIW_PRIOR:
        pseudoObsMeans = K + NIW_PSEUDO_OBS_MEAN
        pseudoObsVar = K + NIW_PSEUDO_OBS_VAR

        # distribution over topic covariance
        bound -= 0.5 * K * pseudoObsVar * log(NIW_PSI)
        bound -= 0.5 * K * pseudoObsVar * log(2)
        bound -= fns.multigammaln(pseudoObsVar / 2., K)
        bound -= 0.5 * (pseudoObsVar + K - 1) * safe_log_det(topicCov)
        bound += 0.5 * NIW_PSI * np.trace(itopicCov)

        # and its entropy
        # is a constant which we skip

        # distribution over means
        bound -= 0.5 * K * log(1. / pseudoObsMeans) * safe_log_det(topicCov)
        bound -= 0.5 / pseudoObsMeans * \
            (topicMean).T.dot(itopicCov).dot(topicMean)

        # and its entropy
        bound += 0.5 * safe_log_det(topicCov)  # + a constant

    # Distribution over document topics
    bound -= (D * K) / 2. * LN_OF_2_PI
    bound -= D / 2. * la.det(topicCov)
    diff = means - topicMean[np.newaxis, :]
    bound -= 0.5 * np.sum(diff.dot(itopicCov) * diff)
    # = -0.5 * sum_d tr(V_d \Sigma^{-1}) when V_d is diagonal only.
    bound -= 0.5 * np.sum(varcs * np.diag(itopicCov)[np.newaxis, :])

    # And its entropy
    # bound += 0.5 * D * K * LN_OF_2_PI_E + 0.5 * np.sum(np.log(varcs))

    # Distribution over word-topic assignments and words and the former's
    # entropy, and similarly for out-links. This is somewhat jumbled to
    # avoid repeatedly taking the exp and log of the means
    # D x V [W / TB] is the quotient of the original over the
    # reconstructed doc-term matrix
    W_weights = sparseScalarQuotientOfDot(W, expMeansOut, vocab)
    w_top_sums = expMeansOut * (W_weights.dot(vocab.T))  # D x K

    L_weights = sparseScalarQuotientOfNormedDot(
        L, expMeansOut, expMeansIn, lse_at_k)
    l_top_sums = L_weights.dot(expMeansIn) / lse_at_k[np.newaxis, :] \
        * expMeansOut

    bound += np.sum(docLens * np.log(np.sum(expMeansOut, axis=1)))
    bound += np.sum(sparseScalarProductOfSafeLnDot(W, expMeansOut, vocab).data)

    # means = np.log(expMeans, out=expMeans)
    # means = safe_log(expMeansOut, out=means)

    bound += np.sum(means * w_top_sums)
    bound += np.sum(2 * ssp.diags(docLens, 0) * means.dot(A) * means)
    bound -= 2. * scaledSelfSoftDot(means, docLens)
    bound -= 0.5 * np.sum(docLens[:, np.newaxis] * w_top_sums
                          * (np.diag(A))[np.newaxis, :])

    bound -= np.sum(means * w_top_sums)

    return bound
def ssmultigammaln(a, b):
    return ss.multigammaln(a[0], b)
def log_z(self, S, nu):
    d = S.shape[-1]
    return (-0.5 * nu * np.linalg.slogdet(S)[1]
            + 0.5 * nu * d * np.log(2.)
            + multigammaln(0.5 * nu, d))
def test1(self):
    # A test of the identity
    #     Gamma_1(a) = Gamma(a)
    np.random.seed(1234)
    a = np.abs(np.random.randn())
    assert_array_equal(multigammaln(a, 1), gammaln(a))
def compute_fixed_moments(self, n):
    """
    Compute moments for fixed x.
    """
    u0 = np.asanyarray(n)
    u1 = special.multigammaln(0.5 * u0, self.k)
    return [u0, u1]
def logB(W0, v0):
    D, _ = W0.shape
    return -((v0 / 2.) * np.log(sp.linalg.det(W0))
             + (v0 * D / 2.) * np.log(2.)
             + multigammaln(v0 / 2., D))
def calc_ELBO_Opti(self, *args):
    """
    Calculates partial (or total) ELBO for the given cluster indices

    Parameters:
    -----------
    maskedData: maskData object

    suffStat: suffStatistics object

    vbParam: vbPar object

    param: Config object (see Config.py)

    K_ind (optional): list
        Cluster indices for which the partial ELBO is being calculated.
        Defaults to all clusters
    """
    if len(args[0]) < 5:
        maskedData, suffStat, vbParam, param = args[0]
        nfeature, Khat, nchannel = vbParam.muhat.shape
        P = nfeature * nchannel
        # N = vbParam.rhat.shape[0]
        k_ind = np.arange(Khat)
    else:
        maskedData, suffStat, vbParam, param, k_ind = args[0]
        nfeature, Khat, nchannel = vbParam.muhat.shape
        P = nfeature * nchannel
        # N = vbParam.rhat.shape[0]

    prior = param.cluster_prior
    fit_term = np.zeros(Khat)
    bmterm = np.zeros(Khat)
    entropy_term = np.zeros(Khat)

    rhatp = vbParam.rhat[:, k_ind]
    muhat = np.transpose(vbParam.muhat, [1, 2, 0])
    Vhat = np.transpose(vbParam.Vhat, [2, 3, 0, 1])
    sumY = np.transpose(suffStat.sumY, [1, 2, 0])
    logdetVhat = np.sum(np.linalg.slogdet(Vhat)[1], axis=1, keepdims=False)

    # fit term
    fterm1temp = np.squeeze(np.sum(np.matmul(
        np.matmul(muhat[k_ind, :, np.newaxis, :], Vhat[k_ind, :, :, :]),
        muhat[k_ind, :, :, np.newaxis]), axis=1, keepdims=False),
        axis=(1, 2))
    fterm1 = -fterm1temp * vbParam.nuhat[k_ind] * suffStat.Nhat[k_ind] / 2.0

    fterm2 = -2.0 * np.squeeze(np.sum(np.matmul(
        np.matmul(sumY[k_ind, :, np.newaxis, :], Vhat[k_ind, :, :, :]),
        muhat[k_ind, :, :, np.newaxis]), axis=(1), keepdims=False))
    fterm2 *= -vbParam.nuhat[k_ind] / 2.0

    fterm3 = np.sum(suffStat.sumYSq1[:, :, k_ind, :] *
                    vbParam.Vhat[:, :, k_ind, :],
                    axis=(0, 1, 3), keepdims=False)
    fterm3 *= -vbParam.nuhat[k_ind] / 2.0

    fterm4 = -nchannel * suffStat.Nhat[k_ind] / 2.0 * (
        nfeature / vbParam.lambdahat[k_ind] - nfeature * np.log(2.0) -
        mult_psi(vbParam.nuhat[k_ind, np.newaxis] / 2.0, nfeature).ravel() +
        nfeature * np.log(2 * np.pi))

    fterm5 = suffStat.Nhat[k_ind] * logdetVhat[k_ind] / 2.0

    fterm6 = -np.sum(np.trace(np.matmul(
        np.transpose(suffStat.sumYSq2[:, :, k_ind, :], [2, 3, 0, 1]),
        Vhat[k_ind, :, :, :]), axis1=2, axis2=3),
        axis=1, keepdims=0) * vbParam.nuhat[k_ind] / 2.0

    fit_term[k_ind] = fterm1 + fterm2 + fterm3 + fterm4 + fterm5 + fterm6

    # BM Term
    bmterm1 = 0.5 * prior.nu * \
        np.sum(np.linalg.slogdet(Vhat[k_ind, :, :, :] / prior.V)[1], axis=1)

    bmterm2 = -0.5 * vbParam.nuhat[k_ind] * (np.sum(np.trace(
        Vhat[k_ind, :, :, :] / prior.V, axis1=2, axis2=3), axis=1))

    bmterm3 = 0.5 * (vbParam.nuhat[k_ind] * P + P -
                     P * prior.lambda0 / vbParam.lambdahat[k_ind] +
                     P * np.log(prior.lambda0 / vbParam.lambdahat[k_ind]))

    # temp = np.squeeze(np.sum(np.matmul(
    #     np.matmul(muhat[k_ind, :, np.newaxis, :], Vhat[k_ind, :, :, :]),
    #     muhat[k_ind, :, :, np.newaxis]), axis=1, keepdims=False),
    #     axis=(1, 2))
    bmterm4 = -0.5 * vbParam.nuhat[k_ind] * prior.lambda0 * fterm1temp

    bmterm5 = nchannel * (
        specsci.multigammaln(vbParam.nuhat[k_ind] / 2.0, nfeature) -
        specsci.multigammaln(prior.nu / 2.0, nfeature) +
        0.5 * (prior.nu - vbParam.nuhat[k_ind]) *
        mult_psi(vbParam.nuhat[k_ind, np.newaxis] / 2.0, nfeature).ravel())

    bmterm[k_ind] = bmterm1 + bmterm2 + bmterm3 + bmterm4 + bmterm5

    # Entropy term
    entropy_term1 = np.sum(
        vbParam.rhat * maskedData.weight[:, np.newaxis] *
        (specsci.digamma(vbParam.ahat) -
         specsci.digamma(np.sum(vbParam.ahat))),
        axis=0)

    entropy_term2 = np.zeros(Khat)
    entropy_term2[k_ind] = -np.sum(
        maskedData.weight[:, np.newaxis] * rhatp * np.log(rhatp + 1e-200),
        axis=0).ravel()

    entropy_term = entropy_term1 + entropy_term2

    # Dirichlet terms
    dc_term = -specsci.gammaln(np.sum(vbParam.ahat)) \
        + np.sum(specsci.gammaln(vbParam.ahat)) \
        + specsci.gammaln(Khat * param.cluster_prior.a) \
        - Khat * specsci.gammaln(param.cluster_prior.a) \
        + np.sum((param.cluster_prior.a - vbParam.ahat) *
                 (specsci.digamma(vbParam.ahat) -
                  specsci.digamma(np.sum(vbParam.ahat))))

    # prior term
    pterm = np.log(prior.beta**Khat * np.exp(-prior.beta) /
                   math.factorial(Khat))

    self.percluster = fit_term + bmterm + entropy_term2
    self.rest_term = np.sum(entropy_term1) + dc_term + pterm
    self.total = np.sum(self.percluster) + self.rest_term
def ssmultigammaln(a, b):
    return np.array(ss.multigammaln(a[0], b), config.floatX)
def log_partition(self):
    # nu * sum(log(diag(chol(psi)))) equals 0.5 * nu * log(det(psi))
    return 0.5 * self.nu * self.dim * np.log(2) \
        + multigammaln(self.nu / 2., self.dim) \
        + self.nu * np.sum(np.log(np.diag(self.psi_chol)))
def test1(self):
    a = np.abs(np.random.randn())
    assert_array_equal(multigammaln(a, 1), gammaln(a))
def scipy_fun(a):
    return osp_special.multigammaln(a, d)
def compute_fixed_moments(n):
    """
    Compute moments for fixed x.
    """
    u0 = np.asanyarray(n)
    u1 = special.multigammaln(0.5 * u0, k)
    return [u0, u1]
def multigamma_ln(a, d):
    return special.multigammaln(a, d)
def logB(W, v):
    D = W.shape[0]
    return (-0.5 * D * v * np.log(2.0)
            - 0.5 * v * np.log(np.linalg.det(W))
            - multigammaln(0.5 * v, D))
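Since log B(W, v) is exactly the log normalizer of the Wishart density (Bishop Eq. B.79), it can be cross-checked against scipy.stats.wishart; a minimal sketch, assuming the logB above and its imports are in scope:

import numpy as np
from scipy.stats import wishart

D, v = 3, 6.0
W = np.eye(D)
X = np.diag([0.5, 1.0, 2.0])
# logpdf(X) = logB(W, v) + 0.5*(v - D - 1)*log|X| - 0.5*tr(inv(W) X)
lhs = wishart(df=v, scale=W).logpdf(X)
rhs = (logB(W, v) + 0.5 * (v - D - 1) * np.log(np.linalg.det(X))
       - 0.5 * np.trace(np.linalg.solve(W, X)))
assert np.isclose(lhs, rhs)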
def mniw_log_partitionfunction(nu, S, M, K):
    n = M.shape[0]
    return n * nu / 2 * np.log(2) + special.multigammaln(nu / 2., n) \
        - nu / 2 * np.linalg.slogdet(S)[1] \
        - n / 2 * np.linalg.slogdet(K)[1]