def setPostFromEstParams(self, EstParams, Data=None, nTotalTokens=0,
                         **kwargs):
    ''' Set attribute Post based on values in EstParams.

    Args
    ----
    EstParams : object with attributes K, D and phi
        Point estimates; phi is scaled by the per-component token counts.
    Data : optional, object with attribute word_count
        When provided, the sum of word_count overrides nTotalTokens.
    nTotalTokens : scalar or array-like
        A scalar is split evenly across the K components. Otherwise it is
        indexed as one entry per component.
    lam : optional keyword
        When provided and not None, used directly as the posterior lam.

    Post Condition
    --------------
    Attributes K and Post updated in-place.
    '''
    K = EstParams.K
    D = EstParams.D
    if Data is not None:
        nTotalTokens = Data.word_count.sum()
    # Coerce first so Python floats and lists are accepted too; probing
    # `.ndim` directly fails on anything that is not already an array.
    nTotalTokens = np.asarray(nTotalTokens, dtype=np.float64)
    if nTotalTokens.ndim == 0:
        nTotalTokens = float(nTotalTokens) / float(K) * np.ones(K)
    if np.any(nTotalTokens == 0):
        # A zero count would leave lam equal to the prior alone, so every
        # count is raised to at least the total prior mass.
        priorScale = self.Prior.lam.sum()
        warnings.warn("Enforcing minimum scale of %.3f for lam" % (priorScale))
        nTotalTokens = np.maximum(nTotalTokens, priorScale)
    if 'lam' in kwargs and kwargs['lam'] is not None:
        lam = kwargs['lam']
    else:
        WordCounts = EstParams.phi * nTotalTokens[:, np.newaxis]
        assert WordCounts.max() > 0
        lam = WordCounts + self.Prior.lam
    self.Post = ParamBag(K=K, D=D)
    self.Post.setField('lam', lam, dims=('K', 'D'))
    self.K = K
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   lam=None, WordCounts=None, **kwargs):
    ''' Set attribute Post to provided values.

    Sources are tried in this order:
    1. obsModel : copy its Post, or convert its EstParams when it has no
       Post attribute.
    2. LP and Data : compute summary stats, then run updatePost.
    3. SS : run updatePost.
    4. lam (optionally added to WordCounts) : stored directly.

    Post Condition
    --------------
    Attributes K and Post updated in-place.
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
            return
        self.Post = obsModel.Post.copy()
        self.K = self.Post.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        if WordCounts is None:
            lamKD = as2D(lam)
        else:
            lamKD = as2D(WordCounts) + lam
        nComp, nDim = lamKD.shape
        self.Post = ParamBag(K=nComp, D=nDim)
        self.Post.setField('lam', lamKD, dims=('K', 'D'))
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def updateEstParams_MAP(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the MAP objective for point estimation. Per-component
    posterior quantities (nu, B, m, kappa) are assembled from the prior
    and the sufficient statistics N, x, xxT, then converted to (mu, Sigma)
    by MAPEstParams_inplace.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    hasMatchingBag = hasattr(self, 'EstParams') and self.EstParams.K == SS.K
    if not hasMatchingBag:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)

    prior = self.Prior
    nComp, nDim = SS.K, SS.D
    postNu = prior.nu + SS.N
    postKappa = prior.kappa + SS.N
    # Prior contribution shared by every component's B update.
    priorTerm = prior.B + prior.kappa * np.outer(prior.m, prior.m)

    postMean = np.empty((nComp, nDim))
    postB = np.empty((nComp, nDim, nDim))
    for k in xrange(nComp):
        weightedSum = prior.kappa * prior.m + SS.x[k]
        invKappa = 1.0 / postKappa[k]
        postMean[k] = invKappa * weightedSum
        postB[k] = priorTerm + SS.xxT[k] \
            - invKappa * np.outer(weightedSum, weightedSum)

    mu, Sigma = MAPEstParams_inplace(postNu, postB, postMean, postKappa)
    self.EstParams.setField('mu', mu, dims=('K', 'D'))
    self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = SS.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 phi=None, topics=None, **kwargs):
    ''' Create EstParams ParamBag with field phi.

    Sources are tried in this order: obsModel (copy its EstParams),
    LP together with Data (summary stats, then updateEstParams), SS
    (updateEstParams), and finally an explicit phi array. `topics` is an
    alias that, when given, takes precedence over `phi`.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    if topics is not None:
        phi = topics

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        nComp, nDim = phi.shape[0], phi.shape[1]
        self.EstParams = ParamBag(K=nComp, D=nDim)
        self.EstParams.setField('phi', phi, dims=('K', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 phi=None, **kwargs):
    ''' Set attribute EstParams to provided values.

    Sources are tried in this order: obsModel (copy its EstParams),
    LP together with Data (summary stats, then updateEstParams), SS
    (updateEstParams), and finally an explicit phi array whose first two
    axes give K and D.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    haveLocalParams = LP is not None and Data is not None
    if haveLocalParams:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        nComp = phi.shape[0]
        nDim = phi.shape[1]
        self.EstParams = ParamBag(K=nComp, D=nDim)
        self.EstParams.setField('phi', phi, dims=('K', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   nu=0, B=0, **kwargs):
    ''' Set attribute Post to provided values.

    Sources are tried in this order:
    1. obsModel : copy its Post, or convert its EstParams when it has no
       Post attribute.
    2. LP and Data : compute summary stats, then run updatePost.
    3. SS : run updatePost.
    4. nu and B : stored directly; K is read from B.shape[0], so B must
       be an array on this path (the default B=0 is not usable here).

    Post Condition
    --------------
    Attributes K and Post updated in-place.
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
            return
        self.Post = obsModel.Post.copy()
        self.K = self.Post.K
        return

    haveLocalParams = LP is not None and Data is not None
    if haveLocalParams:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        nComp = B.shape[0]
        self.Post = ParamBag(K=nComp, D=self.D)
        self.Post.setField('nu', as1D(nu), dims=('K'))
        self.Post.setField('B', B, dims=('K', 'D', 'D'))
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def updateEstParams_MaxLik(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the maximum likelihood objective for point estimation.
    For each component, A is obtained by solving a linear system built
    from SS.ppT and SS.pxT, and Sigma is assembled from SS.xxT, SS.pxT,
    SS.ppT and A, divided by SS.N.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    hasMatchingBag = hasattr(self, 'EstParams') and self.EstParams.K == SS.K
    if not hasMatchingBag:
        self.EstParams = ParamBag(K=SS.K, D=SS.D, E=SS.E)

    ridgeDD = self.min_covar * np.eye(SS.D)
    # Reuse the D x D ridge when the two dimensions coincide.
    ridgeEE = ridgeDD if SS.E == SS.D else self.min_covar * np.eye(SS.E)

    A = np.zeros((SS.K, self.D, self.E))
    Sigma = np.zeros((SS.K, self.D, self.D))
    for k in xrange(SS.K):
        ppT_k = SS.ppT[k]
        pxT_k = SS.pxT[k]
        # Add small pos multiple of identity to make invertible
        # TODO: This is source of potential stability issues.
        A[k] = np.linalg.solve(ppT_k + ridgeEE, pxT_k).T
        A_k = A[k]
        crossTerm = 2 * np.dot(pxT_k.T, A_k.T)
        quadTerm = np.dot(A_k, np.dot(ppT_k, A_k.T))
        Sigma[k] = SS.xxT[k] - crossTerm + quadTerm
        Sigma[k] /= SS.N[k]
        # Symmetry of Sigma[k] is not explicitly enforced here.
        Sigma[k] += ridgeDD

    self.EstParams.setField('A', A, dims=('K', 'D', 'E'))
    self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = SS.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 A=None, Sigma=None, **kwargs):
    ''' Initialize EstParams attribute with fields A, Sigma.

    Sources are tried in this order: obsModel (copy its EstParams),
    LP together with Data (summary stats, then updateEstParams), SS
    (updateEstParams), and finally explicit A and Sigma arrays, which are
    passed through as3D and give K, D, E from the shape of A.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    if LP is not None and Data is not None:
        SS = self.calcSummaryStats(Data, None, LP)
    if SS is not None:
        self.updateEstParams(SS)
    else:
        A = as3D(A)
        Sigma = as3D(Sigma)
        self.EstParams = ParamBag(
            K=A.shape[0], D=A.shape[1], E=A.shape[2])
        self.EstParams.setField('A', A, dims=('K', 'D', 'E'))
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    # Keep K in sync with the freshly stored EstParams on every path,
    # matching the other setEstParams implementations.
    self.K = self.EstParams.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 mu=None, Sigma=None, **kwargs):
    ''' Create EstParams ParamBag with fields mu, Sigma.

    Sources are tried in this order: obsModel (copy its EstParams),
    LP together with Data (summary stats, then updateEstParams), SS
    (updateEstParams), and finally explicit mu and Sigma. On the explicit
    path K and D come from the shape of as3D(Sigma), and mu is transposed
    when its first axis does not match K.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    haveLocalParams = LP is not None and Data is not None
    if haveLocalParams:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        Sigma = as3D(Sigma)
        nComp, nDim, _ = Sigma.shape
        mu = as2D(mu)
        if mu.shape[0] != nComp:
            # Accept mu supplied with its axes swapped.
            mu = mu.T
        assert mu.shape[0] == nComp
        assert mu.shape[1] == nDim
        self.EstParams = ParamBag(K=nComp, D=nDim)
        self.EstParams.setField('mu', mu, dims=('K', 'D'))
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def packParamBagForPost(nu=None, beta=None, m=None, kappa=None, D=None,
                        Post=None, **kwargs):
    ''' Pack posterior parameters nu, beta, m, kappa into a ParamBag.

    m and beta are passed through as2D and transposed (with a copy) when
    their second axis does not equal D. D defaults to m.shape[1].

    Args
    ----
    nu, kappa : passed through as1D and stored with dims 'K'
    beta, m : stored with dims ('K', 'D')
    D : optional int
    Post : optional ParamBag to fill in place; must already have matching
        K and D. A new ParamBag is created when omitted.

    Returns
    -------
    Post : ParamBag
    '''
    m = as2D(m)
    beta = as2D(beta)
    if D is None:
        D = m.shape[1]

    if m.shape[1] != D:
        m = m.T.copy()
    if beta.shape[1] != D:
        beta = beta.T.copy()

    nComp = m.shape[0]
    if Post is not None:
        assert isinstance(Post, ParamBag)
        assert Post.K == nComp
        assert Post.D == D
    else:
        Post = ParamBag(K=nComp, D=D)

    Post.setField('nu', as1D(nu), dims=('K'))
    Post.setField('beta', beta, dims=('K', 'D'))
    Post.setField('m', m, dims=('K', 'D'))
    Post.setField('kappa', as1D(kappa), dims=('K'))
    return Post
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 Sigma=None, **kwargs):
    ''' Create EstParams ParamBag with field Sigma.

    Sources are tried in this order: obsModel (copy its EstParams),
    LP together with Data (summary stats, then updateEstParams), SS
    (updateEstParams), and finally an explicit Sigma array whose first
    axis gives K. D is taken from self.D.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    if not (LP is None or Data is None):
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        nComp = Sigma.shape[0]
        self.EstParams = ParamBag(K=nComp, D=self.D)
        self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def setEstParams(self, obsModel=None, SS=None, LP=None, Data=None,
                 mu=None, sigma=None, Sigma=None, **kwargs):
    ''' Create EstParams ParamBag with fields mu, sigma.

    Sources are tried in this order: obsModel (copy its EstParams),
    LP together with Data (summary stats, then updateEstParams), SS
    (updateEstParams), and finally explicit mu with either sigma or
    Sigma. When a 3-dim Sigma is given, sigma is rebuilt from the diagonal
    of each Sigma[k] and any sigma argument is ignored.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if obsModel is not None:
        self.EstParams = obsModel.EstParams.copy()
        self.K = self.EstParams.K
        return

    haveLocalParams = LP is not None and Data is not None
    if haveLocalParams:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        nComp = mu.shape[0]
        if Sigma is not None:
            assert Sigma.ndim == 3
            sigma = np.empty((Sigma.shape[0], Sigma.shape[1]))
            for k in xrange(nComp):
                sigma[k] = np.diag(Sigma[k])
        assert sigma.ndim == 2
        self.EstParams = ParamBag(K=nComp, D=mu.shape[1])
        self.EstParams.setField('mu', mu, dims=('K', 'D'))
        self.EstParams.setField('sigma', sigma, dims=('K', 'D'))
    else:
        self.updateEstParams(SS)
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post):
    ''' Convert from Post (nu, B) to EstParams (Sigma).

    Each EstParam is set to its posterior mean: for every component,
    Sigma = B / (nu - D - 1).

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    nDim = Post.D
    self.EstParams = ParamBag(K=Post.K, D=nDim)
    denom = Post.nu - nDim - 1
    # Append two axes so the per-component scalar divides each matrix.
    Sigma = Post.B / denom[:, np.newaxis, np.newaxis]
    self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post):
    ''' Convert from Post (nu, beta, m, kappa) to EstParams (mu, sigma).

    Each EstParam is set to its posterior mean: mu is a copy of m, and
    sigma = beta / (nu - 2) for every component. D is taken from self.D.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.EstParams = ParamBag(K=Post.K, D=self.D)
    postMean = Post.m.copy()
    denom = Post.nu - 2
    postVar = Post.beta / denom[:, np.newaxis]
    self.EstParams.setField('mu', postMean, dims=('K', 'D'))
    self.EstParams.setField('sigma', postVar, dims=('K', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post=None, **kwargs):
    ''' Convert from Post (lam) to EstParams (phi).

    Each EstParam is set to its posterior mean: every row of lam is
    divided by its own sum. Falls back to self.Post when Post is omitted.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    srcPost = self.Post if Post is None else Post
    self.EstParams = ParamBag(K=srcPost.K, D=srcPost.D)
    rowTotals = np.sum(srcPost.lam, axis=1)
    phi = srcPost.lam / rowTotals[:, np.newaxis]
    self.EstParams.setField('phi', phi, dims=('K', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post):
    ''' Convert from Post (nu, B, m, kappa) to EstParams (mu, Sigma).

    Each EstParam is set to its posterior mean: mu is a copy of m, and
    Sigma = B / (nu - D - 1) for every component.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    self.EstParams = ParamBag(K=Post.K, D=Post.D)
    postMean = Post.m.copy()
    # Append two axes so the per-component nu lines up with each matrix.
    nuCol = Post.nu[:, np.newaxis, np.newaxis]
    postCov = Post.B / (nuCol - Post.D - 1)
    self.EstParams.setField('mu', postMean, dims=('K', 'D'))
    self.EstParams.setField('Sigma', postCov, dims=('K', 'D', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post):
    ''' Convert from Post (M, B, nu) to EstParams (A, Sigma).

    A is a copy of Post.M, and Sigma = B / (nu - D - 1) for every
    component.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    nDim = Post.D
    self.EstParams = ParamBag(K=Post.K, D=nDim, E=Post.E)
    regCoefs = Post.M.copy()
    denom = Post.nu - nDim - 1
    postCov = Post.B / denom[:, np.newaxis, np.newaxis]
    self.EstParams.setField('A', regCoefs, dims=('K', 'D', 'E'))
    self.EstParams.setField('Sigma', postCov, dims=('K', 'D', 'D'))
    self.K = self.EstParams.K
def setEstParamsFromPost(self, Post=None):
    ''' Set attribute EstParams based on values in Post.

    phi is computed elementwise as lam1 / (lam1 + lam0). Falls back to
    self.Post when Post is omitted.

    Post Condition
    --------------
    Attributes K and EstParams updated in-place.
    '''
    srcPost = self.Post if Post is None else Post
    self.EstParams = ParamBag(K=srcPost.K, D=srcPost.D)
    total = srcPost.lam1 + srcPost.lam0
    phi = srcPost.lam1 / total
    self.EstParams.setField('phi', phi, dims=('K', 'D'))
    self.K = self.EstParams.K
def createPrior(self, Data, lam=1.0, min_phi=1e-100, **kwargs):
    ''' Initialize Prior ParamBag attribute.

    Args
    ----
    Data : unused here; kept for a uniform createPrior signature.
    lam : scalar or array-like of size D
        A scalar is broadcast to a length-D vector.
    min_phi : float
        Stored on self as min_phi.

    Post Condition
    --------------
    Attributes Prior, min_phi and prior_cFunc set in-place.
    '''
    D = self.D
    self.min_phi = min_phi
    self.Prior = ParamBag(K=0, D=D)
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the same dtype on every version.
    lam = np.asarray(lam, dtype=np.float64)
    if lam.ndim == 0:
        lam = lam * np.ones(D)
    assert lam.size == D
    self.Prior.setField('lam', lam, dims=('D'))
    self.prior_cFunc = c_Func(lam)
def updateEstParams_MAP(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the MAP objective for point estimation. Not implemented
    yet: after clearing the cache and (re)allocating EstParams when
    needed, this always raises.

    Raises
    ------
    NotImplementedError
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D, E=SS.E)
    # NotImplemented is a non-callable constant used by comparison
    # operators; the exception type meant here is NotImplementedError.
    raise NotImplementedError('TODO')
def updateEstParams_MAP(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the MAP objective for point estimation: phi is
    WordCounts + Prior.lam - 1, with each row normalized to sum to one.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)
    phi = SS.WordCounts + self.Prior.lam - 1
    phi /= phi.sum(axis=1)[:, np.newaxis]
    self.EstParams.setField('phi', phi, dims=('K', 'D'))
    # Honor the documented post condition, as the sibling update methods
    # do; previously K was left stale here.
    self.K = SS.K
def updatePost(self, SS):
    ''' Update attribute Post for all comps given suff stats.

    Update uses the variational objective. The new lam comes from
    calcPostParams(SS). Post is reallocated only when it is missing or
    its K differs from SS.K.

    Post Condition
    ---------
    Attributes K and Post updated in-place.
    '''
    self.ClearCache()
    hasMatchingBag = hasattr(self, 'Post') and self.Post.K == SS.K
    if not hasMatchingBag:
        self.Post = ParamBag(K=SS.K, D=SS.D)

    newLam = self.calcPostParams(SS)
    self.Post.setField('lam', newLam, dims=('K', 'D'))
    self.K = SS.K
def updateEstParams_MaxLik(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the maximum likelihood objective for point estimation:
    phi is WordCounts divided row-wise by SumWordCounts, then floored at
    self.min_phi.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)
    phi = SS.WordCounts / SS.SumWordCounts[:, np.newaxis]
    # prevent entries from reaching exactly 0
    np.maximum(phi, self.min_phi, out=phi)
    self.EstParams.setField('phi', phi, dims=('K', 'D'))
    # Honor the documented post condition, as the sibling update methods
    # do; previously K was left stale here.
    self.K = SS.K
def updateEstParams_MAP(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the MAP objective for point estimation:
    phi = (Count1 + lam1 - 1) / (Count1 + Count0 + lam1 + lam0 - 2).

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    if not hasattr(self, 'EstParams') or self.EstParams.K != SS.K:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)
    phi_numer = SS.Count1 + self.Prior.lam1 - 1
    phi_denom = SS.Count1 + SS.Count0 + \
        self.Prior.lam1 + self.Prior.lam0 - 2
    phi = phi_numer / phi_denom
    self.EstParams.setField('phi', phi, dims=('K', 'D'))
    # Honor the documented post condition, as the sibling update methods
    # do; previously K was left stale here.
    self.K = SS.K
def createParamBagForPrior(Data, D=0, nu=0, beta=None, m=None, kappa=None,
                           MMat='zero', ECovMat=None, sF=1.0, Prior=None,
                           **kwargs):
    ''' Initialize ParamBag of parameters which specify prior.

    Args
    ----
    Data : object with attribute dim (and X when MMat == 'data'), or None
        When None, the dimension is taken from the D argument.
    D : int, used only when Data is None
    nu : scalar, raised to at least D + 2
    beta : optional scalar or array
        When None, set to diag(ECovMat) * (nu - 2).
    m : optional scalar or array
        When None, built from Data.X (MMat == 'data') or zeros.
    kappa : optional scalar, raised to at least 1e-8
        None is treated as "use the minimum".
    ECovMat : optional array, str or None
        None or a str is resolved via createECovMatFromUserInput.
    sF : forwarded to createECovMatFromUserInput
    Prior : optional ParamBag to fill in place; must have matching D.

    Returns
    -------
    Prior : ParamBag
    '''
    if Data is None:
        D = int(D)
    else:
        D = int(Data.dim)
    nu = np.maximum(nu, D + 2)
    # The default kappa=None cannot be compared by np.maximum on
    # Python 3, so map it to a value below the enforced floor.
    kappa = np.maximum(0.0 if kappa is None else kappa, 1e-8)
    if beta is None:
        if ECovMat is None or isinstance(ECovMat, str):
            ECovMat = createECovMatFromUserInput(D, Data, ECovMat, sF)
        beta = np.diag(ECovMat) * (nu - 2)
    else:
        # asarray lets plain Python scalars through the ndim probe.
        beta = np.asarray(beta)
        if beta.ndim == 0:
            beta = np.asarray([beta], dtype=np.float64)
    if m is None:
        if MMat == 'data':
            # NOTE(review): this uses the column-wise *sum* of Data.X;
            # a mean may have been intended -- confirm before changing.
            m = np.sum(Data.X, axis=0)
        else:
            m = np.zeros(D)
    else:
        m = np.asarray(m)
        if m.ndim < 1:
            m = np.asarray([m], dtype=np.float64)
    if Prior is None:
        Prior = ParamBag(K=0, D=D)
    assert Prior.D == D
    Prior.setField('nu', nu, dims=None)
    Prior.setField('kappa', kappa, dims=None)
    Prior.setField('m', m, dims=('D'))
    Prior.setField('beta', beta, dims=('D'))
    return Prior
def setPostFromEstParams(self, EstParams, Data=None, nTotalTokens=1,
                         **kwargs):
    ''' Set attribute Post based on values in EstParams.

    Args
    ----
    EstParams : object with attributes K, D and phi
    Data : unused here; kept for a uniform signature.
    nTotalTokens : scalar (or array broadcastable against phi)
        Pseudo-count that scales phi into "on" counts; the remainder,
        nTotalTokens - phi * nTotalTokens, is used as the "off" counts.

    Post Condition
    --------------
    Attributes K and Post (fields lam1, lam0) updated in-place.
    '''
    K = EstParams.K
    D = EstParams.D

    WordCounts = EstParams.phi * nTotalTokens
    lam1 = WordCounts + self.Prior.lam1
    # Off-counts are the remainder of the total. The former
    # `1 - WordCounts` was only right for nTotalTokens == 1 and went
    # negative for larger totals.
    lam0 = (nTotalTokens - WordCounts) + self.Prior.lam0

    self.Post = ParamBag(K=K, D=D)
    self.Post.setField('lam1', lam1, dims=('K', 'D'))
    self.Post.setField('lam0', lam0, dims=('K', 'D'))
    self.K = K
def createPrior(self, Data, nu=0, B=None, ECovMat=None, sF=1.0, **kwargs):
    ''' Initialize Prior ParamBag attribute.

    nu is raised to at least D + 2. When B is omitted it is derived as
    ECovMat * (nu - D - 1), with ECovMat resolved through
    createECovMatFromUserInput when it is None or a str.

    Post Condition
    ------
    Prior expected covariance matrix set to match provided value.
    '''
    nDim = self.D
    nu = np.maximum(nu, nDim + 2)

    if B is not None:
        B = as2D(B)
    else:
        needsLookup = ECovMat is None or isinstance(ECovMat, str)
        if needsLookup:
            ECovMat = createECovMatFromUserInput(nDim, Data, ECovMat, sF)
        B = ECovMat * (nu - nDim - 1)

    self.Prior = ParamBag(K=0, D=nDim)
    self.Prior.setField('nu', nu, dims=None)
    self.Prior.setField('B', B, dims=('D', 'D'))
def updateEstParams_MaxLik(self, SS):
    ''' Update attribute EstParams for all comps given suff stats.

    Update uses the maximum likelihood objective for point estimation:
    each Sigma[k] is xxT[k] / N[k] plus min_covar times the identity.

    Post Condition
    ---------
    Attributes K and EstParams updated in-place.
    '''
    self.ClearCache()
    hasMatchingBag = hasattr(self, 'EstParams') and self.EstParams.K == SS.K
    if not hasMatchingBag:
        self.EstParams = ParamBag(K=SS.K, D=SS.D)

    ridge = self.min_covar * np.eye(SS.D)
    # Start every component at the ridge term, then add the data term.
    Sigma = np.tile(ridge, (SS.K, 1, 1))
    for k in xrange(SS.K):
        Sigma[k] += SS.xxT[k] / SS.N[k]

    self.EstParams.setField('Sigma', Sigma, dims=('K', 'D', 'D'))
    self.K = SS.K
def setPostFromEstParams(self, EstParams, Data=None, N=None):
    ''' Set attribute Post based on values in EstParams.

    Args
    ----
    EstParams : object with attributes K, D and Sigma
    Data : optional, object with attribute nObsTotal
        When provided, nObsTotal overrides N.
    N : scalar or array-like
        A scalar is split evenly across the K components. One of Data or
        N should be supplied.

    Post Condition
    --------------
    Attributes K and Post (fields nu, B) updated in-place, with
    B[k] = (nu[k] - D - 1) * Sigma[k].
    '''
    K = EstParams.K
    D = EstParams.D
    if Data is not None:
        N = Data.nObsTotal
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 yields the same dtype on every version.
    N = np.asarray(N, dtype=np.float64)
    if N.ndim == 0:
        N = float(N) / K * np.ones(K)

    nu = self.Prior.nu + N
    B = np.zeros((K, D, D))
    for k in xrange(K):
        B[k] = (nu[k] - D - 1) * EstParams.Sigma[k]

    self.Post = ParamBag(K=K, D=D)
    self.Post.setField('nu', nu, dims=('K'))
    self.Post.setField('B', B, dims=('K', 'D', 'D'))
    self.K = K
def createPrior(self, Data, lam1=1.0, lam0=1.0, priorMean=None,
                priorScale=None, eps_phi=1e-8, **kwargs):
    ''' Initialize Prior ParamBag attribute.

    Args
    ----
    Data : dataset object
        Read only when priorMean is a str containing "data": uses
        getDocTypeBinaryMatrix() when Data has attribute word_id,
        otherwise Data.X.
    lam1, lam0 : scalar or array-like of size D
        Used directly when priorMean is None or a str containing "none".
    priorMean : None, str, or numeric scalar / array
        * None or "none": use lam1 and lam0 as given.
        * str containing "data": mean of the binary data, clipped to
          [0.05, 0.95].
        * anything else: converted to a float array and used as the mean.
    priorScale : required whenever priorMean selects a mean
        lam1 = priorScale * mean, lam0 = priorScale * (1 - mean).
    eps_phi : float, stored on self as eps_phi

    Post Condition
    --------------
    Attributes Prior (fields lam1, lam0) and eps_phi set in-place.
    '''
    D = self.D
    self.eps_phi = eps_phi
    self.Prior = ParamBag(K=0, D=D)
    # Only strings have .lower(); without this guard a numeric priorMean
    # raised AttributeError and never reached the final branch.
    isStr = isinstance(priorMean, str)
    if priorMean is None or (isStr and priorMean.lower().count('none')):
        # np.float alias was removed in NumPy 1.24; float64 is identical.
        lam1 = np.asarray(lam1, dtype=np.float64)
        lam0 = np.asarray(lam0, dtype=np.float64)
    elif isStr and priorMean.count("data"):
        assert priorScale is not None
        priorScale = float(priorScale)
        if hasattr(Data, 'word_id'):
            X = Data.getDocTypeBinaryMatrix()
            dataMean = np.mean(X, axis=0)
        else:
            dataMean = np.mean(Data.X, axis=0)
        dataMean = np.minimum(dataMean, 0.95)  # Make prior more smooth
        dataMean = np.maximum(dataMean, 0.05)
        lam1 = priorScale * dataMean
        lam0 = priorScale * (1 - dataMean)
    else:
        assert priorScale is not None
        priorScale = float(priorScale)
        priorMean = np.asarray(priorMean, dtype=np.float64)
        lam1 = priorScale * priorMean
        lam0 = priorScale * (1 - priorMean)
    if lam1.ndim == 0:
        lam1 = lam1 * np.ones(D)
    if lam0.ndim == 0:
        lam0 = lam0 * np.ones(D)
    assert lam1.size == D
    assert lam0.size == D
    self.Prior.setField('lam1', lam1, dims=('D', ))
    self.Prior.setField('lam0', lam0, dims=('D', ))