def packParamBagForPost(nu=None, beta=None, m=None, kappa=None, D=None,
                        Post=None, **kwargs):
    ''' Pack posterior parameter arrays into a parameter bag.

    Args
    ----
    nu : stored (after 1D coercion) as field 'nu'
    beta : coerced to 2D, stored as field 'beta' with dims ('K', 'D')
    m : coerced to 2D, stored as field 'm' with dims ('K', 'D')
    kappa : stored (after 1D coercion) as field 'kappa'
    D : int or None
        Expected number of columns. When None, the column count of
        the 2D version of m is used. Any of m / beta whose column count
        differs from D is replaced by a copy of its transpose.
    Post : ParamBag or None
        Bag to fill. A new bag is created when None; otherwise its
        K and D attributes must match the provided arrays.

    Returns
    -------
    Post : ParamBag, with K equal to the number of rows of m
    '''
    mean_KD = as2D(m)
    beta_KD = as2D(beta)
    nCols = mean_KD.shape[1] if D is None else D
    # Arrays provided in transposed layout are flipped into K x D
    if mean_KD.shape[1] != nCols:
        mean_KD = mean_KD.T.copy()
    if beta_KD.shape[1] != nCols:
        beta_KD = beta_KD.T.copy()
    nRows, _ = mean_KD.shape
    if Post is not None:
        assert isinstance(Post, ParamBag)
        assert Post.K == nRows
        assert Post.D == nCols
    else:
        Post = ParamBag(K=nRows, D=nCols)
    Post.setField('nu', as1D(nu), dims=('K'))
    Post.setField('beta', beta_KD, dims=('K', 'D'))
    Post.setField('m', mean_KD, dims=('K', 'D'))
    Post.setField('kappa', as1D(kappa), dims=('K'))
    return Post
def packParamBagForPost(pnu_K=None, ptau_K=None, w_KE=None, P_KEE=None,
                        Post=None, **kwargs):
    ''' Coerce provided arrays to fixed rank and store them in a parameter bag.

    Args
    ----
    pnu_K : coerced to 1D; its size defines K
    ptau_K : coerced to 1D
    w_KE : coerced to 2D; its second dimension defines E
    P_KEE : coerced to 3D
    Post : ParamBag or None
        Bag to fill. When None, a new bag is created with K, D=E-1, E.
        An existing bag that lacks attribute E has E assigned to it.

    Returns
    -------
    Post : ParamBag, with K clusters
    '''
    nu_arr = as1D(pnu_K)
    tau_arr = as1D(ptau_K)
    w_arr = as2D(w_KE)
    P_arr = as3D(P_KEE)
    nClusters = nu_arr.size
    nFeat = w_arr.shape[1]
    if Post is None:
        Post = ParamBag(K=nClusters, D=nFeat - 1, E=nFeat)
    else:
        if not hasattr(Post, 'E'):
            Post.E = nFeat
    # Dimension checks apply to fresh and caller-provided bags alike
    assert Post.K == nClusters
    assert Post.D == nFeat - 1
    assert Post.E == nFeat
    fieldSpecs = (
        ('pnu_K', nu_arr, ('K')),
        ('ptau_K', tau_arr, ('K')),
        ('w_KE', w_arr, ('K', 'E')),
        ('P_KEE', P_arr, ('K', 'E', 'E')),
    )
    for fieldName, fieldArr, fieldDims in fieldSpecs:
        Post.setField(fieldName, fieldArr, dims=fieldDims)
    return Post
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   nu=0, B=0, **kwargs):
    ''' Set attribute Post to provided values.

    Sources are tried in this order:
    1. obsModel : copy its Post (and K), or, when it has no Post,
       delegate to setPostFromEstParams with its EstParams.
    2. LP and Data : compute summary stats, then updatePost.
    3. SS : updatePost directly.
    4. nu, B : build a fresh ParamBag with fields 'nu' and 'B',
       where K is taken from the first dimension of B.

    The cache is always cleared first.
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
            return
        self.Post = obsModel.Post.copy()
        self.K = self.Post.K
        return

    haveLocalParams = LP is not None and Data is not None
    if haveLocalParams:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        # No summaries available: pack the raw posterior parameters
        self.Post = ParamBag(K=B.shape[0], D=self.D)
        self.Post.setField('nu', as1D(nu), dims=('K'))
        self.Post.setField('B', B, dims=('K', 'D', 'D'))
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def getPrefixForLapQuery(taskpath, lapQuery):
    ''' Find the saved checkpoint lap closest to the queried lap.

    Saved laps are read from 'snapshot_lap.txt' inside taskpath. If that
    file cannot be read (IOError), laps are instead parsed from characters
    3..10 of the names of files matching 'Lap*Topic*' or, failing that,
    'Lap*.log_prob_w'.

    Args
    ----
    taskpath : str, directory holding saved checkpoints
    lapQuery : number or None
        None selects the last entry of the saved laps.

    Returns
    --------
    prefix : str
        For lap 1, prefix = 'Lap0001.000'.
        For lap 5.5, prefix = 'Lap0005.500'.
    lap : element of the saved-lap array
        lap checkpoint for saved params close to lapQuery
    '''
    lapFilePath = os.path.join(taskpath, 'snapshot_lap.txt')
    try:
        saveLaps = np.loadtxt(lapFilePath)
    except IOError:
        matches = glob.glob(os.path.join(taskpath, 'Lap*Topic*'))
        if len(matches) == 0:
            matches = glob.glob(os.path.join(taskpath, 'Lap*.log_prob_w'))
        assert len(matches) > 0
        # File names look like 'Lap0005.500...': chars 3..10 hold the lap
        lapValues = [
            float(curPath.split(os.path.sep)[-1][3:11])
            for curPath in sorted(matches)
        ]
        saveLaps = np.sort(np.asarray(lapValues))
    saveLaps = as1D(saveLaps)

    if lapQuery is None:
        # take final saved value
        bestLap = saveLaps[-1]
    else:
        nearestPos = np.argmin(np.abs(lapQuery - saveLaps))
        bestLap = saveLaps[nearestPos]
    return makePrefixForLap(bestLap), bestLap
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Only dense responsibilities LP['resp'] are supported.

    Side effect: if Data has no attribute X_NE, one is attached, equal to
    Data.X with an extra column of ones appended.

    Args
    ----
    Data : dataset object with attributes X, Y, nObs, dim
    SS : SuffStatBag or None
        Bag to fill. Created when None.
    LP : dict with key 'resp'

    Returns
    --------
    SS : SuffStatBag object, with K components.
        Fields set: 'xxT_KEE', 'yx_KE', 'yy_K', and 'N' if not present.

    Raises
    ------
    ValueError
        If LP does not contain dense 'resp'.
    '''
    if not hasattr(Data, 'X_NE'):
        Data.X_NE = np.hstack([Data.X, np.ones(Data.nObs)[:, np.newaxis]])
    Y_N = Data.Y
    X_NE = Data.X_NE
    E = X_NE.shape[1]

    if 'resp' not in LP:
        # The sparse ('spR') path was never implemented.
        raise ValueError(
            "calcSummaryStats: sparse responsibilities are not supported"
            " yet; LP must contain dense 'resp'")

    # Dense responsibility calculations
    resp = LP['resp']
    K = resp.shape[1]
    S_yy_K = dotATB(resp, np.square(Y_N)).flatten()
    S_yx_KE = dotATB(resp, Y_N * X_NE)

    # Expected outer product.
    # The two work buffers are allocated once and reused in-place for
    # every cluster to avoid K temporary N x E arrays.
    S_xxT_KEE = np.zeros((K, E, E))
    sqrtResp_k_N = np.sqrt(resp[:, 0])
    sqrtR_X_k_NE = sqrtResp_k_N[:, np.newaxis] * X_NE
    S_xxT_KEE[0] = dotATA(sqrtR_X_k_NE)
    for k in range(1, K):
        np.sqrt(resp[:, k], out=sqrtResp_k_N)
        np.multiply(sqrtResp_k_N[:, np.newaxis], X_NE, out=sqrtR_X_k_NE)
        S_xxT_KEE[k] = dotATA(sqrtR_X_k_NE)

    if SS is None:
        SS = SuffStatBag(K=K, D=Data.dim, E=E)
    elif not hasattr(SS, 'E'):
        SS._Fields.E = E
    SS.setField('xxT_KEE', S_xxT_KEE, dims=('K', 'E', 'E'))
    SS.setField('yx_KE', S_yx_KE, dims=('K', 'E'))
    SS.setField('yy_K', S_yy_K, dims=('K'))

    # Expected count for each k
    # Usually computed by allocmodel. But just in case...
    if not hasattr(SS, 'N'):
        SS.setField('N', resp.sum(axis=0), dims='K')
    return SS
def calcSmoothedMu(self, X, W=None):
    ''' Compute smoothed estimates of covariance and mean under the prior.

    Args
    ----
    X : 2D array, size N x D (a 1D array is treated as a single row),
        or None to get the prior-only estimate
    W : optional 1D array of per-row weights

    Returns
    -------
    Mu_1 : 2D array, size D x D
        Expected value of Cov[ X[n] ]
    Mu_2 : 1D array, size D
        Expected value of Mean[ X[n] ]
    '''
    prior = self.Prior
    if X is None:
        return prior.B / prior.nu, prior.m

    if X.ndim == 1:
        X = X[np.newaxis, :]
    _, D = X.shape

    # Weighted sufficient statistics
    if W is not None:
        W = as1D(W)
        scaledX = np.sqrt(W)[:, np.newaxis] * X
        sum_wxxT = np.dot(scaledX.T, scaledX)
        sum_wx = np.dot(W, X)
        sum_w = np.sum(W)
    else:
        sum_wxxT = np.dot(X.T, X)
        sum_wx = np.sum(X, axis=0)
        sum_w = X.shape[0]

    post_kappa = prior.kappa + sum_w
    Mu_2 = (prior.m * prior.kappa + sum_wx) / post_kappa

    prior_kmmT = prior.kappa * np.outer(prior.m, prior.m)
    post_kmmT = post_kappa * np.outer(Mu_2, Mu_2)
    post_B = sum_wxxT + prior.B + prior_kmmT - post_kmmT
    Mu_1 = post_B / (prior.nu + sum_w)

    assert Mu_1.ndim == 2
    assert Mu_1.shape == (D, D)
    assert Mu_2.shape == (D, )
    return Mu_1, Mu_2
def calcSmoothedMu(self, X, W=None):
    ''' Compute smoothed per-dimension variance and mean under the prior.

    Args
    ----
    X : 2D array, size N x D (a 1D array is treated as a single row),
        or None to get the prior-only estimate
    W : optional 1D array of per-row weights

    Returns
    -------
    Mu_1 : 1D array, size D
        Expected value of Var[ X[n,d] ]
    Mu_2 : 1D array, size D
        Expected value of Mean[ X[n] ]
    '''
    prior = self.Prior
    if X is None:
        return prior.beta / prior.nu, prior.m

    if X.ndim == 1:
        X = X[np.newaxis, :]
    _, D = X.shape

    # Weighted sufficient statistics
    if W is not None:
        W = as1D(W)
        sum_wxx = np.dot(W, np.square(X))
        sum_wx = np.dot(W, X)
        sum_w = np.sum(W)
    else:
        sum_wxx = np.sum(np.square(X), axis=0)
        sum_wx = np.sum(X, axis=0)
        sum_w = X.shape[0]

    kappa_new = prior.kappa + sum_w
    Mu_2 = (prior.m * prior.kappa + sum_wx) / kappa_new

    prior_kmm = prior.kappa * (prior.m * prior.m)
    post_kmm = kappa_new * (Mu_2 * Mu_2)
    beta_new = sum_wxx + prior.beta + prior_kmm - post_kmm
    Mu_1 = beta_new / (prior.nu + sum_w)

    assert Mu_1.ndim == 1
    assert Mu_1.shape == (D, )
    assert Mu_2.shape == (D, )
    return Mu_1, Mu_2
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Args
    ----
    Data : dataset object with attributes X and dim
    SS : SuffStatBag or None
        Bag to fill. Created when None.
    LP : dict
        Holds dense responsibilities under 'resp', or otherwise sparse
        responsibilities under 'spR'.

    Returns
    --------
    SS : SuffStatBag object, with K components.
        Fields set: 'xxT' with dims ('K', 'D', 'D'), and 'N' if the bag
        does not already have it.
    '''
    X = Data.X
    D = Data.dim
    if 'resp' in LP:
        resp = LP['resp']
        K = resp.shape[1]
        # Compute expected outer-product statistic.
        # The two work buffers are allocated once and overwritten
        # in-place for each k, avoiding K temporary N x D arrays.
        S_xxT = np.zeros((K, D, D))
        sqrtResp_k = np.sqrt(resp[:, 0])
        sqrtRX_k = sqrtResp_k[:, np.newaxis] * X
        S_xxT[0] = dotATA(sqrtRX_k)
        for k in range(1, K):
            np.sqrt(resp[:, k], out=sqrtResp_k)
            np.multiply(sqrtResp_k[:, np.newaxis], X, out=sqrtRX_k)
            S_xxT[k] = dotATA(sqrtRX_k)
    else:
        spR = LP['spR']
        K = spR.shape[1]
        # Compute expected outer-product statistic
        S_xxT = calcSpRXXT(X=X, spR_csr=spR)

    if SS is None:
        SS = SuffStatBag(K=K, D=D)
    # Expected outer-product for each state k
    SS.setField('xxT', S_xxT, dims=('K', 'D', 'D'))

    # Expected count for each k
    # Usually computed by allocmodel. But sometimes not (eg TopicModel)
    if not hasattr(SS, 'N'):
        if 'resp' in LP:
            SS.setField('N', LP['resp'].sum(axis=0), dims='K')
        else:
            SS.setField('N', as1D(toCArray(LP['spR'].sum(axis=0))), dims='K')
    return SS
def calcSummaryStats(Data, SS, LP, DataAtomType='doc', **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Args
    ----
    Data : dataset object
        Must provide vocab_size plus getSparseDocTypeCountMatrix()
        (when DataAtomType == 'doc') or getSparseTokenTypeCountMatrix()
        (any other DataAtomType).
    SS : SuffStatBag or None
        Bag to fill. Created with K and D=Data.vocab_size when None.
    LP : dict
        Holds dense responsibilities under 'resp', or otherwise sparse
        responsibilities under 'spR' together with 'nnzPerRow'.
    DataAtomType : str
        'doc' selects the document-level count matrix; anything else
        selects the token-level count matrix.

    Returns
    --------
    SS : SuffStatBag object, with K components.
        Fields set: 'WordCounts', 'SumWordCounts', and (only in the
        'doc' case, when not already present) 'N'.
    '''
    if 'resp' in LP:
        K = LP['resp'].shape[1]
    else:
        K = LP['spR'].shape[1]
        # NOTE(review): read but never used below; acts only as a
        # check that the key exists -- confirm whether it is needed.
        nnzPerRow = LP['nnzPerRow']
    if SS is None:
        SS = SuffStatBag(K=K, D=Data.vocab_size)
    if DataAtomType == 'doc':
        # X : 2D sparse matrix, size nDoc x vocab_size
        X = Data.getSparseDocTypeCountMatrix()
        # WordCounts : 2D array, size K x vocab_size
        # obtained by sparse matrix multiply
        # here, '*' operator does this because X is sparse matrix type
        Nvec = None
        if 'resp' in LP:
            WordCounts = LP['resp'].T * X
            if not hasattr(SS, 'N'):
                Nvec = LP['resp'].sum(axis=0)
        else:
            WordCounts = (LP['spR'].T * X).toarray()
            if not hasattr(SS, 'N'):
                Nvec = as1D(toCArray(LP['spR'].sum(axis=0)))
        # Nvec stays None when SS already carries field N
        if Nvec is not None:
            SS.setField('N', Nvec, dims=('K'))
    else:
        # 2D sparse matrix, size V x N
        X = Data.getSparseTokenTypeCountMatrix()
        # NOTE(review): unlike the 'doc' branch, field N is never set here.
        if 'resp' in LP:
            WordCounts = (X * LP['resp']).T  # matrix-matrix product
        else:
            WordCounts = (X * LP['spR']).T.toarray()
    SS.setField('WordCounts', WordCounts, dims=('K', 'D'))
    # Row sums: total count assigned to each of the K components
    SS.setField('SumWordCounts', np.sum(WordCounts, axis=1), dims=('K'))
    return SS
"""
def setPostFactors(self, obsModel=None, SS=None, LP=None, Data=None,
                   nu=0, B=0, M=0, V=0, **kwargs):
    ''' Set Post attribute to provided values.

    Sources are tried in this order:
    1. obsModel : copy its Post, or, when it has no Post, delegate to
       setPostFromEstParams with its EstParams. K is then read from Post.
    2. LP and Data : compute summary stats, then updatePost.
    3. SS : updatePost directly.
    4. nu, B, M, V : build a fresh ParamBag. M, B, V are coerced to 3D,
       and the shape of M must agree with self.D and self.E.

    The cache is always cleared first.
    '''
    self.ClearCache()

    if obsModel is not None:
        if not hasattr(obsModel, 'Post'):
            self.setPostFromEstParams(obsModel.EstParams)
        else:
            self.Post = obsModel.Post.copy()
        self.K = self.Post.K
        return

    haveLocalParams = LP is not None and Data is not None
    if haveLocalParams:
        SS = self.calcSummaryStats(Data, None, LP)

    if SS is None:
        # No summaries available: pack the raw posterior parameters
        M = as3D(M)
        B = as3D(B)
        V = as3D(V)
        K, D, E = M.shape
        assert D == self.D
        assert E == self.E
        self.Post = ParamBag(K=K, D=self.D, E=self.E)
        self.Post.setField('nu', as1D(nu), dims=('K'))
        self.Post.setField('B', B, dims=('K', 'D', 'D'))
        self.Post.setField('M', M, dims=('K', 'D', 'E'))
        self.Post.setField('V', V, dims=('K', 'E', 'E'))
    else:
        self.updatePost(SS)
    self.K = self.Post.K
def calcSmoothedMu(self, X, W=None):
    ''' Compute smoothed estimate of mean of statistic xxT.

    The prior contributes B with an effective count of
    (Prior.nu - D - 1), where D is self.D.

    Args
    ----
    X : 2D array, size N x D (a 1D array is treated as a single row),
        or None to get the prior-only estimate
    W : optional 1D array of per-row weights

    Returns
    -------
    Mu : 2D array, size D x D
    '''
    priorCount = self.Prior.nu - self.D - 1
    if X is None:
        return self.Prior.B / (priorCount)

    if X.ndim == 1:
        X = X[np.newaxis, :]
    _, D = X.shape

    # Weighted sufficient statistics
    if W is not None:
        W = as1D(W)
        scaledX = np.sqrt(W)[:, np.newaxis] * X
        sum_wxxT = np.dot(scaledX.T, scaledX)
        sum_w = np.sum(W)
    else:
        sum_wxxT = np.dot(X.T, X)
        sum_w = X.shape[0]

    Mu = (self.Prior.B + sum_wxxT) / (priorCount + sum_w)
    assert Mu.ndim == 2
    assert Mu.shape == (D, D)
    return Mu
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Args
    ----
    Data : dataset object with attributes X and dim
    SS : SuffStatBag or None
        Bag to fill. Created when None.
    LP : dict
        Holds dense responsibilities under 'resp', or otherwise sparse
        responsibilities under 'spR'.

    Returns
    --------
    SS : SuffStatBag object, with K components.
        Fields set: 'x' and 'xx' with dims ('K', 'D'), and 'N' if the
        bag does not already have it.
    '''
    X = Data.X
    isDense = 'resp' in LP
    R = LP['resp'] if isDense else LP['spR']
    K = R.shape[1]
    if isDense:
        # 1/2: mean statistic, 2/2: sum-of-squares statistic
        S_x = dotATB(R, X)
        S_xx = calcRXX_withDenseResp(R, X)
    else:
        S_x = R.T * X
        S_xx = calcSpRXX(X=X, spR_csr=R)

    if SS is None:
        SS = SuffStatBag(K=K, D=Data.dim)
    # Expected mean for each state k
    SS.setField('x', S_x, dims=('K', 'D'))
    # Expected sum-of-squares for each state k
    SS.setField('xx', S_xx, dims=('K', 'D'))

    # Expected count for each k
    # Usually computed by allocmodel. But sometimes not (eg TopicModel)
    if hasattr(SS, 'N'):
        return SS
    if isDense:
        N_K = LP['resp'].sum(axis=0)
    else:
        N_K = as1D(toCArray(LP['spR'].sum(axis=0)))
    SS.setField('N', N_K, dims='K')
    return SS
def createParamBagForPrior(Data=None, D=0, pnu=0, ptau=None, w_E=0,
                           P_EE=None, P_diag_E=None, P_diag_val=1.0,
                           Prior=None, **kwargs):
    ''' Initialize Prior ParamBag attribute.

    Args
    ----
    Data : optional dataset; when given, D is taken from Data.dim
    D : int, used only when Data is None
    pnu, ptau : scalars, each floored at 1e-9
    w_E : mean of the weight vector; when it has fewer than E entries
        it is tiled and truncated to length E
    P_EE : optional E x E precision matrix of the weight vector
    P_diag_E : optional diagonal for P_EE, used when P_EE is None
    P_diag_val : constant diagonal value, used when both are None
    Prior : optional existing ParamBag to fill

    Returns
    -------
    Prior : ParamBag
        with dimension attributes K, D, E
        with parameter attributes pnu, ptau, w_E, P_EE
        plus derived fields Pw_E = P_EE . w_E and wPw_1 = w_E . Pw_E
    '''
    D = int(D) if Data is None else int(Data.dim)
    E = D + 1

    # Init parameters of 1D Wishart prior on delta
    pnu = np.maximum(pnu, 1e-9)
    ptau = np.maximum(ptau, 1e-9)

    # Initialize precision matrix of the weight vector
    if P_EE is None:
        if P_diag_E is None:
            P_EE = np.diag(P_diag_val * np.ones(E))
        else:
            P_EE = np.diag(np.asarray(P_diag_E))
    else:
        P_EE = np.asarray(P_EE)
    assert P_EE.ndim == 2
    assert P_EE.shape == (E, E)

    # Initialize mean of the weight vector
    w_E = as1D(np.asarray(w_E))
    if w_E.size < E:
        w_E = np.tile(w_E, E)[:E]
    assert w_E.ndim == 1
    assert w_E.size == E

    if Prior is None:
        Prior = ParamBag(K=0, D=D, E=E)
    if not hasattr(Prior, 'E'):
        Prior.E = E
    assert Prior.D == D
    assert Prior.E == E

    for name, value, dims in (
            ('pnu', pnu, None),
            ('ptau', ptau, None),
            ('w_E', w_E, ('E')),
            ('P_EE', P_EE, ('E', 'E'))):
        Prior.setField(name, value, dims=dims)

    # Cache products that depend only on the prior
    Pw_E = np.dot(P_EE, w_E)
    wPw_1 = np.dot(w_E, Pw_E)
    Prior.setField('Pw_E', Pw_E, dims=('E'))
    Prior.setField('wPw_1', wPw_1, dims=None)
    return Prior
def loadTopicModelFromTxtFiles(snapshotPath, returnTPA=False,
                               returnWordCounts=False, normalizeProbs=True,
                               normalizeTopics=True, **kwargs):
    ''' Load from snapshot text files.

    Args
    ----
    snapshotPath : str
        Directory containing '<key>.txt' files plus either 'topics.txt'
        or the 'TopicWordCount_*.txt' sparse-matrix files.
    returnTPA : bool
        If True, return the tuple (topics, probs, alpha) instead of
        building a model.
    returnWordCounts : bool
        If True (and returnTPA is False), also return Mdict['WordCounts'].
    normalizeProbs : bool
        If True, probs is divided by its sum (returnTPA only).
    normalizeTopics : bool
        If True, each row of topics is divided by its sum (returnTPA only).

    Returns
    -------
    topics, probs, alpha : if returnTPA
    hmodel, WordCounts : if returnWordCounts
    hmodel : otherwise

    Raises
    ------
    ValueError
        If returnTPA is set and alpha is found neither in the loaded
        files nor in os.environ.
    '''
    Mdict = dict()
    possibleKeys = [
        'K', 'probs', 'alpha', 'beta', 'lam', 'gamma', 'nTopics', 'nTypes',
        'vocab_size'
    ]
    # Alternate file names, stored under their canonical key
    keyMap = dict(beta='lam', nTopics='K', nTypes='vocab_size')
    for key in possibleKeys:
        try:
            arr = np.loadtxt(snapshotPath + "/%s.txt" % (key))
            if key in keyMap:
                Mdict[keyMap[key]] = arr
            else:
                Mdict[key] = arr
        except Exception:
            # Best effort: any failure to load a key's file is ignored,
            # so that key is simply absent from Mdict.
            pass
    assert 'K' in Mdict
    assert 'lam' in Mdict
    K = int(Mdict['K'])
    # NOTE(review): 'vocab_size' is not asserted above; a missing file
    # would surface here as KeyError.
    V = int(Mdict['vocab_size'])
    if os.path.exists(snapshotPath + "/topics.txt"):
        # Dense topics stored directly as a K x V text matrix
        Mdict['topics'] = np.loadtxt(snapshotPath + "/topics.txt")
        Mdict['topics'] = as2D(toCArray(Mdict['topics'], dtype=np.float64))
        assert Mdict['topics'].ndim == 2
        assert Mdict['topics'].shape == (K, V)
    else:
        # Sparse word counts stored as (data, indices, indptr) triplet,
        # in CSC format when the cscindptr file exists, else CSR.
        TWC_data = np.loadtxt(snapshotPath + "/TopicWordCount_data.txt")
        TWC_inds = np.loadtxt(snapshotPath + "/TopicWordCount_indices.txt",
                              dtype=np.int32)
        if os.path.exists(snapshotPath + "/TopicWordCount_cscindptr.txt"):
            TWC_cscindptr = np.loadtxt(
                snapshotPath + "/TopicWordCount_cscindptr.txt",
                dtype=np.int32)
            TWC = scipy.sparse.csc_matrix(
                (TWC_data, TWC_inds, TWC_cscindptr), shape=(K, V))
        else:
            TWC_csrindptr = np.loadtxt(
                snapshotPath + "/TopicWordCount_indptr.txt",
                dtype=np.int32)
            TWC = scipy.sparse.csr_matrix(
                (TWC_data, TWC_inds, TWC_csrindptr), shape=(K, V))
        Mdict['WordCounts'] = TWC.toarray()
    if returnTPA:
        # Load topics : 2D array, K x vocab_size
        if 'WordCounts' in Mdict:
            topics = Mdict['WordCounts'] + Mdict['lam']
        else:
            topics = Mdict['topics']
        topics = as2D(toCArray(topics, dtype=np.float64))
        assert topics.ndim == 2
        K = topics.shape[0]
        if normalizeTopics:
            topics /= topics.sum(axis=1)[:, np.newaxis]
        # Load probs : 1D array, size K
        try:
            probs = Mdict['probs']
        except KeyError:
            # No saved probs: fall back to uniform over K topics
            probs = (1.0 / K) * np.ones(K)
        probs = as1D(toCArray(probs, dtype=np.float64))
        assert probs.ndim == 1
        assert probs.size == K
        if normalizeProbs:
            probs = probs / np.sum(probs)
        # Load alpha : scalar float > 0
        try:
            alpha = float(Mdict['alpha'])
        except KeyError:
            # Fall back to an environment variable named 'alpha'
            if 'alpha' in os.environ:
                alpha = float(os.environ['alpha'])
            else:
                raise ValueError('Unknown parameter alpha')
        return topics, probs, alpha
    # BUILD HMODEL FROM LOADED TXT
    infAlg = 'VB'
    # avoids circular import
    from bnpy.HModel import HModel
    # Presence of 'gamma' selects the HDP allocation model
    if 'gamma' in Mdict:
        aPriorDict = dict(alpha=Mdict['alpha'], gamma=Mdict['gamma'])
        HDPTopicModel = AllocModelConstructorsByName['HDPTopicModel']
        amodel = HDPTopicModel(infAlg, aPriorDict)
    else:
        FiniteTopicModel = AllocModelConstructorsByName['FiniteTopicModel']
        amodel = FiniteTopicModel(infAlg, dict(alpha=Mdict['alpha']))
    omodel = ObsModelConstructorsByName['Mult'](infAlg, **Mdict)
    hmodel = HModel(amodel, omodel)
    hmodel.set_global_params(**Mdict)
    if returnWordCounts:
        # NOTE(review): 'WordCounts' is only stored by the sparse branch
        # above; on the topics.txt path this lookup looks like it would
        # raise KeyError -- confirm.
        return hmodel, Mdict['WordCounts']
    return hmodel
def loadTopicModel(matfilepath, queryLap=None, prefix=None,
                   returnWordCounts=0, returnTPA=0, normalizeTopics=0,
                   normalizeProbs=0, **kwargs):
    ''' Load saved topic model

    Three on-disk layouts are tried in order:
    1. files matching '*.log_prob_w' : handled by loadTopicModelFromMEDLDA
    2. directories matching 'Lap*TopicSnapshot' : handled by
       loadTopicModelFromTxtFiles
    3. a matfile, read with loadDictFromMatfile

    Args
    ----
    matfilepath : str
        Directory of saved checkpoints. When no prefix can be determined,
        it is passed to loadDictFromMatfile unchanged.
    queryLap : number or None
        Passed to getPrefixForLapQuery when prefix is None.
    prefix : str or None
        Checkpoint prefix; looked up from queryLap when None.
    returnWordCounts, returnTPA, normalizeTopics, normalizeProbs : flags
        See Returns. The normalize flags apply only when returnTPA is set.

    Returns
    -------
    topics : 2D array, K x vocab_size (if returnTPA)
    probs : 1D array, size K (if returnTPA)
    alpha : scalar (if returnTPA)
    eta : 1D array (if returnTPA and the matfile contains 'eta')
    hmodel : HModel
    WordCounts : 2D array, size K x vocab_size (if returnWordCounts)

    Raises
    ------
    ValueError
        If returnTPA is set and alpha is found neither in the matfile
        nor in os.environ.
    '''
    if prefix is None:
        # NOTE(review): lapQuery is assigned but never used afterwards.
        prefix, lapQuery = getPrefixForLapQuery(matfilepath, queryLap)
    # avoids circular import
    from bnpy.HModel import HModel
    if len(glob.glob(os.path.join(matfilepath, "*.log_prob_w"))) > 0:
        return loadTopicModelFromMEDLDA(matfilepath, prefix,
                                        returnTPA=returnTPA)
    snapshotList = glob.glob(os.path.join(matfilepath, 'Lap*TopicSnapshot'))
    # NOTE(review): matfileList is computed but never used afterwards.
    matfileList = glob.glob(os.path.join(matfilepath, 'Lap*TopicModel.mat'))
    if len(snapshotList) > 0:
        if prefix is None:
            # Reached only if the prefix lookup above returned None
            # (presumably it never does -- TODO confirm).
            snapshotList.sort()
            snapshotPath = snapshotList[-1]
        else:
            # Last path containing the prefix wins; stays None when
            # no snapshot path contains it.
            snapshotPath = None
            for curPath in snapshotList:
                if curPath.count(prefix):
                    snapshotPath = curPath
        return loadTopicModelFromTxtFiles(snapshotPath,
                                          normalizeTopics=normalizeTopics,
                                          normalizeProbs=normalizeProbs,
                                          returnWordCounts=returnWordCounts,
                                          returnTPA=returnTPA)
    if prefix is not None:
        matfilepath = os.path.join(matfilepath, prefix + 'TopicModel.mat')
    Mdict = loadDictFromMatfile(matfilepath)
    if 'SparseWordCount_data' in Mdict:
        data = np.asarray(Mdict['SparseWordCount_data'], dtype=np.float64)
        K = int(Mdict['K'])
        vocab_size = int(Mdict['vocab_size'])
        try:
            # CSR triplet layout: (data, indices, indptr)
            indices = Mdict['SparseWordCount_indices']
            indptr = Mdict['SparseWordCount_indptr']
            WordCounts = scipy.sparse.csr_matrix((data, indices, indptr),
                                                 shape=(K, vocab_size))
        except KeyError:
            # Coordinate layout; stored ids are shifted down by one
            # before use (presumably saved 1-based -- TODO confirm).
            rowIDs = Mdict['SparseWordCount_i'] - 1
            colIDs = Mdict['SparseWordCount_j'] - 1
            WordCounts = scipy.sparse.csr_matrix((data, (rowIDs, colIDs)),
                                                 shape=(K, vocab_size))
        Mdict['WordCounts'] = WordCounts.toarray()
    if returnTPA:
        # Load topics : 2D array, K x vocab_size
        if 'WordCounts' in Mdict:
            topics = Mdict['WordCounts'] + Mdict['lam']
        else:
            topics = Mdict['topics']
        topics = as2D(toCArray(topics, dtype=np.float64))
        assert topics.ndim == 2
        K = topics.shape[0]
        if normalizeTopics:
            topics /= topics.sum(axis=1)[:, np.newaxis]
        # Load probs : 1D array, size K
        try:
            probs = Mdict['probs']
        except KeyError:
            # No saved probs: fall back to uniform over K topics
            probs = (1.0 / K) * np.ones(K)
        probs = as1D(toCArray(probs, dtype=np.float64))
        assert probs.ndim == 1
        assert probs.size == K
        if normalizeProbs:
            probs = probs / np.sum(probs)
        # Load alpha : scalar float > 0
        try:
            alpha = float(Mdict['alpha'])
        except KeyError:
            # Fall back to an environment variable named 'alpha'
            if 'alpha' in os.environ:
                alpha = float(os.environ['alpha'])
            else:
                raise ValueError('Unknown parameter alpha')
        if 'eta' in Mdict:
            # Return gains a fourth element when 'eta' was saved
            return topics, probs, alpha, as1D(toCArray(Mdict['eta']))
        return topics, probs, alpha
    infAlg = 'VB'
    # Presence of 'gamma' selects the HDP allocation model
    if 'gamma' in Mdict:
        aPriorDict = dict(alpha=Mdict['alpha'], gamma=Mdict['gamma'])
        HDPTopicModel = AllocModelConstructorsByName['HDPTopicModel']
        amodel = HDPTopicModel(infAlg, aPriorDict)
    else:
        FiniteTopicModel = AllocModelConstructorsByName['FiniteTopicModel']
        amodel = FiniteTopicModel(infAlg, dict(alpha=Mdict['alpha']))
    omodel = ObsModelConstructorsByName['Mult'](infAlg, **Mdict)
    hmodel = HModel(amodel, omodel)
    hmodel.set_global_params(**Mdict)
    if returnWordCounts:
        # NOTE(review): 'WordCounts' is only stored when the matfile has
        # 'SparseWordCount_data' (or already holds the key) -- confirm.
        return hmodel, Mdict['WordCounts']
    return hmodel
def plotGauss1DFromHModel(hmodel, compListToPlot=None, compsToHighlight=None,
                          activeCompIDs=None, MaxKToDisplay=50,
                          proba_thr=0.0001, ax_handle=None, Colors=Colors,
                          dataset=None, **kwargs):
    ''' Make line plot of pdf for each component (1D observations).

    Args
    ----
    hmodel : model with attributes allocModel and obsModel
    compListToPlot : optional sequence of component IDs to draw
        Defaults to all K components of the obsModel.
    compsToHighlight : optional component ID(s) drawn in color
        When given, all other drawn components are black.
    activeCompIDs : optional sequence of component IDs
        Position within this sequence is the index used to query hmodel.
        Defaults to all K components of the obsModel.
    MaxKToDisplay : int, stop after this many components
    proba_thr : float
        Components with weight below this are skipped unless highlighted.
    ax_handle : optional axis to make current before plotting
    Colors : sequence of colors, cycled by position in compListToPlot
    dataset : optional dataset; when it has attribute X, a 50-bin
        normalized histogram of its first column is drawn first
    '''
    if ax_handle is not None:
        pylab.sca(ax_handle)

    if compsToHighlight is not None:
        compsToHighlight = as1D(np.asarray(compsToHighlight))
    else:
        compsToHighlight = list()
    if compListToPlot is None:
        compListToPlot = np.arange(0, hmodel.obsModel.K)
    if activeCompIDs is None:
        activeCompIDs = np.arange(0, hmodel.obsModel.K)
    else:
        # The elementwise '==' lookup below needs an array, not a list
        activeCompIDs = np.asarray(activeCompIDs)

    # Load appearance probabilities as single vector
    if hmodel.allocModel.K == hmodel.obsModel.K:
        w = hmodel.allocModel.get_active_comp_probs()
    else:
        w = np.ones(hmodel.obsModel.K)

    if dataset is not None:
        if hasattr(dataset, 'X'):
            # 'density' replaces the 'normed' keyword, which newer
            # Matplotlib releases no longer accept.
            pylab.hist(dataset.X[:, 0], 50, density=True)

    nSkip = 0
    nGood = 0
    for ii, compID in enumerate(compListToPlot):
        if compID not in activeCompIDs:
            continue
        # kk : position of compID among active comps, used to index hmodel
        kk = np.flatnonzero(activeCompIDs == compID)
        assert kk.size == 1
        kk = kk[0]

        if w[kk] < proba_thr and compID not in compsToHighlight:
            nSkip += 1
            continue

        mu = hmodel.obsModel.get_mean_for_comp(kk)
        sigma2 = hmodel.obsModel.get_covar_mat_for_comp(kk)
        if len(compsToHighlight) == 0 or compID in compsToHighlight:
            color = Colors[ii % len(Colors)]
            plotGauss1D(mu, sigma2, color=color)
        else:
            # Non-highlighted component. Membership is decided by compID
            # alone; the positional index kk is not a component ID.
            plotGauss1D(mu, sigma2, color='k')

        nGood += 1
        if nGood >= MaxKToDisplay:
            print('DISPLAY LIMIT EXCEEDED. Showing %d/%d components'
                  % (nGood, len(activeCompIDs)))
            break
    if nSkip > 0:
        print('SKIPPED %d comps with size below %.2f' % (nSkip, proba_thr))
def __init__(self, X=None, doc_range=None, nDocTotal=None, Xprev=None,
             TrueZ=None, TrueParams=None, fileNames=None, summary=None,
             **kwargs):
    ''' Create an instance of GroupXData for provided array X

    Args
    ----
    X : array-like, converted to 2D float64
    doc_range : array-like, converted to 1D int32
    nDocTotal : passed to _set_dependent_params
    Xprev : optional array-like, converted to 2D float64
    TrueZ : optional true labels, stored as TrueParams['Z'];
        the number of unique labels is stored as TrueParams['K']
    TrueParams : optional dict of true parameters, each value converted
        with toCArray
    fileNames : optional source file name(s) for each group/sequence
    summary : optional, stored as-is
    **kwargs : extra dataset attributes
        Each one not already an attribute, and not starting with '__',
        is squeezed and stored. Zero-dimensional values are stored as
        float, or skipped if they cannot be converted to float.

    Post Condition
    ---------
    self.X : 2D array, size N x D
        with standardized dtype, alignment, byteorder.
    self.Xprev : 2D array, size N x D
        with standardized dtype, alignment, byteorder.
    self.doc_range : 1D array, size nDoc+1
    '''
    self.X = as2D(toCArray(X, dtype=np.float64))
    self.doc_range = as1D(toCArray(doc_range, dtype=np.int32))
    if summary is not None:
        self.summary = summary
    if Xprev is not None:
        self.Xprev = as2D(toCArray(Xprev, dtype=np.float64))

    # Verify attributes are consistent
    self._set_dependent_params(doc_range, nDocTotal)
    self._check_dims()

    # Add optional true parameters / true hard labels
    if TrueParams is not None:
        self.TrueParams = dict()
        for key, arr in TrueParams.items():
            self.TrueParams[key] = toCArray(arr)

    if TrueZ is not None:
        if not hasattr(self, 'TrueParams'):
            self.TrueParams = dict()
        self.TrueParams['Z'] = as1D(toCArray(TrueZ))
        self.TrueParams['K'] = np.unique(self.TrueParams['Z']).size

    # Add optional source files for each group/sequence
    if fileNames is not None:
        # Unwrap a value nested inside a 1 x 1 array
        if hasattr(fileNames, 'shape') and fileNames.shape == (1, 1):
            fileNames = fileNames[0, 0]
        if len(fileNames) > 1:
            self.fileNames = [
                str(x).strip() for x in np.squeeze(fileNames)
            ]
        else:
            self.fileNames = [str(fileNames[0])]

    # Add extra data attributes custom for the dataset
    for key in kwargs:
        if hasattr(self, key):
            continue
        if not key.startswith("__"):
            arr = np.squeeze(as1D(kwargs[key]))
            if arr.shape == ():
                try:
                    arr = float(arr)
                except (TypeError, ValueError):
                    # Non-numeric scalars are skipped. ValueError covers
                    # strings that do not parse as a number.
                    continue
            setattr(self, key, arr)
def __init__(self, X=None, nObsTotal=None, TrueZ=None, Xprev=None, Y=None,
             TrueParams=None, name=None, summary=None, dtype='auto',
             row_names=None, column_names=None, y_column_names=None,
             xprev_column_names=None, do_copy=True, **kwargs):
    ''' Constructor for XData instance given in-memory dense array X.

    Args
    ----
    X : array with a dtype attribute, stored as 2D
    nObsTotal : passed to _set_dependent_params
    TrueZ : optional true labels, stored as TrueParams['Z'];
        the number of unique labels is stored as TrueParams['K']
    Xprev, Y : optional arrays, converted to 2D with the chosen dtype
    TrueParams : optional dict, stored as-is
    name : optional, stored as str
    summary : optional, stored as-is
    dtype : 'auto' (use X.dtype) or an explicit dtype
    row_names : currently not stored (see note in body)
    column_names : optional sequence of length self.dim, stored as list
        of str. Defaults to 'dim_0', 'dim_1', ...
    y_column_names, xprev_column_names : accepted but not used here
    do_copy : bool
        When False and X already has the requested dtype, X is stored
        without conversion through toCArray.

    Post Condition
    ---------
    self.X : 2D array, size N x D
        with standardized dtype, alignment, byteorder.
    '''
    if dtype == 'auto':
        dtype = X.dtype
    if not do_copy and X.dtype == dtype:
        self.X = as2D(X)
    else:
        self.X = as2D(toCArray(X, dtype=dtype))

    if Xprev is not None:
        self.Xprev = as2D(toCArray(Xprev, dtype=dtype))
    if Y is not None:
        self.Y = as2D(toCArray(Y, dtype=dtype))

    # Verify attributes are consistent
    self._set_dependent_params(nObsTotal=nObsTotal)
    self._check_dims(do_copy=do_copy)

    # Add optional true parameters / true hard labels
    if TrueParams is not None:
        self.TrueParams = TrueParams
    if TrueZ is not None:
        if not hasattr(self, 'TrueParams'):
            self.TrueParams = dict()
        self.TrueParams['Z'] = as1D(toCArray(TrueZ))
        self.TrueParams['K'] = np.unique(self.TrueParams['Z']).size

    if summary is not None:
        self.summary = summary
    if name is not None:
        self.name = str(name)

    # Row names are always the integer positions 0 .. nObs-1.
    # NOTE(review): a caller-provided row_names argument is ignored, as
    # in the previous revision where the line storing it was disabled.
    # Confirm whether provided names should be honored.
    self.row_names = np.arange(0, self.nObs, 1)

    # Add optional column names.
    # Built as real lists so they can be indexed and iterated repeatedly
    # (a bare map() is a one-shot iterator on Python 3).
    if column_names is None:
        self.column_names = ["dim_%d" % n for n in range(self.dim)]
    else:
        assert len(column_names) == self.dim
        self.column_names = [str(c) for c in column_names]
def from_dict(self, Dict):
    ''' Restore attributes from a dictionary.

    Args
    ----
    Dict : dict with keys 'inferType', 'K', 'rho', 'omega'
        'inferType' and 'K' are stored as-is;
        'rho' and 'omega' are stored after 1D coercion.
    '''
    self.inferType = Dict['inferType']
    self.K = Dict['K']
    for fieldName in ('rho', 'omega'):
        setattr(self, fieldName, as1D(Dict[fieldName]))