def calcSummaryStats(Data, LP, doPrecompEntropy=0, doPrecompMergeEntropy=0,
                     mPairIDs=None, trackDocUsage=0, **kwargs):
    ''' Build the sufficient-statistics bag for a data slice and its local params.

    Parameters
    ----------
    Data : object exposing `doc_range`, `dim` and `nDoc`
        `doc_range[n]` / `doc_range[n + 1]` are used as the start / stop
        row indices of sequence n within `LP['resp']`.
    LP : dict of local parameters
        Must hold 'resp' (2D; its second axis defines K). Transition counts
        are read from 'TransCount' when that key exists, otherwise from
        'respPair'; either one is summed over its first axis.
    doPrecompEntropy : truthy to store the 'Htable' / 'Hstart' ELBO terms.
        These terms are also stored whenever 'Htable' is already in LP.
    doPrecompMergeEntropy : truthy to store merge terms for `mPairIDs`.
    mPairIDs : sized collection of candidate merge pairs, or None
        Its length becomes M of the returned bag (0 when None).
    trackDocUsage : truthy to store the 'DocUsageCount' selection term.
    **kwargs : accepted and ignored.

    Returns
    -------
    SS : SuffStatBag
        With fields 'StartStateCount', 'TransStateCount', 'N' and 'nDoc',
        plus whichever optional terms were requested.
    '''
    nPairs = 0 if mPairIDs is None else len(mPairIDs)

    resp = LP['resp']
    nStates = resp.shape[1]

    # Rows of resp located at the first position of every sequence.
    firstRows = Data.doc_range[:-1]
    startCounts = np.sum(resp[firstRows], axis=0)
    stateCounts = np.sum(resp, axis=0)

    # Prefer the 'TransCount' entry when available; else use 'respPair'.
    transKey = 'TransCount' if 'TransCount' in LP else 'respPair'
    transCounts = np.sum(LP[transKey], axis=0)

    SS = SuffStatBag(K=nStates, D=Data.dim, M=nPairs)
    SS.setField('StartStateCount', startCounts, dims='K')
    SS.setField('TransStateCount', transCounts, dims=('K', 'K'))
    SS.setField('N', stateCounts, dims='K')
    SS.setField('nDoc', Data.nDoc, dims=None)

    if doPrecompEntropy or 'Htable' in LP:
        # The memoized dict provides both 'Htable' and 'Hstart'.
        entropyTerms = calcELBO_NonlinearTerms(
            Data=Data, LP=LP, returnMemoizedDict=1)
        SS.setELBOTerm('Htable', entropyTerms['Htable'], dims=('K', 'K'))
        SS.setELBOTerm('Hstart', entropyTerms['Hstart'], dims='K')

    if doPrecompMergeEntropy:
        mergeHstart, mergeHtable = \
            HMMUtil.PrecompMergeEntropy_SpecificPairs(LP, Data, mPairIDs)
        SS.setMergeTerm('Hstart', mergeHstart, dims='M')
        SS.setMergeTerm('Htable', mergeHtable, dims=('M', 2, 'K'))
        SS.mPairIDs = np.asarray(mPairIDs)

    if trackDocUsage:
        # For every state, count the sequences in which its total
        # responsibility mass exceeds the 0.01 threshold.
        usageCounts = np.zeros(nStates)
        for docID in range(Data.nDoc):
            begin = Data.doc_range[docID]
            end = Data.doc_range[docID + 1]
            isUsed = np.sum(resp[begin:end], axis=0) > 0.01
            usageCounts += isUsed
        SS.setSelectionTerm('DocUsageCount', usageCounts, dims='K')

    return SS