def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Only dense responsibilities (LP['resp']) are supported.

    Args
    ----
    Data : dataset object with attributes X, Y, nObs and dim.
        Side effect: if Data has no attribute X_NE, it is created
        (X with one extra column of ones appended) and cached on Data.
    SS : SuffStatBag or None
        If None, a new bag is created. Otherwise fields are set in-place.
    LP : dict of local parameters
        Must contain 'resp', a 2D array with one column per component.
    **kwargs : ignored.

    Returns
    --------
    SS : SuffStatBag object, with K components.
        Fields set: xxT_KEE, yx_KE, yy_K, and N (only if not yet present).

    Raises
    ------
    ValueError
        If LP has no 'resp' entry.
    '''
    if not hasattr(Data, 'X_NE'):
        # Cache the augmented design matrix so later calls can reuse it.
        Data.X_NE = np.hstack([Data.X, np.ones(Data.nObs)[:, np.newaxis]])
    Y_N = Data.Y
    X_NE = Data.X_NE
    E = X_NE.shape[1]

    if 'resp' not in LP:
        # Sparse responsibilities are not implemented for these statistics.
        raise ValueError(
            "calcSummaryStats requires dense responsibilities in LP['resp'];"
            " sparse responsibilities are not supported yet.")

    # Dense responsibility calculations
    resp = LP['resp']
    K = resp.shape[1]
    S_yy_K = dotATB(resp, np.square(Y_N)).flatten()
    S_yx_KE = dotATB(resp, Y_N * X_NE)

    # Expected outer product.
    # The first component allocates the work buffers; later components
    # reuse them via `out=` to avoid a fresh allocation per component.
    S_xxT_KEE = np.zeros((K, E, E))
    sqrtResp_k_N = np.sqrt(resp[:, 0])
    sqrtR_X_k_NE = sqrtResp_k_N[:, np.newaxis] * X_NE
    S_xxT_KEE[0] = dotATA(sqrtR_X_k_NE)
    for k in range(1, K):
        np.sqrt(resp[:, k], out=sqrtResp_k_N)
        np.multiply(sqrtResp_k_N[:, np.newaxis], X_NE, out=sqrtR_X_k_NE)
        S_xxT_KEE[k] = dotATA(sqrtR_X_k_NE)

    if SS is None:
        SS = SuffStatBag(K=K, D=Data.dim, E=E)
    elif not hasattr(SS, 'E'):
        SS._Fields.E = E
    SS.setField('xxT_KEE', S_xxT_KEE, dims=('K', 'E', 'E'))
    SS.setField('yx_KE', S_yx_KE, dims=('K', 'E'))
    SS.setField('yy_K', S_yy_K, dims='K')

    # Expected count for each k
    # Usually computed by allocmodel. But just in case...
    if not hasattr(SS, 'N'):
        SS.setField('N', resp.sum(axis=0), dims='K')
    return SS
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate summary statistics for given dataset and local parameters

    Args
    ----
    Data : dataset object with attributes X (2D array) and dim.
    SS : SuffStatBag or None
        If None, a new bag is created. Otherwise fields are set in-place.
    LP : dict of local parameters
        Contains either 'resp' (dense 2D array, one column per component)
        or 'spR' (sparse responsibilities, handed to calcSpRXXT).
    **kwargs : ignored.

    Returns
    --------
    SS : SuffStatBag object, with K components.
        Fields set: xxT, and N (only if not yet present).
    '''
    X = Data.X
    D = Data.dim
    if 'resp' in LP:
        resp = LP['resp']
        K = resp.shape[1]
        # Compute expected outer-product statistic.
        # The first component allocates the work buffers; later components
        # reuse them via `out=` to avoid a fresh allocation per component.
        S_xxT = np.zeros((K, D, D))
        sqrtResp_k = np.sqrt(resp[:, 0])
        sqrtRX_k = sqrtResp_k[:, np.newaxis] * X
        S_xxT[0] = dotATA(sqrtRX_k)
        for k in range(1, K):
            np.sqrt(resp[:, k], out=sqrtResp_k)
            np.multiply(sqrtResp_k[:, np.newaxis], X, out=sqrtRX_k)
            S_xxT[k] = dotATA(sqrtRX_k)
    else:
        spR = LP['spR']
        K = spR.shape[1]
        # Compute expected outer-product statistic
        S_xxT = calcSpRXXT(X=X, spR_csr=spR)

    if SS is None:
        SS = SuffStatBag(K=K, D=D)
    # Expected outer-product for each state k
    SS.setField('xxT', S_xxT, dims=('K', 'D', 'D'))

    # Expected count for each k
    # Usually computed by allocmodel. But sometimes not (eg TopicModel)
    if not hasattr(SS, 'N'):
        if 'resp' in LP:
            SS.setField('N', LP['resp'].sum(axis=0), dims='K')
        else:
            SS.setField('N', as1D(toCArray(LP['spR'].sum(axis=0))), dims='K')
    return SS
def calcSummaryStatsForContigBlock(self, Data, SS=None, a=0, b=0):
    ''' Calculate sufficient stats for a single contiguous block of data

    The block consists of rows a (inclusive) through b (exclusive) of
    Data.X and Data.Xprev.

    Returns
    -------
    SS : SuffStatBag with a single component (K=1).
        Fields set: N, xxT, ppT, pxT.
    '''
    nColsCur = Data.X.shape[1]
    nColsPrev = Data.Xprev.shape[1]

    if SS is None:
        SS = SuffStatBag(K=1, D=nColsCur, E=nColsPrev)
    elif not hasattr(SS, 'E'):
        SS._Fields.E = nColsPrev

    prevBlock = Data.Xprev[a:b]
    curBlock = Data.X[a:b]

    # Each statistic gets a leading axis of length one for the K dimension.
    ppT_1EE = dotATA(prevBlock)[np.newaxis, :, :]
    xxT_1DD = dotATA(curBlock)[np.newaxis, :, :]
    pxT_1ED = dotATB(prevBlock, curBlock)[np.newaxis, :, :]

    blockSize_1 = (b - a) * np.ones(1)
    SS.setField('N', blockSize_1, dims='K')
    SS.setField('xxT', xxT_1DD, dims=('K', 'D', 'D'))
    SS.setField('ppT', ppT_1EE, dims=('K', 'E', 'E'))
    SS.setField('pxT', pxT_1ED, dims=('K', 'E', 'D'))
    return SS
def calcRXXT_withDenseResp(R=None, X=None, **kwargs):
    ''' Calculate per-component weighted outer-product statistic of X.

    For each column k of R, the rows of X are scaled by sqrt(R[:, k])
    and the scaled matrix is passed to dotATA.
    NOTE(review): dotATA presumably returns A.T.dot(A), which would make
    stat_XXT[k] the sum over rows n of R[n, k] * outer(X[n], X[n]) -- confirm.

    Args
    ----
    R : 2D array, shape (N, K)
    X : 2D array, shape (N, D)
    **kwargs : ignored.

    Returns
    -------
    stat_XXT : 3D array, shape (K, D, D)
    '''
    N, K = R.shape
    NX, D = X.shape
    assert N == NX
    stat_XXT = np.zeros((K, D, D))
    for k in range(K):
        sqrtRX_k = np.sqrt(R[:, k])[:, np.newaxis] * X
        stat_XXT[k] = dotATA(sqrtRX_k)
    return stat_XXT
def calcSummaryStats(Data, SS, LP, **kwargs):
    ''' Calculate sufficient statistics for local params at data slice.

    Args
    ----
    Data : dataset object with 2D array attributes X and Xprev.
    SS : SuffStatBag or None
        If None, a new bag is created. Otherwise fields are set in-place.
    LP : dict of local parameters
        Must contain 'resp', a 2D array with one column per component.
    **kwargs : ignored.

    Returns
    -------
    SS : SuffStatBag with K components.
        Fields set: xxT, ppT, pxT, and N (only if not yet present).
    '''
    X = Data.X
    Xprev = Data.Xprev
    resp = LP['resp']
    K = resp.shape[1]
    D = X.shape[1]
    E = Xprev.shape[1]

    if SS is None:
        SS = SuffStatBag(K=K, D=D, E=E)
    elif not hasattr(SS, 'E'):
        SS._Fields.E = E

    # Expected count for each k
    # Usually computed by allocmodel. But just in case...
    if not hasattr(SS, 'N'):
        SS.setField('N', np.sum(resp, axis=0), dims='K')

    # Expected outer products
    sqrtResp = np.sqrt(resp)
    xxT = np.empty((K, D, D))
    ppT = np.empty((K, E, E))
    pxT = np.empty((K, E, D))
    for k in range(K):
        sqrtResp_k = sqrtResp[:, k][:, np.newaxis]
        xxT[k] = dotATA(sqrtResp_k * X)
        ppT[k] = dotATA(sqrtResp_k * Xprev)
        # Cross term uses the un-rooted weights, applied to one factor only.
        pxT[k] = np.dot(Xprev.T, resp[:, k][:, np.newaxis] * X)
    SS.setField('xxT', xxT, dims=('K', 'D', 'D'))
    SS.setField('ppT', ppT, dims=('K', 'E', 'E'))
    SS.setField('pxT', pxT, dims=('K', 'E', 'D'))
    return SS
def calcSummaryStatsForContigBlock(self, Data, SS=None, a=0, b=0):
    ''' Calculate sufficient stats for a single contiguous block of data

    The block consists of rows a (inclusive) through b (exclusive) of Data.X.

    Returns
    -------
    SS : SuffStatBag with a single component (K=1).
        Fields set: N, x, xxT.
    '''
    if SS is None:
        SS = SuffStatBag(K=1, D=Data.dim)

    blockSize_1 = (b - a) * np.ones(1)
    SS.setField('N', blockSize_1, dims='K')

    # Each statistic gets a leading axis of length one for the K dimension.
    colSums_1D = np.sum(Data.X[a:b], axis=0)[np.newaxis, :]
    SS.setField('x', colSums_1D, dims=('K', 'D'))

    outerProd_1DD = dotATA(Data.X[a:b])[np.newaxis, :, :]
    SS.setField('xxT', outerProd_1DD, dims=('K', 'D', 'D'))
    return SS
def calcSummaryStatsForContigBlock(self, Data, SS=None, a=None, b=None,
                                   **kwargs):
    ''' Calculate summary statistics for specific block of dataset

    The block consists of rows a (inclusive) through b (exclusive) of Data.X.

    Args
    ----
    Data : dataset object with attributes X (2D array) and dim.
    SS : SuffStatBag or None
        If None, a new single-component bag is created.
        Otherwise the provided bag is filled in-place and returned.
    a : int or None
        Start row of the block. None means the first row (0).
    b : int or None
        Stop row of the block (exclusive). None means the number of
        rows in Data.X.
    **kwargs : ignored.

    Returns
    --------
    SS : SuffStatBag object, with K components.
        Fields set: N (equal to b - a) and xxT.
    '''
    # Resolve open-ended bounds the same way the slice Data.X[a:b] does,
    # so that the count N can be computed as b - a.
    if a is None:
        a = 0
    if b is None:
        b = Data.X.shape[0]

    if SS is None:
        SS = SuffStatBag(K=1, D=Data.dim)

    # Expected count
    SS.setField('N', (b - a) * np.ones(1, dtype=np.float64), dims='K')
    # Expected outer-product
    xxT = dotATA(Data.X[a:b])[np.newaxis, :, :]
    SS.setField('xxT', xxT, dims=('K', 'D', 'D'))
    return SS