# NOTE: these routines use numpy and scipy.special directly; the remaining
# helpers referenced below (OptimizerRhoOmega, OptimizerRhoOmegaBetter,
# rho2beta, c_Beta, c_Dir, kvec, gtsum, evalELBOandPrint, np2flatstr,
# pprintResult, and the constant EPS) are assumed to be provided by the
# surrounding package and are not redefined here. Several defs below are
# methods excerpted from their classes (they take self) and reference
# attributes set elsewhere.
import numpy as np
from scipy.special import digamma, gammaln


def L_top(rho=None, omega=None, alpha=None, gamma=None,
          kappa=0, startAlpha=0, **kwargs):
    ''' Evaluate the top-level term of the surrogate objective
    '''
    if startAlpha == 0:
        startAlpha = alpha
    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digamma_omega = digamma(omega)
    ElogU = digamma(eta1) - digamma_omega
    Elog1mU = digamma(eta0) - digamma_omega
    diff_cBeta = K * c_Beta(1.0, gamma) - c_Beta(eta1, eta0)
    tAlpha = K * K * np.log(alpha) + K * np.log(startAlpha)
    if kappa > 0:
        coefU = K + 1.0 - eta1
        coef1mU = K * OptimizerRhoOmega.kvec(K) + 1.0 + gamma - eta0
        sumEBeta = np.sum(rho2beta(rho, returnSize='K'))
        tBeta = sumEBeta * (np.log(alpha + kappa) - np.log(kappa))
        tKappa = K * (np.log(kappa) - np.log(alpha + kappa))
    else:
        coefU = (K + 1) + 1.0 - eta1
        coef1mU = (K + 1) * OptimizerRhoOmega.kvec(K) + gamma - eta0
        tBeta = 0
        tKappa = 0
    diff_logU = np.inner(coefU, ElogU) \
        + np.inner(coef1mU, Elog1mU)
    return tAlpha + tKappa + tBeta + diff_cBeta + diff_logU

def DocTopicCount_to_sumLogPi(
        rho=None, omega=None,
        betaK=None, DocTopicCount=None, alpha=None, gamma=None, **kwargs):
    ''' Compute expected log Pi summaries from doc-topic counts.

    Returns
    -------
    sumLogPiActiveVec : 1D array, size K
    sumLogPiRemVec : 1D array, size K
    LP : dict of local parameters
    '''
    K = rho.size
    if betaK is None:
        betaK = rho2beta(rho, returnSize="K")
    theta = DocTopicCount + alpha * betaK[np.newaxis, :]
    thetaRem = alpha * (1 - np.sum(betaK))
    assert np.allclose(
        theta.sum(axis=1) + thetaRem,
        alpha + DocTopicCount.sum(axis=1))
    digammaSum = digamma(theta.sum(axis=1) + thetaRem)
    ElogPi = digamma(theta) - digammaSum[:, np.newaxis]
    ElogPiRem = digamma(thetaRem) - digammaSum
    sumLogPiActiveVec = np.sum(ElogPi, axis=0)
    sumLogPiRemVec = np.zeros(K)
    sumLogPiRemVec[-1] = np.sum(ElogPiRem)
    LP = dict(
        ElogPi=ElogPi,
        ElogPiRem=ElogPiRem,
        digammaSumTheta=digammaSum,
        theta=theta,
        thetaRem=thetaRem)
    return sumLogPiActiveVec, sumLogPiRemVec, LP

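# Illustrative usage sketch (added, not part of the original source). Calls the
# function above on a tiny toy problem; passing betaK explicitly sidesteps the
# rho2beta helper. The values of K, alpha, gamma here are arbitrary choices.
def _demo_DocTopicCount_to_sumLogPi():
    K = 3
    rho = 0.5 * np.ones(K)          # only rho.size is used when betaK is given
    betaK = np.ones(K) / (K + 1.0)  # leaves mass 1/(K+1) for inactive topics
    DocTopicCount = np.array([[10., 5., 1.],
                              [2., 8., 4.]])
    sumLogPiActiveVec, sumLogPiRemVec, LP = DocTopicCount_to_sumLogPi(
        rho=rho, betaK=betaK, DocTopicCount=DocTopicCount,
        alpha=5.0, gamma=10.0)
    assert sumLogPiActiveVec.size == K
    return sumLogPiActiveVec, sumLogPiRemVec
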
def setParamsFromCountVec(self, K, N=None):
    """ Set params to reasonable values given counts for each comp.

    Parameters
    --------
    K : int
        number of components
    N : 1D array, size K. optional, default=[1 1 1 1 ... 1]
        size of each component

    Post Condition for EM
    --------
    Attribute w is set to posterior mean given provided vector N.
    Default behavior sets w to uniform distribution.

    Post Condition for VB
    ---------
    Attribute theta is set so q(w) equals posterior given vector N.
    Default behavior has q(w) with mean of uniform and moderate variance.
    """
    if N is None:
        N = 1.0 * np.ones(K)
    assert N.ndim == 1
    assert N.size == K
    self.K = int(K)
    if self.inferType == 'EM':
        self.w = N + (self.gamma / K)
        self.w /= self.w.sum()
    else:
        self.theta = N + self.gamma / K
        self.Elogw = digamma(self.theta) - digamma(self.theta.sum())

def get_trans_prob_matrix(self):
    ''' Get matrix of transition probabilities for all K active states
    '''
    digammaSumVec = digamma(np.sum(self.transTheta, axis=1))
    expELogPi = digamma(self.transTheta) - digammaSumVec[:, np.newaxis]
    np.exp(expELogPi, out=expELogPi)
    return expELogPi[0:self.K, 0:self.K]

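# Illustrative sketch (added, not part of the original source): the same
# computation as get_trans_prob_matrix above, shown on a toy transTheta array
# without the class. The shape K x (K+1), with the last column standing for
# inactive states, is an assumption for this demo. Because each row is
# exp(E[log pi]) restricted to the active states, rows sum to less than one.
def _demo_trans_prob_matrix(K=3, seed=0):
    PRNG = np.random.RandomState(seed)
    transTheta = 1.0 + 10 * PRNG.rand(K, K + 1)
    digammaSumVec = digamma(np.sum(transTheta, axis=1))
    EPiMat = np.exp(digamma(transTheta) - digammaSumVec[:, np.newaxis])
    EPiMat = EPiMat[0:K, 0:K]
    assert np.all(EPiMat.sum(axis=1) < 1.0)
    return EPiMat
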
def calcELBO_LinearTerms(SS=None,
                         StartStateCount=None, TransStateCount=None,
                         rho=None, omega=None, Ebeta=None,
                         startTheta=None, transTheta=None,
                         startAlpha=0, alpha=0, kappa=None, gamma=None,
                         afterGlobalStep=0, todict=0, **kwargs):
    """ Calculate ELBO objective terms that are linear in suff stats.

    Returns
    -------
    L : scalar float
        L is sum of any term in ELBO that is const/linear wrt suff stats.
    """
    Ltop = L_top(rho=rho, omega=omega, alpha=alpha, gamma=gamma,
                 kappa=kappa, startAlpha=startAlpha)
    LdiffcDir = -c_Dir(transTheta) - c_Dir(startTheta)
    if afterGlobalStep:
        if todict:
            return dict(Lalloc=Ltop + LdiffcDir, Lslack=0)
        return Ltop + LdiffcDir
    K = rho.size
    if Ebeta is None:
        Ebeta = rho2beta(rho, returnSize='K+1')
    if SS is not None:
        StartStateCount = SS.StartStateCount
        TransStateCount = SS.TransStateCount
    # Augment suff stats to be sure have 0 in final column,
    # which represents inactive states.
    if StartStateCount.size == K:
        StartStateCount = np.hstack([StartStateCount, 0])
    if TransStateCount.shape[-1] == K:
        TransStateCount = np.hstack([TransStateCount, np.zeros((K, 1))])
    LstartSlack = np.inner(
        StartStateCount + startAlpha * Ebeta - startTheta,
        digamma(startTheta) - digamma(startTheta.sum()))
    alphaEbetaPlusKappa = alpha * np.tile(Ebeta, (K, 1))
    alphaEbetaPlusKappa[:, :K] += kappa * np.eye(K)
    digammaSum = digamma(np.sum(transTheta, axis=1))
    LtransSlack = np.sum(
        (TransStateCount + alphaEbetaPlusKappa - transTheta) *
        (digamma(transTheta) - digammaSum[:, np.newaxis]))
    if todict:
        return dict(Lalloc=Ltop + LdiffcDir, Lslack=LstartSlack + LtransSlack)
    return Ltop + LdiffcDir + LstartSlack + LtransSlack

def updateLPGivenDocTopicCount(self, LP, DocTopicCount):
    ''' Update all local parameters, given topic counts for all docs in set.

    Returns
    --------
    LP : dict of local params, with updated fields
        * DocTopicCount_gt
        * eta1, eta0
        * ElogV, Elog1mV
        * ElogPi
    '''
    DocTopicCount_gt = gtsum(DocTopicCount)
    Ebeta, Ebeta_gt = self.E_beta_and_betagt()
    eta1 = DocTopicCount + self.alpha * Ebeta
    eta0 = DocTopicCount_gt + self.alpha * Ebeta_gt
    # Double-check!
    Ebeta2, Ebeta_gt2 = self.E_beta_and_betagt()
    assert np.allclose(Ebeta2, Ebeta)
    assert np.allclose(Ebeta_gt2, Ebeta_gt)
    digammaBoth = digamma(eta1 + eta0)
    ElogV = digamma(eta1) - digammaBoth
    Elog1mV = digamma(eta0) - digammaBoth
    ElogPi = ElogV.copy()
    ElogPi[:, 1:] += np.cumsum(Elog1mV[:, :-1], axis=1)
    LP['DocTopicCount_gt'] = DocTopicCount_gt
    LP['eta1'] = eta1
    LP['eta0'] = eta0
    LP['ElogV'] = ElogV
    LP['Elog1mV'] = Elog1mV
    LP['ElogPi'] = ElogPi
    return LP

def learn_rhoomega_fromFixedCounts(DocTopicCount_d=None,
                                   nDoc=0,
                                   alpha=None, gamma=None,
                                   initrho=None, initomega=None):
    ''' Estimate rho and omega given fixed per-document topic counts.

    Alternates between recomputing expected log Pi summaries from the current
    beta and optimizing rho/omega, until the implied beta vector converges.

    Returns
    -------
    rho : 1D array, size K
    omega : 1D array, size K
    '''
    Nd = np.sum(DocTopicCount_d)
    K = DocTopicCount_d.size
    if initrho is None:
        rho = OptimizerRhoOmega.create_initrho(K)
    else:
        rho = initrho
    if initomega is None:
        omega = OptimizerRhoOmega.create_initomega(K, nDoc, gamma)
    else:
        omega = initomega
    evalELBOandPrint(
        rho=rho, omega=omega,
        DocTopicCount=np.tile(DocTopicCount_d, (nDoc, 1)),
        alpha=alpha, gamma=gamma,
        msg='init',
    )
    betaK = rho2beta(rho, returnSize="K")
    prevbetaK = np.zeros_like(betaK)
    iterid = 0
    while np.sum(np.abs(betaK - prevbetaK)) > 0.000001:
        iterid += 1
        theta_d = DocTopicCount_d + alpha * betaK
        thetaRem = alpha * (1 - np.sum(betaK))
        assert np.allclose(theta_d.sum() + thetaRem, alpha + Nd)
        digammaSum = digamma(theta_d.sum() + thetaRem)
        Elogpi_d = digamma(theta_d) - digammaSum
        ElogpiRem = digamma(thetaRem) - digammaSum
        sumLogPi = nDoc * np.hstack([Elogpi_d, ElogpiRem])
        rho, omega, f, Info = OptimizerRhoOmega.\
            find_optimum_multiple_tries(
                alpha=alpha,
                gamma=gamma,
                sumLogPi=sumLogPi,
                nDoc=nDoc,
                initrho=rho,
                initomega=omega,
                approx_grad=1,
            )
        prevbetaK = betaK.copy()
        betaK = rho2beta(rho, returnSize="K")
        if iterid < 5 or iterid % 10 == 0:
            evalELBOandPrint(
                rho=rho, omega=omega,
                DocTopicCount=np.tile(DocTopicCount_d, (nDoc, 1)),
                alpha=alpha, gamma=gamma,
                msg=str(iterid),
            )
    return rho, omega

def get_init_prob_vector(self):
    ''' Get vector of initial probabilities for all K active states
    '''
    expELogPi0 = digamma(
        self.startTheta) - digamma(np.sum(self.startTheta))
    np.exp(expELogPi0, out=expELogPi0)
    return expELogPi0[0:self.K]

def set_global_params(self, hmodel=None, K=None, w=None, beta=None,
                      theta=None, **kwargs):
    """ Set global parameters to provided values.

    Post Condition for EM
    -------
    w set to valid vector with K components.

    Post Condition for VB
    -------
    theta set to define valid posterior over K components.
    """
    if hmodel is not None:
        self.setParamsFromHModel(hmodel)
    elif beta is not None:
        self.setParamsFromBeta(K, beta=beta)
    elif w is not None:
        self.setParamsFromBeta(K, beta=w)
    elif theta is not None and self.inferType.count('VB'):
        self.K = int(K)
        self.theta = theta
        self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
    else:
        raise ValueError("Unrecognized set_global_params args")

def setParamsFromBeta(self, K, beta=None):
    """ Set params to reasonable values given comp probabilities.

    Parameters
    --------
    K : int
        number of components
    beta : 1D array, size K. optional, default=[1/K 1/K ... 1/K]
        probability of each component

    Post Condition for EM
    --------
    Attribute w is set to the provided probability vector beta.
    Default behavior sets w to uniform distribution.

    Post Condition for VB
    ---------
    Attribute theta is set so q(w) has mean of beta and moderate variance.
    """
    if beta is None:
        beta = 1.0 / K * np.ones(K)
    assert beta.ndim == 1
    assert beta.size == K
    self.K = int(K)
    if self.inferType == 'EM':
        self.w = beta.copy()
    else:
        self.theta = self.K * beta
        self.Elogw = digamma(self.theta) - digamma(self.theta.sum())

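# Illustrative sketch (added, not part of the original source): with
# theta = K * beta as set above, q(w) is a Dirichlet whose mean is exactly
# beta while the total pseudo-count is only K, which is what the docstring
# above calls "moderate variance". The choice of beta here is arbitrary.
def _demo_setParamsFromBeta_theta(K=4):
    beta = np.arange(1.0, K + 1.0)
    beta /= beta.sum()
    theta = K * beta
    assert np.allclose(theta / theta.sum(), beta)
    assert np.allclose(theta.sum(), K)
    return theta
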
def from_dict(self, myDict):
    self.inferType = myDict['inferType']
    self.K = myDict['K']
    if self.inferType.count('VB') > 0:
        self.alpha = myDict['alpha']
        self.Elogw = digamma(self.alpha) - digamma(self.alpha.sum())
    elif self.inferType == 'EM':
        self.w = myDict['w']

def from_dict(self, myDict):
    self.inferType = myDict["inferType"]
    self.K = myDict["K"]
    if self.inferType.count("VB") > 0:
        self.alpha = myDict["alpha"]
        self.Elogw = digamma(self.alpha) - digamma(self.alpha.sum())
    elif self.inferType == "EM":
        self.w = myDict["w"]

def from_dict(self, myDict):
    self.inferType = myDict['inferType']
    self.K = myDict['K']
    if self.inferType == 'EM':
        self.w = myDict['w']
    else:
        self.theta = myDict['theta']
        self.Elogw = digamma(self.theta) - digamma(self.theta.sum())

def test_FixedCount_GlobalStepOnce(self, K=2, gamma=10.0, alpha=5.0,
                                   DocTopicCount_d=[100. / 2, 100 / 2]):
    ''' Given fixed counts, run one global update to rho/omega.

    Verify that regardless of initialization, the recovered beta value
    is roughly the same.
    '''
    print ''
    DocTopicCount_d = np.asarray(DocTopicCount_d, dtype=np.float64)
    print '------------- alpha %6.3f gamma %6.3f' % (alpha, gamma)
    print '------------- DocTopicCount [%s]' % (
        np2flatstr(DocTopicCount_d, fmt='%d'), )
    print '------------- DocTopicProb [%s]' % (
        np2flatstr(DocTopicCount_d / DocTopicCount_d.sum(), fmt='%.3f'), )
    Nd = np.sum(DocTopicCount_d)
    theta_d = DocTopicCount_d + alpha * 1.0 / (K + 1) * np.ones(K)
    thetaRem = alpha * 1 / (K + 1)
    assert np.allclose(theta_d.sum() + thetaRem, alpha + Nd)
    digammaSum = digamma(theta_d.sum() + thetaRem)
    Elogpi_d = digamma(theta_d) - digammaSum
    ElogpiRem = digamma(thetaRem) - digammaSum
    for nDoc in [1, 10, 100, 1000]:
        sumLogPi = nDoc * np.hstack([Elogpi_d, ElogpiRem])
        # Now, run inference from many inits to find optimal rho/omega
        Results = list()
        for initrho in [None, 1, 2, 3]:
            initomega = None
            if isinstance(initrho, int):
                PRNG = np.random.RandomState(initrho)
                initrho = PRNG.rand(K)
                initomega = 100 * PRNG.rand(K)
            rho, omega, f, Info = OptimizerRhoOmega.\
                find_optimum_multiple_tries(
                    alpha=alpha,
                    gamma=gamma,
                    sumLogPi=sumLogPi,
                    nDoc=nDoc,
                    initrho=initrho,
                    initomega=initomega,
                )
            betaK = rho2beta(rho, returnSize='K')
            Info.update(nDoc=nDoc, alpha=alpha, gamma=gamma,
                        rho=rho, omega=omega, betaK=betaK)
            Results.append(Info)
        pprintResult(Results)
        beta1 = Results[0]['betaK']
        for i in range(1, len(Results)):
            beta_i = Results[i]['betaK']
            assert np.allclose(beta1, beta_i, atol=0.0001, rtol=0)

def get_init_prob_vector(self):
    ''' Get vector of initial probabilities for all K active states
    '''
    if self.inferType == 'EM':
        pi0 = self.startPi
    else:
        pi0 = np.exp(digamma(self.startTheta)
                     - digamma(np.sum(self.startTheta)))
    return pi0

def calc_local_params(self, Data, LP, **kwargs):
    ''' Calculate local parameters for each data item and each component.

    This is part of the E-step.
    Note that this is the main place we differ from FiniteMixtureModel.py

    Args
    -------
    Data : bnpy data object with Data.nObs observations
    LP : local param dict with fields
        E_log_soft_ev : Data.nObs x K x K array
            E_log_soft_ev[n,l,m] = log p(data obs n | comps l, m)

    Returns
    -------
    LP : local param dict with fields
        resp : 3D array, size Data.nObs x K x K
            resp[n,l,m] = posterior responsibility comps. l,m have for item n
    '''
    if self.inferType.count('EM') > 0:
        raise NotImplementedError(
            'EM not implemented for FiniteSMSB (yet)')
    N = Data.nNodes
    K = self.K
    logSoftEv = LP['E_log_soft_ev']  # E x K x K
    logSoftEv[np.where(Data.sourceID == Data.destID), :, :] = 0
    logSoftEv = np.reshape(logSoftEv, (N, N, K, K))
    if 'respSingle' not in LP:
        LP['respSingle'] = np.ones((N, K)) / K
    resp = LP['respSingle']
    Elogpi = digamma(self.theta) - digamma(np.sum(self.theta))  # Size K
    respTerm = np.zeros(K)
    for lap in xrange(self.EStepLaps):
        for i in xrange(Data.nNodes):
            respTerm = np.einsum(
                'jlm,jm->l', logSoftEv[i, :, :, :], resp) + \
                np.einsum('jlm,jl->m', logSoftEv[:, i, :, :], resp)
            resp[i, :] = np.exp(Elogpi + respTerm)
            resp[i, :] /= np.sum(resp[i, :])
    # For now, do the stupid thing of building the N^2 x K x K resp array
    # (soon to change when using sparse data)
    # np.einsum makes fullResp[i,j,l,m] = resp[i,l]*resp[j,m]
    fullResp = np.einsum('il,jm->ijlm', resp, resp)
    fullResp = fullResp.reshape((N**2, K, K))
    fullResp[np.where(Data.sourceID == Data.destID), :, :] = 0
    LP['resp'] = fullResp
    LP['respSingle'] = resp
    self.make_hard_asgn_local_params(Data, LP)
    return LP

def get_trans_prob_matrix(self):
    ''' Get matrix of transition probabilities for all K active states
    '''
    if self.inferType == 'EM':
        EPiMat = self.transPi
    else:
        digammasumVec = digamma(np.sum(self.transTheta, axis=1))
        EPiMat = np.exp(digamma(self.transTheta)
                        - digammasumVec[:, np.newaxis])
    return EPiMat

def E_logPi(self):
    ''' Compute expected value of log \pi for each node and state.

    Returns
    -------
    ElogPi : 2D array, nNodes x K
    '''
    sumtheta = self.theta.sum(axis=1)
    ElogPi = digamma(self.theta) - digamma(sumtheta)[:, np.newaxis]
    return ElogPi

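# Illustrative check (added, not part of the original source): the Dirichlet
# identity used by E_logPi above, E[log pi_k] = digamma(theta_k) -
# digamma(sum_j theta_j) for pi ~ Dirichlet(theta), verified by Monte Carlo on
# a single toy row of theta.
def _check_ElogPi_identity(nSamples=100000, seed=0):
    PRNG = np.random.RandomState(seed)
    theta = np.array([2.0, 5.0, 3.0])
    piSamples = PRNG.dirichlet(theta, size=nSamples)
    mcEstimate = np.mean(np.log(piSamples), axis=0)
    exact = digamma(theta) - digamma(theta.sum())
    assert np.allclose(mcEstimate, exact, atol=0.05)
    return mcEstimate, exact
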
def update_global_params_VB(self, SS, **kwargs):
    """ Update attribute theta to optimize the ELBO objective.

    Post Condition for VB
    -------
    theta set to valid posterior for SS.K components.
    """
    self.theta = self.gamma / SS.K + SS.N
    self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
    self.K = SS.K

def Lalloc(Nvec=None, SS=None, gamma=0.5, theta=None, Elogw=None):
    ''' Evaluate the allocation term of the ELBO, given counts and theta.

    Returns
    -------
    Lalloc : scalar float
    '''
    assert theta is not None
    K = theta.size
    if Elogw is None:
        Elogw = digamma(theta) - digamma(theta.sum())
    if Nvec is None:
        Nvec = SS.N
    Lalloc = c_Dir(gamma / K * np.ones(K)) - c_Dir(theta)
    Lalloc_slack = np.inner(Nvec + gamma / K - theta, Elogw)
    return Lalloc + Lalloc_slack

def E_logPi(self):
    ''' Compute expected value of log \pi for each node and state.

    Returns
    -------
    ElogPi : 2D array, nNodes x K
    '''
    ElogPi = digamma(self.theta) - \
        digamma(np.sum(self.theta, axis=1))[:, np.newaxis]
    return ElogPi

def updateRhoOmega(theta=None, thetaRem=None,
                   initrho=None, omega=None,
                   alpha=0.5, gamma=10, logFunc=None):
    ''' Update rho, omega via numerical optimization.

    Will set vector omega to reasonable fixed value,
    and do gradient descent to optimize the vector rho.

    Returns
    -------
    rho : 1D array, size K
    omega : 1D array, size K
    '''
    nDoc = theta.shape[0]
    K = theta.shape[1]
    # Verify initial rho
    assert initrho is not None
    assert initrho.size == K
    # Verify initial omega
    assert omega is not None
    assert omega.size == K
    # Compute summaries of theta needed to update rho
    # sumLogPi : 1D array, size K
    # sumLogPiRem : scalar
    digammasumtheta = digamma(theta.sum(axis=1) + thetaRem)
    ElogPi = digamma(theta) - digammasumtheta[:, np.newaxis]
    sumLogPi = np.sum(ElogPi, axis=0)
    ElogPiRem = digamma(thetaRem) - digammasumtheta
    sumLogPiRem = np.sum(ElogPiRem)
    # Do the optimization
    try:
        rho, omega, fofu, Info = \
            OptimizerRhoOmegaBetter.find_optimum_multiple_tries(
                nDoc=nDoc,
                sumLogPiActiveVec=sumLogPi,
                sumLogPiRem=sumLogPiRem,
                gamma=gamma,
                alpha=alpha,
                initrho=initrho,
                initomega=omega,
                do_grad_omega=0,
                do_grad_rho=1)
    except ValueError as error:
        if logFunc:
            logFunc('***** Rho optim failed. Remain at cur val. ' +
                    str(error))
        rho = initrho
    assert rho.size == K
    assert omega.size == K
    return rho, omega

def update_global_params_soVB(self, SS, rho, **kwargs):
    """ Update attribute theta to optimize stochastic ELBO objective.

    Post Condition for VB
    -------
    theta set to valid posterior for SS.K components.
    """
    thetaStar = self.gamma / SS.K + SS.N
    self.theta = rho * thetaStar + (1 - rho) * self.theta
    self.Elogw = digamma(self.theta) - digamma(self.theta.sum())
    self.K = SS.K

def L_slack(self, SS):
    ''' Compute slack term of the allocation objective function.

    Returns
    -------
    L : scalar float
    '''
    ElogPi = digamma(self.theta) - \
        digamma(np.sum(self.theta, axis=1))[:, np.newaxis]
    Q = SS.NodeStateCount + self.alpha / SS.K - self.theta
    Lslack = np.sum(Q * ElogPi)
    return Lslack

def calcBetaExpectations(eta1, eta0):
    ''' Evaluate expected value of log u under Beta(u | eta1, eta0)

    Returns
    -------
    ElogU : 1D array, size K
    Elog1mU : 1D array, size K
    '''
    digammaBoth = digamma(eta0 + eta1)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth
    return ElogU, Elog1mU

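# Illustrative usage (added, not part of the original source): call
# calcBetaExpectations on toy pseudo-counts and sanity-check E[log u] and
# E[log (1 - u)] for u ~ Beta(eta1, eta0) by Monte Carlo.
def _demo_calcBetaExpectations(nSamples=100000, seed=0):
    eta1 = np.array([2.0, 5.0])
    eta0 = np.array([3.0, 1.0])
    ElogU, Elog1mU = calcBetaExpectations(eta1, eta0)
    PRNG = np.random.RandomState(seed)
    u = PRNG.beta(eta1, eta0, size=(nSamples, 2))
    assert np.allclose(np.mean(np.log(u), axis=0), ElogU, atol=0.05)
    assert np.allclose(np.mean(np.log(1 - u), axis=0), Elog1mU, atol=0.05)
    return ElogU, Elog1mU
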
def E_logPi(self, returnRem=0):
    ''' Compute expected value of log \pi for each node and state.

    Returns
    -------
    ElogPi : 2D array, nNodes x K
    '''
    digammasumtheta = digamma(self.theta.sum(axis=1) + self.thetaRem)
    ElogPi = digamma(self.theta) - digammasumtheta[:, np.newaxis]
    if returnRem:
        ElogPiRem = digamma(self.thetaRem) - digammasumtheta
        return ElogPi, ElogPiRem
    return ElogPi

def E_logPi(self, returnRem=0):
    ''' Compute expected value of log \pi for each node and state.

    Returns
    -------
    ElogPi : 2D array, nNodes x K
    '''
    digammasumtheta = digamma(self.theta.sum(axis=1) + self.thetaRem)
    ElogPi = digamma(self.theta) - digammasumtheta[:, np.newaxis]
    if returnRem:
        ElogPiRem = digamma(self.thetaRem) - digammasumtheta
        return ElogPi, ElogPiRem
    return ElogPi

def E_cDalphabeta_surrogate(alpha, rho, omega):
    ''' Compute expected value of cumulant function of alpha * beta.

    Returns
    -------
    csur : scalar float
    '''
    K = rho.size
    eta1 = rho * omega
    eta0 = (1 - rho) * omega
    digammaBoth = digamma(eta1 + eta0)
    ElogU = digamma(eta1) - digammaBoth
    Elog1mU = digamma(eta0) - digammaBoth
    OFFcoef = kvec(K)
    calpha = gammaln(alpha) + (K + 1) * np.log(alpha)
    return calpha + np.sum(ElogU) + np.inner(OFFcoef, Elog1mU)

def applyHardMergePairToLP(self, LP, kA, kB):
    ''' Apply hard merge pair to provided local parameters

    Returns
    --------
    mergeLP : dict of updated local parameters
    '''
    resp = np.delete(LP['resp'], kB, axis=1)
    theta = np.delete(LP['theta'], kB, axis=1)
    DocTopicCount = np.delete(LP['DocTopicCount'], kB, axis=1)
    resp[:, kA] += LP['resp'][:, kB]
    theta[:, kA] += LP['theta'][:, kB]
    DocTopicCount[:, kA] += LP['DocTopicCount'][:, kB]
    ElogPi = np.delete(LP['ElogPi'], kB, axis=1)
    ElogPi[:, kA] = digamma(theta[:, kA]) - LP['digammaSumTheta']
    return dict(resp=resp,
                theta=theta,
                thetaRem=LP['thetaRem'],
                ElogPi=ElogPi,
                ElogPiRem=LP['ElogPiRem'],
                DocTopicCount=DocTopicCount,
                digammaSumTheta=LP['digammaSumTheta'])

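# Illustrative sketch (added, not part of the original source): the
# column-merge pattern used above, shown on a toy responsibility matrix alone.
# Merging components kA and kB adds column kB into column kA of a copy with
# column kB deleted; row sums are preserved.
def _demo_hard_merge_columns(kA=0, kB=2):
    resp = np.asarray([[0.5, 0.2, 0.3],
                       [0.1, 0.6, 0.3]])
    mresp = np.delete(resp, kB, axis=1)  # np.delete returns a copy
    mresp[:, kA] += resp[:, kB]
    assert np.allclose(mresp.sum(axis=1), 1.0)
    return mresp
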
def E_logLam(self):
    ''' E[ \log \lambda_d ]

    Returns
    -------
    1D array, length D
    '''
    return digamma(self.a) - np.log(self.b)

def E_sumlogLam(self):
    ''' \sum_d E[ \log \lambda_d ]

    Returns
    -------
    float, scalar
    '''
    return np.sum(digamma(self.a) - np.log(self.b))

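# Illustrative check (added, not part of the original source): the identity
# behind E_logLam / E_sumlogLam above, E[log lam] = digamma(a) - log(b) for
# lam ~ Gamma(shape=a, rate=b). Note numpy's gamma sampler takes scale = 1/rate.
def _check_ElogLam_identity(nSamples=100000, seed=0):
    PRNG = np.random.RandomState(seed)
    a, b = 3.0, 2.0
    lamSamples = PRNG.gamma(shape=a, scale=1.0 / b, size=nSamples)
    mcEstimate = np.mean(np.log(lamSamples))
    exact = digamma(a) - np.log(b)
    assert np.allclose(mcEstimate, exact, atol=0.05)
    return mcEstimate, exact
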
def set_helper_params(self):
    ''' Set dependent attributes given primary global params.

    For DP mixture, this means precomputing digammas.
    '''
    DENOM = digamma(self.qalpha0 + self.qalpha1)
    self.ElogV = digamma(self.qalpha1) - DENOM
    self.Elog1mV = digamma(self.qalpha0) - DENOM
    if self.truncType == 'v':
        self.qalpha1[-1] = 1
        self.qalpha0[-1] = EPS  # avoid digamma(0), which is way too HUGE
        self.ElogV[-1] = 0  # log(1) => 0
        self.Elog1mV[-1] = np.log(1e-40)  # log(0) => -INF, never used
    # Calculate expected mixture weights E[ log w_k ]
    # Copy so we can do += without modifying ElogV
    self.Elogw = self.ElogV.copy()
    self.Elogw[1:] += self.Elog1mV[:-1].cumsum()

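# Illustrative sketch (added, not part of the original source): the
# stick-breaking identity used above, E[log w_k] = E[log V_k] +
# sum_{j<k} E[log (1 - V_j)], computed both by the vectorized cumsum from
# set_helper_params and by an explicit loop. The toy qalpha values are arbitrary.
def _demo_stickbreak_Elogw(K=4, seed=0):
    PRNG = np.random.RandomState(seed)
    qalpha1 = 1.0 + PRNG.rand(K)
    qalpha0 = 1.0 + PRNG.rand(K)
    DENOM = digamma(qalpha0 + qalpha1)
    ElogV = digamma(qalpha1) - DENOM
    Elog1mV = digamma(qalpha0) - DENOM
    Elogw = ElogV.copy()
    Elogw[1:] += Elog1mV[:-1].cumsum()
    Elogw_loop = np.array(
        [ElogV[k] + np.sum(Elog1mV[:k]) for k in range(K)])
    assert np.allclose(Elogw, Elogw_loop)
    return Elogw
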
def set_global_params(self, hmodel=None, K=None, w=None, alpha=None,
                      **kwargs):
    """ Directly set global parameters alpha to provided values
    """
    if hmodel is not None:
        self.K = hmodel.allocModel.K
        if self.inferType == "EM":
            self.w = hmodel.allocModel.w
        else:
            self.alpha = hmodel.allocModel.alpha
            self.Elogw = digamma(self.alpha) - digamma(self.alpha.sum())
        return
    else:
        self.K = K
        if self.inferType == "EM":
            self.w = w
        else:
            self.alpha = alpha
            self.Elogw = digamma(self.alpha) - digamma(self.alpha.sum())

def update_global_params_soVB(self, SS, rho, **kwargs):
    """ Update attribute alpha to optimize the stochastic ELBO objective.

    Blends the newly computed posterior parameters with the current ones,
    using step size rho.
    """
    alphNew = self.alpha0 + SS.N
    self.alpha = rho * alphNew + (1 - rho) * self.alpha
    self.Elogw = digamma(self.alpha) - digamma(self.alpha.sum())
    self.K = SS.K

def update_global_params_VB(self, SS, **kwargs):
    """ Update attribute alpha to optimize the ELBO objective.
    """
    self.alpha = self.alpha0 + SS.N
    self.Elogw = digamma(self.alpha) - digamma(self.alpha.sum())
    self.K = SS.K

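# Illustrative usage (added, not part of the original source): the Dirichlet
# posterior update used by update_global_params_VB above, shown without the
# class on toy counts with a hypothetical symmetric prior pseudo-count alpha0.
# By Jensen's inequality, exp(E[log w_k]) falls below the posterior mean
# E[w_k] = alpha_k / sum(alpha).
def _demo_Dirichlet_posterior_Elogw(alpha0=1.0):
    N = np.array([50., 30., 5.])
    alpha = alpha0 + N
    Elogw = digamma(alpha) - digamma(alpha.sum())
    Ew = alpha / alpha.sum()
    assert np.all(np.exp(Elogw) <= Ew)
    return np.exp(Elogw), Ew
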