def run_many_merge_moves(hmodel, Data, SS, evBound=None,
                         nMergeTrials=1, compList=None,
                         randstate=np.random, mPairIDs=None,
                         **mergeKwArgs):
    """ Run (potentially many) merge moves on hmodel.

    Each trial delegates to run_merge_move and records the outcome in a
    fresh MergeTracker, which is returned to the caller.

    Args
    -------
    hmodel : model whose components may be merged
    Data : dataset, passed through unchanged to run_merge_move
    SS : sufficient statistics for Data under hmodel.
         Must provide K, hasMergeTerm() and getMergeTerm().
    evBound : (optional) current evidence bound for hmodel.
         When None, computed as hmodel.calc_evidence(SS=SS).
    nMergeTrials : number of merges to try.
         Raised to len(compList) when compList is longer.
    compList : (optional) list of components to include in attempted merges.
         Consumed IN PLACE from the end, one target (kA) per trial.
         None (the default) means no specific targets.
    randstate : numpy random number generator
    mPairIDs : (optional) list of (kA, kB) candidate pairs.
         Consumed IN PLACE from the front. When provided, the loop stops
         as soon as the list is exhausted.
    mergeKwArgs : extra keyword args forwarded to run_merge_move

    Returns
    -------
    hmodel : model after all accepted merges
    SS : sufficient statistics matching the returned hmodel
    evBound : evidence bound of the returned hmodel
    MTracker : MergeTracker holding the record of every trial
    """
    # None replaces the former shared mutable default (compList=list()).
    if compList is None:
        compList = list()
    nMergeTrials = np.maximum(nMergeTrials, len(compList))

    MTracker = MergeTracker(SS.K)
    MSelector = MergePairSelector()

    # Exclude all pairs for which we did not compute the combined entropy Hz
    # Hz is always stored in KxK matrix. Pairs that were skipped have zeros.
    aList = list()
    bList = list()
    if SS.hasMergeTerm('ElogqZ'):
        Hz = SS.getMergeTerm('ElogqZ')
        # range (not xrange) keeps this valid on both Python 2 and 3.
        for kA in range(SS.K):
            for kB in range(kA + 1, SS.K):
                if Hz[kA, kB] == 0:
                    aList.append(kA)
                    bList.append(kB)
    if len(aList) > 0:
        MTracker.addPairsToExclude(aList, bList)

    if evBound is None:
        newEv = hmodel.calc_evidence(SS=SS)
    else:
        newEv = evBound

    trialID = 0
    while trialID < nMergeTrials and MTracker.hasAvailablePairs():
        oldEv = newEv

        if mPairIDs is not None:
            # Explicit candidate pairs take priority over compList.
            if len(mPairIDs) == 0:
                break
            kA, kB = mPairIDs.pop(0)
            try:
                MTracker.verifyPair(kA, kB)
            except AssertionError:
                # Pair is no longer valid; skip it without using up a trial.
                # Single pre-formatted argument prints identically on
                # Python 2 and Python 3.
                print(' AssertionError skipped with mPairIDs! %s %s'
                      % (kA, kB))
                continue
        elif len(compList) > 0:
            kA = compList.pop()
            if kA not in MTracker.getAvailableComps():
                # Target is excluded; skip it without using up a trial.
                continue
            kB = None
        else:
            # No targets requested: run_merge_move selects both components.
            kA = None
            kB = None

        hmodel, SS, newEv, MoveInfo = run_merge_move(
            hmodel, Data, SS, oldEv,
            kA=kA, kB=kB, randstate=randstate,
            MSelector=MSelector, MTracker=MTracker,
            **mergeKwArgs)

        if MoveInfo['didAccept']:
            assert newEv >= oldEv
            if mPairIDs is not None:
                # Remaining pairs need updating after the accepted merge.
                mPairIDs = _reindexCandidatePairsAfterAcceptedMerge(
                    mPairIDs, kA, kB)
        trialID += 1
        MTracker.recordResult(**MoveInfo)

    return hmodel, SS, newEv, MTracker
def run_many_merge_moves(hmodel, Data, SS, evBound=None,
                         nMergeTrials=1, compList=None,
                         randstate=np.random, mPairIDs=None,
                         **mergeKwArgs):
    ''' Run (potentially many) merge moves on hmodel.

    Each trial delegates to run_merge_move and records the outcome in a
    fresh MergeTracker, which is returned to the caller.

    Args
    -------
    hmodel : model whose components may be merged
    Data : dataset, passed through unchanged to run_merge_move
    SS : sufficient statistics for Data under hmodel.
         Must provide K, hasMergeTerm() and getMergeTerm().
    evBound : (optional) current evidence bound for hmodel.
         When None, computed as hmodel.calc_evidence(SS=SS).
    nMergeTrials : number of merges to try.
         Raised to len(compList) when compList is longer.
    compList : (optional) list of components to include in attempted merges.
         Consumed IN PLACE from the end, one target (kA) per trial.
         None (the default) means no specific targets.
    randstate : numpy random number generator
    mPairIDs : (optional) list of (kA, kB) candidate pairs.
         Consumed IN PLACE from the front. When provided, the loop stops
         as soon as the list is exhausted.
    mergeKwArgs : extra keyword args forwarded to run_merge_move

    Returns
    -------
    hmodel : model after all accepted merges
    SS : sufficient statistics matching the returned hmodel
    evBound : evidence bound of the returned hmodel
    MTracker : MergeTracker holding the record of every trial
    '''
    # None replaces the former shared mutable default (compList=list()).
    if compList is None:
        compList = list()
    nMergeTrials = np.maximum(nMergeTrials, len(compList))

    MTracker = MergeTracker(SS.K)
    MSelector = MergePairSelector()

    # Exclude all pairs for which we did not compute the combined entropy Hz
    # Hz is always stored in KxK matrix. Pairs that were skipped have zeros.
    aList = list()
    bList = list()
    if SS.hasMergeTerm('ElogqZ'):
        Hz = SS.getMergeTerm('ElogqZ')
        # range (not xrange) keeps this valid on both Python 2 and 3.
        for kA in range(SS.K):
            for kB in range(kA + 1, SS.K):
                if Hz[kA, kB] == 0:
                    aList.append(kA)
                    bList.append(kB)
    if len(aList) > 0:
        MTracker.addPairsToExclude(aList, bList)

    if evBound is None:
        newEv = hmodel.calc_evidence(SS=SS)
    else:
        newEv = evBound

    trialID = 0
    while trialID < nMergeTrials and MTracker.hasAvailablePairs():
        oldEv = newEv

        if mPairIDs is not None:
            # Explicit candidate pairs take priority over compList.
            if len(mPairIDs) == 0:
                break
            kA, kB = mPairIDs.pop(0)
            try:
                MTracker.verifyPair(kA, kB)
            except AssertionError:
                # Pair is no longer valid; skip it without using up a trial.
                # Single pre-formatted argument prints identically on
                # Python 2 and Python 3.
                print(' AssertionError skipped with mPairIDs! %s %s'
                      % (kA, kB))
                continue
        elif len(compList) > 0:
            kA = compList.pop()
            if kA not in MTracker.getAvailableComps():
                # Target is excluded; skip it without using up a trial.
                continue
            kB = None
        else:
            # No targets requested: run_merge_move selects both components.
            kA = None
            kB = None

        hmodel, SS, newEv, MoveInfo = run_merge_move(
            hmodel, Data, SS, oldEv,
            kA=kA, kB=kB, randstate=randstate,
            MSelector=MSelector, MTracker=MTracker,
            **mergeKwArgs)

        if MoveInfo['didAccept']:
            assert newEv >= oldEv
            if mPairIDs is not None:
                # Remaining pairs need updating after the accepted merge.
                mPairIDs = _reindexCandidatePairsAfterAcceptedMerge(
                    mPairIDs, kA, kB)
        trialID += 1
        MTracker.recordResult(**MoveInfo)

    return hmodel, SS, newEv, MTracker
def run_merge_move(curModel, Data, SS=None, curEv=None, doVizMerge=False,
                   kA=None, kB=None, MTracker=None, MSelector=None,
                   mergename='marglik',
                   randstate=np.random.RandomState(),
                   doUpdateAllComps=0, savedir=None,
                   doVerbose=False, doWriteLog=False, **kwargs):
    """ Propose merging two components, keep whichever model scores higher.

    Creates candidate model with two components merged, and returns
    either candidate or current model, whichever has higher
    log probability (ELBO). Ties go to the candidate.

    Args
    --------
    curModel : bnpy model whose components will be merged
    Data : bnpy Data object
    SS : bnpy SuffStatDict object for Data under curModel
         must contain precomputed merge entropy in order to try a merge.
         When None, computed here from curModel and Data.
    curEv : current evidence bound, provided to save re-computation.
         When None, curEv = curModel.calc_evidence(SS=SS)
    doVizMerge : if true, call viz_merge_proposal on the candidate
    kA, kB : (optional) integer ids for which specific components to merge.
         If either is None, select_merge_components chooses the pair
         (with kA, if given, passed along as the target).
    MTracker : (optional) MergeTracker naming the components still
         available for merging. When None, a fresh MergeTracker(SS.K).
    MSelector : (optional) MergePairSelector. When None, a fresh one.
    mergename : name of selection procedure, forwarded to
         select_merge_components
    randstate : numpy random number generator, forwarded to
         select_merge_components.
         NOTE: the default RandomState is created once, when the function
         is defined, and is therefore shared by every call that omits it.
    doUpdateAllComps : forwarded to propose_merge_candidate
    savedir : forwarded to log_merge_move when doWriteLog is true
    doVerbose : accepted but not used in this function
    doWriteLog : if true, call log_merge_move after the decision
    kwargs : accepted and ignored, so callers can pass shared option dicts

    Returns
    --------
    hmodel, SS, evBound, MoveInfo
      hmodel := candidate or current model (bnpy HModel object)
      SS := suff stats for Data under hmodel
      evBound := log evidence (ELBO) of Data under hmodel
      MoveInfo := dict of info about this merge move, with fields
         didAccept := 0/1 flag, 1 if candidate accepted
         msg := human-readable string about this move
         kA, kB := indices of the components to be merged
                   (absent when the move was abandoned before selection)
         evDiff := propEv - curEv (present only when the candidate
                   reached the final accept/reject comparison)

    Raises
    --------
    ValueError if the candidate's evidence is nan or inf.
    """
    if SS is None:
        LP = curModel.calc_local_params(Data)
        SS = curModel.get_global_suff_stats(
            Data, LP, doPrecompEntropy=True, doPrecompMerge=True)
    if curEv is None:
        curEv = curModel.calc_evidence(SS=SS)
    if MTracker is None:
        MTracker = MergeTracker(SS.K)
    if MSelector is None:
        MSelector = MergePairSelector()

    # Need at least two components to merge!
    if curModel.allocModel.K == 1:
        MoveInfo = dict(didAccept=0, msg="need >= 2 comps to merge")
        return curModel, SS, curEv, MoveInfo

    if not SS.hasMergeTerms() and curModel.allocModel.requireMergeTerms():
        MoveInfo = dict(didAccept=0,
                        msg="suff stats did not have merge terms")
        return curModel, SS, curEv, MoveInfo

    if kA is not None and kA not in MTracker.getAvailableComps():
        MoveInfo = dict(didAccept=0, msg="target comp kA must be excluded.")
        return curModel, SS, curEv, MoveInfo

    # Select which 2 components kA, kB in {1, 2, ... K} to merge
    if kA is None or kB is None:
        kA, kB = select_merge_components(
            curModel, Data, SS, kA=kA,
            MTracker=MTracker, MSelector=MSelector,
            mergename=mergename, randstate=randstate)

    # Create candidate merged model
    propModel, propSS = propose_merge_candidate(
        curModel, SS, kA, kB, doUpdateAllComps=doUpdateAllComps)

    # Decide whether to accept the merge
    propEv = propModel.calc_evidence(SS=propSS)
    if np.isnan(propEv) or np.isinf(propEv):
        raise ValueError('propEv should never be nan/inf')

    if doVizMerge:
        viz_merge_proposal(curModel, propModel, kA, kB, curEv, propEv)

    evDiff = propEv - curEv

    # Sanity checks, applied only when SS carries an nDoc attribute:
    # a candidate whose evidence differs from the current one by more than
    # 5%, or flips from negative to positive, is rejected as a bad proposal.
    # Each print passes one pre-formatted string, so the output is the same
    # on Python 2 and Python 3.
    if hasattr(SS, 'nDoc') and np.abs(evDiff) > 0.05 * np.abs(curEv):
        print('CRAP! ---------------------------------------!!!!$$$$$$$$')
        print(' propEv % .5e' % (propEv,))
        print(' curEv % .5e' % (curEv,))
        MoveInfo = dict(didAccept=0, kA=kA, kB=kB,
                        msg="CRAP. bad proposed evidence.")
        return curModel, SS, curEv, MoveInfo

    if hasattr(SS, 'nDoc') and (propEv > 0 and curEv < 0):
        print('CRAP! ---------------------------------------!!!!@@@@@@@@')
        print(' propEv % .5e' % (propEv,))
        print(' curEv % .5e' % (curEv,))
        MoveInfo = dict(didAccept=0, kA=kA, kB=kB,
                        msg="CRAP. bad proposed evidence.")
        return curModel, SS, curEv, MoveInfo

    if propEv >= curEv:
        # Accepted: let the selector renumber its state for the merged pair.
        MSelector.reindexAfterMerge(kA, kB)
        msg = "merge %3d & %3d | ev +%.3e ****" % (kA, kB, propEv - curEv)
        MoveInfo = dict(didAccept=1, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
        if doWriteLog:
            log_merge_move(MoveInfo, MSelector, curModel, SS, savedir)
        return propModel, propSS, propEv, MoveInfo

    msg = "merge %3d & %3d | ev -%.3e" % (kA, kB, curEv - propEv)
    MoveInfo = dict(didAccept=0, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
    if doWriteLog:
        log_merge_move(MoveInfo, MSelector, curModel, SS, savedir)
    return curModel, SS, curEv, MoveInfo
def run_merge_move(curModel, Data, SS=None, curEv=None, doVizMerge=False,
                   kA=None, kB=None, MTracker=None, MSelector=None,
                   mergename='marglik',
                   randstate=np.random.RandomState(),
                   doUpdateAllComps=0, savedir=None,
                   doVerbose=False, doWriteLog=False, **kwargs):
    ''' Propose merging two components, keep whichever model scores higher.

    Creates candidate model with two components merged, and returns
    either candidate or current model, whichever has higher
    log probability (ELBO). Ties go to the candidate.

    Args
    --------
    curModel : bnpy model whose components will be merged
    Data : bnpy Data object
    SS : bnpy SuffStatDict object for Data under curModel
         must contain precomputed merge entropy in order to try a merge.
         When None, computed here from curModel and Data.
    curEv : current evidence bound, provided to save re-computation.
         When None, curEv = curModel.calc_evidence(SS=SS)
    doVizMerge : if true, call viz_merge_proposal on the candidate
    kA, kB : (optional) integer ids for which specific components to merge.
         If either is None, select_merge_components chooses the pair
         (with kA, if given, passed along as the target).
    MTracker : (optional) MergeTracker naming the components still
         available for merging. When None, a fresh MergeTracker(SS.K).
    MSelector : (optional) MergePairSelector. When None, a fresh one.
    mergename : name of selection procedure, forwarded to
         select_merge_components
    randstate : numpy random number generator, forwarded to
         select_merge_components.
         NOTE: the default RandomState is created once, when the function
         is defined, and is therefore shared by every call that omits it.
    doUpdateAllComps : forwarded to propose_merge_candidate
    savedir : forwarded to log_merge_move when doWriteLog is true
    doVerbose : accepted but not used in this function
    doWriteLog : if true, call log_merge_move after the decision
    kwargs : accepted and ignored, so callers can pass shared option dicts

    Returns
    --------
    hmodel, SS, evBound, MoveInfo
      hmodel := candidate or current model (bnpy HModel object)
      SS := suff stats for Data under hmodel
      evBound := log evidence (ELBO) of Data under hmodel
      MoveInfo := dict of info about this merge move, with fields
         didAccept := 0/1 flag, 1 if candidate accepted
         msg := human-readable string about this move
         kA, kB := indices of the components to be merged
                   (absent when the move was abandoned before selection)
         evDiff := propEv - curEv (present only when the candidate
                   reached the final accept/reject comparison)

    Raises
    --------
    ValueError if the candidate's evidence is nan or inf.
    '''
    if SS is None:
        LP = curModel.calc_local_params(Data)
        SS = curModel.get_global_suff_stats(
            Data, LP, doPrecompEntropy=True, doPrecompMerge=True)
    if curEv is None:
        curEv = curModel.calc_evidence(SS=SS)
    if MTracker is None:
        MTracker = MergeTracker(SS.K)
    if MSelector is None:
        MSelector = MergePairSelector()

    # Need at least two components to merge!
    if curModel.allocModel.K == 1:
        MoveInfo = dict(didAccept=0, msg="need >= 2 comps to merge")
        return curModel, SS, curEv, MoveInfo

    if not SS.hasMergeTerms() and curModel.allocModel.requireMergeTerms():
        MoveInfo = dict(didAccept=0,
                        msg="suff stats did not have merge terms")
        return curModel, SS, curEv, MoveInfo

    if kA is not None and kA not in MTracker.getAvailableComps():
        MoveInfo = dict(didAccept=0, msg="target comp kA must be excluded.")
        return curModel, SS, curEv, MoveInfo

    # Select which 2 components kA, kB in {1, 2, ... K} to merge
    if kA is None or kB is None:
        kA, kB = select_merge_components(
            curModel, Data, SS, kA=kA,
            MTracker=MTracker, MSelector=MSelector,
            mergename=mergename, randstate=randstate)

    # Create candidate merged model
    propModel, propSS = propose_merge_candidate(
        curModel, SS, kA, kB, doUpdateAllComps=doUpdateAllComps)

    # Decide whether to accept the merge
    propEv = propModel.calc_evidence(SS=propSS)
    if np.isnan(propEv) or np.isinf(propEv):
        raise ValueError('propEv should never be nan/inf')

    if doVizMerge:
        viz_merge_proposal(curModel, propModel, kA, kB, curEv, propEv)

    evDiff = propEv - curEv

    # Sanity checks, applied only when SS carries an nDoc attribute:
    # a candidate whose evidence differs from the current one by more than
    # 5%, or flips from negative to positive, is rejected as a bad proposal.
    # Each print passes one pre-formatted string, so the output is the same
    # on Python 2 and Python 3.
    if hasattr(SS, 'nDoc') and np.abs(evDiff) > 0.05 * np.abs(curEv):
        print('CRAP! ---------------------------------------!!!!$$$$$$$$')
        print(' propEv % .5e' % (propEv,))
        print(' curEv % .5e' % (curEv,))
        MoveInfo = dict(didAccept=0, kA=kA, kB=kB,
                        msg="CRAP. bad proposed evidence.")
        return curModel, SS, curEv, MoveInfo

    if hasattr(SS, 'nDoc') and (propEv > 0 and curEv < 0):
        print('CRAP! ---------------------------------------!!!!@@@@@@@@')
        print(' propEv % .5e' % (propEv,))
        print(' curEv % .5e' % (curEv,))
        MoveInfo = dict(didAccept=0, kA=kA, kB=kB,
                        msg="CRAP. bad proposed evidence.")
        return curModel, SS, curEv, MoveInfo

    if propEv >= curEv:
        # Accepted: let the selector renumber its state for the merged pair.
        MSelector.reindexAfterMerge(kA, kB)
        msg = "merge %3d & %3d | ev +%.3e ****" % (kA, kB, propEv - curEv)
        MoveInfo = dict(didAccept=1, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
        if doWriteLog:
            log_merge_move(MoveInfo, MSelector, curModel, SS, savedir)
        return propModel, propSS, propEv, MoveInfo

    msg = "merge %3d & %3d | ev -%.3e" % (kA, kB, curEv - propEv)
    MoveInfo = dict(didAccept=0, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
    if doWriteLog:
        log_merge_move(MoveInfo, MSelector, curModel, SS, savedir)
    return curModel, SS, curEv, MoveInfo