示例#1
0
def run_many_merge_moves(
    hmodel, Data, SS, evBound=None, nMergeTrials=1, compList=list(), randstate=np.random, mPairIDs=None, **mergeKwArgs
):
    """ Run (potentially many) merge moves on hmodel, one trial after another.

      Every trial is delegated to run_merge_move; the model, suff stats and
      evidence it returns become the starting point of the next trial.
      Trials stop once nMergeTrials have been run, or when the MergeTracker
      reports that no pairs remain available.

      Args
      -------
      hmodel : model whose components may be merged
      Data : dataset, handed through to run_merge_move
      SS : suff stats for Data under hmodel. If SS has the "ElogqZ" merge
            term, every pair (kA < kB) whose entry in that KxK matrix is
            exactly zero is excluded from all trials.
      evBound : evidence bound of hmodel under SS.
            Computed with hmodel.calc_evidence(SS=SS) when None.
      nMergeTrials : number of merges to try
            (raised to len(compList) when compList is longer)
      compList : list of component ids to use as kA in attempted merges.
            Ids are taken from the end of the list; an id that is no longer
            available is skipped without counting as a trial. When the list
            runs out, run_merge_move selects both components itself.
            The caller's list is not modified.
      randstate : numpy random number generator, passed to run_merge_move
      mPairIDs : (optional) list of (kA, kB) pairs to try, in order.
            Takes precedence over compList. Trials stop as soon as the list
            is exhausted. The caller's list is not modified.
      mergeKwArgs : extra keyword args forwarded to run_merge_move

      Returns
      -------
      hmodel
      SS
      evBound
      MTracker
  """
    # Candidates are popped off these lists below. Work on private copies so
    # neither the caller's lists nor the shared default value of compList are
    # ever consumed by this call.
    compList = list(compList)
    if mPairIDs is not None:
        mPairIDs = list(mPairIDs)

    nMergeTrials = np.maximum(nMergeTrials, len(compList))

    MTracker = MergeTracker(SS.K)
    MSelector = MergePairSelector()

    # Exclude all pairs for which we did not compute the combined entropy Hz
    #  Hz is always stored in KxK matrix. Pairs that were skipped have zeros.
    aList = list()
    bList = list()
    if SS.hasMergeTerm("ElogqZ"):
        Hz = SS.getMergeTerm("ElogqZ")
        for kA in range(SS.K):
            for kB in range(kA + 1, SS.K):
                if Hz[kA, kB] == 0:
                    aList.append(kA)
                    bList.append(kB)
    if len(aList) > 0:
        MTracker.addPairsToExclude(aList, bList)

    if evBound is None:
        newEv = hmodel.calc_evidence(SS=SS)
    else:
        newEv = evBound

    trialID = 0
    while trialID < nMergeTrials and MTracker.hasAvailablePairs():
        oldEv = newEv

        if mPairIDs is not None:
            if len(mPairIDs) == 0:
                break
            kA, kB = mPairIDs.pop(0)
            try:
                MTracker.verifyPair(kA, kB)
            except AssertionError:
                # Invalid pair: drop it and move on. Not counted as a trial.
                # Single-argument print gives the same text on Python 2 and 3.
                print("  AssertionError skipped with mPairIDs! %s %s" % (kA, kB))
                continue
        elif len(compList) > 0:
            kA = compList.pop()
            if kA not in MTracker.getAvailableComps():
                # Not counted as a trial; compList shrinks so the loop ends.
                continue
            kB = None
        else:
            # No explicit candidates left: run_merge_move selects the pair.
            kA = None
            kB = None

        hmodel, SS, newEv, MoveInfo = run_merge_move(
            hmodel,
            Data,
            SS,
            oldEv,
            kA=kA,
            kB=kB,
            randstate=randstate,
            MSelector=MSelector,
            MTracker=MTracker,
            **mergeKwArgs
        )
        if MoveInfo["didAccept"]:
            assert newEv >= oldEv
            if mPairIDs is not None:
                # Remaining candidate pairs are re-indexed after the merge.
                mPairIDs = _reindexCandidatePairsAfterAcceptedMerge(mPairIDs, kA, kB)
        trialID += 1
        MTracker.recordResult(**MoveInfo)

    return hmodel, SS, newEv, MTracker
示例#2
0
def run_many_merge_moves(hmodel,
                         Data,
                         SS,
                         evBound=None,
                         nMergeTrials=1,
                         compList=list(),
                         randstate=np.random,
                         mPairIDs=None,
                         **mergeKwArgs):
    ''' Run (potentially many) merge moves on hmodel, one trial after another.

      Every trial is delegated to run_merge_move; the model, suff stats and
      evidence it returns become the starting point of the next trial.
      Trials stop once nMergeTrials have been run, or when the MergeTracker
      reports that no pairs remain available.

      Args
      -------
      hmodel : model whose components may be merged
      Data : dataset, handed through to run_merge_move
      SS : suff stats for Data under hmodel. If SS has the 'ElogqZ' merge
            term, every pair (kA < kB) whose entry in that KxK matrix is
            exactly zero is excluded from all trials.
      evBound : evidence bound of hmodel under SS.
            Computed with hmodel.calc_evidence(SS=SS) when None.
      nMergeTrials : number of merges to try
            (raised to len(compList) when compList is longer)
      compList : list of component ids to use as kA in attempted merges.
            Ids are taken from the end of the list; an id that is no longer
            available is skipped without counting as a trial. When the list
            runs out, run_merge_move selects both components itself.
            The caller's list is not modified.
      randstate : numpy random number generator, passed to run_merge_move
      mPairIDs : (optional) list of (kA, kB) pairs to try, in order.
            Takes precedence over compList. Trials stop as soon as the list
            is exhausted. The caller's list is not modified.
      mergeKwArgs : extra keyword args forwarded to run_merge_move

      Returns
      -------
      hmodel
      SS
      evBound
      MTracker
  '''
    # Candidates are popped off these lists below. Work on private copies so
    # neither the caller's lists nor the shared default value of compList are
    # ever consumed by this call.
    compList = list(compList)
    if mPairIDs is not None:
        mPairIDs = list(mPairIDs)

    nMergeTrials = np.maximum(nMergeTrials, len(compList))

    MTracker = MergeTracker(SS.K)
    MSelector = MergePairSelector()

    # Exclude all pairs for which we did not compute the combined entropy Hz
    #  Hz is always stored in KxK matrix. Pairs that were skipped have zeros.
    aList = list()
    bList = list()
    if SS.hasMergeTerm('ElogqZ'):
        Hz = SS.getMergeTerm('ElogqZ')
        for kA in range(SS.K):
            for kB in range(kA + 1, SS.K):
                if Hz[kA, kB] == 0:
                    aList.append(kA)
                    bList.append(kB)
    if len(aList) > 0:
        MTracker.addPairsToExclude(aList, bList)

    if evBound is None:
        newEv = hmodel.calc_evidence(SS=SS)
    else:
        newEv = evBound

    trialID = 0
    while trialID < nMergeTrials and MTracker.hasAvailablePairs():
        oldEv = newEv

        if mPairIDs is not None:
            if len(mPairIDs) == 0:
                break
            kA, kB = mPairIDs.pop(0)
            try:
                MTracker.verifyPair(kA, kB)
            except AssertionError:
                # Invalid pair: drop it and move on. Not counted as a trial.
                # Single-argument print gives the same text on Python 2 and 3.
                print('  AssertionError skipped with mPairIDs! %s %s' %
                      (kA, kB))
                continue
        elif len(compList) > 0:
            kA = compList.pop()
            if kA not in MTracker.getAvailableComps():
                # Not counted as a trial; compList shrinks so the loop ends.
                continue
            kB = None
        else:
            # No explicit candidates left: run_merge_move selects the pair.
            kA = None
            kB = None

        hmodel, SS, newEv, MoveInfo = run_merge_move(hmodel,
                                                     Data,
                                                     SS,
                                                     oldEv,
                                                     kA=kA,
                                                     kB=kB,
                                                     randstate=randstate,
                                                     MSelector=MSelector,
                                                     MTracker=MTracker,
                                                     **mergeKwArgs)
        if MoveInfo['didAccept']:
            assert newEv >= oldEv
            if mPairIDs is not None:
                # Remaining candidate pairs are re-indexed after the merge.
                mPairIDs = _reindexCandidatePairsAfterAcceptedMerge(
                    mPairIDs, kA, kB)
        trialID += 1
        MTracker.recordResult(**MoveInfo)

    return hmodel, SS, newEv, MTracker
示例#3
0
def run_merge_move(
    curModel,
    Data,
    SS=None,
    curEv=None,
    doVizMerge=False,
    kA=None,
    kB=None,
    MTracker=None,
    MSelector=None,
    mergename="marglik",
    randstate=np.random.RandomState(),
    doUpdateAllComps=0,
    savedir=None,
    doVerbose=False,
    doWriteLog=False,
    **kwargs
):
    """ Creates candidate model with two components merged,
      and returns either candidate or current model,
      whichever has higher log probability (ELBO).

      Args
      --------
       curModel : bnpy model whose components will be merged
       Data : bnpy Data object
       SS : bnpy SuffStatDict object for Data under curModel
            must contain precomputed merge entropy in order to try a merge.
            Computed from Data (with merge terms) when None.
       curEv : current evidence bound, provided to save re-computation.
                curEv = curModel.calc_evidence(SS=SS)
       doVizMerge : if true, call viz_merge_proposal on the proposal
       kA, kB : (optional) integer ids for which specific components to merge.
                If either is None, the pair is chosen by
                select_merge_components (given kA, if provided).
       MTracker : (optional) MergeTracker that knows which components are
                   still available. A fresh MergeTracker(SS.K) when None.
       MSelector : (optional) MergePairSelector used to pick the pair; it is
                   re-indexed when a merge is accepted.
                   A fresh MergePairSelector() when None.
       mergename : passed to select_merge_components
       randstate : numpy random number generator,
                   passed to select_merge_components
       doUpdateAllComps : passed to propose_merge_candidate
       savedir : passed to log_merge_move when doWriteLog is true
       doVerbose : accepted but not used by this function
       doWriteLog : if true, call log_merge_move after the accept/reject
                   decision
       kwargs : accepted but not used by this function

      Returns
      --------
      hmodel, SS, evBound, MoveInfo

      hmodel := candidate or current model (bnpy HModel object)
      SS := suff stats for Data under hmodel
      evBound := log evidence (ELBO) of Data under hmodel
      MoveInfo := dict of info about this merge move, with fields
            didAccept := 1 if candidate accepted, 0 otherwise
            msg := human-readable string about this move
            kA, kB := indices of the components to be merged
                      (absent when the move was abandoned before a pair
                       was selected)
            evDiff := propEv - curEv (only when the accept/reject decision
                      was actually reached)

      Raises
      --------
      ValueError if the evidence of the proposed model is nan or inf.
  """
    if SS is None:
        LP = curModel.calc_local_params(Data)
        SS = curModel.get_global_suff_stats(Data, LP, doPrecompEntropy=True, doPrecompMerge=True)
    if curEv is None:
        curEv = curModel.calc_evidence(SS=SS)
    if MTracker is None:
        MTracker = MergeTracker(SS.K)
    if MSelector is None:
        MSelector = MergePairSelector()

    # Need at least two components to merge!
    # "< 2" (not "== 1") so that an empty model is turned away here too,
    # instead of falling through to pair selection.
    if curModel.allocModel.K < 2:
        MoveInfo = dict(didAccept=0, msg="need >= 2 comps to merge")
        return curModel, SS, curEv, MoveInfo

    if not SS.hasMergeTerms() and curModel.allocModel.requireMergeTerms():
        MoveInfo = dict(didAccept=0, msg="suff stats did not have merge terms")
        return curModel, SS, curEv, MoveInfo

    if kA is not None and kA not in MTracker.getAvailableComps():
        MoveInfo = dict(didAccept=0, msg="target comp kA must be excluded.")
        return curModel, SS, curEv, MoveInfo

    # Select which 2 components kA, kB to merge
    if kA is None or kB is None:
        kA, kB = select_merge_components(
            curModel, Data, SS, kA=kA, MTracker=MTracker, MSelector=MSelector, mergename=mergename, randstate=randstate
        )

    # Create candidate merged model
    propModel, propSS = propose_merge_candidate(curModel, SS, kA, kB, doUpdateAllComps=doUpdateAllComps)

    # Decide whether to accept the merge
    propEv = propModel.calc_evidence(SS=propSS)

    if not np.isfinite(propEv):
        raise ValueError("propEv should never be nan/inf")

    if doVizMerge:
        viz_merge_proposal(curModel, propModel, kA, kB, curEv, propEv)

    evDiff = propEv - curEv

    # Sanity checks, applied only when SS has an "nDoc" attribute: reject
    # proposals whose evidence moved by more than 5% of |curEv|, or flipped
    # from negative to positive. The marker tells the two cases apart.
    if hasattr(SS, "nDoc"):
        badMarker = None
        if np.abs(evDiff) > 0.05 * np.abs(curEv):
            badMarker = "$$$$$$$$"
        elif propEv > 0 and curEv < 0:
            badMarker = "@@@@@@@@"
        if badMarker is not None:
            # Single-argument print: same output on Python 2 and Python 3.
            print("CRAP! ---------------------------------------!!!!" + badMarker)
            print("    propEv % .5e" % (propEv))
            print("    curEv  % .5e" % (curEv))
            MoveInfo = dict(didAccept=0, kA=kA, kB=kB, msg="CRAP. bad proposed evidence.")
            return curModel, SS, curEv, MoveInfo

    if propEv >= curEv:
        MSelector.reindexAfterMerge(kA, kB)
        msg = "merge %3d & %3d | ev +%.3e ****" % (kA, kB, evDiff)
        MoveInfo = dict(didAccept=1, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
        result = (propModel, propSS, propEv, MoveInfo)
    else:
        msg = "merge %3d & %3d | ev -%.3e" % (kA, kB, curEv - propEv)
        MoveInfo = dict(didAccept=0, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
        result = (curModel, SS, curEv, MoveInfo)

    # The log always receives the pre-merge model and suff stats.
    if doWriteLog:
        log_merge_move(MoveInfo, MSelector, curModel, SS, savedir)
    return result
示例#4
0
def run_merge_move(curModel,
                   Data,
                   SS=None,
                   curEv=None,
                   doVizMerge=False,
                   kA=None,
                   kB=None,
                   MTracker=None,
                   MSelector=None,
                   mergename='marglik',
                   randstate=np.random.RandomState(),
                   doUpdateAllComps=0,
                   savedir=None,
                   doVerbose=False,
                   doWriteLog=False,
                   **kwargs):
    ''' Creates candidate model with two components merged,
      and returns either candidate or current model,
      whichever has higher log probability (ELBO).

      Args
      --------
       curModel : bnpy model whose components will be merged
       Data : bnpy Data object
       SS : bnpy SuffStatDict object for Data under curModel
            must contain precomputed merge entropy in order to try a merge.
            Computed from Data (with merge terms) when None.
       curEv : current evidence bound, provided to save re-computation.
                curEv = curModel.calc_evidence(SS=SS)
       doVizMerge : if true, call viz_merge_proposal on the proposal
       kA, kB : (optional) integer ids for which specific components to merge.
                If either is None, the pair is chosen by
                select_merge_components (given kA, if provided).
       MTracker : (optional) MergeTracker that knows which components are
                   still available. A fresh MergeTracker(SS.K) when None.
       MSelector : (optional) MergePairSelector used to pick the pair; it is
                   re-indexed when a merge is accepted.
                   A fresh MergePairSelector() when None.
       mergename : passed to select_merge_components
       randstate : numpy random number generator,
                   passed to select_merge_components
       doUpdateAllComps : passed to propose_merge_candidate
       savedir : passed to log_merge_move when doWriteLog is true
       doVerbose : accepted but not used by this function
       doWriteLog : if true, call log_merge_move after the accept/reject
                   decision
       kwargs : accepted but not used by this function

      Returns
      --------
      hmodel, SS, evBound, MoveInfo

      hmodel := candidate or current model (bnpy HModel object)
      SS := suff stats for Data under hmodel
      evBound := log evidence (ELBO) of Data under hmodel
      MoveInfo := dict of info about this merge move, with fields
            didAccept := 1 if candidate accepted, 0 otherwise
            msg := human-readable string about this move
            kA, kB := indices of the components to be merged
                      (absent when the move was abandoned before a pair
                       was selected)
            evDiff := propEv - curEv (only when the accept/reject decision
                      was actually reached)

      Raises
      --------
      ValueError if the evidence of the proposed model is nan or inf.
  '''
    if SS is None:
        LP = curModel.calc_local_params(Data)
        SS = curModel.get_global_suff_stats(Data,
                                            LP,
                                            doPrecompEntropy=True,
                                            doPrecompMerge=True)
    if curEv is None:
        curEv = curModel.calc_evidence(SS=SS)
    if MTracker is None:
        MTracker = MergeTracker(SS.K)
    if MSelector is None:
        MSelector = MergePairSelector()

    # Need at least two components to merge!
    # "< 2" (not "== 1") so that an empty model is turned away here too,
    # instead of falling through to pair selection.
    if curModel.allocModel.K < 2:
        MoveInfo = dict(didAccept=0, msg="need >= 2 comps to merge")
        return curModel, SS, curEv, MoveInfo

    if not SS.hasMergeTerms() and curModel.allocModel.requireMergeTerms():
        MoveInfo = dict(didAccept=0, msg="suff stats did not have merge terms")
        return curModel, SS, curEv, MoveInfo

    if kA is not None and kA not in MTracker.getAvailableComps():
        MoveInfo = dict(didAccept=0, msg="target comp kA must be excluded.")
        return curModel, SS, curEv, MoveInfo

    # Select which 2 components kA, kB to merge
    if kA is None or kB is None:
        kA, kB = select_merge_components(curModel,
                                         Data,
                                         SS,
                                         kA=kA,
                                         MTracker=MTracker,
                                         MSelector=MSelector,
                                         mergename=mergename,
                                         randstate=randstate)

    # Create candidate merged model
    propModel, propSS = propose_merge_candidate(
        curModel, SS, kA, kB, doUpdateAllComps=doUpdateAllComps)

    # Decide whether to accept the merge
    propEv = propModel.calc_evidence(SS=propSS)

    if not np.isfinite(propEv):
        raise ValueError('propEv should never be nan/inf')

    if doVizMerge:
        viz_merge_proposal(curModel, propModel, kA, kB, curEv, propEv)

    evDiff = propEv - curEv

    # Sanity checks, applied only when SS has an 'nDoc' attribute: reject
    # proposals whose evidence moved by more than 5% of |curEv|, or flipped
    # from negative to positive. The marker tells the two cases apart.
    if hasattr(SS, 'nDoc'):
        badMarker = None
        if np.abs(evDiff) > 0.05 * np.abs(curEv):
            badMarker = '$$$$$$$$'
        elif propEv > 0 and curEv < 0:
            badMarker = '@@@@@@@@'
        if badMarker is not None:
            # Single-argument print: same output on Python 2 and Python 3.
            print('CRAP! ---------------------------------------!!!!' +
                  badMarker)
            print('    propEv % .5e' % (propEv))
            print('    curEv  % .5e' % (curEv))
            MoveInfo = dict(didAccept=0,
                            kA=kA,
                            kB=kB,
                            msg="CRAP. bad proposed evidence.")
            return curModel, SS, curEv, MoveInfo

    if propEv >= curEv:
        MSelector.reindexAfterMerge(kA, kB)
        msg = "merge %3d & %3d | ev +%.3e ****" % (kA, kB, evDiff)
        MoveInfo = dict(didAccept=1, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
        result = (propModel, propSS, propEv, MoveInfo)
    else:
        msg = "merge %3d & %3d | ev -%.3e" % (kA, kB, curEv - propEv)
        MoveInfo = dict(didAccept=0, kA=kA, kB=kB, msg=msg, evDiff=evDiff)
        result = (curModel, SS, curEv, MoveInfo)

    # The log always receives the pre-merge model and suff stats.
    if doWriteLog:
        log_merge_move(MoveInfo, MSelector, curModel, SS, savedir)
    return result