def selectCandidateDeleteComps(
        hmodel, SS,
        MoveRecordsByUID=None,
        MovePlans=None,
        lapFrac=0,
        **DArgs):
    ''' Select specific comps to target with delete move.

    Args
    ----
    hmodel : model object. Not used by this function.
    SS : sufficient statistics for the current model.
        Must provide K, uids, uid2k(uid) and getCountVec().
    MoveRecordsByUID : dict, optional
        Maps uid to a record of past move attempts. Updated in place:
        a fresh defaultdict(int) is inserted for any uid without a record.
        Fields read here: d_latestLap, d_nFailRecent, d_latestCount.
        If omitted, a new empty dict is used for this call only.
    MovePlans : dict, optional
        Existing plans. Updated in place and returned on success.
        If omitted, a new empty dict is used for this call only.
    lapFrac : number
        Current lap, used to measure laps since the last failed attempt.
    **DArgs : keyword options. Required keys:
        d_maxNumAtomsForTargetComp,
        d_minPercChangeInNumAtomsToReactivate,
        d_nLapToReactivate.

    Returns
    -------
    MovePlans : dict, with fields
    * d_targetUIDs : list of ints
    * d_absorbingUIDSet : set of ints, all uids that can absorb target mass
    OR
    a new dict (not MovePlans) with the single field
    * failMsg : string explaining why building list of eligible UIDs failed
    '''
    # Fresh containers per call: the old dict() defaults were created once
    # at definition time and silently shared between calls.
    if MoveRecordsByUID is None:
        MoveRecordsByUID = dict()
    if MovePlans is None:
        MovePlans = dict()

    DLogger.pprint("PLANNING delete at lap %.2f" % (lapFrac))
    K = SS.K

    availableUIDs = set(SS.uids)
    if len(availableUIDs) < 2:
        DLogger.pprint(
            "Delete proposal requires at least 2 available UIDs.\n" +
            " Need 1 uid to target, and at least 1 to absorb." +
            " Only have %d total uids in the model." % (len(availableUIDs)))
        failMsg = "Ineligible. Did not find >= 2 UIDs in entire model."
        return dict(failMsg=failMsg)

    # Exclusion of uids occupied by merge pairs or the birth shortlist is
    # currently disabled, so this set stays empty. It is still reported in
    # the log and in the failure message below.
    uidsBusyWithOtherMoves = set()

    # Compute score for each eligible state
    countVec = np.maximum(SS.getCountVec(), 1e-100)
    eligibleUIDs = list()
    tooBigUIDs = list()
    failRecordUIDs = list()
    nReactivated = 0
    for uid in availableUIDs:
        k = SS.uid2k(uid)
        size = countVec[k]
        if uid not in MoveRecordsByUID:
            MoveRecordsByUID[uid] = defaultdict(int)
        uidRecord = MoveRecordsByUID[uid]

        # Skip ahead if this cluster is too big
        if size > DArgs['d_maxNumAtomsForTargetComp']:
            tooBigUIDs.append(uid)
            continue

        # Avoid comps we've failed deleting in the past
        # unless they have changed by a reasonable amount
        # or enough laps have passed to try again
        lapsSinceLastTry = lapFrac - uidRecord['d_latestLap']
        hasRecentFailure = uidRecord['d_nFailRecent'] > 0
        oldsize = uidRecord['d_latestCount']
        if oldsize > 0 and hasRecentFailure:
            sizePercDiff = np.abs(size - oldsize) / (1e-100 + np.abs(oldsize))
            if sizePercDiff > DArgs['d_minPercChangeInNumAtomsToReactivate']:
                nReactivated += 1
            elif DArgs['d_nLapToReactivate'] > 0 \
                    and lapsSinceLastTry > DArgs['d_nLapToReactivate']:
                nReactivated += 1
            else:
                failRecordUIDs.append(uid)
                continue
        # If we make it here, the uid is eligible
        eligibleUIDs.append(uid)

    # Log which uids are busy with other moves
    msg = "%d/%d UIDs busy with other moves (birth/merge)" % (
        len(uidsBusyWithOtherMoves), K)
    DLogger.pprint(msg)
    if len(uidsBusyWithOtherMoves) > 0:
        DLogger.pprint(' ' + vec2str(uidsBusyWithOtherMoves), 'debug')

    # Log which uids were too large to be a delete target
    msg = "%d/%d UIDs too large [--d_maxNumAtomsForTargetComp %.2f]" % (
        len(tooBigUIDs), K, DArgs['d_maxNumAtomsForTargetComp'])
    DLogger.pprint(msg)
    if len(tooBigUIDs) > 0:
        DLogger.pprint(' ' + vec2str(tooBigUIDs), 'debug')

    # Log which uids were marked as having a failure record.
    msg = '%d/%d UIDs un-deleteable for past failures. %d reactivated.' % (
        len(failRecordUIDs), K, nReactivated)
    DLogger.pprint(msg)
    if len(failRecordUIDs) > 0:
        DLogger.pprint(' ' + vec2str(failRecordUIDs), 'debug')

    # Log all remaining eligible uids
    msg = '%d/%d UIDs eligible for targeted delete proposal' % (
        len(eligibleUIDs), K)
    DLogger.pprint(msg)
    if len(eligibleUIDs) == 0:
        failMsg = ("Empty plan. 0 UIDs eligible as delete target." +
                   " %d too busy with other moves." +
                   " %d too big." +
                   " %d have past failures.") % (
            len(uidsBusyWithOtherMoves),
            len(tooBigUIDs),
            len(failRecordUIDs))
        return dict(failMsg=failMsg)

    # Log count statistics for each uid
    eligibleCountVec = [countVec[SS.uid2k(u)] for u in eligibleUIDs]
    DLogger.pprint(' uid ' + vec2str(eligibleUIDs), 'debug')
    DLogger.pprint(' count ' + vec2str(eligibleCountVec), 'debug')

    # Select the single state to target:
    # the eligible uid with the largest count.
    targetUID = eligibleUIDs[np.argmax(eligibleCountVec)]
    MovePlans['d_targetUIDs'] = [targetUID]

    # Determine all comps eligible to receive its transfer mass.
    # Comps that were skipped as targets (too big, past failures)
    # can still absorb mass.
    absorbUIDset = set(eligibleUIDs)
    absorbUIDset.discard(targetUID)
    absorbUIDset.update(tooBigUIDs)
    absorbUIDset.update(failRecordUIDs)
    MovePlans['d_absorbingUIDSet'] = absorbUIDset

    DLogger.pprint('Selecting one single state to target.')
    DLogger.pprint('targetUID ' + str(targetUID))
    DLogger.pprint('absorbingUIDs: ' + vec2str(absorbUIDset))
    return MovePlans
def makeSummaryForBirthProposal(
        Dslice, curModel, curLPslice,
        curSSwhole=None,
        b_creationProposalName='bregmankmeans',
        targetUID=None,
        ktarget=None,
        newUIDs=None,
        LPkwargs=DefaultLPkwargs,
        lapFrac=0,
        batchID=0,
        seed=0,
        b_nRefineSteps=3,
        b_debugOutputDir=None,
        b_minNumAtomsForNewComp=None,
        b_doInitCompleteLP=1,
        b_cleanupWithMerge=1,
        b_method_initCoordAscent='fromprevious',
        vocabList=None,
        **kwargs):
    ''' Create summary that reassigns mass from target to Kfresh new comps.

    TODO support other options than bregman???

    Steps, as visible in this function:
    1. Initialize new comps via initSS_BregmanDiv.
    2. Optionally expand to a complete summary via makeExpansionSSFromZ.
    3. Run b_nRefineSteps restricted local steps, deleting small comps and
       (optionally) merging comps along the way.
    4. Verify that the proposal conserves the mass of the target comp.

    Args
    ----
    Dslice : dataset slice for the current batch
    curModel : current model
    curLPslice : dict of local parameters for Dslice.
        Must contain 'resp' or 'spR'.
    curSSwhole : whole-dataset sufficient statistics of the current model
    targetUID, ktarget : uid / position of the comp to split.
        At least one must be given; the other is looked up in curSSwhole.
    newUIDs : sequence of uids to assign to the new comps
    b_debugOutputDir : str or None
        If set (and not the string 'None'), plots and ELBO / doc-usage
        traces are produced in that directory.
    **kwargs : must contain Kmax, b_NiterForBregmanKMeans and
        b_method_xPi; forwarded to the helper routines.

    Returns
    -------
    xSSslice : SuffStatBag
        Contains exact summaries for reassignment of target mass.
        * Total mass is equal to mass assigned to ktarget in curLPslice
        * Number of components is Kfresh
        None if the proposal could not be built.
    Info : dict
        Contains info for detailed debugging of construction process.
        Has field 'errorMsg' when the proposal failed.
    '''
    # Parse input to decide which cluster to target
    # * targetUID is the unique ID of this cluster
    # * ktarget is its position in the current cluster ordering
    if targetUID is None:
        targetUID = curSSwhole.k2uid(ktarget)
    if ktarget is None:
        ktarget = curSSwhole.uid2k(targetUID)
    # START log for this birth proposal
    BLogger.pprint(
        'Creating proposal for targetUID %s at lap %.2f batchID %d' % (
            targetUID, lapFrac, batchID))
    # Grab vocabList, if available.
    if hasattr(Dslice, 'vocabList') and Dslice.vocabList is not None:
        vocabList = Dslice.vocabList
    # Parse input to decide where to save HTML output
    if b_debugOutputDir == 'None':
        b_debugOutputDir = None
    if b_debugOutputDir:
        BLogger.pprint('HTML output:' + b_debugOutputDir)
        # Create snapshot of current model comps
        plotCompsFromSS(
            curModel, curSSwhole,
            os.path.join(b_debugOutputDir, 'OrigComps.png'),
            vocabList=vocabList,
            compsToHighlight=[ktarget])

    # Determine exactly how many new states we can make...
    xK = len(newUIDs)
    if xK + curSSwhole.K > kwargs['Kmax']:
        xK = kwargs['Kmax'] - curSSwhole.K
        newUIDs = newUIDs[:xK]
        if xK <= 1:
            errorMsg = 'Cancelled.' + \
                'Adding 2 or more states would exceed budget of %d comps.' % (
                    kwargs['Kmax'])
            BLogger.pprint(errorMsg)
            BLogger.pprint('')
            return None, dict(errorMsg=errorMsg)

    # Create suff stats for some new states
    xInitSStarget, Info = initSS_BregmanDiv(
        Dslice, curModel, curLPslice,
        K=xK,
        ktarget=ktarget,
        lapFrac=lapFrac,
        seed=seed + int(1000 * lapFrac),
        logFunc=BLogger.pprint,
        NiterForBregmanKMeans=kwargs['b_NiterForBregmanKMeans'],
        **kwargs)
    # EXIT EARLY: if proposal initialization fails (not enough data).
    if xInitSStarget is None:
        BLogger.pprint('Proposal initialization FAILED. ' +
                       Info['errorMsg'])
        BLogger.pprint('')
        return None, Info

    # If here, we have a valid set of initial stats.
    xInitSStarget.setUIDs(newUIDs[:xInitSStarget.K])
    if b_doInitCompleteLP:
        # Create valid whole-dataset clustering from hard init
        xInitSSslice, tempInfo = makeExpansionSSFromZ(
            Dslice=Dslice, curModel=curModel, curLPslice=curLPslice,
            ktarget=ktarget,
            xInitSS=xInitSStarget,
            atomType=Info['atomType'],
            targetZ=Info['targetZ'],
            chosenDataIDs=Info['chosenDataIDs'],
            **kwargs)
        Info.update(tempInfo)
        xSSslice = xInitSSslice
    else:
        xSSslice = xInitSStarget

    # Debug-only diagnostics: ELBO and doc-usage traces are consumed only
    # by the plots written at the end when b_debugOutputDir is set.
    if b_debugOutputDir:
        plotCompsFromSS(
            curModel, xSSslice,
            os.path.join(b_debugOutputDir, 'NewComps_Init.png'),
            vocabList=vocabList)

        # Determine current model objective score
        curModelFWD = curModel.copy()
        curModelFWD.update_global_params(SS=curSSwhole)
        curLdict = curModelFWD.calc_evidence(SS=curSSwhole, todict=1)
        # Track proposal ELBOs as refinement improves things
        propLdictList = list()
        # Create initial proposal
        if b_doInitCompleteLP:
            propSS = curSSwhole.copy()
            propSS.transferMassFromExistingToExpansion(
                uid=targetUID, xSS=xSSslice)
            # Verify quality
            assert np.allclose(propSS.getCountVec().sum(),
                               curSSwhole.getCountVec().sum())
            propModel = curModel.copy()
            propModel.update_global_params(propSS)
            propLdict = propModel.calc_evidence(SS=propSS, todict=1)
            BLogger.pprint(
                "init %d/%d gainL % .3e propL % .3e curL % .3e" % (
                    0, b_nRefineSteps,
                    propLdict['Ltotal'] - curLdict['Ltotal'],
                    propLdict['Ltotal'],
                    curLdict['Ltotal']))
            propLdictList.append(propLdict)

        docUsageByUID = dict()
        if curModel.getAllocModelName().count('HDP'):
            for k, uid in enumerate(xInitSStarget.uids):
                if 'targetZ' in Info:
                    if Info['atomType'].count('doc'):
                        initDocUsage_uid = np.sum(Info['targetZ'] == k)
                    else:
                        # Count docs with at least one atom assigned to k.
                        # range (not xrange) so this runs on Python 3 too.
                        initDocUsage_uid = 0.0
                        for d in range(Dslice.nDoc):
                            start = Dslice.doc_range[d]
                            stop = Dslice.doc_range[d + 1]
                            initDocUsage_uid += np.any(
                                Info['targetZ'][start:stop] == k)
                else:
                    initDocUsage_uid = 0.0
                docUsageByUID[uid] = [initDocUsage_uid]

    # Create initial observation model
    xObsModel = curModel.obsModel.copy()

    if b_method_initCoordAscent == 'fromprevious' and 'xLPslice' in Info:
        xInitLPslice = Info['xLPslice']
    else:
        xInitLPslice = None

    # Make a function to pretty-print counts as we refine the initialization
    pprintCountVec = BLogger.makeFunctionToPrettyPrintCounts(xSSslice)
    BLogger.pprint(" " + vec2str(xInitSStarget.uids))
    pprintCountVec(xSSslice)

    # Log messages to describe the initialization.
    BLogger.pprint(' Running %d refinement iterations (--b_nRefineSteps)' % (
        b_nRefineSteps))
    prevCountVec = xSSslice.getCountVec()
    didConvEarly = False
    # Sentinel larger than any rstep until early convergence is detected.
    convstep = 100 + b_nRefineSteps
    # Run several refinement steps.
    # Each step does a restricted local step to improve
    # the proposed cluster assignments.
    for rstep in range(b_nRefineSteps):
        # Update xObsModel
        xObsModel.update_global_params(xSSslice)

        # Restricted local step!
        # * xInitSS : specifies obs-model stats used for initialization
        xSSslice, refineInfo = summarizeRestrictedLocalStep(
            Dslice=Dslice,
            curModel=curModel,
            curLPslice=curLPslice,
            curSSwhole=curSSwhole,
            ktarget=ktarget,
            xUIDs=xSSslice.uids,
            xInitSS=xSSslice,
            xObsModel=xObsModel,
            xInitLPslice=xInitLPslice,
            LPkwargs=LPkwargs,
            nUpdateSteps=1,
            **kwargs)
        Info.update(refineInfo)
        # Get most recent xLPslice for initialization
        if b_method_initCoordAscent == 'fromprevious' and 'xLPslice' in Info:
            xInitLPslice = Info['xLPslice']
        # On first step, show diagnostics for new states
        if rstep == 0:
            targetPi = refineInfo['emptyPi'] + refineInfo['xPiVec'].sum()
            BLogger.pprint(
                " target prob redistributed by policy %s (--b_method_xPi)" % (
                    kwargs['b_method_xPi']))
            msg = " pi[ktarget] before %.4f after %.4f." % (
                targetPi, refineInfo['emptyPi'])
            BLogger.pprint(msg)
            BLogger.pprint(" pi[new comps]: " +
                           vec2str(
                               refineInfo['xPiVec'],
                               width=6, minVal=0.0001))
            logLPConvergenceDiagnostics(
                refineInfo, rstep=rstep, b_nRefineSteps=b_nRefineSteps)
            BLogger.pprint(" " + vec2str(xInitSStarget.uids))
        # Show diagnostic counts in each fresh state
        pprintCountVec(xSSslice)

        # Write HTML debug info
        if b_debugOutputDir:
            plotCompsFromSS(
                curModel, xSSslice,
                os.path.join(b_debugOutputDir,
                             'NewComps_Step%d.png' % (rstep + 1)),
                vocabList=vocabList)
            propSS = curSSwhole.copy()
            propSS.transferMassFromExistingToExpansion(
                uid=targetUID, xSS=xSSslice)
            # Reordering only lifts score by small amount. Not worth it.
            # propSS.reorderComps(np.argsort(-1 * propSS.getCountVec()))
            propModel = curModel.copy()
            propModel.update_global_params(propSS)
            propLdict = propModel.calc_evidence(SS=propSS, todict=1)

            # Data-term gain restricted to the new comps: compare the
            # proposal against all of its comps merged into one.
            # NOTE: this reuses (and overwrites) curModelFWD's obsModel.
            propSSsubset = xSSslice
            tmpModel = curModelFWD
            tmpModel.obsModel.update_global_params(propSSsubset)
            propLdata_subset = tmpModel.obsModel.calcELBO_Memoized(
                propSSsubset)

            curSSsubset = xSSslice.copy(includeELBOTerms=0)
            while curSSsubset.K > 1:
                curSSsubset.mergeComps(0, 1)
            tmpModel.obsModel.update_global_params(curSSsubset)
            curLdata_subset = tmpModel.obsModel.calcELBO_Memoized(curSSsubset)
            gainLdata_subset = propLdata_subset - curLdata_subset
            msg = \
                "step %d/%d gainL % .3e propL % .3e curL % .3e" % (
                    rstep + 1, b_nRefineSteps,
                    propLdict['Ltotal'] - curLdict['Ltotal'],
                    propLdict['Ltotal'],
                    curLdict['Ltotal'])
            msg += " gainLdata_subset % .3e" % (gainLdata_subset)
            BLogger.pprint(msg)
            propLdictList.append(propLdict)
            if curModel.getAllocModelName().count('HDP'):
                docUsageVec = xSSslice.getSelectionTerm('DocUsageCount')
                for k, uid in enumerate(xSSslice.uids):
                    docUsageByUID[uid].append(docUsageVec[k])

        # If converged early and did the final refinement step
        if didConvEarly and rstep > convstep:
            break
        # Cleanup by deleting small clusters
        if rstep < b_nRefineSteps - 1:
            if rstep == b_nRefineSteps - 2 or didConvEarly:
                # After all but last step,
                # delete small (but not empty) comps
                minNumAtomsToStay = b_minNumAtomsForNewComp
            else:
                # Always remove empty clusters. They waste our time.
                minNumAtomsToStay = np.minimum(1, b_minNumAtomsForNewComp)
            xSSslice, xInitLPslice = cleanupDeleteSmallClusters(
                xSSslice, minNumAtomsToStay,
                xInitLPslice=xInitLPslice,
                pprintCountVec=pprintCountVec)
        # Decide if we have converged early
        if rstep < b_nRefineSteps - 2 and prevCountVec.size == xSSslice.K:
            if np.allclose(xSSslice.getCountVec(), prevCountVec, atol=0.5):
                # Converged. Jump directly to the merge phase!
                didConvEarly = True
                convstep = rstep
        # Cleanup by merging clusters
        if b_cleanupWithMerge and \
                (rstep == b_nRefineSteps - 2 or didConvEarly):
            # Only cleanup on second-to-last pass, or if converged early
            Info['mergestep'] = rstep + 1
            xSSslice, xInitLPslice = cleanupMergeClusters(
                xSSslice, curModel,
                obsSSkeys=xInitSStarget._Fields._FieldDims.keys(),
                vocabList=vocabList,
                pprintCountVec=pprintCountVec,
                xInitLPslice=xInitLPslice,
                b_debugOutputDir=b_debugOutputDir,
                **kwargs)
        prevCountVec = xSSslice.getCountVec().copy()

    Info['Kfinal'] = xSSslice.K
    if b_debugOutputDir:
        savefilename = os.path.join(
            b_debugOutputDir, 'ProposalTrace_ELBO.png')
        plotELBOtermsForProposal(curLdict, propLdictList,
                                 savefilename=savefilename)
        if curModel.getAllocModelName().count('HDP'):
            savefilename = os.path.join(
                b_debugOutputDir, 'ProposalTrace_DocUsage.png')
            plotDocUsageForProposal(docUsageByUID,
                                    savefilename=savefilename)

    # EXIT EARLY: error if we didn't create enough "big-enough" states.
    nnzCount = np.sum(xSSslice.getCountVec() >= b_minNumAtomsForNewComp)
    if nnzCount < 2:
        Info['errorMsg'] = \
            "Could not create at least two comps" + \
            " with mass >= %.1f (--%s)" % (
                b_minNumAtomsForNewComp, 'b_minNumAtomsForNewComp')
        BLogger.pprint('Proposal build phase FAILED. ' + Info['errorMsg'])
        BLogger.pprint('')  # Blank line
        return None, Info

    # If here, we have a valid proposal.
    # Need to verify mass conservation
    if hasattr(Dslice, 'word_count') and \
            curModel.obsModel.DataAtomType.count('word') and \
            curModel.getObsModelName().count('Mult'):
        origMass = np.inner(Dslice.word_count, curLPslice['resp'][:, ktarget])
    else:
        if 'resp' in curLPslice:
            origMass = curLPslice['resp'][:, ktarget].sum()
        else:
            origMass = curLPslice['spR'][:, ktarget].sum()
    newMass = xSSslice.getCountVec().sum()
    assert np.allclose(newMass, origMass, atol=1e-6, rtol=0)
    BLogger.pprint('Proposal build phase DONE.' +
                   ' Created %d candidate clusters.' % (Info['Kfinal']))
    BLogger.pprint('')  # Blank line
    return xSSslice, Info
def makeSummaryForExistingBirthProposal(
        Dslice, curModel, curLPslice,
        curSSwhole=None,
        targetUID=None,
        ktarget=None,
        LPkwargs=DefaultLPkwargs,
        lapFrac=0,
        batchID=0,
        b_nRefineSteps=3,
        b_debugOutputDir=None,
        b_method_initCoordAscent='fromprevious',
        vocabList=None,
        **kwargs):
    ''' Create summary that reassigns mass from target given set of comps

    Given set of comps is a fixed proposal from a previously-seen batch,
    read from curSSwhole.propXSS[targetUID].

    Args
    ----
    Dslice : dataset slice for the current batch
    curModel : current model
    curLPslice : dict of local parameters for Dslice.
        Must contain 'resp' or 'spR'.
    curSSwhole : whole-dataset sufficient statistics of the current model.
        Must have a propXSS entry for targetUID.
    targetUID, ktarget : uid / position of the comp being split.
        At least one must be given; the other is looked up in curSSwhole.
    b_nRefineSteps : int
        Number of refinement steps. At least one step is always run.
    b_debugOutputDir : str or None
        If set (and not the string 'None'), plots and ELBO / doc-usage
        traces are produced in that directory.
    **kwargs : must contain Kmax and b_Kfresh; forwarded to
        summarizeRestrictedLocalStep.

    Returns
    -------
    xSSslice : SuffStatBag
        Contains exact summaries for reassignment of target mass.
        * Total mass is equal to mass assigned to ktarget in curLPslice
        * Number of components is Kfresh
        None if the proposal would exceed the Kmax budget.
    Info : dict
        Contains info for detailed debugging of construction process.
        Has field 'errorMsg' when the proposal was cancelled.
    '''
    if targetUID is None:
        # uids is an array-like attribute (it is indexed and sliced
        # elsewhere in this file), so index it rather than call it.
        targetUID = curSSwhole.uids[ktarget]
    if ktarget is None:
        ktarget = curSSwhole.uid2k(targetUID)
    # START log for this birth proposal
    BLogger.pprint(
        'Extending previous birth for targetUID %s at lap %.2f batch %d' % (
            targetUID, lapFrac, batchID))
    # Grab vocabList, if available.
    if hasattr(Dslice, 'vocabList') and Dslice.vocabList is not None:
        vocabList = Dslice.vocabList
    # Parse input to decide where to save HTML output
    if b_debugOutputDir == 'None':
        b_debugOutputDir = None
    if b_debugOutputDir:
        BLogger.pprint('HTML output:' + b_debugOutputDir)
        # Create snapshot of current model comps
        plotCompsFromSS(
            curModel, curSSwhole,
            os.path.join(b_debugOutputDir, 'OrigComps.png'),
            vocabList=vocabList,
            compsToHighlight=[ktarget])

    assert targetUID in curSSwhole.propXSS
    xinitSS = curSSwhole.propXSS[targetUID]
    xK = xinitSS.K
    if xK + curSSwhole.K > kwargs['Kmax']:
        errorMsg = 'Cancelled.' + \
            'Adding 2 or more states would exceed budget of %d comps.' % (
                kwargs['Kmax'])
        BLogger.pprint(errorMsg)
        BLogger.pprint('')
        return None, dict(errorMsg=errorMsg)

    # Log messages to describe the initialization.
    # Make a function to pretty-print counts as we refine the initialization
    pprintCountVec = BLogger.makeFunctionToPrettyPrintCounts(xinitSS)
    BLogger.pprint(' Using previous proposal with %d clusters %s.' % (
        xinitSS.K, '(--b_Kfresh=%d)' % kwargs['b_Kfresh']))
    BLogger.pprint(" Initial uid/counts from previous proposal:")
    BLogger.pprint(' ' + vec2str(xinitSS.uids))
    pprintCountVec(xinitSS)
    BLogger.pprint(' Running %d refinement iterations (--b_nRefineSteps)' % (
        b_nRefineSteps))

    # Running total: previous proposal stats plus this batch's contribution
    xSSinitPlusSlice = xinitSS.copy()

    # Debug-only diagnostics: ELBO and doc-usage traces are consumed only
    # by the plots written at the end when b_debugOutputDir is set.
    if b_debugOutputDir:
        plotCompsFromSS(
            curModel, xinitSS,
            os.path.join(b_debugOutputDir, 'NewComps_Init.png'),
            vocabList=vocabList)

        # Determine current model objective score
        curModelFWD = curModel.copy()
        curModelFWD.update_global_params(SS=curSSwhole)
        curLdict = curModelFWD.calc_evidence(SS=curSSwhole, todict=1)
        # Track proposal ELBOs as refinement improves things
        propLdictList = list()
        docUsageByUID = dict()
        if curModel.getAllocModelName().count('HDP'):
            for k, uid in enumerate(xinitSS.uids):
                initDocUsage_uid = 0.0
                docUsageByUID[uid] = [initDocUsage_uid]

    # Create initial observation model
    xObsModel = curModel.obsModel.copy()
    xInitLPslice = None
    Info = dict()
    # Run several refinement steps.
    # Each step does a restricted local step to improve
    # the proposed cluster assignments.
    nRefineSteps = np.maximum(1, b_nRefineSteps)
    for rstep in range(nRefineSteps):
        xObsModel.update_global_params(xSSinitPlusSlice)

        # Restricted local step!
        # * xInitSS : specifies obs-model stats used for initialization
        xSSslice, refineInfo = summarizeRestrictedLocalStep(
            Dslice=Dslice,
            curModel=curModel,
            curLPslice=curLPslice,
            curSSwhole=curSSwhole,
            ktarget=ktarget,
            xUIDs=xSSinitPlusSlice.uids,
            xObsModel=xObsModel,
            xInitSS=xSSinitPlusSlice,  # first time in loop <= xinitSS
            xInitLPslice=xInitLPslice,
            LPkwargs=LPkwargs,
            **kwargs)

        # Swap the previous step's slice summary for the newest one,
        # so the running total counts this batch exactly once.
        xSSinitPlusSlice += xSSslice
        if rstep >= 1:
            xSSinitPlusSlice -= prevSSslice
        prevSSslice = xSSslice

        Info.update(refineInfo)
        # Show diagnostics for new states
        pprintCountVec(xSSslice)
        logLPConvergenceDiagnostics(
            refineInfo, rstep=rstep, b_nRefineSteps=b_nRefineSteps)
        # Get most recent xLPslice for initialization
        if b_method_initCoordAscent == 'fromprevious' and 'xLPslice' in Info:
            xInitLPslice = Info['xLPslice']
        if b_debugOutputDir:
            plotCompsFromSS(
                curModel, xSSslice,
                os.path.join(b_debugOutputDir,
                             'NewComps_Step%d.png' % (rstep + 1)),
                vocabList=vocabList)
            propSS = curSSwhole.copy()
            propSS.transferMassFromExistingToExpansion(
                uid=targetUID, xSS=xSSslice)
            propModel = curModel.copy()
            propModel.update_global_params(propSS)
            propLdict = propModel.calc_evidence(SS=propSS, todict=1)
            BLogger.pprint(
                "step %d/%d gainL % .3e propL % .3e curL % .3e" % (
                    rstep + 1, b_nRefineSteps,
                    propLdict['Ltotal'] - curLdict['Ltotal'],
                    propLdict['Ltotal'],
                    curLdict['Ltotal']))
            propLdictList.append(propLdict)
            if curModel.getAllocModelName().count('HDP'):
                docUsageVec = xSSslice.getSelectionTerm('DocUsageCount')
                for k, uid in enumerate(xSSslice.uids):
                    docUsageByUID[uid].append(docUsageVec[k])

    Info['Kfinal'] = xSSslice.K
    if b_debugOutputDir:
        savefilename = os.path.join(
            b_debugOutputDir, 'ProposalTrace_ELBO.png')
        plotELBOtermsForProposal(curLdict, propLdictList,
                                 savefilename=savefilename)
        if curModel.getAllocModelName().count('HDP'):
            savefilename = os.path.join(
                b_debugOutputDir, 'ProposalTrace_DocUsage.png')
            plotDocUsageForProposal(docUsageByUID,
                                    savefilename=savefilename)

    # If here, we have a valid proposal.
    # Need to verify mass conservation
    if hasattr(Dslice, 'word_count') and \
            curModel.obsModel.DataAtomType.count('word') and \
            curModel.getObsModelName().count('Mult'):
        origMass = np.inner(Dslice.word_count, curLPslice['resp'][:, ktarget])
    else:
        if 'resp' in curLPslice:
            origMass = curLPslice['resp'][:, ktarget].sum()
        else:
            origMass = curLPslice['spR'][:, ktarget].sum()
    newMass = xSSslice.getCountVec().sum()
    assert np.allclose(newMass, origMass, atol=1e-6, rtol=0)
    BLogger.pprint('Proposal extension DONE. %d candidate clusters.' % (
        Info['Kfinal']))
    BLogger.pprint('')
    return xSSslice, Info
def selectCandidateMergePairs(
        hmodel, SS,
        MovePlans=None,
        MoveRecordsByUID=None,
        lapFrac=None,
        m_maxNumPairsContainingComp=3,
        m_minPercChangeInNumAtomsToReactivate=0.01,
        m_nLapToReactivate=10,
        m_pair_ranking_procedure='total_size',
        m_pair_ranking_direction='descending',
        m_pair_ranking_do_exclude_by_thr=0,
        m_pair_ranking_exclusion_thr=-0.000001,
        **kwargs):
    ''' Select candidate pairs to consider for merge move.

    Args
    ----
    hmodel : model object. Only used when the ranking procedure name
        contains 'elbo' (calcHardMergeGap_SpecificPairs, getDatasetScale).
    SS : sufficient statistics for the current model.
        Must provide K, uids and getCountForUID(uid).
    MovePlans : dict, optional
        If it has field b_shortlistUIDs, those uids are excluded.
    MoveRecordsByUID : dict, optional
        Records of failed merges, keyed by sorted (uidA, uidB) tuple.
        Updated in place: the record of a pair is deleted when the pair
        is reactivated.
    lapFrac : number
        Current lap. Must be numeric, despite the None default
        (it is formatted and subtracted below).
    m_maxNumPairsContainingComp : int
        Max number of selected pairs any single uid may appear in.
    m_minPercChangeInNumAtomsToReactivate : float
    m_nLapToReactivate : number
        Thresholds for reconsidering a previously failed pair.
    m_pair_ranking_procedure : str
        'random', 'total_size', or any name containing 'elbo'.
    m_pair_ranking_direction : str
        'ascending' ranks small scores first; anything else descending.
    m_pair_ranking_do_exclude_by_thr, m_pair_ranking_exclusion_thr :
        If enabled, keep only pairs whose score is beyond the threshold
        (below it when ascending, above it when descending).

    Returns
    -------
    Info : dict, with fields
    * m_UIDPairs : list of tuples, each defining a pair of uids
    * m_GainVals : list of ranking scores, one per chosen pair
    * mPairIDs : list of tuples (kA, kB), positions of each chosen pair
    * m_targetUIDSet : set of all uids involved in a proposed merge pair

    Raises
    ------
    ValueError if m_pair_ranking_procedure is not recognised.
    '''
    # Fresh containers per call instead of shared default dict() objects.
    if MovePlans is None:
        MovePlans = dict()
    if MoveRecordsByUID is None:
        MoveRecordsByUID = dict()

    MLogger.pprint(
        "PLANNING merges at lap %.2f. K=%d" % (lapFrac, SS.K),
        'debug')

    # Mark any targetUIDs used in births as off-limits for merges.
    # A usage count far above the per-comp cap disqualifies the uid below.
    uidUsageCount = defaultdict(int)
    if 'b_shortlistUIDs' in MovePlans:
        for uid in MovePlans['b_shortlistUIDs']:
            uidUsageCount[uid] = 10 * m_maxNumPairsContainingComp
    nDisqualified = len(uidUsageCount.keys())
    MLogger.pprint(
        " %d/%d UIDs ineligible because on shortlist for births. " % (
            nDisqualified, SS.K),
        'debug')
    if nDisqualified > 0:
        MLogger.pprint(
            " Ineligible UIDs:" +
            vec2str(uidUsageCount.keys()),
            'debug')

    uid2count = dict()
    for uid in SS.uids:
        uid2count[uid] = SS.getCountForUID(uid)

    EligibleUIDPairs = list()
    EligibleAIDPairs = list()
    nPairTotal = 0
    nPairDQ = 0
    nPairBusy = 0
    for kA, uidA in enumerate(SS.uids):
        for b, uidB in enumerate(SS.uids[kA + 1:]):
            kB = kA + b + 1
            assert kA < kB
            nPairTotal += 1
            if uidUsageCount[uidA] > 0 or uidUsageCount[uidB] > 0:
                nPairBusy += 1
                continue
            # Records are keyed by the pair sorted by uid
            if uidA < uidB:
                uidTuple = (uidA, uidB)
            else:
                uidTuple = (uidB, uidA)
            aidTuple = (kA, kB)

            if uidTuple not in MoveRecordsByUID:
                EligibleUIDPairs.append(uidTuple)
                EligibleAIDPairs.append(aidTuple)
            else:
                # Pair failed before. Reactivate only if enough laps have
                # passed or the smaller comp changed size enough.
                pairRecord = MoveRecordsByUID[uidTuple]
                assert pairRecord['m_nFailRecent'] >= 1
                latestMinCount = pairRecord['m_latestMinCount']
                newMinCount = np.minimum(uid2count[uidA], uid2count[uidB])
                percDiff = np.abs(latestMinCount - newMinCount) / \
                    latestMinCount
                if (lapFrac - pairRecord['m_latestLap']) >= m_nLapToReactivate:
                    EligibleUIDPairs.append(uidTuple)
                    EligibleAIDPairs.append(aidTuple)
                    del MoveRecordsByUID[uidTuple]
                elif percDiff >= m_minPercChangeInNumAtomsToReactivate:
                    EligibleUIDPairs.append(uidTuple)
                    EligibleAIDPairs.append(aidTuple)
                    del MoveRecordsByUID[uidTuple]
                else:
                    nPairDQ += 1
    MLogger.pprint(
        " %d/%d pairs eligible. %d disqualified by past failures." % (
            len(EligibleAIDPairs), nPairTotal, nPairDQ),
        'debug')
    MLogger.pprint(
        " Prioritizing elible pairs via ranking procedure: %s" % (
            m_pair_ranking_procedure),
        'debug')
    if m_pair_ranking_procedure == 'random':
        nEligible = len(EligibleAIDPairs)
        # RandomState needs an integer seed, but lapFrac is usually a
        # float. Scale by 1000 so each fractional lap gets its own seed.
        prng = np.random.RandomState(int(1000 * lapFrac))
        rank_scores_per_pair = prng.permutation(np.arange(nEligible))
    elif m_pair_ranking_procedure == 'total_size':
        rank_scores_per_pair = np.asarray([
            SS.getCountForUID(uidA) + SS.getCountForUID(uidB)
            for (uidA, uidB) in EligibleUIDPairs])
    elif m_pair_ranking_procedure.count('elbo'):
        # Compute Ldata gain for each possible pair of comps
        rank_scores_per_pair = hmodel.obsModel.calcHardMergeGap_SpecificPairs(
            SS, EligibleAIDPairs)
        if hasattr(hmodel.allocModel, 'calcHardMergeGap_SpecificPairs'):
            rank_scores_per_pair = \
                rank_scores_per_pair + \
                hmodel.allocModel.calcHardMergeGap_SpecificPairs(
                    SS, EligibleAIDPairs)
        rank_scores_per_pair /= hmodel.obsModel.getDatasetScale(SS)
    else:
        raise ValueError(
            "Unrecognised --m_pair_ranking_procedure: %s" %
            m_pair_ranking_procedure)

    # Rank the pairs, optionally keeping only those beyond the threshold
    if m_pair_ranking_direction == 'ascending':
        if m_pair_ranking_do_exclude_by_thr:
            MLogger.pprint(
                "Keeping only uid pairs with score < %.3e" % (
                    m_pair_ranking_exclusion_thr),
                'debug')
            keep_pair_ids = np.flatnonzero(
                rank_scores_per_pair < m_pair_ranking_exclusion_thr)
            ranked_pair_locs = keep_pair_ids[np.argsort(
                rank_scores_per_pair[keep_pair_ids])]
        else:
            ranked_pair_locs = np.argsort(rank_scores_per_pair)
    else:
        if m_pair_ranking_do_exclude_by_thr:
            MLogger.pprint(
                "Keeping only uid pairs with score > %.3e" % (
                    m_pair_ranking_exclusion_thr),
                'debug')
            keep_pair_ids = np.flatnonzero(
                rank_scores_per_pair > m_pair_ranking_exclusion_thr)
            ranked_pair_locs = keep_pair_ids[np.argsort(
                -1 * rank_scores_per_pair[keep_pair_ids])]
        else:
            ranked_pair_locs = np.argsort(-1 * rank_scores_per_pair)

    # Walk pairs in rank order, capping how often each uid may appear
    nKeep = 0
    mUIDPairs = list()
    mAIDPairs = list()
    mGainVals = list()
    for loc in ranked_pair_locs:
        uidA, uidB = EligibleUIDPairs[loc]
        kA, kB = EligibleAIDPairs[loc]
        if uidUsageCount[uidA] >= m_maxNumPairsContainingComp or \
                uidUsageCount[uidB] >= m_maxNumPairsContainingComp:
            continue
        uidUsageCount[uidA] += 1
        uidUsageCount[uidB] += 1

        mAIDPairs.append((kA, kB))
        mUIDPairs.append((uidA, uidB))
        mGainVals.append(rank_scores_per_pair[loc])
        if nKeep == 0:
            MLogger.pprint("Chosen uid pairs:", 'debug')
        MLogger.pprint(
            "%4d, %4d : pair_score %.3e, size %s %s" % (
                uidA, uidB,
                rank_scores_per_pair[loc],
                count2str(uid2count[uidA]),
                count2str(uid2count[uidB]),
            ),
            'debug')
        nKeep += 1

    Info = dict()
    Info['m_UIDPairs'] = mUIDPairs
    Info['m_GainVals'] = mGainVals
    Info['mPairIDs'] = mAIDPairs
    targetUIDs = set()
    for uidA, uidB in mUIDPairs:
        targetUIDs.add(uidA)
        targetUIDs.add(uidB)
        # Sanity check: no birth-shortlisted uid was picked for a merge
        if 'b_shortlistUIDs' in MovePlans:
            for uid in MovePlans['b_shortlistUIDs']:
                assert uid != uidA
                assert uid != uidB
    Info['m_targetUIDSet'] = targetUIDs
    return Info
def selectCompsForBirthAtCurrentBatch(
        hmodel=None,
        SS=None,
        SSbatch=None,
        MoveRecordsByUID=None,
        MovePlans=None,
        lapFrac=0,
        batchID=0,
        batchPos=0,
        nBatch=1,
        isFirstBatch=False,
        doPrintLotsOfDetails=True,
        **BArgs):
    ''' Select specific comps to target with birth move at current batch.

    Args
    ----
    hmodel : model object. Not used by this function.
    SS : whole-dataset sufficient statistics.
        Must provide K, uids (numpy array), uid2k, getCountVec,
        getCountForUID.
    SSbatch : sufficient statistics for the current batch only.
    MoveRecordsByUID : dict, optional. Updated in place.
    MovePlans : dict, optional. Updated in place and returned.
    lapFrac, batchID, batchPos, nBatch : position within the run.
    isFirstBatch : bool, True on the first batch of a lap.
    doPrintLotsOfDetails : bool, forced to False when SS.K > 25.
    **BArgs : keyword options. Required keys: Kmax, b_Kfresh,
        b_retainAcrossBatchesAfterFirstLap,
        b_minNumAtomsForTargetComp,
        b_minPercChangeInNumAtomsToReactivate.

    Returns
    -------
    MovePlans : dict with updated fields
    * b_targetUIDs : list of ints,
        Each uid in b_targetUIDs will be tried immediately,
        at current batch.
    * b_newlyChosenTargetUIDs, b_preExistingTargetUIDs : lists of uids
    * b_nDQ_toosmall, b_nDQ_toobusy, b_nDQ_pastfail : ints
    * b_statusMsg : str, only set when the Kmax budget is exhausted

    MoveRecordsByUID : dict with updated fields (modified in place)
    * [uid]['byBatch'][batchID] : dict with fields
        proposalBatchSize
        proposalTotalSize
    '''
    # Fresh containers per call instead of shared default dict() objects.
    if MoveRecordsByUID is None:
        MoveRecordsByUID = dict()
    if MovePlans is None:
        MovePlans = dict()

    # Extract num clusters in current model
    K = SS.K
    if K > 25:
        doPrintLotsOfDetails = False
    statusStr = ' lap %7.3f lapCeil %5d batchPos %3d/%d batchID %3d ' % (
        lapFrac, np.ceil(lapFrac), batchPos, nBatch, batchID)
    BLogger.pprint('PLAN at ' + statusStr)

    if BArgs['Kmax'] - SS.K <= 0:
        msg = "Cannot plan any more births." + \
            " Reached upper limit of %d existing comps (--Kmax)." % (
                BArgs['Kmax'])
        BLogger.pprint(msg)
        if 'b_targetUIDs' in MovePlans:
            del MovePlans['b_targetUIDs']
        MovePlans['b_statusMsg'] = msg
        BLogger.pprint('')
        return MovePlans

    if isFirstBatch:
        assert 'b_targetUIDs' not in MovePlans

    if isFirstBatch or 'b_firstbatchUIDs' not in MovePlans:
        MovePlans['b_firstbatchUIDs'] = SSbatch.uids.copy()
        MovePlans['b_CountVec_SeenThisLap'] = np.zeros(K)
    # Accumulate this batch's counts for the uids seen at the first batch
    for k, uid in enumerate(MovePlans['b_firstbatchUIDs']):
        MovePlans['b_CountVec_SeenThisLap'][k] += SSbatch.getCountForUID(uid)

    # Short-circuit. Keep retained clusters.
    if lapFrac > 1.0 and BArgs['b_retainAcrossBatchesAfterFirstLap']:
        if not isFirstBatch:
            if 'b_targetUIDs' in MovePlans:
                msg = "%d UIDs retained from proposals earlier this lap." + \
                    " No new proposals at this batch."
                msg = msg % (len(MovePlans['b_targetUIDs']))
                BLogger.pprint(msg)
                if len(MovePlans['b_targetUIDs']) > 0:
                    BLogger.pprint(vec2str(MovePlans['b_targetUIDs']))
            else:
                BLogger.pprint(
                    'No UIDs targeted earlier in lap.' +
                    ' No new proposals at this batch.')
            return MovePlans

    # Compute sizes for each cluster
    CountVec_b = np.maximum(SSbatch.getCountVec(), 1e-100)
    CountVec_all = np.maximum(SS.getCountVec(), 1e-100)
    atomstr = 'atoms'
    labelstr = 'count_b'

    uidsBusyWithOtherMoves = list()
    uidsTooSmall = list()
    uidsWithFailRecord = list()
    # Builtin bool: the np.bool8 alias was removed in NumPy 2.0.
    eligible_mask = np.zeros(K, dtype=bool)
    for ii, uid in enumerate(SS.uids):
        if uid not in MoveRecordsByUID:
            MoveRecordsByUID[uid] = defaultdict(int)
        if not isinstance(MoveRecordsByUID[uid]['byBatch'], dict):
            MoveRecordsByUID[uid]['byBatch'] = \
                defaultdict(lambda: defaultdict(int))
        uidRec_b = MoveRecordsByUID[uid]['byBatch'][batchID]

        uidstatusStr = "STATUS uid %5d %s N_b %9.3f N_ttl %9.3f" % (
            uid, statusStr,
            SSbatch.getCountForUID(uid), SS.getCountForUID(uid))
        # Continue to track UIDs that are pre-existing targets
        if 'b_targetUIDs' in MovePlans:
            if uid in MovePlans['b_targetUIDs']:
                BLogger.startUIDSpecificLog(uid)
                BLogger.pprint(uidstatusStr + " CHOSENAGAIN")
                BLogger.stopUIDSpecificLog(uid)
                continue
        # TODO REMOVE DEAD CODE
        if MoveRecordsByUID[uid]['b_tryAgainFutureLap'] > 0:
            msg = "Try targeting uid %d again." % (uid)
            BLogger.pprint(msg)
            del MoveRecordsByUID[uid]['b_tryAgainFutureLap']
            eligible_mask[ii] = 1
            continue

        # Discard uids which are active in another proposal.
        if 'd_targetUIDs' in MovePlans:
            if uid in MovePlans['d_targetUIDs']:
                uidsBusyWithOtherMoves.append(uid)
                BLogger.startUIDSpecificLog(uid)
                BLogger.pprint(uidstatusStr + " BUSY DELETE PROPOSAL")
                BLogger.stopUIDSpecificLog(uid)
                continue
        if 'd_absorbingUIDSet' in MovePlans:
            if uid in MovePlans['d_absorbingUIDSet']:
                uidsBusyWithOtherMoves.append(uid)
                BLogger.startUIDSpecificLog(uid)
                BLogger.pprint(uidstatusStr + " BUSY DELETE PROPOSAL")
                BLogger.stopUIDSpecificLog(uid)
                continue
        if 'm_targetUIDSet' in MovePlans:
            if uid in MovePlans['m_targetUIDSet']:
                uidsBusyWithOtherMoves.append(uid)
                BLogger.startUIDSpecificLog(uid)
                BLogger.pprint(uidstatusStr + " BUSY MERGE PROPOSAL")
                BLogger.stopUIDSpecificLog(uid)
                continue

        # Filter out uids without large presence in current batch
        bigEnough = CountVec_b[ii] >= BArgs['b_minNumAtomsForTargetComp']
        if not bigEnough:
            uidsTooSmall.append((uid, CountVec_b[ii]))
            BLogger.startUIDSpecificLog(uid)
            BLogger.pprint(
                uidstatusStr + " TOO SMALL %.2f < %.2f" % (
                    CountVec_b[ii], BArgs['b_minNumAtomsForTargetComp']))
            BLogger.stopUIDSpecificLog(uid)
            continue

        eligibleSuffix = ''
        # Filter out uids we've failed on this particular batch before
        if uidRec_b['nFail'] > 0:
            prevBatchSize = uidRec_b['proposalBatchSize']
            prevTotalSize = uidRec_b['proposalTotalSize']

            curBatchSize = SSbatch.getCountForUID(uid)
            sizePercDiff = np.abs(curBatchSize - prevBatchSize) / (
                curBatchSize + 1e-100)
            sizeChangedEnoughToReactivate = sizePercDiff > \
                BArgs['b_minPercChangeInNumAtomsToReactivate']

            curTotalSize = SS.getCountForUID(uid)
            totalPercDiff = np.abs(curTotalSize - prevTotalSize) / (
                curTotalSize + 1e-100)
            totalsizeChangedEnoughToReactivate = totalPercDiff > \
                BArgs['b_minPercChangeInNumAtomsToReactivate']

            if sizeChangedEnoughToReactivate:
                eligibleSuffix = \
                    "REACTIVATE BY BATCH SIZE." + \
                    "\n Batch size percDiff %.2f > %.2f" % (
                        sizePercDiff,
                        BArgs['b_minPercChangeInNumAtomsToReactivate']) \
                    + "\n prevBatchSize %9.2f \n curBatchSize %9.2f" % (
                        prevBatchSize, curBatchSize)
                uidRec_b['nFail'] = 0  # Reactivated
            elif totalsizeChangedEnoughToReactivate:
                eligibleSuffix = \
                    "REACTIVATED BY TOTAL SIZE" + \
                    "\n Total size percDiff %.2f > %.2f" % (
                        totalPercDiff,
                        BArgs['b_minPercChangeInNumAtomsToReactivate']) \
                    + "\n prevTotalSize %9.1f \n curTotalSize %9.1f" % (
                        prevTotalSize, curTotalSize)
                uidRec_b['nFail'] = 0  # Reactivated
            else:
                uidsWithFailRecord.append(uid)
                BLogger.startUIDSpecificLog(uid)
                BLogger.pprint(
                    uidstatusStr + " DISQUALIFIED FOR PAST FAILURE")
                BLogger.stopUIDSpecificLog(uid)
                continue
        # If we've made it here, the uid is eligible.
        eligible_mask[ii] = 1
        BLogger.startUIDSpecificLog(uid)
        BLogger.pprint(uidstatusStr + " ELIGIBLE " + eligibleSuffix)
        BLogger.stopUIDSpecificLog(uid)

    # Notify about uids retained
    if 'b_targetUIDs' not in MovePlans:
        MovePlans['b_targetUIDs'] = list()
    msg = "%d/%d UIDs retained from preexisting proposals." % (
        len(MovePlans['b_targetUIDs']), K)
    BLogger.pprint(msg)

    # Log info about busy disqualifications
    nDQ_toobusy = len(uidsBusyWithOtherMoves)
    nDQ_pastfail = len(uidsWithFailRecord)
    msg = "%d/%d UIDs too busy with other moves (merge/delete)." % (
        nDQ_toobusy, K)
    BLogger.pprint(msg)

    # Log info about toosmall disqualification
    nDQ_toosmall = len(uidsTooSmall)
    msg = "%d/%d UIDs too small (too few %s in current batch)." + \
        " Required size >= %d (--b_minNumAtomsForTargetComp)"
    msg = msg % (nDQ_toosmall, K, atomstr,
                 BArgs['b_minNumAtomsForTargetComp'])
    BLogger.pprint(msg, 'debug')
    if nDQ_toosmall > 0 and doPrintLotsOfDetails:
        lineUID = vec2str([u[0] for u in uidsTooSmall])
        lineSize = vec2str([u[1] for u in uidsTooSmall])
        BLogger.pprint(
            [lineUID, lineSize],
            prefix=['%7s' % 'uids', '%7s' % labelstr],
        )

    # Notify about past failure disqualifications to the log
    BLogger.pprint(
        '%d/%d UIDs disqualified for past failures.' % (
            nDQ_pastfail, K),
        'debug')
    if nDQ_pastfail > 0 and doPrintLotsOfDetails:
        lineUID = vec2str(uidsWithFailRecord)
        BLogger.pprint(lineUID)
    # Store nDQ counts for reporting.
    MovePlans['b_nDQ_toosmall'] = nDQ_toosmall
    MovePlans['b_nDQ_toobusy'] = nDQ_toobusy
    MovePlans['b_nDQ_pastfail'] = nDQ_pastfail
    # Finalize list of eligible UIDs
    eligibleUIDs = SS.uids[eligible_mask]
    BLogger.pprint('%d/%d UIDs eligible for new proposal' % (
        len(eligibleUIDs), K))
    # EXIT if nothing eligible.
    if len(eligibleUIDs) == 0:
        BLogger.pprint('')
        assert 'b_targetUIDs' in MovePlans
        return MovePlans

    # Record all uids that are eligible!
    # And make vector of how recently they have failed in other attempts
    FailVec = np.inf * np.ones(K)
    for uid in eligibleUIDs:
        # Stamp the record of THIS uid (previously a stale reference to
        # the last uid visited in the loop above was stamped instead).
        MoveRecordsByUID[uid]['b_latestEligibleLap'] = lapFrac
        k = SS.uid2k(uid)
        FailVec[k] = MoveRecordsByUID[uid]['b_nFailRecent']

    if doPrintLotsOfDetails:
        lineUID = vec2str(eligibleUIDs)
        lineSize = vec2str(CountVec_all[eligible_mask])
        lineBatchSize = vec2str(CountVec_b[eligible_mask])
        lineFail = vec2str(FailVec[eligible_mask])
        BLogger.pprint(
            [lineUID, lineSize, lineBatchSize, lineFail],
            prefix=[
                '%7s' % 'uids',
                '%7s' % 'cnt_ttl',
                '%7s' % 'cnt_b',
                '%7s' % 'nFail',
            ],
        )

    # Figure out how many new states we can target this round.
    # Prioritize the top comps as ranked by the desired score
    # until we max out the budget of Kmax total comps.
    maxnewK = BArgs['Kmax'] - SS.K
    totalnewK_perEligibleComp = np.minimum(
        np.ceil(CountVec_b[eligible_mask]),
        np.minimum(BArgs['b_Kfresh'], maxnewK))
    # TODO: Worry about retained ids with maxnewK
    # presumably ranks by fewest recent failures, then by batch count;
    # verify against argsortBigToSmallByTiers.
    sortorder = argsortBigToSmallByTiers(
        -1 * FailVec[eligible_mask], CountVec_b[eligible_mask])
    sortedCumulNewK = np.cumsum(totalnewK_perEligibleComp[sortorder])
    nToKeep = np.searchsorted(sortedCumulNewK, maxnewK + 0.0042)
    if nToKeep == 0:
        # Always try at least one proposal
        nToKeep = 1
    keepEligibleIDs = sortorder[:nToKeep]
    newK = np.minimum(sortedCumulNewK[nToKeep - 1], maxnewK)
    chosenUIDs = [eligibleUIDs[s] for s in keepEligibleIDs]

    # Report when the Kmax budget forced us to drop eligible uids.
    # Compare against the eligible count: chosenUIDs always has exactly
    # nToKeep entries, so comparing against it could never trigger.
    if nToKeep < len(eligibleUIDs):
        BLogger.pprint(
            'Selected %d/%d eligible UIDs to do proposals.' % (
                nToKeep, len(eligibleUIDs)) +
            '\n Could create up to %d new clusters, %d total clusters.' % (
                newK, newK + SS.K) +
            '\n Total budget allows at most %d clusters (--Kmax).' % (
                BArgs['Kmax']),
        )
    BLogger.pprint(
        '%d/%d UIDs chosen for new proposals (rankby: cnt_b)' % (
            len(chosenUIDs), len(eligibleUIDs)))
    if doPrintLotsOfDetails:
        lineUID = vec2str(chosenUIDs)
        lineSize = vec2str(CountVec_all[eligible_mask][keepEligibleIDs])
        lineBatchSize = vec2str(CountVec_b[eligible_mask][keepEligibleIDs])
        lineFail = vec2str(FailVec[eligible_mask][keepEligibleIDs])
        BLogger.pprint(
            [lineUID, lineSize, lineBatchSize, lineFail],
            prefix=[
                '%7s' % 'uids',
                '%7s' % 'cnt_ttl',
                '%7s' % 'cnt_b',
                '%7s' % 'fail',
            ],
        )

    for uid in chosenUIDs:
        uidRec = MoveRecordsByUID[uid]
        uidRec['b_proposalBatchID'] = batchID
        uidRec_b = MoveRecordsByUID[uid]['byBatch'][batchID]
        uidRec_b['proposalBatchSize'] = SSbatch.getCountForUID(uid)
        # Whole-dataset size, so the reactivation check above (which
        # compares against SS.getCountForUID) measures a real change.
        uidRec_b['proposalTotalSize'] = SS.getCountForUID(uid)

    # Aggregate all uids
    MovePlans['b_newlyChosenTargetUIDs'] = chosenUIDs
    MovePlans['b_preExistingTargetUIDs'] = \
        [u for u in MovePlans['b_targetUIDs']]
    MovePlans['b_targetUIDs'].extend(chosenUIDs)

    BLogger.pprint('')
    return MovePlans
def selectShortListForBirthAtLapStart(
        hmodel, SS,
        MoveRecordsByUID=None,
        MovePlans=None,
        lapFrac=0,
        b_minNumAtomsForTargetComp=2,
        **BArgs):
    ''' Select list of comps to possibly target with birth during next lap.

    Shortlist uids are intended to be kept aside for a birth move,
    so that merges/deletes planned later in this lap leave them alone.

    Parameters
    ----------
    hmodel : object
        Only hmodel.obsModel.K (current number of comps) is read here.
    SS : object or None
        When not None, must provide K, uids and getCountVec().
        When None, the first comps (by index) are shortlisted instead.
    MoveRecordsByUID : dict, optional
        Maps uid to a record dict. Records for unseen uids are created
        in place as defaultdict(int). A fresh dict is used when omitted.
    MovePlans : dict, optional
        Updated in place and returned. A fresh dict is used when omitted.
    lapFrac : number
        Forwarded to canBirthHappenAtLap.
    b_minNumAtomsForTargetComp : number
        Comps whose count is <= this value are considered too small.
    **BArgs : dict
        Must contain 'Kmax'. Forwarded to canBirthHappenAtLap.

    Returns
    -------
    MovePlans : dict with updated fields
    * b_shortlistUIDs : list of ints,
        Each uid in b_shortlistUIDs could be a promising birth target.
        None of these should be touched by deletes or merges in this lap.
    * b_nDQ_toosmall, b_nDQ_pastfail, b_nDQ_toobusy : ints
        Counts of disqualified uids (toobusy is always 0 here).
    * b_roomToGrow : Kmax - K (0 if birth cannot happen at this lap)
    * b_maxLenShortlist : int, largest allowed shortlist length
    '''
    # Use None sentinels so separate calls never share one default dict.
    if MoveRecordsByUID is None:
        MoveRecordsByUID = dict()
    if MovePlans is None:
        MovePlans = dict()

    MovePlans['b_shortlistUIDs'] = list()
    MovePlans['b_nDQ_toosmall'] = 0
    MovePlans['b_nDQ_pastfail'] = 0
    MovePlans['b_nDQ_toobusy'] = 0
    MovePlans['b_roomToGrow'] = 0
    MovePlans['b_maxLenShortlist'] = 0
    if not canBirthHappenAtLap(lapFrac, **BArgs):
        BLogger.pprint('')
        return MovePlans

    K = hmodel.obsModel.K
    KroomToGrow = BArgs['Kmax'] - K
    MovePlans['b_roomToGrow'] = KroomToGrow
    # Each birth adds at least 2 comps.
    # If we have 10 slots left, we can do at most 5 births.
    # Floor division keeps this usable as a slice bound on Python 3
    # (true division would yield a float and break the slicing below).
    maxLenShortlist = KroomToGrow // 2
    MovePlans['b_maxLenShortlist'] = maxLenShortlist

    # EXIT: early, if no room to grow.
    if KroomToGrow <= 1:
        BLogger.pprint(
            "Cannot shortlist any comps for birth." +
            " Adding 2 more comps to K=%d exceeds limit of %d (--Kmax)." % (
                K, BArgs['Kmax'])
        )
        BLogger.pprint('')
        return MovePlans

    # Log reasons for shortlist length
    if maxLenShortlist < K:
        msg = " Limiting shortlist to %d possible births this lap." % (
            maxLenShortlist)
        msg += " Any more would cause current K=%d to exceed Kmax=%d" % (
            K, BArgs['Kmax'])
        BLogger.pprint(msg)

    # Handle initialization case: SS is None
    # Must just select all possible comps
    if SS is None:
        shortlistUIDs = np.arange(K).tolist()
        shortlistUIDs = shortlistUIDs[:maxLenShortlist]
        MovePlans['b_shortlistUIDs'] = shortlistUIDs
        BLogger.pprint(
            "No SS provided. Shortlist contains %d possible comps" % (
                len(shortlistUIDs)))
        BLogger.pprint('')
        return MovePlans
    assert SS.K == K

    CountVec = SS.getCountVec()
    # Builtin bool gives the same dtype as the removed np.bool8 alias.
    eligible_mask = np.zeros(K, dtype=bool)
    nTooSmall = 0
    nPastFail = 0
    for k, uid in enumerate(SS.uids):
        if uid not in MoveRecordsByUID:
            MoveRecordsByUID[uid] = defaultdict(int)
        uidRec = MoveRecordsByUID[uid]
        tooSmall = CountVec[k] <= b_minNumAtomsForTargetComp
        hasFailRecord = uidRec['b_nFailRecent'] > 0
        if uidRec['b_tryAgainFutureLap'] > 0:
            # Flagged for retry: eligible regardless of size or failures.
            eligible_mask[k] = 1
            MovePlans['b_shortlistUIDs'].append(uid)
        elif (not tooSmall) and (not hasFailRecord):
            eligible_mask[k] = 1
            MovePlans['b_shortlistUIDs'].append(uid)
        elif tooSmall:
            nTooSmall += 1
        else:
            assert hasFailRecord
            nPastFail += 1
    assert len(MovePlans['b_shortlistUIDs']) == np.sum(eligible_mask)

    # Rank the shortlist by size, keeping only as many as the budget allows.
    # NOTE(review): argsort_bigtosmall_stable is defined elsewhere;
    # presumably it returns indices ordering counts largest-first.
    if maxLenShortlist < len(MovePlans['b_shortlistUIDs']):
        sortIDs = argsort_bigtosmall_stable(CountVec[eligible_mask])
        sortIDs = sortIDs[:maxLenShortlist]
        MovePlans['b_shortlistUIDs'] = [
            MovePlans['b_shortlistUIDs'][s] for s in sortIDs]
        shortlistCountVec = CountVec[eligible_mask][sortIDs]
    else:
        shortlistCountVec = CountVec[eligible_mask]

    MovePlans['b_nDQ_toosmall'] = nTooSmall
    MovePlans['b_nDQ_pastfail'] = nPastFail
    nShortList = len(MovePlans['b_shortlistUIDs'])
    assert nShortList <= maxLenShortlist
    BLogger.pprint("%d/%d uids selected for short list." % (nShortList, K))
    if nShortList > 0:
        lineUID = vec2str(MovePlans['b_shortlistUIDs'])
        lineSize = vec2str(shortlistCountVec)
        BLogger.pprint(
            [lineUID, lineSize],
            prefix=['%7s' % 'uids', '%7s' % 'size'],
        )
    BLogger.pprint('')
    return MovePlans