def putCurrentClusters(self, clusters):
    """
    Reconcile the currently stored clusters against `clusters`.

    For every cluster in `clusters` that is not among the current
    clusters, a JOB_PROCESSNEWCLUSTER worker job is enqueued. Every
    current cluster that is absent from `clusters` is passed to
    clusterTableManager.deleteClusters.

    Membership is decided with the `in` operator, so it relies on
    however the cluster objects compare for equality.
    NOTE(review): `clusters` is iterated once and then used for
    membership tests - presumably a re-iterable collection; confirm
    against callers.
    """
    jobQueue = ClusterJobManager()
    current = list(self.getCurrentClusters())

    # Incoming clusters that are not stored yet.
    toProcess = []
    for candidate in clusters:
        if candidate in current:
            continue
        toProcess.append(candidate)

    # Stored clusters that are missing from the incoming collection.
    toDelete = []
    for stored in current:
        if stored in clusters:
            continue
        toDelete.append(stored)

    for fresh in toProcess:
        jobArgs = {JOBARG_PROCESSNEWCLUSTER_CLUSTER: list(fresh)}
        newClusterJob = WorkerJob(JOB_PROCESSNEWCLUSTER, jobArgs)
        jobQueue.enqueueJob(newClusterJob)
        logging.info(
            "Put process new cluster job. Cluster id: %s.",
            fresh.id)

    logging.info(
        "Number of clusters to delete are: %i",
        len(toDelete))
    self.clusterTableManager.deleteClusters(toDelete)
def archiveStaleDocs():
    """
    Remove the docs of old clusters from the current working set.

    Archives old clusters through ClusterManager.archiveOldClusters and
    enqueues one JOB_CLEANUPDOC worker job for every doc key found in
    the clusters it returns.

    Run this job periodically.
    """
    clusters = ClusterManager()
    jobQueue = MinerJobManager()

    logging.info("Archiving old clusters.")
    archived = clusters.archiveOldClusters()

    # Walk every doc key of every archived cluster, in cluster order.
    staleDocIds = (docId for group in archived for docId in group)
    for docId in staleDocIds:
        cleanupJob = WorkerJob(
            JOB_CLEANUPDOC,
            {JOBARG_CLEANUPDOC_DOCID: docId})
        jobQueue.enqueueJob(cleanupJob)
        logging.info(
            "Put cleanup doc job for docId: %s. Job id: %s",
            docId,
            cleanupJob.jobId)

    logging.info(
        "Archived old clusters and cleaned up docs in them from working set.")