예제 #1
0
    def putCurrentClusters(self, clusters):
        """
        Reconcile the stored clusters with the given `clusters`.

        Clusters present in `clusters` but not among the existing ones get a
        process-new-cluster job enqueued; existing clusters that are absent
        from `clusters` are handed to the cluster table manager for deletion.

        Args:
            clusters: iterable of cluster objects. Each cluster is itself
                iterable (its members are sent as the job argument) and
                exposes an `id` attribute used for logging.
        """
        # `clusters` is traversed once and then used for repeated membership
        # tests below. Snapshot it so a one-shot iterable (e.g. a generator)
        # is not exhausted after the first pass, which would make every
        # existing cluster look expired and get deleted.
        clusters = list(clusters)

        jobManager = ClusterJobManager()

        existingClusters = list(self.getCurrentClusters())
        newClusters = [
            cluster for cluster in clusters if cluster not in existingClusters
        ]
        expiredClusters = [
            cluster for cluster in existingClusters if cluster not in clusters
        ]

        for cluster in newClusters:
            job = WorkerJob(JOB_PROCESSNEWCLUSTER,
                            {JOBARG_PROCESSNEWCLUSTER_CLUSTER: list(cluster)})
            jobManager.enqueueJob(job)
            logging.info("Put process new cluster job. Cluster id: %s.",
                         cluster.id)

        logging.info("Number of clusters to delete are: %i",
                     len(expiredClusters))
        self.clusterTableManager.deleteClusters(expiredClusters)
예제 #2
0
def archiveStaleDocs():
  """
  Archive old clusters and remove their docs from the current working set.

  Asks the cluster manager to archive old clusters, then enqueues one
  cleanup-doc job for every doc key found in the clusters it returns.
  Run this job periodically.
  """

  clusterManager = ClusterManager()
  jobManager = MinerJobManager()

  logging.info("Archiving old clusters.")

  # Flatten the archived clusters into a single stream of doc keys,
  # preserving cluster order and the order of keys within each cluster.
  staleDocKeys = (
    docKey
    for staleCluster in clusterManager.archiveOldClusters()
    for docKey in staleCluster
  )

  for docKey in staleDocKeys:
    cleanupJob = WorkerJob(JOB_CLEANUPDOC, {JOBARG_CLEANUPDOC_DOCID: docKey})
    jobManager.enqueueJob(cleanupJob)
    logging.info(
      "Put cleanup doc job for docId: %s. Job id: %s",
      docKey,
      cleanupJob.jobId)

  logging.info("Archived old clusters and cleaned up docs in them from working set.")