Пример #1
0
 def queue(cls, stage, state, data):
     """Enqueue ``data`` on ``stage`` unless that stage's backlog is too big.

     The job is identified by the stringified ``state['crawler']`` entry
     and ``state['run_id']``. ``state`` is also passed along as the
     context of the queued task.

     Raises:
         QueueTooBigError: if the stage's reported ``pending`` count is
             greater than ``MAX_QUEUE_LENGTH``.
     """
     crawler_name = str(state.get('crawler'))
     target = Job(conn, crawler_name, state['run_id']).get_stage(stage)
     pending = target.get_status().get('pending')
     if pending > MAX_QUEUE_LENGTH:
         # Refuse to grow the backlog any further.
         raise QueueTooBigError(
             "queue for %s:%s too big." % (crawler_name, stage)
         )
     target.queue(payload=data, context=state)
Пример #2
0
 def queue(cls, stage, state, data):
     """Enqueue ``data`` on ``stage`` unless that stage's backlog is too big.

     The job is identified by the stringified ``state["crawler"]`` entry
     and ``state["run_id"]``; the job stage is looked up by
     ``stage.namespaced_name``. ``state`` is also passed along as the
     context of the queued task.

     Raises:
         QueueTooBigError: if the stage's reported ``pending`` count is
             greater than ``MAX_QUEUE_LENGTH``.
     """
     crawler_name = str(state.get("crawler"))
     run = Job(conn, crawler_name, state["run_id"])
     target = run.get_stage(stage.namespaced_name)
     # sync() is called before the status is read
     # (presumably to refresh the counters; confirm against the Job API).
     target.sync()
     pending = target.get_status().get("pending")
     if pending > MAX_QUEUE_LENGTH:
         raise QueueTooBigError(
             "queue for %s:%s too big." % (crawler_name, stage)
         )
     target.queue(payload=data, context=state)
Пример #3
0
    def handle(self,
               status,
               operation=None,
               exception=None,
               task=None,
               **payload):
        """Queue a status report for a processing event.

        Does nothing when ``WORKER_REPORTING`` is falsy. Otherwise the
        extra keyword arguments are extended with the dataset name, job
        ID, operation, status and timestamps, and the result is queued
        on the ``OP_REPORT`` stage of the same job.

        Args:
            status: Status to report; also used to build the
                ``"<status>_at"`` timestamp key.
            operation: Operation name; defaults to the ``stage``
                attribute of the stage being reported on.
            exception: Optional exception. When given, the reported
                status becomes ``Status.ERROR`` and the exception's class
                name and stringified message are attached.
            task: Optional task; falls back to ``self.task``. When a task
                is available, its serialized form is included and its
                stage is used instead of ``self.stage``.
            **payload: Additional fields to include in the report.
        """
        if not WORKER_REPORTING:
            return

        current_task = task or self.task
        if current_task is None:
            source_stage = self.stage
        else:
            payload["task"] = current_task.serialize()
            source_stage = current_task.stage

        dataset = source_stage.job.dataset.name
        job_id = source_stage.job.id
        operation = operation or source_stage.stage
        timestamp = datetime.utcnow()

        report = {
            "dataset": dataset,
            "operation": operation,
            "job": job_id,
            "status": status,
            "updated_at": timestamp,
            "%s_at" % status: timestamp,
            "has_error": False,
        }
        if exception is not None:
            # An exception overrides the reported status; the
            # "<status>_at" key above still uses the caller's status.
            report.update({
                "status": Status.ERROR,
                "has_error": True,
                "error_name": exception.__class__.__name__,
                "error_msg": stringify(exception),
            })
        payload.update(report)

        report_job = Job(source_stage.conn, dataset, job_id)
        report_job.get_stage(OP_REPORT).queue(payload)
Пример #4
0
def get_stage(collection, stage, job_id=None):
    """Return the ``stage`` stage of a job for ``collection``.

    The job is keyed by ``collection.foreign_id``. When ``job_id`` is
    falsy, a new ID is obtained from ``Job.random_id()``.
    """
    if not job_id:
        job_id = Job.random_id()
    return Job(kv, collection.foreign_id, job_id).get_stage(stage)
Пример #5
0
def get_stage(collection, stage, job_id=None):
    """Return the ``stage`` stage of a job for ``collection``.

    The job is keyed by the value of ``dataset_from_collection(collection)``.
    When ``job_id`` is falsy, a new ID is obtained from ``Job.random_id()``.
    """
    dataset_name = dataset_from_collection(collection)
    if not job_id:
        job_id = Job.random_id()
    return Job(kv, dataset_name, job_id).get_stage(stage)