# Example 1
class PostedTask(object):
    """
    In-memory representation of a task posted (or to be posted) to a queue.

    Wraps a task instance plus its call arguments and per-post configuration,
    and knows how to serialize itself into, and manage its lifecycle within,
    a MongoDB collection (pending -> taken -> running -> completed/failed).
    """

    def __init__(self, monque, task, args, kwargs, config):
        # Per-post config overlays the task's own config via parent chaining,
        # so lookups fall back to the task defaults.
        self.config = Configuration(**config)
        self.config.parent = task.config

        self.monque = monque
        self.task = task
        self.name = task.get_name()
        self.args = args
        self.kwargs = kwargs

        # Populated once the task is actually saved into a collection:
        self.collection = None
        self.id = None
        self.doc = None

        self.logger = self.task.logger

        self.queue = self.config.get("queue", "default")

        self.priority = self.config.get("priority", None)

        # Earliest time the task may run (None = immediately):
        self.start_time = self.get_start_time()
        self.result = None

        # Optional execution constraints (0 / False = unconstrained):
        self.max_in_queue = int(self.config.get("max_in_queue", 0))
        self.max_running = int(self.config.get("max_running", 0))
        self.must_be_unique = self.config.get("must_be_unique", False)
        self.unique_kwargs = self.config.get("unique_kwargs", None)

    def get_start_time(self):
        """
        Compute the earliest start time from the 'at' (absolute datetime or
        epoch timestamp) or 'delay' (timedelta or seconds) config values.

        Returns a datetime, or None when the task may run immediately.
        Raises ValueError when 'at' / 'delay' has an unrecognized type.
        'at' takes precedence over 'delay' when both are set.
        """
        absolute = self.config.get("at")
        if absolute:
            if isinstance(absolute, datetime.datetime):
                return absolute
            elif isinstance(absolute, (int, float)):
                # Numeric 'at' is a unix epoch timestamp.
                return datetime.datetime.fromtimestamp(absolute)
            raise ValueError("Unrecognized format of 'at': %s" % (absolute))

        delay = self.config.get("delay")
        if delay:
            # BUG FIX: originally tested isinstance(absolute, ...) here, so a
            # timedelta 'delay' incorrectly fell through to the error path.
            if isinstance(delay, datetime.timedelta):
                return datetime.datetime.utcnow() + delay
            elif isinstance(delay, (int, float)):
                return datetime.datetime.utcnow() + datetime.timedelta(seconds=delay)
            # BUG FIX: the original was '"...%s"(delay)' -- calling a string --
            # which raised TypeError instead of the intended ValueError.
            raise ValueError("Unrecognized format of 'delay': %s" % (delay))

        return None

    def save_into(self, collection):
        """
        This is where the task actually gets inserted into the collection.
        Caches the serialized doc and records the assigned _id on self.id.
        TODO: options for write concern, etc?
        """
        if not self.doc:
            self.doc = self.serialize()
        collection.save(self.doc)
        self.id = self.doc["_id"]

    def mark_running(self):
        """
        Transition the task to the 'running' state, both in the in-memory
        doc and (when already saved) in the backing collection.
        """
        if self.doc:
            self.doc["status"] = "running"
            self.doc["started_at"] = datetime.datetime.utcnow()

        if self.collection and self.id:
            self.collection.find_and_modify(
                query={"_id": self.id},
                update={"$set": {"status": self.doc["status"], "started_at": self.doc["started_at"]}},
            )

    def remove(self):
        """Delete the task from its collection, if it was ever saved."""
        if self.collection and self.id:
            self.collection.find_and_modify(query={"_id": self.id}, remove=True)

    def serialize(self):
        """
        Return a serialized version (dict) of the task, as it is to be stored
        in the collection. Constraints are included only when set, so workers
        can cheaply test for their presence.
        """
        doc = {
            "name": self.name,
            "class": self.task.__module__ + "." + self.task.__class__.__name__,
            "queue": self.queue,
            "payload": {"args": self.args, "kwargs": self.kwargs},
            "constraints": {},
            "created_at": datetime.datetime.utcnow(),
            "submitted_at": datetime.datetime.utcnow(),
            "status": "pending",
        }

        # Add constraints:
        if self.priority is not None:
            doc["constraints"]["priority"] = self.priority
        if self.start_time:
            doc["constraints"]["start_time"] = self.start_time
        if self.max_in_queue:
            doc["constraints"]["max_in_queue"] = self.max_in_queue
        if self.max_running:
            doc["constraints"]["max_running"] = self.max_running
        if self.must_be_unique:
            doc["constraints"]["must_be_unique"] = True
            if self.unique_kwargs:
                doc["constraints"]["unique_kwargs"] = self.unique_kwargs

        return doc

    def notify_workers(self, collection):
        """
        Add a doc to the collection (the activity log) that indicates new tasks in the queue,
        so workers that are tailing the collection can immediately pick it up
        """
        collection.insert({"task": self.id, "queue": self.queue})

    def notify_results(self, collection):
        """
        Add a doc to the collection (the activity log) that indicates task results are available,
        so clients waiting for the results can immediately pick it up
        """
        collection.insert({"result": self.id})

    @classmethod
    def get_next(cls, **kwargs):
        """
        Atomically claim the next runnable pending task from `collection`.

        Keyword args: collection (required), queue (str or list of str,
        optional filter), worker (optional identifier recorded on the doc).
        Returns the claimed task doc (status already set to 'taken'), or
        None when no eligible task exists.
        """
        collection = kwargs.pop("collection")
        queue = kwargs.pop("queue", None)
        worker = kwargs.pop("worker", None)

        # Set up the query filters:
        query = {"status": "pending"}

        if queue:
            if isinstance(queue, str):
                query["queue"] = queue
            elif isinstance(queue, list):
                # Single-element list degrades to an exact match:
                if len(queue) == 1:
                    query["queue"] = queue[0]
                else:
                    query["queue"] = {"$in": queue}

        # Only tasks whose start_time constraint is absent or already due:
        now = datetime.datetime.utcnow()
        query["$or"] = [{"constraints.start_time": {"$exists": False}}, {"constraints.start_time": {"$lte": now}}]

        # As soon as it is picked up, mark it as 'taken',
        # which is the pre-cursor state to 'running',
        # in which pre-run conditions are checked, etc
        update = {"$set": {"status": "taken", "taken_at": datetime.datetime.utcnow(), "worker": worker}}

        # Highest priority first, then oldest (_id order) for FIFO fairness:
        found = collection.find_and_modify(
            query=query,
            update=update,
            new=True,
            sort=[("constraints.priority", pymongo.DESCENDING), ("_id", pymongo.ASCENDING)],
        )

        return found

    def unget(self):
        """
        Put a task back into the queue that was 'incorrectly' taken.
        Usually this is for a task that is taken, then one or more pre-execution tasks
        fails (e.g. too many running tasks of a given type)
        """
        if not self.collection or not self.id:
            return

        self.logger.debug("Task unget() id=%s" % (self.id))

        # BUG FIX: was '$et', which is not a MongoDB update operator, so the
        # task was never actually returned to 'pending'.
        self.collection.find_and_modify(query={"_id": self.id}, update={"$set": {"status": "pending"}})

    def wait(self, timeout=None):
        """
        Wait for the results of the task to be posted to the result queue.
        If timeout (given in seconds) is not None, then wait at least that long
        for the result. If no result is available within that time, returns None.
        If the result is received, the result is returned back.
        """

        query = {"result": self.id}

        expire_at = None
        if timeout:
            expire_at = time.time() + timeout

        # Poll the tailable activity log until a result notification shows up
        # (or the deadline passes):
        while expire_at is None or time.time() < expire_at:
            tail = self.monque.activity_log.find(query, tailable=True, await_data=False)
            got = False
            for doc in tail:
                got = True

            if got:
                break

            time.sleep(0.1)

        result = self.monque.results_collection.find_one(self.id)
        if result:
            return self.handle_result(result)

        return None

    def handle_result(self, result):
        """
        Interpret a result doc: return the stored result value for a
        'completed' task, raise RuntimeException for a 'failed' one,
        and return None for any other status.
        """
        self.result = result

        status = self.result.get("status", None)

        if status == "completed":
            return self.result["result"]

        elif status == "failed":
            exception = self.result["exception"]
            raise PostedTask.RuntimeException(exception)

    class RuntimeException(Exception):
        """Raised by handle_result() when the remote task execution failed."""
        pass