class PostedTask(object):
    """
    In-memory representation of a task posted (or to be posted) to a queue.

    All timestamps produced by this class are naive datetimes expressed in UTC
    (they are built from ``datetime.datetime.utcnow()``).
    """

    class RuntimeException(Exception):
        """Raised by handle_result() when the stored result has status 'failed'."""

    def __init__(self, monque, task, args, kwargs, config):
        """
        :param monque: owner object; wait() reads its ``activity_log`` and
            ``results_collection`` attributes.
        :param task: task object; must provide ``config``, ``logger`` and
            ``get_name()``.
        :param args: positional arguments stored in the task payload.
        :param kwargs: keyword arguments stored in the task payload.
        :param config: dict of per-post options. Keys read here: ``queue``,
            ``priority``, ``at``, ``delay``, ``max_in_queue``, ``max_running``,
            ``must_be_unique``, ``unique_kwargs``.
        """
        self.config = Configuration(**config)
        self.config.parent = task.config

        self.monque = monque
        self.task = task
        self.name = task.get_name()
        self.args = args
        self.kwargs = kwargs

        self.collection = None
        self.id = None
        self.doc = None
        self.result = None
        self.logger = self.task.logger

        self.queue = self.config.get("queue", "default")
        self.priority = self.config.get("priority", None)
        self.start_time = self.get_start_time()

        self.max_in_queue = int(self.config.get("max_in_queue", 0))
        self.max_running = int(self.config.get("max_running", 0))
        self.must_be_unique = self.config.get("must_be_unique", False)
        self.unique_kwargs = self.config.get("unique_kwargs", None)

    @staticmethod
    def _is_number(value):
        """Return True for int/float values, excluding bool."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    def get_start_time(self):
        """
        Compute the earliest time at which the task may run.

        The ``at`` option (a datetime, or a numeric Unix timestamp) takes
        precedence over ``delay`` (a timedelta, or a number of seconds).

        :returns: a naive UTC datetime, or None when neither option is set.
        :raises ValueError: if ``at`` or ``delay`` has an unsupported type.
        """
        absolute = self.config.get("at")
        if absolute:
            if isinstance(absolute, datetime.datetime):
                return absolute
            if self._is_number(absolute):
                # Every other timestamp in this class is naive UTC, so a Unix
                # timestamp must be converted to UTC rather than local time.
                return datetime.datetime.utcfromtimestamp(absolute)
            raise ValueError("Unrecognized format of 'at': %s" % (absolute,))

        delay = self.config.get("delay")
        if delay:
            if isinstance(delay, datetime.timedelta):
                return datetime.datetime.utcnow() + delay
            if self._is_number(delay):
                return datetime.datetime.utcnow() + datetime.timedelta(seconds=delay)
            raise ValueError("Unrecognized format of 'delay': %s" % (delay,))

        return None

    def save_into(self, collection):
        """
        This is where the task actually gets inserted into the collection.
        TODO: options for write concern, etc?

        The serialized document is built on first use and cached in
        ``self.doc``; ``self.id`` is taken from the document's ``_id`` after
        the save.
        """
        # NOTE(review): self.collection is not assigned here, so mark_running(),
        # remove() and unget() are no-ops unless a caller sets it - confirm
        # whether that is intended.
        if not self.doc:
            self.doc = self.serialize()
        collection.save(self.doc)
        self.id = self.doc["_id"]

    def mark_running(self):
        """
        Flag the task as 'running', both in the cached document (if any) and
        in the backing collection (if both collection and id are known).
        """
        started_at = datetime.datetime.utcnow()

        if self.doc:
            self.doc["status"] = "running"
            self.doc["started_at"] = started_at

        # The values are taken from locals, not from self.doc, so that the
        # database update also works when no document is cached.
        if self.collection is not None and self.id:
            self.collection.find_and_modify(
                query={"_id": self.id},
                update={"$set": {"status": "running", "started_at": started_at}},
            )

    def remove(self):
        """Delete the task's document from the collection, if it is known."""
        if self.collection is not None and self.id:
            self.collection.find_and_modify(query={"_id": self.id}, remove=True)

    def serialize(self):
        """
        Return a serialized version (dict) of the task, as it is to be stored
        in the collection.

        Only constraints that are actually set are included under the
        ``constraints`` key.
        """
        now = datetime.datetime.utcnow()
        doc = {
            "name": self.name,
            "class": self.task.__module__ + "." + self.task.__class__.__name__,
            "queue": self.queue,
            "payload": {"args": self.args, "kwargs": self.kwargs},
            "constraints": {},
            "created_at": now,
            "submitted_at": now,
            "status": "pending",
        }

        constraints = doc["constraints"]
        # A priority of 0 is a valid value, hence the explicit None check.
        if self.priority is not None:
            constraints["priority"] = self.priority
        if self.start_time:
            constraints["start_time"] = self.start_time
        if self.max_in_queue:
            constraints["max_in_queue"] = self.max_in_queue
        if self.max_running:
            constraints["max_running"] = self.max_running
        if self.must_be_unique:
            constraints["must_be_unique"] = True
        if self.unique_kwargs:
            constraints["unique_kwargs"] = self.unique_kwargs

        return doc

    def notify_workers(self, collection):
        """
        Add a doc to the collection (the activity log) that indicates new
        tasks in the queue, so workers that are tailing the collection can
        immediately pick it up.
        """
        collection.insert({"task": self.id, "queue": self.queue})

    def notify_results(self, collection):
        """
        Add a doc to the collection (the activity log) that indicates task
        results are available, so clients waiting for the results can
        immediately pick it up.
        """
        collection.insert({"result": self.id})

    @classmethod
    def get_next(cls, **kwargs):
        """
        Atomically claim the next runnable task from a collection.

        Keyword arguments:
          collection -- (required) collection to take the task from.
          queue      -- optional queue name, or list/tuple of queue names, to
                        restrict the search to; all queues when omitted/empty.
          worker     -- optional worker identifier stored on the claimed task.

        :returns: whatever ``collection.find_and_modify`` returns for the
            claimed task document.
        """
        collection = kwargs.pop("collection")
        queue = kwargs.pop("queue", None)
        worker = kwargs.pop("worker", None)

        # Set up the query filters:
        query = {"status": "pending"}
        if queue:
            if isinstance(queue, str):
                query["queue"] = queue
            elif isinstance(queue, (list, tuple)):
                if len(queue) == 1:
                    query["queue"] = queue[0]
                else:
                    query["queue"] = {"$in": list(queue)}

        # Only tasks without a start time, or whose start time has passed.
        now = datetime.datetime.utcnow()
        query["$or"] = [
            {"constraints.start_time": {"$exists": False}},
            {"constraints.start_time": {"$lte": now}},
        ]

        # As soon as it is picked up, mark it as 'taken',
        # which is the pre-cursor state to 'running',
        # in which pre-run conditions are checked, etc
        update = {"$set": {"status": "taken", "taken_at": now, "worker": worker}}

        return collection.find_and_modify(
            query=query,
            update=update,
            new=True,
            sort=[
                ("constraints.priority", pymongo.DESCENDING),
                ("_id", pymongo.ASCENDING),
            ],
        )

    def unget(self):
        """
        Put a task back into the queue that was 'incorrectly' taken.
        Usually this is for a task that is taken, then one or more
        pre-execution tasks fails (e.g. too many running tasks of a given
        type).
        """
        if self.collection is None or not self.id:
            return

        self.logger.debug("Task unget() id=%s" % (self.id,))
        self.collection.find_and_modify(
            query={"_id": self.id},
            update={"$set": {"status": "pending"}},
        )

    def wait(self, timeout=None):
        """
        Wait for the results of the task to be posted to the result queue.

        If timeout (given in seconds) is not None, wait at most that long for
        the result; a timeout of 0 performs a single non-blocking check. With
        timeout=None this blocks until a result notification shows up.

        :returns: the task's result (see handle_result), or None if no result
            is available within the timeout.
        :raises PostedTask.RuntimeException: if the task failed.
        """
        query = {"result": self.id}
        deadline = None if timeout is None else time.time() + timeout

        # Poll the activity log until a notification for this task shows up.
        while deadline is None or time.time() < deadline:
            notifications = self.monque.activity_log.find(
                query, tailable=True, await_data=False
            )
            notified = False
            for _ in notifications:
                notified = True
                break
            if notified:
                break
            time.sleep(0.1)

        # Look the result up even after a timeout, in case it arrived without
        # the notification having been observed.
        result = self.monque.results_collection.find_one(self.id)
        if result:
            return self.handle_result(result)
        return None

    def handle_result(self, result):
        """
        Remember the result document and translate it into a return value.

        :returns: ``result["result"]`` for status 'completed'; None for any
            status other than 'completed' or 'failed'.
        :raises PostedTask.RuntimeException: for status 'failed', carrying
            ``result["exception"]``.
        """
        self.result = result
        status = self.result.get("status", None)
        if status == "completed":
            return self.result["result"]
        if status == "failed":
            raise PostedTask.RuntimeException(self.result["exception"])
        return None