class RequestedTasks(BaseCollection):
    """Wrapper around the `requested_tasks` collection.

    `schema` is the JSON-schema document installed as the collection
    validator by `initialize()`: documents must carry a string
    `schedule_name`, and `status` is restricted to `TaskStatus.all()`.
    """

    _name = "requested_tasks"
    schema = {
        "bsonType": "object",
        "required": ["schedule_name"],
        "properties": {
            "status": {"enum": TaskStatus.all()},
            "schedule_name": {"bsonType": "string"},
        },
    }

    def __init__(self, database=None):
        """Bind the collection to `database`, or to a fresh `Database()`.

        Args:
            database: database handle to use; a new `Database()` is created
                when omitted (None).
        """
        # Compare with None instead of truth-testing the handle: PyMongo
        # database objects refuse bool() (NotImplementedError in PyMongo 4).
        # NOTE(review): assumes Database is/wraps a pymongo Database — confirm.
        if database is None:
            database = Database()
        super().__init__(database, self._name)

    def initialize(self):
        """Create the collection indexes and install the schema validator."""
        # each index is named after the field it covers
        for field in ("status", "schedule_name", "timestamp.requested"):
            self.create_index(field, name=field)

        # attach the JSON-schema validator to the existing collection
        self.database.command(
            {"collMod": self._name, "validator": {"$jsonSchema": self.schema}}
        )
def handle_notification(task_id, event):
    """Send the notifications registered for `event` on task `task_id`.

    The task is looked up in `Tasks` first, then in `RequestedTasks`.
    Recipients declared on the task and those declared globally are both
    notified, task-level ones first. Returns None in every case; returns
    early when the event does not trigger notifications, when the task
    cannot be found, or when nobody asked to be notified.
    """
    # all "complete" statuses are notified under the single `ended` alias
    if event in TaskStatus.complete():
        event = "ended"

    # not a triggering event: nothing to do
    if event not in GlobalNotifications.events:
        return

    task = Tasks().find_one({"_id": task_id})
    if not task:
        task = RequestedTasks().find_one({"_id": task_id})
    if not task:
        return

    # round-trip through JSON so that we work on a safe, serializable copy
    task = json.loads(json.dumps(task, cls=Encoder))

    global_notifs = GlobalNotifications.entries.get(event, {})
    task_notifs = task.get("notification", {}).get(event, {})

    # no notification request for this event
    if not (global_notifs or task_notifs):
        return

    handlers = {
        "mailgun": handle_mailgun_notification,
        "webhook": handle_webhook_notification,
        "slack": handle_slack_notification,
    }
    requests = [*task_notifs.items(), *global_notifs.items()]
    for method, recipients in requests:
        handler = handlers.get(method)
        # unknown method or empty recipient list: skip
        if not handler or not recipients:
            continue
        handler(task, recipients)
def test_cancel_task(self, client, access_token, tasks):
    """Cancelling any task whose status is incomplete must answer 204."""
    for task in tasks:
        # only tasks in an incomplete status are exercised
        if task["status"] not in TaskStatus.incomplete():
            continue

        response = client.post(
            f"/tasks/{task['_id']}/cancel",
            headers={
                "Authorization": access_token,
                "Content-Type": "application/json",
            },
        )
        assert response.status_code == 204
def get_currently_running_tasks(worker_name):
    """Return the tasks `worker_name` is running right now, with their ETA.

    A task is selected when it is assigned to the worker and its status is
    not one of `TaskStatus.complete()`. Only the resources, platform,
    schedule name and timestamps are fetched; each returned document is then
    updated in place with whatever `get_task_eta` returns for it.
    """
    query = {"status": {"$nin": TaskStatus.complete()}, "worker": worker_name}
    projection = {
        "config.resources": 1,
        "config.platform": 1,
        "schedule_name": 1,
        "timestamp": 1,
    }

    # materialize the cursor before computing any ETA
    running_tasks = list(Tasks().find(query, projection))

    # enrich every running task with its ETA information
    for running in running_tasks:
        eta = get_task_eta(running, worker_name)
        running.update(eta)

    return running_tasks
def post(self, task_id: str, token: AccessToken.Payload):
    """Request the cancellation of an incomplete task.

    Records a `cancel_requested` event attributed to the caller, broadcasts
    the cancel request, and answers with an empty 204 response.

    Raises:
        TaskNotFound: no task with this id is in an incomplete status.
    """
    query = {"status": {"$in": TaskStatus.incomplete()}, "_id": task_id}
    task = Tasks().find_one(query, {"_id": 1})
    if task is None:
        raise TaskNotFound()

    payload = {"canceled_by": token.username}
    task_event_handler(task["_id"], TaskStatus.cancel_requested, payload)

    # broadcast cancel-request to worker
    BROADCASTER.broadcast_cancel_task(task_id)

    return Response(status=HTTPStatus.NO_CONTENT)
# validators

# -- numeric bounds
validate_priority = validate.Range(min=0, max=10)
validate_cpu = validate.Range(min=0)
validate_memory = validate.Range(min=0)
validate_disk = validate.Range(min=0)
validate_platform_value = validate.Range(min=0)

# -- string lengths
validate_schedule_name = validate.Length(min=2)
validate_not_empty = validate.Length(min=1)
validate_lang_code = validate.Length(min=2, max=3)
validate_worker_name = validate.Length(min=3)

# -- fixed value
validate_output = validate.Equal("/output")

# -- membership in a project-defined set of choices
validate_role = validate.OneOf(ROLES.keys())
validate_category = validate.OneOf(ScheduleCategory.all())
validate_warehouse_path = validate.OneOf(ScheduleCategory.all_warehouse_paths())
validate_offliner = validate.OneOf(Offliner.all())
validate_status = validate.OneOf(TaskStatus.all())
validate_event = validate.OneOf(TaskStatus.all_events())
validate_periodicity = validate.OneOf(SchedulePeriodicity.all())
validate_platform = validate.OneOf(Platform.all())


def validate_multiple_of_100(value):
    """Tell whether `value` divides evenly by 100."""
    remainder = value % 100
    return remainder == 0


# reusable fields

# optional integer field that falls back to 0 when missing; bounded with min=0
skip_field = fields.Integer(
    required=False,
    missing=0,
    validate=validate.Range(min=0),
)
def save_event(task_id: ObjectId, code: str, timestamp: datetime.datetime, **kwargs):
    """Save an event and its accompanying data to the database.

    Args:
        task_id: `_id` of the task document to update.
        code: event code; non-silent codes also become the task status.
        timestamp: moment the event happened.
        **kwargs: optional event payload. Recognized keys are copied onto
            the task document (see the mapping in the body); `file` is a
            dict describing a file event (`name`, `status`, and optionally
            `size`, `result`, `log`).

    Side effects: updates the task document, refreshes the schedule's most
    recent task status and, on `scraper_completed`, the schedule duration.
    """
    task_updates = {}

    # neither file events nor scraper_running should update timestamp list (not unique)
    # NOTE(review): the event push and the status update are taken to belong
    # to this branch as well (silent codes are not task statuses) — confirm.
    if code not in TaskStatus.silent_events():
        task_updates[f"timestamp.{code}"] = timestamp

        # insert event and sort by timestamp
        Tasks().update_one(
            {"_id": task_id},
            {
                "$push": {
                    "events": {
                        "$each": [{"code": code, "timestamp": timestamp}],
                        "$sort": {"timestamp": 1},
                    }
                }
            },
        )

        # update task status, timestamp and other fields
        task_updates["status"] = code

    # payload key -> field of the task document it is stored under
    payload_fields = {
        "worker": "worker",
        "canceled_by": "canceled_by",
        "command": "container.command",
        "image": "container.image",
        "exit_code": "container.exit_code",
        "stdout": "container.stdout",
        "stderr": "container.stderr",
        "progress": "container.progress",
        "timeout": "container.timeout",
        "log": "container.log",
        "task_log": "debug.log",
        "task_name": "debug.task_name",
        "task_args": "debug.task_args",
        "task_kwargs": "debug.task_kwargs",
        "traceback": "debug.traceback",
        "exception": "debug.exception",
    }
    for payload_key, update_key in payload_fields.items():
        # presence matters, not truthiness: None/0/"" values are stored too
        if payload_key in kwargs:
            task_updates[update_key] = kwargs[payload_key]

    # files are uploaded as they are created ; 3 events:
    # - one on file creation with name, size and status=created
    # - one on file upload complete with name and status=uploaded
    # - one on file check complete with result and log
    file_info = kwargs.get("file") or {}  # tolerate an explicit file=None
    file_name = file_info.get("name")
    if file_name:
        # mongo reads `.` in a $set key as a path separator, so dots in the
        # file name are swapped for U+FF0E FULLWIDTH FULL STOP. Written as an
        # escape so the character cannot silently be normalized back to `.`.
        # NOTE(review): confirm U+FF0E matches keys already stored/read.
        fkey = file_name.replace(".", "\uff0e")
        fstatus = file_info.get("status")
        if fstatus == "created":
            task_updates[f"files.{fkey}"] = {
                "name": file_name,
                "size": file_info.get("size"),  # missing in uploaded,
                "status": fstatus,
                f"{fstatus}_timestamp": timestamp,
            }
        elif fstatus in ("uploaded", "failed"):
            task_updates[f"files.{fkey}.status"] = fstatus
            task_updates[f"files.{fkey}.{fstatus}_timestamp"] = timestamp
        elif fstatus == "checked":
            task_updates[f"files.{fkey}.check_result"] = file_info.get("result")
            task_updates[f"files.{fkey}.check_log"] = file_info.get("log")
            task_updates[f"files.{fkey}.check_timestamp"] = timestamp

    # an empty $set is rejected by some MongoDB versions; skip it
    if task_updates:
        Tasks().update_one({"_id": task_id}, {"$set": task_updates})

    _update_schedule_most_recent_task_status(task_id)

    if code == TaskStatus.scraper_completed:
        task = Tasks().find_one({"_id": task_id}, {"schedule_name": 1})
        update_schedule_duration(task["schedule_name"])