def delete(self, requested_task_id: str, token: AccessToken.Payload):
    """Delete a requested task and report how many documents were removed.

    Raises ``TaskNotFound`` when no requested task has this ``_id``.
    Returns a JSON response of the form ``{"deleted": <deleted_count>}``.
    """
    selector = {"_id": requested_task_id}

    # only the _id is projected: this lookup is purely an existence check
    if RequestedTasks().find_one(selector, {"_id": 1}) is None:
        raise TaskNotFound()

    deleted_count = RequestedTasks().delete_one(selector).deleted_count
    return jsonify({"deleted": deleted_count})
def make_requested_task(database, make_event):
    """Yield a factory that inserts requested_task documents, then clean up.

    The yielded callable builds a requested_task document, inserts it into
    ``RequestedTasks(database=database)`` and returns it. Once the generator
    is resumed, every document created through the factory is deleted.
    """
    collection = RequestedTasks(database=database)
    created_ids = []

    def _make_requested_task(
        schedule_id=ObjectId(),
        schedule_name="",
        status=TaskStatus.requested,
        hostname="zimfarm_worker.com",
        requested_by="someone",
        priority=0,
    ):
        # NOTE: schedule_id and hostname are accepted but not used below
        # a single `requested` event, stamped with the current time
        requested_at = datetime.now()
        timestamp = {TaskStatus.requested: requested_at}
        events = [make_event(TaskStatus.requested, requested_at)]

        flags = {"api-key": "aaaaaa", "id": "abcde", "type": "channel"}
        image = {"name": "openzim/youtube", "tag": "latest"}
        resources = {"cpu": 3, "memory": 1024, "disk": 1024}
        config = {
            "flags": flags,
            "image": image,
            "task_name": "youtube",
            "warehouse_path": "/other",
            "resources": resources,
        }

        requested_task = {
            "_id": ObjectId(),
            "status": status,
            "schedule_name": schedule_name,
            "timestamp": timestamp,
            "events": events,
            "config": config,
            "notification": {},
            "priority": priority,
            "requested_by": requested_by,
        }

        collection.insert_one(requested_task)
        # remember the id so the document can be removed on teardown
        created_ids.append(requested_task["_id"])
        return requested_task

    yield _make_requested_task

    # teardown: drop everything the factory inserted
    collection.delete_many({"_id": {"$in": created_ids}})
def get(self, requested_task_id: str):
    """Return the requested task with this ``_id`` as a JSON response.

    Raises ``TaskNotFound`` when no such document exists.
    """
    found = RequestedTasks().find_one({"_id": requested_task_id})
    if found is not None:
        return jsonify(found)
    raise TaskNotFound()
def handle_notification(task_id, event):
    """Dispatch task-level and global notifications for an event on a task.

    Returns silently when the event is not a triggering one, when the task is
    found in neither ``Tasks`` nor ``RequestedTasks``, or when nobody asked to
    be notified for this event.
    """
    # every "complete" status is reported under the single `ended` alias
    if event in TaskStatus.complete():
        event = "ended"

    # nothing to do for events that never trigger notifications
    if event not in GlobalNotifications.events:
        return

    # look into running tasks first, then fall back to requested ones
    lookup = {"_id": task_id}
    task = Tasks().find_one(lookup)
    if not task:
        task = RequestedTasks().find_one({"_id": task_id})
    if not task:
        return

    # round-trip through JSON so handlers only ever see a serialized version
    task = json.loads(json.dumps(task, cls=Encoder))

    global_notifs = GlobalNotifications.entries.get(event, {})
    task_notifs = task.get("notification", {}).get(event, {})

    # no notification request at all for this event
    if not (global_notifs or task_notifs):
        return

    handlers = {
        "mailgun": handle_mailgun_notification,
        "webhook": handle_webhook_notification,
        "slack": handle_slack_notification,
    }

    # task-specific requests are served before the global ones
    pending = [*task_notifs.items(), *global_notifs.items()]
    for method, recipients in pending:
        handler = handlers.get(method)
        if handler is None or not recipients:
            continue
        handler(task, recipients)
def request_a_schedule(
    schedule_name, requested_by: str, worker: str = None, priority: int = 0
):
    """Create a requested_task for ``schedule_name`` if possible.

    Args:
        schedule_name: name of the schedule to request.
        requested_by: identifier stored in the document's ``requested_by``.
        worker: worker name to bind the request to, or None for no binding.
        priority: value stored in the document's ``priority``.

    Returns:
        The inserted document, with ``_id`` converted to ``str``, or None when
        the schedule is already requested for the same ``worker`` value, or
        when no enabled schedule has that name.
    """
    # skip if already requested (for that very same worker value)
    if RequestedTasks().count_documents(
        {"schedule_name": schedule_name, "worker": worker}
    ):
        return None

    # only enabled schedules match: a disabled one yields no document
    schedule = Schedules().find_one(
        {"name": schedule_name, "enabled": True}, {"config": 1}
    )
    if not schedule:
        return None

    config = schedule["config"]
    # build and save command-information to config
    config.update(command_information_for(config))

    now = getnow()

    # `worker` is always recorded (possibly as None), so no conditional
    # re-assignment is needed afterwards
    document = {
        "schedule_name": schedule_name,
        "status": TaskStatus.requested,
        "timestamp": {TaskStatus.requested: now},
        "events": [{"code": TaskStatus.requested, "timestamp": now}],
        "requested_by": requested_by,
        "priority": priority,
        "worker": worker,
        "config": config,
    }

    rt_id = RequestedTasks().insert_one(document).inserted_id
    # expose the identifier as a plain string to callers
    document["_id"] = str(rt_id)
    return document
def post(self, task_id: str, token: AccessToken.Payload):
    """Create a task from a requested_task_id.

    The requested task document is inserted into ``Tasks``, a ``reserved``
    event is recorded for the requesting worker, then the document is removed
    from ``RequestedTasks`` and the update is broadcast.

    Returns:
        The created task as JSON with status 201, or an empty JSON body with
        status 423 when inserting into ``Tasks`` raises a duplicate-key error.

    Raises:
        TaskNotFound: no requested task has this ``_id``.
        Exception: any other insertion error, or a failure of the ``reserved``
            event handler (the inserted task is removed first, best-effort).
    """
    requested_task = RequestedTasks().find_one({"_id": task_id})
    if requested_task is None:
        raise TaskNotFound()

    request_args = TaskCreateSchema().load(request.args.to_dict())

    try:
        Tasks().insert_one(requested_task)
    except pymongo.errors.DuplicateKeyError as exc:
        # a task with this _id already exists in Tasks
        logger.exception(exc)
        response = jsonify({})
        response.status_code = 423  # Locked
        return response
    except Exception as exc:
        logger.exception(exc)
        raise

    payload = {"worker": request_args["worker_name"]}
    try:
        task_event_handler(task_id, TaskStatus.reserved, payload)
    except Exception as exc:
        logger.exception(exc)
        logger.error("unable to create task. reverting.")
        # best-effort removal of the task inserted above
        try:
            Tasks().delete_one({"_id": task_id})
        except Exception:
            logger.debug(f"unable to revert deletion of task {task_id}")
        raise

    # failing to remove the requested task is logged but not fatal
    try:
        RequestedTasks().delete_one({"_id": task_id})
    except Exception as exc:
        logger.exception(exc)

    BROADCASTER.broadcast_updated_task(task_id, TaskStatus.reserved, payload)

    return make_response(
        jsonify(Tasks().find_one({"_id": task_id})), HTTPStatus.CREATED
    )
def patch(self, requested_task_id: str, token: AccessToken.Payload):
    """Set the priority of a requested task.

    Responds 202 when the stored document was modified, 200 otherwise.
    A payload without ``priority`` sets the priority to 0.

    Raises ``TaskNotFound`` when the task does not exist and
    ``InvalidRequestJSON`` when the payload fails schema validation.
    """
    if not RequestedTasks().count_documents({"_id": requested_task_id}):
        raise TaskNotFound()

    try:
        request_json = UpdateRequestedTaskSchema().load(request.get_json())
    except ValidationError as e:
        raise InvalidRequestJSON(e.messages)

    outcome = RequestedTasks().update_one(
        {"_id": requested_task_id},
        {"$set": {"priority": request_json.get("priority", 0)}},
    )

    status = HTTPStatus.ACCEPTED if outcome.modified_count else HTTPStatus.OK
    return Response(status=status)
def request_tasks_using_schedule():
    """Create requested_tasks based on each schedule's periodicity field.

    Expected to be run periodically to compute what needs to be scheduled.
    For every periodicity that has data in ``PERIODICITIES``, enabled
    schedules are requested unless they are already requested or their most
    recent task started within the rolling period.
    """
    requester = "period-scheduler"
    priority = 0
    worker = None

    query = {"enabled": True}
    projection = {"name": 1, "config": 1, "most_recent_task": 1}

    periods = {p: PERIODICITIES.get(p) for p in SchedulePeriodicity.all()}
    for period, period_data in periods.items():
        # periodicities without data (manually) are never auto-requested
        if not period_data:
            continue

        period_start = getnow() - datetime.timedelta(days=period_data["days"])
        logger.debug(f"requesting for `{period}` schedules (before {period_start})")

        # restrict the shared query to the periodicity being processed
        query["periodicity"] = period

        for schedule in Schedules().find(query, projection):
            # already requested: nothing more to do for this schedule
            already_requested = RequestedTasks().count_documents(
                {"schedule_name": schedule["name"]}
            )
            if already_requested > 0:
                continue

            if schedule.get("most_recent_task"):
                last_run = Tasks().find_one(
                    {"_id": schedule["most_recent_task"]["_id"]}, {"timestamp": 1}
                )
                # skip if the last run started after this rolling period's
                # start; runs lacking a `started` timestamp use a fixed
                # 2019-01-01 fallback
                if (
                    last_run
                    and last_run["timestamp"].get(
                        "started", datetime.datetime(2019, 1, 1)
                    )
                    > period_start
                ):
                    continue

            requested = request_a_schedule(
                schedule["name"], requester, worker, priority
            )
            if requested:
                logger.debug(f"requested {schedule['name']}")
            else:
                logger.debug(f"could not request {schedule['name']}")
def request_a_schedule(schedule_name,
                       requested_by: str,
                       worker: str = None,
                       priority: int = 0):
    """Create a requested_task for ``schedule_name`` if possible.

    Args:
        schedule_name: name of the schedule to request.
        requested_by: identifier stored in the document's ``requested_by``.
        worker: worker name to bind the request to, or None for no binding.
        priority: value stored in the document's ``priority``.

    Returns:
        The inserted document, with ``_id`` converted to ``str``, or None when
        the schedule is already requested for the same ``worker`` value, or
        when no enabled schedule has that name.
    """
    # skip if already requested (for that very same worker value)
    if RequestedTasks().count_documents({
            "schedule_name": schedule_name,
            "worker": worker
    }):
        return None

    # only enabled schedules match: a disabled one yields no document
    schedule = Schedules().find_one(
        {"name": schedule_name, "enabled": True},
        {"config": 1, "notification": 1},
    )
    if not schedule:
        return None

    # build and save command-information to config
    config = expanded_config(schedule["config"])

    now = getnow()

    # `worker` is always recorded (possibly as None), so no conditional
    # re-assignment is needed afterwards
    document = {
        "schedule_name": schedule_name,
        "status": TaskStatus.requested,
        "timestamp": {TaskStatus.requested: now},
        "events": [{"code": TaskStatus.requested, "timestamp": now}],
        "requested_by": requested_by,
        "priority": priority,
        "worker": worker,
        "config": config,
        # reverse ObjectId to randomize task ids
        "_id": ObjectId(str(ObjectId())[::-1]),
        "upload": {
            "zim": {
                "upload_uri": ZIM_UPLOAD_URI,
                "expiration": ZIM_EXPIRATION,
                "zimcheck": ZIMCHECK_OPTION,
            },
            "logs": {
                "upload_uri": LOGS_UPLOAD_URI,
                "expiration": LOGS_EXPIRATION,
            },
        },
        "notification": schedule.get("notification", {}),
    }

    rt_id = RequestedTasks().insert_one(document).inserted_id
    # expose the identifier as a plain string to callers
    document["_id"] = str(rt_id)
    return document
def get_reqs_doable_by(worker):
    """Aggregate the requested tasks a worker could run with all its resources.

    Results are sorted by priority (descending), then by duration value
    (descending, i.e. longest first).
    """
    # every resource requirement must fit within what the worker offers
    query = {
        f"config.resources.{res_key}": {"$lte": worker["resources"][res_key]}
        for res_key in ("cpu", "memory", "disk")
    }
    query["config.task_name"] = {"$in": worker["offliners"]}

    # selfish workers only take tasks bound to them; others also take
    # tasks whose worker is None
    query["worker"] = (
        worker["name"]
        if worker.get("selfish", False)
        else {"$in": [worker["name"], None]}
    )

    projection = {
        "_id": 1,
        "status": 1,
        "schedule_name": 1,
        "config.task_name": 1,
        "config.platform": 1,
        "config.resources": 1,
        "timestamp.requested": 1,
        "requested_by": 1,
        "priority": 1,
        "worker": 1,
    }

    # make schedule available directly (lookup returned array)
    extract_schedule_proj = {
        "schedule": {"$arrayElemAt": ["$schedules", 0]},
        **projection,
    }

    # add a single value for duration for comparisons: both the default and
    # the worker-specific durations are merged into one object
    worker_duration = f"$schedule.duration.workers.{worker['name']}.value"
    duration_value_proj = {
        "duration": {
            "$mergeObjects": [
                {"value": "$schedule.duration.default.value"},
                {"value": worker_duration},
            ]
        },
        **projection,
    }

    match_stage = {"$match": query}
    # join on schedules by name
    lookup_stage = {
        "$lookup": {
            "from": "schedules",
            "localField": "schedule_name",
            "foreignField": "name",
            "as": "schedules",
        }
    }
    sort_stage = {
        "$sort": SON(
            [
                ("priority", pymongo.DESCENDING),
                ("duration.value", pymongo.DESCENDING),
            ]
        )
    }

    return RequestedTasks().aggregate(
        [
            match_stage,
            lookup_stage,
            {"$project": extract_schedule_proj},
            {"$project": duration_value_proj},
            sort_stage,
        ]
    )
def list_of_requested_tasks(token: AccessToken.Payload = None):
    """Return a paginated JSON list of requested tasks matching the filters.

    Filters are read from the query string: schedule names, minimum priority,
    worker, maximum cpu/memory/disk and matching offliners. The response holds
    ``meta`` (skip, limit, count) and ``items``.
    """
    request_args = request.args.to_dict()
    worker = request_args.get("worker")

    # record we've seen a worker, if applicable
    if token and worker:
        Workers().update_one(
            {"name": worker, "username": token.username},
            {"$set": {"last_seen": getnow()}},
        )

    # multi-valued parameters must be read as lists before validation
    for multi_key in ("matching_offliners", "schedule_name"):
        request_args[multi_key] = request.args.getlist(multi_key)

    request_args = RequestedTaskSchema().load(request_args)

    # unpack query parameters
    skip = request_args["skip"]
    limit = request_args["limit"]
    schedule_names = request_args["schedule_name"]
    priority = request_args.get("priority")

    # build the database query from the provided filters
    query = {}
    if schedule_names:
        query["schedule_name"] = {"$in": schedule_names}

    if priority:
        query["priority"] = {"$gte": priority}

    if worker:
        query["worker"] = {"$in": [None, worker]}

    for res_key in ("cpu", "memory", "disk"):
        arg_name = f"matching_{res_key}"
        if arg_name in request_args:
            query[f"config.resources.{res_key}"] = {"$lte": request_args[arg_name]}

    matching_offliners = request_args.get("matching_offliners")
    if matching_offliners:
        query["config.task_name"] = {"$in": matching_offliners}

    projection = {
        "_id": 1,
        "status": 1,
        "schedule_name": 1,
        "config.task_name": 1,
        "config.resources": 1,
        "timestamp.requested": 1,
        "requested_by": 1,
        "priority": 1,
        "worker": 1,
    }
    ordering = [
        ("priority", pymongo.DESCENDING),
        ("timestamp.reserved", pymongo.DESCENDING),
        ("timestamp.requested", pymongo.DESCENDING),
    ]

    cursor = RequestedTasks().find(query, projection)
    cursor = cursor.sort(ordering).skip(skip).limit(limit)

    # count covers the whole filtered set, not just the returned page
    count = RequestedTasks().count_documents(query)

    meta = {"skip": skip, "limit": limit, "count": count}
    return jsonify({"meta": meta, "items": list(cursor)})
def patch(self, schedule_name: str, token: AccessToken.Payload):
    """Apply a partial update to the schedule named ``schedule_name``.

    Config-level keys are written under ``config.``; a rename is propagated
    to the ``schedule_name`` of matching Tasks and RequestedTasks.
    Responds 204 on success.

    Raises ``ScheduleNotFound`` when the schedule does not exist,
    ``InvalidRequestJSON`` when the payload is invalid or empty, and
    ``BadRequest`` when the new name is already taken.
    """
    query = {"name": schedule_name}
    schedule = Schedules().find_one(query, {"config.task_name": 1})
    if not schedule:
        raise ScheduleNotFound()

    try:
        update = UpdateSchema().load(request.get_json())  # , partial=True

        # empty dict passes the validator but troubles mongo
        if not request.get_json():
            raise ValidationError("Update can't be empty")

        # changing the offliner requires flags matching the new offliner
        if "task_name" in update and "flags" not in update:
            raise ValidationError("Can't update offliner without updating flags")

        # ensure we test flags according to new task_name if present
        offliner = (
            update["task_name"]
            if "task_name" in update
            else schedule["config"]["task_name"]
        )
        flags_schema = ScheduleConfigSchema.get_offliner_schema(offliner)

        if "flags" in update:
            flags_schema().load(update["flags"])
    except ValidationError as e:
        raise InvalidRequestJSON(e.messages)

    renaming = "name" in update
    if renaming and Schedules().count_documents({"name": update["name"]}):
        raise BadRequest(
            "Schedule with name `{}` already exists".format(update["name"])
        )

    # these keys live under the schedule's `config` sub-document
    config_keys = (
        "task_name",
        "warehouse_path",
        "image",
        "resources",
        "platform",
        "flags",
    )
    mongo_update = {}
    for key, value in update.items():
        target = f"config.{key}" if key in config_keys else key
        mongo_update[target] = value

    matched_count = (
        Schedules().update_one(query, {"$set": mongo_update}).matched_count
    )
    if not matched_count:
        raise ScheduleNotFound()

    if renaming:
        # keep tasks and requested tasks pointing at the renamed schedule
        tasks_query = {"schedule_name": schedule_name}
        for collection in (Tasks, RequestedTasks):
            collection().update_many(
                tasks_query, {"$set": {"schedule_name": update["name"]}}
            )

    return Response(status=HTTPStatus.NO_CONTENT)