Exemplo n.º 1
0
    def delete(self, requested_task_id: str, token: AccessToken.Payload):
        """Delete the requested task identified by `requested_task_id`.

        Raises TaskNotFound when no document has that `_id`; otherwise
        responds with the number of deleted documents under `deleted`.
        """
        selector = {"_id": requested_task_id}

        # only `_id` is projected: we merely need to know the task exists
        if RequestedTasks().find_one(selector, {"_id": 1}) is None:
            raise TaskNotFound()

        outcome = RequestedTasks().delete_one(selector)
        return jsonify({"deleted": outcome.deleted_count})
Exemplo n.º 2
0
def make_requested_task(database, make_event):
    """Yield a factory inserting requested-task documents into `database`.

    The `_id` of every document created through the factory is recorded so
    that all of them are deleted once the generator resumes after the yield.
    """
    collection = RequestedTasks(database=database)
    created_ids = []

    def _make_requested_task(
        schedule_id=ObjectId(),
        schedule_name="",
        status=TaskStatus.requested,
        hostname="zimfarm_worker.com",
        requested_by="someone",
        priority=0,
    ):
        # NOTE: `schedule_id` and `hostname` are accepted but not used below
        codes = [TaskStatus.requested]

        # one timestamp per event code, reused when building the events
        timestamp = {}
        for code in codes:
            timestamp[code] = datetime.now()
        events = [make_event(code, timestamp[code]) for code in codes]

        flags = {"api-key": "aaaaaa", "id": "abcde", "type": "channel"}
        image = {"name": "openzim/youtube", "tag": "latest"}
        resources = {"cpu": 3, "memory": 1024, "disk": 1024}
        config = {
            "flags": flags,
            "image": image,
            "task_name": "youtube",
            "warehouse_path": "/other",
            "resources": resources,
        }

        document = {
            "_id": ObjectId(),
            "status": status,
            "schedule_name": schedule_name,
            "timestamp": timestamp,
            "events": events,
            "config": config,
            "notification": {},
            "priority": priority,
            "requested_by": requested_by,
        }

        collection.insert_one(document)
        created_ids.append(document["_id"])
        return document

    yield _make_requested_task

    # teardown: remove everything the factory inserted
    collection.delete_many({"_id": {"$in": created_ids}})
Exemplo n.º 3
0
    def get(self, requested_task_id: str):
        """Return the requested task matching `requested_task_id` as JSON.

        Raises TaskNotFound when the lookup yields no document.
        """
        document = RequestedTasks().find_one({"_id": requested_task_id})
        if document is not None:
            return jsonify(document)

        raise TaskNotFound()
Exemplo n.º 4
0
def handle_notification(task_id, event):
    """Send the notifications registered for `event` on task `task_id`.

    Both the task's own `notification` entries and the global ones are
    honored. Nothing happens when the event is not a triggering one, when
    the task can't be found or when no notification is requested.
    """
    # every "complete" status is handled under the single `ended` event
    if event in TaskStatus.complete():
        event = "ended"

    # nothing to do for events that don't trigger notifications
    if event not in GlobalNotifications.events:
        return

    # look in tasks first, then fall back to requested tasks
    task = Tasks().find_one({"_id": task_id})
    if not task:
        task = RequestedTasks().find_one({"_id": task_id})
    if not task:
        return

    # round-trip through JSON so handlers only get a safe version of the task
    task = json.loads(json.dumps(task, cls=Encoder))
    global_notifs = GlobalNotifications.entries.get(event, {})
    task_notifs = task.get("notification", {}).get(event, {})

    # no notification request at all for this event
    if not (global_notifs or task_notifs):
        return

    handlers = {
        "mailgun": handle_mailgun_notification,
        "webhook": handle_webhook_notification,
        "slack": handle_slack_notification,
    }

    # task-level requests are processed before the global ones
    requests = list(task_notifs.items()) + list(global_notifs.items())
    for method, recipients in requests:
        handler = handlers.get(method)
        if not handler or not recipients:
            continue
        handler(task, recipients)
Exemplo n.º 5
0
def request_a_schedule(
    schedule_name, requested_by: str, worker: str = None, priority: int = 0
):
    """Create a requested_task for `schedule_name` if possible, else None.

    None is returned when a requested task already exists for the same
    schedule and worker, or when no enabled schedule bears that name.
    Otherwise the inserted document is returned, its `_id` as a string.
    """
    # a schedule can only be requested once per worker
    already_requested = RequestedTasks().count_documents(
        {"schedule_name": schedule_name, "worker": worker}
    )
    if already_requested:
        return None

    # disabled (or unknown) schedules yield no document here
    schedule = Schedules().find_one(
        {"name": schedule_name, "enabled": True}, {"config": 1}
    )
    if not schedule:
        return None

    # store the command-information alongside the schedule's config
    config = schedule["config"]
    config.update(command_information_for(config))

    requested_on = getnow()
    status = TaskStatus.requested
    document = {
        "schedule_name": schedule_name,
        "status": status,
        "timestamp": {status: requested_on},
        "events": [{"code": status, "timestamp": requested_on}],
        "requested_by": requested_by,
        "priority": priority,
        "worker": worker,
        "config": config,
    }

    inserted = RequestedTasks().insert_one(document)

    # hand back the identifier as a string
    document["_id"] = str(inserted.inserted_id)
    return document
Exemplo n.º 6
0
    def post(self, task_id: str, token: AccessToken.Payload):
        """Create a task from the requested task identified by `task_id`.

        The requested task document is inserted into the tasks collection,
        the `reserved` event is handled for the worker named in the request
        arguments, then the requested task is removed.

        Returns the created task with a 201 status, or an empty JSON object
        with a 423 status when a task with that `_id` already exists.
        Raises TaskNotFound when there is no such requested task.
        """

        requested_task = RequestedTasks().find_one({"_id": task_id})
        if requested_task is None:
            raise TaskNotFound()

        request_args = TaskCreateSchema().load(request.args.to_dict())

        try:
            Tasks().insert_one(requested_task)
        except pymongo.errors.DuplicateKeyError as exc:
            # a task with this _id already exists
            logger.exception(exc)
            response = jsonify({})
            response.status_code = 423  # Locked
            return response
        except Exception as exc:
            logger.exception(exc)
            raise

        payload = {"worker": request_args["worker_name"]}
        try:
            task_event_handler(task_id, TaskStatus.reserved, payload)
        except Exception as exc:
            logger.exception(exc)
            logger.error("unable to create task. reverting.")
            # best-effort removal of the task we just inserted
            try:
                Tasks().delete_one({"_id": task_id})
            except Exception:
                logger.debug(f"unable to revert creation of task {task_id}")
            raise

        # failing to remove the requested task must not fail the request
        try:
            RequestedTasks().delete_one({"_id": task_id})
        except Exception as exc:
            logger.exception(exc)  # and pass

        BROADCASTER.broadcast_updated_task(task_id, TaskStatus.reserved,
                                           payload)

        return make_response(jsonify(Tasks().find_one({"_id": task_id})),
                             HTTPStatus.CREATED)
Exemplo n.º 7
0
    def patch(self, requested_task_id: str, token: AccessToken.Payload):
        """Set the priority of the requested task `requested_task_id`.

        The priority defaults to 0 when absent from the validated payload.
        Responds with ACCEPTED when a document was modified, OK otherwise.
        Raises TaskNotFound for an unknown id and InvalidRequestJSON when the
        payload fails validation.
        """
        selector = {"_id": requested_task_id}
        if not RequestedTasks().count_documents(selector):
            raise TaskNotFound()

        try:
            changes = UpdateRequestedTaskSchema().load(request.get_json())
        except ValidationError as e:
            raise InvalidRequestJSON(e.messages)

        new_priority = changes.get("priority", 0)
        outcome = RequestedTasks().update_one(
            selector, {"$set": {"priority": new_priority}}
        )

        status = HTTPStatus.ACCEPTED if outcome.modified_count else HTTPStatus.OK
        return Response(status=status)
Exemplo n.º 8
0
def request_tasks_using_schedule():
    """Request tasks for enabled schedules according to their periodicity.

    Expected to be run periodically. For each periodicity that has data in
    PERIODICITIES, a schedule is requested unless it already has a requested
    task or its most recent task started within the rolling period.
    """

    requester = "period-scheduler"
    priority = 0
    worker = None

    projection = {"name": 1, "config": 1, "most_recent_task": 1}

    for period in SchedulePeriodicity.all():
        period_data = PERIODICITIES.get(period)
        if not period_data:
            continue  # manually has no data

        period_start = getnow() - datetime.timedelta(days=period_data["days"])
        logger.debug(f"requesting for `{period}` schedules (before {period_start})")

        # enabled schedules of this periodicity
        query = {"enabled": True, "periodicity": period}
        for schedule in Schedules().find(query, projection):
            # skip schedules which already have a requested task
            pending = RequestedTasks().count_documents(
                {"schedule_name": schedule["name"]}
            )
            if pending > 0:
                continue

            most_recent = schedule.get("most_recent_task")
            if most_recent:
                last_run = Tasks().find_one(
                    {"_id": most_recent["_id"]}, {"timestamp": 1}
                )
                if last_run:
                    # runs lacking a `started` timestamp count as very old
                    started_on = last_run["timestamp"].get(
                        "started", datetime.datetime(2019, 1, 1)
                    )
                    # started within the rolling period: not due yet
                    if started_on > period_start:
                        continue

            if request_a_schedule(schedule["name"], requester, worker, priority):
                logger.debug(f"requested {schedule['name']}")
            else:
                logger.debug(f"could not request {schedule['name']}")
Exemplo n.º 9
0
def request_a_schedule(schedule_name,
                       requested_by: str,
                       worker: str = None,
                       priority: int = 0):
    """Create a requested_task for `schedule_name` if possible, else None.

    None is returned when a requested task already exists for the same
    schedule and worker, or when no enabled schedule bears that name.
    Otherwise the inserted document is returned, its `_id` as a string.
    """

    # a schedule can only be requested once per worker
    already_requested = RequestedTasks().count_documents(
        {"schedule_name": schedule_name, "worker": worker})
    if already_requested:
        return None

    # disabled (or unknown) schedules yield no document here
    schedule = Schedules().find_one(
        {"name": schedule_name, "enabled": True},
        {"config": 1, "notification": 1},
    )
    if not schedule:
        return None

    # build and save command-information to config
    config = expanded_config(schedule["config"])

    requested_on = getnow()
    status = TaskStatus.requested

    # reverse ObjectId to randomize task ids
    task_id = ObjectId(str(ObjectId())[::-1])

    upload = {
        "zim": {
            "upload_uri": ZIM_UPLOAD_URI,
            "expiration": ZIM_EXPIRATION,
            "zimcheck": ZIMCHECK_OPTION,
        },
        "logs": {
            "upload_uri": LOGS_UPLOAD_URI,
            "expiration": LOGS_EXPIRATION,
        },
    }

    document = {
        "schedule_name": schedule_name,
        "status": status,
        "timestamp": {status: requested_on},
        "events": [{"code": status, "timestamp": requested_on}],
        "requested_by": requested_by,
        "priority": priority,
        "worker": worker,
        "config": config,
        "_id": task_id,
        "upload": upload,
        "notification": schedule.get("notification", {}),
    }

    inserted = RequestedTasks().insert_one(document)

    # hand back the identifier as a string
    document["_id"] = str(inserted.inserted_id)
    return document
Exemplo n.º 10
0
def get_reqs_doable_by(worker):
    """Aggregate the RequestedTasks() a worker could run with all its resources

    Results are sorted by priority, then by duration (longest first).
    `worker` must provide `name`, `resources` (cpu, memory, disk) and
    `offliners`; its optional `selfish` flag restricts the results to tasks
    assigned to that very worker."""
    # tasks must fit within each of the worker's resources
    available = worker["resources"]
    query = {
        f"config.resources.{res_key}": {"$lte": available[res_key]}
        for res_key in ("cpu", "memory", "disk")
    }

    # and use an offliner the worker supports
    query["config.task_name"] = {"$in": worker["offliners"]}

    # selfish workers only take their own tasks; others also take those
    # whose worker is None
    if worker.get("selfish", False):
        query["worker"] = worker["name"]
    else:
        query["worker"] = {"$in": [worker["name"], None]}

    projection = {
        "_id": 1,
        "status": 1,
        "schedule_name": 1,
        "config.task_name": 1,
        "config.platform": 1,
        "config.resources": 1,
        "timestamp.requested": 1,
        "requested_by": 1,
        "priority": 1,
        "worker": 1,
    }

    # $lookup (a left outer join) returns an array: expose its first schedule
    extract_schedule_proj = {
        "schedule": {"$arrayElemAt": ["$schedules", 0]},
        **projection,
    }

    # single `duration.value` for comparisons: the schedule's default one,
    # merged with the one recorded for this worker
    worker_duration = f"$schedule.duration.workers.{worker['name']}.value"
    duration_value_proj = {
        "duration": {
            "$mergeObjects": [
                {"value": "$schedule.duration.default.value"},
                {"value": worker_duration},
            ]
        },
        **projection,
    }

    match_stage = {"$match": query}
    lookup_stage = {
        "$lookup": {
            "from": "schedules",
            "localField": "schedule_name",
            "foreignField": "name",
            "as": "schedules",
        }
    }
    sort_stage = {
        "$sort": SON([
            ("priority", pymongo.DESCENDING),
            ("duration.value", pymongo.DESCENDING),
        ])
    }

    return RequestedTasks().aggregate([
        match_stage,
        lookup_stage,
        {"$project": extract_schedule_proj},
        {"$project": duration_value_proj},
        sort_stage,
    ])
Exemplo n.º 11
0
def list_of_requested_tasks(token: AccessToken.Payload = None):
    """Return a JSON page of requested tasks filtered by the request args.

    Supported filters: `schedule_name`, `priority` (minimum), `worker`,
    `matching_cpu`/`matching_memory`/`matching_disk` (maximum resources) and
    `matching_offliners`. The response holds `meta` (skip, limit, count) and
    the matching `items`.
    """

    raw_args = request.args.to_dict()
    worker = raw_args.get("worker")

    # record we've seen a worker, if applicable
    if token and worker:
        Workers().update_one(
            {"name": worker, "username": token.username},
            {"$set": {"last_seen": getnow()}},
        )

    # multi-valued parameters must be read as lists before validation
    for multi_key in ("matching_offliners", "schedule_name"):
        raw_args[multi_key] = request.args.getlist(multi_key)
    request_args = RequestedTaskSchema().load(raw_args)

    # unpack query parameters
    skip = request_args["skip"]
    limit = request_args["limit"]
    schedule_names = request_args["schedule_name"]
    priority = request_args.get("priority")

    # build the database query
    query = {}
    if schedule_names:
        query["schedule_name"] = {"$in": schedule_names}

    if priority:
        query["priority"] = {"$gte": priority}

    if worker:
        query["worker"] = {"$in": [None, worker]}

    for res_key in ("cpu", "memory", "disk"):
        arg_name = f"matching_{res_key}"
        if arg_name in request_args:
            query[f"config.resources.{res_key}"] = {
                "$lte": request_args[arg_name]
            }

    matching_offliners = request_args.get("matching_offliners")
    if matching_offliners:
        query["config.task_name"] = {"$in": matching_offliners}

    projection = {
        "_id": 1,
        "status": 1,
        "schedule_name": 1,
        "config.task_name": 1,
        "config.resources": 1,
        "timestamp.requested": 1,
        "requested_by": 1,
        "priority": 1,
        "worker": 1,
    }
    ordering = [
        ("priority", pymongo.DESCENDING),
        ("timestamp.reserved", pymongo.DESCENDING),
        ("timestamp.requested", pymongo.DESCENDING),
    ]

    cursor = RequestedTasks().find(query, projection)
    cursor = cursor.sort(ordering).skip(skip).limit(limit)

    # total number of matches, regardless of pagination
    count = RequestedTasks().count_documents(query)

    meta = {"skip": skip, "limit": limit, "count": count}
    return jsonify({"meta": meta, "items": list(cursor)})
Exemplo n.º 12
0
    def patch(self, schedule_name: str, token: AccessToken.Payload):
        """Update all properties of a schedule but _id and most_recent_task

        Responds with NO_CONTENT on success. When the schedule is renamed,
        its tasks and requested tasks are renamed accordingly.
        Raises ScheduleNotFound for an unknown schedule, InvalidRequestJSON
        when the payload fails validation and BadRequest when the new name
        is already used by a schedule."""

        query = {"name": schedule_name}
        schedule = Schedules().find_one(query, {"config.task_name": 1})
        if not schedule:
            raise ScheduleNotFound()

        try:
            body = request.get_json()
            update = UpdateSchema().load(body)  # , partial=True
            # empty dict passes the validator but troubles mongo
            if not body:
                raise ValidationError("Update can't be empty")

            # flags are checked against the new task_name if present,
            # against the schedule's current one otherwise
            if "task_name" in update:
                if "flags" not in update:
                    raise ValidationError(
                        "Can't update offliner without updating flags"
                    )
                offliner = update["task_name"]
            else:
                offliner = schedule["config"]["task_name"]
            flags_schema = ScheduleConfigSchema.get_offliner_schema(offliner)

            if "flags" in update:
                flags_schema().load(update["flags"])
        except ValidationError as e:
            raise InvalidRequestJSON(e.messages)

        # refuse a name already used by a schedule
        if "name" in update and Schedules().count_documents(
            {"name": update["name"]}
        ):
            raise BadRequest(
                "Schedule with name `{}` already exists".format(update["name"])
            )

        # these keys live under `config` in the schedule document
        config_keys = (
            "task_name",
            "warehouse_path",
            "image",
            "resources",
            "platform",
            "flags",
        )
        mongo_update = {}
        for key, value in update.items():
            target = f"config.{key}" if key in config_keys else key
            mongo_update[target] = value

        outcome = Schedules().update_one(query, {"$set": mongo_update})
        if not outcome.matched_count:
            raise ScheduleNotFound()

        # propagate a renaming to the schedule's tasks and requested tasks
        if "name" in update:
            tasks_query = {"schedule_name": schedule_name}
            renaming = {"$set": {"schedule_name": update["name"]}}
            Tasks().update_many(tasks_query, renaming)
            RequestedTasks().update_many(tasks_query, renaming)

        return Response(status=HTTPStatus.NO_CONTENT)