Пример #1
0
class RequestedTasks(BaseCollection):
    """Wrapper around the ``requested_tasks`` collection.

    Holds the collection name and the JSON-schema validator applied to it,
    and knows how to create the indexes the collection relies on.
    """

    _name = "requested_tasks"

    # JSON-schema installed as the collection validator by ``initialize``:
    # every document must carry a string ``schedule_name``; ``status``, when
    # present, must be one of the values returned by ``TaskStatus.all()``.
    schema = {
        "bsonType": "object",
        "required": ["schedule_name"],
        "properties": {
            "status": {
                "enum": TaskStatus.all()
            },
            "schedule_name": {
                "bsonType": "string"
            },
        },
    }

    def __init__(self, database=None):
        """Bind the collection to ``database``.

        :param database: database handle to use; a fresh ``Database()`` is
            created when omitted.
        """
        # Compare against None explicitly instead of relying on truthiness:
        # a handle that evaluates falsy would be silently replaced, and
        # PyMongo 4 database objects raise NotImplementedError on bool()
        # (presumably ``Database`` derives from one -- confirm).
        if database is None:
            database = Database()
        super().__init__(database, self._name)

    def initialize(self):
        """Create the collection indexes and install the schema validator."""
        self.create_index("status", name="status")
        self.create_index("schedule_name", name="schedule_name")
        self.create_index("timestamp.requested", name="timestamp.requested")

        # ``collMod`` attaches the validator to the already existing
        # collection.
        self.database.command({
            "collMod": self._name,
            "validator": {
                "$jsonSchema": self.schema
            }
        })
Пример #2
0
def handle_notification(task_id, event):
    """Dispatch the notifications configured for ``event`` on a task.

    Looks the task up in ``Tasks`` first, then in ``RequestedTasks``, and
    calls the handler matching each configured method (mailgun, webhook,
    slack) with the task and its recipients. Task-level requests are
    processed before the global ones. Returns ``None`` in every case.
    """
    # every "complete" status is notified under the single "ended" event
    if event in TaskStatus.complete():
        event = "ended"

    # nothing to do for events that never trigger a notification
    if event not in GlobalNotifications.events:
        return

    task = Tasks().find_one({"_id": task_id})
    if not task:
        task = RequestedTasks().find_one({"_id": task_id})
    if not task:
        return

    # round-trip through JSON so only a safe, serializable copy is used below
    task = json.loads(json.dumps(task, cls=Encoder))
    global_notifs = GlobalNotifications.entries.get(event, {})
    task_notifs = task.get("notification", {}).get(event, {})

    # no notification request at all for this event
    if not (global_notifs or task_notifs):
        return

    handlers = {
        "mailgun": handle_mailgun_notification,
        "webhook": handle_webhook_notification,
        "slack": handle_slack_notification,
    }
    # snapshot both sources up-front: task-level first, then global
    requests = [*task_notifs.items(), *global_notifs.items()]
    for method, recipients in requests:
        handler = handlers.get(method)
        # unknown methods and empty recipient lists are skipped
        if handler and recipients:
            handler(task, recipients)
Пример #3
0
 def test_cancel_task(self, client, access_token, tasks):
     """Cancelling any task with an incomplete status must answer 204."""
     for task in tasks:
         # only tasks that are not finished yet can be cancelled
         if task["status"] not in TaskStatus.incomplete():
             continue

         response = client.post(
             "/tasks/{}/cancel".format(task["_id"]),
             headers={
                 "Authorization": access_token,
                 "Content-Type": "application/json",
             },
         )
         assert response.status_code == 204
Пример #4
0
def get_currently_running_tasks(worker_name):
    """Return the non-complete tasks assigned to ``worker_name``.

    Each returned task document is limited to the projected fields and is
    updated in place with whatever ``get_task_eta`` returns for it.
    """
    # tasks of this worker whose status is not one of the "complete" ones
    query = {"status": {"$nin": TaskStatus.complete()}, "worker": worker_name}
    # only fetch what is needed to describe the task and compute its ETA
    projection = {
        "config.resources": 1,
        "config.platform": 1,
        "schedule_name": 1,
        "timestamp": 1,
    }
    running_tasks = list(Tasks().find(query, projection))

    # enrich every task with its ETA details
    for running in running_tasks:
        eta = get_task_eta(running, worker_name)
        running.update(eta)

    return running_tasks
Пример #5
0
    def post(self, task_id: str, token: AccessToken.Payload):
        """Request cancellation of an incomplete task.

        Raises ``TaskNotFound`` when no task with this id has an incomplete
        status; otherwise records the cancel request, notifies the workers
        and answers with an empty 204 response.
        """
        # only tasks that are not finished yet can be cancelled
        query = {"status": {"$in": TaskStatus.incomplete()}, "_id": task_id}
        task = Tasks().find_one(query, {"_id": 1})
        if task is None:
            raise TaskNotFound()

        # record who asked for the cancellation
        payload = {"canceled_by": token.username}
        task_event_handler(task["_id"], TaskStatus.cancel_requested, payload)

        # broadcast cancel-request to worker
        BROADCASTER.broadcast_cancel_task(task_id)

        return Response(status=HTTPStatus.NO_CONTENT)
Пример #6
0
# validators
# Reusable validator instances shared by the schemas (``validate`` is
# presumably ``marshmallow.validate`` -- confirm against the file's imports).

# numeric bounds and string lengths
validate_priority = validate.Range(min=0, max=10)
validate_schedule_name = validate.Length(min=2)
validate_not_empty = validate.Length(min=1)
# role must be one of the keys of ROLES
validate_role = validate.OneOf(ROLES.keys())
# resource amounts only have a lower bound (non-negative)
validate_cpu = validate.Range(min=0)
validate_memory = validate.Range(min=0)
validate_disk = validate.Range(min=0)
# language codes are 2 or 3 characters long
validate_lang_code = validate.Length(min=2, max=3)
# the only accepted output value is the literal "/output"
validate_output = validate.Equal("/output")
# membership checks against the values exposed by the project enumerations
validate_category = validate.OneOf(ScheduleCategory.all())
validate_warehouse_path = validate.OneOf(
    ScheduleCategory.all_warehouse_paths())
validate_offliner = validate.OneOf(Offliner.all())
validate_status = validate.OneOf(TaskStatus.all())
validate_event = validate.OneOf(TaskStatus.all_events())
validate_worker_name = validate.Length(min=3)
validate_periodicity = validate.OneOf(SchedulePeriodicity.all())
validate_platform = validate.OneOf(Platform.all())
validate_platform_value = validate.Range(min=0)


def validate_multiple_of_100(value):
    """Tell whether ``value`` is an exact multiple of 100."""
    remainder = value % 100
    return remainder == 0


# reusable fields
# Optional, non-negative integer field that falls back to 0 when the value
# is missing from the input.
skip_field = fields.Integer(required=False,
                            missing=0,
                            validate=validate.Range(min=0))
Пример #7
0
# (payload key looked up in ``kwargs``, dotted task field it is stored under)
_EVENT_PAYLOAD_FIELDS = (
    ("worker", "worker"),
    ("canceled_by", "canceled_by"),
    ("command", "container.command"),
    ("image", "container.image"),
    ("exit_code", "container.exit_code"),
    ("stdout", "container.stdout"),
    ("stderr", "container.stderr"),
    ("progress", "container.progress"),
    ("timeout", "container.timeout"),
    ("log", "container.log"),
    ("task_log", "debug.log"),
    ("task_name", "debug.task_name"),
    ("task_args", "debug.task_args"),
    ("task_kwargs", "debug.task_kwargs"),
    ("traceback", "debug.traceback"),
    ("exception", "debug.exception"),
)


def save_event(task_id: ObjectId, code: str, timestamp: datetime.datetime,
               **kwargs):
    """Save an event and its accompanying data to the database.

    :param task_id: ``_id`` of the task the event belongs to.
    :param code: event code; unless it is one of
        ``TaskStatus.silent_events()`` it is appended to the task's event
        list and becomes the task's status.
    :param timestamp: moment the event happened.
    :param kwargs: optional payload. Keys listed in
        ``_EVENT_PAYLOAD_FIELDS`` are copied to the matching task field;
        ``file`` (a dict with at least ``name``) updates the ``files``
        entry of that file according to its ``status``.
    """

    task_updates = {}
    # neither file events nor scraper_running should update timestamp list
    # (not unique)
    if code not in TaskStatus.silent_events():
        task_updates[f"timestamp.{code}"] = timestamp
        # insert event and keep the list sorted by timestamp
        Tasks().update_one(
            {"_id": task_id},
            {
                "$push": {
                    "events": {
                        "$each": [{
                            "code": code,
                            "timestamp": timestamp
                        }],
                        "$sort": {
                            "timestamp": 1
                        },
                    }
                }
            },
        )

        # update task status, timestamp and other fields
        task_updates["status"] = code

    # copy over every known payload entry that was actually provided
    for payload_key, update_key in _EVENT_PAYLOAD_FIELDS:
        if payload_key in kwargs:
            task_updates[update_key] = kwargs[payload_key]

    # files are uploaded as they are created; 3 events:
    # - one on file creation with name, size and status=created
    # - one on file upload complete with name and status=uploaded
    # - one on file check complete with result and log
    # ``or {}`` also tolerates an explicit ``file=None``
    file_info = kwargs.get("file") or {}
    if file_info.get("name"):
        # A literal "." inside a ``$set`` path is read as a nesting
        # separator, so dots in the file name are swapped for U+FF0E
        # (FULLWIDTH FULL STOP) to keep the name a single key.
        fkey = file_info["name"].replace(".", "\uff0e")
        fstatus = file_info.get("status")
        if fstatus == "created":
            task_updates[f"files.{fkey}"] = {
                "name": file_info["name"],
                "size": file_info.get("size"),  # missing in uploaded
                "status": fstatus,
                f"{fstatus}_timestamp": timestamp,
            }
        elif fstatus in ("uploaded", "failed"):
            task_updates[f"files.{fkey}.status"] = fstatus
            task_updates[f"files.{fkey}.{fstatus}_timestamp"] = timestamp
        elif fstatus == "checked":
            task_updates[f"files.{fkey}.check_result"] = file_info.get(
                "result")
            task_updates[f"files.{fkey}.check_log"] = file_info.get("log")
            task_updates[f"files.{fkey}.check_timestamp"] = timestamp

    # MongoDB releases before 5.0 reject an empty ``$set``; a silent event
    # without any recognised payload has nothing to write.
    if task_updates:
        Tasks().update_one({"_id": task_id}, {"$set": task_updates})

    _update_schedule_most_recent_task_status(task_id)

    if code == TaskStatus.scraper_completed:
        schedule_name = Tasks().find_one({"_id": task_id},
                                         {"schedule_name": 1})["schedule_name"]
        update_schedule_duration(schedule_name)