Example #1
def execute(self):
    while True:
        q = CoreQueue()
        listing = find_job(name=re.compile("^meetup.example.job.SwarmJob"))
        if listing:
            for doc in listing:
                q.kill_job(doc["_id"])
                q.remove_job(doc["_id"])
        else:
            break
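
The snippet above is extracted from a job class and omits its imports. A minimal, self-contained sketch of the same kill-and-remove loop is shown below; the import locations of CoreQueue and find_job are assumptions for illustration, not taken from the example.

import re

from core4.queue.main import CoreQueue             # assumed import location
from core4.queue.helper.functool import find_job   # assumed import location


def purge_swarm_jobs():
    """Kill and remove all queued SwarmJob instances until none are left."""
    q = CoreQueue()
    while True:
        listing = list(find_job(name=re.compile("^meetup.example.job.SwarmJob")))
        if not listing:
            break
        for doc in listing:
            q.kill_job(doc["_id"])    # flag killed_at so a worker stops the job
            q.remove_job(doc["_id"])  # flag removed_at so a worker purges it
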
Example #2
def initialize(self):
    self.queue = CoreQueue()
    self._collection = {}
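
initialize() only prepares an empty cache; the collection() method in Example #3 below fills it lazily, connecting to each ``sys`` collection once and reusing the handle afterwards. A generic sketch of that memoization pattern, with a hypothetical connect callable standing in for core4's self.config.sys[name] lookup:

from typing import Any, Callable, Dict


class CollectionCache:
    """Connect to each named collection once and reuse the handle."""

    def __init__(self, connect: Callable[[str], Any]):
        self._connect = connect        # stands in for self.config.sys[name]
        self._collection: Dict[str, Any] = {}

    def collection(self, name: str) -> Any:
        # connect on first access only; later calls return the cached handle
        if name not in self._collection:
            self._collection[name] = self._connect(name)
        return self._collection[name]
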
Example #3
class JobHandler(CoreRequestHandler, core4.queue.query.QueryMixin):
    """
    **DEPRECATED!** Get job listing, job details, kill, delete and restart jobs.
    """

    author = "mra"
    title = "job manager"
    tag = "api jobs"  # idea is to have a FE app; remove api by then

    def initialize(self):
        self.queue = CoreQueue()
        self._collection = {}

    def collection(self, name):
        """
        Singleton connect and return async MongoDB connection.

        :param name: collection name below ``sys``
        :return: :class:`core4.base.collection.CoreCollection`
        """
        if name not in self._collection:
            self._collection[name] = self.config.sys[name]
        return self._collection[name]

    async def get(self, _id=None):
        """
        **DEPRECATED!** Use :class:`core4.api.v1.request.job.JobRequest`.

        Paginated job listing with ``/jobs`` and single job details with
        ``/jobs/<_id>``. Only jobs with read/execute access permissions granted
        to the current user are returned.

        Methods:
            GET /core4/api/v1/jobs - jobs listing

        Parameters:
            - per_page (int): number of jobs per page
            - page (int): requested page (starts counting with ``0``)
            - sort (str): sort field
            - order (int): sort direction (``1`` for ascending, ``-1`` for
              descending)

        Returns:
            data element with list of job attributes as dictionaries. For
            pagination the following top level attributes are returned:

            - **total_count**: the total number of records
            - **count**: the number of records in current page
            - **page**: current page (starts counting with ``0``)
            - **page_count**: the total number of pages
            - **per_page**: the number of elements per page

        Raises:
            401: Unauthorized

        Examples:
            >>> from requests import get, post
            >>> from pprint import pprint
            >>> import random
            >>> url = "http://localhost:5001/core4/api/v1"
            >>> signin = get(url + "/login?username=admin&password=hans")
            >>> token = signin.json()["data"]["token"]
            >>> h = {"Authorization": "Bearer " + token}
            >>>
            >>> name = "core4.queue.helper.DummyJob"
            >>> for i in range(50):
            >>>     args = {"sleep": i, "id": random.randint(0, 100)}
            >>>     rv = post(url + "/enqueue?name=" + name, headers=h, json=args)
            >>>     print(i, rv.status_code, "-", rv.json()["message"])
            >>>     assert rv.status_code == 200
            >>> rv = get(url + "/jobs?per_page=10&sort=args.id&order=-1", headers=h)
            >>> rv
            <Response [200]>
            >>> rv.json()
            {
                '_id': '5be13b56de8b69468b7ff0b2',
                'code': 200,
                'message': 'OK',
                'timestamp': '2018-11-06T06:57:26.660093',
                'total_count': 50.0,
                'count': 10,
                'page': 0,
                'page_count': 5,
                'per_page': 10,
                'data': [ ... ]
            }

        Methods:
            GET /core4/api/v1/jobs/<_id> - job details

        Parameters:
            - _id (str): job _id to get details

        Returns:
            data element with job attributes, see
            :class:`core4.queue.job.CoreJob`.

        Raises:
            400: failed to parse job _id
            401: Unauthorized
            404: job not found

        Examples:
            >>> # continue example from above
            >>> _id = rv.json()["data"][0]["_id"]
            >>> get(url + "/jobs/" + _id, headers=h).json()
            {
                '_id': '5bdb09c6de8b691e497f00ab',
                'code': 200,
                'message': 'OK',
                'timestamp': '2018-11-01T14:12:22.283088',
                'data': {
                    '_id': '5bd72861de8b69147a275e22',
                    'args': {
                        'i': 4, 'sleep': 23
                    },
                    'attempts': 1,
                    'attempts_left': 1,
                    'enqueued': {
                        'at': '2018-10-29T15:33:53',
                        'hostname': 'mra.devops',
                        'parent_id': None,
                        'username': '******'
                    },
                    'finished_at': None,
                    'journal': False,
                    'killed_at': '2018-10-29T15:34:07.084000',
                    'locked': None,
                    'name': 'core4.queue.helper.DummyJob',
                    'priority': 0,
                    'removed_at': None,
                    'runtime': 21.0,
                    'started_at': '2018-10-29T15:33:54',
                    'state': 'killed',
                    'trial': 1,
                    'wall_at': None,
                    'zombie_at': None
                }
            }
        """
        if _id:
            oid = self.parse_id(_id)
            ret = await self.get_detail(oid)
            if not ret:
                raise HTTPError(404, "job _id [{}] not found".format(oid))
        else:
            ret = await self.get_listing()
        self.reply(ret)

    async def post(self, _id=None):
        """
        Same as ``GET``. Paginated job listing with ``/jobs`` and single job
        details with ``/jobs/<_id>``. Additionally this method parses a
        ``filter`` attribute to filter jobs.

        Methods:
            POST /core4/api/v1/jobs - jobs listing

        Parameters:
            - per_page (int): number of jobs per page
            - page (int): requested page (starts counting with ``0``)
            - sort (str): sort field
            - order (int): sort direction (``1`` for ascending, ``-1`` for
              descending)
            - filter (dict): MongoDB query

        **Returns:** see ``GET``

        **Raises:** see ``GET``

        Examples:
            >>> # example continues from above
            >>> args = {"page": "0", "filter": {"args.sleep": {"$lte": 5}}}
            >>> post(url + "/jobs", headers=h, json=args)
        """
        await self.get(_id)

    def parse_id(self, _id):
        """
        Parses a string into :class:`bson.objectid.ObjectId` and raises
        a 400 - Bad Request error in case of failure.

        :param _id: _id (str)
        :return: _id as :class:`bson.objectid.ObjectId`.
        """
        try:
            return ObjectId(_id)
        except Exception:
            raise HTTPError(400, "failed to parse job _id: [{}]".format(_id))

    async def get_listing(self):
        """
        Retrieve job listing from ``sys.queue``. Only jobs with read/execute
        access permissions granted to the current user are returned.

        :return: :class:`.PageResult`
        """

        per_page = int(self.get_argument("per_page", default=10))
        current_page = int(self.get_argument("page", default=0))
        query_filter = self.get_argument("filter", default={})
        sort_by = self.get_argument("sort", default="_id")
        sort_order = self.get_argument("order", default=1)

        data = []
        async for doc in self.collection("queue").find(query_filter).sort([
            (sort_by, int(sort_order))
        ]):
            if await self.user.has_job_access(doc["name"]):
                data.append(doc)

        async def _length(*args, **kwargs):
            return len(data)

        async def _query(skip, limit, *args, **kwargs):
            return data[skip:(skip + limit)]

        pager = CorePager(
            per_page=int(per_page),
            current_page=int(current_page),
            length=_length,
            query=_query,
            # sort_by=[sort_by, int(sort_order)],
            # filter=query_filter
        )
        return await pager.page()

    async def get_detail(self, _id):
        """
        Retrieve job listing from ``sys.queue`` and ``sys.journal`` using
        :meth:`.project_job_listing` to select job attributes. Only jobs with
        read/execute access permissions granted to the current user are
        returned.

        :param _id: job _id
        :return: dict of job attributes
        """
        doc = await self.collection("queue").find_one(
            filter={"_id": _id}, projection=self.project_job_listing())
        if not doc:
            # fallback to journal
            doc = await self.collection("journal").find_one(
                filter={"_id": _id}, projection=self.project_job_listing())
            if doc:
                doc["journal"] = True
        else:
            doc["journal"] = False
        if not doc:
            raise HTTPError(404, "job_id [%s] not found", _id)
        if await self.user.has_job_access(doc["name"]):
            return doc
        raise HTTPError(403)

    async def delete(self, _id=None):
        """
        Only jobs with execute access permissions granted to the current user
        can be deleted.

        Methods:
            DELETE /core4/api/v1/jobs/<_id> - delete job from ``sys.queue``

        Parameters:
            - _id (str): job _id to delete

        Returns:
            data element with ``True`` for success, else ``False``

        Raises:
            400: failed to parse job _id
            400: requires job _id
            401: Unauthorized
            403: Forbidden
            404: job _id not found

        Examples:
            >>> # continue example from :meth:`.get`
            >>> from requests import delete
            >>> rv = delete(url + "/jobs/" + _id, headers=h)
            >>> rv.json()
            {
                '_id': '5bdb0cc8de8b691e4983c4dc',
                'code': 200,
                'data': True,
                'message': 'OK',
                'timestamp': '2018-11-01T14:25:12.747484'
            }
        """
        if _id:
            oid = self.parse_id(_id)
            if not await self.remove_job(oid):
                raise HTTPError(404, "job _id [%s] not found", oid)
        else:
            raise HTTPError(400, "requires job _id")
        self.reply(True)

    async def put(self, request=None):
        """
        Only jobs with execute access permissions granted to the current user
        can be updated.

        Methods:
            PUT /core4/api/v1/jobs/<action>/<_id> - manage job in ``sys.queue``

        Parameters:
            - action(str): ``delete``, ``kill`` or ``restart``
            - _id (str): job _id

        Returns:
            data element with

            - **OK** (str) for actions delete and kill
            - **_id** (str) with new job ``_id`` for action restart

        Raises:
            400: failed to parse job _id
            400: requires action and job _id
            400: failed to restart job
            401: Unauthorized
            403: Forbidden
            404: job _id not found

        Examples:
            >>> # continue example from :meth:`.get`
            >>> from requests import put
            >>> rv = put(url + "/jobs/kill/" + _id, headers=h)
            >>> rv.json()
            {
                '_id': '5bdb0cc8de8b691e4983c4dc',
                'code': 200,
                'data': 'OK',
                'message': 'OK',
                'timestamp': '2018-11-01T14:25:12.747484'
            }
        """
        if request:
            parts = request.split("/")
            oid = self.parse_id(parts[-1])
            if len(parts) == 2:
                action = parts[0].lower()
            else:
                action = self.get_argument("action")
            action_method = {
                "delete": self.remove_job,
                "restart": self.restart_job,
                "kill": self.kill_job
            }
            if action not in action_method:
                raise HTTPError(400,
                                "requires action in (delete, restart, kill)")
            await self._access_by_id(oid)
            return self.reply(await action_method[action](oid))
        raise HTTPError(400, "requires action and job_id")

    async def _access_by_id(self, oid):
        doc = await self.collection("queue").find_one(filter={"_id": oid},
                                                      projection=["name"])
        if not doc:
            raise HTTPError(404, "job_id [%s] not found", oid)
        if not await self.user.has_job_exec_access(doc["name"]):
            raise HTTPError(403)

    async def update(self, oid, attr, message, event):
        """
        Update the passed job attribute, used with ``removed_at`` and
        ``killed_at``. Only jobs with execute access permissions granted to the
        current user can be updated.

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :param attr: job attribute to update
        :param message: logging helper string
        :param event: event name passed to :meth:`.make_stat`
        :return: ``True`` for success, raises 404 on failure
        """
        await self._access_by_id(oid)
        at = core4.util.node.mongo_now()
        ret = await self.collection("queue").update_one(
            {
                "_id": oid,
                attr: None
            }, update={"$set": {
                attr: at
            }})
        if ret.raw_result["n"] == 1:
            self.logger.warning("flagged job [%s] to %s at [%s]", oid, message,
                                at)
            await self.make_stat(event, str(oid))
            return True
        raise HTTPError(404, "failed to flag job [%s] to %s", oid, message)

    async def remove_job(self, oid):
        """
        Flag the passed job ``_id`` in ``removed_at``. Active workers process
        this flag and remove the job from ``sys.queue``.

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :return: ``True`` for success, else ``False``
        """
        return await self.update(oid, "removed_at", "remove",
                                 "request_remove_job")

    async def kill_job(self, oid):
        """
        Flag the passed job ``_id`` in ``killed_at``. Active workers process
        this flag and kill the job.

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :return: ``True`` for success, else ``False``
        """
        return await self.update(oid, "killed_at", "kill", "request_kill_job")

    async def restart_job(self, oid):
        """
        Restart jobs in state *waiting* (``pending``, ``failed``, ``deferred``)
        and re-enqueue jobs in state *stopped* (``error``, ``killed``,
        ``inactive``), journaling the original job.

        :param oid: :class:`bson.objectid.ObjectId` of the job
        :return: dict with ``old_id`` and ``new_id``
        """
        if await self.restart_waiting(oid):
            self.logger.warning('successfully restarted [%s]', oid)
            return {"old_id": oid, "new_id": oid}
        else:
            new_id = await self.restart_stopped(oid)
            if new_id:
                self.logger.warning('successfully restarted [%s] '
                                    'with [%s]', oid, new_id)
                return {"old_id": oid, "new_id": new_id}
        raise HTTPError(404, "failed to restart job [%s]", oid)

    async def restart_waiting(self, _id):
        """
        Restart jobs in state *waiting* (``pending``, ``failed``,
        ``deferred``).

        :param _id: :class:`bson.objectid.ObjectId` of the job
        :return: ``True`` for success, else ``False``
        """
        ret = await self.collection("queue").update_one(
            {
                "_id": _id,
                "state": {
                    "$in": STATE_WAITING
                }
            },
            update={"$set": {
                "query_at": None
            }})
        await self.make_stat("restart_waiting", str(_id))
        return ret.modified_count == 1

    async def restart_stopped(self, _id):
        """
        Restart job in state *stopped* (``error``, ``killed``, ``inactive``).

        :param _id: :class:`bson.objectid.ObjectId` of the job
        :return: new job _id
        """
        queue = self.collection("queue")
        job = await queue.find_one(filter={"_id": _id})
        if job:
            if job["state"] in STATE_STOPPED:
                if await self.lock_job(self.application.container.identifier,
                                       _id):
                    ret = await queue.delete_one({"_id": _id})
                    if ret.raw_result["n"] == 1:
                        doc = dict([(k, v) for k, v in job.items()
                                    if k in core4.queue.job.ENQUEUE_ARGS])
                        new_job = self.queue.job_factory(job["name"], **doc)
                        new_job.__dict__["attempts_left"] = new_job.__dict__[
                            "attempts"]
                        new_job.__dict__[
                            "state"] = core4.queue.main.STATE_PENDING
                        new_job.__dict__["enqueued"] = self.who()
                        new_job.__dict__["enqueued"]["parent_id"] = job["_id"]
                        new_doc = new_job.serialise()
                        ret = await queue.insert_one(new_doc)
                        new_doc["_id"] = ret.inserted_id
                        self.logger.info(
                            'successfully enqueued [%s] with [%s]',
                            new_job.qual_name(), new_doc["_id"])
                        job["enqueued"]["child_id"] = new_doc["_id"]
                        await self.collection("journal").insert_one(job)
                        await self.collection("lock").delete_one({"_id": _id})
                        await self.make_stat("restart_stopped", str(_id))
                        return new_doc["_id"]
            raise HTTPError(400, "cannot restart job [%s] in state [%s]", _id,
                            job["state"])
        return None

    async def lock_job(self, identifier, _id):
        """
        Reserve the job for exclusive processing utilising collection
        ``sys.lock``.

        :param identifier: to assign to the reservation
        :param _id: job ``_id``
        :return: ``True`` if reservation succeeded, else ``False``
        """
        try:
            await self.collection("lock").insert_one({
                "_id": _id,
                "owner": identifier
            })
            return True
        except pymongo.errors.DuplicateKeyError:
            return False
        except:
            raise

    def who(self):
        """
        Creates ``enqueued`` dict attribute with timestamp (``at``),
        ``hostname``, and ``username``.

        :return: dict
        """
        x_real_ip = self.request.headers.get("X-Real-IP")
        return {
            "at": core4.util.node.mongo_now(),
            "hostname": x_real_ip or self.request.remote_ip,
            "username": self.current_user
        }

    async def get_queue_count(self):
        """
        Retrieves aggregated information about the ``sys.queue`` state:

        * ``n`` - the number of jobs in the given state
        * ``state`` - job state
        * ``flags`` - job flags ``zombie``, ``wall``, ``removed`` and
          ``killed``

        :return: dict
        """
        cur = self.collection("queue").aggregate(self.pipeline_queue_count())
        ret = {}
        async for doc in cur:
            ret[doc["state"]] = doc["n"]
        return ret

    async def make_stat(self, event, _id):
        """
        Collects current job state counts from ``sys.queue`` and inserts a
        record into ``sys.event``. See also :meth:`.CoreQueue.make_stat`.

        :param event: to log
        :param _id: job _id
        """
        self.trigger(name=event,
                     channel=core4.const.QUEUE_CHANNEL,
                     data={
                         "_id": _id,
                         "queue": await self.get_queue_count()
                     })
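
Taken together, the docstrings above describe four routes: GET/POST /jobs for the paginated listing, GET /jobs/<_id> for job details, DELETE /jobs/<_id> to remove a job, and PUT /jobs/<action>/<_id> with action delete, kill or restart. A short client-side sketch tying them together, assuming a local server and the admin credentials used in the doctest examples above:

from requests import delete, get, put

url = "http://localhost:5001/core4/api/v1"
signin = get(url + "/login?username=admin&password=hans")
h = {"Authorization": "Bearer " + signin.json()["data"]["token"]}

# paginated listing, newest jobs first (GET /jobs)
rv = get(url + "/jobs?per_page=10&sort=_id&order=-1", headers=h)
jobs = rv.json()["data"]

if jobs:
    _id = jobs[0]["_id"]
    get(url + "/jobs/" + _id, headers=h)          # single job details
    put(url + "/jobs/kill/" + _id, headers=h)     # flag the job to be killed
    put(url + "/jobs/restart/" + _id, headers=h)  # restart or re-enqueue it
    delete(url + "/jobs/" + _id, headers=h)       # remove it from sys.queue
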
Example #4
def execute(self, child=None):
    q = CoreQueue()
    for doc in find_job(name=re.compile("^meetup.example.job.SwarmJob")):
        if random.random() < 0.2:
            q.kill_job(doc["_id"])
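
As with Example #1, a self-contained version of this snippet with the missing imports (same assumed import locations) might look like this:

import random
import re

from core4.queue.main import CoreQueue             # assumed import location
from core4.queue.helper.functool import find_job   # assumed import location


def kill_some_swarm_jobs(probability=0.2):
    """Kill roughly the given fraction of queued SwarmJob instances."""
    q = CoreQueue()
    for doc in find_job(name=re.compile("^meetup.example.job.SwarmJob")):
        if random.random() < probability:
            q.kill_job(doc["_id"])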