Exemplo n.º 1
0
    def apply_async(self, *args, **kwargs):
        """
        Dispatch this task through the parent class's apply_async and record a TaskStatus for it.

        A few keyword arguments beyond what Celery accepts are supported for our own purposes
        (listed below). A TaskStatus document is upserted after dispatch so that the task can be
        tracked during its lifetime.

        :param queue:       The queue that the task has been placed into (optional, defaults to
                            the general Celery queue.) Passed through to Celery untouched.
        :type  queue:       basestring
        :param routing_key: Optional routing key, passed through to Celery; it is also recorded
                            as the worker_name of the TaskStatus. Defaults to Celery's default
                            routing key.
        :type  routing_key: basestring
        :param tags:        A list of tags (strings) to place onto the task, used for searching for
                            tasks by tag
        :type  tags:        list
        :return:            An AsyncResult instance as returned by Celery's apply_async
        :rtype:             celery.result.AsyncResult
        :raises Exception:  whatever the underlying dispatch raises is re-raised unchanged
        """
        routing_key = kwargs.get('routing_key',
                                 defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
        # 'tags' is ours, not Celery's, so it must be removed before delegating.
        tags = kwargs.pop('tags', [])

        try:
            async_result = super(Task, self).apply_async(*args, **kwargs)
        except Exception:
            # When the caller supplied a task_id, a TaskStatus for it may already exist (for
            # example one created as a placeholder before dispatch). Flag it as errored so that
            # it is not left waiting forever, then let the original exception propagate.
            if 'task_id' in kwargs:
                TaskStatus.objects(task_id=kwargs['task_id']).update(
                    state=constants.CALL_ERROR_STATE)
            raise

        async_result.tags = tags

        # Create a new task status with the task id and tags.
        task_status = TaskStatus(
            task_id=async_result.id, task_type=self.name,
            state=constants.CALL_WAITING_STATE, worker_name=routing_key, tags=tags)
        # The task has already been dispatched, so its callbacks may have written to the
        # TaskStatus before we get here. Use an upsert and only write the fields listed below
        # when the document is actually inserted, so that values already recorded for a running
        # or finished task are not overwritten with this fresh object's defaults.
        task_status.save_with_set_on_insert(fields_to_set_on_insert=[
            'state', 'start_time', 'finish_time', 'result', 'error',
            'spawned_tasks', 'traceback'])
        return async_result
Exemplo n.º 2
0
    def apply_async(self, *args, **kwargs):
        """
        Dispatch this task through the parent class's apply_async and record a TaskStatus for it.

        A few keyword arguments beyond what Celery accepts are supported for our own purposes
        (listed below). A TaskStatus document is upserted after dispatch so that the task can be
        tracked during its lifetime.

        :param queue:       The queue that the task has been placed into (optional, defaults to
                            the general Celery queue.) Passed through to Celery untouched.
        :type  queue:       basestring
        :param routing_key: Optional routing key, passed through to Celery; it is also recorded
                            as the worker_name of the TaskStatus. Defaults to Celery's default
                            routing key.
        :type  routing_key: basestring
        :param tags:        A list of tags (strings) to place onto the task, used for searching for
                            tasks by tag
        :type  tags:        list
        :param group_id:    The id that identifies which group of tasks a task belongs to
        :type group_id:     uuid.UUID
        :return:            An AsyncResult instance as returned by Celery's apply_async
        :rtype:             celery.result.AsyncResult
        :raises Exception:  whatever the underlying dispatch raises is re-raised unchanged
        """
        routing_key = kwargs.get('routing_key',
                                 defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default)
        # 'tags' and 'group_id' are ours, not Celery's, so remove them before delegating.
        tag_list = kwargs.pop('tags', [])
        group_id = kwargs.pop('group_id', None)

        try:
            async_result = super(Task, self).apply_async(*args, **kwargs)
        except Exception:
            # When the caller supplied a task_id, a TaskStatus for it may already exist (for
            # example one created as a placeholder before dispatch). Flag it as errored so that
            # it is not left waiting forever, then let the original exception propagate.
            if 'task_id' in kwargs:
                TaskStatus.objects(task_id=kwargs['task_id']).update(
                    state=constants.CALL_ERROR_STATE)
            raise

        async_result.tags = tag_list

        # Create a new task status with the task id and tags.
        task_status = TaskStatus(
            task_id=async_result.id, task_type=self.name,
            state=constants.CALL_WAITING_STATE, worker_name=routing_key, tags=tag_list,
            group_id=group_id)
        # The task has already been dispatched, so its callbacks may have written to the
        # TaskStatus before we get here. Use an upsert and only write the fields listed below
        # when the document is actually inserted, so that values already recorded for a running
        # or finished task are not overwritten with this fresh object's defaults.
        task_status.save_with_set_on_insert(fields_to_set_on_insert=[
            'state', 'start_time', 'finish_time', 'result', 'error',
            'spawned_tasks', 'traceback'])
        return async_result
Exemplo n.º 3
0
    def test_save_update_with_set_on_insert(self):
        """
        Check save_with_set_on_insert() against a TaskStatus that is already stored: the fields
        named in fields_to_set_on_insert must keep their stored values, while other modified
        fields are written to the database.
        """
        task_id = str(uuid4())
        tags = ['tag_1', 'tag_2']
        state = constants.CALL_ACCEPTED_STATE
        spawned_tasks = ['foo']
        error = {'error': 'some_error'}
        progress_report = {'what do we want?': 'progress!', 'when do we want it?': 'now!'}
        task_type = 'some.task'
        result = None
        # All timestamps are derived from one reference instant.
        reference_time = datetime.now()
        start_time = dateutils.format_iso8601_datetime(reference_time)
        finish_time = dateutils.format_iso8601_datetime(reference_time + timedelta(minutes=5))

        status = TaskStatus(
            task_id, 'worker_name', tags, state, spawned_tasks=spawned_tasks, error=error,
            progress_report=progress_report, task_type=task_type, start_time=start_time,
            finish_time=finish_time, result=result)
        # Store the object first, then modify some of its attributes in memory.
        status.save()
        replacement_worker = 'a different_worker'
        status.worker_name = replacement_worker
        status.state = constants.CALL_SUSPENDED_STATE
        status.start_time = dateutils.format_iso8601_datetime(
            reference_time + timedelta(minutes=10))

        # Only worker_name should reach the database; state and start_time are insert-only.
        status.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])

        stored = TaskStatus.objects()
        # There should only be one TaskStatus in the db
        self.assertEqual(len(stored), 1)
        document = stored[0]
        expected_fields = (
            ('task_id', task_id),
            # worker_name should have been updated
            ('worker_name', replacement_worker),
            ('tags', tags),
            # state should not have been updated
            ('state', state),
            ('error', error),
            ('spawned_tasks', spawned_tasks),
            ('progress_report', progress_report),
            ('task_type', task_type),
            # start_time should not have been updated
            ('start_time', start_time),
            ('finish_time', finish_time),
            ('result', result),
            # These are always None
            ('traceback', None),
            ('exception', None),
        )
        for field_name, expected_value in expected_fields:
            self.assertEqual(document[field_name], expected_value)
Exemplo n.º 4
0
Arquivo: tasks.py Projeto: alexxa/pulp
    def apply_async_with_reservation(self, resource_type, resource_id, *args, **kwargs):
        """
        Schedule this task to run asynchronously, as Celery's apply_async() would, while also
        reserving a named resource. No two tasks that claim the same resource reservation can
        execute concurrently. The resource type and resource id are joined into a single
        reservation string.

        The task is not dispatched directly. Instead a _queue_reserved_task task is sent to the
        resource manager queue, and it promises to dispatch the real task later. See the
        docblock on _queue_reserved_task for more information on this.

        A TaskStatus is created as a placeholder for later updates. Pulp expects to poll on a
        task just after calling this method, so a TaskStatus entry needs to exist for it before
        this method returns.

        For a list of parameters accepted by the *args and **kwargs parameters, please see the
        docblock for the apply_async() method.

        :param resource_type: A string that identifies type of a resource
        :type resource_type:  basestring
        :param resource_id:   A string that identifies some named resource, guaranteeing that only
                              one task reserving this same string can happen at a time.
        :type  resource_id:   basestring
        :param tags:          A list of tags (strings) to place onto the task, used for searching
                              for tasks by tag
        :type  tags:          list
        :param group_id:      The id to identify which group of tasks a task belongs to
        :type  group_id:      uuid.UUID
        :return:              An AsyncResult for the newly generated task id
        :rtype:               celery.result.AsyncResult
        """
        # Prefixing the id with its type keeps two different kinds of resource that happen to
        # share an id from blocking each other.
        reservation_key = ":".join([resource_type, resource_id])
        new_task_id = str(uuid.uuid4())
        name_of_task = self.name

        # 'tags' and 'group_id' are only read here; they stay in kwargs, which is forwarded.
        placeholder = TaskStatus(
            task_id=new_task_id,
            task_type=name_of_task,
            state=constants.CALL_WAITING_STATE,
            tags=kwargs.get('tags', []),
            group_id=kwargs.get('group_id'),
        )
        # Upsert, writing state and start_time only on insert, so that a status already
        # written by the task's __call__ (which may win the race) is not reset to 'waiting'.
        placeholder.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])

        dispatch_args = [name_of_task, new_task_id, reservation_key, args, kwargs]
        try:
            _queue_reserved_task.apply_async(args=dispatch_args, queue=RESOURCE_MANAGER_QUEUE)
        except Exception:
            # The request never reached the resource manager; mark the placeholder as failed.
            TaskStatus.objects(task_id=placeholder.task_id).update(
                state=constants.CALL_ERROR_STATE)
            raise

        return AsyncResult(new_task_id)
Exemplo n.º 5
0
    def apply_async_with_reservation(self, resource_type, resource_id, *args, **kwargs):
        """
        Queue this task for asynchronous execution, much like Celery's apply_async(), and
        reserve the named resource for it. Tasks that claim the same resource reservation never
        execute concurrently. The reservation string is built from the resource type and id.

        Dispatching is deferred: this method sends a _queue_reserved_task task to the resource
        manager queue, which in turn promises to dispatch the desired task later. See the
        docblock on _queue_reserved_task for more information on this.

        Before returning, a TaskStatus placeholder is stored, because Pulp expects to be able to
        poll on the task immediately after this call.

        For a list of parameters accepted by the *args and **kwargs parameters, please see the
        docblock for the apply_async() method.

        :param resource_type: A string that identifies type of a resource
        :type resource_type:  basestring
        :param resource_id:   A string that identifies some named resource, guaranteeing that only
                              one task reserving this same string can happen at a time.
        :type  resource_id:   basestring
        :param tags:          A list of tags (strings) to place onto the task, used for searching
                              for tasks by tag
        :type  tags:          list
        :param group_id:      The id to identify which group of tasks a task belongs to
        :type  group_id:      uuid.UUID
        :return:              An AsyncResult for the newly generated task id
        :rtype:               celery.result.AsyncResult
        """
        # Combine type and id so that different resource types sharing an id do not collide.
        combined_resource = ":".join((resource_type, resource_id))
        generated_id = str(uuid.uuid4())
        own_name = self.name
        requested_tags = kwargs.get('tags', [])
        requested_group = kwargs.get('group_id', None)

        # Record the task as waiting, tagged and grouped as requested.
        status_doc = TaskStatus(task_id=generated_id, task_type=own_name,
                                state=constants.CALL_WAITING_STATE, tags=requested_tags,
                                group_id=requested_group)
        # This is an upsert: 'state' and 'start_time' are written only when the document is
        # inserted, which avoids clobbering values set by __call__ if it ran first.
        insert_only_fields = ['state', 'start_time']
        status_doc.save_with_set_on_insert(fields_to_set_on_insert=insert_only_fields)

        try:
            _queue_reserved_task.apply_async(
                args=[own_name, generated_id, combined_resource, args, kwargs],
                queue=RESOURCE_MANAGER_QUEUE,
            )
        except Exception:
            # Dispatch failed, so flag the stored status as errored before re-raising.
            failed = TaskStatus.objects(task_id=status_doc.task_id)
            failed.update(state=constants.CALL_ERROR_STATE)
            raise

        return AsyncResult(generated_id)
Exemplo n.º 6
0
    def apply_async(self, *args, **kwargs):
        """
        A wrapper around the PulpTask apply_async method. It allows us to accept a few more
        parameters than Celery does for our own purposes, listed below. It also allows us
        to create and update task status which can be used to track status of this task
        during its lifetime.

        :param queue:       The queue that the task has been placed into (optional, defaults to
                            the general Celery queue.) Passed through to Celery untouched.
        :type  queue:       basestring
        :param routing_key: Optional routing key, passed through to Celery; it is also recorded
                            as the worker_name of the TaskStatus. Defaults to Celery's default
                            routing key.
        :type  routing_key: basestring
        :param tags:        A list of tags (strings) to place onto the task, used for searching for
                            tasks by tag
        :type  tags:        list
        :param group_id:    The id that identifies which group of tasks a task belongs to
        :type group_id:     uuid.UUID
        :return:            An AsyncResult instance as returned by Celery's apply_async
        :rtype:             celery.result.AsyncResult
        :raises Exception:  whatever the underlying dispatch raises is re-raised unchanged
        """
        # The lowercase 'task' settings namespace is used from Celery 4 onwards. Compare the
        # numeric major version rather than matching the literal prefix '4', so that later major
        # versions do not fall back to the legacy 'CELERY' namespace. A version string without
        # a numeric major component is treated as legacy.
        major_version = celery_version.split('.', 1)[0]
        if major_version.isdigit() and int(major_version) >= 4:
            default_routing_key = defaults.NAMESPACES['task']['default_routing_key'].default
        else:
            default_routing_key = defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY'].default
        routing_key = kwargs.get('routing_key', default_routing_key)

        # 'tags' and 'group_id' are ours, not Celery's, so remove them before delegating.
        tag_list = kwargs.pop('tags', [])
        group_id = kwargs.pop('group_id', None)

        try:
            async_result = super(Task, self).apply_async(*args, **kwargs)
        except Exception:
            # Only a caller-supplied task_id can be matched to a status at this point; flag
            # that status as errored and let the original exception propagate.
            if 'task_id' in kwargs:
                TaskStatus.objects(task_id=kwargs['task_id']).update(
                    state=constants.CALL_ERROR_STATE)
            raise

        async_result.tags = tag_list

        # Create a new task status with the task id and tags.
        task_status = TaskStatus(task_id=async_result.id,
                                 task_type=self.name,
                                 state=constants.CALL_WAITING_STATE,
                                 worker_name=routing_key,
                                 tags=tag_list,
                                 group_id=group_id)
        # We're now racing with __call__, on_failure and on_success, any of which may
        # have completed by now. To avoid overwriting TaskStatus updates from those callbacks,
        # we'll do an upsert and only touch the fields listed below if we've inserted the object.
        task_status.save_with_set_on_insert(fields_to_set_on_insert=[
            'state', 'start_time', 'finish_time', 'result', 'error',
            'spawned_tasks', 'traceback'
        ])
        return async_result
Exemplo n.º 7
0
Arquivo: tasks.py Projeto: alexxa/pulp
    def apply_async(self, *args, **kwargs):
        """
        A wrapper around the PulpTask apply_async method. It allows us to accept a few more
        parameters than Celery does for our own purposes, listed below. It also allows us
        to create and update task status which can be used to track status of this task
        during its lifetime.

        :param queue:       The queue that the task has been placed into (optional, defaults to
                            the general Celery queue.) Passed through to Celery untouched.
        :type  queue:       basestring
        :param routing_key: Optional routing key, passed through to Celery; it is also recorded
                            as the worker_name of the TaskStatus. Defaults to Celery's default
                            routing key.
        :type  routing_key: basestring
        :param tags:        A list of tags (strings) to place onto the task, used for searching for
                            tasks by tag
        :type  tags:        list
        :param group_id:    The id that identifies which group of tasks a task belongs to
        :type group_id:     uuid.UUID
        :return:            An AsyncResult instance as returned by Celery's apply_async
        :rtype:             celery.result.AsyncResult
        :raises Exception:  whatever the underlying dispatch raises is re-raised unchanged
        """
        # Celery 4 and later use the lowercase 'task' settings namespace. Check the numeric
        # major version instead of the literal prefix '4' so that newer major versions are not
        # sent down the legacy 'CELERY' path. Anything without a numeric major component is
        # treated as legacy.
        major_version = celery_version.split('.', 1)[0]
        if major_version.isdigit() and int(major_version) >= 4:
            routing_key_option = defaults.NAMESPACES['task']['default_routing_key']
        else:
            routing_key_option = defaults.NAMESPACES['CELERY']['DEFAULT_ROUTING_KEY']
        routing_key = kwargs.get('routing_key', routing_key_option.default)

        # 'tags' and 'group_id' are ours, not Celery's, so remove them before delegating.
        tag_list = kwargs.pop('tags', [])
        group_id = kwargs.pop('group_id', None)

        try:
            async_result = super(Task, self).apply_async(*args, **kwargs)
        except Exception:
            # Only a caller-supplied task_id can be matched to a status at this point; flag
            # that status as errored and let the original exception propagate.
            if 'task_id' in kwargs:
                TaskStatus.objects(task_id=kwargs['task_id']).update(
                    state=constants.CALL_ERROR_STATE
                )
            raise

        async_result.tags = tag_list

        # Create a new task status with the task id and tags.
        task_status = TaskStatus(
            task_id=async_result.id, task_type=self.name,
            state=constants.CALL_WAITING_STATE, worker_name=routing_key, tags=tag_list,
            group_id=group_id)
        # We're now racing with __call__, on_failure and on_success, any of which may
        # have completed by now. To avoid overwriting TaskStatus updates from those callbacks,
        # we'll do an upsert and only touch the fields listed below if we've inserted the object.
        task_status.save_with_set_on_insert(fields_to_set_on_insert=[
            'state', 'start_time', 'finish_time', 'result', 'error',
            'spawned_tasks', 'traceback'])
        return async_result
Exemplo n.º 8
0
    def test_save_insert_with_set_on_insert(self):
        """
        Check save_with_set_on_insert() for a TaskStatus that is not yet stored: the call must
        insert the document with every attribute intact, including the insert-only fields.
        """
        task_id = str(uuid4())
        worker_name = 'some_worker'
        tags = ['tag_1', 'tag_2']
        state = constants.CALL_RUNNING_STATE
        spawned_tasks = ['foo']
        error = {'error': 'some_error'}
        progress_report = {'what do we want?': 'progress!', 'when do we want it?': 'now!'}
        task_type = 'some.task'
        result = None
        # Both timestamps are derived from one reference instant.
        reference_time = datetime.now()
        start_time = dateutils.format_iso8601_datetime(reference_time)
        finish_time = dateutils.format_iso8601_datetime(reference_time + timedelta(minutes=5))

        status = TaskStatus(
            task_id, worker_name, tags, state, spawned_tasks=spawned_tasks, error=error,
            progress_report=progress_report, task_type=task_type, start_time=start_time,
            finish_time=finish_time, result=result)

        # Nothing is stored yet, so this call should perform the insert.
        status.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])

        stored = TaskStatus.objects()
        # There should only be one TaskStatus in the db
        self.assertEqual(len(stored), 1)
        document = stored[0]
        expected_fields = (
            ('task_id', task_id),
            ('worker_name', worker_name),
            ('tags', tags),
            ('state', state),
            ('error', error),
            ('spawned_tasks', spawned_tasks),
            ('progress_report', progress_report),
            ('task_type', task_type),
            ('start_time', start_time),
            ('finish_time', finish_time),
            ('result', result),
            # These are always None
            ('traceback', None),
            ('exception', None),
        )
        for field_name, expected_value in expected_fields:
            self.assertEqual(document[field_name], expected_value)
Exemplo n.º 9
0
    def _apply_async_inner(self, reservation, *args, **kwargs):
        """
        Schedule this task to run asynchronously, as Celery's apply_async() would, while also
        locking the named resource(s). No two tasks that claim the same named resource(s) can
        execute concurrently.

        The reservation is a list of strings of the form 'resource-type:resource-id'. The task
        is not dispatched directly; instead a promise to dispatch it later is sent to the
        resource manager queue. When exactly one resource is requested (list length 1) that
        promise is a _queue_reserved_task call carrying the single string; otherwise it is a
        _queue_reserved_task_list call carrying the whole list.

        See the docblock on _queue_reserved_task and _queue_reserved_task_list for more
        information.

        A TaskStatus is created as a placeholder for later updates. Pulp expects to poll on a
        task just after calling this method, so a TaskStatus entry needs to exist for it before
        this method returns.

        For a list of parameters accepted by the *args and **kwargs parameters, please see the
        docblock for the apply_async() method.

        :param reservation:    A list-of-strings that identify a set of named resources,
                               guaranteeing that only one task reserving any resource-ids in this
                               list can happen at a time.
        :type  reservation:    list
        :param tags:           A list of tags (strings) to place onto the task, used for searching
                               for tasks by tag
        :type  tags:           list
        :param group_id:       The id to identify which group of tasks a task belongs to
        :type  group_id:       uuid.UUID
        :return:               An AsyncResult for the newly generated task id
        :rtype:                celery.result.AsyncResult
        """
        new_task_id = str(uuid.uuid4())
        name_of_task = self.name

        # 'tags' and 'group_id' are only read here; they stay in kwargs, which is forwarded.
        placeholder = TaskStatus(
            task_id=new_task_id,
            task_type=name_of_task,
            state=constants.CALL_WAITING_STATE,
            tags=kwargs.get('tags', []),
            group_id=kwargs.get('group_id'),
        )
        # Upsert, writing state and start_time only on insert, so that a status already
        # written by the task's __call__ (which may win the race) is not reset to 'waiting'.
        placeholder.save_with_set_on_insert(fields_to_set_on_insert=['state', 'start_time'])

        try:
            # A single reservation goes to the single-resource queuing task with the bare
            # string; anything else goes to the list variant with the whole list.
            if len(reservation) == 1:
                queuing_task = _queue_reserved_task
                reserved = reservation[0]
            else:
                queuing_task = _queue_reserved_task_list
                reserved = reservation
            queuing_task.apply_async(
                args=[name_of_task, new_task_id, reserved, args, kwargs],
                queue=RESOURCE_MANAGER_QUEUE)
        except Exception:
            # The request never reached the resource manager; mark the placeholder as failed.
            TaskStatus.objects(task_id=placeholder.task_id).update(
                state=constants.CALL_ERROR_STATE)
            raise

        return AsyncResult(new_task_id)