Example #1
class Task(object):
    """Task base class.

    Note:
        When called tasks apply the :meth:`run` method.  This method must
        be defined by all tasks (that is unless the :meth:`__call__` method
        is overridden).
    """

    __trace__ = None
    __v2_compat__ = False  # set by old base in celery.task.base

    MaxRetriesExceededError = MaxRetriesExceededError
    OperationalError = OperationalError

    #: Execution strategy used, or the qualified name of one.
    Strategy = 'celery.worker.strategy:default'

    #: Request class used, or the qualified name of one.
    Request = 'celery.worker.request:Request'

    #: The application instance associated with this task class.
    _app = None

    #: Name of the task.
    name = None

    #: Enable argument checking.
    #: You can set this to false if you don't want the signature to be
    #: checked when calling the task.
    #: Defaults to :attr:`app.strict_typing <@Celery.strict_typing>`.
    typing = None

    #: Maximum number of retries before giving up.  If set to :const:`None`,
    #: it will **never** stop retrying.
    max_retries = 3

    #: Default time in seconds before a retry of the task should be
    #: executed.  3 minutes by default.
    default_retry_delay = 3 * 60

    #: Rate limit for this task type.  Examples: :const:`None` (no rate
    #: limit), `'100/s'` (hundred tasks a second), `'100/m'` (hundred tasks
    #: a minute), `'100/h'` (hundred tasks an hour).
    rate_limit = None

    #: If enabled the worker won't store task state and return values
    #: for this task.  Defaults to the :setting:`task_ignore_result`
    #: setting.
    ignore_result = None

    #: If enabled the request will keep track of subtasks started by
    #: this task, and this information will be sent with the result
    #: (``result.children``).
    trail = True

    #: If enabled the worker will send monitoring events related to
    #: this task (but only if the worker is configured to send
    #: task related events).
    #: Note that this has no effect on the task-failure event case
    #: where a task is not registered (as it will have no task class
    #: to check this flag).
    send_events = True

    #: When enabled errors will be stored even if the task is otherwise
    #: configured to ignore results.
    store_errors_even_if_ignored = None

    #: The name of a serializer that is registered with
    #: :mod:`kombu.serialization.registry`.  Default is `'pickle'`.
    serializer = None

    #: Hard time limit.
    #: Defaults to the :setting:`task_time_limit` setting.
    time_limit = None

    #: Soft time limit.
    #: Defaults to the :setting:`task_soft_time_limit` setting.
    soft_time_limit = None

    #: The result store backend used for this task.
    backend = None

    #: If disabled this task won't be registered automatically.
    autoregister = True

    #: If enabled the task will report its status as 'started' when the task
    #: is executed by a worker.  Disabled by default as the normal behavior
    #: is to not report that level of granularity.  Tasks are either pending,
    #: finished, or waiting to be retried.
    #:
    #: Having a 'started' status can be useful for when there are long
    #: running tasks and there's a need to report what task is currently
    #: running.
    #:
    #: The application default can be overridden using the
    #: :setting:`task_track_started` setting.
    track_started = None

    #: When enabled messages for this task will be acknowledged **after**
    #: the task has been executed, and not *just before* (the
    #: default behavior).
    #:
    #: Please note that this means the task may be executed twice if the
    #: worker crashes mid execution.
    #:
    #: The application default can be overridden with the
    #: :setting:`task_acks_late` setting.
    acks_late = None

    #: When enabled messages for this task will be acknowledged even if it
    #: fails or times out.
    #:
    #: Configuring this setting only applies to tasks that are
    #: acknowledged **after** they have been executed and only if
    #: :setting:`task_acks_late` is enabled.
    #:
    #: The application default can be overridden with the
    #: :setting:`task_acks_on_failure_or_timeout` setting.
    acks_on_failure_or_timeout = None

    #: Even if :attr:`acks_late` is enabled, the worker will
    #: acknowledge tasks when the worker process executing them abruptly
    #: exits or is signaled (e.g., :sig:`KILL`/:sig:`INT`, etc).
    #:
    #: Setting this to true allows the message to be re-queued instead,
    #: so that the task will execute again by the same worker, or another
    #: worker.
    #:
    #: Warning: Enabling this can cause message loops; make sure you know
    #: what you're doing.
    reject_on_worker_lost = None

    #: Tuple of expected exceptions.
    #:
    #: These are errors that are expected in normal operation
    #: and that shouldn't be regarded as a real error by the worker.
    #: Currently this means that the state will be updated to an error
    #: state, but the worker won't log the event as an error.
    throws = ()

    #: Default task expiry time.
    expires = None

    #: Default task priority.
    priority = None

    #: Max length of result representation used in logs and events.
    resultrepr_maxsize = 1024

    #: Task request stack, the current request will be the topmost.
    request_stack = None

    #: Some may expect a request to exist even if the task hasn't been
    #: called.  This should probably be deprecated.
    _default_request = None

    #: Deprecated attribute ``abstract`` here for compatibility.
    abstract = True

    _exec_options = None

    __bound__ = False

    from_config = (
        ('serializer', 'task_serializer'),
        ('rate_limit', 'task_default_rate_limit'),
        ('priority', 'task_default_priority'),
        ('track_started', 'task_track_started'),
        ('acks_late', 'task_acks_late'),
        ('acks_on_failure_or_timeout', 'task_acks_on_failure_or_timeout'),
        ('reject_on_worker_lost', 'task_reject_on_worker_lost'),
        ('ignore_result', 'task_ignore_result'),
        ('store_errors_even_if_ignored', 'task_store_errors_even_if_ignored'),
    )
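
    # Most of the attributes above can also be set per task via keyword
    # arguments to the task decorator.  A minimal sketch (the ``fetch`` task
    # and its body are hypothetical, not part of this class):
    #
    #     @app.task(bind=True, max_retries=5, rate_limit='10/m', acks_late=True)
    #     def fetch(self, url):
    #         ...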

    _backend = None  # set by backend property.

    # - Tasks are lazily bound, so that configuration is not set
    # - until the task is actually used

    @classmethod
    def bind(cls, app):
        was_bound, cls.__bound__ = cls.__bound__, True
        cls._app = app
        conf = app.conf
        cls._exec_options = None  # clear option cache

        if cls.typing is None:
            cls.typing = app.strict_typing

        for attr_name, config_name in cls.from_config:
            if getattr(cls, attr_name, None) is None:
                setattr(cls, attr_name, conf[config_name])

        # decorate with annotations from config.
        if not was_bound:
            cls.annotate()

            from celery.utils.threads import LocalStack
            cls.request_stack = LocalStack()

        # PeriodicTask uses this to add itself to the PeriodicTask schedule.
        cls.on_bound(app)

        return app

    @classmethod
    def on_bound(cls, app):
        """Called when the task is bound to an app.

        Note:
            This class method can be defined to do additional actions when
            the task class is bound to an app.
        """

    @classmethod
    def _get_app(cls):
        if cls._app is None:
            cls._app = current_app
        if not cls.__bound__:
            # The app property's __set__ method is not called
            # if Task.app is set (on the class), so must bind on use.
            cls.bind(cls._app)
        return cls._app

    app = class_property(_get_app, bind)

    @classmethod
    def annotate(cls):
        for d in resolve_all_annotations(cls.app.annotations, cls):
            for key, value in items(d):
                if key.startswith('@'):
                    cls.add_around(key[1:], value)
                else:
                    setattr(cls, key, value)

    @classmethod
    def add_around(cls, attr, around):
        orig = getattr(cls, attr)
        if getattr(orig, '__wrapped__', None):
            orig = orig.__wrapped__
        meth = around(orig)
        meth.__wrapped__ = orig
        setattr(cls, attr, meth)

    def __call__(self, *args, **kwargs):
        _task_stack.push(self)
        self.push_request(args=args, kwargs=kwargs)
        try:
            return self.run(*args, **kwargs)
        finally:
            self.pop_request()
            _task_stack.pop()

    def __reduce__(self):
        # - tasks are pickled into the name of the task only, and the receiver
        # - simply grabs it from the local registry.
        # - in later versions the module of the task is also included,
        # - and the receiving side tries to import that module so that
        # - it will work even if the task hasn't been registered.
        mod = type(self).__module__
        mod = mod if mod and mod in sys.modules else None
        return (_unpickle_task_v2, (self.name, mod), None)

    def run(self, *args, **kwargs):
        """The body of the task executed by workers."""
        raise NotImplementedError('Tasks must define the run method.')

    def start_strategy(self, app, consumer, **kwargs):
        return instantiate(self.Strategy, self, app, consumer, **kwargs)

    def delay(self, *args, **kwargs):
        """Star argument version of :meth:`apply_async`.

        Does not support the extra options enabled by :meth:`apply_async`.

        Arguments:
            *args (Any): Positional arguments passed on to the task.
            **kwargs (Any): Keyword arguments passed on to the task.
        Returns:
            celery.result.AsyncResult: Future promise.
        """
        return self.apply_async(args, kwargs)
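
    # A minimal sketch, assuming a registered ``add`` task: the two calls
    # below are equivalent, but only ``apply_async`` accepts execution options:
    #
    #     add.delay(2, 2)
    #     add.apply_async((2, 2))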

    def apply_async(self,
                    args=None,
                    kwargs=None,
                    task_id=None,
                    producer=None,
                    link=None,
                    link_error=None,
                    shadow=None,
                    **options):
        """Apply tasks asynchronously by sending a message.

        Arguments:
            args (Tuple): The positional arguments to pass on to the task.

            kwargs (Dict): The keyword arguments to pass on to the task.

            countdown (float): Number of seconds into the future that the
                task should execute.  Defaults to immediate execution.

            eta (~datetime.datetime): Absolute time and date of when the task
                should be executed.  May not be specified if `countdown`
                is also supplied.

            expires (float, ~datetime.datetime): Datetime or
                seconds in the future when the task should expire.
                The task won't be executed after the expiration time.

            shadow (str): Override task name used in logs/monitoring.
                Default is retrieved from :meth:`shadow_name`.

            connection (kombu.Connection): Re-use existing broker connection
                instead of acquiring one from the connection pool.

            retry (bool): If enabled sending of the task message will be
                retried in the event of connection loss or failure.
                Default is taken from the :setting:`task_publish_retry`
                setting.  Note that you need to handle the
                producer/connection manually for this to work.

            retry_policy (Mapping): Override the retry policy used.
                See the :setting:`task_publish_retry_policy` setting.

            queue (str, kombu.Queue): The queue to route the task to.
                This must be a key present in :setting:`task_queues`, or
                :setting:`task_create_missing_queues` must be
                enabled.  See :ref:`guide-routing` for more
                information.

            exchange (str, kombu.Exchange): Named custom exchange to send the
                task to.  Usually not used in combination with the ``queue``
                argument.

            routing_key (str): Custom routing key used to route the task to a
                worker server.  If used in combination with a ``queue``
                argument, it's only used to specify custom routing keys to
                topic exchanges.

            priority (int): The task priority, a number between 0 and 9.
                Defaults to the :attr:`priority` attribute.

            serializer (str): Serialization method to use.
                Can be `pickle`, `json`, `yaml`, `msgpack` or any custom
                serialization method that's been registered
                with :mod:`kombu.serialization.registry`.
                Defaults to the :attr:`serializer` attribute.

            compression (str): Optional compression method
                to use.  Can be one of ``zlib``, ``bzip2``,
                or any custom compression methods registered with
                :func:`kombu.compression.register`.
                Defaults to the :setting:`task_compression` setting.

            link (Signature): A single, or a list of tasks signatures
                to apply if the task returns successfully.

            link_error (Signature): A single, or a list of task signatures
                to apply if an error occurs while executing the task.

            producer (kombu.Producer): custom producer to use when publishing
                the task.

            add_to_parent (bool): If set to True (default) and the task
                is applied while executing another task, then the result
                will be appended to the parent task's ``request.children``
                attribute.  Trailing can also be disabled by default using the
                :attr:`trail` attribute.

            publisher (kombu.Producer): Deprecated alias to ``producer``.

            headers (Dict): Message headers to be included in the message.

        Returns:
            celery.result.AsyncResult: Promise of future evaluation.

        Raises:
            TypeError: If not enough arguments are passed, or too many
                arguments are passed.  Note that signature checks may
                be disabled by specifying ``@task(typing=False)``.
            kombu.exceptions.OperationalError: If a connection to the
                transport cannot be made, or if the connection is lost.

        Note:
            Also supports all keyword arguments supported by
            :meth:`kombu.Producer.publish`.
        """
        if self.typing:
            try:
                check_arguments = self.__header__
            except AttributeError:  # pragma: no cover
                pass
            else:
                check_arguments(*(args or ()), **(kwargs or {}))

        app = self._get_app()
        if app.conf.task_always_eager:
            with app.producer_or_acquire(producer) as eager_producer:
                serializer = options.get(
                    'serializer',
                    (eager_producer.serializer if eager_producer.serializer
                     else app.conf.task_serializer))
                body = args, kwargs
                content_type, content_encoding, data = serialization.dumps(
                    body,
                    serializer,
                )
                args, kwargs = serialization.loads(data,
                                                   content_type,
                                                   content_encoding,
                                                   accept=[content_type])
            with denied_join_result():
                return self.apply(args,
                                  kwargs,
                                  task_id=task_id or uuid(),
                                  link=link,
                                  link_error=link_error,
                                  **options)

        if self.__v2_compat__:
            shadow = shadow or self.shadow_name(self(), args, kwargs, options)
        else:
            shadow = shadow or self.shadow_name(args, kwargs, options)

        preopts = self._get_exec_options()
        options = dict(preopts, **options) if options else preopts

        options.setdefault('ignore_result', self.ignore_result)
        if self.priority:
            options.setdefault('priority', self.priority)

        return app.send_task(self.name,
                             args,
                             kwargs,
                             task_id=task_id,
                             producer=producer,
                             link=link,
                             link_error=link_error,
                             result_cls=self.AsyncResult,
                             shadow=shadow,
                             task_type=self,
                             **options)
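
    # A sketch of common option combinations, assuming a registered ``add``
    # task and ``from datetime import datetime, timedelta, timezone``
    # (``priority.high`` is a hypothetical queue that must exist or be
    # auto-created):
    #
    #     add.apply_async((2, 2), countdown=10)           # run in ten seconds
    #     add.apply_async((2, 2), queue='priority.high')
    #     add.apply_async(
    #         (2, 2), expires=datetime.now(timezone.utc) + timedelta(days=1))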

    def shadow_name(self, args, kwargs, options):
        """Override for custom task name in worker logs/monitoring.

        Example:
            .. code-block:: python

                from celery.utils.imports import qualname

                def shadow_name(task, args, kwargs, options):
                    return qualname(args[0])

                @app.task(shadow_name=shadow_name, serializer='pickle')
                def apply_function_async(fun, *args, **kwargs):
                    return fun(*args, **kwargs)

        Arguments:
            args (Tuple): Task positional arguments.
            kwargs (Dict): Task keyword arguments.
            options (Dict): Task execution options.
        """

    def signature_from_request(self,
                               request=None,
                               args=None,
                               kwargs=None,
                               queue=None,
                               **extra_options):
        request = self.request if request is None else request
        args = request.args if args is None else args
        kwargs = request.kwargs if kwargs is None else kwargs
        options = request.as_execution_options()
        if queue:
            options['queue'] = queue
        else:
            delivery_info = request.delivery_info or {}
            exchange = delivery_info.get('exchange')
            routing_key = delivery_info.get('routing_key')
            if exchange == '' and routing_key:
                # sent to anon-exchange
                options['queue'] = routing_key
            else:
                options.update(delivery_info)
        return self.signature(args,
                              kwargs,
                              options,
                              type=self,
                              **extra_options)

    subtask_from_request = signature_from_request  # XXX compat

    def retry(self,
              args=None,
              kwargs=None,
              exc=None,
              throw=True,
              eta=None,
              countdown=None,
              max_retries=None,
              **options):
        """Retry the task, adding it to the back of the queue.

        Example:
            >>> from imaginary_twitter_lib import Twitter
            >>> from proj.celery import app

            >>> @app.task(bind=True)
            ... def tweet(self, auth, message):
            ...     twitter = Twitter(oauth=auth)
            ...     try:
            ...         twitter.post_status_update(message)
            ...     except twitter.FailWhale as exc:
            ...         # Retry in 5 minutes.
            ...         raise self.retry(countdown=60 * 5, exc=exc)

        Note:
            Although the task will never return above as `retry` raises an
            exception to notify the worker, we use `raise` in front of the
            retry to convey that the rest of the block won't be executed.

        Arguments:
            args (Tuple): Positional arguments to retry with.
            kwargs (Dict): Keyword arguments to retry with.
            exc (Exception): Custom exception to report when the max retry
                limit has been exceeded (default:
                :exc:`~@MaxRetriesExceededError`).

                If this argument is set and retry is called while
                an exception was raised (``sys.exc_info()`` is set)
                it will attempt to re-raise the current exception.

                If no exception was raised it will raise the ``exc``
                argument provided.
            countdown (float): Time in seconds to delay the retry for.
            eta (~datetime.datetime): Explicit time and date to run the
                retry at.
            max_retries (int): If set, overrides the default retry limit for
                this execution.  Changes to this parameter don't propagate to
                subsequent task retry attempts.  A value of :const:`None`,
                means "use the default", so if you want infinite retries you'd
                have to set the :attr:`max_retries` attribute of the task to
                :const:`None` first.
            time_limit (int): If set, overrides the default time limit.
            soft_time_limit (int): If set, overrides the default soft
                time limit.
            throw (bool): If this is :const:`False`, don't raise the
                :exc:`~@Retry` exception, that tells the worker to mark
                the task as being retried.  Note that this means the task
                will be marked as failed if the task raises an exception,
                or successful if it returns after the retry call.
            **options (Any): Extra options to pass on to :meth:`apply_async`.

        Raises:

            celery.exceptions.Retry:
                To tell the worker that the task has been re-sent for retry.
                This always happens, unless the `throw` keyword argument
                has been explicitly set to :const:`False`, and is considered
                normal operation.
        """
        request = self.request
        retries = request.retries + 1
        max_retries = self.max_retries if max_retries is None else max_retries

        # Not running in a worker, and not emulated by apply()/always_eager,
        # so just raise the original exception.
        if request.called_directly:
            # raises the original stack if an exception is active
            # (PyErr_Occurred), augmented with ``exc`` if it's defined.
            raise_with_context(exc or Retry('Task can be retried', None))

        if not eta and countdown is None:
            countdown = self.default_retry_delay

        is_eager = request.is_eager
        S = self.signature_from_request(request,
                                        args,
                                        kwargs,
                                        countdown=countdown,
                                        eta=eta,
                                        retries=retries,
                                        **options)

        if max_retries is not None and retries > max_retries:
            if exc:
                # On Py3: will augment any current exception with
                # the ``exc`` argument provided (raise exc from orig)
                raise_with_context(exc)
            raise self.MaxRetriesExceededError(
                "Can't retry {0}[{1}] args:{2} kwargs:{3}".format(
                    self.name, request.id, S.args, S.kwargs),
                task_args=S.args,
                task_kwargs=S.kwargs)

        ret = Retry(exc=exc, when=eta or countdown)

        if is_eager:
            # if task was executed eagerly using apply(),
            # then the retry must also be executed eagerly.
            S.apply().get()
            if throw:
                raise ret
            return ret

        try:
            S.apply_async()
        except Exception as exc:
            raise Reject(exc, requeue=False)
        if throw:
            raise ret
        return ret

    def apply(self,
              args=None,
              kwargs=None,
              link=None,
              link_error=None,
              task_id=None,
              retries=None,
              throw=None,
              logfile=None,
              loglevel=None,
              headers=None,
              **options):
        """Execute this task locally, by blocking until the task returns.

        Arguments:
            args (Tuple): positional arguments passed on to the task.
            kwargs (Dict): keyword arguments passed on to the task.
            throw (bool): Re-raise task exceptions.
                Defaults to the :setting:`task_eager_propagates` setting.

        Returns:
            celery.result.EagerResult: pre-evaluated result.
        """
        # trace imports Task, so need to import inline.
        from celery.app.trace import build_tracer

        app = self._get_app()
        args = args or ()
        kwargs = kwargs or {}
        task_id = task_id or uuid()
        retries = retries or 0
        if throw is None:
            throw = app.conf.task_eager_propagates

        # Make sure we get the task instance, not class.
        task = app._tasks[self.name]

        request = {
            'id': task_id,
            'retries': retries,
            'is_eager': True,
            'logfile': logfile,
            'loglevel': loglevel or 0,
            'hostname': gethostname(),
            'callbacks': maybe_list(link),
            'errbacks': maybe_list(link_error),
            'headers': headers,
            'delivery_info': {
                'is_eager': True
            },
        }
        tb = None
        tracer = build_tracer(
            task.name,
            task,
            eager=True,
            propagate=throw,
            app=self._get_app(),
        )
        ret = tracer(task_id, args, kwargs, request)
        retval = ret.retval
        if isinstance(retval, ExceptionInfo):
            retval, tb = retval.exception, retval.traceback
        state = states.SUCCESS if ret.info is None else ret.info.state
        return EagerResult(task_id, retval, state, traceback=tb)
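
    # A minimal sketch, assuming a registered ``add`` task: the task body runs
    # in the current process and the EagerResult is ready immediately:
    #
    #     result = add.apply(args=(2, 2))
    #     assert result.get() == 4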

    def AsyncResult(self, task_id, **kwargs):
        """Get AsyncResult instance for this kind of task.

        Arguments:
            task_id (str): Task id to get result for.
        """
        return self._get_app().AsyncResult(task_id,
                                           backend=self.backend,
                                           task_name=self.name,
                                           **kwargs)

    def signature(self, args=None, *starargs, **starkwargs):
        """Create signature.

        Returns:
            :class:`~celery.signature`:  object for
                this task, wrapping arguments and execution options
                for a single task invocation.
        """
        starkwargs.setdefault('app', self.app)
        return signature(self, args, *starargs, **starkwargs)

    subtask = signature

    def s(self, *args, **kwargs):
        """Create signature.

        Shortcut for ``.s(*a, **k) -> .signature(a, k)``.
        """
        return self.signature(args, kwargs)

    def si(self, *args, **kwargs):
        """Create immutable signature.

        Shortcut for ``.si(*a, **k) -> .signature(a, k, immutable=True)``.
        """
        return self.signature(args, kwargs, immutable=True)
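
    # A sketch of the signature shortcuts, assuming a registered ``add`` task:
    #
    #     add.signature((2, 2), countdown=10)  # full form, with options
    #     add.s(2, 2)                          # star-argument shortcut
    #     add.si(2, 2)                         # immutable: ignores parent results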

    def chunks(self, it, n):
        """Create a :class:`~celery.canvas.chunks` task for this task."""
        from celery import chunks
        return chunks(self.s(), it, n, app=self.app)

    def map(self, it):
        """Create a :class:`~celery.canvas.xmap` task from ``it``."""
        from celery import xmap
        return xmap(self.s(), it, app=self.app)

    def starmap(self, it):
        """Create a :class:`~celery.canvas.xstarmap` task from ``it``."""
        from celery import xstarmap
        return xstarmap(self.s(), it, app=self.app)
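
    # A sketch, assuming registered ``add`` and ``xsum`` tasks:
    #
    #     add.starmap(zip(range(10), range(10)))       # add(0, 0), add(1, 1), ...
    #     xsum.map([range(10), range(100)])            # xsum(range(10)), xsum(range(100))
    #     add.chunks(zip(range(100), range(100)), 10)  # ten chunks of ten calls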

    def send_event(self, type_, retry=True, retry_policy=None, **fields):
        """Send monitoring event message.

        This can be used to add custom event types in :pypi:`Flower`
        and other monitors.

        Arguments:
            type_ (str):  Type of event, e.g. ``"task-failed"``.

        Keyword Arguments:
            retry (bool):  Retry sending the message
                if the connection is lost.  Default is taken from the
                :setting:`task_publish_retry` setting.
            retry_policy (Mapping): Retry settings.  Default is taken
                from the :setting:`task_publish_retry_policy` setting.
            **fields (Any): Map containing information about the event.
                Must be JSON serializable.
        """
        req = self.request
        if retry_policy is None:
            retry_policy = self.app.conf.task_publish_retry_policy
        with self.app.events.default_dispatcher(hostname=req.hostname) as d:
            return d.send(type_,
                          uuid=req.id,
                          retry=retry,
                          retry_policy=retry_policy,
                          **fields)
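
    # A minimal sketch; ``task-progress`` is a hypothetical custom event type
    # (monitors such as Flower treat ``task-``-prefixed types as task events):
    #
    #     @app.task(bind=True)
    #     def long_task(self, n):
    #         for i in range(n):
    #             self.send_event('task-progress', current=i, total=n)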

    def replace(self, sig):
        """Replace this task, with a new task inheriting the task id.

        .. versionadded:: 4.0

        Arguments:
            sig (~@Signature): signature to replace with.

        Raises:
            ~@Ignore: This is always raised, so the best practice
            is to always use ``raise self.replace(...)`` to convey
            to the reader that the task won't continue after being replaced.
        """
        chord = self.request.chord
        if 'chord' in sig.options:
            raise ImproperlyConfigured(
                "A signature replacing a task must not be part of a chord")

        if isinstance(sig, group):
            sig |= self.app.tasks['celery.accumulate'].s(index=0).set(
                link=self.request.callbacks,
                link_error=self.request.errbacks,
            )

        if self.request.chain:
            for t in reversed(self.request.chain):
                sig |= signature(t, app=self.app)

        sig.set(
            chord=chord,
            group_id=self.request.group,
            root_id=self.request.root_id,
        )
        sig.freeze(self.request.id)

        sig.delay()
        raise Ignore('Replaced by new task')
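
    # A minimal sketch, assuming a registered ``add`` task and
    # ``from celery import group``: the running task is replaced by the
    # group, which inherits its task id and callbacks:
    #
    #     @app.task(bind=True)
    #     def fan_out(self, xs):
    #         raise self.replace(group(add.s(x, x) for x in xs))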

    def add_to_chord(self, sig, lazy=False):
        """Add signature to the chord the current task is a member of.

        .. versionadded:: 4.0

        Currently only supported by the Redis result backend.

        Arguments:
            sig (~@Signature): Signature to extend chord with.
            lazy (bool): If enabled the new task won't actually be called,
                and ``sig.delay()`` must be called manually.
        """
        if not self.request.chord:
            raise ValueError('Current task is not member of any chord')
        sig.set(
            group_id=self.request.group,
            chord=self.request.chord,
            root_id=self.request.root_id,
        )
        result = sig.freeze()
        self.backend.add_to_chord(self.request.group, result)
        return sig.delay() if not lazy else sig

    def update_state(self, task_id=None, state=None, meta=None, **kwargs):
        """Update task state.

        Arguments:
            task_id (str): Id of the task to update.
                Defaults to the id of the current task.
            state (str): New state.
            meta (Dict): State meta-data.
        """
        if task_id is None:
            task_id = self.request.id
        self.backend.store_result(task_id, meta, state, **kwargs)
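
    # A minimal sketch of reporting custom progress from a bound task
    # ('PROGRESS' is a conventional custom state name, not built into Celery):
    #
    #     @app.task(bind=True)
    #     def process(self, items):
    #         for i, item in enumerate(items):
    #             self.update_state(state='PROGRESS',
    #                               meta={'current': i, 'total': len(items)})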

    def on_success(self, retval, task_id, args, kwargs):
        """Success handler.

        Run by the worker if the task executes successfully.

        Arguments:
            retval (Any): The return value of the task.
            task_id (str): Unique id of the executed task.
            args (Tuple): Original arguments for the executed task.
            kwargs (Dict): Original keyword arguments for the executed task.

        Returns:
            None: The return value of this handler is ignored.
        """

    def on_retry(self, exc, task_id, args, kwargs, einfo):
        """Retry handler.

        This is run by the worker when the task is to be retried.

        Arguments:
            exc (Exception): The exception sent to :meth:`retry`.
            task_id (str): Unique id of the retried task.
            args (Tuple): Original arguments for the retried task.
            kwargs (Dict): Original keyword arguments for the retried task.
            einfo (~billiard.einfo.ExceptionInfo): Exception information.

        Returns:
            None: The return value of this handler is ignored.
        """

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """Error handler.

        This is run by the worker when the task fails.

        Arguments:
            exc (Exception): The exception raised by the task.
            task_id (str): Unique id of the failed task.
            args (Tuple): Original arguments for the task that failed.
            kwargs (Dict): Original keyword arguments for the task that failed.
            einfo (~billiard.einfo.ExceptionInfo): Exception information.

        Returns:
            None: The return value of this handler is ignored.
        """

    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        """Handler called after the task returns.

        Arguments:
            status (str): Current task state.
            retval (Any): Task return value/exception.
            task_id (str): Unique id of the task.
            args (Tuple): Original arguments for the task.
            kwargs (Dict): Original keyword arguments for the task.
            einfo (~billiard.einfo.ExceptionInfo): Exception information.

        Returns:
            None: The return value of this handler is ignored.
        """

    def add_trail(self, result):
        if self.trail:
            self.request.children.append(result)
        return result

    def push_request(self, *args, **kwargs):
        self.request_stack.push(Context(*args, **kwargs))

    def pop_request(self):
        self.request_stack.pop()

    def __repr__(self):
        """``repr(task)``."""
        return _reprtask(self, R_INSTANCE)

    def _get_request(self):
        """Get current request object."""
        req = self.request_stack.top
        if req is None:
            # task was not called, but some may still expect a request
            # to be there, perhaps that should be deprecated.
            if self._default_request is None:
                self._default_request = Context()
            return self._default_request
        return req

    request = property(_get_request)

    def _get_exec_options(self):
        if self._exec_options is None:
            self._exec_options = extract_exec_options(self)
        return self._exec_options

    @property
    def backend(self):
        backend = self._backend
        if backend is None:
            return self.app.backend
        return backend

    @backend.setter
    def backend(self, value):  # noqa
        self._backend = value

    @property
    def __name__(self):
        return self.__class__.__name__
Example #2
class JobtasticTask(Task):
    """
    A base ``Celery.Task`` class that provides some common niceties for running
    tasks that return some kind of result for which you need to wait.

    To create a task that uses these helpers, use ``JobtasticTask`` as a
    subclass and define a ``calculate_result`` method which returns a
    dictionary to be turned into JSON. You will also need to define the
    following class variables:

    * ``significant_kwargs`` The kwarg values that will be converted to strings
      and hashed to determine if two versions of the same task are equivalent.
      This is a list of 2-tuples with the first item being the kwarg string and
      the second being a callable that converts the value to a hashable string.
      If no second item is given, it's assumed that calling ``str()`` on the
      value works just fine.
    * ``herd_avoidance_timeout`` Number of seconds to hold a lock on this task
      for other equivalent runs. Generally, this should be set to the longest
      estimated amount of time the task could consume.

    The following class members are optional:

    * ``cache_prefix`` A unique string representing this task, e.g.
      ``foo.bar.tasks.BazzTask``.
    * ``cache_duration`` The number of seconds for which the result of this
      task should be cached, meaning subsequent equivalent runs will skip
      computation. The default is to do no result caching.
    * ``memleak_threshold`` When a single run of a task increases the resident
      process memory usage by more than this number of megabytes, a warning is
      logged to the logger. This is useful for finding tasks that are behaving
      badly under certain conditions. By default, no logging is performed.
      Set this value to 0 to log all RAM changes and -1 to disable logging.

    Provided are helpers for:

    1. Handling failures to connect to the task broker by either directly
      running the task (`delay_or_run`) or by returning a task that
      contains the connection error (`delay_or_fail`). This minimizes
      the user-facing impact of a dead task broker.
    2. Defeating any thundering herd issues by ensuring only one of a task with
      specific arguments can be running at a time by directing subsequent calls
      to latch on to the appropriate result.
    3. Caching the final result for a designated time period so that subsequent
      equivalent calls return quickly.
    4. Returning the results as JSON, so that they can be processed easily by
      client-side JavaScript.
    5. Returning time-based, continually updating progress estimates to
      front-end code so that users know what to expect.
    """
    abstract = True

    #: The shared cache used for locking and thundering herd protection
    _cache = None
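
    # A minimal subclass sketch (``ExportTask`` and its kwargs are
    # hypothetical, shown only to illustrate the required class variables
    # described above):
    #
    #     class ExportTask(JobtasticTask):
    #         significant_kwargs = [('user_id', str)]
    #         herd_avoidance_timeout = 300  # seconds; longest expected runtime
    #         cache_duration = 60           # cache the result for one minute
    #
    #         def calculate_result(self, user_id, **kwargs):
    #             self.update_progress(1, 2)
    #             return {'user_id': user_id, 'status': 'done'}
    #
    #     result = ExportTask.delay_or_eager(user_id=42)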

    @classmethod
    def async_or_eager(self, **options):
        """
        Attempt to call self.apply_async, or if that fails because of a problem
        with the broker, run the task eagerly and return an EagerResult.
        """
        args = options.pop("args", None)
        kwargs = options.pop("kwargs", None)
        possible_broker_errors = self._get_possible_broker_errors_tuple()
        try:
            return self.apply_async(args, kwargs, **options)
        except possible_broker_errors:
            return self.apply(args, kwargs, **options)

    @classmethod
    def async_or_fail(self, **options):
        """
        Attempt to call self.apply_async, but if that fails with an exception,
        we fake the task completion using the exception as the result. This
        allows us to seamlessly handle errors on task creation the same way we
        handle errors when a task runs, simplifying the user interface.
        """
        args = options.pop("args", None)
        kwargs = options.pop("kwargs", None)
        possible_broker_errors = self._get_possible_broker_errors_tuple()
        try:
            return self.apply_async(args, kwargs, **options)
        except possible_broker_errors as e:
            return self.simulate_async_error(e)

    @classmethod
    def delay_or_eager(self, *args, **kwargs):
        """
        Wrap ``async_or_eager`` with a convenience signature like ``delay``.
        """
        return self.async_or_eager(args=args, kwargs=kwargs)

    @classmethod
    def delay_or_run(self, *args, **kwargs):
        """
        Attempt to call self.delay, or if that fails, call self.run.

        Returns a tuple, (result, required_fallback). ``result`` is the result
        of calling delay or run. ``required_fallback`` is True if the broker
        failed and we had to resort to ``self.run``.
        """
        warnings.warn(
            "delay_or_run is deprecated. Please use delay_or_eager",
            DeprecationWarning,
        )
        possible_broker_errors = self._get_possible_broker_errors_tuple()
        try:
            result = self.apply_async(args=args, kwargs=kwargs)
            required_fallback = False
        except possible_broker_errors:
            result = self().run(*args, **kwargs)
            required_fallback = True
        return result, required_fallback

    @classmethod
    def delay_or_fail(self, *args, **kwargs):
        """
        Wrap ``async_or_fail`` with a convenience signature like ``delay``.
        """
        return self.async_or_fail(args=args, kwargs=kwargs)

    @classmethod
    def _get_possible_broker_errors_tuple(self):
        if hasattr(self.app, 'connection'):
            dummy_conn = self.app.connection()
        else:
            # Celery 2.5 uses `broker_connection` instead
            dummy_conn = self.app.broker_connection()

        possible_broker_errors = (dummy_conn.connection_errors +
                                  dummy_conn.channel_errors)
        try:
            from kombu.exceptions import OperationalError
            # In celery 4.x when a broker is not connected it throws an
            # OperationalError from kombu. It should also be noted that the
            # newest version of kombu (4.1.0) actually hangs forever. So we
            # need to peg versions of kombu until that gets fixed.
            possible_broker_errors += (OperationalError, )
        except ImportError:
            pass
        return possible_broker_errors

    @classmethod
    def simulate_async_error(self, exception):
        """
        Take this exception and store it as an error in the result backend.
        This unifies the handling of broker-connection errors with any other
        type of error that might occur when running the task. So the same
        error-handling that might retry a task or display a useful message to
        the user can also handle this error.
        """
        task_id = gen_unique_id()
        async_result = self.AsyncResult(task_id)
        einfo = ExceptionInfo(sys.exc_info())

        async_result.backend.mark_as_failure(
            task_id,
            exception,
            traceback=einfo.traceback,
        )

        return async_result

    @classmethod
    def apply_async(self, args, kwargs, **options):
        """
        Put this task on the Celery queue as a singleton. Only one of this type
        of task with its distinguishing args/kwargs will be allowed on the
        queue at a time. Subsequent duplicate tasks called while this task is
        still running will just latch on to the results of the running task by
        synchronizing the task uuid. Additionally, identical task calls will
        return those results for the next ``cache_duration`` seconds.
        """
        self._validate_required_class_vars()

        cache_key = self._get_cache_key(**kwargs)

        # Check for an already-computed and cached result
        task_id = self.cache.get(cache_key)
        if task_id:
            # We've already built this result, just latch on to the task that
            # did the work
            logging.info('Found existing cached and completed task: %s',
                         task_id)
            return self.AsyncResult(task_id)

        # Check for an in-progress equivalent task to avoid duplicating work
        task_id = self.cache.get('herd:%s' % cache_key)
        if task_id:
            logging.info('Found existing in-progress task: %s', task_id)
            return self.AsyncResult(task_id)

        # It's not cached and it's not already running. Use an atomic lock to
        # start the task, ensuring there isn't a race condition that could
        # result in multiple identical tasks being fired at once.
        with self.cache.lock('lock:%s' % cache_key):
            task_meta = super(JobtasticTask,
                              self).apply_async(args, kwargs, **options)
            logging.info('Current status: %s', task_meta.status)
            if task_meta.status in (PROGRESS, PENDING):
                self.cache.set('herd:%s' % cache_key,
                               task_meta.task_id,
                               timeout=self.herd_avoidance_timeout)
                logging.info('Setting herd-avoidance cache for task: %s',
                             cache_key)
        return task_meta

    def calc_progress(self, completed_count, total_count):
        """
        Calculate the percentage progress and estimated remaining time based on
        the current number of items completed of the total.

        Returns a tuple of ``(percentage_complete, seconds_remaining)``.
        """
        self.logger.debug(
            "calc_progress(%s, %s)",
            completed_count,
            total_count,
        )
        current_time = time.time()

        time_spent = current_time - self.start_time
        self.logger.debug("Progress time spent: %s", time_spent)

        if total_count == 0:
            return 100, 1

        completion_fraction = completed_count / total_count
        if completion_fraction == 0:
            completion_fraction = 1

        total_time = time_spent / completion_fraction
        time_remaining = total_time - time_spent

        completion_display = completion_fraction * 100
        if completion_display == 100:
            return 100, 1  # 1 second to finish up

        return completion_display, time_remaining
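
    # Worked example: 25 of 100 units done after 30 seconds of work gives
    # completion_fraction = 0.25 and total_time = 30 / 0.25 = 120, so the
    # method returns (25.0, 90.0): 25% complete, about 90 seconds remaining.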

    def update_progress(
        self,
        completed_count,
        total_count,
        update_frequency=1,
    ):
        """
        Update the task backend with both an estimated percentage complete and
        number of seconds remaining until completion.

        ``completed_count`` Number of task "units" that have been completed out
        of ``total_count`` total "units".
        ``update_frequency`` Store the updated progress in the backend at most
        once every ``update_frequency`` completed units.
        """
        if completed_count - self._last_update_count < update_frequency:
            # We've updated the progress too recently. Don't stress out the
            # result backend
            return
        # Store progress for display
        progress_percent, time_remaining = self.calc_progress(
            completed_count, total_count)
        self.logger.debug("Updating progress: %s percent, %s remaining",
                          progress_percent, time_remaining)
        if self.request.id:
            self._last_update_count = completed_count
            self.update_state(
                None, PROGRESS, {
                    "progress_percent": progress_percent,
                    "time_remaining": time_remaining,
                })
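
    # Typical call pattern from inside ``calculate_result`` (``items`` and
    # ``process`` are hypothetical), storing at most one backend update per
    # ten completed items:
    #
    #     for i, item in enumerate(items):
    #         process(item)
    #         self.update_progress(i + 1, len(items), update_frequency=10)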

    def run(self, *args, **kwargs):
        if get_task_logger:
            self.logger = get_task_logger(self.__class__.__name__)
        else:
            # Celery 2.X fallback
            self.logger = self.get_logger(**kwargs)
        self.logger.info("Starting %s", self.__class__.__name__)

        self.cache_key = self._get_cache_key(**kwargs)

        # Record start time to give estimated time remaining estimates
        self.start_time = time.time()

        # Keep track of progress updates for update_frequency tracking
        self._last_update_count = 0

        # Report to the backend that work has been started.
        if self.request.id:
            self.update_state(None, PROGRESS, {
                "progress_percent": 0,
                "time_remaining": -1,
            })

        memleak_threshold = int(getattr(self, 'memleak_threshold', -1))
        if memleak_threshold >= 0:
            beginning_memory_usage = self._get_memory_usage()

        self.logger.info("Calculating result")
        try:
            task_result = self.calculate_result(*args, **kwargs)
        except Exception:
            # Don't want other tasks waiting for this task to finish, since it
            # won't
            self._break_thundering_herd_cache()
            raise  # We can use normal celery exception handling for this

        if hasattr(self, 'cache_duration'):
            cache_duration = self.cache_duration
        else:
            cache_duration = -1  # By default, don't cache
        if cache_duration >= 0:
            # If we're configured to cache this result, do so.
            self.cache.set(self.cache_key, self.request.id, cache_duration)

        # Now that the task is finished, we can stop all of the thundering herd
        # avoidance
        self._break_thundering_herd_cache()

        if memleak_threshold >= 0:
            self._warn_if_leaking_memory(
                beginning_memory_usage,
                self._get_memory_usage(),
                memleak_threshold,
                task_kwargs=kwargs,
            )

        return task_result

    def calculate_result(self, *args, **kwargs):
        raise NotImplementedError(("Tasks using JobtasticTask must implement "
                                   "their own calculate_result"))

    @classmethod
    def _validate_required_class_vars(self):
        """
        Ensure that this subclass has defined all of the required class
        variables.
        """
        required_members = (
            'significant_kwargs',
            'herd_avoidance_timeout',
        )
        for required_member in required_members:
            if not hasattr(self, required_member):
                raise Exception("JobtasticTask's must define a %s" %
                                required_member)

    def on_success(self, retval, task_id, args, kwargs):
        """
        Store results in the backend even if we're always eager. This ensures
        the `delay_or_run` calls always at least have results.
        """
        if self.request.is_eager:
            # Store the result because celery wouldn't otherwise
            self.update_state(task_id, SUCCESS, retval)

    def _break_thundering_herd_cache(self):
        self.cache.delete('herd:%s' % self.cache_key)

    @classmethod
    def _get_cache(self):
        """
        Return the cache to use for thundering herd protection, etc.
        """
        if not self._cache:
            self._cache = get_cache(self.app)
        return self._cache

    @classmethod
    def _set_cache(self, cache):
        """
        Set the Jobtastic Cache for the Task

        The cache must support get/set (with timeout)/delete/lock (as a context
        manager).
        """
        self._cache = cache

    cache = class_property(_get_cache, _set_cache)

    @classmethod
    def _get_cache_key(self, **kwargs):
        """
        Take this task's configured ``significant_kwargs`` and build a hash
        that all equivalent task calls will match.

        Takes in kwargs and returns a string.

        To change the way the cache key is generated or do more in-depth
        processing, override this method.
        """
        m = md5()
        for significant_kwarg in self.significant_kwargs:
            key, to_str = significant_kwarg
            try:
                m.update(to_str(kwargs[key]))
            except TypeError:
                # Python 3.x strings aren't accepted by hash.update().
                # String should be byte-encoded first.
                m.update(to_str(kwargs[key]).encode('utf-8'))

        if hasattr(self, 'cache_prefix'):
            cache_prefix = self.cache_prefix
        else:
            cache_prefix = '%s.%s' % (self.__module__, self.__name__)
        return '%s:%s' % (cache_prefix, m.hexdigest())
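
    # Illustration (hypothetical task): with ``significant_kwargs =
    # [('user_id', str)]`` and no ``cache_prefix``, calling
    # ``_get_cache_key(user_id=42, verbose=True)`` hashes only ``str(42)``
    # and returns ``'<module>.<class>:'`` followed by the md5 hexdigest;
    # the ``verbose`` kwarg is ignored because it isn't significant.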

    def _get_memory_usage(self):
        current_process = psutil.Process(os.getpid())
        usage = current_process.memory_info()

        return usage.rss

    def _warn_if_leaking_memory(
        self,
        beginning_usage,
        ending_usage,
        threshold,
        task_kwargs,
    ):
        growth = ending_usage - beginning_usage

        threshold_in_bytes = threshold * 1000000

        if growth > threshold_in_bytes:
            self.warn_of_memory_leak(
                growth,
                beginning_usage,
                ending_usage,
                task_kwargs,
            )

    def warn_of_memory_leak(
        self,
        growth,
        beginning_usage,
        ending_usage,
        task_kwargs,
    ):
        self.logger.warning(
            "Jobtastic:memleak memleak_detected. memory_increase=%05d unit=MB",
            growth / 1000000,
        )
        self.logger.info(
            "Jobtastic:memleak memory_usage_start=%05d unit=MB",
            beginning_usage / 1000000,
        )
        self.logger.info(
            "Jobtastic:memleak memory_usage_end=%05d unit=MB",
            ending_usage / 1000000,
        )
        self.logger.info(
            "Jobtastic:memleak task_kwargs=%s",
            repr(task_kwargs),
        )