示例#1
0
 def write(self, preferred_queue, body):
     """
     Hand an inbound message to an idle worker, or defer to the parent pool.

     If the message carries a ``guid`` it is registered with
     ``GuidMiddleware`` first.  A cluster heartbeat message additionally
     triggers ``self.cleanup()``, and the pool is grown via ``self.up()``
     whenever ``self.should_grow`` is truthy.

     :param preferred_queue: only forwarded to the parent class ``write``
                             when every worker is busy
     :param body: the inbound message
     :returns: ``None`` when an idle worker accepted the message (or when an
               error was logged); otherwise whatever the parent ``write``
               returns
     """
     if 'guid' in body:
         GuidMiddleware.set_guid(body['guid'])
     try:
         # the cluster heartbeat doubles as the trigger for internal cleanup
         is_heartbeat = (isinstance(body, dict)
                         and 'cluster_node_heartbeat' in body['task'])
         if is_heartbeat:
             self.cleanup()
         if self.should_grow:
             self.up()
         # "preferred queue" round robin distribution is ignored on purpose:
         # shuffle a copy of the worker list and claim the first idle worker
         candidates = self.workers[:]
         random.shuffle(candidates)
         idle_worker = next((w for w in candidates if not w.busy), None)
         if idle_worker is None:
             # everybody is busy; let the parent pool decide what to do
             return super(AutoscalePool, self).write(preferred_queue, body)
         idle_worker.put(body)
     except Exception:
         # a database hiccup may be the cause; drop unusable connections so
         # that fresh ones get established
         for conn in connections.all():
             conn.close_if_unusable_or_obsolete()
         logger.exception('failed to write inbound message')
示例#2
0
文件: task.py 项目: llharris/lab-200a
 def run_callable(self, body):
     '''
     Resolve the task named in an AMQP message to Python code and invoke it.

     ``body['task']`` is required; ``uuid``, ``args`` and ``kwargs`` are
     optional.  A ``guid`` entry, when present, is removed from ``body`` and
     handed to ``GuidMiddleware``.  Returns whatever the resolved callable
     returns.
     '''
     task_name = body['task']
     if 'guid' in body:
         GuidMiddleware.set_guid(body.pop('guid'))
     task_uuid = body.get('uuid', '<unknown>')
     positional = body.get('args', [])
     keyword = body.get('kwargs', {})

     target = TaskWorker.resolve_callable(task_name)
     # class-based tasks (e.g., RunJob) are executed through the `run()`
     # method of a freshly created instance
     runner = target().run if inspect.isclass(target) else target

     # kwargs are deliberately left out of the log line; they often contain
     # launch-time secrets
     logger.debug('task {} starting {}(*{})'.format(task_uuid, task_name, positional))
     return runner(*positional, **keyword)
示例#3
0
 def run():
     """
     Run pending periodic tasks in an endless loop until orphaned.

     Each iteration:

     * exits via ``SystemExit`` if the parent PID differs from the one seen
       at start-up (the process has been orphaned),
     * closes unusable or obsolete database connections,
     * assigns a freshly generated guid through ``GuidMiddleware``,
     * calls ``self.run_pending()``,
     * sleeps for ``idle_seconds``.

     Any ``Exception`` raised while scheduling is logged and swallowed so
     the loop keeps running.

     NOTE(review): ``self`` and ``idle_seconds`` are not parameters; they
     are presumably closed over from an enclosing scope that is not visible
     here.
     """
     ppid = os.getppid()
     # Logger.warn is a deprecated alias; use Logger.warning
     logger.warning('periodic beat started')
     while True:
         if os.getppid() != ppid:
             # if the parent PID changes, this process has been orphaned
             # via e.g., segfault or sigkill, we should exit too
             pid = os.getpid()
             logger.warning(f'periodic beat exiting gracefully pid:{pid}')
             raise SystemExit()
         try:
             for conn in connections.all():
                 # If the database connection has a hiccup, re-establish a new
                 # connection
                 conn.close_if_unusable_or_obsolete()
             # every scheduling pass gets its own guid
             GuidMiddleware.set_guid(GuidMiddleware._generate_guid())
             self.run_pending()
         except Exception:
             logger.exception(
                 'encountered an error while scheduling periodic tasks')
         time.sleep(idle_seconds)
示例#4
0
    def perform_job(self, job, queue):
        """
        Mark the matching :class:`pulpcore.app.models.Task` as running and set up logging context.

        This method is called by the worker's work horse thread (the forked child) just before the
        task begins executing. When no Task row exists for the job id, the bookkeeping is skipped
        and the job is still performed.

        Args:
            job (rq.job.Job): The job to perform
            queue (rq.queue.Queue): The Queue associated with the job

        Returns:
            Whatever the parent class ``perform_job`` returns.
        """
        try:
            task = Task.objects.get(pk=job.get_id())
        except Task.DoesNotExist:
            task = None

        if task is not None:
            task.set_running()
            # the first user holding permissions on the task becomes the current user
            _set_current_user(get_users_with_perms(task).first())
            GuidMiddleware.set_guid(task.logging_cid)

        # the job itself runs inside its own working directory context
        with TaskWorkingDirectory(job):
            return super().perform_job(job, queue)
示例#5
0
    def perform_work(self, body):
        """
        Process one inbound callback message and flush buffered events.

        Three kinds of message are distinguished by ``body.get('event')``:

        * ``'FLUSH'`` -- nothing is buffered; ``self.last_event`` is reset
          and ``self.flush(force=True)`` is attempted.
        * ``'EOF'`` -- a websocket summary notification is emitted and, for
          unified jobs that are not a ``Job`` but expose
          ``send_notification_templates``, success/failure notifications are
          scheduled.  The method then returns *without* flushing.
        * anything else -- an event object is created from ``body`` via
          ``cls.create_from_data(**body)``, appended to ``self.buff[cls]``
          and a non-forced flush is attempted.

        Flushing is retried on ``OperationalError``, ``InterfaceError`` and
        ``InternalError`` up to ``self.MAX_RETRIES`` times; a plain
        ``DatabaseError`` is logged once and not retried.  Any other
        exception raised anywhere in the method is logged and swallowed.

        :param body: the message; it is read with ``.get``/``in`` and,
                     for regular events, mutated (``skip_websocket_message``
                     is popped)
        :returns: ``None``
        """
        try:
            flush = body.get('event') == 'FLUSH'
            if flush:
                self.last_event = ''
            if not flush:
                # maps the key that identifies the owning job to the event
                # class used for that kind of job
                event_map = {
                    'job_id': JobEvent,
                    'ad_hoc_command_id': AdHocCommandEvent,
                    'project_update_id': ProjectUpdateEvent,
                    'inventory_update_id': InventoryUpdateEvent,
                    'system_job_id': SystemJobEvent,
                }

                job_identifier = 'unknown job'
                # NOTE(review): `cls` is used after this loop.  When none of
                # the keys is present in `body`, the loop finishes without a
                # break and `cls` is left as the last class in `event_map`
                # -- confirm that this fallback is intended.
                for key, cls in event_map.items():
                    if key in body:
                        job_identifier = body[key]
                        break

                self.last_event = f'\n\t- {cls.__name__} for #{job_identifier} ({body.get("event", "")} {body.get("uuid", "")})'  # noqa

                if body.get('event') == 'EOF':
                    try:
                        if 'guid' in body:
                            GuidMiddleware.set_guid(body['guid'])
                        final_counter = body.get('final_counter', 0)
                        logger.info(
                            'Event processing is finished for Job {}, sending notifications'
                            .format(job_identifier))
                        # EOF events are sent when stdout for the running task is
                        # closed. don't actually persist them to the database; we
                        # just use them to report `summary` websocket events as an
                        # approximation for when a job is "done"
                        emit_channel_notification(
                            'jobs-summary',
                            dict(group_name='jobs',
                                 unified_job_id=job_identifier,
                                 final_counter=final_counter))
                        # Additionally, when we've processed all events, we should
                        # have all the data we need to send out success/failure
                        # notification templates
                        uj = UnifiedJob.objects.get(pk=job_identifier)

                        if isinstance(uj, Job):
                            # *actual playbooks* send their success/failure
                            # notifications in response to the playbook_on_stats
                            # event handling code in main.models.events
                            pass
                        elif hasattr(uj, 'send_notification_templates'):
                            handle_success_and_failure_notifications.apply_async(
                                [uj.id])
                    except Exception:
                        logger.exception(
                            'Worker failed to emit notifications: Job {}'.
                            format(job_identifier))
                    finally:
                        # runs whether or not the notification succeeded: the
                        # in-memory event counter is decremented and the guid
                        # is cleared
                        self.subsystem_metrics.inc(
                            'callback_receiver_events_in_memory', -1)
                        GuidMiddleware.set_guid('')
                    # EOF is never buffered, and the flush below is skipped
                    return

                # removed from `body` before the remaining keys are passed
                # to create_from_data as keyword arguments
                skip_websocket_message = body.pop('skip_websocket_message',
                                                  False)

                event = cls.create_from_data(**body)

                if skip_websocket_message:
                    event._skip_websocket_message = True

                # events are buffered per event class until flushed
                self.buff.setdefault(cls, []).append(event)

            retries = 0
            while retries <= self.MAX_RETRIES:
                try:
                    self.flush(force=flush)
                    break
                except (OperationalError, InterfaceError, InternalError):
                    if retries >= self.MAX_RETRIES:
                        logger.exception(
                            'Worker could not re-establish database connectivity, giving up on one or more events.'
                        )
                        return
                    # NOTE(review): the delay grows linearly, and because
                    # `retries` starts at 0 the first retry happens without
                    # any wait -- confirm this is intended
                    delay = 60 * retries
                    logger.exception(
                        'Database Error Saving Job Event, retry #{i} in {delay} seconds:'
                        .format(i=retries + 1, delay=delay))
                    # close the connection before sleeping and retrying
                    django_connection.close()
                    time.sleep(delay)
                    retries += 1
                except DatabaseError:
                    # any other database error is logged once, not retried
                    logger.exception('Database Error Saving Job Event')
                    break
        except Exception as exc:
            # last line of defence: log the error with its traceback and
            # swallow it
            tb = traceback.format_exc()
            logger.error('Callback Task Processor Raised Exception: %r', exc)
            logger.error('Detail: {}'.format(tb))
示例#6
0
def _queue_reserved_task(func, inner_task_id, resources, inner_args, inner_kwargs, options):
    """
    A task that encapsulates another task to be dispatched later.

    The encapsulated task is called the "inner" task. It is described by a callable, a task
    UUID, and the positional and keyword arguments to call it with (inner_args and
    inner_kwargs), which are passed through to the queue's ``enqueue`` call.

    The inner task is dispatched into a dedicated queue for a worker that is decided at dispatch
    time: this function loops until ``_acquire_worker(resources)`` yields a worker and all
    resource reservations could be recorded for it, then enqueues the inner task on the queue
    named after that worker.

    The task named ``pulpcore.app.tasks.orphan.orphan_cleanup`` is special-cased: it waits until
    no ``ReservedResource`` exists at all and is then enqueued on the ``resource-manager`` queue
    created with ``is_async=False``.

    Args:
        func (callable): The function to be called. Its ``__module__`` and ``__name__`` are used
            to build the dotted task name.
        inner_task_id (basestring): The task_id to be set on the task being called. By providing
            the UUID, the caller can have an asynchronous reference to the inner task
            that will be dispatched. It is also the primary key used to load the Task record.
        resources (iterable): The urls of the resources you wish to reserve for your task
            (iterated one url at a time; presumably a list of strings -- confirm against
            callers). The system will ensure that no other tasks that want that same
            reservation will run concurrently with yours.
        inner_args (tuple): The positional arguments to pass on to the task.
        inner_kwargs (dict): The keyword arguments to pass on to the task.
        options (dict): For all options accepted by enqueue see the RQ docs

    Returns:
        None. A ``RedisConnectionError`` raised while enqueueing is not propagated; the Task is
        marked as failed instead.
    """
    redis_conn = connection.get_redis_connection()
    task_status = Task.objects.get(pk=inner_task_id)
    # log records emitted from here on carry the task's logging correlation id
    GuidMiddleware.set_guid(task_status.logging_cid)
    task_name = func.__module__ + "." + func.__name__

    while True:
        if task_name == "pulpcore.app.tasks.orphan.orphan_cleanup":
            if ReservedResource.objects.exists():
                # wait until there are no reservations
                time.sleep(0.25)
                continue
            else:
                # no reservations are left: record the current worker on the task and
                # enqueue on the resource-manager queue
                rq_worker = util.get_current_worker()
                worker = Worker.objects.get(name=rq_worker.name)
                task_status.worker = worker
                task_status.set_running()
                # NOTE(review): with is_async=False RQ is documented to execute the job
                # inside enqueue(), which would explain set_completed() right after it --
                # confirm against the RQ version in use
                q = Queue("resource-manager", connection=redis_conn, is_async=False)
                try:
                    q.enqueue(
                        func,
                        args=inner_args,
                        kwargs=inner_kwargs,
                        job_id=inner_task_id,
                        job_timeout=TASK_TIMEOUT,
                        **options,
                    )
                    task_status.set_completed()
                except RedisConnectionError as e:
                    task_status.set_failed(e, None)
                return

        try:
            with transaction.atomic():
                # lock the worker - there is a similar lock in mark_worker_offline()
                worker = _acquire_worker(resources)

                # Attempt to lock all resources by their urls. Must be atomic to prevent deadlocks.
                for resource in resources:
                    # reuse the worker's existing reservation for this url, if any
                    if worker.reservations.filter(resource=resource).exists():
                        reservation = worker.reservations.get(resource=resource)
                    else:
                        reservation = ReservedResource.objects.create(
                            worker=worker, resource=resource
                        )
                    # tie the reservation to this task
                    TaskReservedResource.objects.create(resource=reservation, task=task_status)
        except (Worker.DoesNotExist, IntegrityError):
            # if no worker is ready (presumably signalled by Worker.DoesNotExist), or we have a
            # worker but we can't create the reservations, wait and try again
            time.sleep(0.25)
        else:
            # we have a worker with the locks
            break

    task_status.worker = worker
    task_status.save()

    try:
        # the inner task goes to the queue named after the chosen worker
        q = Queue(worker.name, connection=redis_conn)
        q.enqueue(
            func,
            args=inner_args,
            kwargs=inner_kwargs,
            job_id=inner_task_id,
            job_timeout=TASK_TIMEOUT,
            **options,
        )
    except RedisConnectionError as e:
        task_status.set_failed(e, None)