def _delete_worker(name, normal_shutdown=False):
    """
    Remove the worker called ``name`` together with the records that depend on it.

    The following is done, in this order:

    * a message is logged: error level when ``normal_shutdown`` is ``False``,
      info level otherwise;
    * the Worker document with that name is deleted;
    * every ReservedResource document held for that worker is deleted;
    * if the name starts with the resource manager prefix, the matching
      ResourceManagerLock is deleted;
    * if the name starts with the scheduler prefix, the matching CeleryBeatLock
      is deleted;
    * every TaskStatus of that worker that is still in an incomplete state is
      passed to ``cancel``.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise.
                            Defaults to False.
    :type  normal_shutdown: bool
    """
    # Only the literal False is treated as "the worker went missing"; any other
    # value takes the orderly-shutdown branch.
    if normal_shutdown is not False:
        _logger.info(_("Cleaning up shutdown worker '%s'.") % name)
    else:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        _logger.error(template % {'name': name})

    # The worker document itself, then the resources it had reserved
    Worker.objects(name=name).delete()
    ReservedResource.objects(worker_name=name).delete()

    # Resource managers own an additional lock document
    if name.startswith(RESOURCE_MANAGER_WORKER_NAME):
        ResourceManagerLock.objects(name=name).delete()

    # So do schedulers
    if name.startswith(SCHEDULER_WORKER_NAME):
        CeleryBeatLock.objects(name=name).delete()

    # Whatever was still pending in this worker's queue gets canceled
    unfinished = TaskStatus.objects(worker_name=name,
                                    state__in=constants.CALL_INCOMPLETE_STATES)
    for status in unfinished:
        cancel(status['task_id'])
def tick(self):
    """
    Run one scheduler iteration, guarded by the celerybeat lock.

    A heartbeat for this scheduler is recorded first. It is handed directly to
    the worker watcher rather than being sent as a message, since it would
    just get read again by this class.

    The instance then tries to refresh its own CeleryBeatLock. If that
    succeeds, or if it manages to insert a fresh lock after expired ones were
    purged, ``call_tick`` is invoked and its result returned. If another
    instance already holds the lock, a default wait time is returned instead.

    :return:    number of seconds before the next tick should run
    :rtype:     float
    """
    # Identity of this celerybeat instance
    beat_id = constants.SCHEDULER_WORKER_NAME + "@" + platform.node()
    beat_args = {'celerybeat_name': beat_id}

    # This event is not sent anywhere; it is processed immediately.
    sent_at = time.time()
    received_at = time.time()
    worker_watcher.handle_worker_heartbeat({
        'timestamp': sent_at,
        'local_received': received_at,
        'type': 'scheduler-event',
        'hostname': beat_id
    })

    # Locks whose timestamp is at or before this moment count as expired
    expiry_cutoff = datetime.utcnow() - timedelta(
        seconds=constants.CELERYBEAT_LOCK_MAX_AGE)

    # Refresh the lock if this instance already owns it
    own_lock = CeleryBeatLock.objects(celerybeat_name=beat_id)
    refreshed = own_lock.update(set__timestamp=datetime.utcnow())

    if refreshed == 1:
        # The lock is ours and has just been refreshed
        _logger.debug(_('Lock updated by %(celerybeat_name)s') % beat_args)
        return self.call_tick(self, beat_id)

    # Purge any expired lock, then try to claim the lock
    CeleryBeatLock.objects(timestamp__lte=expiry_cutoff).delete()
    try:
        claimed_at = datetime.utcnow()
        candidate = CeleryBeatLock(celerybeat_name=beat_id, timestamp=claimed_at)
        candidate.save()
        _logger.info(_("New lock acquired by %(celerybeat_name)s") % beat_args)

        # Lock acquired: dispatch tasks
        return self.call_tick(self, beat_id)
    except mongoengine.NotUniqueError:
        # Another instance holds the lock; come back after the default wait
        wait = constants.CELERY_TICK_DEFAULT_WAIT_TIME
        _logger.info(
            _("Duplicate or new celerybeat Instance, "
              "ticking again in %(ret)s seconds.") % {'ret': wait})
        return wait
def _delete_worker(name, normal_shutdown=False):
    """
    Remove the worker called ``name`` together with the records that depend on it.

    A message is logged first: error level when ``normal_shutdown`` is
    ``False``, info level otherwise. Then the Worker document, its
    ReservedResource documents and, depending on the name prefix, its
    ResourceManagerLock or CeleryBeatLock are deleted. Finally every
    TaskStatus of that worker still in an incomplete state is passed to
    ``cancel`` with ``revoke_task=False``.

    :param name:            The name of the worker you wish to delete.
    :type  name:            basestring
    :param normal_shutdown: True if the worker shutdown normally, False otherwise.
                            Defaults to False.
    :type  normal_shutdown: bool
    """
    # Only the literal False is treated as "the worker went missing"; any other
    # value takes the orderly-shutdown branch.
    if normal_shutdown is not False:
        _logger.info(_("Cleaning up shutdown worker '%s'.") % name)
    else:
        template = _('The worker named %(name)s is missing. Canceling the tasks in its queue.')
        _logger.error(template % {'name': name})

    # The worker document itself, then the resources it had reserved
    Worker.objects(name=name).delete()
    ReservedResource.objects(worker_name=name).delete()

    # Resource managers own an additional lock document
    if name.startswith(RESOURCE_MANAGER_WORKER_NAME):
        ResourceManagerLock.objects(name=name).delete()

    # So do schedulers
    if name.startswith(SCHEDULER_WORKER_NAME):
        CeleryBeatLock.objects(name=name).delete()

    # Whatever was still pending in this worker's queue gets canceled
    unfinished = TaskStatus.objects(worker_name=name,
                                    state__in=constants.CALL_INCOMPLETE_STATES)
    for status in unfinished:
        cancel(status['task_id'], revoke_task=False)
def tick(self):
    """
    Run one scheduler iteration, guarded by the celerybeat lock.

    A heartbeat for this scheduler is recorded first. The instance then tries
    to refresh its own CeleryBeatLock. If that succeeds, or if it manages to
    insert a fresh lock after expired ones were purged, ``call_tick`` is
    invoked and its result returned. If another instance already holds the
    lock, the heartbeat interval is returned as the wait time instead.

    ``self._first_lock_acq_check`` tells the first pass from later ones: a
    lock acquired on a later pass is logged as a failover, and losing the
    lock race on the first pass is logged as a hot spare.

    :return:    number of seconds before the next tick should run
    :rtype:     float
    """
    worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

    if celery_version.startswith('4') and self.schedule_changed:
        # Setting _heap = None is a workaround for this bug in Celery4
        # https://github.com/celery/celery/pull/3958
        # Once 3958 is released and updated in Fedora this can be removed
        self._heap = None

    # Locks whose timestamp is at or before this moment count as expired
    current = ensure_tz(datetime.utcnow())
    expiry_cutoff = current - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

    # Refresh the lock if this instance already owns it
    own_lock = CeleryBeatLock.objects(name=CELERYBEAT_NAME)
    refreshed = own_lock.update(set__timestamp=datetime.utcnow())

    if refreshed != 1:
        # Not the owner: purge any expired lock, then try to claim the lock
        CeleryBeatLock.objects(timestamp__lte=expiry_cutoff).delete()
        try:
            claimed_at = datetime.utcnow()
            candidate = CeleryBeatLock(name=CELERYBEAT_NAME, timestamp=claimed_at)
            candidate.save()
            _logger.debug(_("New lock acquired by %(celerybeat_name)s") %
                          {'celerybeat_name': CELERYBEAT_NAME})

            # Acquiring the lock on anything but the first pass is a failover
            if not self._first_lock_acq_check:
                failover_msg = _("Failover occurred: '%s' is now the primary celerybeat "
                                 "instance") % CELERYBEAT_NAME
                _logger.warning(failover_msg)

            # Lock acquired: dispatch tasks
            ret = self.call_tick(CELERYBEAT_NAME)
        except mongoengine.NotUniqueError:
            # Another instance holds the lock; wait one heartbeat interval
            ret = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

            if self._first_lock_acq_check:
                _logger.info(_("Hot spare celerybeat instance '%(celerybeat_name)s' detected.")
                             % {'celerybeat_name': CELERYBEAT_NAME})
    else:
        # The lock is ours and has just been refreshed
        _logger.debug(_('Lock updated by %(celerybeat_name)s') %
                      {'celerybeat_name': CELERYBEAT_NAME})
        ret = self.call_tick(CELERYBEAT_NAME)

    # NOTE(review): the flattened source does not preserve the indentation of
    # this reset; it is placed at function level, next to the return, so the
    # flag is cleared after every pass. Confirm against the original layout.
    self._first_lock_acq_check = False
    return ret
def tick(self):
    """
    Run one scheduler iteration, guarded by the celerybeat lock.

    The failure watcher is trimmed and a heartbeat for this scheduler is
    recorded. The heartbeat is handed directly to the worker watcher rather
    than being sent as a message, since it would just get read again by this
    class.

    The instance then tries to refresh its own CeleryBeatLock. If that
    succeeds, or if it manages to insert a fresh lock after expired ones were
    purged, ``call_tick`` is invoked and its result returned. If another
    instance already holds the lock, ``TICK_SECONDS`` is returned instead.

    :return:    number of seconds before the next tick should run
    :rtype:     float
    """
    self._failure_watcher.trim()

    # Identity of this celerybeat instance
    beat_id = SCHEDULER_WORKER_NAME + "@" + platform.node()
    beat_args = {'celerybeat_name': beat_id}

    # This event is not sent anywhere; it is processed immediately.
    worker_watcher.handle_worker_heartbeat({
        'timestamp': time.time(),
        'type': 'scheduler-event',
        'hostname': beat_id
    })

    # Locks whose timestamp is at or before this moment count as expired
    expiry_cutoff = datetime.utcnow() - timedelta(seconds=CELERYBEAT_WAIT_SECONDS)

    # Refresh the lock if this instance already owns it
    own_lock = CeleryBeatLock.objects(celerybeat_name=beat_id)
    refreshed = own_lock.update(set__timestamp=datetime.utcnow())

    if refreshed == 1:
        # The lock is ours and has just been refreshed
        _logger.debug(_('Lock updated by %(celerybeat_name)s') % beat_args)
        return self.call_tick(self, beat_id)

    # Purge any expired lock, then try to claim the lock
    CeleryBeatLock.objects(timestamp__lte=expiry_cutoff).delete()
    try:
        claimed_at = datetime.utcnow()
        candidate = CeleryBeatLock(celerybeat_name=beat_id, timestamp=claimed_at)
        candidate.save()
        _logger.info(_("New lock acquired by %(celerybeat_name)s") % beat_args)

        # Lock acquired: dispatch tasks
        return self.call_tick(self, beat_id)
    except mongoengine.NotUniqueError:
        # Another instance holds the lock; come back after the default wait
        wait = TICK_SECONDS
        _logger.info(_("Duplicate or new celerybeat Instance, "
                       "ticking again in %(ret)s seconds.") % {'ret': wait})
        return wait
def tick(self):
    """
    Run one scheduler iteration, guarded by the celerybeat lock.

    A heartbeat for this scheduler is recorded first. The instance then tries
    to refresh its own CeleryBeatLock. If that succeeds, or if it manages to
    insert a fresh lock after expired ones were purged, ``call_tick`` is
    invoked and its result returned. If another instance already holds the
    lock, the heartbeat interval is returned as the wait time instead.

    ``self._first_lock_acq_check`` tells the first pass from later ones: a
    lock acquired on a later pass is logged as a failover, and losing the
    lock race on the first pass is logged as a hot spare.

    :return:    number of seconds before the next tick should run
    :rtype:     float
    """
    worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

    if celery_version.startswith('4') and self.schedule_changed:
        # Setting _heap = None is a workaround for this bug in Celery4
        # https://github.com/celery/celery/pull/3958
        # Once 3958 is released and updated in Fedora this can be removed
        self._heap = None

    # Locks whose timestamp is at or before this moment count as expired
    current = ensure_tz(datetime.utcnow())
    lifetime = timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)
    expiry_cutoff = current - lifetime

    # Refresh the lock if this instance already owns it
    touched = CeleryBeatLock.objects(name=CELERYBEAT_NAME).update(
        set__timestamp=datetime.utcnow())
    holds_lock = touched == 1

    if holds_lock:
        # The lock is ours and has just been refreshed
        _logger.debug(_('Lock updated by %(celerybeat_name)s') %
                      {'celerybeat_name': CELERYBEAT_NAME})
        ret = self.call_tick(CELERYBEAT_NAME)
    else:
        # Purge any expired lock, then try to claim the lock
        CeleryBeatLock.objects(timestamp__lte=expiry_cutoff).delete()
        try:
            claimed_at = datetime.utcnow()
            CeleryBeatLock(name=CELERYBEAT_NAME, timestamp=claimed_at).save()
            _logger.debug(_("New lock acquired by %(celerybeat_name)s") %
                          {'celerybeat_name': CELERYBEAT_NAME})

            # Acquiring the lock on anything but the first pass is a failover
            if not self._first_lock_acq_check:
                _logger.warning(
                    _("Failover occurred: '%s' is now the primary celerybeat "
                      "instance") % CELERYBEAT_NAME)

            # Lock acquired: dispatch tasks
            ret = self.call_tick(CELERYBEAT_NAME)
        except mongoengine.NotUniqueError:
            # Another instance holds the lock; wait one heartbeat interval
            ret = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

            if self._first_lock_acq_check:
                _logger.info(
                    _("Hot spare celerybeat instance '%(celerybeat_name)s' detected.")
                    % {'celerybeat_name': CELERYBEAT_NAME})

    # NOTE(review): the flattened source does not preserve the indentation of
    # this reset; it is placed at function level, next to the return, so the
    # flag is cleared after every pass. Confirm against the original layout.
    self._first_lock_acq_check = False
    return ret