def tick(self):
    """
    Run one iteration of the scheduler if this celerybeat instance holds the CeleryBeatLock.

    A scheduler heartbeat is recorded first by handing a locally built event dict directly to
    worker_watcher.handle_worker_heartbeat(). The method then tries to refresh the timestamp of
    the lock entry stored under this instance's name. If exactly one entry was updated, the tick
    is dispatched through call_tick(). Otherwise every lock whose timestamp is at least
    constants.CELERYBEAT_LOCK_MAX_AGE seconds old is deleted and a new lock entry is saved for
    this instance; if a mongoengine.NotUniqueError is raised while doing so,
    constants.CELERY_TICK_DEFAULT_WAIT_TIME is returned instead.

    :return: number of seconds before the next tick should run
    :rtype:  float
    """
    beat_name = constants.SCHEDULER_WORKER_NAME + "@" + platform.node()

    # The event is never sent anywhere; it is processed immediately.
    worker_watcher.handle_worker_heartbeat({
        'timestamp': time.time(),
        'local_received': time.time(),
        'type': 'scheduler-event',
        'hostname': beat_name,
    })

    stale_cutoff = datetime.utcnow() - timedelta(seconds=constants.CELERYBEAT_LOCK_MAX_AGE)

    # Refresh the lock timestamp; this only matches when the lock belongs to this instance.
    own_lock = CeleryBeatLock.objects(celerybeat_name=beat_name)
    refreshed_count = own_lock.update(set__timestamp=datetime.utcnow())

    if refreshed_count == 1:
        _logger.debug(_('Lock updated by %(celerybeat_name)s') % {'celerybeat_name': beat_name})
        return self.call_tick(self, beat_name)

    # No lock of our own: remove locks that are old enough, then try to take the lock.
    CeleryBeatLock.objects(timestamp__lte=stale_cutoff).delete()
    try:
        CeleryBeatLock(celerybeat_name=beat_name, timestamp=datetime.utcnow()).save()
        _logger.info(_("New lock acquired by %(celerybeat_name)s") %
                     {'celerybeat_name': beat_name})
        # The lock is ours now, so dispatch the tasks.
        return self.call_tick(self, beat_name)
    except mongoengine.NotUniqueError:
        # Default wait time for celerybeat instances that do not hold the lock.
        standby_wait = constants.CELERY_TICK_DEFAULT_WAIT_TIME
        _logger.info(_("Duplicate or new celerybeat Instance, "
                       "ticking again in %(ret)s seconds.") % {'ret': standby_wait})
        return standby_wait
def tick(self):
    """
    Run one iteration of the scheduler if this celerybeat instance holds the CeleryBeatLock.

    The heartbeat for CELERYBEAT_NAME is recorded on every call. The method then tries to refresh
    the timestamp of the lock named CELERYBEAT_NAME; when exactly one entry was updated the tick
    is dispatched through call_tick(). Otherwise locks whose timestamp is at least
    constants.PULP_PROCESS_TIMEOUT_INTERVAL seconds old are deleted and a new lock is saved for
    this instance. If a mongoengine.NotUniqueError is raised while doing so,
    constants.PULP_PROCESS_HEARTBEAT_INTERVAL is returned instead.

    self._first_lock_acq_check is cleared at the end of every call that returns normally. While
    it is still set, failing to acquire the lock is logged as a hot spare; once it is cleared,
    acquiring a new lock is logged as a failover.

    :return: number of seconds before the next tick should run
    :rtype:  float
    """
    worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

    if celery_version.startswith('4') and self.schedule_changed:
        # Workaround for a Celery 4 bug, see https://github.com/celery/celery/pull/3958
        # Resetting _heap can be dropped once 3958 is released and updated in Fedora.
        self._heap = None

    current_time = ensure_tz(datetime.utcnow())
    stale_cutoff = current_time - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

    # Refresh the lock timestamp; this only matches when the lock belongs to this instance.
    own_lock = CeleryBeatLock.objects(name=CELERYBEAT_NAME)
    refreshed_count = own_lock.update(set__timestamp=datetime.utcnow())

    name_params = {'celerybeat_name': CELERYBEAT_NAME}

    if refreshed_count == 1:
        _logger.debug(_('Lock updated by %(celerybeat_name)s') % name_params)
        wait_seconds = self.call_tick(CELERYBEAT_NAME)
    else:
        # Remove locks that are old enough, then try to take the lock.
        CeleryBeatLock.objects(timestamp__lte=stale_cutoff).delete()
        try:
            CeleryBeatLock(name=CELERYBEAT_NAME, timestamp=datetime.utcnow()).save()
            _logger.debug(_("New lock acquired by %(celerybeat_name)s") % name_params)

            if not self._first_lock_acq_check:
                _logger.warning(_("Failover occurred: '%s' is now the primary celerybeat "
                                  "instance") % CELERYBEAT_NAME)

            # The lock is ours now, so dispatch the tasks.
            wait_seconds = self.call_tick(CELERYBEAT_NAME)
        except mongoengine.NotUniqueError:
            # Default wait time for celerybeat instances that do not hold the lock.
            wait_seconds = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

            if self._first_lock_acq_check:
                _logger.info(_("Hot spare celerybeat instance '%(celerybeat_name)s' detected.") %
                             name_params)

    self._first_lock_acq_check = False
    return wait_seconds
def test__delete_worker(self, logger, cancel, mock_add_consumer):
    """
    Deleting a Worker must cancel exactly its incomplete Tasks and remove the Worker record,
    while leaving other Workers and their Tasks alone.
    """
    # Register both workers by feeding a heartbeat event for each of them.
    for worker_name in (WORKER_1, WORKER_2):
        worker_watcher.handle_worker_heartbeat({
            'timestamp': time.time(),
            'type': 'worker-heartbeat',
            'hostname': worker_name,
        })

    # Three tasks are assigned to WORKER_2: task_1 (running) and task_2 (waiting) are incomplete
    # and should be canceled, task_3 is finished and should not be. task_4 belongs to WORKER_1,
    # which stays around, so it must not be touched either.
    task_fixtures = (
        ('task_1', WORKER_2_QUEUE, CALL_RUNNING_STATE),
        ('task_2', WORKER_2_QUEUE, CALL_WAITING_STATE),
        ('task_3', WORKER_2_QUEUE, CALL_FINISHED_STATE),
        ('task_4', WORKER_1_QUEUE, CALL_RUNNING_STATE),
    )
    for task_id, queue_name, task_state in task_fixtures:
        TaskStatusManager.create_task_status(task_id, queue_name, state=task_state)

    # Sanity check of the setup: WORKER_2 must be present before it is deleted.
    workers = Worker.get_collection()
    self.assertEqual(workers.find({'_id': WORKER_2}).count(), 1)

    # Delete the Worker named WORKER_2.
    tasks._delete_worker.apply_async(args=(WORKER_2, ), queue=tasks.RESOURCE_MANAGER_QUEUE)

    # cancel() should have been called twice, once for task_1 and once for task_2. The order is
    # not known, so compare the positional arguments as a set (task_3 must not be in it).
    self.assertEqual(cancel.call_count, 2)
    canceled_args = set(recorded_call[1] for recorded_call in cancel.mock_calls)
    self.assertEqual(canceled_args, set([('task_1', ), ('task_2', )]))

    # The cancellation should have been logged. call_count only counts calls made on the logger
    # mock itself; the checks below read the first entry of mock_calls, presumably a call to one
    # of the logger's methods.
    self.assertEqual(logger.call_count, 0)
    first_logged_message = logger.mock_calls[0][1][0]
    self.assertTrue(WORKER_2 in first_logged_message)
    self.assertTrue('Canceling the tasks' in first_logged_message)

    # WORKER_2 is gone, WORKER_1 is still there.
    self.assertEqual(workers.find({'_id': WORKER_2}).count(), 0)
    self.assertEqual(workers.find({'_id': WORKER_1}).count(), 1)
def test_handle_worker_heartbeat_update(self, mock_worker, mock_logger, mock_datetime):
    """
    Ensure that we don't log when an existing worker is updated.

    The Worker queryset mock returns an object from first(), which stands for a worker that is
    already known. The heartbeat must then upsert last_heartbeat with the value produced by the
    patched datetime.utcnow() and must not call the logger's info().
    """
    mock_worker.objects.return_value.first.return_value = mock.Mock()

    worker_watcher.handle_worker_heartbeat('fake-worker')

    self.assertFalse(mock_logger.info.called)
    mock_worker.objects.return_value.update_one.\
        assert_called_once_with(set__last_heartbeat=mock_datetime.utcnow(), upsert=True)
def test_handle_worker_heartbeat_new(self, mock_worker, mock_logger, mock_datetime):
    """
    A heartbeat from a worker that is not known yet must be logged and upserted.

    first() on the Worker queryset mock returns None, which stands for "no such worker".
    """
    worker_queryset = mock_worker.objects.return_value
    worker_queryset.first.return_value = None

    worker_watcher.handle_worker_heartbeat('fake-worker')

    mock_logger.info.assert_called_once_with("New worker 'fake-worker' discovered")
    worker_queryset.update_one.assert_called_once_with(
        set__last_heartbeat=mock_datetime.utcnow(), upsert=True)
def tick(self):
    """
    Run one iteration of the scheduler if this celerybeat instance holds the CeleryBeatLock.

    The failure watcher is trimmed first and a scheduler heartbeat is recorded by handing a
    locally built event dict directly to worker_watcher.handle_worker_heartbeat(). The method
    then tries to refresh the timestamp of the lock entry stored under this instance's name. If
    exactly one entry was updated, the tick is dispatched through call_tick(). Otherwise every
    lock whose timestamp is at least CELERYBEAT_WAIT_SECONDS seconds old is deleted and a new
    lock entry is saved for this instance; if a mongoengine.NotUniqueError is raised while doing
    so, TICK_SECONDS is returned instead.

    :return: number of seconds before the next tick should run
    :rtype:  float
    """
    self._failure_watcher.trim()

    beat_name = SCHEDULER_WORKER_NAME + "@" + platform.node()

    # The event is never sent anywhere; it is processed immediately.
    worker_watcher.handle_worker_heartbeat({
        'timestamp': time.time(),
        'type': 'scheduler-event',
        'hostname': beat_name,
    })

    stale_cutoff = datetime.utcnow() - timedelta(seconds=CELERYBEAT_WAIT_SECONDS)

    # Refresh the lock timestamp; this only matches when the lock belongs to this instance.
    refreshed_count = CeleryBeatLock.objects(celerybeat_name=beat_name).update(
        set__timestamp=datetime.utcnow())

    if refreshed_count != 1:
        # No lock of our own: remove locks that are old enough, then try to take the lock.
        CeleryBeatLock.objects(timestamp__lte=stale_cutoff).delete()
        try:
            fresh_lock = CeleryBeatLock(celerybeat_name=beat_name, timestamp=datetime.utcnow())
            fresh_lock.save()
            _logger.info(_("New lock acquired by %(celerybeat_name)s") %
                         {'celerybeat_name': beat_name})
            # The lock is ours now, so dispatch the tasks.
            wait_seconds = self.call_tick(self, beat_name)
        except mongoengine.NotUniqueError:
            # Default wait time for celerybeat instances that do not hold the lock.
            wait_seconds = TICK_SECONDS
            _logger.info(_("Duplicate or new celerybeat Instance, "
                           "ticking again in %(ret)s seconds.") % {'ret': wait_seconds})
    else:
        _logger.debug(_('Lock updated by %(celerybeat_name)s') % {'celerybeat_name': beat_name})
        wait_seconds = self.call_tick(self, beat_name)

    return wait_seconds
def test__delete_queue(self, logger, cancel, active_queues, mock_add_consumer):
    """
    Deleting a queue must cancel exactly its incomplete Tasks and remove the queue record,
    while leaving other queues and their Tasks alone.
    """
    # Register both workers as having available queues by feeding a heartbeat event for each.
    for worker_name in (RESERVED_WORKER_1, RESERVED_WORKER_2):
        worker_watcher.handle_worker_heartbeat({
            'timestamp': time.time(),
            'type': 'worker-heartbeat',
            'hostname': worker_name,
        })

    # Three tasks are assigned to RESERVED_WORKER_2: task_1 (running) and task_2 (waiting) are
    # incomplete and should be canceled, task_3 is finished and should not be. task_4 belongs to
    # RESERVED_WORKER_1, which stays around, so it must not be touched either.
    task_fixtures = (
        ('task_1', RESERVED_WORKER_2, CALL_RUNNING_STATE),
        ('task_2', RESERVED_WORKER_2, CALL_WAITING_STATE),
        ('task_3', RESERVED_WORKER_2, CALL_FINISHED_STATE),
        ('task_4', RESERVED_WORKER_1, CALL_RUNNING_STATE),
    )
    for task_id, queue_name, task_state in task_fixtures:
        TaskStatusManager.create_task_status(task_id, queue_name, state=task_state)

    # Sanity check of the setup: the AvailableQueue for RESERVED_WORKER_2 must exist.
    queues = AvailableQueue.get_collection()
    self.assertEqual(queues.find({'_id': RESERVED_WORKER_2}).count(), 1)

    # Delete the queue named RESERVED_WORKER_2.
    tasks._delete_queue.apply_async(args=(RESERVED_WORKER_2,),
                                    queue=tasks.RESOURCE_MANAGER_QUEUE)

    # cancel() should have been called twice, once for task_1 and once for task_2. The order is
    # not known, so compare the positional arguments as a set (task_3 must not be in it).
    self.assertEqual(cancel.call_count, 2)
    canceled_args = set(recorded_call[1] for recorded_call in cancel.mock_calls)
    self.assertEqual(canceled_args, set([('task_1',), ('task_2',)]))

    # The cancellation should have been logged. call_count only counts calls made on the logger
    # mock itself; the checks below read the first entry of mock_calls, presumably a call to one
    # of the logger's methods.
    self.assertEqual(logger.call_count, 0)
    first_logged_message = logger.mock_calls[0][1][0]
    self.assertTrue(RESERVED_WORKER_2 in first_logged_message)
    self.assertTrue('Canceling the tasks' in first_logged_message)

    # The queue of RESERVED_WORKER_2 is gone, the one of RESERVED_WORKER_1 is still there.
    self.assertEqual(queues.find({'_id': RESERVED_WORKER_2}).count(), 0)
    self.assertEqual(queues.find({'_id': RESERVED_WORKER_1}).count(), 1)
def _record_heartbeat(self, consumer):
    """
    Record a heartbeat for the worker that owns the given consumer.

    The consumer's hostname is passed to worker_watcher.handle_worker_heartbeat(). When the
    hostname starts with constants.RESOURCE_MANAGER_WORKER_NAME, the timestamp of the
    ResourceManagerLock with that name is refreshed as well; upsert=False is passed, so the
    update is not asked to create a missing lock.

    :param consumer: The consumer instance
    :type  consumer: celery.worker.consumer.Consumer
    """
    hostname = consumer.hostname

    # Update the worker record timestamp and handle logging of new workers.
    worker_watcher.handle_worker_heartbeat(hostname)

    if not hostname.startswith(constants.RESOURCE_MANAGER_WORKER_NAME):
        return

    # The worker is a resource manager: refresh the timestamp of its ResourceManagerLock.
    manager_lock = ResourceManagerLock.objects(name=hostname)
    manager_lock.update_one(set__timestamp=datetime.utcnow(), upsert=False)
def _record_heartbeat(self, consumer):
    """
    Create or update the worker record for the given consumer.

    Resource manager workers (hostname prefixed with constants.RESOURCE_MANAGER_WORKER_NAME)
    additionally get the timestamp of their ResourceManagerLock refreshed.

    :param consumer: The consumer instance
    :type  consumer: celery.worker.consumer.Consumer
    """
    worker_name = consumer.hostname
    is_resource_manager = worker_name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    # Update the worker record timestamp and handle logging of new workers.
    worker_watcher.handle_worker_heartbeat(worker_name)

    if is_resource_manager:
        # upsert=False: only a lock that already exists is meant to be touched here.
        ResourceManagerLock.objects(name=worker_name).update_one(
            set__timestamp=datetime.utcnow(), upsert=False)
def test_handle_worker_heartbeat_new(self, mock__logger, mock_gettext, mock_worker,
                                     mock_resources, mock_criteria, mock__parse_and_log_event,
                                     mock_list):
    """
    Check the calls made by handle_worker_heartbeat() on the "new worker" path.

    The event is expected to be parsed, a Criteria built for the worker's _id and passed to
    resources.filter_workers(), a Worker constructed from the parsed name and timestamp and
    saved, and the "New worker" message looked up through gettext.
    """
    mock_event = mock.Mock()

    worker_watcher.handle_worker_heartbeat(mock_event)

    event_info = mock__parse_and_log_event.return_value
    mock__parse_and_log_event.assert_called_with(mock_event)
    mock_criteria.assert_called_once_with(filters={'_id': event_info['worker_name']},
                                          fields=('_id', 'last_heartbeat'))
    # assert_called_once() takes no arguments, so the argument check needs the _with variant.
    mock_resources.filter_workers.assert_called_once_with(mock_criteria.return_value)
    mock_worker.assert_called_once_with(event_info['worker_name'], event_info['timestamp'])
    mock_gettext.assert_called_once_with("New worker '%(worker_name)s' discovered")
    # NOTE(review): whether this line checks anything depends on the installed mock version
    # providing assert_called_once() - confirm.
    mock__logger.assert_called_once()
    mock_worker.return_value.save.assert_called_once_with()
def test_handle_worker_heartbeat_update(self, mock__parse_and_log_event, mock_worker,
                                        mock_logger):
    """
    Ensure that we save a record but don't log when an existing worker is updated.

    The parsed event carries both 'timestamp' and 'local_received'; the value expected in
    last_heartbeat is the 'local_received' one.
    """
    mock_event = mock.Mock()
    # first() returning an object stands for a worker that is already known.
    mock_worker.objects.return_value.first.return_value = mock.Mock()
    mock__parse_and_log_event.return_value = {
        'worker_name': 'fake-worker',
        'timestamp': '2014-12-08T15:52:29Z',
        'local_received': '2014-12-08T15:52:48Z',
        'type': 'fake-type'}

    worker_watcher.handle_worker_heartbeat(mock_event)

    mock_worker.objects.return_value.update_one.\
        assert_called_once_with(set__last_heartbeat='2014-12-08T15:52:48Z', upsert=True)
    self.assertFalse(mock_logger.info.called)
def test_handle_worker_heartbeat_update(self, mock__parse_and_log_event, mock_worker,
                                        mock_logger):
    """
    Ensure that we save a record but don't log when an existing worker is updated.

    The value expected in last_heartbeat is the 'timestamp' of the parsed event.
    """
    mock_event = mock.Mock()
    # first() returning an object stands for a worker that is already known.
    mock_worker.objects.return_value.first.return_value = mock.Mock()
    mock__parse_and_log_event.return_value = {'worker_name': 'fake-worker',
                                              'timestamp': '2014-12-08T15:52:29Z',
                                              'type': 'fake-type'}

    worker_watcher.handle_worker_heartbeat(mock_event)

    mock_worker.objects.return_value.update_one.\
        assert_called_once_with(set__last_heartbeat='2014-12-08T15:52:29Z', upsert=True)
    self.assertFalse(mock_logger.info.called)
def test_handle_worker_heartbeat_new(self, mock__parse_and_log_event, mock_worker, mock_logger):
    """
    A heartbeat from a worker that is not known yet must be saved and logged.

    first() on the Worker queryset mock returns None, which stands for "no such worker". The
    value expected in last_heartbeat is the 'timestamp' of the parsed event.
    """
    heartbeat_event = mock.Mock()
    worker_queryset = mock_worker.objects.return_value
    worker_queryset.first.return_value = None
    mock__parse_and_log_event.return_value = {
        'worker_name': 'fake-worker',
        'timestamp': '2014-12-08T15:52:29Z',
        'type': 'fake-type',
    }

    worker_watcher.handle_worker_heartbeat(heartbeat_event)

    worker_queryset.update_one.assert_called_once_with(
        set__last_heartbeat='2014-12-08T15:52:29Z', upsert=True)
    mock_logger.info.assert_called_once_with("New worker 'fake-worker' discovered")
def test_handle_worker_heartbeat_new(self, mock__parse_and_log_event, mock_worker, mock_logger):
    """
    A heartbeat from a worker that is not known yet must be saved and logged.

    first() on the Worker queryset mock returns None, which stands for "no such worker". The
    parsed event carries both 'timestamp' and 'local_received'; the value expected in
    last_heartbeat is the 'local_received' one.
    """
    heartbeat_event = mock.Mock()
    parsed_event = {
        'worker_name': 'fake-worker',
        'timestamp': '2014-12-08T15:52:29Z',
        'local_received': '2014-12-08T15:52:36Z',
        'type': 'fake-type',
    }
    worker_queryset = mock_worker.objects.return_value
    worker_queryset.first.return_value = None
    mock__parse_and_log_event.return_value = parsed_event

    worker_watcher.handle_worker_heartbeat(heartbeat_event)

    worker_queryset.update_one.assert_called_once_with(
        set__last_heartbeat='2014-12-08T15:52:36Z', upsert=True)
    mock_logger.info.assert_called_once_with("New worker 'fake-worker' discovered")
def test_handle_worker_heartbeat_update(self, mock_worker, mock_resources, mock_criteria,
                                        mock__is_resource_manager, mock__parse_and_log_event,
                                        mock_list):
    """
    Check the calls made by handle_worker_heartbeat() on the "existing worker" path.

    The event is expected to be parsed, a Criteria built for the worker's _id and passed to
    resources.filter_workers(), and last_heartbeat set to the parsed timestamp through
    find_and_modify() on the Worker collection.
    """
    mock_event = mock.Mock()

    worker_watcher.handle_worker_heartbeat(mock_event)

    event_info = mock__parse_and_log_event.return_value
    mock__parse_and_log_event.assert_called_with(mock_event)
    mock_criteria.assert_called_once_with(filters={'_id': event_info['worker_name']},
                                          fields=('_id', 'last_heartbeat'))
    # assert_called_once() takes no arguments, so the argument check needs the _with variant.
    mock_resources.filter_workers.assert_called_once_with(mock_criteria.return_value)
    mock_worker.get_collection.assert_called_once_with()
    find_and_modify = mock_worker.get_collection.return_value.find_and_modify
    find_and_modify.assert_called_once_with(
        query={'_id': event_info['worker_name']},
        update={'$set': {'last_heartbeat': event_info['timestamp']}}
    )
def test_handle_worker_heartbeat_update(self, mock_available_queue, mock_resources,
                                        mock_criteria, mock__is_resource_manager,
                                        mock__parse_and_log_event, mock_list):
    """
    Check the calls made by handle_worker_heartbeat() on the "existing queue" path.

    The event is expected to be parsed, a Criteria built for the worker's _id and passed to
    resources.filter_available_queues(), and last_heartbeat set to the parsed timestamp through
    find_and_modify() on the AvailableQueue collection.
    """
    mock_event = mock.Mock()

    worker_watcher.handle_worker_heartbeat(mock_event)

    event_info = mock__parse_and_log_event.return_value
    mock__parse_and_log_event.assert_called_with(mock_event)
    mock_criteria.assert_called_once_with(
        filters={'_id': event_info['worker_name']},
        fields=('_id', 'last_heartbeat', 'num_reservations'))
    # assert_called_once() takes no arguments, so the argument check needs the _with variant.
    mock_resources.filter_available_queues.assert_called_once_with(mock_criteria.return_value)
    mock_available_queue.get_collection.assert_called_once_with()
    find_and_modify = mock_available_queue.get_collection.return_value.find_and_modify
    find_and_modify.assert_called_once_with(
        query={'_id': event_info['worker_name']},
        update={'$set': {'last_heartbeat': event_info['timestamp']}}
    )
def tick(self):
    """
    Run one iteration of the scheduler through the superclass, then do local housekeeping.

    After the superclass tick the failure watcher is trimmed and a scheduler heartbeat is
    recorded. No heartbeat message is actually sent, since it would just get read again by this
    class; the event dict is handed straight to worker_watcher.handle_worker_heartbeat().

    :return: number of seconds before the next tick should run
    :rtype:  float
    """
    seconds_until_next_tick = super(Scheduler, self).tick()

    self._failure_watcher.trim()

    # The event is never sent anywhere; it is processed immediately.
    worker_watcher.handle_worker_heartbeat({
        'timestamp': time.time(),
        'type': 'scheduler-event',
        'hostname': "%s@%s" % (SCHEDULER_WORKER_NAME, platform.node()),
    })

    return seconds_until_next_tick
def tick(self):
    """
    Run one iteration of the scheduler, provided this instance owns the celerybeat lock.

    Every call records a heartbeat for CELERYBEAT_NAME. The lock named CELERYBEAT_NAME is then
    refreshed; if that updates exactly one entry, call_tick() dispatches the tick. If not, locks
    with a timestamp at least constants.PULP_PROCESS_TIMEOUT_INTERVAL seconds in the past are
    deleted and a new lock is saved. A mongoengine.NotUniqueError raised while doing so results
    in constants.PULP_PROCESS_HEARTBEAT_INTERVAL being returned.

    self._first_lock_acq_check selects the log messages: while set, a failed acquisition is
    logged as a hot spare; once cleared, a successful acquisition is logged as a failover. It is
    cleared at the end of every call that returns normally.

    :return: number of seconds before the next tick should run
    :rtype:  float
    """
    worker_watcher.handle_worker_heartbeat(CELERYBEAT_NAME)

    if celery_version.startswith('4') and self.schedule_changed:
        # Setting _heap = None works around this Celery 4 bug:
        # https://github.com/celery/celery/pull/3958
        # It can be removed once 3958 is released and updated in Fedora.
        self._heap = None

    tz_aware_now = ensure_tz(datetime.utcnow())
    oldest_valid = tz_aware_now - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)

    # Refresh the lock timestamp; this only matches when the lock belongs to this instance.
    updated_locks = CeleryBeatLock.objects(name=CELERYBEAT_NAME).update(
        set__timestamp=datetime.utcnow())

    if updated_locks != 1:
        # Remove locks that are old enough, then try to take the lock.
        CeleryBeatLock.objects(timestamp__lte=oldest_valid).delete()
        try:
            acquired_at = datetime.utcnow()
            CeleryBeatLock(name=CELERYBEAT_NAME, timestamp=acquired_at).save()
            _logger.debug(_("New lock acquired by %(celerybeat_name)s") %
                          {'celerybeat_name': CELERYBEAT_NAME})

            if not self._first_lock_acq_check:
                failover_msg = _("Failover occurred: '%s' is now the primary celerybeat "
                                 "instance") % CELERYBEAT_NAME
                _logger.warning(failover_msg)

            # The lock is ours now, so dispatch the tasks.
            next_delay = self.call_tick(CELERYBEAT_NAME)
        except mongoengine.NotUniqueError:
            # Default wait time for celerybeat instances that do not hold the lock.
            next_delay = constants.PULP_PROCESS_HEARTBEAT_INTERVAL

            if self._first_lock_acq_check:
                _logger.info(_("Hot spare celerybeat instance '%(celerybeat_name)s' detected.") %
                             {'celerybeat_name': CELERYBEAT_NAME})
    else:
        _logger.debug(_('Lock updated by %(celerybeat_name)s') %
                      {'celerybeat_name': CELERYBEAT_NAME})
        next_delay = self.call_tick(CELERYBEAT_NAME)

    self._first_lock_acq_check = False
    return next_delay
def test_handle_worker_heartbeat_with_resource_manager_event(self, mock_criteria,
                                                             mock__is_resource_manager,
                                                             mock_list):
    """
    Handling the heartbeat must not call the patched Criteria.

    NOTE(review): mock__is_resource_manager is left unconfigured here; presumably its default
    return value makes the event count as a resource manager event - confirm against the
    patch decorators.
    """
    heartbeat_event = mock.Mock()

    worker_watcher.handle_worker_heartbeat(heartbeat_event)

    self.assertFalse(mock_criteria.called)
def test_handle_worker_heartbeat_with_resource_manager_event(
        self, mock_criteria, mock__is_resource_manager, mock_list):
    """
    Verify that no Criteria is built while handling this heartbeat.

    NOTE(review): the event presumably counts as a resource manager event because
    mock__is_resource_manager is patched and not configured - confirm against the patch
    decorators.
    """
    worker_watcher.handle_worker_heartbeat(mock.Mock())

    criteria_was_built = mock_criteria.called
    self.assertFalse(criteria_was_built)