def _check_node_raid_jobs(self, task):
    """Check the progress of running RAID config jobs of a node.

    Polls each cached RAID config job on the DRAC card, records the
    finished ones (completed or failed), and once all jobs are done
    either resumes cleaning or marks the clean step as failed.

    :param task: a TaskManager instance with the node to act on
    """
    node = task.node
    raid_config_job_ids = node.driver_internal_info['raid_config_job_ids']
    finished_job_ids = []
    # Remember the job that failed so the clean-failed report references
    # it, not merely whichever job happened to be polled last.
    failed_job = None

    for config_job_id in raid_config_job_ids:
        config_job = drac_job.get_job(node, job_id=config_job_id)

        # get_job() may return None for a job the BMC no longer knows
        # about; treat that as finished. Use the Job's 'status'
        # attribute, consistent with the BIOS job checker.
        if config_job is None or config_job.status == 'Completed':
            finished_job_ids.append(config_job_id)
        elif config_job.status == 'Failed':
            finished_job_ids.append(config_job_id)
            failed_job = config_job
            self._set_raid_config_job_failure(node)

    if not finished_job_ids:
        return

    # Finished jobs require a node reboot; upgrade to an exclusive lock
    # before the destructive operation.
    task.upgrade_lock()
    self._delete_cached_config_job_id(node, finished_job_ids)

    if not node.driver_internal_info['raid_config_job_ids']:
        if not node.driver_internal_info.get('raid_config_job_failure',
                                             False):
            self._resume_cleaning(task)
        else:
            self._clear_raid_config_job_failure(node)
            # Fall back to the last polled job when the failure flag was
            # set during an earlier poll cycle and no job failed in this
            # one (matches the original behavior in that case).
            self._set_clean_failed(task, failed_job or config_job)
def test_get_job(self, mock_get_drac_client):
    """get_job() proxies to the DRAC client and returns its result."""
    client = mock.Mock()
    mock_get_drac_client.return_value = client
    client.get_job.return_value = self.job

    result = drac_job.get_job(self.node, 'foo')

    client.get_job.assert_called_once_with('foo')
    self.assertEqual(self.job, result)
def _check_node_bios_jobs(self, task):
    """Check the progress of running BIOS config jobs of a node.

    This handles jobs for BIOS set and reset. Handle means, it checks
    for job status to not only signify completed jobs but also handle
    failures by invoking the 'fail' event, allowing the conductor to
    put the node into clean/deploy FAIL state.

    :param task: a TaskManager instance with the node to act on
    """
    node = task.node
    bios_config_job_ids = node.driver_internal_info['bios_config_job_ids']
    finished_job_ids = []
    # Track the job that failed so the error message below reports that
    # job, not whichever job happened to be polled last in the loop.
    failed_job = None

    for config_job_id in bios_config_job_ids:
        config_job = drac_job.get_job(node, job_id=config_job_id)
        if config_job is None or config_job.status == 'Completed':
            finished_job_ids.append(config_job_id)
        elif config_job.status == 'Failed':
            finished_job_ids.append(config_job_id)
            failed_job = config_job

    # If no job has finished, return
    if not finished_job_ids:
        return

    # The finished jobs will require a node reboot, need to update the
    # node lock to exclusive, allowing a destructive reboot operation
    task.upgrade_lock()

    # Cleanup the database with finished jobs, they're no longer needed
    self._delete_cached_config_job_ids(node, finished_job_ids)

    if failed_job is None:
        # Cache the new BIOS settings, caching needs to happen here
        # since the config steps are async. Decorator won't work.
        self.cache_bios_settings(task)
        # if no failure, continue with clean/deploy
        self._resume_current_operation(task)
    else:
        # invoke 'fail' event to allow conductor to put the node in
        # a clean/deploy fail state
        error_message = ("Failed config job: {}. Message: '{}'.".format(
            failed_job.id, failed_job.message))
        self._set_failed(task, error_message)
def _check_node_raid_jobs(self, task):
    """Check the progress of running RAID config jobs of a node.

    Polls each cached RAID config job, records finished ones
    (completed, failed, or no longer known to the BMC), then either
    advances the RAID cleaning substep or marks the clean step failed.

    :param task: a TaskManager instance with the node to act on
    """
    node = task.node
    raid_config_job_ids = node.driver_internal_info['raid_config_job_ids']
    finished_job_ids = []
    # Remember the job that failed so the clean-failed report references
    # it, not merely whichever job happened to be polled last.
    failed_job = None

    for config_job_id in raid_config_job_ids:
        config_job = drac_job.get_job(node, job_id=config_job_id)
        if config_job is None or config_job.status == 'Completed':
            finished_job_ids.append(config_job_id)
        elif config_job.status == 'Failed':
            finished_job_ids.append(config_job_id)
            failed_job = config_job
            self._set_raid_config_job_failure(node)

    if not finished_job_ids:
        return

    # Finished jobs require a node reboot; upgrade to an exclusive lock
    # before the destructive operation.
    task.upgrade_lock()
    self._delete_cached_config_job_id(node, finished_job_ids)

    if not node.driver_internal_info.get('raid_config_job_failure',
                                         False):
        if 'raid_config_substep' in node.driver_internal_info:
            if node.driver_internal_info['raid_config_substep'] == \
                    'delete_foreign_config':
                self._execute_cleaning_foreign_drives(task, node)
            elif node.driver_internal_info['raid_config_substep'] == \
                    'completed':
                self._complete_raid_cleaning_substep(task, node)
            # NOTE(review): any other substep value is silently
            # ignored here — looks unintentional; confirm upstream.
        else:
            self._complete_raid_cleaning_substep(task, node)
    else:
        self._clear_raid_substep(node)
        self._clear_raid_config_job_failure(node)
        # Fall back to the last polled job when the failure flag was set
        # during an earlier poll cycle and no job failed in this one
        # (matches the original behavior in that case).
        self._set_clean_failed(task, failed_job or config_job)