def rescue(self, task):
    """Boot a rescue ramdisk on the node.

    :param task: a TaskManager instance.
    :raises: NetworkError if the tenant ports cannot be removed.
    :raises: InvalidParameterValue when the wrong power state is specified
        or the wrong driver info is specified for power management.
    :raises: other exceptions by the node's power driver if something
        wrong occurred during the power action.
    :raises: any boot interface's prepare_ramdisk exceptions.
    :returns: Returns states.RESCUEWAIT
    """
    manager_utils.node_power_action(task, states.POWER_OFF)
    # NOTE(TheJulia): Revealing that the power is off at any time can
    # cause external power sync to decide that the node must be off.
    # This may result in a post-rescued instance being turned off
    # unexpectedly after rescue has started.
    # TODO(TheJulia): Once we have power/state callbacks to nova,
    # the reset of the power_state can be removed.
    task.node.power_state = states.POWER_ON
    task.node.save()

    task.driver.boot.clean_up_instance(task)
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.unconfigure_tenant_networks(task)
        task.driver.network.add_rescuing_network(task)
    if CONF.agent.manage_agent_boot:
        ramdisk_opts = deploy_utils.build_agent_options(task.node)
        # prepare_ramdisk will set the boot device
        task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
    manager_utils.node_power_action(task, states.POWER_ON)

    return states.RESCUEWAIT
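
# The rescue flow above wraps its port rewiring in
# power_state_for_network_configuration so the node is only powered up for
# the change when the network driver actually requires it (e.g. Smart NIC
# ports). A minimal sketch of such a context manager, assuming the network
# interface exposes need_power_on() as in ironic.conductor.utils; names and
# structure are illustrative, not the canonical implementation:

import contextlib


@contextlib.contextmanager
def power_state_for_network_configuration_sketch(task):
    """Power the node on for network changes only if the driver needs it."""
    if not task.driver.network.need_power_on(task):
        # Most network interfaces can reconfigure ports with the node off.
        yield task
        return
    previous = task.node.power_state
    manager_utils.node_power_action(task, states.POWER_ON)
    try:
        yield task
    finally:
        # Put the node back the way we found it once the ports are set up.
        if previous == states.POWER_OFF:
            manager_utils.node_power_action(task, states.POWER_OFF)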
def _tear_down_managed_boot(task):
    """Tear down the boot environment of ironic-managed inspection.

    :param task: a TaskManager instance.
    :returns: a list of error strings; empty on success.
    """
    errors = []

    ironic_manages_boot = utils.pop_node_nested_field(
        task.node, 'driver_internal_info', _IRONIC_MANAGES_BOOT)
    if not ironic_manages_boot:
        return errors

    try:
        task.driver.boot.clean_up_ramdisk(task)
    except Exception as exc:
        errors.append(_('unable to clean up ramdisk boot: %s') % exc)
        LOG.exception('Unable to clean up ramdisk boot for node %s',
                      task.node.uuid)
    try:
        with cond_utils.power_state_for_network_configuration(task):
            task.driver.network.remove_inspection_network(task)
    except Exception as exc:
        errors.append(_('unable to remove inspection ports: %s') % exc)
        LOG.exception('Unable to remove inspection network for node %s',
                      task.node.uuid)
    if CONF.inspector.power_off:
        try:
            cond_utils.node_power_action(task, states.POWER_OFF)
        except Exception as exc:
            errors.append(_('unable to power off the node: %s') % exc)
            LOG.exception('Unable to power off node %s', task.node.uuid)
    return errors
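
# _tear_down_managed_boot reports problems as a list of translated strings
# instead of raising, so a caller can fold several partial failures into a
# single node error. A hypothetical caller (name and wording illustrative):

def _abort_managed_inspection_sketch(task):
    """Roll back managed inspection and record any clean-up failures."""
    errors = _tear_down_managed_boot(task)
    if errors:
        error = _('Unable to tear down managed inspection: %s'
                  ) % '; '.join(errors)
        _inspection_error_handler(task, error, raise_exc=False)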
def unrescue(self, task):
    """Attempt to move a rescued node back to active state.

    :param task: a TaskManager instance.
    :raises: NetworkError if the rescue ports cannot be removed.
    :raises: InvalidParameterValue when the wrong power state is specified
        or the wrong driver info is specified for power management.
    :raises: other exceptions by the node's power driver if something
        wrong occurred during the power action.
    :raises: any boot interface's prepare_instance exceptions.
    :returns: Returns states.ACTIVE
    """
    manager_utils.node_power_action(task, states.POWER_OFF)
    # NOTE(TheJulia): Revealing that the power is off at any time can
    # cause external power sync to decide that the node must be off.
    # This may result in a post-rescued instance being turned off
    # unexpectedly after unrescue.
    # TODO(TheJulia): Once we have power/state callbacks to nova,
    # the reset of the power_state can be removed.
    task.node.power_state = states.POWER_ON
    task.node.save()

    self.clean_up(task)
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.configure_tenant_networks(task)
    task.driver.boot.prepare_instance(task)
    manager_utils.node_power_action(task, states.POWER_ON)

    return states.ACTIVE
def _finalize_rescue(self, task):
    """Call ramdisk to prepare rescue mode and verify result.

    :param task: A TaskManager instance
    :raises: InstanceRescueFailure, if rescuing failed
    """
    node = task.node
    try:
        result = self._client.finalize_rescue(node)
    except exception.IronicException as e:
        raise exception.InstanceRescueFailure(node=node.uuid,
                                              instance=node.instance_uuid,
                                              reason=e)
    if result.get('command_status') != 'SUCCEEDED':
        # NOTE(mariojv) Caller will clean up failed rescue in exception
        # handler.
        fail_reason = (_('Agent returned bad result for command '
                         'finalize_rescue: %(result)s') %
                       {'result': result.get('command_error')})
        raise exception.InstanceRescueFailure(node=node.uuid,
                                              instance=node.instance_uuid,
                                              reason=fail_reason)
    task.process_event('resume')
    task.driver.rescue.clean_up(task)
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.configure_tenant_networks(task)
    task.process_event('done')
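
# For reference, a finalize_rescue result that passes the check above looks
# roughly like this (illustrative payload; the exact fields come from the
# ironic-python-agent command API):
#
#     {'command_name': 'finalize_rescue',
#      'command_status': 'SUCCEEDED',
#      'command_error': None}
#
# Any other command_status, including a missing one, raises
# InstanceRescueFailure with the agent's command_error as the reason.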
def _start_managed_inspection(task):
    """Start inspection managed by ironic."""
    try:
        client = _get_client(task.context)
        endpoint = _get_callback_endpoint(client)
        params = dict(_parse_kernel_params(),
                      **{'ipa-inspection-callback-url': endpoint})
        if CONF.deploy.fast_track:
            params['ipa-api-url'] = deploy_utils.get_ironic_api_url()

        cond_utils.node_power_action(task, states.POWER_OFF)
        with cond_utils.power_state_for_network_configuration(task):
            task.driver.network.add_inspection_network(task)
        task.driver.boot.prepare_ramdisk(task, ramdisk_params=params)
        client.start_introspection(task.node.uuid, manage_boot=False)
        cond_utils.node_power_action(task, states.POWER_ON)
    except Exception as exc:
        LOG.exception('Unable to start managed inspection for node '
                      '%(uuid)s: %(err)s',
                      {'uuid': task.node.uuid, 'err': exc})
        error = _('unable to start inspection: %s') % exc
        _inspection_error_handler(task, error, raise_exc=True)
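
# The params assembled above become kernel command line arguments for the
# inspection ramdisk. With fast track enabled, the resulting dict would look
# roughly like this (illustrative values):
#
#     {'ipa-inspection-callback-url': 'https://inspector.example/v1/continue',
#      'ipa-api-url': 'https://ironic.example:6385',
#      ...}  # plus whatever _parse_kernel_params() contributes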
def deploy(self, task):
    """Boot the node into the instance-supplied ramdisk.

    :param task: a TaskManager instance.
    :returns: None; this deployment is completed in a single step.
    """
    if ('configdrive' in task.node.instance_info
            and 'ramdisk_boot_configdrive' not in
            task.driver.boot.capabilities):
        # TODO(dtantsur): make it an actual error?
        LOG.warning('A configuration drive is present in the ramdisk '
                    'deployment request of node %(node)s with boot '
                    'interface %(drv)s. The configuration drive will be '
                    'ignored for this deployment.',
                    {'node': task.node, 'drv': task.node.boot_interface})
    manager_utils.node_power_action(task, states.POWER_OFF)

    # Tenant networks must enable connectivity to the boot
    # location, as reboot() can otherwise be very problematic.
    # IDEA(TheJulia): Maybe a "trusted environment" mode flag
    # that we otherwise fail validation on for drivers that
    # require explicit security postures.
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.configure_tenant_networks(task)

    # calling boot.prepare_instance will also set the node
    # to PXE boot, and update PXE templates accordingly
    task.driver.boot.prepare_instance(task)

    # Power-on the instance, with PXE prepared, we're done.
    manager_utils.node_power_action(task, states.POWER_ON)
    LOG.info('Deployment setup for node %s done', task.node.uuid)
    return None
def deploy(self, task):
    manager_utils.node_power_action(task, states.POWER_OFF)
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.configure_tenant_networks(task)

    # calling boot.prepare_instance will also set the node
    # to PXE boot, and update PXE templates accordingly
    task.driver.boot.prepare_instance(task)

    # Power-on the instance, with PXE prepared, we're done.
    manager_utils.node_power_action(task, states.POWER_ON)
    LOG.info('Deployment setup for node %s done', task.node.uuid)
    return None
def deploy(self, task):
    """Start deployment of the task's node.

    Fetches instance image, updates the DHCP port options for next boot,
    and issues a reboot request to the power driver. This causes the node
    to boot into the deployment ramdisk and triggers the next phase of
    PXE-based deployment via agent heartbeats.

    :param task: a TaskManager instance containing the node to act on.
    :returns: deploy state DEPLOYWAIT when the node must wait for the
        deployment ramdisk, otherwise None.
    """
    node = task.node
    if manager_utils.is_fast_track(task):
        LOG.debug('Performing a fast track deployment for %(node)s.',
                  {'node': task.node.uuid})
        deploy_utils.cache_instance_image(task.context, node)
        check_image_size(task)
        # Update the database for the API and the task tracking resumes
        # the state machine state going from DEPLOYWAIT -> DEPLOYING
        task.process_event('wait')
        self.continue_deploy(task)
    elif task.driver.storage.should_write_image(task):
        # Standard deploy process
        deploy_utils.cache_instance_image(task.context, node)
        check_image_size(task)
        # Check if the driver has already performed a reboot in a
        # previous deploy step.
        if not task.node.driver_internal_info.get('deployment_reboot',
                                                  False):
            manager_utils.node_power_action(task, states.REBOOT)
        info = task.node.driver_internal_info
        info.pop('deployment_reboot', None)
        task.node.driver_internal_info = info
        task.node.save()
        return states.DEPLOYWAIT
    else:
        # Boot to a Storage Volume
        # TODO(TheJulia): At some point, we should de-dupe this code
        # as it is nearly identical to the agent deploy interface.
        # This is not being done now as it is expected to be
        # refactored in the near future.
        manager_utils.node_power_action(task, states.POWER_OFF)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.remove_provisioning_network(task)
            task.driver.network.configure_tenant_networks(task)
        task.driver.boot.prepare_instance(task)
        manager_utils.node_power_action(task, states.POWER_ON)
    return None
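
# The fast track branch in deploy() above relies on
# manager_utils.is_fast_track() to skip the reboot into the deploy ramdisk
# when an agent from a prior lookup is believed to still be running. A
# minimal sketch of such a check, assuming a heartbeat timestamp is kept in
# driver_internal_info (field name and format illustrative; the real helper
# lives in ironic.conductor.utils):

import time


def is_fast_track_sketch(task):
    """Guess whether a live agent makes a ramdisk reboot unnecessary."""
    if not CONF.deploy.fast_track:
        return False
    # A powered-off node cannot have a running agent.
    if task.driver.power.get_power_state(task) != states.POWER_ON:
        return False
    last = task.node.driver_internal_info.get('agent_last_heartbeat', 0)
    # Only trust an agent that has checked in recently enough.
    return time.time() - last < CONF.deploy.fast_track_timeout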
def clean_up(self, task):
    """Clean up after RESCUEWAIT timeout/failure or finishing rescue.

    Rescue password should be removed from the node and ramdisk boot
    environment should be cleaned if Ironic is managing the ramdisk boot.

    :param task: a TaskManager instance with the node.
    :raises: NetworkError if the rescue ports cannot be removed.
    """
    manager_utils.remove_node_rescue_password(task.node, save=True)
    if CONF.agent.manage_agent_boot:
        task.driver.boot.clean_up_ramdisk(task)
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.remove_rescuing_network(task)
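
# remove_node_rescue_password scrubs the rescue credentials that were stored
# for the ramdisk. A minimal sketch of what such a helper does, assuming the
# password is kept under instance_info (key names illustrative):

def remove_node_rescue_password_sketch(node, save=True):
    """Drop any stored rescue password from a node's instance_info."""
    instance_info = node.instance_info
    for field in ('rescue_password', 'hashed_rescue_password'):
        instance_info.pop(field, None)
    node.instance_info = instance_info
    if save:
        node.save()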
def deploy(self, task):
    """Perform a deployment to a node.

    Perform the necessary work to deploy an image onto the specified node.
    This method will be called after prepare(), which may have already
    performed any preparatory steps, such as pre-caching some data for the
    node.

    :param task: a TaskManager instance.
    :returns: status of the deploy. One of ironic.common.states.
    """
    if manager_utils.is_fast_track(task):
        LOG.debug('Performing a fast track deployment for %(node)s.',
                  {'node': task.node.uuid})
        # Update the database for the API and the task tracking resumes
        # the state machine state going from DEPLOYWAIT -> DEPLOYING
        task.process_event('wait')
        self.continue_deploy(task)
    elif task.driver.storage.should_write_image(task):
        # Check if the driver has already performed a reboot in a
        # previous deploy step.
        if not task.node.driver_internal_info.get('deployment_reboot'):
            manager_utils.node_power_action(task, states.REBOOT)
        info = task.node.driver_internal_info
        info.pop('deployment_reboot', None)
        task.node.driver_internal_info = info
        task.node.save()
        return states.DEPLOYWAIT
    else:
        # TODO(TheJulia): At some point, we should de-dupe this code
        # as it is nearly identical to the iscsi deploy interface.
        # This is not being done now as it is expected to be
        # refactored in the near future.
        manager_utils.node_power_action(task, states.POWER_OFF)
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.remove_provisioning_network(task)
            task.driver.network.configure_tenant_networks(task)
        task.driver.boot.prepare_instance(task)
        manager_utils.node_power_action(task, states.POWER_ON)
        LOG.info('Deployment to node %s done', task.node.uuid)
    return None
def tear_down(self, task):
    """Tear down a previous deployment on the task's node.

    :param task: a TaskManager instance.
    :returns: status of the deploy. One of ironic.common.states.
    :raises: NetworkError if the cleaning ports cannot be removed.
    :raises: InvalidParameterValue when the wrong power state is specified
        or the wrong driver info is specified for power management.
    :raises: StorageError when the storage interface attached volumes
        fail to detach.
    :raises: other exceptions by the node's power driver if something
        wrong occurred during the power action.
    """
    manager_utils.node_power_action(task, states.POWER_OFF)
    task.driver.storage.detach_volumes(task)
    deploy_utils.tear_down_storage_configuration(task)
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.unconfigure_tenant_networks(task)
        # NOTE(mgoddard): If the deployment was unsuccessful the node may
        # have ports on the provisioning network which were not deleted.
        task.driver.network.remove_provisioning_network(task)
    return states.DELETED
def tear_down(self, task):
    """Tear down a previous deployment on the task's node.

    Power off the node. All actual clean-up is done in the clean_up()
    method which should be called separately.

    :param task: a TaskManager instance containing the node to act on.
    :returns: deploy state DELETED.
    :raises: NetworkError if the cleaning ports cannot be removed.
    :raises: InvalidParameterValue when the wrong state is specified
        or the wrong driver info is specified.
    :raises: StorageError when volume detachment fails.
    :raises: other exceptions by the node's power driver if something
        wrong occurred during the power action.
    """
    manager_utils.node_power_action(task, states.POWER_OFF)
    task.driver.storage.detach_volumes(task)
    deploy_utils.tear_down_storage_configuration(task)
    with manager_utils.power_state_for_network_configuration(task):
        task.driver.network.unconfigure_tenant_networks(task)
        # NOTE(mgoddard): If the deployment was unsuccessful the node may
        # have ports on the provisioning network which were not deleted.
        task.driver.network.remove_provisioning_network(task)
    return states.DELETED
def reboot_and_finish_deploy(self, task):
    """Helper method to trigger reboot on the node and finish deploy.

    This method initiates a reboot on the node. On success, it marks the
    deploy as complete. On failure, it logs the error and marks deploy as
    failure.

    :param task: a TaskManager object containing the node
    :raises: InstanceDeployFailure, if node reboot failed.
    """
    wait = CONF.agent.post_deploy_get_power_state_retry_interval * 1000
    attempts = CONF.agent.post_deploy_get_power_state_retries + 1

    @retrying.retry(
        stop_max_attempt_number=attempts,
        retry_on_result=lambda state: state != states.POWER_OFF,
        wait_fixed=wait
    )
    def _wait_until_powered_off(task):
        return task.driver.power.get_power_state(task)

    node = task.node

    if CONF.agent.deploy_logs_collect == 'always':
        driver_utils.collect_ramdisk_logs(node)

    # Whether ironic should power off the node via out-of-band or
    # in-band methods
    oob_power_off = strutils.bool_from_string(
        node.driver_info.get('deploy_forces_oob_reboot', False))
    try:
        if not oob_power_off:
            try:
                self._client.power_off(node)
                _wait_until_powered_off(task)
            except Exception as e:
                LOG.warning('Failed to soft power off node %(node_uuid)s '
                            'in at least %(timeout)d seconds. '
                            '%(cls)s: %(error)s',
                            {'node_uuid': node.uuid,
                             'timeout': (wait * (attempts - 1)) / 1000,
                             'cls': e.__class__.__name__,
                             'error': e},
                            exc_info=not isinstance(
                                e, exception.IronicException))
                manager_utils.node_power_action(task, states.POWER_OFF)
        else:
            # Flush the file system prior to hard rebooting the node
            result = self._client.sync(node)
            error = result.get('faultstring')
            if error:
                if 'Unknown command' in error:
                    error = _('The version of the IPA ramdisk used in '
                              'the deployment does not support the '
                              'command "sync"')
                LOG.warning(
                    'Failed to flush the file system prior to hard '
                    'rebooting the node %(node)s. Error: %(error)s',
                    {'node': node.uuid, 'error': error})

            manager_utils.node_power_action(task, states.POWER_OFF)
    except Exception as e:
        msg = (_('Error rebooting node %(node)s after deploy. '
                 '%(cls)s: %(error)s') %
               {'node': node.uuid, 'cls': e.__class__.__name__,
                'error': e})
        log_and_raise_deployment_error(task, msg, exc=e)

    try:
        with manager_utils.power_state_for_network_configuration(task):
            task.driver.network.remove_provisioning_network(task)
            task.driver.network.configure_tenant_networks(task)
        manager_utils.node_power_action(task, states.POWER_ON)
    except Exception as e:
        msg = (_('Error rebooting node %(node)s after deploy. '
                 '%(cls)s: %(error)s') %
               {'node': node.uuid, 'cls': e.__class__.__name__,
                'error': e})
        # NOTE(mgoddard): Don't collect logs since the node has been
        # powered off.
        log_and_raise_deployment_error(task, msg, collect_logs=False,
                                       exc=e)

    if not node.deploy_step:
        # TODO(rloo): delete this 'if' part after deprecation period,
        # when we expect all (out-of-tree) drivers to support deploy
        # steps. After which we will always
        # notify_conductor_resume_deploy().
        task.process_event('done')
        LOG.info('Deployment to node %s done', task.node.uuid)
    else:
        manager_utils.notify_conductor_resume_deploy(task)
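
# The soft power off timeout in reboot_and_finish_deploy() is derived from
# two config options. For example, with an (illustrative) retry interval of
# 6 seconds and 10 retries:
#
#     wait     = 6 * 1000 = 6000 ms between power state polls
#     attempts = 10 + 1   = 11 polls in total
#
# so the warning above reports (6000 * 10) / 1000 = 60 seconds of waiting
# before ironic falls back to an out-of-band power off.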