Пример #1
0
    def heartbeat(self, task, **kwargs):
        """Method for agent to periodically check in.

        The agent should be sending its agent_url (so Ironic can talk back)
        as a kwarg. kwargs should have the following format::

         {
             'agent_url': 'http://AGENT_HOST:AGENT_PORT'
         }

        AGENT_PORT defaults to 9999.
        """
        node = task.node
        driver_internal_info = node.driver_internal_info
        LOG.debug(
            'Heartbeat from %(node)s, last heartbeat at %(heartbeat)s.',
            {'node': node.uuid,
             'heartbeat': driver_internal_info.get('agent_last_heartbeat')})
        driver_internal_info['agent_last_heartbeat'] = int(_time())
        try:
            driver_internal_info['agent_url'] = kwargs['agent_url']
        except KeyError:
            raise exception.MissingParameterValue(_('For heartbeat operation, '
                                                    '"agent_url" must be '
                                                    'specified.'))

        node.driver_internal_info = driver_internal_info
        node.save()

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.maintenance:
                # this shouldn't happen often, but skip the rest if it does.
                LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
                          'not taking any action.', {'node': node.uuid})
                return
            elif node.provision_state == states.DEPLOYWAIT:
                msg = _('Node failed to get image for deploy.')
                self.continue_deploy(task, **kwargs)
            elif (node.provision_state == states.DEPLOYING and
                  self.deploy_is_done(task)):
                msg = _('Node failed to move to active state.')
                self.reboot_to_instance(task, **kwargs)
            elif (node.provision_state == states.CLEANING and
                  not node.clean_step):
                # Agent booted from prepare_cleaning
                manager.set_node_cleaning_steps(task)
                self._notify_conductor_resume_clean(task)
            elif (node.provision_state == states.CLEANING and
                  node.clean_step):
                self.continue_cleaning(task, **kwargs)

        except Exception as e:
            err_info = {'node': node.uuid, 'msg': msg, 'e': e}
            last_error = _('Asynchronous exception for node %(node)s: '
                           '%(msg)s exception: %(e)s') % err_info
            LOG.exception(last_error)
            deploy_utils.set_failed_state(task, last_error)
Пример #2
0
def do_agent_iscsi_deploy(task, agent_client):
    """Method invoked when deployed with the agent ramdisk.

    This method is invoked by drivers for doing iSCSI deploy
    using agent ramdisk.  This method assumes that the agent
    is booted up on the node and is heartbeating.

    :param task: a TaskManager object containing the node.
    :param agent_client: an instance of agent_client.AgentClient
        which will be used during iscsi deploy (for exposing node's
        target disk via iSCSI, for install boot loader, etc).
    :returns: a dictionary containing the following keys:
        For partition image:
            'root uuid': UUID of root partition
            'efi system partition uuid': UUID of the uefi system partition
                                         (if boot mode is uefi).
            NOTE: If key exists but value is None, it means partition doesn't
                  exist.
        For whole disk image:
            'disk identifier': ID of the disk to which image was deployed.
    :raises: InstanceDeployFailure, if it encounters some error
        during the deploy.
    """
    node = task.node
    iscsi_options = build_deploy_ramdisk_options(node)

    iqn = iscsi_options['iscsi_target_iqn']
    result = agent_client.start_iscsi_target(node, iqn)
    if result['command_status'] == 'FAILED':
        msg = (_("Failed to start the iSCSI target to deploy the "
                 "node %(node)s. Error: %(error)s") %
               {'node': node.uuid, 'error': result['command_error']})
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(reason=msg)

    address = parse.urlparse(node.driver_internal_info['agent_url'])
    address = address.hostname

    # TODO(lucasagomes): The 'error' and 'key' parameters in the
    # dictionary below are just being passed because it's needed for
    # the iscsi_deploy.continue_deploy() method, we are fooling it
    # for now. The agent driver doesn't use/need those. So we need to
    # refactor this bits here later.
    iscsi_params = {'error': result['command_error'],
                    'iqn': iqn,
                    'key': iscsi_options['deployment_key'],
                    'address': address}

    uuid_dict_returned = continue_deploy(task, **iscsi_params)
    root_uuid_or_disk_id = uuid_dict_returned.get(
        'root uuid', uuid_dict_returned.get('disk identifier'))

    # TODO(lucasagomes): Move this bit saving the root_uuid to
    # iscsi_deploy.continue_deploy()
    driver_internal_info = node.driver_internal_info
    driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
    node.driver_internal_info = driver_internal_info
    node.save()

    return uuid_dict_returned
Пример #3
0
    def deploy_steps(self, task, **data):
        http_method = data.pop('http_method')
        driver_info = _NodeDriverInfoAdapter(task.node)

        if http_method == 'GET':
            ssh_keys_step = _InjectSSHKeyStepRequest(task, driver_info)
            return ssh_keys_step()

        steps_mapping = _DeployStepMapping()
        data = _DeployStepsAdapter(data)
        try:
            request_cls = steps_mapping.name_to_step[data.action]
        except KeyError:
            if data.action is not None:
                raise RuntimeError(
                    'There is no name mapping for deployment step: '
                    '{!r}'.format(data.action))

            message = (
                'Bareon\'s callback service have failed with internall error')
            if data.status_details:
                message += '\nFailure details: {}'.format(
                    pprint.pformat(data.status_details))
            # TODO(dbogun): add support for existing log extraction mechanism
            deploy_utils.set_failed_state(
                task, message, collect_logs=False)
        else:
            handler = request_cls.result_handler(
                task, driver_info, data)
            handler()

        return {'url': None}
Пример #4
0
def finish_deploy(task, address):
    """Notifies the ramdisk to reboot the node and makes the instance active.

    This method notifies the ramdisk to proceed to reboot and then
    makes the instance active.

    :param task: a TaskManager object.
    :param address: The IP address of the bare metal node.
    :raises: InstanceDeployFailure, if notifying ramdisk failed.
    """
    node = task.node
    try:
        deploy_utils.notify_ramdisk_to_proceed(address)
    except Exception as e:
        LOG.error(
            _LE("Deploy failed for instance %(instance)s. " "Error: %(error)s"),
            {"instance": node.instance_uuid, "error": e},
        )
        msg = _("Failed to notify ramdisk to reboot after bootloader " "installation. Error: %s") % e
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(msg)

    # TODO(lucasagomes): When deploying a node with the DIB ramdisk
    # Ironic will not power control the node at the end of the deployment,
    # it's the DIB ramdisk that reboots the node. But, for the SSH driver
    # some changes like setting the boot device only gets applied when the
    # machine is powered off and on again. So the code below is enforcing
    # it. For Liberty we need to change the DIB ramdisk so that Ironic
    # always controls the power state of the node for all drivers.
    if deploy_utils.get_boot_option(node) == "local" and "ssh" in node.driver:
        manager_utils.node_power_action(task, states.REBOOT)

    LOG.info(_LI("Deployment to node %s done"), node.uuid)
    task.process_event("done")
Пример #5
0
    def continue_deploy(self, task, **kwargs):
        """Method invoked when deployed with the IPA ramdisk.

        This method is invoked during a heartbeat from an agent when
        the node is in wait-call-back state. This deploys the image on
        the node and then configures the node to boot according to the
        desired boot option (netboot or localboot).

        :param task: a TaskManager object containing the node.
        :param kwargs: the kwargs passed from the heartbeat method.
        :raises: InstanceDeployFailure, if it encounters some error during
            the deploy.
        """
        task.process_event("resume")
        node = task.node
        LOG.debug("Continuing the deployment on node %s", node.uuid)

        uuid_dict_returned = do_agent_iscsi_deploy(task, self._client)

        if deploy_utils.get_boot_option(node) == "local":
            # Install the boot loader
            root_uuid = uuid_dict_returned.get("root uuid")
            efi_sys_uuid = uuid_dict_returned.get("efi system partition uuid")
            self.configure_local_boot(task, root_uuid=root_uuid, efi_system_part_uuid=efi_sys_uuid)

        try:
            task.driver.boot.prepare_instance(task)
        except Exception as e:
            LOG.error(
                _LE("Deploy failed for instance %(instance)s. " "Error: %(error)s"),
                {"instance": node.instance_uuid, "error": e},
            )
            msg = _("Failed to continue agent deployment.")
            deploy_utils.set_failed_state(task, msg)
        self.reboot_and_finish_deploy(task)
Пример #6
0
    def reboot_to_instance(self, task, **kwargs):
        task.process_event('resume')
        node = task.node
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') %
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        LOG.info(_LI('Image successfully written to node %s'), node.uuid)
        LOG.debug('Rebooting node %s to instance', node.uuid)

        manager_utils.node_set_boot_device(task, 'disk', persistent=True)
        self.reboot_and_finish_deploy(task)

        # NOTE(TheJulia): If we deployed a whole disk image, we
        # should expect a whole disk image and clean-up the tftp files
        # on-disk incase the node is disregarding the boot preference.
        # TODO(rameshg87): Not all in-tree drivers using reboot_to_instance
        # have a boot interface. So include a check for now. Remove this
        # check once all in-tree drivers have a boot interface.
        if task.driver.boot:
            task.driver.boot.clean_up_ramdisk(task)
Пример #7
0
    def reboot_to_instance(self, task, **kwargs):
        task.process_event('resume')
        node = task.node
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') %
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        LOG.info(_LI('Image successfully written to node %s'), node.uuid)
        LOG.debug('Rebooting node %s to instance', node.uuid)

        manager_utils.node_set_boot_device(task, 'disk', persistent=True)
        self.reboot_and_finish_deploy(task)
        # NOTE(TheJulia): If we we deployed a whole disk image, we
        # should expect a whole disk image and clean-up the tftp files
        # on-disk incase the node is disregarding the boot preference.
        # TODO(rameshg87): This shouldn't get called for virtual media deploy
        # drivers (iLO and iRMC).  This is just a hack, but it will be taken
        # care in boot/deploy interface separation.
        if (_driver_uses_pxe(task.driver) and
                node.driver_internal_info.get('is_whole_disk_image')):
            _clean_up_pxe(task)
Пример #8
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the iSCSI deployment from where ramdisk left off.

        This method continues the iSCSI deployment from the conductor node
        and writes the deploy image to the bare metal's disk. After that,
        it does the following depending on boot_option for deploy:

        - If the boot_option requested for this deploy is 'local', then it
          sets the node to boot from disk (ramdisk installs the boot loader
          present within the image to the bare metal's disk).
        - If the boot_option requested is 'netboot' or no boot_option is
          requested, it finds/creates the boot ISO to boot the instance
          image, attaches the boot ISO to the bare metal and then sets
          the node to boot from CDROM.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs containing parameters for iSCSI deployment.
        :raises: InvalidState
        """
        node = task.node
        task.process_event('resume')

        iwdi = node.driver_internal_info.get('is_whole_disk_image')
        ilo_common.cleanup_vmedia_boot(task)
        uuid_dict = iscsi_deploy.continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict.get(
            'root uuid', uuid_dict.get('disk identifier'))

        try:
            # Set boot mode
            ilo_common.update_boot_mode(task)

            # Need to enable secure boot, if being requested
            _update_secure_boot_mode(task, True)

            # For iscsi_ilo driver, we boot from disk every time if the image
            # deployed is a whole disk image.
            if iscsi_deploy.get_boot_option(node) == "local" or iwdi:
                manager_utils.node_set_boot_device(task, boot_devices.DISK,
                                                   persistent=True)

                # Ask the ramdisk to install bootloader and
                # wait for the call-back through the vendor passthru
                # 'pass_bootloader_install_info', if it's not a whole
                # disk image.
                if not iwdi:
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return
            else:
                self._configure_vmedia_boot(task, root_uuid_or_disk_id)
        except Exception as e:
            LOG.error(_LE('Deploy failed for instance %(instance)s. '
                          'Error: %(error)s'),
                      {'instance': node.instance_uuid, 'error': e})
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            iscsi_deploy.finish_deploy(task, kwargs.get('address'))
Пример #9
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        :raises: InvalidState
        """
        node = task.node
        task.process_event('resume')

        _destroy_token_file(node)
        is_whole_disk_image = node.driver_internal_info['is_whole_disk_image']
        uuid_dict = iscsi_deploy.continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict.get(
            'root uuid', uuid_dict.get('disk identifier'))

        # save the node's root disk UUID so that another conductor could
        # rebuild the PXE config file. Due to a shortcoming in Nova objects,
        # we have to assign to node.driver_internal_info so the node knows it
        # has changed.
        driver_internal_info = node.driver_internal_info
        driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
        node.driver_internal_info = driver_internal_info
        node.save()

        try:
            if iscsi_deploy.get_boot_option(node) == "local":
                deploy_utils.try_set_boot_device(task, boot_devices.DISK)

                # If it's going to boot from the local disk, get rid of
                # the PXE configuration files used for the deployment
                pxe_utils.clean_up_pxe_config(task)

                # Ask the ramdisk to install bootloader and
                # wait for the call-back through the vendor passthru
                # 'pass_bootloader_install_info', if it's not a
                # whole disk image.
                if not is_whole_disk_image:
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return
            else:
                pxe_config_path = pxe_utils.get_pxe_config_file_path(node.uuid)
                boot_mode = deploy_utils.get_boot_mode_for_deploy(node)
                deploy_utils.switch_pxe_config(pxe_config_path,
                                               root_uuid_or_disk_id,
                                               boot_mode, is_whole_disk_image)

        except Exception as e:
            LOG.error(_LE('Deploy failed for instance %(instance)s. '
                          'Error: %(error)s'),
                      {'instance': node.instance_uuid, 'error': e})
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            iscsi_deploy.finish_deploy(task, kwargs.get('address'))
Пример #10
0
 def _set_failed_state(self, task, error):
     try:
         deploy_utils.set_failed_state(task, error, collect_logs=False)
     except TypeError:
         LOG.warning(_LW("To have proper error handling please update "
                         "your Ironic installation to contain commit "
                         "bb62f256f7aa55c292ebeae73ca25a4a9f0ec8c0."))
         deploy_utils.set_failed_state(task, error)
Пример #11
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        :raises: InvalidState
        """
        node = task.node
        LOG.warning(_LW("The node %s is using the bash deploy ramdisk for "
                        "its deployment. This deploy ramdisk has been "
                        "deprecated. Please use the ironic-python-agent "
                        "(IPA) ramdisk instead."), node.uuid)

        # TODO(rameshg87): Remove the below code once we stop supporting
        # bash ramdisk in Ironic.
        if node.provision_state == states.CLEANWAIT:
            return self._initiate_cleaning(task)

        task.process_event('resume')
        LOG.debug('Continuing the deployment on node %s', node.uuid)

        is_whole_disk_image = node.driver_internal_info['is_whole_disk_image']
        uuid_dict_returned = continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict_returned.get(
            'root uuid', uuid_dict_returned.get('disk identifier'))

        # save the node's root disk UUID so that another conductor could
        # rebuild the PXE config file. Due to a shortcoming in Nova objects,
        # we have to assign to node.driver_internal_info so the node knows it
        # has changed.
        driver_internal_info = node.driver_internal_info
        driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
        node.driver_internal_info = driver_internal_info
        node.save()

        try:
            task.driver.boot.prepare_instance(task)

            if deploy_utils.get_boot_option(node) == "local":
                if not is_whole_disk_image:
                    LOG.debug('Installing the bootloader on node %s',
                              node.uuid)
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return

        except Exception as e:
            LOG.error(_LE('Deploy failed for instance %(instance)s. '
                          'Error: %(error)s'),
                      {'instance': node.instance_uuid, 'error': e})
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            finish_deploy(task, kwargs.get('address'))
Пример #12
0
 def _fail_deploy(task, msg, raise_exception=True):
     """Fail the deploy after logging and setting error states."""
     if isinstance(msg, Exception):
         msg = (_('Deploy failed for instance %(instance)s. '
                  'Error: %(error)s') %
                {'instance': node.instance_uuid, 'error': msg})
     deploy_utils.set_failed_state(task, msg)
     destroy_images(task.node.uuid)
     if raise_exception:
         raise exception.InstanceDeployFailure(msg)
Пример #13
0
def do_agent_iscsi_deploy(task, agent_client):
    """Method invoked when deployed with the agent ramdisk.

    This method is invoked by drivers for doing iSCSI deploy
    using agent ramdisk.  This method assumes that the agent
    is booted up on the node and is heartbeating.

    :param task: a TaskManager object containing the node.
    :param agent_client: an instance of agent_client.AgentClient
        which will be used during iscsi deploy (for exposing node's
        target disk via iSCSI, for install boot loader, etc).
    :returns: a dictionary containing the following keys:
        For partition image:
            'root uuid': UUID of root partition
            'efi system partition uuid': UUID of the uefi system partition
                                         (if boot mode is uefi).
            NOTE: If key exists but value is None, it means partition doesn't
                  exist.
        For whole disk image:
            'disk identifier': ID of the disk to which image was deployed.
    :raises: InstanceDeployFailure, if it encounters some error
        during the deploy.
    """
    node = task.node
    i_info = deploy_utils.parse_instance_info(node)
    wipe_disk_metadata = not i_info['preserve_ephemeral']

    iqn = 'iqn.2008-10.org.openstack:%s' % node.uuid
    portal_port = CONF.iscsi.portal_port
    result = agent_client.start_iscsi_target(
        node, iqn,
        portal_port,
        wipe_disk_metadata=wipe_disk_metadata)
    if result['command_status'] == 'FAILED':
        msg = (_("Failed to start the iSCSI target to deploy the "
                 "node %(node)s. Error: %(error)s") %
               {'node': node.uuid, 'error': result['command_error']})
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(reason=msg)

    address = parse.urlparse(node.driver_internal_info['agent_url'])
    address = address.hostname

    uuid_dict_returned = continue_deploy(task, iqn=iqn, address=address)
    root_uuid_or_disk_id = uuid_dict_returned.get(
        'root uuid', uuid_dict_returned.get('disk identifier'))

    # TODO(lucasagomes): Move this bit saving the root_uuid to
    # continue_deploy()
    driver_internal_info = node.driver_internal_info
    driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
    node.driver_internal_info = driver_internal_info
    node.save()

    return uuid_dict_returned
Пример #14
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node."""

        node = task.node
        task.process_event('resume')
        err_msg = _('Failed to continue deployment with Fuel Agent.')

        agent_status = kwargs.get('status')
        if agent_status != 'ready':
            LOG.error(_LE('Deploy failed for node %(node)s. Fuel Agent is not '
                      'in ready state, error: %(error)s'), {'node': node.uuid,
                      'error': kwargs.get('error_message')})
            deploy_utils.set_failed_state(task, err_msg)
            return

        params = _parse_driver_info(node)
        params['host'] = kwargs.get('address')
        cmd = ('%s --data_driver ironic  --config-file '
               '/etc/fuel-agent/fuel-agent.conf' % params.pop('script'))
        if CONF.debug:
            cmd += ' --debug'
        instance_info = node.instance_info

        try:
            deploy_data = _get_deploy_data(task.context,
                                           instance_info['image_source'])

            image_data = {"/": {"uri": instance_info['image_url'],
                                "format": "raw",
                                "container": "raw"}}

            deploy_data['ks_meta']['image_data'] = image_data

            ssh = utils.ssh_connect(params)
            sftp = ssh.open_sftp()
            _sftp_upload(sftp, json.dumps(deploy_data), '/tmp/provision.json')

            # swift configdrive store should be disabled
            configdrive = instance_info.get('configdrive')
            if configdrive is not None:
                _sftp_upload(sftp, configdrive, '/tmp/config-drive.img')

            _ssh_execute(ssh, cmd, params)
            LOG.info(_LI('Fuel Agent pass on node %s'), node.uuid)
            manager_utils.node_set_boot_device(task, boot_devices.DISK,
                                               persistent=True)
            manager_utils.node_power_action(task, states.REBOOT)
        except Exception as e:
            msg = (_('Deploy failed for node %(node)s. Error: %(error)s') %
                   {'node': node.uuid, 'error': e})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
        else:
            task.process_event('done')
            LOG.info(_LI('Deployment to node %s done'), task.node.uuid)
Пример #15
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        :raises: InvalidState
        """
        node = task.node
        LOG.warning(
            _LW(
                "The node %s is using the bash deploy ramdisk for "
                "its deployment. This deploy ramdisk has been "
                "deprecated. Please use the ironic-python-agent "
                "(IPA) ramdisk instead."
            ),
            node.uuid,
        )
        task.process_event("resume")
        LOG.debug("Continuing the deployment on node %s", node.uuid)

        is_whole_disk_image = node.driver_internal_info["is_whole_disk_image"]
        uuid_dict_returned = continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict_returned.get("root uuid", uuid_dict_returned.get("disk identifier"))

        # save the node's root disk UUID so that another conductor could
        # rebuild the PXE config file. Due to a shortcoming in Nova objects,
        # we have to assign to node.driver_internal_info so the node knows it
        # has changed.
        driver_internal_info = node.driver_internal_info
        driver_internal_info["root_uuid_or_disk_id"] = root_uuid_or_disk_id
        node.driver_internal_info = driver_internal_info
        node.save()

        try:
            task.driver.boot.prepare_instance(task)

            if deploy_utils.get_boot_option(node) == "local":
                if not is_whole_disk_image:
                    LOG.debug("Installing the bootloader on node %s", node.uuid)
                    deploy_utils.notify_ramdisk_to_proceed(kwargs["address"])
                    task.process_event("wait")
                    return

        except Exception as e:
            LOG.error(
                _LE("Deploy failed for instance %(instance)s. " "Error: %(error)s"),
                {"instance": node.instance_uuid, "error": e},
            )
            msg = _("Failed to continue iSCSI deployment.")
            deploy_utils.set_failed_state(task, msg)
        else:
            finish_deploy(task, kwargs.get("address"))
Пример #16
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        :raises: InvalidState
        """
        node = task.node
        task.process_event('resume')
        LOG.debug('Continuing the deployment on node %s', node.uuid)

        is_whole_disk_image = node.driver_internal_info['is_whole_disk_image']
        uuid_dict_returned = continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict_returned.get(
            'root uuid', uuid_dict_returned.get('disk identifier'))

        # save the node's root disk UUID so that another conductor could
        # rebuild the PXE config file. Due to a shortcoming in Nova objects,
        # we have to assign to node.driver_internal_info so the node knows it
        # has changed.
        driver_internal_info = node.driver_internal_info
        driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
        node.driver_internal_info = driver_internal_info
        node.save()

        try:
            if deploy_utils.get_boot_option(node) == "local":
                deploy_utils.try_set_boot_device(task, boot_devices.DISK)

                if not is_whole_disk_image:
                    LOG.debug('Installing the bootloader on node %s',
                              node.uuid)
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return

            task.driver.boot.prepare_instance(task)
        except Exception as e:
            LOG.error(_LE('Deploy failed for instance %(instance)s. '
                          'Error: %(error)s'),
                      {'instance': node.instance_uuid, 'error': e})
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            finish_deploy(task, kwargs.get('address'))
Пример #17
0
def log_and_raise_deployment_error(task, msg, collect_logs=True, exc=None):
    """Helper method to log the error and raise exception.

    :param task: a TaskManager instance containing the node to act on.
    :param msg: the message to set in last_error of the node.
    :param collect_logs: Boolean indicating whether to attempt to collect
                         logs from IPA-based ramdisk. Defaults to True.
                         Actual log collection is also affected by
                         CONF.agent.deploy_logs_collect config option.
    :param exc: Exception that caused the failure.
    """
    log_traceback = (exc is not None and
                     not isinstance(exc, exception.IronicException))
    LOG.error(msg, exc_info=log_traceback)
    deploy_utils.set_failed_state(task, msg, collect_logs=collect_logs)
    raise exception.InstanceDeployFailure(msg)
Пример #18
0
    def heartbeat(self, task, **kwargs):
        """Method for agent to periodically check in.

        The agent should be sending its agent_url (so Ironic can talk back)
        as a kwarg. kwargs should have the following format::

         {
             'agent_url': 'http://AGENT_HOST:AGENT_PORT'
         }

        AGENT_PORT defaults to 9999.
        """
        node = task.node
        driver_info = node.driver_info
        LOG.debug(
            'Heartbeat from %(node)s, last heartbeat at %(heartbeat)s.',
            {'node': node.uuid,
             'heartbeat': driver_info.get('agent_last_heartbeat')})
        driver_info['agent_last_heartbeat'] = int(_time())
        try:
            driver_info['agent_url'] = kwargs['agent_url']
        except KeyError:
            raise exception.MissingParameterValue(_('For heartbeat operation, '
                                                    '"agent_url" must be '
                                                    'specified.'))

        node.driver_info = driver_info
        node.save()

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.provision_state == states.DEPLOYWAIT:
                msg = _('Node failed to get image for deploy.')
                self._continue_deploy(task, **kwargs)
            elif (node.provision_state == states.DEPLOYING
                    and self._deploy_is_done(node)):
                msg = _('Node failed to move to active state.')
                self._reboot_to_instance(task, **kwargs)
        except Exception:
            LOG.exception(_LE('Async exception for %(node)s: %(msg)s'),
                          {'node': node,
                           'msg': msg})
            deploy_utils.set_failed_state(task, msg)
Пример #19
0
    def reboot_to_instance(self, task, **kwargs):
        node = task.node
        LOG.debug("Preparing to reboot to instance for node %s", node.uuid)
        error = self._check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = _("node %(node)s command status errored: %(error)s") % ({"node": node.uuid, "error": error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        LOG.debug("Rebooting node %s to disk", node.uuid)

        manager_utils.node_set_boot_device(task, "disk", persistent=True)
        manager_utils.node_power_action(task, states.REBOOT)

        task.process_event("done")
Пример #20
0
    def reboot_to_instance(self, task, **kwargs):
        node = task.node
        LOG.debug('Preparing to reboot to instance for node %s',
                  node.uuid)
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') %
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        LOG.debug('Rebooting node %s to disk', node.uuid)

        manager_utils.node_set_boot_device(task, 'disk', persistent=True)
        self.reboot_and_finish_deploy(task)
Пример #21
0
    def _continue_deploy(self, task, **kwargs):
        """Continues the iSCSI deployment from where ramdisk left off.

        Continues the iSCSI deployment from the conductor node, finds the
        boot ISO to boot the node, and sets the node to boot from boot ISO.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs containing parameters for iSCSI deployment.
        :raises: InvalidState
        """
        node = task.node
        task.process_event('resume')

        ilo_common.cleanup_vmedia_boot(task)
        root_uuid = iscsi_deploy.continue_deploy(task, **kwargs)

        if not root_uuid:
            return

        try:
            boot_iso = _get_boot_iso(task, root_uuid)

            if not boot_iso:
                LOG.error(_LE("Cannot get boot ISO for node %s"), node.uuid)
                return

            ilo_common.setup_vmedia_for_boot(task, boot_iso)
            manager_utils.node_set_boot_device(task, boot_devices.CDROM)

            address = kwargs.get('address')
            deploy_utils.notify_deploy_complete(address)

            LOG.info(_LI('Deployment to node %s done'), node.uuid)

            i_info = node.instance_info
            i_info['ilo_boot_iso'] = boot_iso
            node.instance_info = i_info
            task.process_event('done')
        except Exception as e:
            LOG.error(_LE('Deploy failed for instance %(instance)s. '
                          'Error: %(error)s'),
                      {'instance': node.instance_uuid, 'error': e})
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
Пример #22
0
def validate_bootloader_install_status(task, input_params):
    """Validate if bootloader was installed.

    This method first validates if deploy key sent in vendor passthru
    was correct one, and then validates whether bootloader installation
    was successful or not.

    :param task: A TaskManager object.
    :param input_params: A dictionary of params sent as input to passthru.
    :raises: InstanceDeployFailure, if bootloader installation was
        reported from ramdisk as failure.
    """
    if input_params['status'] != 'SUCCEEDED':
        msg = (_('Failed to install bootloader on node %(node)s. '
                 'Error: %(error)s.') %
               {'node': task.node.uuid, 'error': input_params.get('error')})
        LOG.error(msg)
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(msg)
Пример #23
0
    def _get_uuid_from_result(self, task, type_uuid):
        command = self._client.get_commands_status(task.node)[-1]

        if command['command_result'] is not None:
            words = command['command_result']['result'].split()
            for word in words:
                if type_uuid in word:
                    result = word.split('=')[1]
                    if not result:
                        msg = (_('Command result did not return %(type_uuid)s '
                                 'for node %(node)s. The version of the IPA '
                                 'ramdisk used in the deployment might not '
                                 'have support for provisioning of '
                                 'partition images.') %
                               {'type_uuid': type_uuid,
                                'node': task.node.uuid})
                        LOG.error(msg)
                        deploy_utils.set_failed_state(task, msg)
                        return
                    return result
Пример #24
0
    def deploy(self, task):
        """Perform a deployment to a node."""
        manager_utils.node_power_action(task, states.REBOOT)
        if CONF.ansible.use_ramdisk_callback:
            return states.DEPLOYWAIT

        node = task.node
        ip_addr = _get_node_ip_dhcp(task)
        try:
            self._ansible_deploy(task, ip_addr)
        except Exception as e:
            error = _('Deploy failed for node %(node)s: '
                      'Error: %(exc)s') % {'node': node.uuid,
                                           'exc': six.text_type(e)}
            LOG.exception(error)
            deploy_utils.set_failed_state(task, error, collect_logs=False)

        else:
            self.reboot_to_instance(task)
            return states.DEPLOYDONE
Пример #25
0
    def reboot_to_instance(self, task):
        task.process_event('resume')
        node = task.node
        iwdi = task.node.driver_internal_info.get('is_whole_disk_image')
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') %
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return
        if not iwdi:
            root_uuid = self._get_uuid_from_result(task, 'root_uuid')
            if deploy_utils.get_boot_mode_for_deploy(node) == 'uefi':
                efi_sys_uuid = (
                    self._get_uuid_from_result(task,
                                               'efi_system_partition_uuid'))
            else:
                efi_sys_uuid = None
            driver_internal_info = task.node.driver_internal_info
            driver_internal_info['root_uuid_or_disk_id'] = root_uuid
            task.node.driver_internal_info = driver_internal_info
            task.node.save()
            self.prepare_instance_to_boot(task, root_uuid, efi_sys_uuid)
        LOG.info('Image successfully written to node %s', node.uuid)
        LOG.debug('Rebooting node %s to instance', node.uuid)
        if iwdi:
            manager_utils.node_set_boot_device(task, 'disk', persistent=True)

        self.reboot_and_finish_deploy(task)

        # NOTE(TheJulia): If we deployed a whole disk image, we
        # should expect a whole disk image and clean-up the tftp files
        # on-disk incase the node is disregarding the boot preference.
        # TODO(rameshg87): Not all in-tree drivers using reboot_to_instance
        # have a boot interface. So include a check for now. Remove this
        # check once all in-tree drivers have a boot interface.
        if task.driver.boot and iwdi:
            task.driver.boot.clean_up_ramdisk(task)
Пример #26
0
    def _continue_deploy(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        :raises: InvalidState
        """
        node = task.node
        task.process_event('resume')

        _destroy_token_file(node)

        root_uuid = iscsi_deploy.continue_deploy(task, **kwargs)

        if not root_uuid:
            return

        try:
            if iscsi_deploy.get_boot_option(node) == "local":
                try_set_boot_device(task, boot_devices.DISK)
                # If it's going to boot from the local disk, get rid of
                # the PXE configuration files used for the deployment
                pxe_utils.clean_up_pxe_config(task)
            else:
                pxe_config_path = pxe_utils.get_pxe_config_file_path(node.uuid)
                deploy_utils.switch_pxe_config(pxe_config_path, root_uuid,
                    driver_utils.get_node_capability(node, 'boot_mode'))

            deploy_utils.notify_deploy_complete(kwargs['address'])
            LOG.info(_LI('Deployment to node %s done'), node.uuid)
            task.process_event('done')
        except Exception as e:
            LOG.error(_LE('Deploy failed for instance %(instance)s. '
                          'Error: %(error)s'),
                      {'instance': node.instance_uuid, 'error': e})
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
Пример #27
0
def continue_deploy(task, **kwargs):
    """Resume a deployment upon getting POST data from deploy ramdisk.

    This method raises no exceptions because it is intended to be
    invoked asynchronously as a callback from the deploy ramdisk.

    :param task: a TaskManager instance containing the node to act on.
    :param kwargs: the kwargs to be passed to deploy.
    :returns: UUID of the root partition or None on error.
    """
    node = task.node

    node.provision_state = states.DEPLOYING
    node.save()

    params = get_deploy_info(node, **kwargs)
    ramdisk_error = kwargs.get('error')

    if ramdisk_error:
        LOG.error(_LE('Error returned from deploy ramdisk: %s'),
                  ramdisk_error)
        deploy_utils.set_failed_state(task, _('Failure in deploy ramdisk.'))
        destroy_images(node.uuid)
        return

    LOG.info(_LI('Continuing deployment for node %(node)s, params %(params)s'),
                 {'node': node.uuid, 'params': params})

    root_uuid = None
    try:
        root_uuid = deploy_utils.deploy(**params)
    except Exception as e:
        LOG.error(_LE('Deploy failed for instance %(instance)s. '
                      'Error: %(error)s'),
                  {'instance': node.instance_uuid, 'error': e})
        deploy_utils.set_failed_state(task, _('Failed to continue '
                                              'iSCSI deployment.'))

    destroy_images(node.uuid)
    return root_uuid
Пример #28
0
    def _continue_deploy(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        """
        node = task.node

        if node.provision_state != states.DEPLOYWAIT:
            LOG.error(_LE('Node %s is not waiting to be deployed.'), node.uuid)
            return

        _destroy_token_file(node)

        root_uuid = iscsi_deploy.continue_deploy(task, **kwargs)

        if not root_uuid:
            return

        try:
            pxe_config_path = pxe_utils.get_pxe_config_file_path(node.uuid)
            deploy_utils.switch_pxe_config(pxe_config_path, root_uuid,
                          driver_utils.get_node_capability(node, 'boot_mode'))

            deploy_utils.notify_deploy_complete(kwargs['address'])

            LOG.info(_LI('Deployment to node %s done'), node.uuid)
            node.provision_state = states.ACTIVE
            node.target_provision_state = states.NOSTATE
            node.save()
        except Exception as e:

            LOG.error(_LE('Deploy failed for instance %(instance)s. '
                          'Error: %(error)s'),
                      {'instance': node.instance_uuid, 'error': e})
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
Пример #29
0
    def _reboot_to_instance(self, task, **kwargs):
        node = task.node
        LOG.debug('Preparing to reboot to instance for node %s',
                  node.uuid)
        error = self._check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = _('node %(node)s command status errored: %(error)s') % (
                   {'node': node.uuid, 'error': error})
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        LOG.debug('Rebooting node %s to disk', node.uuid)

        manager_utils.node_set_boot_device(task, 'disk', persistent=True)
        manager_utils.node_power_action(task, states.REBOOT)

        node.provision_state = states.ACTIVE
        node.target_provision_state = states.NOSTATE
        node.save()
Пример #30
0
def validate_bootloader_install_status(task, input_params):
    """Validate if bootloader was installed.

    This method first validates if deploy key sent in vendor passthru
    was correct one, and then validates whether bootloader installation
    was successful or not.

    :param task: A TaskManager object.
    :param input_params: A dictionary of params sent as input to passthru.
    :raises: InstanceDeployFailure, if bootloader installation was
        reported from ramdisk as failure.
    """
    node = task.node
    if input_params["status"] != "SUCCEEDED":
        msg = _("Failed to install bootloader on node %(node)s. " "Error: %(error)s.") % {
            "node": node.uuid,
            "error": input_params.get("error"),
        }
        LOG.error(msg)
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(msg)

    LOG.info(_LI("Bootloader successfully installed on node %s"), node.uuid)
Пример #31
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        :raises: InvalidState
        """
        node = task.node
        task.process_event('resume')
        LOG.debug('Continuing the deployment on node %s', node.uuid)

        is_whole_disk_image = node.driver_internal_info['is_whole_disk_image']
        uuid_dict = iscsi_deploy.continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict.get('root uuid',
                                             uuid_dict.get('disk identifier'))

        # save the node's root disk UUID so that another conductor could
        # rebuild the PXE config file. Due to a shortcoming in Nova objects,
        # we have to assign to node.driver_internal_info so the node knows it
        # has changed.
        driver_internal_info = node.driver_internal_info
        driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
        node.driver_internal_info = driver_internal_info
        node.save()

        try:
            if iscsi_deploy.get_boot_option(node) == "local":
                deploy_utils.try_set_boot_device(task, boot_devices.DISK)

                # If it's going to boot from the local disk, get rid of
                # the PXE configuration files used for the deployment
                pxe_utils.clean_up_pxe_config(task)

                # Ask the ramdisk to install bootloader and
                # wait for the call-back through the vendor passthru
                # 'pass_bootloader_install_info', if it's not a
                # whole disk image.
                if not is_whole_disk_image:
                    LOG.debug('Installing the bootloader on node %s',
                              node.uuid)
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return
            else:
                pxe_config_path = pxe_utils.get_pxe_config_file_path(node.uuid)
                boot_mode = deploy_utils.get_boot_mode_for_deploy(node)
                deploy_utils.switch_pxe_config(
                    pxe_config_path, root_uuid_or_disk_id, boot_mode,
                    is_whole_disk_image,
                    deploy_utils.is_trusted_boot_requested(node))

        except Exception as e:
            LOG.error(
                _LE('Deploy failed for instance %(instance)s. '
                    'Error: %(error)s'), {
                        'instance': node.instance_uuid,
                        'error': e
                    })
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            iscsi_deploy.finish_deploy(task, kwargs.get('address'))
Пример #32
0
    def heartbeat(self, task, callback_url):
        """Process a heartbeat.

        :param task: task to work with.
        :param callback_url: agent HTTP API URL.
        """
        # TODO(dtantsur): upgrade lock only if we actually take action other
        # than updating the last timestamp.
        task.upgrade_lock()

        node = task.node
        driver_internal_info = node.driver_internal_info
        LOG.debug(
            'Heartbeat from %(node)s, last heartbeat at %(heartbeat)s.', {
                'node': node.uuid,
                'heartbeat': driver_internal_info.get('agent_last_heartbeat')
            })
        driver_internal_info['agent_last_heartbeat'] = int(time.time())
        try:
            driver_internal_info['agent_url'] = callback_url
        except KeyError:
            raise exception.MissingParameterValue(
                _('For heartbeat operation, '
                  '"agent_url" must be '
                  'specified.'))

        node.driver_internal_info = driver_internal_info
        node.save()

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.maintenance:
                # this shouldn't happen often, but skip the rest if it does.
                LOG.debug(
                    'Heartbeat from node %(node)s in maintenance mode; '
                    'not taking any action.', {'node': node.uuid})
                return
            elif (node.provision_state == states.DEPLOYWAIT
                  and not self.deploy_has_started(task)):
                msg = _('Node failed to get image for deploy.')
                self.continue_deploy(task)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_is_done(task)):
                msg = _('Node failed to move to active state.')
                self.reboot_to_instance(task)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_has_started(task)):
                node.touch_provisioning()
            elif node.provision_state == states.CLEANWAIT:
                node.touch_provisioning()
                try:
                    if not node.clean_step:
                        LOG.debug('Node %s just booted to start cleaning.',
                                  node.uuid)
                        msg = _('Node failed to start the first cleaning '
                                'step.')
                        # First, cache the clean steps
                        self._refresh_clean_steps(task)
                        # Then set/verify node clean steps and start cleaning
                        manager_utils.set_node_cleaning_steps(task)
                        _notify_conductor_resume_clean(task)
                    else:
                        msg = _('Node failed to check cleaning progress.')
                        self.continue_cleaning(task)
                except exception.NoFreeConductorWorker:
                    # waiting for the next heartbeat, node.last_error and
                    # logging message is filled already via conductor's hook
                    pass

        except Exception as e:
            err_info = {'node': node.uuid, 'msg': msg, 'e': e}
            last_error = _('Asynchronous exception for node %(node)s: '
                           '%(msg)s Exception: %(e)s') % err_info
            LOG.exception(last_error)
            if node.provision_state in (states.CLEANING, states.CLEANWAIT):
                manager_utils.cleaning_error_handler(task, last_error)
            elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
                deploy_utils.set_failed_state(task, last_error)
Пример #33
0
def do_agent_iscsi_deploy(task, agent_client):
    """Method invoked when deployed with the agent ramdisk.

    This method is invoked by drivers for doing iSCSI deploy
    using agent ramdisk.  This method assumes that the agent
    is booted up on the node and is heartbeating.

    :param task: a TaskManager object containing the node.
    :param agent_client: an instance of agent_client.AgentClient
                         which will be used during iscsi deploy
                         (for exposing node's target disk via iSCSI,
                         for install boot loader, etc).
    :returns: a dictionary containing the following keys:

              For partition image:

              * 'root uuid': UUID of root partition
              * 'efi system partition uuid': UUID of the uefi system partition
                (if boot mode is uefi).

                .. note:: If key exists but value is None, it means partition
                          doesn't exist.

              For whole disk image:

              * 'disk identifier': ID of the disk to which image was deployed.
    :raises: InstanceDeployFailure if it encounters some error
             during the deploy.
    """
    node = task.node
    i_info = deploy_utils.parse_instance_info(node)
    wipe_disk_metadata = not i_info['preserve_ephemeral']

    iqn = 'iqn.2008-10.org.openstack:%s' % node.uuid
    portal_port = CONF.iscsi.portal_port
    conv_flags = CONF.iscsi.conv_flags
    result = agent_client.start_iscsi_target(
        node, iqn,
        portal_port,
        wipe_disk_metadata=wipe_disk_metadata)
    if result['command_status'] == 'FAILED':
        msg = (_("Failed to start the iSCSI target to deploy the "
                 "node %(node)s. Error: %(error)s") %
               {'node': node.uuid, 'error': result['command_error']})
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(reason=msg)

    address = urlparse.urlparse(node.driver_internal_info['agent_url'])
    address = address.hostname

    uuid_dict_returned = continue_deploy(task, iqn=iqn, address=address,
                                         conv_flags=conv_flags)
    root_uuid_or_disk_id = uuid_dict_returned.get(
        'root uuid', uuid_dict_returned.get('disk identifier'))

    # TODO(lucasagomes): Move this bit saving the root_uuid to
    # continue_deploy()
    driver_internal_info = node.driver_internal_info
    driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
    node.driver_internal_info = driver_internal_info
    node.save()

    return uuid_dict_returned
Пример #34
0
    def write_image(self, task):
        if not task.driver.storage.should_write_image(task):
            return
        node = task.node
        image_source = node.instance_info.get('image_source')
        LOG.debug('Continuing deploy for node %(node)s with image %(img)s', {
            'node': node.uuid,
            'img': image_source
        })

        image_info = {
            'id': image_source.split('/')[-1],
            'urls': [node.instance_info['image_url']],
            # NOTE(comstud): Older versions of ironic do not set
            # 'disk_format' nor 'container_format', so we use .get()
            # to maintain backwards compatibility in case code was
            # upgraded in the middle of a build request.
            'disk_format': node.instance_info.get('image_disk_format'),
            'container_format':
            node.instance_info.get('image_container_format'),
            'stream_raw_images': CONF.agent.stream_raw_images,
        }

        if node.instance_info.get('image_checksum'):
            image_info['checksum'] = node.instance_info['image_checksum']

        if (node.instance_info.get('image_os_hash_algo')
                and node.instance_info.get('image_os_hash_value')):
            image_info['os_hash_algo'] = node.instance_info[
                'image_os_hash_algo']
            image_info['os_hash_value'] = node.instance_info[
                'image_os_hash_value']

        proxies = {}
        for scheme in ('http', 'https'):
            proxy_param = 'image_%s_proxy' % scheme
            proxy = node.driver_info.get(proxy_param)
            if proxy:
                proxies[scheme] = proxy
        if proxies:
            image_info['proxies'] = proxies
            no_proxy = node.driver_info.get('image_no_proxy')
            if no_proxy is not None:
                image_info['no_proxy'] = no_proxy

        image_info['node_uuid'] = node.uuid
        iwdi = node.driver_internal_info.get('is_whole_disk_image')
        if not iwdi:
            for label in PARTITION_IMAGE_LABELS:
                image_info[label] = node.instance_info.get(label)
            boot_option = deploy_utils.get_boot_option(node)
            image_info['deploy_boot_mode'] = (
                boot_mode_utils.get_boot_mode(node))
            image_info['boot_option'] = boot_option
            disk_label = deploy_utils.get_disk_label(node)
            if disk_label is not None:
                image_info['disk_label'] = disk_label

        has_write_image = agent_base.find_step(task, 'deploy', 'deploy',
                                               'write_image') is not None
        if not has_write_image:
            LOG.warning(
                'The agent on node %s does not have the deploy '
                'step deploy.write_image, using the deprecated '
                'synchronous fall-back', task.node.uuid)

        if self.has_decomposed_deploy_steps and has_write_image:
            configdrive = node.instance_info.get('configdrive')
            # Now switch into the corresponding in-band deploy step and let the
            # result be polled normally.
            new_step = {
                'interface': 'deploy',
                'step': 'write_image',
                'args': {
                    'image_info': image_info,
                    'configdrive': configdrive
                }
            }
            return agent_base.execute_step(task,
                                           new_step,
                                           'deploy',
                                           client=self._client)
        else:
            # TODO(dtantsur): remove in W
            command = self._client.prepare_image(node, image_info, wait=True)
            if command['command_status'] == 'FAILED':
                # TODO(jimrollenhagen) power off if using neutron dhcp to
                #                      align with pxe driver?
                msg = (_('node %(node)s command status errored: %(error)s') % {
                    'node': node.uuid,
                    'error': command['command_error']
                })
                LOG.error(msg)
                deploy_utils.set_failed_state(task, msg)
Пример #35
0
    def heartbeat(self, task, **kwargs):
        """Method for agent to periodically check in.

        The agent should be sending its agent_url (so Ironic can talk back)
        as a kwarg. kwargs should have the following format::

         {
             'agent_url': 'http://AGENT_HOST:AGENT_PORT'
         }

        AGENT_PORT defaults to 9999.
        """
        node = task.node
        driver_internal_info = node.driver_internal_info
        LOG.debug(
            'Heartbeat from %(node)s, last heartbeat at %(heartbeat)s.', {
                'node': node.uuid,
                'heartbeat': driver_internal_info.get('agent_last_heartbeat')
            })
        driver_internal_info['agent_last_heartbeat'] = int(time.time())
        try:
            driver_internal_info['agent_url'] = kwargs['agent_url']
        except KeyError:
            raise exception.MissingParameterValue(
                _('For heartbeat operation, '
                  '"agent_url" must be '
                  'specified.'))

        node.driver_internal_info = driver_internal_info
        node.save()

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.maintenance:
                # this shouldn't happen often, but skip the rest if it does.
                LOG.debug(
                    'Heartbeat from node %(node)s in maintenance mode; '
                    'not taking any action.', {'node': node.uuid})
                return
            elif (node.provision_state == states.DEPLOYWAIT
                  and not self.deploy_has_started(task)):
                msg = _('Node failed to get image for deploy.')
                self.continue_deploy(task, **kwargs)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_is_done(task)):
                msg = _('Node failed to move to active state.')
                self.reboot_to_instance(task, **kwargs)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_has_started(task)):
                node.touch_provisioning()
            # TODO(lucasagomes): CLEANING here for backwards compat
            # with previous code, otherwise nodes in CLEANING when this
            # is deployed would fail. Should be removed once the Mitaka
            # release starts.
            elif node.provision_state in (states.CLEANWAIT, states.CLEANING):
                node.touch_provisioning()
                try:
                    if not node.clean_step:
                        LOG.debug('Node %s just booted to start cleaning.',
                                  node.uuid)
                        msg = _('Node failed to start the first cleaning '
                                'step.')
                        # First, cache the clean steps
                        self._refresh_clean_steps(task)
                        # Then set/verify node clean steps and start cleaning
                        manager_utils.set_node_cleaning_steps(task)
                        self.notify_conductor_resume_clean(task)
                    else:
                        msg = _('Node failed to check cleaning progress.')
                        self.continue_cleaning(task, **kwargs)
                except exception.NoFreeConductorWorker:
                    # waiting for the next heartbeat, node.last_error and
                    # logging message is filled already via conductor's hook
                    pass

        except Exception as e:
            err_info = {'node': node.uuid, 'msg': msg, 'e': e}
            last_error = _('Asynchronous exception for node %(node)s: '
                           '%(msg)s Exception: %(e)s') % err_info
            LOG.exception(last_error)
            if node.provision_state in (states.CLEANING, states.CLEANWAIT):
                manager_utils.cleaning_error_handler(task, last_error)
            elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
                deploy_utils.set_failed_state(task, last_error)
Пример #36
0
    def heartbeat(self, task, **kwargs):
        """Method for agent to periodically check in.

        The agent should be sending its agent_url (so Ironic can talk back)
        as a kwarg. kwargs should have the following format::

         {
             'agent_url': 'http://AGENT_HOST:AGENT_PORT'
         }

        AGENT_PORT defaults to 9999.
        """
        node = task.node
        driver_internal_info = node.driver_internal_info
        LOG.debug(
            'Heartbeat from %(node)s, last heartbeat at %(heartbeat)s.', {
                'node': node.uuid,
                'heartbeat': driver_internal_info.get('agent_last_heartbeat')
            })
        driver_internal_info['agent_last_heartbeat'] = int(_time())
        try:
            driver_internal_info['agent_url'] = kwargs['agent_url']
        except KeyError:
            raise exception.MissingParameterValue(
                _('For heartbeat operation, '
                  '"agent_url" must be '
                  'specified.'))

        node.driver_internal_info = driver_internal_info
        node.save()

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.maintenance:
                # this shouldn't happen often, but skip the rest if it does.
                LOG.debug(
                    'Heartbeat from node %(node)s in maintenance mode; '
                    'not taking any action.', {'node': node.uuid})
                return
            elif (node.provision_state == states.DEPLOYWAIT
                  and not self.deploy_has_started(task)):
                msg = _('Node failed to get image for deploy.')
                self.continue_deploy(task, **kwargs)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_is_done(task)):
                msg = _('Node failed to move to active state.')
                self.reboot_to_instance(task, **kwargs)
            elif (node.provision_state == states.CLEANING
                  and not node.clean_step):
                # Agent booted from prepare_cleaning
                LOG.debug('Node %s just booted to start cleaning.', node.uuid)
                manager.set_node_cleaning_steps(task)
                self._notify_conductor_resume_clean(task)
            elif (node.provision_state == states.CLEANING and node.clean_step):
                self.continue_cleaning(task, **kwargs)

        except Exception as e:
            err_info = {'node': node.uuid, 'msg': msg, 'e': e}
            last_error = _('Asynchronous exception for node %(node)s: '
                           '%(msg)s exception: %(e)s') % err_info
            LOG.exception(last_error)
            deploy_utils.set_failed_state(task, last_error)
Пример #37
0
    def reboot_to_instance(self, task):
        task.process_event('resume')
        node = task.node
        iwdi = task.node.driver_internal_info.get('is_whole_disk_image')
        cpu_arch = task.node.properties.get('cpu_arch')
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') % {
                'node': node.uuid,
                'error': error
            })
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        # If `boot_option` is set to `netboot`, PXEBoot.prepare_instance()
        # would need root_uuid of the whole disk image to add it into the
        # pxe config to perform chain boot.
        # IPA would have returned us the 'root_uuid_or_disk_id' if image
        # being provisioned is a whole disk image. IPA would also provide us
        # 'efi_system_partition_uuid' if the image being provisioned is a
        # partition image.
        # In case of local boot using partition image, we need both
        # 'root_uuid_or_disk_id' and 'efi_system_partition_uuid' to configure
        # bootloader for local boot.
        # NOTE(mjturek): In the case of local boot using a partition image on
        # ppc64* hardware we need to provide the 'PReP_Boot_partition_uuid' to
        # direct where the bootloader should be installed.
        driver_internal_info = task.node.driver_internal_info
        root_uuid = self._get_uuid_from_result(task, 'root_uuid')
        if root_uuid:
            driver_internal_info['root_uuid_or_disk_id'] = root_uuid
            task.node.driver_internal_info = driver_internal_info
            task.node.save()
        elif iwdi and CONF.agent.manage_agent_boot:
            # IPA version less than 3.1.0 will not return root_uuid for
            # whole disk image. Also IPA version introduced a requirement
            # for hexdump utility that may not be always available. Need to
            # fall back to older behavior for the same.
            LOG.warning(
                "With the deploy ramdisk based on Ironic Python Agent "
                "version 3.1.0 and beyond, the drivers using "
                "`direct` deploy interface performs `netboot` or "
                "`local` boot for whole disk image based on value "
                "of boot option setting. When you upgrade Ironic "
                "Python Agent in your deploy ramdisk, ensure that "
                "boot option is set appropriately for the node %s. "
                "The boot option can be set using configuration "
                "`[deploy]/default_boot_option` or as a `boot_option` "
                "capability in node's `properties['capabilities']`. "
                "Also please note that this functionality requires "
                "`hexdump` command in the ramdisk.", node.uuid)

        efi_sys_uuid = None
        if not iwdi:
            if boot_mode_utils.get_boot_mode_for_deploy(node) == 'uefi':
                efi_sys_uuid = (self._get_uuid_from_result(
                    task, 'efi_system_partition_uuid'))

        prep_boot_part_uuid = None
        if cpu_arch is not None and cpu_arch.startswith('ppc64'):
            prep_boot_part_uuid = (self._get_uuid_from_result(
                task, 'PReP_Boot_partition_uuid'))

        LOG.info('Image successfully written to node %s', node.uuid)

        if CONF.agent.manage_agent_boot:
            # It is necessary to invoke prepare_instance() of the node's
            # boot interface, so that the any necessary configurations like
            # setting of the boot mode (e.g. UEFI secure boot) which cannot
            # be done on node during deploy stage can be performed.
            LOG.debug(
                'Executing driver specific tasks before booting up the '
                'instance for node %s', node.uuid)
            self.prepare_instance_to_boot(task, root_uuid, efi_sys_uuid,
                                          prep_boot_part_uuid)
        else:
            manager_utils.node_set_boot_device(task, 'disk', persistent=True)

        LOG.debug('Rebooting node %s to instance', node.uuid)
        self.reboot_and_finish_deploy(task)
Пример #38
0
    def heartbeat(self, task, callback_url, agent_version):
        """Process a heartbeat.

        :param task: task to work with.
        :param callback_url: agent HTTP API URL.
        :param agent_version: The version of the agent that is heartbeating
        """
        # NOTE(pas-ha) immediately skip the rest if nothing to do
        if task.node.provision_state not in self.heartbeat_allowed_states:
            LOG.debug(
                'Heartbeat from node %(node)s in unsupported '
                'provision state %(state)s, not taking any action.', {
                    'node': task.node.uuid,
                    'state': task.node.provision_state
                })
            return

        task.upgrade_lock()

        node = task.node
        LOG.debug('Heartbeat from node %s', node.uuid)

        driver_internal_info = node.driver_internal_info
        driver_internal_info['agent_url'] = callback_url
        driver_internal_info['agent_version'] = agent_version
        node.driver_internal_info = driver_internal_info
        node.save()

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.maintenance:
                # this shouldn't happen often, but skip the rest if it does.
                LOG.debug(
                    'Heartbeat from node %(node)s in maintenance mode; '
                    'not taking any action.', {'node': node.uuid})
                return
            elif (node.provision_state == states.DEPLOYWAIT
                  and not self.deploy_has_started(task)):
                msg = _('Node failed to deploy.')
                self.continue_deploy(task)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_is_done(task)):
                msg = _('Node failed to move to active state.')
                self.reboot_to_instance(task)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_has_started(task)):
                node.touch_provisioning()
            elif node.provision_state == states.CLEANWAIT:
                node.touch_provisioning()
                if not node.clean_step:
                    LOG.debug('Node %s just booted to start cleaning.',
                              node.uuid)
                    msg = _('Node failed to start the first cleaning step.')
                    # First, cache the clean steps
                    self.refresh_clean_steps(task)
                    # Then set/verify node clean steps and start cleaning
                    manager_utils.set_node_cleaning_steps(task)
                    # The exceptions from RPC are not possible as we using cast
                    # here
                    _notify_conductor_resume_clean(task)
                else:
                    msg = _('Node failed to check cleaning progress.')
                    self.continue_cleaning(task)
            elif (node.provision_state == states.RESCUEWAIT):
                msg = _('Node failed to perform rescue operation.')
                self._finalize_rescue(task)
        except Exception as e:
            err_info = {'msg': msg, 'e': e}
            last_error = _('Asynchronous exception: %(msg)s '
                           'Exception: %(e)s for node') % err_info
            errmsg = last_error + ' %(node)s'
            LOG.exception(errmsg, {'node': node.uuid})
            if node.provision_state in (states.CLEANING, states.CLEANWAIT):
                manager_utils.cleaning_error_handler(task, last_error)
            elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
                deploy_utils.set_failed_state(task,
                                              last_error,
                                              collect_logs=bool(self._client))
            elif node.provision_state in (states.RESCUING, states.RESCUEWAIT):
                manager_utils.rescuing_error_handler(task, last_error)
Пример #39
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node."""

        node = task.node
        task.process_event('resume')
        err_msg = _('Failed to continue deployment with Fuel Agent.')

        agent_status = kwargs.get('status')
        if agent_status != 'ready':
            LOG.error(
                _LE('Deploy failed for node %(node)s. Fuel Agent is not '
                    'in ready state, error: %(error)s'), {
                        'node': node.uuid,
                        'error': kwargs.get('error_message')
                    })
            deploy_utils.set_failed_state(task, err_msg)
            return

        params = _parse_driver_info(node)
        params['host'] = kwargs.get('address')
        cmd = ('%s --data_driver ironic  --config-file '
               '/etc/fuel-agent/fuel-agent.conf' % params.pop('script'))
        if CONF.debug:
            cmd += ' --debug'
        instance_info = node.instance_info

        try:
            deploy_data = _get_deploy_data(task.context,
                                           instance_info['image_source'])

            image_data = {
                "/": {
                    "uri": instance_info['image_url'],
                    "format": "raw",
                    "container": "raw"
                }
            }

            deploy_data['ks_meta']['image_data'] = image_data

            ssh = utils.ssh_connect(params)
            sftp = ssh.open_sftp()
            _sftp_upload(sftp, json.dumps(deploy_data), '/tmp/provision.json')

            # swift configdrive store should be disabled
            configdrive = instance_info.get('configdrive')
            if configdrive is not None:
                _sftp_upload(sftp, configdrive, '/tmp/config-drive.img')

            _ssh_execute(ssh, cmd, params)
            LOG.info(_LI('Fuel Agent pass on node %s'), node.uuid)
            manager_utils.node_set_boot_device(task,
                                               boot_devices.DISK,
                                               persistent=True)
            manager_utils.node_power_action(task, states.REBOOT)
        except Exception as e:
            msg = (_('Deploy failed for node %(node)s. Error: %(error)s') % {
                'node': node.uuid,
                'error': e
            })
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
        else:
            task.process_event('done')
            LOG.info(_LI('Deployment to node %s done'), task.node.uuid)
Пример #40
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the iSCSI deployment from where ramdisk left off.

        This method continues the iSCSI deployment from the conductor node
        and writes the deploy image to the bare metal's disk.  After that,
        it does the following depending on boot_option for deploy:
        - If the boot_option requested for this deploy is 'local', then it
          sets the node to boot from disk (ramdisk installs the boot loader
          present within the image to the bare metal's disk).
        - If the boot_option requested is 'netboot' or no boot_option is
          requested, it finds/creates the boot ISO to boot the instance
          image, attaches the boot ISO to the bare metal and then sets
          the node to boot from CDROM.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs containing parameters for iSCSI deployment.
        :raises: InvalidState
        """
        node = task.node
        task.process_event('resume')

        is_whole_disk_image = node.driver_internal_info.get(
            'is_whole_disk_image')
        uuid_dict = iscsi_deploy.continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict.get('root uuid',
                                             uuid_dict.get('disk identifier'))

        try:
            _cleanup_vmedia_boot(task)
            if (iscsi_deploy.get_boot_option(node) == "local"
                    or is_whole_disk_image):
                manager_utils.node_set_boot_device(task,
                                                   boot_devices.DISK,
                                                   persistent=True)

                # Ask the ramdisk to install bootloader and
                # wait for the call-back through the vendor passthru
                # 'pass_bootloader_install_info', if it's not a whole
                # disk image.
                if not is_whole_disk_image:
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return

            else:
                _prepare_boot_iso(task, root_uuid_or_disk_id)
                setup_vmedia_for_boot(
                    task, node.driver_internal_info['irmc_boot_iso'])
                manager_utils.node_set_boot_device(task,
                                                   boot_devices.CDROM,
                                                   persistent=True)

        except Exception as e:
            LOG.exception(
                _LE('Deploy failed for instance %(instance)s. '
                    'Error: %(error)s'), {
                        'instance': node.instance_uuid,
                        'error': e
                    })
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            iscsi_deploy.finish_deploy(task, kwargs.get('address'))
Пример #41
0
    def heartbeat(self, task, callback_url, agent_version):
        """Process a heartbeat.

        :param task: task to work with.
        :param callback_url: agent HTTP API URL.
        :param agent_version: The version of the agent that is heartbeating
        """
        # NOTE(pas-ha) immediately skip the rest if nothing to do
        if (task.node.provision_state not in self.heartbeat_allowed_states
                and not manager_utils.fast_track_able(task)):
            LOG.debug(
                'Heartbeat from node %(node)s in unsupported '
                'provision state %(state)s, not taking any action.', {
                    'node': task.node.uuid,
                    'state': task.node.provision_state
                })
            return

        try:
            task.upgrade_lock()
        except exception.NodeLocked:
            LOG.warning(
                'Node %s is currently locked, skipping heartbeat '
                'processing (will retry on the next heartbeat)',
                task.node.uuid)
            return

        node = task.node
        LOG.debug('Heartbeat from node %s', node.uuid)
        driver_internal_info = node.driver_internal_info
        driver_internal_info['agent_url'] = callback_url
        driver_internal_info['agent_version'] = agent_version
        # Record the last heartbeat event time in UTC, so we can make
        # decisions about it later. Can be decoded to datetime object with:
        # datetime.datetime.strptime(var, "%Y-%m-%d %H:%M:%S.%f")
        driver_internal_info['agent_last_heartbeat'] = str(
            timeutils.utcnow().isoformat())
        node.driver_internal_info = driver_internal_info
        node.save()

        if node.provision_state in _HEARTBEAT_RECORD_ONLY:
            # We shouldn't take any additional action. The agent will
            # silently continue to heartbeat to ironic until user initiated
            # state change occurs causing it to match a state below.
            LOG.debug(
                'Heartbeat from %(node)s recorded to identify the '
                'node as on-line.', {'node': task.node.uuid})
            return

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.maintenance:
                # this shouldn't happen often, but skip the rest if it does.
                LOG.debug(
                    'Heartbeat from node %(node)s in maintenance mode; '
                    'not taking any action.', {'node': node.uuid})
                return
            elif (node.provision_state == states.DEPLOYWAIT
                  and not self.deploy_has_started(task)):
                msg = _('Node failed to deploy.')
                self.continue_deploy(task)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_is_done(task)):
                msg = _('Node failed to move to active state.')
                self.reboot_to_instance(task)
            elif (node.provision_state == states.DEPLOYWAIT
                  and self.deploy_has_started(task)):
                node.touch_provisioning()
            elif node.provision_state == states.CLEANWAIT:
                node.touch_provisioning()
                if not node.clean_step:
                    LOG.debug('Node %s just booted to start cleaning.',
                              node.uuid)
                    msg = _('Node failed to start the first cleaning step.')
                    # First, cache the clean steps
                    self.refresh_clean_steps(task)
                    # Then set/verify node clean steps and start cleaning
                    conductor_steps.set_node_cleaning_steps(task)
                    # The exceptions from RPC are not possible as we using cast
                    # here
                    manager_utils.notify_conductor_resume_clean(task)
                else:
                    msg = _('Node failed to check cleaning progress.')
                    self.continue_cleaning(task)
            elif (node.provision_state == states.RESCUEWAIT):
                msg = _('Node failed to perform rescue operation.')
                self._finalize_rescue(task)
        except Exception as e:
            err_info = {'msg': msg, 'e': e}
            last_error = _('Asynchronous exception: %(msg)s '
                           'Exception: %(e)s for node') % err_info
            errmsg = last_error + ' %(node)s'
            LOG.exception(errmsg, {'node': node.uuid})
            if node.provision_state in (states.CLEANING, states.CLEANWAIT):
                manager_utils.cleaning_error_handler(task, last_error)
            elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
                deploy_utils.set_failed_state(task,
                                              last_error,
                                              collect_logs=bool(self._client))
            elif node.provision_state in (states.RESCUING, states.RESCUEWAIT):
                manager_utils.rescuing_error_handler(task, last_error)
Пример #42
0
def log_and_raise_deployment_error(task, msg):
    """Helper method to log the error and raise exception."""
    LOG.error(msg)
    deploy_utils.set_failed_state(task, msg)
    raise exception.InstanceDeployFailure(msg)
Пример #43
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node over iSCSI.

        This method continues the deployment of the baremetal node over iSCSI
        from where the deployment ramdisk has left off.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs for performing iscsi deployment.
        :raises: InvalidState
        """
        node = task.node
        LOG.warning(
            _LW("The node %s is using the bash deploy ramdisk for "
                "its deployment. This deploy ramdisk has been "
                "deprecated. Please use the ironic-python-agent "
                "(IPA) ramdisk instead."), node.uuid)

        # TODO(rameshg87): Remove the below code once we stop supporting
        # bash ramdisk in Ironic.
        if node.provision_state == states.CLEANWAIT:
            return self._initiate_cleaning(task)

        task.process_event('resume')
        LOG.debug('Continuing the deployment on node %s', node.uuid)

        is_whole_disk_image = node.driver_internal_info['is_whole_disk_image']
        uuid_dict_returned = continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict_returned.get(
            'root uuid', uuid_dict_returned.get('disk identifier'))

        # save the node's root disk UUID so that another conductor could
        # rebuild the PXE config file. Due to a shortcoming in Nova objects,
        # we have to assign to node.driver_internal_info so the node knows it
        # has changed.
        driver_internal_info = node.driver_internal_info
        driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
        node.driver_internal_info = driver_internal_info
        node.save()

        try:
            task.driver.boot.prepare_instance(task)

            if deploy_utils.get_boot_option(node) == "local":
                if not is_whole_disk_image:
                    LOG.debug('Installing the bootloader on node %s',
                              node.uuid)
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return

        except Exception as e:
            LOG.error(
                _LE('Deploy failed for instance %(instance)s. '
                    'Error: %(error)s'), {
                        'instance': node.instance_uuid,
                        'error': e
                    })
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            finish_deploy(task, kwargs.get('address'))
Пример #44
0
def do_agent_iscsi_deploy(task, agent_client):
    """Method invoked when deployed with the agent ramdisk.

    This method is invoked by drivers for doing iSCSI deploy
    using agent ramdisk.  This method assumes that the agent
    is booted up on the node and is heartbeating.

    :param task: a TaskManager object containing the node.
    :param agent_client: an instance of agent_client.AgentClient
        which will be used during iscsi deploy (for exposing node's
        target disk via iSCSI, for install boot loader, etc).
    :returns: a dictionary containing the following keys:
        For partition image:
            'root uuid': UUID of root partition
            'efi system partition uuid': UUID of the uefi system partition
                                         (if boot mode is uefi).
            NOTE: If key exists but value is None, it means partition doesn't
                  exist.
        For whole disk image:
            'disk identifier': ID of the disk to which image was deployed.
    :raises: InstanceDeployFailure, if it encounters some error
        during the deploy.
    """
    node = task.node
    iscsi_options = build_deploy_ramdisk_options(node)

    iqn = iscsi_options['iscsi_target_iqn']
    portal_port = iscsi_options['iscsi_portal_port']

    result = agent_client.start_iscsi_target(node, iqn, portal_port)

    if result['command_status'] == 'FAILED':
        msg = (_("Failed to start the iSCSI target to deploy the "
                 "node %(node)s. Error: %(error)s") % {
                     'node': node.uuid,
                     'error': result['command_error']
                 })
        deploy_utils.set_failed_state(task, msg)
        raise exception.InstanceDeployFailure(reason=msg)

    address = parse.urlparse(node.driver_internal_info['agent_url'])
    address = address.hostname

    # TODO(lucasagomes): The 'error' and 'key' parameters in the
    # dictionary below are just being passed because it's needed for
    # the continue_deploy() method, we are fooling it
    # for now. The agent driver doesn't use/need those. So we need to
    # refactor this bits here later.
    iscsi_params = {
        'error': result['command_error'],
        'iqn': iqn,
        'key': iscsi_options['deployment_key'],
        'address': address
    }

    uuid_dict_returned = continue_deploy(task, **iscsi_params)
    root_uuid_or_disk_id = uuid_dict_returned.get(
        'root uuid', uuid_dict_returned.get('disk identifier'))

    # TODO(lucasagomes): Move this bit saving the root_uuid to
    # continue_deploy()
    driver_internal_info = node.driver_internal_info
    driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
    node.driver_internal_info = driver_internal_info
    node.save()

    return uuid_dict_returned
Пример #45
0
    def pass_deploy_info(self, task, **kwargs):
        """Continues the iSCSI deployment from where ramdisk left off.

        This method continues the iSCSI deployment from the conductor node
        and writes the deploy image to the bare metal's disk. After that,
        it does the following depending on boot_option for deploy:

        - If the boot_option requested for this deploy is 'local', then it
          sets the node to boot from disk (ramdisk installs the boot loader
          present within the image to the bare metal's disk).
        - If the boot_option requested is 'netboot' or no boot_option is
          requested, it finds/creates the boot ISO to boot the instance
          image, attaches the boot ISO to the bare metal and then sets
          the node to boot from CDROM.

        :param task: a TaskManager instance containing the node to act on.
        :param kwargs: kwargs containing parameters for iSCSI deployment.
        :raises: InvalidState
        """
        node = task.node
        LOG.warning(
            _LW("The node %s is using the bash deploy ramdisk for "
                "its deployment. This deploy ramdisk has been "
                "deprecated. Please use the ironic-python-agent "
                "(IPA) ramdisk instead."), node.uuid)
        task.process_event('resume')

        iwdi = node.driver_internal_info.get('is_whole_disk_image')
        ilo_common.cleanup_vmedia_boot(task)
        uuid_dict = iscsi_deploy.continue_deploy(task, **kwargs)
        root_uuid_or_disk_id = uuid_dict.get('root uuid',
                                             uuid_dict.get('disk identifier'))
        driver_internal_info = task.node.driver_internal_info
        driver_internal_info['root_uuid_or_disk_id'] = root_uuid_or_disk_id
        task.node.driver_internal_info = driver_internal_info
        task.node.save()

        try:
            # Set boot mode
            ilo_common.update_boot_mode(task)

            # Need to enable secure boot, if being requested
            _update_secure_boot_mode(task, True)

            # For iscsi_ilo driver, we boot from disk every time if the image
            # deployed is a whole disk image.
            if deploy_utils.get_boot_option(node) == "local" or iwdi:
                manager_utils.node_set_boot_device(task,
                                                   boot_devices.DISK,
                                                   persistent=True)

                # Ask the ramdisk to install bootloader and
                # wait for the call-back through the vendor passthru
                # 'pass_bootloader_install_info', if it's not a whole
                # disk image.
                if not iwdi:
                    deploy_utils.notify_ramdisk_to_proceed(kwargs['address'])
                    task.process_event('wait')
                    return
            else:
                self._configure_vmedia_boot(task, root_uuid_or_disk_id)
        except Exception as e:
            LOG.error(
                _LE('Deploy failed for instance %(instance)s. '
                    'Error: %(error)s'), {
                        'instance': node.instance_uuid,
                        'error': e
                    })
            msg = _('Failed to continue iSCSI deployment.')
            deploy_utils.set_failed_state(task, msg)
        else:
            iscsi_deploy.finish_deploy(task, kwargs.get('address'))
Пример #46
0
 def _fail_deploy(task, msg):
     """Fail the deploy after logging and setting error states."""
     LOG.error(msg)
     deploy_utils.set_failed_state(task, msg)
     destroy_images(task.node.uuid)
     raise exception.InstanceDeployFailure(msg)
Пример #47
0
    def heartbeat(self, task, callback_url):
        """Process a heartbeat.

        :param task: task to work with.
        :param callback_url: agent HTTP API URL.
        """
        # TODO(dtantsur): upgrade lock only if we actually take action other
        # than updating the last timestamp.
        task.upgrade_lock()

        node = task.node
        LOG.debug('Heartbeat from node %s', node.uuid)

        driver_internal_info = node.driver_internal_info
        driver_internal_info['agent_url'] = callback_url

        # TODO(rloo): 'agent_last_heartbeat' was deprecated since it wasn't
        # being used so remove that entry if it exists.
        # Hopefully all nodes will have been updated by Pike, so
        # we can delete this code then.
        driver_internal_info.pop('agent_last_heartbeat', None)

        node.driver_internal_info = driver_internal_info
        node.save()

        # Async call backs don't set error state on their own
        # TODO(jimrollenhagen) improve error messages here
        msg = _('Failed checking if deploy is done.')
        try:
            if node.maintenance:
                # this shouldn't happen often, but skip the rest if it does.
                LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
                          'not taking any action.', {'node': node.uuid})
                return
            elif (node.provision_state == states.DEPLOYWAIT and
                  not self.deploy_has_started(task)):
                msg = _('Node failed to deploy.')
                self.continue_deploy(task)
            elif (node.provision_state == states.DEPLOYWAIT and
                  self.deploy_is_done(task)):
                msg = _('Node failed to move to active state.')
                self.reboot_to_instance(task)
            elif (node.provision_state == states.DEPLOYWAIT and
                  self.deploy_has_started(task)):
                node.touch_provisioning()
            elif node.provision_state == states.CLEANWAIT:
                node.touch_provisioning()
                if not node.clean_step:
                    LOG.debug('Node %s just booted to start cleaning.',
                              node.uuid)
                    msg = _('Node failed to start the first cleaning step.')
                    # First, cache the clean steps
                    self.refresh_clean_steps(task)
                    # Then set/verify node clean steps and start cleaning
                    manager_utils.set_node_cleaning_steps(task)
                    # The exceptions from RPC are not possible as we using cast
                    # here
                    _notify_conductor_resume_clean(task)
                else:
                    msg = _('Node failed to check cleaning progress.')
                    self.continue_cleaning(task)

        except Exception as e:
            err_info = {'node': node.uuid, 'msg': msg, 'e': e}
            last_error = _('Asynchronous exception for node %(node)s: '
                           '%(msg)s Exception: %(e)s') % err_info
            LOG.exception(last_error)
            if node.provision_state in (states.CLEANING, states.CLEANWAIT):
                manager_utils.cleaning_error_handler(task, last_error)
            elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT):
                deploy_utils.set_failed_state(
                    task, last_error, collect_logs=bool(self._client))
Пример #48
0
    def reboot_to_instance(self, task):
        task.process_event('resume')
        node = task.node
        iwdi = task.node.driver_internal_info.get('is_whole_disk_image')
        cpu_arch = task.node.properties.get('cpu_arch')
        error = self.check_deploy_success(node)
        if error is not None:
            # TODO(jimrollenhagen) power off if using neutron dhcp to
            #                      align with pxe driver?
            msg = (_('node %(node)s command status errored: %(error)s') % {
                'node': node.uuid,
                'error': error
            })
            LOG.error(msg)
            deploy_utils.set_failed_state(task, msg)
            return

        # If `boot_option` is set to `netboot`, PXEBoot.prepare_instance()
        # would need root_uuid of the whole disk image to add it into the
        # pxe config to perform chain boot.
        # IPA would have returned us the 'root_uuid_or_disk_id' if image
        # being provisioned is a whole disk image. IPA would also provide us
        # 'efi_system_partition_uuid' if the image being provisioned is a
        # partition image.
        # In case of local boot using partition image, we need both
        # 'root_uuid_or_disk_id' and 'efi_system_partition_uuid' to configure
        # bootloader for local boot.
        # NOTE(mjturek): In the case of local boot using a partition image on
        # ppc64* hardware we need to provide the 'PReP_Boot_partition_uuid' to
        # direct where the bootloader should be installed.
        driver_internal_info = task.node.driver_internal_info
        try:
            partition_uuids = self._client.get_partition_uuids(node).get(
                'command_result') or {}
            root_uuid = partition_uuids.get('root uuid')
        except exception.AgentAPIError:
            # TODO(dtantsur): remove in W
            LOG.warning('Old ironic-python-agent detected, please update '
                        'to Victoria or newer')
            partition_uuids = None
            root_uuid = self._get_uuid_from_result(task, 'root_uuid')

        if root_uuid:
            driver_internal_info['root_uuid_or_disk_id'] = root_uuid
            task.node.driver_internal_info = driver_internal_info
            task.node.save()
        elif not iwdi:
            LOG.error(
                'No root UUID returned from the ramdisk for node '
                '%(node)s, the deploy will likely fail. Partition '
                'UUIDs are %(uuids)s', {
                    'node': node.uuid,
                    'uuid': partition_uuids
                })

        efi_sys_uuid = None
        if not iwdi:
            if boot_mode_utils.get_boot_mode(node) == 'uefi':
                # TODO(dtantsur): remove in W
                if partition_uuids is None:
                    efi_sys_uuid = (self._get_uuid_from_result(
                        task, 'efi_system_partition_uuid'))
                else:
                    efi_sys_uuid = partition_uuids.get(
                        'efi system partition uuid')

        prep_boot_part_uuid = None
        if cpu_arch is not None and cpu_arch.startswith('ppc64'):
            # TODO(dtantsur): remove in W
            if partition_uuids is None:
                prep_boot_part_uuid = (self._get_uuid_from_result(
                    task, 'PReP_Boot_partition_uuid'))
            else:
                prep_boot_part_uuid = partition_uuids.get(
                    'PReP Boot partition uuid')

        LOG.info('Image successfully written to node %s', node.uuid)

        if CONF.agent.manage_agent_boot:
            # It is necessary to invoke prepare_instance() of the node's
            # boot interface, so that the any necessary configurations like
            # setting of the boot mode (e.g. UEFI secure boot) which cannot
            # be done on node during deploy stage can be performed.
            LOG.debug(
                'Executing driver specific tasks before booting up the '
                'instance for node %s', node.uuid)
            self.prepare_instance_to_boot(task, root_uuid, efi_sys_uuid,
                                          prep_boot_part_uuid)
        else:
            manager_utils.node_set_boot_device(task, 'disk', persistent=True)

        # Remove symbolic link when deploy is done.
        if CONF.agent.image_download_source == 'http':
            deploy_utils.remove_http_instance_symlink(task.node.uuid)

        LOG.debug('Rebooting node %s to instance', node.uuid)
        self.reboot_and_finish_deploy(task)