Exemplo n.º 1
0
    def test_do_next_deploy_step_oob_reboot_fail(self, mock_execute):
        # A deploy step that fails while no reboot was requested
        # ('deployment_reboot' is not set) must leave the node in DEPLOYFAIL.
        expected_target = states.ACTIVE

        self._start_service()
        internal_info = {'deploy_steps': self.deploy_steps,
                         'deploy_step_index': None}
        node = obj_utils.create_test_node(
            self.context,
            driver='fake-hardware',
            provision_state=states.DEPLOYING,
            target_provision_state=expected_target,
            last_error=None,
            driver_internal_info=internal_info,
            deploy_step={})
        # Make the mocked step execution raise AgentConnectionFailed.
        mock_execute.side_effect = exception.AgentConnectionFailed(
            reason='failed')

        with task_manager.acquire(self.context, node.uuid,
                                  shared=False) as task:
            deployments.do_next_deploy_step(task, 0, mock.ANY)

        self._stop_service()
        node.refresh()

        # The deployment failed and the step bookkeeping was cleared.
        self.assertEqual(states.DEPLOYFAIL, node.provision_state)
        self.assertEqual(expected_target, node.target_provision_state)
        self.assertEqual({}, node.deploy_step)
        for stale_key in ('deploy_step_index', 'skip_current_deploy_step'):
            self.assertNotIn(stale_key, node.driver_internal_info)
        self.assertIsNotNone(node.last_error)
        mock_execute.assert_called_once_with(
            mock.ANY, mock.ANY, self.deploy_steps[0])
Exemplo n.º 2
0
    def test_do_next_clean_step_oob_reboot_fail(self, tear_mock, mock_execute):
        # A clean step that fails while no reboot was requested
        # ('cleaning_reboot' is not set) must leave the node in CLEANFAIL.
        expected_target = states.MANAGEABLE

        internal_info = {'clean_steps': self.clean_steps,
                         'clean_step_index': None}
        node = obj_utils.create_test_node(
            self.context, driver='fake-hardware',
            provision_state=states.CLEANING,
            target_provision_state=expected_target,
            last_error=None,
            driver_internal_info=internal_info,
            clean_step={})
        # Make the mocked step execution raise AgentConnectionFailed.
        mock_execute.side_effect = exception.AgentConnectionFailed(
            reason='failed')

        with task_manager.acquire(
                self.context, node.uuid, shared=False) as task:
            cleaning.do_next_clean_step(task, 0)
            # Checked while the task is still held, as it is an argument
            # of the expected call.
            tear_mock.assert_called_once_with(task.driver.deploy, task)

        node.refresh()

        # Cleaning failed, the step bookkeeping was cleared and the node
        # was put into maintenance.
        self.assertEqual(states.CLEANFAIL, node.provision_state)
        self.assertEqual(expected_target, node.target_provision_state)
        self.assertEqual({}, node.clean_step)
        for stale_key in ('clean_step_index', 'skip_current_clean_step'):
            self.assertNotIn(stale_key, node.driver_internal_info)
        self.assertIsNotNone(node.last_error)
        self.assertTrue(node.maintenance)
        mock_execute.assert_called_once_with(
            mock.ANY, mock.ANY, self.clean_steps[0])
Exemplo n.º 3
0
    def test_do_next_deploy_step_oob_reboot(self, mock_execute):
        # When a deploy step fails with AgentConnectionFailed while
        # 'deployment_reboot' is set, the node must go to DEPLOYWAIT
        # instead of failing the deployment.
        tgt_prov_state = states.ACTIVE

        self._start_service()
        node = obj_utils.create_test_node(
            self.context, driver='fake-hardware',
            provision_state=states.DEPLOYING,
            target_provision_state=tgt_prov_state,
            last_error=None,
            driver_internal_info={'deploy_steps': self.deploy_steps,
                                  'deploy_step_index': None,
                                  'deployment_reboot': True},
            # This is a deploy-step test: initialise the deploy step (not
            # the clean step), matching the sibling *_oob_reboot_fail test.
            deploy_step={})
        mock_execute.side_effect = exception.AgentConnectionFailed(
            reason='failed')

        with task_manager.acquire(
                self.context, node.uuid, shared=False) as task:
            deployments.do_next_deploy_step(task, 0)

        self._stop_service()
        node.refresh()

        # Make sure we go to DEPLOYWAIT and keep the current step so that
        # it can be resumed.
        self.assertEqual(states.DEPLOYWAIT, node.provision_state)
        self.assertEqual(tgt_prov_state, node.target_provision_state)
        self.assertEqual(self.deploy_steps[0], node.deploy_step)
        self.assertEqual(0, node.driver_internal_info['deploy_step_index'])
        self.assertFalse(node.driver_internal_info['skip_current_deploy_step'])
        mock_execute.assert_called_once_with(
            mock.ANY, mock.ANY, self.deploy_steps[0])
Exemplo n.º 4
0
    def _handle_timeout_on_command_execution(self, node, method, params,
                                             error):
        result = None
        # NOTE(dtantsur): it is possible, especially with eventlet+TLS, that
        # agent receives a command but fails to return the result to Ironic.
        # To avoid a failure, check if the last command is the one we're trying
        # to execute.
        try:
            result = self._status_if_last_command_matches(node, method, params)
        except Exception as e:
            msg = (_('Failed to connect to the agent running on node '
                     '%(node)s for checking the last command status '
                     'after failing to invoke command %(method)s. '
                     'Error: %(error)s') % {
                         'node': node.uuid,
                         'method': method,
                         'error': e
                     })
            LOG.error(msg)

        if result is None:
            msg = (_('Failed to connect to the agent running on node %(node)s '
                     'for invoking command %(method)s. Error: %(error)s') % {
                         'node': node.uuid,
                         'method': method,
                         'error': error
                     })
            LOG.error(msg)
            raise exception.AgentConnectionFailed(reason=msg)

        return result
Exemplo n.º 5
0
    def get_commands_status(self, node):
        """Retrieve the status of the commands issued to the agent.

        :param node: A Node object.
        :raises: AgentConnectionFailed if the agent cannot be reached or the
            request times out.
        :return: A list of command results, one for each command that has
            been issued to the agent. A typical result can be:

            ::

              {
                'command_name': <command name related to the result>,
                'command_params': <params related with the command>,
                'command_status': <current command status,
                                  e.g. 'RUNNING', 'SUCCEEDED', 'FAILED'>,
                'command_error': <error message if command execution
                                 failed>,
                'command_result': <command result if command execution
                                  succeeded, the value is command specific,
                                  e.g.:
                                  * a dictionary containing keys clean_result
                                    and clean_step for the command
                                    clean.execute_clean_step;
                                  * a dictionary containing keys deploy_result
                                    and deploy_step for the command
                                    deploy.execute_deploy_step;
                                  * a string representing result message for
                                    the command standby.cache_image;
                                  * None for the command standby.sync.>
              }
        """
        endpoint = self._get_command_url(node)
        LOG.debug('Fetching status of agent commands for node %s', node.uuid)
        try:
            response = self.session.get(
                endpoint, timeout=CONF.agent.command_timeout)
        except (requests.ConnectionError, requests.Timeout) as exc:
            failure = _(
                'Failed to connect to the agent running on node %(node)s '
                'to collect commands status. '
                'Error: %(error)s'
            ) % {'node': node.uuid, 'error': exc}
            LOG.error(failure)
            raise exception.AgentConnectionFailed(reason=failure)

        commands = response.json()['commands']

        # Build a one-line, human-readable summary purely for debug logging.
        summaries = []
        for command in commands:
            summaries.append(
                '%(cmd)s: result "%(res)s", error "%(err)s"' % {
                    'cmd': command.get('command_name'),
                    'res': command.get('command_result'),
                    'err': command.get('command_error'),
                })
        LOG.debug('Status of agent commands for node %(node)s: %(status)s',
                  {'node': node.uuid, 'status': '; '.join(summaries)})
        return commands
Exemplo n.º 6
0
 def _get_command_url(self, node):
     """Build the URL of the agent's commands endpoint for a node.

     :param node: A Node object; ``agent_url`` is read from its
                  driver_internal_info.
     :raises: AgentConnectionFailed if no ``agent_url`` is recorded.
     :returns: The commands endpoint URL as a string.
     """
     base_url = node.driver_internal_info.get('agent_url')
     if base_url:
         url_parts = {'agent_url': base_url,
                      'api_version': CONF.agent.agent_api_version}
         return '%(agent_url)s/%(api_version)s/commands/' % url_parts
     raise exception.AgentConnectionFailed(
         _('Agent driver requires '
           'agent_url in '
           'driver_internal_info'))
Exemplo n.º 7
0
 def _get():
     """Fetch the commands URL, mapping connection errors to Ironic ones.

     Uses ``self``, ``url``, ``node`` and ``expect_errors`` from the
     enclosing scope. Raises AgentConnectionFailed on a connection error
     or timeout.
     """
     try:
         return self.session.get(url, timeout=CONF.agent.command_timeout)
     except (requests.ConnectionError, requests.Timeout) as exc:
         failure = _(
             'Failed to connect to the agent running on node '
             '%(node)s to collect commands status. '
             'Error: %(error)s'
         ) % {'node': node.uuid, 'error': exc}
         # Failures the caller expects are only worth a debug message.
         if expect_errors:
             LOG.debug(failure)
         else:
             LOG.error(failure)
         raise exception.AgentConnectionFailed(reason=failure)
Exemplo n.º 8
0
    def _command(self, node, method, params, wait=False):
        """Issue a command to the agent running on a node.

        :param node: A Node object.
        :param method: Name of the command to be executed by the agent.
        :param params: A dictionary of parameters used to build the request
                       body.
        :param wait: True to wait for the command to finish executing, False
                     otherwise.
        :raises: AgentConnectionFailed when connecting to the agent fails or
                 times out.
        :raises: IronicException when the request fails for another reason
                 or the response is not valid JSON.
        :raises: AgentAPIError when the agent answers with an HTTP status
                 code of ``http_client.BAD_REQUEST`` or above.
        :returns: A dict containing command result from agent, see
                  get_commands_status for a sample.
        """
        endpoint = self._get_command_url(node)
        payload = self._get_command_body(method, params)
        query = {'wait': str(wait).lower()}
        LOG.debug('Executing agent command %(method)s for node %(node)s',
                  {'node': node.uuid, 'method': method})

        try:
            response = self.session.post(
                endpoint, params=query, data=payload,
                timeout=CONF.agent.command_timeout)
        except (requests.ConnectionError, requests.Timeout) as exc:
            failure = _(
                'Failed to connect to the agent running on node %(node)s '
                'for invoking command %(method)s. Error: %(error)s'
            ) % {'node': node.uuid, 'method': method, 'error': exc}
            LOG.error(failure)
            raise exception.AgentConnectionFailed(reason=failure)
        except requests.RequestException as exc:
            failure = _(
                'Error invoking agent command %(method)s for node '
                '%(node)s. Error: %(error)s'
            ) % {'method': method, 'node': node.uuid, 'error': exc}
            LOG.error(failure)
            raise exception.IronicException(failure)

        # TODO(russellhaering): real error handling
        try:
            decoded = response.json()
        except ValueError:
            failure = _(
                'Unable to decode response as JSON.\n'
                'Request URL: %(url)s\nRequest body: "%(body)s"\n'
                'Response status code: %(code)s\n'
                'Response: "%(response)s"'
            ) % {'response': response.text, 'body': payload,
                 'url': endpoint, 'code': response.status_code}
            LOG.error(failure)
            raise exception.IronicException(failure)

        http_status = response.status_code
        LOG.debug('Agent command %(method)s for node %(node)s returned '
                  'result %(res)s, error %(error)s, HTTP status code %(code)d',
                  {'node': node.uuid, 'method': method,
                   'res': decoded.get('command_result'),
                   'error': decoded.get('command_error'),
                   'code': http_status})

        if http_status >= http_client.BAD_REQUEST:
            LOG.error('Agent command %(method)s for node %(node)s failed. '
                      'Expected 2xx HTTP status code, got %(code)d.',
                      {'method': method, 'node': node.uuid,
                       'code': http_status})
            raise exception.AgentAPIError(node=node.uuid,
                                          status=http_status,
                                          error=decoded.get('faultstring'))

        return decoded
Exemplo n.º 9
0
    def _command(self, node, method, params, wait=False, poll=False):
        """Sends command to agent.

        :param node: A Node object.
        :param method: A string represents the command to be executed by
                       agent.
        :param params: A dictionary containing params used to form the request
                       body.
        :param wait: True to wait for the command to finish executing, False
                     otherwise.
        :param poll: Whether to poll the command until completion. Provides
                     a better alternative to `wait` for long-running commands.
                     Must not be combined with `wait`.
        :raises: AgentConnectionFailed when connecting to the agent fails or
                 times out.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command,
                 including any response with an HTTP status code of
                 ``http_client.BAD_REQUEST`` or above.
        :returns: A dict containing command result from agent, see
                  get_commands_status for a sample.
        """
        # `wait` and `poll` are mutually exclusive ways of waiting.
        assert not (wait and poll)

        url = self._get_command_url(node)
        body = self._get_command_body(method, params)
        request_params = {'wait': str(wait).lower()}
        # Only send the token when one has been recorded for this node.
        agent_token = node.driver_internal_info.get('agent_secret_token')
        if agent_token:
            request_params['agent_token'] = agent_token
        LOG.debug('Executing agent command %(method)s for node %(node)s', {
            'node': node.uuid,
            'method': method
        })

        try:
            response = self.session.post(url,
                                         params=request_params,
                                         data=body,
                                         verify=self._get_verify(node),
                                         timeout=CONF.agent.command_timeout)
        except (requests.ConnectionError, requests.Timeout) as e:
            msg = (_('Failed to connect to the agent running on node %(node)s '
                     'for invoking command %(method)s. Error: %(error)s') % {
                         'node': node.uuid,
                         'method': method,
                         'error': e
                     })
            LOG.error(msg)
            raise exception.AgentConnectionFailed(reason=msg)
        except requests.RequestException as e:
            msg = (_('Error invoking agent command %(method)s for node '
                     '%(node)s. Error: %(error)s') % {
                         'method': method,
                         'node': node.uuid,
                         'error': e
                     })
            LOG.error(msg)
            raise exception.IronicException(msg)

        # TODO(russellhaering): real error handling
        try:
            result = response.json()
        except ValueError:
            msg = _('Unable to decode response as JSON.\n'
                    'Request URL: %(url)s\nRequest body: "%(body)s"\n'
                    'Response status code: %(code)s\n'
                    'Response: "%(response)s"') % ({
                        'response': response.text,
                        'body': body,
                        'url': url,
                        'code': response.status_code
                    })
            LOG.error(msg)
            raise exception.IronicException(msg)

        error = result.get('command_error')
        LOG.debug(
            'Agent command %(method)s for node %(node)s returned '
            'result %(res)s, error %(error)s, HTTP status code %(code)d', {
                'node': node.uuid,
                'method': method,
                'res': result.get('command_result'),
                'error': error,
                'code': response.status_code
            })
        if response.status_code >= http_client.BAD_REQUEST:
            faultstring = result.get('faultstring')
            # The error body may carry no faultstring at all. Guard the
            # membership test so that a missing value still results in the
            # AgentAPIError below rather than a TypeError on ``in None``.
            if faultstring and 'agent_token' in faultstring:
                LOG.error(
                    'Agent command %(method)s for node %(node)s '
                    'failed. Expected 2xx HTTP status code, got '
                    '%(code)d. Error suggests an older ramdisk '
                    'which does not support ``agent_token``. '
                    'This is a fatal error.', {
                        'method': method,
                        'node': node.uuid,
                        'code': response.status_code
                    })
            else:
                LOG.error(
                    'Agent command %(method)s for node %(node)s failed. '
                    'Expected 2xx HTTP status code, got %(code)d.', {
                        'method': method,
                        'node': node.uuid,
                        'code': response.status_code
                    })
            raise exception.AgentAPIError(node=node.uuid,
                                          status=response.status_code,
                                          error=faultstring)

        # NOTE(review): presumably raises when the agent reported a
        # TypeError for this command -- confirm against the helper.
        self._raise_if_typeerror(result, node, method)

        if poll:
            # The polled value replaces the immediate response.
            result = self._wait_for_command(node, method)

        return result
Exemplo n.º 10
0
    def _command(self, node, method, params, wait=False,
                 command_timeout_factor=1):
        """Sends command to agent.

        :param node: A Node object.
        :param method: A string represents the command to be executed by
                       agent.
        :param params: A dictionary containing params used to form the request
                       body.
        :param wait: True to wait for the command to finish executing, False
                     otherwise.
        :param command_timeout_factor: An integer, default 1, by which to
                                       multiply the [agent]command_timeout
                                       value. This is intended for use with
                                       extremely long running commands to
                                       the agent ramdisk where a general
                                       timeout value should not be extended
                                       in all cases.
        :raises: AgentConnectionFailed when connecting to the agent fails or
                 times out.
        :raises: IronicException when failed to issue the request or there was
                 a malformed response from the agent.
        :raises: AgentAPIError when agent failed to execute specified command:
                 either the HTTP status code is ``http_client.BAD_REQUEST``
                 or above, or the reported command error has the type
                 ``TypeError``.
        :returns: A dict containing command result from agent, see
                  get_commands_status for a sample.
        """
        url = self._get_command_url(node)
        body = self._get_command_body(method, params)
        request_params = {
            'wait': str(wait).lower()
        }
        # Only send the token when one has been recorded for this node.
        agent_token = node.driver_internal_info.get('agent_secret_token')
        if agent_token:
            request_params['agent_token'] = agent_token
        LOG.debug('Executing agent command %(method)s for node %(node)s',
                  {'node': node.uuid, 'method': method})

        try:
            response = self.session.post(
                url, params=request_params, data=body,
                timeout=CONF.agent.command_timeout * command_timeout_factor)
        except (requests.ConnectionError, requests.Timeout) as e:
            msg = (_('Failed to connect to the agent running on node %(node)s '
                     'for invoking command %(method)s. Error: %(error)s') %
                   {'node': node.uuid, 'method': method, 'error': e})
            LOG.error(msg)
            raise exception.AgentConnectionFailed(reason=msg)
        except requests.RequestException as e:
            msg = (_('Error invoking agent command %(method)s for node '
                     '%(node)s. Error: %(error)s') %
                   {'method': method, 'node': node.uuid, 'error': e})
            LOG.error(msg)
            raise exception.IronicException(msg)

        # TODO(russellhaering): real error handling
        try:
            result = response.json()
        except ValueError:
            msg = _(
                'Unable to decode response as JSON.\n'
                'Request URL: %(url)s\nRequest body: "%(body)s"\n'
                'Response status code: %(code)s\n'
                'Response: "%(response)s"'
            ) % ({'response': response.text, 'body': body, 'url': url,
                  'code': response.status_code})
            LOG.error(msg)
            raise exception.IronicException(msg)

        error = result.get('command_error')
        exc_type = None
        if error:
            # if an error, we should see if a type field exists. This type
            # field may signal an exception that is compatibility based.
            # NOTE(review): assumes a truthy command_error is a dict-like
            # object supporting .get() -- confirm against the agent API.
            exc_type = error.get('type')

        LOG.debug('Agent command %(method)s for node %(node)s returned '
                  'result %(res)s, error %(error)s, HTTP status code %(code)d',
                  {'node': node.uuid, 'method': method,
                   'res': result.get('command_result'),
                   'error': error,
                   'code': response.status_code})

        # HTTP-level failures are reported first, before the command error
        # type is looked at.
        if response.status_code >= http_client.BAD_REQUEST:
            LOG.error('Agent command %(method)s for node %(node)s failed. '
                      'Expected 2xx HTTP status code, got %(code)d.',
                      {'method': method, 'node': node.uuid,
                       'code': response.status_code})
            raise exception.AgentAPIError(node=node.uuid,
                                          status=response.status_code,
                                          error=result.get('faultstring'))
        # A TypeError reported by the agent is treated as a failure even
        # though the HTTP status code was not an error.
        if exc_type == 'TypeError':
            LOG.error('Agent command %(method)s for node %(node)s failed. '
                      'Internal %(exc_type)s error detected: Error %(error)s',
                      {'method': method, 'node': node.uuid,
                       'exc_type': exc_type, 'error': error})
            raise exception.AgentAPIError(node=node.uuid,
                                          status=error.get('code'),
                                          error=result.get('faultstring'))

        return result