def test_do_next_deploy_step_oob_reboot_fail(self, mock_execute):
    # A deploy step that fails while no reboot was requested must move the
    # node to DEPLOYFAIL.
    target_state = states.ACTIVE
    internal_info = {'deploy_steps': self.deploy_steps,
                     'deploy_step_index': None}

    self._start_service()
    node = obj_utils.create_test_node(
        self.context,
        driver='fake-hardware',
        provision_state=states.DEPLOYING,
        target_provision_state=target_state,
        last_error=None,
        driver_internal_info=internal_info,
        deploy_step={})
    # The mocked step execution raises AgentConnectionFailed.
    mock_execute.side_effect = exception.AgentConnectionFailed(
        reason='failed')

    with task_manager.acquire(
            self.context, node.uuid, shared=False) as task:
        deployments.do_next_deploy_step(task, 0, mock.ANY)

    self._stop_service()
    node.refresh()

    # The node ends up in DEPLOYFAIL, keeps its target state and has the
    # step bookkeeping cleared.
    self.assertEqual(states.DEPLOYFAIL, node.provision_state)
    self.assertEqual(target_state, node.target_provision_state)
    self.assertEqual({}, node.deploy_step)
    info = node.driver_internal_info
    self.assertNotIn('deploy_step_index', info)
    self.assertNotIn('skip_current_deploy_step', info)
    self.assertIsNotNone(node.last_error)
    # Only the first step was attempted.
    first_step = self.deploy_steps[0]
    mock_execute.assert_called_once_with(mock.ANY, mock.ANY, first_step)
def test_do_next_clean_step_oob_reboot_fail(self, tear_mock, mock_execute):
    # A clean step that fails while no reboot was requested must move the
    # node to CLEANFAIL.
    target_state = states.MANAGEABLE
    internal_info = {
        'clean_steps': self.clean_steps,
        'clean_step_index': None,
    }
    node = obj_utils.create_test_node(
        self.context,
        driver='fake-hardware',
        provision_state=states.CLEANING,
        target_provision_state=target_state,
        last_error=None,
        driver_internal_info=internal_info,
        clean_step={})
    # The mocked step execution raises AgentConnectionFailed.
    mock_execute.side_effect = exception.AgentConnectionFailed(
        reason='failed')

    with task_manager.acquire(
            self.context, node.uuid, shared=False) as task:
        cleaning.do_next_clean_step(task, 0)
        # Checked while the task is still held because the assertion
        # reads task.driver.
        tear_mock.assert_called_once_with(task.driver.deploy, task)

    node.refresh()

    # The node ends up in CLEANFAIL, keeps its target state, has the step
    # bookkeeping cleared and is put into maintenance.
    self.assertEqual(states.CLEANFAIL, node.provision_state)
    self.assertEqual(target_state, node.target_provision_state)
    self.assertEqual({}, node.clean_step)
    info = node.driver_internal_info
    self.assertNotIn('clean_step_index', info)
    self.assertNotIn('skip_current_clean_step', info)
    self.assertIsNotNone(node.last_error)
    self.assertTrue(node.maintenance)
    # Only the first step was attempted.
    first_step = self.clean_steps[0]
    mock_execute.assert_called_once_with(mock.ANY, mock.ANY, first_step)
def test_do_next_deploy_step_oob_reboot(self, mock_execute):
    # When a deploy step fails with an agent connection error while
    # 'deployment_reboot' is set in driver_internal_info, the node is
    # expected to go to DEPLOYWAIT rather than DEPLOYFAIL.
    tgt_prov_state = states.ACTIVE

    self._start_service()
    node = obj_utils.create_test_node(
        self.context, driver='fake-hardware',
        provision_state=states.DEPLOYING,
        target_provision_state=tgt_prov_state,
        last_error=None,
        driver_internal_info={'deploy_steps': self.deploy_steps,
                              'deploy_step_index': None,
                              'deployment_reboot': True},
        # This is a deploy test, so seed the deploy step (not the clean
        # step), matching test_do_next_deploy_step_oob_reboot_fail.
        deploy_step={})
    # The mocked step execution raises AgentConnectionFailed.
    mock_execute.side_effect = exception.AgentConnectionFailed(
        reason='failed')

    with task_manager.acquire(
            self.context, node.uuid, shared=False) as task:
        deployments.do_next_deploy_step(task, 0)

    self._stop_service()
    node.refresh()

    # Make sure we go to DEPLOYWAIT and that the current step is kept so
    # it can be resumed.
    self.assertEqual(states.DEPLOYWAIT, node.provision_state)
    self.assertEqual(tgt_prov_state, node.target_provision_state)
    self.assertEqual(self.deploy_steps[0], node.deploy_step)
    self.assertEqual(0, node.driver_internal_info['deploy_step_index'])
    self.assertFalse(node.driver_internal_info['skip_current_deploy_step'])
    mock_execute.assert_called_once_with(
        mock.ANY, mock.ANY, self.deploy_steps[0])
def _handle_timeout_on_command_execution(self, node, method, params, error):
    """Try to recover the result of a command whose invocation failed.

    :param node: A Node object.
    :param method: Name of the command that was being invoked.
    :param params: Parameters the command was invoked with.
    :param error: The original failure; it is only used in the error
        message when no result can be recovered.
    :raises: AgentConnectionFailed when
        ``_status_if_last_command_matches`` fails or returns None.
    :returns: The non-None value returned by
        ``_status_if_last_command_matches``.
    """
    # NOTE(dtantsur): it is possible, especially with eventlet+TLS, that
    # agent receives a command but fails to return the result to Ironic.
    # To avoid a failure, check if the last command is the one we're trying
    # to execute.
    try:
        status = self._status_if_last_command_matches(node, method, params)
    except Exception as exc:
        # Best effort: a failed lookup is logged and handled like "no
        # matching command" below.
        status = None
        lookup_failure = (
            _('Failed to connect to the agent running on node '
              '%(node)s for checking the last command status '
              'after failing to invoke command %(method)s. '
              'Error: %(error)s')
            % {'node': node.uuid, 'method': method, 'error': exc})
        LOG.error(lookup_failure)

    if status is not None:
        return status

    failure = (
        _('Failed to connect to the agent running on node %(node)s '
          'for invoking command %(method)s. Error: %(error)s')
        % {'node': node.uuid, 'method': method, 'error': error})
    LOG.error(failure)
    raise exception.AgentConnectionFailed(reason=failure)
def get_commands_status(self, node):
    """Fetch the list of command results from the agent.

    :param node: A Node object.
    :raises: AgentConnectionFailed if the request to the agent fails with
        a connection error or a timeout.
    :return: The ``commands`` list from the agent response; every item
        describes one command that was issued to the agent. A typical
        item looks like:

        ::

          {
            'command_name': <name of the command>,
            'command_params': <parameters of the command>,
            'command_status': <current status, e.g. 'RUNNING',
                              'SUCCEEDED', 'FAILED'>,
            'command_error': <error message if the command failed>,
            'command_result': <command specific result if the command
                              succeeded, e.g.:
                              * a dictionary with keys clean_result and
                                clean_step for
                                clean.execute_clean_step;
                              * a dictionary with keys deploy_result and
                                deploy_step for
                                deploy.execute_deploy_step;
                              * a result message string for
                                standby.cache_image;
                              * None for standby.sync.>
          }
    """
    endpoint = self._get_command_url(node)
    LOG.debug('Fetching status of agent commands for node %s', node.uuid)
    try:
        resp = self.session.get(endpoint,
                                timeout=CONF.agent.command_timeout)
    except (requests.ConnectionError, requests.Timeout) as exc:
        failure = (
            _('Failed to connect to the agent running on node %(node)s '
              'to collect commands status. '
              'Error: %(error)s')
            % {'node': node.uuid, 'error': exc})
        LOG.error(failure)
        raise exception.AgentConnectionFailed(reason=failure)

    commands = resp.json()['commands']

    # Build a compact one-line summary of all commands for the debug log.
    summaries = []
    for command in commands:
        summaries.append(
            '%(cmd)s: result "%(res)s", error "%(err)s"'
            % {'cmd': command.get('command_name'),
               'res': command.get('command_result'),
               'err': command.get('command_error')})
    status = '; '.join(summaries)
    LOG.debug('Status of agent commands for node %(node)s: %(status)s',
              {'node': node.uuid, 'status': status})
    return commands
def _get_command_url(self, node):
    """Build the agent commands endpoint URL for a node.

    :param node: A Node object whose ``driver_internal_info`` is read for
        ``agent_url``.
    :raises: AgentConnectionFailed if ``agent_url`` is missing or empty.
    :returns: ``<agent_url>/<[agent]agent_api_version>/commands/``.
    """
    base_url = node.driver_internal_info.get('agent_url')
    if base_url:
        url_parts = {'agent_url': base_url,
                     'api_version': CONF.agent.agent_api_version}
        return '%(agent_url)s/%(api_version)s/commands/' % url_parts

    raise exception.AgentConnectionFailed(
        _('Agent driver requires agent_url in driver_internal_info'))
def _get():
    # Closure: ``self``, ``url``, ``node`` and ``expect_errors`` come from
    # the enclosing scope. Performs the GET and converts connection errors
    # and timeouts into AgentConnectionFailed.
    try:
        return self.session.get(url, timeout=CONF.agent.command_timeout)
    except (requests.ConnectionError, requests.Timeout) as exc:
        failure = (
            _('Failed to connect to the agent running on node '
              '%(node)s to collect commands status. '
              'Error: %(error)s')
            % {'node': node.uuid, 'error': exc})
        # Anticipated failures are only logged at debug level.
        if expect_errors:
            LOG.debug(failure)
        else:
            LOG.error(failure)
        raise exception.AgentConnectionFailed(reason=failure)
def _command(self, node, method, params, wait=False):
    """Post a command to the agent and return its decoded response.

    :param node: A Node object.
    :param method: A string naming the command to be executed by the
                   agent.
    :param params: A dictionary with the parameters used to build the
                   request body.
    :param wait: True to wait for the command to finish executing, False
                 otherwise.
    :raises: AgentConnectionFailed when the request fails with a
             connection error or a timeout.
    :raises: IronicException when the request fails in another way or the
             response is not valid JSON.
    :raises: AgentAPIError when the agent answers with an HTTP status
             code of 400 or above.
    :returns: A dict containing command result from agent, see
              get_commands_status for a sample.
    """
    url = self._get_command_url(node)
    body = self._get_command_body(method, params)
    query = {'wait': str(wait).lower()}
    LOG.debug('Executing agent command %(method)s for node %(node)s',
              {'node': node.uuid, 'method': method})

    try:
        response = self.session.post(url, params=query, data=body,
                                     timeout=CONF.agent.command_timeout)
    except (requests.ConnectionError, requests.Timeout) as exc:
        failure = (
            _('Failed to connect to the agent running on node %(node)s '
              'for invoking command %(method)s. Error: %(error)s')
            % {'node': node.uuid, 'method': method, 'error': exc})
        LOG.error(failure)
        raise exception.AgentConnectionFailed(reason=failure)
    except requests.RequestException as exc:
        failure = (
            _('Error invoking agent command %(method)s for node '
              '%(node)s. Error: %(error)s')
            % {'method': method, 'node': node.uuid, 'error': exc})
        LOG.error(failure)
        raise exception.IronicException(failure)

    # TODO(russellhaering): real error handling
    try:
        result = response.json()
    except ValueError:
        failure = _(
            'Unable to decode response as JSON.\n'
            'Request URL: %(url)s\nRequest body: "%(body)s"\n'
            'Response status code: %(code)s\n'
            'Response: "%(response)s"'
        ) % {'response': response.text, 'body': body, 'url': url,
             'code': response.status_code}
        LOG.error(failure)
        raise exception.IronicException(failure)

    LOG.debug('Agent command %(method)s for node %(node)s returned '
              'result %(res)s, error %(error)s, HTTP status code %(code)d',
              {'node': node.uuid, 'method': method,
               'res': result.get('command_result'),
               'error': result.get('command_error'),
               'code': response.status_code})

    if response.status_code < http_client.BAD_REQUEST:
        return result

    # Any 4xx/5xx answer from the agent is reported as an API error.
    LOG.error('Agent command %(method)s for node %(node)s failed. '
              'Expected 2xx HTTP status code, got %(code)d.',
              {'method': method, 'node': node.uuid,
               'code': response.status_code})
    raise exception.AgentAPIError(node=node.uuid,
                                  status=response.status_code,
                                  error=result.get('faultstring'))
def _command(self, node, method, params, wait=False, poll=False):
    """Sends command to agent.

    :param node: A Node object.
    :param method: A string represents the command to be executed by
                   agent.
    :param params: A dictionary containing params used to form the request
                   body.
    :param wait: True to wait for the command to finish executing, False
                 otherwise.
    :param poll: Whether to poll the command until completion. Provides
                 a better alternative to `wait` for long-running commands.
                 Must not be combined with `wait`.
    :raises: AgentConnectionFailed when the request fails with a
             connection error or a timeout.
    :raises: IronicException when failed to issue the request or there was
             a malformed response from the agent.
    :raises: AgentAPIError when agent failed to execute specified command.
    :returns: A dict containing command result from agent, see
              get_commands_status for a sample.
    """
    # wait and poll are mutually exclusive ways to obtain the result.
    assert not (wait and poll)

    url = self._get_command_url(node)
    body = self._get_command_body(method, params)
    request_params = {'wait': str(wait).lower()}
    # Only send the token when one has been recorded for the node.
    agent_token = node.driver_internal_info.get('agent_secret_token')
    if agent_token:
        request_params['agent_token'] = agent_token
    LOG.debug('Executing agent command %(method)s for node %(node)s',
              {'node': node.uuid, 'method': method})

    try:
        response = self.session.post(url, params=request_params,
                                     data=body,
                                     verify=self._get_verify(node),
                                     timeout=CONF.agent.command_timeout)
    except (requests.ConnectionError, requests.Timeout) as e:
        msg = (_('Failed to connect to the agent running on node %(node)s '
                 'for invoking command %(method)s. Error: %(error)s') %
               {'node': node.uuid, 'method': method, 'error': e})
        LOG.error(msg)
        raise exception.AgentConnectionFailed(reason=msg)
    except requests.RequestException as e:
        msg = (_('Error invoking agent command %(method)s for node '
                 '%(node)s. Error: %(error)s') %
               {'method': method, 'node': node.uuid, 'error': e})
        LOG.error(msg)
        raise exception.IronicException(msg)

    # TODO(russellhaering): real error handling
    try:
        result = response.json()
    except ValueError:
        msg = _(
            'Unable to decode response as JSON.\n'
            'Request URL: %(url)s\nRequest body: "%(body)s"\n'
            'Response status code: %(code)s\n'
            'Response: "%(response)s"'
        ) % ({'response': response.text, 'body': body, 'url': url,
              'code': response.status_code})
        LOG.error(msg)
        raise exception.IronicException(msg)

    error = result.get('command_error')
    LOG.debug('Agent command %(method)s for node %(node)s returned '
              'result %(res)s, error %(error)s, HTTP status code %(code)d',
              {'node': node.uuid, 'method': method,
               'res': result.get('command_result'),
               'error': error,
               'code': response.status_code})

    if response.status_code >= http_client.BAD_REQUEST:
        faultstring = result.get('faultstring')
        # The error body may carry no faultstring at all; guard the
        # membership test so that a missing value does not raise a
        # TypeError that would hide the AgentAPIError raised below.
        if faultstring and 'agent_token' in faultstring:
            LOG.error('Agent command %(method)s for node %(node)s '
                      'failed. Expected 2xx HTTP status code, got '
                      '%(code)d. Error suggests an older ramdisk '
                      'which does not support ``agent_token``. '
                      'This is a fatal error.',
                      {'method': method, 'node': node.uuid,
                       'code': response.status_code})
        else:
            LOG.error('Agent command %(method)s for node %(node)s failed. '
                      'Expected 2xx HTTP status code, got %(code)d.',
                      {'method': method, 'node': node.uuid,
                       'code': response.status_code})
        raise exception.AgentAPIError(node=node.uuid,
                                      status=response.status_code,
                                      error=faultstring)

    # NOTE(review): presumably raises when the agent reports an internal
    # TypeError for the command -- confirm against _raise_if_typeerror.
    self._raise_if_typeerror(result, node, method)

    if poll:
        # The value returned by _wait_for_command replaces the initial
        # response.
        result = self._wait_for_command(node, method)

    return result
def _command(self, node, method, params, wait=False,
             command_timeout_factor=1):
    """Post a command to the agent and return its decoded response.

    :param node: A Node object.
    :param method: A string naming the command to be executed by the
                   agent.
    :param params: A dictionary with the parameters used to build the
                   request body.
    :param wait: True to wait for the command to finish executing, False
                 otherwise.
    :param command_timeout_factor: An integer, default 1, by which the
                                   [agent]command_timeout value is
                                   multiplied. Intended for extremely
                                   long running commands, so that the
                                   general timeout does not have to be
                                   raised for every command.
    :raises: AgentConnectionFailed when the request fails with a
             connection error or a timeout.
    :raises: IronicException when the request fails in another way or the
             response is not valid JSON.
    :raises: AgentAPIError when the agent answers with an HTTP status
             code of 400 or above, or reports a command error of type
             ``TypeError``.
    :returns: A dict containing command result from agent, see
              get_commands_status for a sample.
    """
    url = self._get_command_url(node)
    body = self._get_command_body(method, params)
    query = {'wait': str(wait).lower()}
    # Only send the token when one has been recorded for the node.
    token = node.driver_internal_info.get('agent_secret_token')
    if token:
        query['agent_token'] = token

    LOG.debug('Executing agent command %(method)s for node %(node)s',
              {'node': node.uuid, 'method': method})

    try:
        response = self.session.post(
            url, params=query, data=body,
            timeout=CONF.agent.command_timeout * command_timeout_factor)
    except (requests.ConnectionError, requests.Timeout) as exc:
        failure = (
            _('Failed to connect to the agent running on node %(node)s '
              'for invoking command %(method)s. Error: %(error)s')
            % {'node': node.uuid, 'method': method, 'error': exc})
        LOG.error(failure)
        raise exception.AgentConnectionFailed(reason=failure)
    except requests.RequestException as exc:
        failure = (
            _('Error invoking agent command %(method)s for node '
              '%(node)s. Error: %(error)s')
            % {'method': method, 'node': node.uuid, 'error': exc})
        LOG.error(failure)
        raise exception.IronicException(failure)

    # TODO(russellhaering): real error handling
    try:
        result = response.json()
    except ValueError:
        failure = _(
            'Unable to decode response as JSON.\n'
            'Request URL: %(url)s\nRequest body: "%(body)s"\n'
            'Response status code: %(code)s\n'
            'Response: "%(response)s"'
        ) % {'response': response.text, 'body': body, 'url': url,
             'code': response.status_code}
        LOG.error(failure)
        raise exception.IronicException(failure)

    error = result.get('command_error')
    # When an error is reported, look for its type field: the type may
    # signal an exception that is compatibility based.
    exc_type = error.get('type') if error else None

    LOG.debug('Agent command %(method)s for node %(node)s returned '
              'result %(res)s, error %(error)s, HTTP status code %(code)d',
              {'node': node.uuid, 'method': method,
               'res': result.get('command_result'),
               'error': error,
               'code': response.status_code})

    if response.status_code >= http_client.BAD_REQUEST:
        LOG.error('Agent command %(method)s for node %(node)s failed. '
                  'Expected 2xx HTTP status code, got %(code)d.',
                  {'method': method, 'node': node.uuid,
                   'code': response.status_code})
        raise exception.AgentAPIError(node=node.uuid,
                                      status=response.status_code,
                                      error=result.get('faultstring'))

    if exc_type != 'TypeError':
        return result

    # A TypeError reported by the agent is raised as an API error even
    # though the HTTP status code was below 400.
    LOG.error('Agent command %(method)s for node %(node)s failed. '
              'Internal %(exc_type)s error detected: Error %(error)s',
              {'method': method, 'node': node.uuid,
               'exc_type': exc_type, 'error': error})
    raise exception.AgentAPIError(node=node.uuid,
                                  status=error.get('code'),
                                  error=result.get('faultstring'))