def test_dcos_diagnostics_units_unit_nodes_node(dcos_api_session):
    """
    test a specific node for a specific unit, endpoint /system/health/v1/units/<unit>/nodes/<node>

    For every master, every unit id listed by `/units` and every node listed for that unit is
    queried, and the per-node response is checked to contain exactly the required fields.
    """
    required_node_fields = ['host_ip', 'health', 'role', 'output', 'help']

    for master in dcos_api_session.masters:
        units_response = check_json(dcos_api_session.health.get('/units', node=master))
        pulled_units = [unit['id'] for unit in units_response['units']]

        for unit in pulled_units:
            nodes_response = check_json(
                dcos_api_session.health.get('/units/{}/nodes'.format(unit), node=master))
            pulled_nodes = [node['host_ip'] for node in nodes_response['nodes']]
            logging.info('pulled nodes: {}'.format(pulled_nodes))

            for node in pulled_nodes:
                node_response = check_json(
                    dcos_api_session.health.get('/units/{}/nodes/{}'.format(unit, node), node=master))

                # join (not format) the field names so the failure message actually lists them
                assert len(node_response) == len(required_node_fields), 'required fields: {}'.format(
                    ', '.join(required_node_fields))

                for required_node_field in required_node_fields:
                    assert required_node_field in node_response, \
                        'field {} must be set'.format(required_node_field)

                # host_ip, health, role, help cannot be empty
                assert node_response['host_ip'], 'host_ip field cannot be empty'
                assert node_response['health'] in [0, 1], 'health must be 0 or 1'
                assert node_response['role'], 'role field cannot be empty'
                assert node_response['help'], 'help field cannot be empty'
def test_dcos_diagnostics_units(dcos_api_session):
    """
    test a list of collected units, endpoint /system/health/v1/units

    The expected set is the union of unit ids reported on `/` by every master and every agent;
    each master's `/units` listing must match that set exactly.
    """
    # get all unique unit names (masters are queried first, then agents)
    all_units = set()
    for node_group in (dcos_api_session.masters, dcos_api_session.all_slaves):
        for node in node_group:
            node_response = check_json(dcos_api_session.health.get('/', node=node))
            all_units.update(unit['id'] for unit in node_response['units'])

    # test against masters
    for master in dcos_api_session.masters:
        units_response = check_json(dcos_api_session.health.get('/units', node=master))
        validate_units(units_response['units'])

        pulled_units = [unit['id'] for unit in units_response['units']]
        logging.info('collected units: {}'.format(pulled_units))

        pulled_set = set(pulled_units)
        diff = pulled_set.symmetric_difference(all_units)
        assert pulled_set == all_units, ('not all units have been collected by dcos-diagnostics '
                                         'puller, missing: {}'.format(diff))
def test_start_diagnostics_job_error(mock_uuid):
    """
    starting a diagnostics job must surface an HTTPError when the bundle endpoint answers 507

    `mock_uuid` is not referenced in the body; presumably the fixture pins the generated
    bundle id to the one in the mocked URL -- verify against the fixture definition.
    """
    error_payload = {
        'code': 507,
        'error': 'could not create bundle f053c58c-b9ce-11e9-8c5b-38d54714bf36 workdir',
    }
    responses.add(
        responses.PUT,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        json=error_payload,
        status=507)

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    dcos_api_session = dcos_api.DcosApiSession(**session_args)
    diagnostics = Diagnostics(
        default_url=dcos_api_session.default_url.copy(path='system/health/v1', ),
        masters=[],
        all_slaves=[],
        session=dcos_api_session.copy().session,
    )

    # either the request itself or the JSON check is expected to raise
    with TestCase().assertRaises(HTTPError):
        response = diagnostics.start_diagnostics_job()
        check_json(response)
def test_dcos_diagnostics_units(dcos_api_session: DcosApiSession) -> None:
    """
    test a list of collected units, endpoint /system/health/v1/units

    Builds the set of unit ids seen on `/` across all masters and agents, then requires
    `/units` on each master to report exactly the same set.
    """
    # get all unique unit names
    all_units = set()
    for group in (dcos_api_session.masters, dcos_api_session.all_slaves):
        for node in group:
            health = check_json(dcos_api_session.health.get('/', node=node))
            for unit in health['units']:
                all_units.add(unit['id'])

    # test against masters
    for master in dcos_api_session.masters:
        units_response = check_json(dcos_api_session.health.get('/units', node=master))
        units = units_response['units']
        validate_units(units)

        pulled_units = list(map(lambda unit: unit['id'], units))  # type: ignore
        logging.info('collected units: {}'.format(pulled_units))

        pulled_set = set(pulled_units)
        diff = pulled_set.symmetric_difference(all_units)
        assert pulled_set == all_units, (
            'not all units have been collected by dcos-diagnostics '
            'puller, missing: {}'.format(diff))
def test_dcos_diagnostics_units_unit_nodes(dcos_api_session):
    """
    test a list of nodes for a specific unit, endpoint /system/health/v1/units/<unit>/nodes

    Besides validating every unit's node list, checks that the master unit is reported on
    exactly the cluster masters and that the agent units together cover exactly the agents.
    """
    def get_nodes_from_response(response) -> List[str]:
        # translate every `host_ip` in the response through the detected-ip map
        assert 'nodes' in response, 'response must have field `nodes`. Got {}'.format(response)

        nodes_ip_map = make_nodes_ip_map(dcos_api_session)
        translated = []
        for entry in response['nodes']:
            assert 'host_ip' in entry, 'node response must have `host_ip` field. Got {}'.format(entry)
            assert entry['host_ip'] in nodes_ip_map, 'nodes_ip_map must have node {}. Got {}'.format(
                entry['host_ip'], nodes_ip_map)
            translated.append(nodes_ip_map.get(entry['host_ip']))
        return translated

    for master in dcos_api_session.masters:
        units_response = check_json(dcos_api_session.health.get('/units', node=master))
        pulled_units = [unit['id'] for unit in units_response['units']]

        for unit in pulled_units:
            unit_nodes = check_json(
                dcos_api_session.health.get('/units/{}/nodes'.format(unit), node=master))
            validate_node(unit_nodes['nodes'])

        # make sure dcos-mesos-master.service has master nodes and dcos-mesos-slave.service has agent nodes
        master_nodes = get_nodes_from_response(check_json(
            dcos_api_session.health.get('/units/dcos-mesos-master.service/nodes', node=master)))

        assert len(master_nodes) == len(dcos_api_session.masters), \
            '{} != {}'.format(master_nodes, dcos_api_session.masters)
        assert set(master_nodes) == set(dcos_api_session.masters), 'a list of difference: {}'.format(
            set(master_nodes).symmetric_difference(set(dcos_api_session.masters)))

        linux_agent_nodes = []
        if 'dcos-mesos-slave.service' in pulled_units:
            linux_response = check_json(
                dcos_api_session.health.get('/units/dcos-mesos-slave.service/nodes', node=master))
            linux_agent_nodes = get_nodes_from_response(linux_response)

        # NOTE(review): presence of the `WinRM` unit gates a query for the `mesos-agent` unit;
        # presumably both only exist on Windows agents -- confirm.
        windows_agent_nodes = []
        if 'WinRM' in pulled_units:
            windows_response = check_json(
                dcos_api_session.health.get('/units/mesos-agent/nodes', node=master))
            windows_agent_nodes = get_nodes_from_response(windows_response)

        assert set(linux_agent_nodes + windows_agent_nodes) == set(dcos_api_session.slaves)
def test_dcos_diagnostics_units_unit(dcos_api_session):
    """
    test a unit response in a right format, endpoint: /system/health/v1/units/<unit>

    Every unit id listed by `/units` on a master is fetched individually and validated.
    """
    for master in dcos_api_session.masters:
        listing = check_json(dcos_api_session.health.get('/units', node=master))
        unit_ids = [entry['id'] for entry in listing['units']]

        for unit_id in unit_ids:
            single_unit = check_json(
                dcos_api_session.health.get('/units/{}'.format(unit_id), node=master))
            # validate_units takes a list, so wrap the single unit
            validate_units([single_unit])
def test_dcos_diagnostics_nodes_node(dcos_api_session):
    """
    test a specific node endpoint /system/health/v1/nodes/<node>

    Every `host_ip` listed by `/nodes` on a master is fetched individually and validated.
    """
    for master in dcos_api_session.masters:
        # get a list of nodes
        listing = check_json(dcos_api_session.health.get('/nodes', node=master))
        host_ips = [entry['host_ip'] for entry in listing['nodes']]

        for host_ip in host_ips:
            single_node = check_json(
                dcos_api_session.health.get('/nodes/{}'.format(host_ip), node=master))
            # validate_node takes a list, so wrap the single node
            validate_node([single_node])
def test_dcos_diagnostics_nodes_node_units_unit(dcos_api_session):
    """
    test a specific unit for a specific node, endpoint /system/health/v1/nodes/<node>/units/<unit>

    Walks masters -> nodes -> units and validates each individual unit response.
    """
    for master in dcos_api_session.masters:
        nodes_listing = check_json(dcos_api_session.health.get('/nodes', node=master))
        host_ips = [entry['host_ip'] for entry in nodes_listing['nodes']]

        for host_ip in host_ips:
            units_listing = check_json(
                dcos_api_session.health.get('/nodes/{}/units'.format(host_ip), node=master))
            unit_ids = [entry['id'] for entry in units_listing['units']]

            for unit_id in unit_ids:
                unit_response = check_json(
                    dcos_api_session.health.get('/nodes/{}/units/{}'.format(host_ip, unit_id), node=master))
                validate_unit(unit_response)
def make_nodes_ip_map(dcos_api_session):
    """
    a helper function to make a map detected_ip -> external_ip

    The key is the `ip` field each node reports on `/`; the value is the address the
    session used to reach that node. Masters are processed before agents, so an agent
    entry overwrites a master entry with the same detected ip.
    """
    ip_map = {}
    for node_group in (dcos_api_session.masters, dcos_api_session.all_slaves):
        for external_ip in node_group:
            health = check_json(dcos_api_session.health.get('/', node=external_ip))
            ip_map[health['ip']] = external_ip
    return ip_map
def test_dcos_diagnostics_nodes_node_units(dcos_api_session):
    """
    test a list of units from a specific node, endpoint /system/health/v1/nodes/<node>/units

    The response for each node must be a single-field object whose `units` list validates.
    """
    for master in dcos_api_session.masters:
        # get a list of nodes
        nodes_listing = check_json(dcos_api_session.health.get('/nodes', node=master))
        host_ips = [entry['host_ip'] for entry in nodes_listing['nodes']]

        for host_ip in host_ips:
            units_response = check_json(
                dcos_api_session.health.get('/nodes/{}/units'.format(host_ip), node=master))

            assert len(units_response) == 1, 'unit response should have only 1 field `units`'
            assert 'units' in units_response
            validate_units(units_response['units'])
def test_systemd_units_health(dcos_api_session):
    """
    test all units and make sure the units are healthy. This test will fail if any of systemd unit is unhealthy,
    meaning it focuses on making sure the dcos_api_session is healthy, rather then testing dcos-diagnostics itself.

    The report is read from the first master only; every unhealthy (unit, node) pair is
    collected and reported together in a single AssertionError.
    """
    assert dcos_api_session.masters, "Must have at least 1 master node"
    report_response = check_json(dcos_api_session.health.get('/report', node=dcos_api_session.masters[0]))
    assert 'Units' in report_response, "Missing `Units` field in response"

    unhealthy_output = []
    for unit_name, unit_props in report_response['Units'].items():
        assert 'Health' in unit_props, "Unit {} missing `Health` field".format(unit_name)
        if unit_props['Health'] == 0:
            # healthy unit, nothing to drill into
            continue

        assert 'Nodes' in unit_props, "Unit {} missing `Nodes` field".format(unit_name)
        assert isinstance(unit_props['Nodes'], list), 'Field `Node` must be a list'

        for node in unit_props['Nodes']:
            assert 'Health' in node, 'Field `Health` is expected to be in nodes properties, got {}'.format(node)
            if node['Health'] == 0:
                continue

            assert 'Output' in node, 'Field `Output` is expected to be in nodes properties, got {}'.format(node)
            assert isinstance(node['Output'], dict), 'Field `Output` must be a dict'
            assert unit_name in node['Output'], 'unit {} must be in node Output, got {}'.format(
                unit_name, node['Output'])
            assert 'IP' in node, 'Field `IP` is expected to be in nodes properties, got {}'.format(node)

            message = 'Unhealthy unit {} has been found on node {}, health status {}. journalctl output {}'
            unhealthy_output.append(
                message.format(unit_name, node['IP'], unit_props['Health'], node['Output'][unit_name]))

    if unhealthy_output:
        raise AssertionError('\n'.join(unhealthy_output))
def test_start_diagnostics_job(mock_uuid):
    """
    starting a diagnostics job returns the JSON body served by the bundle endpoint

    `mock_uuid` is not referenced in the body; presumably the fixture pins the generated
    bundle id to the one in the mocked URL -- verify against the fixture definition.
    """
    started_job = {
        'id': 'f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        'status': 'Started',
        'started_at': '2019-08-05T11:31:53.238640571Z',
    }
    responses.add(
        responses.PUT,
        'http://leader.mesos/system/health/v1/diagnostics/f053c58c-b9ce-11e9-8c5b-38d54714bf36',
        # hand the mock its own copy so the expected value below stays independent
        json=dict(started_job))

    session_args = dcos_api.DcosApiSession.get_args_from_env()
    dcos_api_session = dcos_api.DcosApiSession(**session_args)
    diagnostics = Diagnostics(
        default_url=dcos_api_session.default_url.copy(path='system/health/v1', ),
        masters=[],
        all_slaves=[],
        session=dcos_api_session.copy().session,
    )

    response = diagnostics.start_diagnostics_job()
    assert check_json(response) == started_job
def _wait_for_diagnostics_job(self):
    """
    Report whether every bundle listed by `/diagnostics` has left the pending states.

    :returns: False if any bundle status is `Unknown`, `Started` or `InProgress`, True otherwise
    """
    pending_states = {'Unknown', 'Started', 'InProgress'}
    bundles = check_json(self.get('/diagnostics'))
    return not any(bundle['status'] in pending_states for bundle in bundles)
def wait_for_diagnostics_job(self, last_datapoint: dict):
    """
    Poll `/report/diagnostics/status/all` once and report whether no job is running.

    initial value of last_datapoint should be
    last_datapoint = {
        'time': None,
        'value': 0
    }

    `last_datapoint` is updated in place with the progress percentage of the first running
    job found (0 if none) and the current time.

    :returns: True when no entry reports `is_running`, False otherwise
    """
    statuses = check_json(self.get('/report/diagnostics/status/all'))

    percent_done = 0
    job_running = False
    for attributes in statuses.values():
        assert 'is_running' in attributes, '`is_running` field is missing in response'
        assert 'job_progress_percentage' in attributes, '`job_progress_percentage` field is missing'

        if not attributes['is_running']:
            continue

        # the first running job decides the outcome
        percent_done = attributes['job_progress_percentage']
        logging.info("Job is running. Progress: {}".format(percent_done))
        job_running = True
        break

    # if we ran this bit previously, compare the current datapoint with the one we saved
    previous_time = last_datapoint['time']
    if previous_time and last_datapoint['value']:
        elapsed = datetime.datetime.now() - previous_time
        assert elapsed < datetime.timedelta(seconds=15), (
            "Job is not progressing"
        )

    last_datapoint['value'] = percent_done
    last_datapoint['time'] = datetime.datetime.now()

    return not job_running
def test_dcos_diagnostics_selftest(dcos_api_session):
    """
    test invokes dcos-diagnostics `self test` functionality

    Every entry returned by `/selftest/info` on each master must carry `Success` and
    `ErrorMessage`, and `Success` must be truthy.
    """
    for master in dcos_api_session.masters:
        selftest_results = check_json(dcos_api_session.health.get('/selftest/info', node=master))

        for test_name, attrs in selftest_results.items():
            assert 'Success' in attrs, 'Field `Success` does not exist'
            assert 'ErrorMessage' in attrs, 'Field `ErrorMessage` does not exist'
            assert attrs['Success'], '{} failed, error message {}'.format(
                test_name, attrs['ErrorMessage'])
def test_dcos_diagnostics_report(dcos_api_session):
    """
    test dcos-diagnostics report endpoint /system/health/v1/report

    The report from each master must contain non-empty `Units` and `Nodes` sections.
    """
    for master in dcos_api_session.masters:
        report = check_json(dcos_api_session.health.get('/report', node=master))

        assert 'Units' in report
        assert len(report['Units']) > 0

        assert 'Nodes' in report
        assert len(report['Nodes']) > 0
def _legacy_get_diagnostics_reports(self) -> list:
    """
    List bundle file names reported by `/report/diagnostics/list/all`.

    Values of the response that are not non-empty lists are skipped.

    :returns: base names of the `file_name` of every listed bundle
    :rtype: list
    """
    bundle_lists = check_json(self.get('/report/diagnostics/list/all')).values()
    return [
        os.path.basename(entry['file_name'])
        for bundle_list in bundle_lists
        if isinstance(bundle_list, list) and len(bundle_list) > 0
        for entry in bundle_list
    ]
def test_dcos_diagnostics_units_unit_nodes(dcos_api_session):
    """
    test a list of nodes for a specific unit, endpoint /system/health/v1/units/<unit>/nodes

    Besides validating every unit's node list, checks that the master unit is reported on
    exactly the cluster masters and that the agent unit is reported on as many nodes as
    there are agents.
    """
    def get_nodes_from_response(response):
        # translate every `host_ip` in the response through the detected-ip map
        assert 'nodes' in response, 'response must have field `nodes`. Got {}'.format(response)

        nodes_ip_map = make_nodes_ip_map(dcos_api_session)
        translated = []
        for entry in response['nodes']:
            assert 'host_ip' in entry, 'node response must have `host_ip` field. Got {}'.format(entry)
            assert entry['host_ip'] in nodes_ip_map, 'nodes_ip_map must have node {}.Got {}'.format(
                entry['host_ip'], nodes_ip_map)
            translated.append(nodes_ip_map.get(entry['host_ip']))
        return translated

    for master in dcos_api_session.masters:
        units_response = check_json(dcos_api_session.health.get('/units', node=master))
        pulled_units = [unit['id'] for unit in units_response['units']]

        for unit in pulled_units:
            unit_nodes = check_json(
                dcos_api_session.health.get('/units/{}/nodes'.format(unit), node=master))
            validate_node(unit_nodes['nodes'])

        # make sure dcos-mesos-master.service has master nodes and dcos-mesos-slave.service has agent nodes
        master_nodes = get_nodes_from_response(check_json(
            dcos_api_session.health.get('/units/dcos-mesos-master.service/nodes', node=master)))

        assert len(master_nodes) == len(dcos_api_session.masters), \
            '{} != {}'.format(master_nodes, dcos_api_session.masters)
        assert set(master_nodes) == set(dcos_api_session.masters), 'a list of difference: {}'.format(
            set(master_nodes).symmetric_difference(set(dcos_api_session.masters)))

        agent_nodes = get_nodes_from_response(check_json(
            dcos_api_session.health.get('/units/dcos-mesos-slave.service/nodes', node=master)))

        assert len(agent_nodes) == len(dcos_api_session.slaves), \
            '{} != {}'.format(agent_nodes, dcos_api_session.slaves)
def _check_diagnostics_bundle_status(dcos_api_session):
    """
    validate diagnostics job status response

    Every entry of `/report/diagnostics/status/all` must contain all required status fields;
    extra fields are tolerated.
    """
    required_status_fields = [
        'is_running',
        'status',
        'last_bundle_dir',
        'job_started',
        'diagnostics_bundle_dir',
        'diagnostics_job_timeout_min',
        'journald_logs_since_hours',
        'diagnostics_job_get_since_url_timeout_min',
        'command_exec_timeout_sec',
        'diagnostics_partition_disk_usage_percent',
        'job_progress_percentage',
    ]

    status_by_node = check_json(dcos_api_session.health.get('/report/diagnostics/status/all'))
    for properties in status_by_node.values():
        for field in required_status_fields:
            assert field in properties, 'property {} not found'.format(field)
def _check_diagnostics_bundle_status(dcos_api_session):
    """
    validate diagnostics job status response

    Every entry of `/report/diagnostics/status/all` must contain exactly the required status
    fields: the field count is compared first, then each field is checked for presence.
    """
    required_status_fields = [
        'is_running',
        'status',
        'errors',
        'last_bundle_dir',
        'job_started',
        'job_ended',
        'job_duration',
        'diagnostics_bundle_dir',
        'diagnostics_job_timeout_min',
        'journald_logs_since_hours',
        'diagnostics_job_get_since_url_timeout_min',
        'command_exec_timeout_sec',
        'diagnostics_partition_disk_usage_percent',
        'job_progress_percentage',
    ]

    status_by_node = check_json(dcos_api_session.health.get('/report/diagnostics/status/all'))
    for properties in status_by_node.values():
        assert len(properties) == len(required_status_fields), \
            'response must have the following fields: {}'.format(required_status_fields)

        for field in required_status_fields:
            assert field in properties, 'property {} not found'.format(field)
def test_dcos_diagnostics_nodes(dcos_api_session):
    """
    test a list of nodes with statuses endpoint /system/health/v1/nodes

    Each master must list exactly as many nodes as the session knows (masters plus all
    agents), and every listed node must validate.
    """
    for master in dcos_api_session.masters:
        response = check_json(dcos_api_session.health.get('/nodes', node=master))

        assert len(response) == 1, 'nodes response must have only one field: nodes'
        assert 'nodes' in response
        assert isinstance(response['nodes'], list)

        expected_count = len(dcos_api_session.masters + dcos_api_session.all_slaves)
        assert len(response['nodes']) == expected_count, \
            'a number of nodes in response must be {}'.format(expected_count)

        # test nodes
        validate_node(response['nodes'])
def get_diagnostics_reports(self) -> list:
    """ Gets the complete list of diagnostics reports

    Values of the `/report/diagnostics/list/all` response that are not non-empty lists
    are skipped.

    :returns: list of report filenames
    :rtype: list
    """
    listing = check_json(self.get('/report/diagnostics/list/all'))

    report_names = []
    for bundle_list in listing.values():
        if not isinstance(bundle_list, list) or len(bundle_list) == 0:
            continue
        report_names.extend(os.path.basename(entry['file_name']) for entry in bundle_list)

    return report_names
def test_dcos_diagnostics_health(dcos_api_session: DcosApiSession) -> None:
    """
    test health endpoint /system/health/v1

    The `/` response of every master and every agent must contain exactly the required
    top-level fields, a non-empty `units` list whose entries contain exactly the required
    unit fields, and non-empty values for every top-level field other than `units`.
    """
    required_fields = [
        'units', 'hostname', 'ip', 'dcos_version', 'node_role', 'mesos_id',
        'dcos_diagnostics_version'
    ]
    required_fields_unit = [
        'id', 'health', 'output', 'description', 'help', 'name'
    ]

    def validate_health_response(response: dict) -> None:
        # shared by the master and agent checks, which apply identical rules
        assert len(response) == len(required_fields), \
            'response must have the following fields: {}'.format(', '.join(required_fields))

        # validate units
        assert 'units' in response, 'units field not found'
        assert isinstance(response['units'], list), 'units field must be a list'
        assert len(response['units']) > 0, 'units field cannot be empty'

        for unit in response['units']:
            assert len(unit) == len(required_fields_unit), \
                'unit must have the following fields: {}'.format(', '.join(required_fields_unit))
            for required_field_unit in required_fields_unit:
                # the message is formatted so a failure names the missing field
                assert required_field_unit in unit, \
                    '{} must be in a unit response'.format(required_field_unit)

            # id, health and description cannot be empty
            assert unit['id'], 'id field cannot be empty'
            assert unit['health'] in [0, 1], 'health field must be 0 or 1'
            assert unit['description'], 'description field cannot be empty'

        # check all required fields but units
        for required_field in required_fields[1:]:
            assert required_field in response, '{} field not found'.format(required_field)
            assert response[required_field], '{} cannot be empty'.format(required_field)

    # Check all masters dcos-diagnostics instances on base port since this is extra-cluster request (outside localhost)
    for host in dcos_api_session.masters:
        validate_health_response(check_json(dcos_api_session.health.get('/', node=host)))

    # Check all agents running dcos-diagnostics behind agent-adminrouter on 61001
    for host in dcos_api_session.slaves:
        validate_health_response(check_json(dcos_api_session.health.get('/', node=host)))
def test_dcos_diagnostics_health(dcos_api_session):
    """
    test health endpoint /system/health/v1

    The `/` response of every master and every agent must contain exactly the required
    top-level fields, a non-empty `units` list whose entries contain exactly the required
    unit fields, and non-empty values for every top-level field other than `units`.
    """
    required_fields = ['units', 'hostname', 'ip', 'dcos_version', 'node_role', 'mesos_id', 'dcos_diagnostics_version']
    required_fields_unit = ['id', 'health', 'output', 'description', 'help', 'name']

    def validate_health_response(response):
        # shared by the master and agent checks, which apply identical rules
        assert len(response) == len(required_fields), 'response must have the following fields: {}'.format(
            ', '.join(required_fields)
        )

        # validate units
        assert 'units' in response, 'units field not found'
        assert isinstance(response['units'], list), 'units field must be a list'
        assert len(response['units']) > 0, 'units field cannot be empty'

        for unit in response['units']:
            assert len(unit) == len(required_fields_unit), 'unit must have the following fields: {}'.format(
                ', '.join(required_fields_unit)
            )
            for required_field_unit in required_fields_unit:
                # the message is formatted so a failure names the missing field
                assert required_field_unit in unit, \
                    '{} must be in a unit response'.format(required_field_unit)

            # id, health and description cannot be empty
            assert unit['id'], 'id field cannot be empty'
            assert unit['health'] in [0, 1], 'health field must be 0 or 1'
            assert unit['description'], 'description field cannot be empty'

        # check all required fields but units
        for required_field in required_fields[1:]:
            assert required_field in response, '{} field not found'.format(required_field)
            assert response[required_field], '{} cannot be empty'.format(required_field)

    # Check all masters dcos-diagnostics instances on base port since this is extra-cluster request (outside localhost)
    for host in dcos_api_session.masters:
        validate_health_response(check_json(dcos_api_session.health.get('/', node=host)))

    # Check all agents running dcos-diagnostics behind agent-adminrouter on 61001
    for host in dcos_api_session.slaves:
        validate_health_response(check_json(dcos_api_session.health.get('/', node=host)))
def _get_diagnostics_reports(self) -> list:
    """
    List the ids of the bundles reported by `/diagnostics`, skipping deleted ones.

    :returns: `id` of every bundle whose `status` is not `Deleted`
    :rtype: list
    """
    bundle_ids = []
    for bundle in check_json(self.get('/diagnostics')):
        if bundle['status'] == 'Deleted':
            continue
        bundle_ids.append(bundle['id'])
    return bundle_ids