def check(args):
    """Probe the local Heat API and report availability and latency.

    Sends a GET to ``/build_info`` on the endpoint rebuilt from
    ``args.ip``, ``args.port`` and ``args.protocol``.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_heat'
    heat = get_openstack_client('orchestration')

    try:
        url = generate_local_endpoint(
            str(heat.get_endpoint()), args.ip, args.port,
            args.protocol, '/build_info')
        response = heat.session.get(url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        # Transport-level failure: the API is reported as down.
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        # NOTE(review): status_err presumably terminates the process;
        # otherwise is_up would be unbound below -- confirm.
        status_err(str(e), m_name=m_name)
    else:
        is_up = True
        elapsed_ms = response.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('heat_api_local_status', is_up, m_name=m_name)
    if is_up:
        # only want to send other metrics if api is up
        metric('heat_api_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
def check(args):
    """Probe the local Nova API and report per-status instance counts.

    Sends a GET to ``/servers/detail?all_tenants=True`` on the local
    endpoint, then emits availability, latency and one counter per entry
    of ``SERVER_STATUSES``.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_nova'
    nova = get_openstack_client('compute')

    try:
        url = generate_local_endpoint(
            str(nova.get_endpoint()), args.ip, args.port,
            args.protocol, '/servers/detail?all_tenants=True')
        response = nova.session.get(url, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = response.ok
        metric_bool('client_success', True, m_name=m_name)
        elapsed_ms = response.elapsed.total_seconds() * 1000
        # gather some metrics
        per_status = collections.Counter(
            server['status'] for server in response.json()['servers'])

    status_ok(m_name=m_name)
    metric_bool('nova_api_local_status', is_up, m_name=m_name)
    # only want to send other metrics if api is up
    if is_up:
        metric('nova_api_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
        for state in SERVER_STATUSES:
            metric('nova_instances_in_state_%s' % state, 'uint32',
                   per_status[state], 'instances')
def check(args):
    """Probe the local Ironic API and report availability and latency.

    Sends a GET to ``/nodes`` on the endpoint rebuilt from ``args.ip``,
    ``args.port`` and ``args.protocol``. The API counts as up only when
    the response status code is 200.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_ironic'
    ironic = get_openstack_client('baremetal')

    try:
        ironic_local_endpoint = generate_local_endpoint(
            str(ironic.get_endpoint()), args.ip, args.port,
            args.protocol, '/nodes')
        resp = ironic.session.get(ironic_local_endpoint)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        # Report the client failure too, as the sibling API checks do;
        # previously this path emitted no client_success metric at all.
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = resp.status_code == 200
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('ironic_api_local_status', is_up, m_name=m_name)
    if is_up:
        metric('ironic_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(args):
    """Emit cloud resource metrics derived from Nova hypervisor details.

    For every hypervisor, each entry of ``stats_mapping`` is read from the
    hypervisor object and stored under its metric name. ``total_vcpus``
    and ``total_memory`` are scaled by the allocation ratios in ``args``.

    :param args: parsed CLI arguments (``cpu_allocation_ratio``,
        ``mem_allocation_ratio``).
    """
    m_name = 'maas_nova'
    try:
        nova = get_openstack_client('compute')
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)
        # get some cloud stats
        hypervisors = [nova.get_hypervisor(h.id) for h in nova.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        for position, hypervisor in enumerate(hypervisors, 1):
            # Running counter exposed as a pseudo-stat on the object.
            setattr(hypervisor, 'count', position)
            # NOTE(review): values are overwritten per hypervisor, so only
            # the last hypervisor's numbers are reported -- confirm intent.
            for metric_name, spec in stats_mapping.items():
                if metric_name == 'total_vcpus':
                    ratio = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    ratio = args.mem_allocation_ratio
                else:
                    ratio = 1
                value = getattr(hypervisor, spec['stat_name']) * ratio
                entry = cloud_stats[metric_name]
                entry['value'] = value
                entry['unit'] = spec['unit']
                entry['type'] = spec['type']

    status_ok(m_name=m_name)
    for metric_name, entry in cloud_stats.items():
        metric('cloud_resource_%s' % metric_name,
               entry['type'], entry['value'], entry['unit'])
def check(args):
    """Probe the local Designate API and report availability and latency.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_designate'
    designate = get_openstack_client('dns')

    try:
        # NOTE(review): nothing below is bound when args.ip is falsy, so
        # the later reporting would fail -- confirm args.ip is mandatory.
        if args.ip:
            # Arbitrary call to /zones to ensure the local API is up
            url = generate_local_endpoint(
                str(designate.get_endpoint()), args.ip, args.port,
                args.protocol, '/zones')
            response = designate.session.get(url, timeout=180)
            elapsed_ms = response.elapsed.total_seconds() * 1000
            # NOTE(npawelek): At the time of converting to OpenStack SDK,
            # DNS is not yet fully integrated. Excluding integration with
            # the client directly until a later time.
            api_is_up = response.ok
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('designate_api_local_status', api_is_up, m_name=m_name)
    if api_is_up:
        # only want to send other metrics if api is up
        metric('designate_api_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
def check(args):
    """Emit cloud resource metrics derived from Nova hypervisor details.

    For every hypervisor, each entry of ``stats_mapping`` is read from the
    hypervisor object and stored under its metric name. ``total_vcpus``
    and ``total_memory`` are scaled by the allocation ratios in ``args``.

    :param args: parsed CLI arguments (``cpu_allocation_ratio``,
        ``mem_allocation_ratio``).
    """
    try:
        nova = get_openstack_client('compute')
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(e), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')
        # get some cloud stats
        stats = [nova.get_hypervisor(i.id) for i in nova.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        for count, stat in enumerate(stats, 1):
            # Running counter exposed as a pseudo-stat on the object.
            setattr(stat, 'count', count)
            # NOTE(review): values are overwritten per hypervisor, so only
            # the last hypervisor's numbers are reported -- confirm intent.
            # dict.items() works on Python 2 and 3; iteritems() was
            # Python 2 only.
            for metric_name, vals in stats_mapping.items():
                multiplier = 1
                if metric_name == 'total_vcpus':
                    multiplier = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    multiplier = args.mem_allocation_ratio
                cloud_stats[metric_name]['value'] = (
                    getattr(stat, vals['stat_name']) * multiplier)
                cloud_stats[metric_name]['unit'] = vals['unit']
                cloud_stats[metric_name]['type'] = vals['type']

    status_ok(m_name='maas_nova')
    # Iterating the dict yields its keys on both Python 2 and 3.
    for metric_name in cloud_stats:
        metric('cloud_resource_%s' % metric_name,
               cloud_stats[metric_name]['type'],
               cloud_stats[metric_name]['value'],
               cloud_stats[metric_name]['unit'])
def check(args):
    """Count Octavia load balancers whose provisioning status is ERROR.

    Sends a GET to ``/lbaas/loadbalancers`` on the local endpoint and,
    when the API answers successfully, reports how many of the returned
    load balancers have ``provisioning_status == 'ERROR'``.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_octavia'
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): resp is unbound when args.ip is falsy -- confirm
        # args.ip is mandatory for this check.
        if args.ip:
            octavia_local_endpoint = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers')
            resp = octavia.session.get(octavia_local_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('octavia_api_local_status', is_up, m_name=m_name)
    if is_up:
        # only want to send other metrics if api is up
        num = sum(1 for lb in resp.json()['loadbalancers']
                  if lb['provisioning_status'] == 'ERROR')
        # This is a count of load balancers, not a duration; the unit was
        # previously reported as 'ms'.
        metric('octavia_num_lb_in_error_status', 'uint32', num,
               'loadbalancers')
def check(args):
    """Probe the local Ironic API and report availability and latency.

    Sends a GET to ``/nodes`` on the endpoint rebuilt from ``args.ip``,
    ``args.port`` and ``args.protocol``. The API counts as up only when
    the response status code is 200.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_ironic'
    ironic = get_openstack_client('baremetal')

    try:
        ironic_local_endpoint = generate_local_endpoint(
            str(ironic.get_endpoint()), args.ip, args.port,
            args.protocol, '/nodes'
        )
        resp = ironic.session.get(ironic_local_endpoint)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        # Report the client failure too, as the sibling API checks do;
        # previously this path emitted no client_success metric at all.
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = resp.status_code == 200
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('ironic_api_local_status', is_up, m_name=m_name)
    if is_up:
        metric('ironic_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(args):
    """Count Octavia load balancers whose provisioning status is ERROR.

    Sends a GET to ``/lbaas/loadbalancers`` on the local endpoint and,
    when the API answers successfully, reports how many of the returned
    load balancers have ``provisioning_status == 'ERROR'``.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_octavia'
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): resp is unbound when args.ip is falsy -- confirm
        # args.ip is mandatory for this check.
        if args.ip:
            octavia_local_endpoint = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers'
            )
            resp = octavia.session.get(octavia_local_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('octavia_api_local_status', is_up, m_name=m_name)
    if is_up:
        # only want to send other metrics if api is up
        loadbalancers = resp.json()['loadbalancers']
        num = sum(1 for lb in loadbalancers
                  if lb['provisioning_status'] == 'ERROR')
        # This is a count of load balancers, not a duration; the unit was
        # previously reported as 'ms'.
        metric('octavia_num_lb_in_error_status', 'uint32', num,
               'loadbalancers')
def check(args):
    """Check that the local Heat API answers and time the request.

    Queries ``/build_info`` on the endpoint rebuilt from ``args.ip``,
    ``args.port`` and ``args.protocol``.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_heat'
    heat = get_openstack_client('orchestration')

    try:
        build_info_url = generate_local_endpoint(
            str(heat.get_endpoint()), args.ip, args.port,
            args.protocol, '/build_info'
        )
        reply = heat.session.get(build_info_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        # Could not talk to the API at all.
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = True
        response_ms = reply.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('heat_api_local_status', is_up, m_name=m_name)
    if is_up:
        # only want to send other metrics if api is up
        metric('heat_api_local_response_time', 'double',
               '%.3f' % response_ms, 'ms')
def check(args):
    """Probe the local Keystone API and report user and project counts.

    Sends a GET to the ``/services`` resource of the detected major API
    version, then emits availability, latency and the number of users and
    projects visible to the client.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_keystone'
    keystone = get_openstack_client('identity')
    services_url = "{}://{}:{}/v{}/services".format(
        args.protocol, args.ip, args.port,
        keystone.get_api_major_version()[0])

    try:
        response = keystone.session.get(services_url, timeout=180)
        elapsed_ms = response.elapsed.total_seconds() * 1000
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)
        # gather some vaguely interesting metrics to return
        project_count = len(list(keystone.projects()))
        user_count = len(list(keystone.users()))

    status_ok(m_name=m_name)
    metric_bool('keystone_api_local_status', is_up, m_name=m_name)
    if is_up:
        metric('keystone_api_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
        metric('keystone_user_count', 'uint32', user_count, 'users')
        metric('keystone_tenant_count', 'uint32', project_count, 'tenants')
def check(args):
    """Probe the local Octavia API and report availability and latency.

    Sends a GET to ``/lbaas/loadbalancers?limit=1`` on the local endpoint.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_octavia'
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): the response is unbound when args.ip is falsy --
        # confirm args.ip is mandatory for this check.
        if args.ip:
            url = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers?limit=1'
            )
            response = octavia.session.get(url, timeout=180)
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = response.ok
        metric_bool('client_success', True, m_name=m_name)
        elapsed_ms = response.elapsed.total_seconds() * 1000

    status_ok(m_name=m_name)
    metric_bool('octavia_api_local_status', is_up, m_name=m_name)
    if is_up:
        # only want to send other metrics if api is up
        metric('octavia_api_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
def check(args):
    """Probe the local Glance registry and report availability and latency.

    The version suffix is stripped from the catalog endpoint before the
    local ``/images`` URL is built. The registry counts as up only when
    the response status code is 200.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_glance'
    glance = get_openstack_client('image')

    try:
        # Remove version from returned endpoint
        glance_endpoint = str(glance.get_endpoint().rsplit('/', 2)[0])
        local_registry_url = generate_local_endpoint(
            glance_endpoint, args.ip, args.port, args.protocol, '/images'
        )
        resp = glance.session.get(local_registry_url, timeout=180)
        milliseconds = resp.elapsed.total_seconds() * 1000
        is_up = resp.status_code == 200
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        client_ok = False
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        client_ok = True

    status_ok(m_name=m_name)
    # Emit client_success exactly once. Previously a transport failure
    # reported False and was then unconditionally followed by True.
    metric_bool('client_success', client_ok, m_name=m_name)
    metric_bool('glance_registry_local_status', is_up, m_name=m_name)
    # Only send remaining metrics if the API is up
    if is_up:
        metric('glance_registry_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(args):
    """Check that the local Octavia API answers and time the request.

    Queries ``/lbaas/loadbalancers?limit=1`` on the local endpoint.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_octavia'
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): the reply is unbound when args.ip is falsy --
        # confirm args.ip is mandatory for this check.
        if args.ip:
            lb_url = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers?limit=1')
            reply = octavia.session.get(lb_url, timeout=180)
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = reply.ok
        metric_bool('client_success', True, m_name=m_name)
        response_ms = reply.elapsed.total_seconds() * 1000

    status_ok(m_name=m_name)
    metric_bool('octavia_api_local_status', is_up, m_name=m_name)
    if is_up:
        # only want to send other metrics if api is up
        metric('octavia_api_local_response_time', 'double',
               '%.3f' % response_ms, 'ms')
def check(args):
    """Report the up/down state of Cinder services.

    Fetches ``/os-services`` from the Cinder endpoint. With ``args.host``
    set, only services on that host (including ``host@backend`` entries)
    are reported; otherwise every service is reported per host. A service
    is down only when it is enabled and its state is not ``up``.

    :param args: parsed CLI arguments (``host``).
    """
    m_name = 'maas_cinder'
    cinder = get_openstack_client('block_storage')
    volume_endpoint = '%s/os-services' % str(cinder.get_endpoint())

    try:
        # We cannot do /os-services?host=X as cinder returns a hostname of
        # X@lvm for cinder-volume binary
        resp = cinder.session.get(volume_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        metric_bool('client_success', False, m_name=m_name)
        # NOTE(review): status_err presumably terminates the process;
        # otherwise resp would be unbound below -- confirm.
        status_err(str(e), m_name=m_name)

    if not resp.ok:
        metric_bool('client_success', False, m_name=m_name)
        # Use the same m_name as every other call in this check; this one
        # previously passed 'cinder'.
        status_err('Could not get response from Cinder API', m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)

    services = resp.json()['services']

    # We need to match against a host of X and X@lvm (or whatever backend)
    if args.host:
        host_prefix = ''.join((args.host, '@'))
        services = [service for service in services
                    if (service['host'].startswith(host_prefix) or
                        service['host'] == args.host)]

    if not services:
        status_err('No host(s) found in the service list', m_name=m_name)

    status_ok(m_name=m_name)

    for service in services:
        service_is_up = not (service['status'] == 'enabled' and
                             service['state'] != 'up')
        if args.host:
            name = '%s_status' % service['binary']
            if '@' in service['host']:
                # Split on the first '@' only, so a backend name that
                # itself contains '@' no longer breaks the unpacking.
                backend = service['host'].split('@', 1)[1]
                name = '%s-%s_status' % (service['binary'], backend)
        else:
            name = '%s_on_host_%s' % (service['binary'], service['host'])
        metric_bool(name, service_is_up)
def check(args):
    """Report Nova quota usage percentages for the Octavia project.

    Fetches ``/limits`` from the Nova endpoint (optionally scoped to
    ``args.tenant_id``) and reports cores, instances, RAM and server-group
    usage as a percentage of the corresponding limit. Negative results
    are clamped to 0.

    :param args: parsed CLI arguments (``tenant_id``).
    """
    m_name = 'maas_octavia'
    # (metric name, "used" key, "limit" key) in nova's absolute limits.
    quota_metrics = (
        ('octavia_cores_quota_usage',
         'totalCoresUsed', 'maxTotalCores'),
        ('octavia_instances_quota_usage',
         'totalInstancesUsed', 'maxTotalInstances'),
        ('octavia_ram_quota_usage',
         'totalRAMUsed', 'maxTotalRAMSize'),
        ('octavia_server_group_quota_usage',
         'totalServerGroupsUsed', 'maxServerGroups'),
    )
    nova = get_openstack_client('compute')

    try:
        if args.tenant_id:
            params = {'tenant_id': args.tenant_id,
                      'project_id': args.tenant_id}
        else:
            params = {}

        compute_url = '%s/limits' % str(nova.get_endpoint())
        compute_resp = nova.session.get(compute_url, params=params,
                                        timeout=180)
        if compute_resp.status_code != 200:
            raise Exception("Nova returned status code %s" % str(
                compute_resp.status_code))
        nova_limits = compute_resp.json()['limits']['absolute']

        # Compute every value before reporting anything, so a failure here
        # (missing key, zero limit) can no longer follow an emitted OK.
        # float() avoids integer truncation under Python 2.
        usage = [
            (name, '%.3f' % max(
                0, float(nova_limits[used]) / nova_limits[limit] * 100))
            for name, used, limit in quota_metrics
        ]
        # Neutron got its limit support in Pike...
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        # Reported once; previously success and OK were emitted both
        # inside the try body and again here.
        metric_bool('client_success', True, m_name=m_name)
        status_ok(m_name=m_name)
        for name, value in usage:
            metric(name, 'double', value, '%')
def check(args):
    """Probe the local Cinder API and report volume and snapshot counts.

    Queries ``/volumes/detail`` and ``/snapshots/detail`` on the local
    endpoint, then emits availability, latency of the volume request,
    totals, and one counter per entry of ``VOLUME_STATUSES`` for both
    volumes and snapshots.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_cinder'
    cinder = get_openstack_client('block_storage')

    try:
        volumes_url = generate_local_endpoint(
            str(cinder.get_endpoint()), args.ip, args.port,
            args.protocol, '/volumes/detail')
        vol = cinder.session.get(volumes_url, timeout=180)
        snapshots_url = generate_local_endpoint(
            str(cinder.get_endpoint()), args.ip, args.port,
            args.protocol, '/snapshots/detail')
        snap = cinder.session.get(snapshots_url, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = vol.ok and snap.ok
        elapsed_ms = vol.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

        # gather some metrics
        volumes = vol.json()['volumes']
        vol_status_count = collections.Counter(v['status'] for v in volumes)
        total_vols = len(volumes)

        snapshots = snap.json()['snapshots']
        snap_status_count = collections.Counter(
            s['status'] for s in snapshots)
        total_snaps = len(snapshots)

    status_ok(m_name=m_name)
    metric_bool('cinder_api_local_status', is_up, m_name=m_name)
    # only want to send other metrics if api is up
    if is_up:
        metric('cinder_api_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
        metric('total_cinder_volumes', 'uint32', total_vols, 'volumes')
        for state in VOLUME_STATUSES:
            metric('cinder_%s_volumes' % state, 'uint32',
                   vol_status_count[state], 'volumes')
        metric('total_cinder_snapshots', 'uint32', total_snaps, 'snapshots')
        for state in VOLUME_STATUSES:
            metric('cinder_%s_snaps' % state, 'uint32',
                   snap_status_count[state], 'snapshots')
def check(args):
    """Report whether each Nova service is up, as "Yes"/"No" strings.

    An enabled service is down when its state is ``down``. A disabled
    service is down only when its disabled reason mentions ``auto``.
    With ``args.host`` set, only services on that host are reported.

    :param args: parsed CLI arguments (``host``).
    """
    m_name = 'maas_nova'
    nova = get_openstack_client('compute')

    try:
        services = list(nova.services())
        if args.host:
            services = [svc for svc in services if svc.host == args.host]
    # not gathering api status metric here so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        for service_type in NOVA_SERVICE_TYPE_LIST:
            metric('%s_status' % service_type, 'string',
                   '%s cannot reach API' % service_type, m_name=m_name)
        status_err_no_exit(str(e), m_name=m_name)
        return
    else:
        metric_bool('client_success', True, m_name=m_name)

    if len(services) == 0:
        status_err("No host(s) found in the service list", m_name=m_name)

    # return all the things
    status_ok(m_name=m_name)
    for service in services:
        service_is_up = "Yes"
        admin_status = service.status.lower()

        if admin_status == 'enabled':
            if service.state.lower() == 'down':
                service_is_up = "No"
        elif admin_status == 'disabled':
            try:
                reason = service.disabled_reason
                if reason and 'auto' in reason.lower():
                    service_is_up = "No"
            except AttributeError:
                pass

        if args.host:
            name = '%s_status' % service.binary
        else:
            name = '%s_on_host_%s_status' % (service.binary, service.host)

        metric(name, 'string', service_is_up, m_name=m_name)
def check(args):
    """Probe an arbitrary local HTTP API and report availability/latency.

    Builds ``<scheme>://<ip>:<port><path>`` from ``args`` and issues a
    GET. ``args.path`` may contain format placeholders for ``version``
    and, when ``args.auth`` is set, ``project_id``.

    :param args: parsed CLI arguments (``auth``, ``ssl``, ``ip``,
        ``port``, ``version``, ``path``, ``name``).
    """
    headers = {'Content-type': 'application/json'}
    path_options = {}
    if args.auth:
        keystone = get_openstack_client('identity')
        auth_token = keystone.get_token()
        project_id = keystone.get_project_id()
        # NOTE(review): OpenStack APIs normally read the token from
        # 'X-Auth-Token'; confirm 'auth_token' is what the target expects.
        headers['auth_token'] = auth_token
        path_options['project_id'] = project_id

    scheme = 'https' if args.ssl else 'http'
    endpoint = '{scheme}://{ip}:{port}'.format(ip=args.ip, port=args.port,
                                               scheme=scheme)
    if args.version is not None:
        path_options['version'] = args.version

    # Pass the options both positionally and by keyword so templates using
    # '{0[version]}' keep working and '{version}' style ones work too.
    # Previously named placeholders raised KeyError.
    path = args.path.format(path_options, **path_options)

    short_name = args.name.split('_')[0]
    m_name = 'maas_{name}'.format(name=short_name)
    if path and not path.startswith('/'):
        url = '/'.join((endpoint, path))
    else:
        url = ''.join((endpoint, path))

    # The context manager releases pooled connections when done.
    with requests.Session() as s:
        s.headers.update(headers)
        try:
            # NOTE(review): verify=False disables TLS certificate
            # validation -- presumably intentional for local endpoints.
            r = s.get(url, verify=False, timeout=180)
        except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
            is_up = False
            metric_bool('client_success', False, m_name=m_name)
        else:
            is_up = True
            metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('{name}_api_local_status'.format(name=args.name), is_up)

    if is_up and r.ok:
        milliseconds = r.elapsed.total_seconds() * 1000
        metric('{name}_api_local_response_time'.format(name=args.name),
               'double', '%.3f' % milliseconds, 'ms')
def check(args):
    """Report whether each Neutron agent is up, as "Yes"/"No" strings.

    Agents are filtered by ``args.host`` or, failing that, ``args.fqdn``.
    An agent is down when it is administratively up but not alive.

    :param args: parsed CLI arguments (``host``, ``fqdn``).
    """
    m_name = 'maas_neutron'
    neutron = get_openstack_client('network')

    try:
        host_filter = args.host or args.fqdn
        if host_filter:
            agents = list(neutron.agents(host=host_filter))
        else:
            agents = list(neutron.agents())
    # An API status metric is not gathered so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        for agent_type in NEUTRON_AGENT_TYPE_LIST:
            metric('%s_status' % agent_type, 'string',
                   '%s cannot reach API' % agent_type, m_name=m_name)
        status_err_no_exit(str(e), m_name=m_name)
        return
    else:
        metric_bool('client_success', True, m_name=m_name)

    if len(agents) == 0:
        status_err("No host(s) found in the agents list", m_name=m_name)

    # Return all the things
    status_ok(m_name=m_name)
    for agent in agents:
        if agent['is_admin_state_up'] and not agent['is_alive']:
            agent_is_up = "No"
        else:
            agent_is_up = "Yes"

        if args.host or args.fqdn:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])

        metric(name, 'string', agent_is_up, m_name=m_name)
def check(args):
    """Report Nova service states, renamed for ironic-compute hosts.

    A service is down when it is enabled and its state is ``down``.
    Metric names have their first ``nova`` replaced with ``ironic``,
    except names containing ``conductor``.

    :param args: parsed CLI arguments (``host``).
    """
    m_name = 'maas_nova'
    try:
        nova = get_openstack_client('compute')
    # not gathering api status metric here so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)

        # gather nova service states
        services = list(nova.services())
        if args.host:
            services = [svc for svc in services if svc.host == args.host]

        if len(services) == 0:
            status_err("No host(s) found in the service list",
                       m_name=m_name)

        # return all the things
        status_ok(m_name=m_name)
        for service in services:
            service_is_up = not (service.status == 'enabled' and
                                 service.state == 'down')

            if args.host:
                name = '%s_status' % service.binary
            else:
                name = '%s_on_host_%s_status' % (service.binary,
                                                 service.host)

            # replace the first 'nova' so the metric name would be like:
            # 'ironic-compute_status'
            # notice 'ironic-conductor' is different than 'nova-conductor'
            # on ironic-compute box, so we preserve nova-conductor metric
            if 'conductor' not in name:
                name = name.replace('nova', 'ironic', 1)

            metric_bool(name, service_is_up, m_name=m_name)
def check(args):
    """Report how many Ironic nodes exist and how many are usable.

    A node counts as up when it is not in maintenance.

    :param args: parsed CLI arguments (unused by this check).
    """
    m_name = 'maas_ironic'
    ironic = get_openstack_client('baremetal')

    try:
        nodes = list(ironic.nodes())
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)

    total_nodes = len(nodes)
    in_maintenance = sum(1 for node in nodes if node.is_maintenance)
    metric('ironic_up_nodes_count', 'uint32', total_nodes - in_maintenance)
    metric('ironic_total_nodes_count', 'uint32', total_nodes)
def check(args):
    """Probe the local Neutron API and report basic resource counts.

    Sends a GET to ``/agents`` on the local endpoint, then emits
    availability, latency and the number of agents, networks, routers
    and subnets.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_neutron'
    neutron = get_openstack_client('network')

    try:
        neutron_local_endpoint = generate_local_endpoint(
            str(neutron.get_endpoint()), args.ip, args.port,
            args.protocol, '/agents'
        )
        resp = neutron.session.get(neutron_local_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = True
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

        # Gather a few metrics
        agents = len(resp.json()['agents'])
        networks = len(list(neutron.networks()))
        routers = len(list(neutron.routers()))
        subnets = len(list(neutron.subnets()))

    status_ok(m_name=m_name)
    metric_bool('neutron_api_local_status', is_up, m_name=m_name)
    # Only send metrics if the API is up
    if is_up:
        metric('neutron_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
        metric('neutron_agents', 'uint32', agents, 'agents')
        metric('neutron_networks', 'uint32', networks, 'networks')
        # The unit was previously 'agents', copied from the line above.
        metric('neutron_routers', 'uint32', routers, 'routers')
        metric('neutron_subnets', 'uint32', subnets, 'subnets')
def check(args):
    """Probe the local Glance API and report per-status image counts.

    Sends a GET to ``/images`` on the local endpoint, then emits
    availability, latency and one counter per entry of
    ``IMAGE_STATUSES``.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_glance'
    glance = get_openstack_client('image')

    try:
        images_url = generate_local_endpoint(
            str(glance.get_endpoint()), args.ip, args.port,
            args.protocol, '/images')
        response = glance.session.get(images_url, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = response.ok
        elapsed_ms = response.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)
        per_status = collections.Counter(
            image['status'] for image in response.json()['images'])

    status_ok(m_name=m_name)
    metric_bool('glance_api_local_status', is_up, m_name=m_name)

    # only want to send other metrics if api is up
    if is_up:
        metric('glance_api_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
        for state in IMAGE_STATUSES:
            metric('glance_%s_images' % state, 'uint32',
                   per_status[state], 'images')
def check(args):
    """Check the local Nova API and count instances by status.

    Queries ``/servers/detail?all_tenants=True`` on the local endpoint
    and emits availability, latency and one counter per entry of
    ``SERVER_STATUSES``.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_nova'
    nova = get_openstack_client('compute')

    try:
        servers_url = generate_local_endpoint(
            str(nova.get_endpoint()), args.ip, args.port,
            args.protocol, '/servers/detail?all_tenants=True'
        )
        reply = nova.session.get(servers_url, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = reply.ok
        metric_bool('client_success', True, m_name=m_name)
        response_ms = reply.elapsed.total_seconds() * 1000
        instances = reply.json()['servers']
        # gather some metrics
        counts_by_status = collections.Counter(
            instance['status'] for instance in instances)

    status_ok(m_name=m_name)
    metric_bool('nova_api_local_status', is_up, m_name=m_name)
    # only want to send other metrics if api is up
    if is_up:
        metric('nova_api_local_response_time', 'double',
               '%.3f' % response_ms, 'ms')
        for state in SERVER_STATUSES:
            metric('nova_instances_in_state_%s' % state, 'uint32',
                   counts_by_status[state], 'instances')
def check(args):
    """Probe the local Neutron API and report basic resource counts.

    Sends a GET to ``/agents`` on the local endpoint, then emits
    availability, latency and the number of agents, networks, routers
    and subnets.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_neutron'
    neutron = get_openstack_client('network')

    try:
        neutron_local_endpoint = generate_local_endpoint(
            str(neutron.get_endpoint()), args.ip, args.port,
            args.protocol, '/agents')
        resp = neutron.session.get(neutron_local_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        is_up = True
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

        # Gather a few metrics
        agents = len(resp.json()['agents'])
        networks = len(list(neutron.networks()))
        routers = len(list(neutron.routers()))
        subnets = len(list(neutron.subnets()))

    status_ok(m_name=m_name)
    metric_bool('neutron_api_local_status', is_up, m_name=m_name)
    # Only send metrics if the API is up
    if is_up:
        metric('neutron_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
        metric('neutron_agents', 'uint32', agents, 'agents')
        metric('neutron_networks', 'uint32', networks, 'networks')
        # The unit was previously 'agents', copied from the line above.
        metric('neutron_routers', 'uint32', routers, 'routers')
        metric('neutron_subnets', 'uint32', subnets, 'subnets')
def check(args):
    """Check that the local Designate API answers and time the request.

    :param args: parsed CLI arguments (``ip``, ``port``, ``protocol``).
    """
    m_name = 'maas_designate'
    designate = get_openstack_client('dns')

    try:
        # NOTE(review): nothing below is bound when args.ip is falsy, so
        # the later reporting would fail -- confirm args.ip is mandatory.
        if args.ip:
            # Arbitrary call to /zones to ensure the local API is up
            zones_url = generate_local_endpoint(
                str(designate.get_endpoint()), args.ip, args.port,
                args.protocol, '/zones'
            )
            reply = designate.session.get(zones_url, timeout=180)
            response_ms = reply.elapsed.total_seconds() * 1000
            # NOTE(npawelek): At the time of converting to OpenStack SDK,
            # DNS is not yet fully integrated. Excluding integration with
            # the client directly until a later time.
            api_is_up = reply.ok
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name=m_name)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(e), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('designate_api_local_status', api_is_up, m_name=m_name)
    if api_is_up:
        # only want to send other metrics if api is up
        metric('designate_api_local_response_time', 'double',
               '%.3f' % response_ms, 'ms')
def main():
    """Verify bridge netfilter sysctls and iptables rules on a compute host.

    When the host has no running, active instances the check is forced to
    pass. Otherwise the three ``bridge-nf-call-*`` sysctls must all read
    ``1`` and ``iptables-save`` must list at least one rule containing
    ``DROP``. Reads the module-level ``args`` for the host name.
    """
    nova = get_openstack_client('compute')
    iptables_exist = False
    bridge_sysctl = False
    bridge_params = ["bridge-nf-call-arptables",
                     "bridge-nf-call-ip6tables",
                     "bridge-nf-call-iptables"]
    bridge_param_metrics = {}

    # Check for active instances on the host. If none are found, simply
    # force the check to pass.
    #
    # A power_state of 1 means the instance is 'running'
    try:
        instances = [i for i in nova.servers(host=args.host)
                     if i.power_state == 1 and i.vm_state == 'active']
    except Exception as e:
        status("error", str(e), force_print=False)
        # Without the instance list nothing below can be decided.
        # Previously execution fell through to an unbound
        # instances_running.
        return
    instances_running = len(instances) > 0

    # No instances are active so force the metrics to pass
    if instances_running is False:
        iptables_exist = True
        bridge_sysctl = True
        for param in bridge_params:
            bridge_param_metrics[param] = "1"
    else:
        try:
            bridge_sysctl = True
            for param in bridge_params:
                # universal_newlines=True returns text on Python 2 and 3;
                # str() on Python 3 bytes produced "b'1\\n'" instead.
                bridge_param_metrics[param] = subprocess.check_output(
                    ['cat', '/proc/sys/net/bridge/' + param],
                    universal_newlines=True
                ).rstrip('\n')
                if bridge_param_metrics[param] != "1":
                    bridge_sysctl = False
        except Exception as e:
            status('error', str(e), force_print=False)

        # Check if iptables rules are in place
        iptables_rules = []
        try:
            iptables_rules = subprocess.check_output(
                ['iptables-save'], universal_newlines=True).split('\n')
        except Exception as e:
            status('error', str(e), force_print=False)

        if any("DROP" in rule for rule in iptables_rules):
            iptables_exist = True

    if bridge_sysctl is True and iptables_exist is True:
        metric_bool('iptables_status', True, m_name='iptables_active')
        status_ok(m_name='iptables_active')
    else:
        metric_bool('iptables_status', False, m_name='iptables_active')

    metric('bridge-nf-call-arptables', 'int64',
           bridge_param_metrics.get('bridge-nf-call-arptables', 0))
    metric('bridge-nf-call-iptables', 'int64',
           bridge_param_metrics.get('bridge-nf-call-iptables', 0))
    metric('bridge-nf-call-ip6tables', 'int64',
           bridge_param_metrics.get('bridge-nf-call-ip6tables', 0))
def check(args):
    """Verify the neutron metadata proxy answers in every DHCP namespace.

    For each network that has a ``network:dhcp`` port, ``SERVICE_CHECK`` is
    run for the ``qdhcp-<network id>`` namespace. It runs on this host, or
    through ``lxc-attach`` when a neutron agents/server container is found.
    A command failure whose output contains ``404 Not Found`` is expected
    and treated as healthy. Any other failure marks that network as failed.

    Emits ``client_success`` and ``neutron-metadata-proxy_status`` under
    ``maas_neutron``.

    :param args: parsed command-line arguments (not read by this check).
    """
    neutron = get_openstack_client('network')

    # Identify where the service check should be run: the first container
    # whose name mentions neutron_agents or neutron_server, else this host.
    container = None
    if lxc_module_active:
        try:
            container = next(
                (c for c in lxc.list_containers()
                 if 'neutron_agents' in c or 'neutron_server' in c),
                None
            )
        except OSError:
            # Best effort: if containers cannot be listed, run on the host.
            pass

    try:
        # Only check networks which have a port with DHCP enabled
        networks = [port.network_id for port in neutron.ports()
                    if port.device_owner == 'network:dhcp']
    # Not gathering API status metric, so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_neutron')
        status_err(str(e), m_name='maas_neutron')
        # NOTE(review): status_err presumably terminates the check; the
        # return keeps `networks` from being read unbound if it does not.
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # Iterate through each namespace and validate the metadata
    # service is responsive. A 404 from the request is typical as
    # the IP used for validation does not exist.
    failures = []
    for net in networks:
        namespace = 'qdhcp-%s' % net
        service_check_cmd = SERVICE_CHECK % namespace

        if container is None:
            command_line = service_check_cmd
        else:
            command_line = 'lxc-attach -n %s -- %s' % (
                container, service_check_cmd
            )
        # With shell=False the command must be an argument list. A bare
        # string would be taken as the name of a single executable.
        command = shlex.split(command_line)

        try:
            # universal_newlines=True yields text output so the substring
            # test below also works on Python 3, where output is bytes.
            subprocess.check_output(command, shell=False,
                                    stderr=subprocess.STDOUT,
                                    universal_newlines=True)
        except subprocess.CalledProcessError as e:
            # A HTTP 404 response is expected
            if '404 Not Found' not in e.output:
                failures.append(net)

    is_ok = not failures
    if is_ok:
        status_ok(m_name='maas_neutron')
    metric_bool('neutron-metadata-proxy_status', is_ok,
                m_name='maas_neutron')
def check(args):
    """Report the Open vSwitch agent state and check its processes.

    The agent list is fetched from neutron, filtered by ``args.host`` or,
    failing that, ``args.fqdn`` when either is set. The first agent whose
    ``binary`` contains ``openvswitch`` is reported as ``"No"`` only when
    it is administratively up but not alive, and ``"Yes"`` otherwise.
    ``check_process_statuses`` is then run for each relevant neutron LXC
    container, or for this host when not running on LXC.

    :param args: parsed command-line arguments; ``host`` and ``fqdn`` are
        read.
    """
    neutron = get_openstack_client('network')

    try:
        # Gather neutron agent states; args.host wins over args.fqdn.
        host_filter = args.host or args.fqdn
        if host_filter:
            agents = list(neutron.agents(host=host_filter))
        else:
            agents = list(neutron.agents())
    # An API status metric is not gathered so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_neutron')
        metric('neutron-openvswitch-agent_status',
               'string',
               'neutron-openvswitch-agent cannot reach API',
               m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    try:
        ovs_agent = next(
            a for a in agents if 'openvswitch' in a['binary']
        )
    except StopIteration:
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        # Return all the things
        status_ok(m_name='maas_neutron')

        agent_is_up = "Yes"
        if ovs_agent['is_admin_state_up'] and not ovs_agent['is_alive']:
            agent_is_up = "No"

        if host_filter:
            # A single host was requested, so the binary name is enough.
            name = '%s_status' % ovs_agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (ovs_agent['binary'],
                                         ovs_agent['id'],
                                         ovs_agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')

    if on_lxc_container:
        all_containers = list(lxc.list_containers())
        neutron_containers_list = [
            ctr for ctr in all_containers if 'neutron' in ctr
        ]
        neutron_agent_containers_list = [
            ctr for ctr in all_containers if 'neutron_agents' in ctr
        ]

        # NOTE(npawelek): The neutron container architecture was
        # refactored in recent versions removing all neutron containers
        # with the exception of one, or even using baremetal directly.
        # Since logic is looking for the presence of LXC, we do not need
        # to account for baremetal here.
        if (len(neutron_containers_list) == 1 and
                'neutron_server' in neutron_containers_list[0]):
            valid_containers = neutron_containers_list
        elif neutron_agent_containers_list:
            valid_containers = neutron_agent_containers_list
        else:
            # Must stay a list: the emptiness test below would raise
            # TypeError on a bare integer.
            valid_containers = []

        if not valid_containers:
            status_err('no neutron agent or server containers found',
                       m_name='maas_neutron')
            return

        for container in valid_containers:
            # Get the neutron_agent_container's init PID.
            try:
                c = lxc.Container(container)
                # If the container wasn't found, exit now.
                if c.init_pid == -1:
                    metric_bool('container_success', False,
                                m_name='maas_neutron')
                    status_err(
                        'Could not find PID for container {}'.format(
                            container
                        ),
                        m_name='maas_neutron'
                    )
            except Exception as e:
                metric_bool('container_success', False,
                            m_name='maas_neutron')
                status_err(
                    'Container lookup failed on "{}". ERROR: "{}"'
                    .format(
                        container,
                        e
                    ),
                    m_name='maas_neutron'
                )
            else:
                metric_bool('container_success', True,
                            m_name='maas_neutron')

                # c is the lxc container instance of this
                # neutron_agent_container
                check_process_statuses(container, c)
    else:
        ovs_agent_host = socket.gethostname()
        check_process_statuses(ovs_agent_host)
def main():
    """Check that bridge netfilter sysctls and iptables rules are in place.

    The host to inspect is taken from the module-level ``args.host``. When
    no running, active instance is found on that host the check is forced
    to pass. Otherwise each ``bridge-nf-call-*`` value under
    ``/proc/sys/net/bridge/`` must be ``"1"`` and ``iptables-save`` must
    list at least one rule containing ``DROP``.

    Emits ``iptables_status`` plus one metric per bridge parameter.
    """
    nova = get_openstack_client('compute')
    iptables_exist = False
    bridge_sysctl = False
    bridge_params = [
        "bridge-nf-call-arptables",
        "bridge-nf-call-ip6tables",
        "bridge-nf-call-iptables"
    ]
    bridge_param_metrics = {}

    # Check for active instances on the host. If none are found, simply
    # force the check to pass.
    #
    # A power_state of 1 means the instance is 'running'
    try:
        instances = [
            i for i in nova.servers(host=args.host)
            if i.power_state == 1 and i.vm_state == 'active'
        ]
    except Exception as e:
        status("error", str(e), force_print=False)
        # Whether instances are running is unknown, so neither a forced
        # pass nor a real verdict can be given. Stop here and leave the
        # recorded error as the outcome instead of reading an unset flag.
        return

    instances_running = len(instances) > 0

    if not instances_running:
        # No instances are active so force the metrics to pass
        iptables_exist = True
        bridge_sysctl = True
        for param in bridge_params:
            bridge_param_metrics[param] = "1"
    else:
        try:
            bridge_sysctl = True
            for param in bridge_params:
                # universal_newlines=True makes check_output return text.
                # Wrapping bytes in str() on Python 3 yields "b'1\n'",
                # which never equals "1".
                value = subprocess.check_output(
                    ['cat', '/proc/sys/net/bridge/' + param],
                    universal_newlines=True
                ).rstrip('\n')
                bridge_param_metrics[param] = value
                if value != "1":
                    bridge_sysctl = False
        except Exception as e:
            # A value that could not be read cannot be counted as correct.
            bridge_sysctl = False
            status('error', str(e), force_print=False)

        # Check if iptables rules are in place
        iptables_rules = []
        try:
            iptables_rules = subprocess.check_output(
                ['iptables-save'], universal_newlines=True
            ).split('\n')
        except Exception as e:
            status('error', str(e), force_print=False)

        iptables_exist = any("DROP" in rule for rule in iptables_rules)

    checks_pass = bridge_sysctl and iptables_exist
    metric_bool('iptables_status', checks_pass, m_name='iptables_active')
    if checks_pass:
        status_ok(m_name='iptables_active')

    # Parameters that were never read are reported as 0.
    metric('bridge-nf-call-arptables', 'int64',
           bridge_param_metrics.get('bridge-nf-call-arptables', 0))
    metric('bridge-nf-call-iptables', 'int64',
           bridge_param_metrics.get('bridge-nf-call-iptables', 0))
    metric('bridge-nf-call-ip6tables', 'int64',
           bridge_param_metrics.get('bridge-nf-call-ip6tables', 0))
def check(args):
    """Report compute and volume quota usage as percentages.

    The ``/limits`` resources of the compute and block-storage endpoints
    are fetched, optionally scoped with ``args.tenant_id``, and each
    "used" counter is divided by its matching maximum. Results are emitted
    as ``octavia_*_quota_usage`` metrics under ``maas_octavia``, together
    with ``client_success``.

    Every value is gathered and computed before anything is emitted, so a
    failure part-way through cannot leave both a success and a failure
    ``client_success`` metric behind.

    :param args: parsed command-line arguments; ``tenant_id`` is read.
    """
    nova = get_openstack_client('compute')
    cinder = get_openstack_client('block_storage')

    try:
        if args.tenant_id:
            params = {'tenant_id': args.tenant_id,
                      'project_id': args.tenant_id}
        else:
            params = {}

        compute_url = '%s/limits' % str(nova.get_endpoint())
        compute_resp = nova.session.get(compute_url, params=params,
                                        timeout=180)

        volume_url = '%s/limits' % str(cinder.get_endpoint())
        volume_resp = cinder.session.get(volume_url, params=params,
                                         timeout=180)

        if compute_resp.status_code != 200:
            raise Exception("Nova returned status code %s" % str(
                compute_resp.status_code))
        nova_limits = compute_resp.json()['limits']['absolute']

        if volume_resp.status_code != 200:
            raise Exception("Volume returned status code %s" % str(
                volume_resp.status_code))
        volume_limits = volume_resp.json()['limits']['absolute']

        # (metric name, limits mapping, "used" key, "maximum" key)
        quota_sources = (
            ('octavia_cores_quota_usage', nova_limits,
             'totalCoresUsed', 'maxTotalCores'),
            ('octavia_instances_quota_usage', nova_limits,
             'totalInstancesUsed', 'maxTotalInstances'),
            ('octavia_ram_quota_usage', nova_limits,
             'totalRAMUsed', 'maxTotalRAMSize'),
            ('octavia_server_group_quota_usage', nova_limits,
             'totalServerGroupsUsed', 'maxServerGroups'),
            ('octavia_volume_gb_quota_usage', volume_limits,
             'totalGigabytesUsed', 'maxTotalVolumeGigabytes'),
            ('octavia_num_volume_quota_usage', volume_limits,
             'totalVolumesUsed', 'maxTotalVolumes'),
        )
        # Neutron got its limit support in Pike...

        # float() forces true division so the ratio is not truncated to 0
        # where "/" on two ints is integer division (Python 2). Negative
        # ratios are clamped to 0. A zero maximum still raises and is
        # reported by the generic handler below.
        usage = [
            (metric_name,
             max(0, float(limits[used_key]) / limits[max_key] * 100))
            for metric_name, limits, used_key, max_key in quota_sources
        ]
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        # Success is reported exactly once, after everything was computed.
        metric_bool('client_success', True, m_name='maas_octavia')
        status_ok(m_name='maas_octavia')
        for metric_name, percent in usage:
            metric(metric_name, 'double', '%.3f' % percent, '%')
def check(args):
    """Report the Open vSwitch agent state and check its processes.

    The agent list is fetched from neutron, filtered by ``args.host`` or,
    failing that, ``args.fqdn`` when either is set. The first agent whose
    ``binary`` contains ``openvswitch`` is reported as ``"No"`` only when
    it is administratively up but not alive, and ``"Yes"`` otherwise.
    ``check_process_statuses`` is then run for each relevant neutron LXC
    container, or for this host when not running on LXC.

    :param args: parsed command-line arguments; ``host`` and ``fqdn`` are
        read.
    """
    neutron = get_openstack_client('network')

    try:
        # Gather neutron agent states; args.host wins over args.fqdn.
        host_filter = args.host or args.fqdn
        if host_filter:
            agents = list(neutron.agents(host=host_filter))
        else:
            agents = list(neutron.agents())
    # An API status metric is not gathered so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_neutron')
        metric('neutron-openvswitch-agent_status',
               'string',
               'neutron-openvswitch-agent cannot reach API',
               m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    try:
        ovs_agent = next(a for a in agents if 'openvswitch' in a['binary'])
    except StopIteration:
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        # Return all the things
        status_ok(m_name='maas_neutron')

        agent_is_up = "Yes"
        if ovs_agent['is_admin_state_up'] and not ovs_agent['is_alive']:
            agent_is_up = "No"

        if host_filter:
            # A single host was requested, so the binary name is enough.
            name = '%s_status' % ovs_agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (ovs_agent['binary'],
                                         ovs_agent['id'],
                                         ovs_agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')

    if on_lxc_container:
        all_containers = list(lxc.list_containers())
        neutron_containers_list = [
            ctr for ctr in all_containers if 'neutron' in ctr
        ]
        neutron_agent_containers_list = [
            ctr for ctr in all_containers if 'neutron_agents' in ctr
        ]

        # NOTE(npawelek): The neutron container architecture was
        # refactored in recent versions removing all neutron containers
        # with the exception of one, or even using baremetal directly.
        # Since logic is looking for the presence of LXC, we do not need
        # to account for baremetal here.
        if (len(neutron_containers_list) == 1 and
                'neutron_server' in neutron_containers_list[0]):
            valid_containers = neutron_containers_list
        elif neutron_agent_containers_list:
            valid_containers = neutron_agent_containers_list
        else:
            # Must stay a list: the emptiness test below would raise
            # TypeError on a bare integer.
            valid_containers = []

        if not valid_containers:
            status_err('no neutron agent or server containers found',
                       m_name='maas_neutron')
            return

        for container in valid_containers:
            # Get the neutron_agent_container's init PID.
            try:
                c = lxc.Container(container)
                # If the container wasn't found, exit now.
                if c.init_pid == -1:
                    metric_bool('container_success', False,
                                m_name='maas_neutron')
                    status_err('Could not find PID for container {}'.format(
                        container), m_name='maas_neutron')
            except Exception as e:
                metric_bool('container_success', False,
                            m_name='maas_neutron')
                status_err(
                    'Container lookup failed on "{}". ERROR: "{}"'.format(
                        container, e), m_name='maas_neutron')
            else:
                metric_bool('container_success', True,
                            m_name='maas_neutron')

                # c is the lxc container instance of this
                # neutron_agent_container
                check_process_statuses(container, c)
    else:
        ovs_agent_host = socket.gethostname()
        check_process_statuses(ovs_agent_host)