示例#1
0
def check(args):
    """Report the state of nova services as boolean metrics.

    Builds a nova client against port 8774 of ``args.ip``, lists the services
    (restricted to ``args.host`` when it is set) and emits one ``metric_bool``
    per service. A service is reported as down only when it is enabled and
    its state is ``down``.

    NOTE(review): assumes status_err() terminates the check; otherwise
    ``nova`` could be unbound after a failed client creation -- confirm.
    """
    endpoint = 'http://{ip}:8774/v3'.format(ip=args.ip)
    try:
        nova = get_nova_client(bypass_url=endpoint)
    except Exception as e:
        # No API status metric is gathered here, so every failure is an error.
        status_err(str(e))

    # Filter by host only when a host was supplied.
    list_kwargs = {'host': args.host} if args.host else {}
    services = nova.services.list(**list_kwargs)

    if len(services) == 0:
        status_err("No host(s) found in the service list")

    status_ok()
    for svc in services:
        # A disabled service is never flagged, whatever its state.
        svc_is_up = not (svc.status == 'enabled' and svc.state == 'down')

        if args.host:
            metric_name = '%s_status' % svc.binary
        else:
            metric_name = '%s_on_host_%s_status' % (svc.binary, svc.host)

        metric_bool(metric_name, svc_is_up)
示例#2
0
def get_osd_statistics(client=None,
                       keyring=None,
                       osd_ids=None,
                       container_name=None):
    """Emit an ``osd.<id>_up`` boolean metric for every requested OSD.

    Fetches the OSD dump and the per-OSD PG dump once, then looks each id of
    ``osd_ids`` up in the OSD dump and reports its ``up`` value.

    Raises:
        maas_common.MaaSException: if an id is missing from the OSD dump.
    """
    dump_kwargs = dict(client=client,
                       keyring=keyring,
                       container_name=container_name)
    osd_dump = get_ceph_osd_dump(**dump_kwargs)
    pg_osds_dump = get_ceph_pg_dump_osds(**dump_kwargs)

    for osd_id in osd_ids:
        osd = next(
            (entry for entry in osd_dump['osds'] if entry['osd'] == osd_id),
            None)
        if osd is None:
            raise maas_common.MaaSException(
                'The OSD ID %s does not exist.' % osd_id)

        name = '_'.join(('osd.%s' % osd_id, 'up'))
        maas_common.metric_bool(name, osd['up'])

        # Switch to the matching PG dump entry when there is one; nothing in
        # the visible code reads it afterwards.
        osd = next(
            (entry for entry in pg_osds_dump if entry['osd'] == osd_id),
            osd)
示例#3
0
def check(auth_ref, args):
    """Probe the local glance API and report availability and latency.

    Issues an authenticated GET for ``/schemas/image`` on port 9292 of
    ``args.ip``. Connection errors, HTTP errors and timeouts mark the API as
    down; any other exception is reported through ``status_err``.

    NOTE(review): assumes status_err() terminates the check; otherwise
    ``is_up`` could be unbound below -- confirm.
    """
    # We call get_keystone_client here as there is some logic within to get a
    # new token if previous one is bad.
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    api_endpoint = 'http://{ip}:9292/v2'.format(ip=args.ip)

    s = Session()

    s.headers.update(
        {'Content-type': 'application/json',
         'x-auth-token': auth_token})

    try:
        # Hit something that isn't querying the glance-registry, since we
        # query glance-registry in separate checks
        r = s.get('%s/schemas/image' % api_endpoint, verify=False,
                  timeout=10)
        is_up = r.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as e:
        status_err(str(e))

    status_ok()
    metric_bool('glance_api_local_status', is_up)

    # only want to send other metrics if api is up
    if is_up:
        milliseconds = r.elapsed.total_seconds() * 1000
        # The value is rendered with three decimals, so it is declared as a
        # double (as the other response-time checks do) rather than uint32.
        metric('glance_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(args, tenant_id):
    """Probe the local ceilometer API and report availability and latency.

    Creates a client for port 8777 of ``args.ip``; an ``exc.HTTPException``
    marks the API as down, any other exception goes to ``status_err``. On
    success, listing and iterating the meters is timed and reported in
    milliseconds. ``tenant_id`` is accepted but not used here.
    """
    endpoint = 'http://{ip}:8777'.format(ip=args.ip)

    try:
        ceilometer = get_ceilometer_client(endpoint=endpoint)
        is_up = True
    except exc.HTTPException as e:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # Time an arbitrary call.
        started = time()
        # Exceptions are only thrown once the meters are iterated, so walk
        # the whole result inside the timed section.
        for meter in ceilometer.meters.list():
            meter.meter_id
        milliseconds = (time() - started) * 1000

    status_ok()
    metric_bool('ceilometer_api_local_status', is_up)
    # Latency is only meaningful when the API answered.
    if is_up:
        metric('ceilometer_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
示例#5
0
def main():
    """Collect HP hardware health states and emit them as boolean metrics.

    Picks ``ssacli`` or, failing that, ``hpssacli`` depending on which binary
    exists under ``/usr/sbin``, gathers the chassis, power supply, disk and
    controller states through the helper functions and reports each one with
    ``maas_common.metric_bool``.
    """
    try:
        os.stat('/usr/sbin/ssacli')
        ssacli_bin = 'ssacli'
    except Exception:
        try:
            os.stat('/usr/sbin/hpssacli')
            ssacli_bin = 'hpssacli'
        except Exception:
            maas_common.status_err('Neither ssacli or hpssacli could be found',
                                   m_name='hp_monitoring')

    status = {}
    status['hardware_processors_status'] = \
        get_chassis_status('hpasmcli', 'server')
    status['hardware_memory_status'] = get_chassis_status('hpasmcli', 'dimm')
    status['hardware_powersupply_status'] = \
        get_powersupply_status('hpasmcli', 'powersupply')
    status['hardware_disk_status'] = get_drive_status(ssacli_bin)
    status['hardware_controller_status'] = get_controller_status(ssacli_bin)
    status['hardware_controller_cache_status'] = \
        get_controller_cache_status(ssacli_bin)
    status['hardware_controller_battery_status'] = \
        get_controller_battery_status(ssacli_bin)

    maas_common.status_ok(m_name='maas_hwvendor')
    # items() exists on both Python 2 and 3; viewitems() is Python 2 only.
    for name, value in status.items():
        maas_common.metric_bool(name, value, m_name='maas_hwvendor')
def check(args):
    """Probe the local nova metadata API and report availability and latency.

    Requests ``/`` on port 8775 of ``args.ip``. The API counts as up when the
    response is OK and one line of the body is exactly ``1.0``. Connection
    errors, HTTP errors and timeouts mark it as down; any other exception is
    reported through ``status_err``.
    """
    metadata_endpoint = ('http://{ip}:8775'.format(ip=args.ip))
    is_up = True

    s = requests.Session()

    try:
        # looks like we can only get / (ec2 versions) without specifying
        # an instance ID and other headers
        versions = s.get('%s/' % metadata_endpoint,
                         verify=False,
                         timeout=10)
        milliseconds = versions.elapsed.total_seconds() * 1000
        # Use the decoded body: ``content`` is bytes on Python 3, so a str
        # such as '1.0' would never be found among its lines.
        if not versions.ok or '1.0' not in versions.text.splitlines():
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        is_up = False
    except Exception as e:
        status_err(str(e))

    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        metric('nova_api_metadata_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
示例#7
0
def check(args):
    """Report hypervisor resource statistics as ``cloud_resource_*`` metrics.

    Fetches every hypervisor through the compute client, then for each entry
    of ``stats_mapping`` stores the hypervisor attribute named by
    ``stat_name`` (scaled by the CPU or memory allocation ratio for
    ``total_vcpus`` / ``total_memory``) together with its unit and type.
    Each hypervisor overwrites the values stored by the previous one.

    NOTE(review): assumes status_err() terminates the check; otherwise
    ``cloud_stats`` would be unbound after a client failure -- confirm.
    """
    try:
        nova = get_openstack_client('compute')

    except Exception as e:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(e), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')
        # get some cloud stats
        stats = [nova.get_hypervisor(i.id) for i in nova.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        count = 0
        for stat in stats:
            count += 1
            # Expose the running hypervisor count as an attribute so it can
            # be read through stats_mapping like any other statistic.
            setattr(stat, 'count', count)
            # items() works on Python 2 and 3; iteritems() is Python 2 only.
            for metric_name, vals in stats_mapping.items():
                multiplier = 1
                if metric_name == 'total_vcpus':
                    multiplier = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    multiplier = args.mem_allocation_ratio
                cloud_stats[metric_name]['value'] = \
                    (getattr(stat, vals['stat_name']) * multiplier)
                cloud_stats[metric_name]['unit'] = \
                    vals['unit']
                cloud_stats[metric_name]['type'] = \
                    vals['type']

    status_ok(m_name='maas_nova')
    # Iterating the dict yields its keys on every Python version.
    for metric_name in cloud_stats:
        metric('cloud_resource_%s' % metric_name,
               cloud_stats[metric_name]['type'],
               cloud_stats[metric_name]['value'],
               cloud_stats[metric_name]['unit'])
示例#8
0
def check(auth_ref, args):
    """Report the state of cinder services as boolean metrics.

    Queries ``/os-services`` on port 8776 of ``args.ip`` with the keystone
    token and emits one ``<binary>_on_host_<host>`` metric per service. A
    service is reported as down when it is enabled and its state is not
    ``up``.

    NOTE(review): assumes status_err() terminates the check -- confirm.
    """
    keystone = get_keystone_client(auth_ref)
    token = keystone.auth_token
    endpoint = 'http://{ip}:8776/v1/{tenant}' \
               .format(ip=args.ip, tenant=keystone.tenant_id)

    session = requests.Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token
    })

    try:
        response = session.get('%s/os-services' % endpoint,
                               verify=False, timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        status_err(str(e))

    if not response.ok:
        status_err('could not get response from cinder api')

    status_ok()
    for entry in response.json()['services']:
        # A disabled service is never flagged, whatever its state.
        is_down = entry['status'] == 'enabled' and entry['state'] != 'up'
        metric_bool('%s_on_host_%s' % (entry['binary'], entry['host']),
                    not is_down)
def check(auth_ref, args):
    """Probe the local magnum API and report availability and latency.

    Creates a magnum client (pointed at port 9511 of ``args.ip`` when an IP
    is given); an ``exc.HttpError`` marks the API as down and any other
    exception goes to ``status_err``. On success, listing the cluster
    templates is timed. ``auth_ref`` is accepted but not used here.
    """
    endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip, )

    try:
        # Only override the endpoint when an IP was supplied.
        client_kwargs = {'endpoint': endpoint} if args.ip else {}
        magnum = get_magnum_client(**client_kwargs)
        api_is_up = True
    except exc.HttpError as e:
        api_is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # Time an arbitrary call.
        started = time.time()
        magnum.cluster_templates.list()
        milliseconds = (time.time() - started) * 1000

    status_ok()
    metric_bool('magnum_api_local_status', api_is_up)
    # Latency is only meaningful when the API answered.
    if api_is_up:
        metric('magnum_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
示例#10
0
def check_process_running(process_names, container_name=None):
    """Emit a boolean metric per process saying whether it is running.

    The running processes are read from the cgroup ``cgroup.procs`` file of
    the host, or of the LXC container named ``container_name`` when given.
    Each name in ``process_names`` yields ``<name>_process_status``.
    """
    if not process_names:
        # Nothing to check: the caller supplied no process names.
        status_err('No process names provided')

    if container_name is None:
        procs_path = '/sys/fs/cgroup/cpu/cgroup.procs'
    else:
        # Inspect the container's cgroup rather than the parent host's.
        procs_path = os.path.join('/sys/fs/cgroup/cpu/lxc', container_name,
                                  'cgroup.procs')

    running = get_processes(procs_path)
    if not running:
        # No process list could be obtained for the container or host.
        status_err('Could not get a list of running processes')

    # A process list was fetched, so the check itself succeeded.
    status_ok()

    for wanted in process_names:
        metric_bool('%s_process_status' % wanted, wanted in running)
def check(auth_ref, args):
    """Probe the local glance API and report latency and image counts.

    Creates a glance client (pointed at port 9292 of ``args.ip`` when an IP
    is given); an ``exc.HTTPException`` marks the API as down and any other
    exception goes to ``status_err``. On success an image listing is timed
    and a second listing is tallied by image status, one metric per entry of
    ``IMAGE_STATUSES``. ``auth_ref`` is accepted but not used here.
    """
    endpoint = ('http://{ip}:9292/v1'.format(ip=args.ip))

    try:
        # Only override the endpoint when an IP was supplied.
        client_kwargs = {'endpoint': endpoint} if args.ip else {}
        glance = get_glance_client(**client_kwargs)
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # Time an arbitrary call.
        started = time.time()
        glance.images.list(search_opts={'all_tenants': 1})
        milliseconds = (time.time() - started) * 1000
        # Tally the images by status.
        images = glance.images.list(search_opts={'all_tenants': 1})
        status_count = collections.Counter(image.status for image in images)

    status_ok()
    metric_bool('glance_api_local_status', is_up)

    # Remaining metrics are only meaningful when the API answered.
    if is_up:
        metric('glance_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
        for image_status in IMAGE_STATUSES:
            metric('glance_%s_images' % image_status, 'uint32',
                   status_count[image_status], 'images')
def check(auth_ref, args):
    """Probe the local heat API and report availability and latency.

    Creates a heat client (pointed at port 8004 of ``args.ip`` for the
    keystone tenant when an IP is given); an ``exc.HTTPException`` marks the
    API as down and any other exception goes to ``status_err``. On success
    the ``build_info`` call is timed.
    """
    tenant_id = get_keystone_client(auth_ref).tenant_id
    endpoint = ('http://{ip}:8004/v1/{tenant}'.format
                (ip=args.ip, tenant=tenant_id))

    try:
        # Only override the endpoint when an IP was supplied.
        client_kwargs = {'endpoint': endpoint} if args.ip else {}
        heat = get_heat_client(**client_kwargs)
        is_up = True
    except exc.HTTPException as e:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # Time an arbitrary call.
        started = time.time()
        heat.build_info.build_info()
        milliseconds = (time.time() - started) * 1000

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    # Latency is only meaningful when the API answered.
    if is_up:
        metric('heat_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
示例#13
0
def main():
    """Collect RabbitMQ metrics over HTTP(S) and emit them.

    Uses one authenticated ``requests`` session for all collectors, which
    fill the shared ``metrics`` dict. Boolean values are emitted negated as
    ``rabbitmq_<key>_status``; everything else is emitted as an ``int64``
    metric with its unit.
    """
    collected = {}
    # A single Session keeps the auth credentials for every request.
    session = requests.Session()
    session.auth = (options.username, options.password)

    if options.https:
        protocol = 'https'
    else:
        protocol = 'http'

    _get_connection_metrics(session, collected, protocol,
                            options.host, options.port)
    _get_overview_metrics(session, collected, protocol,
                          options.host, options.port)
    _get_node_metrics(session, collected, protocol, options.host,
                      options.port, options.name)
    _get_queue_metrics(session, collected, protocol, options.host,
                       options.port)
    _get_consumer_metrics(session, collected, protocol, options.host,
                          options.port)

    status_ok(m_name='maas_rabbitmq')

    for key, entry in collected.items():
        value = entry['value']
        if isinstance(value, bool):
            metric_bool('rabbitmq_%s_status' % key, not value)
        else:
            metric('rabbitmq_%s' % key, 'int64', value, entry['unit'])
def check(args):
    """Report the state of neutron agents as boolean metrics.

    Builds a neutron client against port 9696 of ``args.hostname``, lists the
    agents (restricted to ``args.host`` when it is set) and emits one
    ``metric_bool`` per agent. An agent is reported as down only when it is
    administratively up and not alive.

    NOTE(review): assumes status_err() terminates the check; otherwise
    ``neutron`` could be unbound after a failed client creation -- confirm.
    """
    endpoint = "http://{hostname}:9696".format(hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as e:
        # No API status metric is gathered here, so every failure is an error.
        status_err(str(e))

    # Gather the neutron agent states, filtered by host when one was given.
    list_kwargs = {'host': args.host} if args.host else {}
    agents = neutron.list_agents(**list_kwargs)["agents"]

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    status_ok()
    for agent in agents:
        # An administratively disabled agent is never flagged.
        agent_is_up = not (agent["admin_state_up"] and not agent["alive"])

        if args.host:
            metric_name = "%s_status" % agent["binary"]
        else:
            metric_name = "%s_%s_on_host_%s" % (
                agent["binary"], agent["id"], agent["host"])

        metric_bool(metric_name, agent_is_up)
def check(args):
    """Probe the local keystone v3 API and report latency and counts.

    Creates a keystone client for port 35357 of ``args.ip``; an
    ``exc.HttpServerError`` or ``exc.ClientException`` marks the API as down
    and any other exception goes to ``status_err``. On success the service
    listing is timed and the project and ``Default``-domain user counts are
    reported.
    """
    IDENTITY_ENDPOINT = 'http://{ip}:35357/v3'.format(ip=args.ip)

    try:
        keystone = get_keystone_client(endpoint=IDENTITY_ENDPOINT)
        is_up = True
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary
        start = time.time()
        keystone.services.list()
        end = time.time()
        milliseconds = (end - start) * 1000

        # gather some vaguely interesting metrics to return
        project_count = len(keystone.projects.list())
        user_count = len(keystone.users.list(domain='Default'))

    status_ok()
    metric_bool('keystone_api_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        metric('keystone_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('keystone_user_count', 'uint32', user_count, 'users')
        # Emit the tenant count once; it was previously sent twice.
        metric('keystone_tenant_count', 'uint32', project_count, 'tenants')
示例#16
0
def check(args):
    """Probe the local keystone v2.0 API and report latency and counts.

    Creates a keystone client for port 35357 of ``args.ip``; an
    ``exc.HttpServerError`` or ``exc.ClientException`` marks the API as down
    and any other exception goes to ``status_err``. On success the service
    listing is timed and the tenant and user counts are reported.
    """
    endpoint = 'http://{ip}:35357/v2.0'.format(ip=args.ip)

    try:
        keystone = get_keystone_client(endpoint=endpoint)
        is_up = True
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # Time an arbitrary call.
        started = time()
        keystone.services.list()
        milliseconds = (time() - started) * 1000

        # A couple of mildly interesting counts to report alongside.
        tenant_count = len(keystone.tenants.list())
        user_count = len(keystone.users.list())

    status_ok()
    metric_bool('keystone_api_local_status', is_up)
    # Remaining metrics are only meaningful when the API answered.
    if is_up:
        metric('keystone_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('keystone_user_count', 'uint32', user_count)
        metric('keystone_tenant_count', 'uint32', tenant_count)
示例#17
0
def check(auth_ref, args):
    """Probe the local cinder API and report availability and latency.

    Issues an authenticated GET for ``/volumes`` on port 8776 of ``args.ip``.
    Connection errors, HTTP errors and timeouts are reported through
    ``status_err``; otherwise the response's ``ok`` flag is the API status
    and, when up, the elapsed time is reported in milliseconds.
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    VOLUME_ENDPOINT = ('http://{ip}:8776/v1/{tenant}'.format(
        ip=args.ip, tenant=keystone.tenant_id))

    s = requests.Session()

    s.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': auth_token
    })

    try:
        r = s.get('%s/volumes' % VOLUME_ENDPOINT, verify=False, timeout=10)
        is_up = r.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        status_err(str(e))
    else:
        status_ok()
        metric_bool('cinder_api_local_status', is_up)
        # only want to send other metrics if api is up
        if is_up:
            milliseconds = r.elapsed.total_seconds() * 1000
            # The value is rendered with three decimals, so it is declared
            # as a double rather than uint32.
            metric('cinder_api_local_response_time', 'double',
                   '%.3f' % milliseconds, 'ms')
示例#18
0
def check(args):
    """Emit one boolean metric per nova service describing whether it is up.

    A nova client is created for port 8774 of ``args.ip``. Services are
    listed for ``args.host`` when it is set, otherwise for every host. Only
    an enabled service whose state is ``down`` is reported as not up.

    NOTE(review): assumes status_err() terminates the check; otherwise
    ``nova`` could be unbound after a failed client creation -- confirm.
    """
    compute_url = 'http://{ip}:8774/v3'.format(ip=args.ip)
    try:
        nova = get_nova_client(bypass_url=compute_url)
    except Exception as e:
        # This check gathers no API status metric, so catch everything.
        status_err(str(e))

    single_host = args.host
    if single_host:
        services = nova.services.list(host=single_host)
    else:
        services = nova.services.list()

    if len(services) == 0:
        status_err("No host(s) found in the service list")

    status_ok()
    for entry in services:
        enabled_but_down = (entry.status == 'enabled' and
                            entry.state == 'down')

        # The host is only part of the metric name when listing all hosts.
        if args.host:
            label = '%s_status' % entry.binary
        else:
            label = '%s_on_host_%s_status' % (entry.binary, entry.host)

        metric_bool(label, not enabled_but_down)
def check(args):
    """Probe the local neutron API and report availability and latency.

    Creates a neutron client for port 9696 of ``args.ip``; an
    ``exc.NeutronClientException`` marks the API as down and any other
    exception goes to ``status_err``. On success the agent listing is timed.
    """
    NETWORK_ENDPOINT = 'http://{ip}:9696'.format(ip=args.ip)

    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        is_up = True
    # if we get a NeutronClientException don't bother sending any other metric
    # The API IS DOWN
    except exc.NeutronClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary
        start = time()
        neutron.list_agents()
        end = time()
        milliseconds = (end - start) * 1000

    status_ok()
    metric_bool('neutron_api_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        # The value is rendered with three decimals, so it is declared as a
        # double rather than uint32.
        metric('neutron_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
示例#20
0
def check(auth_ref, args):
    """Report magnum API availability and the state of its services.

    Creates a magnum client (pointed at port 9511 of ``args.ip`` when an IP
    is given) and records ``client_success``. An ``exc.HttpError`` marks the
    API as down; any other exception goes to ``status_err``. When the API is
    up, each magnum service yields a ``<binary>_status`` metric that is true
    when its state is ``up``. ``auth_ref`` is accepted but not used here.
    """
    endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip,)

    try:
        # Only override the endpoint when an IP was supplied.
        client_kwargs = {'endpoint': endpoint} if args.ip else {}
        magnum = get_magnum_client(**client_kwargs)
        api_is_up = True
    except exc.HttpError as e:
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_magnum')
    except Exception as e:
        # Any other exception presumably isn't an API error
        metric_bool('client_success', False, m_name='maas_magnum')
        status_err(str(e), m_name='maas_magnum')
    else:
        metric_bool('client_success', True, m_name='maas_magnum')
        services = magnum.mservices.list()

    status_ok(m_name='maas_magnum')
    metric_bool('magnum_api_local_status', api_is_up, m_name='maas_magnum')
    if api_is_up:
        for svc in services:
            state_is_up = svc.state == 'up'
            metric_bool('_'.join([svc.binary, 'status']), state_is_up)
示例#21
0
def check_process_running(process_names, container_name=None):
    """Report, per requested process, whether it appears as running.

    Reads the process list from ``cgroup.procs`` under the CPU cgroup of the
    host, or of the LXC container ``container_name`` when one is given, and
    emits ``<name>_process_status`` for every entry of ``process_names``.
    """
    if not process_names:
        # Without any process names there is nothing to check.
        status_err('No process names provided')

    host_procs = '/sys/fs/cgroup/cpu/cgroup.procs'
    container_procs = None
    if container_name is not None:
        # Look inside the container's cgroup instead of the parent host's.
        container_procs = os.path.join('/sys/fs/cgroup/cpu/lxc',
                                       container_name, 'cgroup.procs')
    procs = get_processes(
        host_procs if container_procs is None else container_procs)

    if not procs:
        # The process list for the container or host could not be read.
        status_err('Could not get a list of running processes')

    # The process list was obtained, so the check itself is healthy.
    status_ok()

    for name in process_names:
        present = name in procs
        metric_bool('%s_process_status' % name, present)
示例#22
0
def main():
    """Gather RabbitMQ statistics and print them as monitoring metrics.

    All collectors share one authenticated ``requests`` session and write
    into the same dict. Entries whose value is a boolean are emitted negated
    as ``rabbitmq_<key>_status``; all others become ``int64`` metrics with
    their unit.
    """
    gathered = {}
    # The Session stores the auth credentials for every collector.
    http = requests.Session()
    http.auth = (options.username, options.password)

    scheme = 'https' if options.https else 'http'
    host, port = options.host, options.port

    _get_connection_metrics(http, gathered, scheme, host, port)
    _get_overview_metrics(http, gathered, scheme, host, port)
    _get_node_metrics(http, gathered, scheme, host, port, options.name)
    _get_queue_metrics(http, gathered, scheme, host, port)
    _get_consumer_metrics(http, gathered, scheme, host, port)

    status_ok(m_name='maas_rabbitmq')

    for name, details in gathered.items():
        reading = details['value']
        if reading is True or reading is False:
            metric_bool('rabbitmq_%s_status' % name, not reading)
            continue
        metric('rabbitmq_%s' % name, 'int64', reading, details['unit'])
def check(auth_ref, args):
    """Probe the local glance registry and report availability and latency.

    Issues an authenticated GET for ``/images`` on port 9191 of ``args.ip``.
    Connection errors, HTTP errors and timeouts mark the registry as down;
    any other exception is reported through ``status_err``.

    NOTE(review): assumes status_err() terminates the check; otherwise
    ``is_up`` could be unbound below -- confirm.
    """
    # get_keystone_client is used because it contains logic to fetch a new
    # token when the previous one is bad.
    token = get_keystone_client(auth_ref).auth_token
    endpoint = 'http://{ip}:9191'.format(ip=args.ip)

    session = Session()
    session.headers.update(
        {'Content-type': 'application/json',
         'x-auth-token': token})

    try:
        # /images returns a list of public, non-deleted images
        response = session.get('%s/images' % endpoint,
                               verify=False, timeout=10)
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as e:
        status_err(str(e))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)
    # Latency is only meaningful when the registry answered.
    if is_up:
        elapsed_ms = response.elapsed.total_seconds() * 1000
        metric('glance_registry_local_response_time', 'double',
               '%.3f' % elapsed_ms, 'ms')
示例#24
0
def main():
    """Collect HP hardware health states and emit them as boolean metrics.

    Picks ``ssacli`` or, failing that, ``hpssacli`` depending on which binary
    exists under ``/usr/sbin``, gathers the chassis, disk and controller
    states through the helper functions and reports each one with
    ``maas_common.metric_bool``.
    """
    try:
        os.stat('/usr/sbin/ssacli')
        ssacli_bin = 'ssacli'
    except Exception:
        try:
            os.stat('/usr/sbin/hpssacli')
            ssacli_bin = 'hpssacli'
        except Exception:
            maas_common.status_err('Neither ssacli or hpssacli could be found',
                                   m_name='hp_monitoring')

    status = {}
    status['hardware_processors_status'] = \
        get_chassis_status('hpasmcli', 'server')
    status['hardware_memory_status'] = get_chassis_status('hpasmcli', 'dimm')
    status['hardware_disk_status'] = get_drive_status(ssacli_bin)
    status['hardware_controller_status'] = get_controller_status(ssacli_bin)
    status['hardware_controller_cache_status'] = \
        get_controller_cache_status(ssacli_bin)
    status['hardware_controller_battery_status'] = \
        get_controller_battery_status(ssacli_bin)

    maas_common.status_ok(m_name='maas_hwvendor')
    # items() exists on both Python 2 and 3; viewitems() is Python 2 only.
    for name, value in status.items():
        maas_common.metric_bool(name, value, m_name='maas_hwvendor')
def check(auth_ref, args):
    """Emit one boolean metric per cinder service describing whether it is up.

    Fetches ``/os-services`` from port 8776 of ``args.ip`` using the keystone
    token. Each service yields ``<binary>_on_host_<host>``; only an enabled
    service whose state is not ``up`` is reported as down.

    NOTE(review): assumes status_err() terminates the check -- confirm.
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    volume_url = 'http://{ip}:8776/v1/{tenant}' \
                 .format(ip=args.ip, tenant=keystone.tenant_id)

    http = requests.Session()
    request_headers = {'Content-type': 'application/json',
                       'x-auth-token': auth_token}
    http.headers.update(request_headers)

    transport_errors = (exc.ConnectionError, exc.HTTPError, exc.Timeout)
    try:
        reply = http.get('%s/os-services' % volume_url,
                         verify=False,
                         timeout=10)
    except transport_errors as e:
        status_err(str(e))

    if not reply.ok:
        status_err('could not get response from cinder api')

    status_ok()
    for svc in reply.json()['services']:
        if svc['status'] == 'enabled' and svc['state'] != 'up':
            svc_is_up = False
        else:
            svc_is_up = True
        label = '%s_on_host_%s' % (svc['binary'], svc['host'])
        metric_bool(label, svc_is_up)
def check(args):
    """Probe the local nova API and report availability and latency.

    Creates a nova client for port 8774 of ``args.ip``; an
    ``exc.ClientException`` marks the API as down and any other exception
    goes to ``status_err``. On success the service listing is timed.
    """
    COMPUTE_ENDPOINT = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        nova = get_nova_client(bypass_url=COMPUTE_ENDPOINT)
        is_up = True
    except exc.ClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary
        start = time()
        nova.services.list()
        end = time()
        milliseconds = (end - start) * 1000

    status_ok()
    metric_bool('nova_api_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        # The value is rendered with three decimals, so it is declared as a
        # double rather than uint32.
        metric('nova_api_local_response_time', 'double', '%.3f' % milliseconds,
               'ms')
示例#27
0
def get_osd_statistics(client=None, keyring=None, osd_ids=None,
                       container_name=None):
    """Report the ``up`` flag of each requested OSD as a boolean metric.

    The OSD dump and the per-OSD PG dump are fetched once. For every id in
    ``osd_ids`` the matching OSD dump entry supplies the value of the
    ``osd.<id>_up`` metric.

    Raises:
        maas_common.MaaSException: if an id is missing from the OSD dump.
    """
    osd_dump = get_ceph_osd_dump(
        client=client, keyring=keyring, container_name=container_name)
    pg_osds_dump = get_ceph_pg_dump_osds(
        client=client, keyring=keyring, container_name=container_name)

    for osd_id in osd_ids:
        matches = (item for item in osd_dump['osds'] if item['osd'] == osd_id)
        osd = next(matches, None)
        if osd is None:
            msg = 'The OSD ID %s does not exist.' % osd_id
            raise maas_common.MaaSException(msg)

        metric_name = '_'.join(('osd.%s' % osd_id, 'up'))
        maas_common.metric_bool(metric_name, osd['up'])

        # Rebind to the matching PG dump entry when one exists; the visible
        # code does not read it afterwards.
        pg_matches = (item for item in pg_osds_dump if item['osd'] == osd_id)
        osd = next(pg_matches, osd)
示例#28
0
def check(args, tenant_id):
    """Probe the local heat API and report latency and the stack count.

    Creates a heat client for port 8004 of ``args.ip`` and ``tenant_id``; an
    ``exc.HTTPException`` marks the API as down and any other exception goes
    to ``status_err``. On success the ``build_info`` call is timed and the
    number of stacks is counted.
    """
    endpoint = ('http://{ip}:8004/v1/{tenant}'.format
                (ip=args.ip, tenant=tenant_id))

    try:
        heat = get_heat_client(endpoint=endpoint)
        is_up = True
    except exc.HTTPException as e:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # Time an arbitrary call.
        started = time()
        heat.build_info.build_info()
        milliseconds = (time() - started) * 1000

        # Materialise the listing so it can be counted.
        stacks = list(heat.stacks.list())
        stack_count = len(stacks)

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    # Remaining metrics are only meaningful when the API answered.
    if is_up:
        metric('heat_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('heat_stack_count', 'uint32', stack_count, 'stacks')
示例#29
0
def check(args):
    """Report hypervisor resource statistics as ``cloud_resource_*`` metrics.

    Fetches every hypervisor through the compute client, then for each entry
    of ``stats_mapping`` stores the hypervisor attribute named by
    ``stat_name`` (scaled by the CPU or memory allocation ratio for
    ``total_vcpus`` / ``total_memory``) together with its unit and type.
    Each hypervisor overwrites the values stored by the previous one.

    NOTE(review): assumes status_err() terminates the check; otherwise
    ``cloud_stats`` would be unbound after a client failure -- confirm.
    """
    try:
        nova = get_openstack_client('compute')
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(e), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')
        hypervisors = [nova.get_hypervisor(h.id) for h in nova.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        for position, hypervisor in enumerate(hypervisors, 1):
            # Expose the running hypervisor count as an attribute so it can
            # be read through stats_mapping like any other statistic.
            hypervisor.count = position
            for metric_name, vals in stats_mapping.items():
                if metric_name == 'total_vcpus':
                    multiplier = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    multiplier = args.mem_allocation_ratio
                else:
                    multiplier = 1
                entry = cloud_stats[metric_name]
                entry['value'] = \
                    getattr(hypervisor, vals['stat_name']) * multiplier
                entry['unit'] = vals['unit']
                entry['type'] = vals['type']

    status_ok(m_name='maas_nova')
    for metric_name in cloud_stats:
        entry = cloud_stats[metric_name]
        metric('cloud_resource_%s' % metric_name,
               entry['type'],
               entry['value'],
               entry['unit'])
示例#30
0
def bonding_ifaces_check(_):
    """Emit a ``slave_down`` boolean metric for every bonding interface.

    Each file under ``/proc/net/bonding`` is read with ``cat``. The line
    following every ``Slave Interface`` line is taken as that slave's MII
    status. An interface is flagged when any slave's status lacks ``up`` or
    when the bond has fewer than two slaves (including none at all).

    The single positional argument is ignored.
    """
    bonding_ifaces = os.listdir("/proc/net/bonding")
    for bonding_iface in bonding_ifaces:
        bonding_iface_check_cmd = [
            'cat', '/proc/net/bonding/%s' % bonding_iface
        ]
        # universal_newlines makes check_output return text on Python 3 as
        # well, so the str-based split below works on both major versions.
        bonding_iface_check_cmd_output = subprocess.check_output(
            bonding_iface_check_cmd, universal_newlines=True)

        lines = bonding_iface_check_cmd_output.split('\n')

        has_slave_down = False
        slave_count = 0
        for idx, line in enumerate(lines):
            if line.startswith("Slave Interface"):
                slave_count += 1
                # The MII status is on the line right after the slave header.
                slave_inface_mii_status = lines[idx + 1].split(":")[1]
                if 'up' not in slave_inface_mii_status:
                    has_slave_down = True

        # The slave total is only known after the whole file has been read;
        # testing it inside the loop flagged every bond on its first slave.
        if slave_count < 2:
            has_slave_down = True

        metric_bool('host_bonding_iface_%s_slave_down' % bonding_iface,
                    has_slave_down)
示例#31
0
def main():
    """Check that a completed Holland backup exists for today or yesterday.

    The backup sets returned by ``container_holland_lb_check`` are scanned
    for one whose first field contains today's or yesterday's date stamp
    (``YYYYMMDD``).  The outcome is published as ``holland_backup_status``.
    The second field of the last backup set, divided by 1024, is printed
    as ``holland_backup_size`` when at least one backup set exists.
    """
    args = parse_args()
    galera_container = args.galera_container_name
    holland_bin = args.holland_binary
    holland_bs = args.holland_backupset

    # Take one snapshot of the date so both stamps stay consistent even if
    # the check happens to run across midnight.
    current_date = datetime.date.today()
    today = current_date.strftime('%Y%m%d')
    yesterday = (current_date -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')

    # Get completed Holland backup set
    backupsets = \
        container_holland_lb_check(galera_container, holland_bin, holland_bs)

    # Each stamp must be tested against the backup name on its own:
    # "yesterday or today in name" is always true because a non-empty
    # string is truthy, so any backup at all used to pass.
    has_recent_backup = any(
        yesterday in backup[0] or today in backup[0]
        for backup in backupsets)

    if has_recent_backup:
        status_ok()
        metric_bool('holland_backup_status', True)
    else:
        # NOTE(review): the metric is recorded before the error status on
        # the assumption that status_err ends the check -- confirm.
        metric_bool('holland_backup_status', False)
        status_err('Could not find Holland backup from %s or %s' %
                   (yesterday, today))

    # Print metric about last backup, when there is one to report on.
    if backupsets:
        print_metrics('holland_backup_size', float(backupsets[-1][1]) / 1024)
def check(args, tenant_id):
    """Report whether the local Ceilometer API responds, and how quickly.

    A client is created for port 8777 on ``args.ip``; the meter list is
    then fetched and fully iterated while being timed.  An
    ``exc.HTTPException`` marks the API as down; any other exception is
    handed to ``status_err``.  ``tenant_id`` is accepted but not used here.
    """
    CEILOMETER_ENDPOINT = 'http://{ip}:8777'.format(ip=args.ip)

    try:
        ceilometer = get_ceilometer_client(endpoint=CEILOMETER_ENDPOINT)
        # time something arbitrary
        start = time()
        meters = ceilometer.meters.list()
        # Exceptions are only thrown when we iterate over the meters, so
        # the iteration must happen inside this try block: code placed in
        # an "else" clause is not covered by the except handlers below.
        [i.meter_id for i in meters]
        end = time()
        milliseconds = (end - start) * 1000
        is_up = True
    except exc.HTTPException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
        # is_up was never set, so nothing below can be reported.
        return

    status_ok()
    metric_bool('ceilometer_api_local_status', is_up)
    if is_up:
        # only want to send other metrics if api is up
        metric('ceilometer_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def main():
    """Check that a completed Holland backup exists for today or yesterday.

    The backup sets returned by ``holland_lb_check`` are scanned for one
    whose first field contains today's or yesterday's date stamp
    (``YYYYMMDD``).  The outcome is published as ``holland_backup_status``
    under the ``maas_holland`` name.  The second field of the last backup
    set, divided by 1024 and formatted to one decimal place, is printed as
    ``holland_backup_size`` when at least one backup set exists.
    """
    # NOTE(review): ``args`` is not assigned in this function; it is
    # presumably a module-level name -- confirm.
    galera_hostname = args.galera_hostname
    holland_bin = args.holland_binary
    holland_bs = args.holland_backupset

    # Take one snapshot of the date so both stamps stay consistent even if
    # the check happens to run across midnight.
    current_date = datetime.date.today()
    today = current_date.strftime('%Y%m%d')
    yesterday = (current_date -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')

    # Get completed Holland backup set
    backupsets = \
        holland_lb_check(galera_hostname, holland_bin, holland_bs)

    # Each stamp must be tested against the backup name on its own:
    # "yesterday or today in name" is always true because a non-empty
    # string is truthy, so any backup at all used to pass.
    has_recent_backup = any(
        yesterday in backup[0] or today in backup[0]
        for backup in backupsets)

    if has_recent_backup:
        status_ok(m_name='maas_holland')
        metric_bool('holland_backup_status', True, m_name='maas_holland')
    else:
        metric_bool('holland_backup_status', False, m_name='maas_holland')
        status_err('Could not find Holland backup from %s or %s' %
                   (yesterday, today),
                   m_name='maas_holland')

    # Print metric about last backup, when there is one to report on.
    if backupsets:
        print_metrics('holland_backup_size',
                      "{0:.1f}".format(float(backupsets[-1][1]) / 1024))
示例#34
0
def check(args):
    """Probe the local Nova API and publish availability and server counts.

    A client is built for port 8774 (``/v3``) on ``args.ip``.  When that
    succeeds, one ``services.list()`` call is timed and the servers are
    tallied by status.  ``nova_api_local_status`` is always emitted; the
    response time and one counter per entry of ``SERVER_STATUSES`` are
    emitted only when the API is up.
    """
    endpoint = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        nova = get_nova_client(bypass_url=endpoint)
        is_up = True
    except exc.ClientException:
        # The client library reported a failure: treat the API as down.
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error
        status_err(str(err))
    else:
        # Time an arbitrary call to obtain a response-time sample.
        started = time()
        nova.services.list()
        finished = time()
        milliseconds = (finished - started) * 1000

        # Tally the servers by their status.
        status_count = collections.Counter(
            server.status for server in nova.servers.list())

    status_ok()
    metric_bool('nova_api_local_status', is_up)
    if not is_up:
        # only want to send other metrics if api is up
        return

    metric('nova_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
    for status in SERVER_STATUSES:
        metric('nova_servers_in_state_%s' % status,
               'uint32',
               status_count[status])
示例#35
0
def check(args):
    """Probe the local Nova metadata API and publish its status.

    ``GET /`` is issued against port 8775 on ``args.ip``.  The API counts
    as up when the response is OK and one of the lines in its body is
    exactly ``1.0``.  ``nova_api_metadata_local_status`` is always emitted;
    the response time is emitted (as a metric and through
    ``metric_influx``) only when the API is up.
    """
    metadata_endpoint = 'http://{ip}:8775'.format(ip=args.ip)
    is_up = True

    s = requests.Session()

    try:
        # looks like we can only get / (ec2 versions) without specifying
        # an instance ID and other headers
        versions = s.get('%s/' % metadata_endpoint, verify=False, timeout=10)
        milliseconds = versions.elapsed.total_seconds() * 1000
        # Use the decoded body: ``content`` is bytes on Python 3, so a
        # membership test with the str literal '1.0' could never succeed
        # there and the API would always be reported as down.
        if not versions.ok or '1.0' not in versions.text.splitlines():
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as e:
        status_err(str(e))
        # No timing was captured, so there is nothing valid to report.
        return

    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        response_time = '%.3f' % milliseconds
        metric('nova_api_metadata_local_response_time', 'double',
               response_time, 'ms')

        metric_values = {
            'nova_api_metadata_local_response_time': response_time,
        }
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
示例#36
0
def check(auth_ref, args):
    """Publish node counts from the Ironic API.

    A client is created against ``args.ip`` when it is set, otherwise with
    the client's defaults.  ``exc.ClientException`` marks the API as down
    without emitting anything further; any other exception records
    ``client_success`` as false and is passed to ``status_err``.  On
    success all nodes are listed and the total and non-maintenance counts
    are emitted.  ``auth_ref`` is accepted but not used here.
    """
    ironic_endpoint = ('{protocol}://{ip}:{port}/v1'.format(
        ip=args.ip, protocol=args.protocol, port=args.port))

    # Only pass an explicit endpoint when an IP was supplied.
    client_kwargs = {'endpoint': ironic_endpoint} if args.ip else {}

    try:
        ironic = get_ironic_client(**client_kwargs)
        is_up = True
    except exc.ClientException:
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
        return
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # limit=0 lists every node without pagination
        all_nodes = ironic.node.list(limit=0)
        status_ok(m_name='maas_ironic')

    if not is_up:
        return

    total_nodes = len(all_nodes)
    maint_nodes_count = len(
        [node for node in all_nodes if node.maintenance])
    metric('ironic_up_nodes_count', 'uint32',
           total_nodes - maint_nodes_count)
    metric('ironic_total_nodes_count', 'uint32', total_nodes)
示例#37
0
def check(args):
    """Publish an up/down metric for each Neutron agent.

    Agents are listed through a client bound to port 9696 on
    ``args.hostname``, restricted to ``args.host`` when that is set.  An
    agent counts as down only when it is administratively up but not
    alive.  Dots in metric names are replaced by underscores.
    """
    endpoint = 'http://{hostname}:9696'.format(hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as err:
        # not gathering api status metric here so catch any exception
        status_err(str(err))

    # gather neutron agent states
    host_filter = {'host': args.host} if args.host else {}
    agents = neutron.list_agents(**host_filter)['agents']

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    # return all the things
    status_ok()
    for agent in agents:
        agent_is_up = not (agent['admin_state_up'] and not agent['alive'])

        if args.host:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (
                agent['binary'], agent['id'], agent['host'])

        metric_bool(name.replace(".", "_"), agent_is_up)
示例#38
0
def check(args):
    """Publish an up/down metric for each Neutron agent.

    Agents are listed through a client bound to port 9696 on ``args.ip``,
    restricted to ``args.host`` when that is set.  An agent counts as down
    only when it is administratively up but not alive.
    """
    endpoint = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as err:
        # not gathering api status metric here so catch any exception
        status_err(str(err))

    # gather neutron agent states
    host_filter = {'host': args.host} if args.host else {}
    agents = neutron.list_agents(**host_filter)['agents']

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    # return all the things
    status_ok()
    for agent in agents:
        agent_is_up = not (agent['admin_state_up'] and not agent['alive'])

        if args.host:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (
                agent['binary'], agent['id'], agent['host'])

        metric_bool(name, agent_is_up)
示例#39
0
def check(auth_ref, args):
    """Probe the local Glance registry and publish its status.

    ``GET /images`` is issued against port 9191 on ``args.ip`` using the
    Keystone auth token.  ``glance_registry_local_status`` is always
    emitted; the response time is emitted only when the request succeeded
    with an OK response.
    """
    # get_keystone_client is called here because it contains logic to
    # obtain a new token when the previous one is bad.
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    registry_endpoint = 'http://{ip}:9191'.format(ip=args.ip)

    session = Session()
    request_headers = {
        'Content-type': 'application/json',
        'x-auth-token': auth_token
    }
    session.headers.update(request_headers)

    # /images returns a list of public, non-deleted images
    images_url = '%s/images' % registry_endpoint
    try:
        response = session.get(images_url, verify=False, timeout=10)
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        status_err(str(err))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)
    if not is_up:
        # only want to send other metrics if api is up
        return

    milliseconds = response.elapsed.total_seconds() * 1000
    metric('glance_registry_local_response_time', 'uint32', milliseconds)
示例#40
0
def check(args):
    """Publish an up/down metric for each Cinder service.

    The ``os-services`` resource is fetched through the block-storage
    client's session.  When ``args.host`` is set, only services whose host
    is ``args.host`` or starts with ``args.host@`` are reported, and the
    metric name carries the backend (the part after ``@``) when present.
    Otherwise every service is reported with its host in the metric name.
    A service counts as down only when it is enabled and its state is not
    ``up``.  All status and ``client_success`` output uses the
    ``maas_cinder`` name.
    """
    cinder = get_openstack_client('block_storage')
    volume_endpoint = '%s/os-services' % str(cinder.get_endpoint())

    try:
        # We cannot do /os-services?host=X as cinder returns a hostname of
        # X@lvm for cinder-volume binary
        resp = cinder.session.get(volume_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        metric_bool('client_success', False, m_name='maas_cinder')
        status_err(str(e), m_name='maas_cinder')
        # There is no response object to inspect beyond this point.
        return

    if not resp.ok:
        metric_bool('client_success', False, m_name='maas_cinder')
        # Reported under 'maas_cinder' like every other call in this check.
        status_err(
            'Could not get response from Cinder API',
            m_name='maas_cinder'
        )
    else:
        metric_bool('client_success', True, m_name='maas_cinder')

    services = resp.json()['services']

    # We need to match against a host of X and X@lvm (or whatever backend)
    if args.host:
        host_prefix = ''.join((args.host, '@'))
        services = [service for service in services
                    if (service['host'].startswith(host_prefix) or
                        service['host'] == args.host)]

    if len(services) == 0:
        status_err(
            'No host(s) found in the service list',
            m_name='maas_cinder'
        )

    status_ok(m_name='maas_cinder')

    for service in services:
        service_is_up = not (service['status'] == 'enabled' and
                             service['state'] != 'up')

        if not args.host:
            name = '%s_on_host_%s' % (service['binary'], service['host'])
        elif '@' in service['host']:
            # Split on the first '@' only so an unexpected extra '@' in
            # the backend part cannot break the unpacking.
            backend = service['host'].split('@', 1)[1]
            name = '%s-%s_status' % (service['binary'], backend)
        else:
            name = '%s_status' % service['binary']

        metric_bool(name, service_is_up)
def check():
    """Probe the local Neutron API and send status and counts to graphite.

    A client is created against port 9696 on ``CONFIGS['ip']`` when that
    is set, otherwise with the client's defaults.  On success one
    ``list_agents`` call is timed and the networks, agents, routers and
    subnets are counted.  ``neutron_api_local_status`` is always sent; the
    other metrics are sent only when the API is up.  If anything raises,
    the status metric is sent as false and the exception is re-raised.
    """
    def graphite():
        # Read from CONFIGS on every call, i.e. at the moment each metric
        # is sent rather than once up front.
        return {'graphite_host': CONFIGS['graphite_host'],
                'graphite_port': CONFIGS['graphite_port']}

    try:
        endpoint = 'http://{ip}:9696'.format(ip=CONFIGS['ip'])

        try:
            if CONFIGS['ip']:
                neutron = get_neutron_client(endpoint_url=endpoint)
            else:
                neutron = get_neutron_client()

            is_up = True
        except exc.NeutronClientException:
            # The API is down: no other metric is worth sending.
            is_up = False
        except Exception as err:
            # Any other exception presumably isn't an API error
            status_err(str(err))
        else:
            # time something arbitrary
            started = time.time()
            neutron.list_agents()
            finished = time.time()
            milliseconds = (finished - started) * 1000

            # gather some metrics
            networks = len(neutron.list_networks()['networks'])
            agents = len(neutron.list_agents()['agents'])
            routers = len(neutron.list_routers()['routers'])
            subnets = len(neutron.list_subnets()['subnets'])

        status_ok()
        metric_bool(PLUGIN, 'neutron_api_local_status', is_up, **graphite())
        # only want to send other metrics if api is up
        if is_up:
            extra_metrics = (
                ('neutron_api_local_response_time', '%.3f' % milliseconds),
                ('neutron_networks', networks),
                ('neutron_agents', agents),
                ('neutron_routers', routers),
                ('neutron_subnets', subnets),
            )
            for metric_name, value in extra_metrics:
                metric(PLUGIN, metric_name, value, **graphite())
    except:
        metric_bool(PLUGIN, 'neutron_api_local_status', False, **graphite())
        raise
示例#42
0
def check(auth_ref, args):
    """Publish whether an ironic-conductor process is running.

    The metric ``ironic-conductor_status`` is true when any process name
    reported by psutil contains ``ironic-conducto``.  Neither argument is
    used.
    """
    # The search stops at the first matching process.
    conductor_running = any(
        'ironic-conducto' in proc.name()
        for proc in psutil.process_iter())
    metric_bool("ironic-conductor_status", conductor_running)
示例#43
0
def check(auth_ref, args):
    """Publish a ``Yes``/``No`` status string for each Nova service.

    A Nova client is built for port 8774 on ``args.hostname`` using the
    Keystone token and tenant.  If that fails, ``client_success`` is
    recorded as false, every entry of ``NOVA_SERVICE_TYPE_LIST`` gets a
    "cannot reach API" status and the function returns.  Otherwise the
    services (restricted to ``args.host`` when set) are listed.  A service
    is reported as ``No`` when it is enabled but down, or when it is
    disabled with a reason containing ``auto``.
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    tenant_id = keystone.tenant_id
    nova_version = '.'.join(
        str(part) for part in get_os_component_major_api_version('nova'))

    endpoint = '{protocol}://{hostname}:8774/v{version}/{tenant_id}'.format(
        protocol=args.protocol,
        hostname=args.hostname,
        version=nova_version,
        tenant_id=tenant_id)

    try:
        nova = get_nova_client(auth_token=auth_token, bypass_url=endpoint)
    except Exception as err:
        # not gathering api status metric here so catch any exception
        metric_bool('client_success', False, m_name='maas_nova')
        for nova_service_type in NOVA_SERVICE_TYPE_LIST:
            metric('%s_status' % nova_service_type,
                   'string',
                   '%s cannot reach API' % nova_service_type,
                   m_name='maas_nova')
        status_err_no_exit(str(err), m_name='maas_nova')
        return

    metric_bool('client_success', True, m_name='maas_nova')

    # gather nova service states
    host_filter = {'host': args.host} if args.host else {}
    services = nova.services.list(**host_filter)

    if len(services) == 0:
        status_err("No host(s) found in the service list", m_name='maas_nova')

    # return all the things
    status_ok(m_name='maas_nova')
    for service in services:
        service_status = service.status.lower()
        if service_status == 'enabled':
            is_down = service.state.lower() == 'down'
        elif service_status == 'disabled':
            reason = service.disabled_reason
            is_down = bool(reason) and 'auto' in reason.lower()
        else:
            is_down = False
        service_is_up = "No" if is_down else "Yes"

        if args.host:
            name = '%s_status' % service.binary
        else:
            name = '%s_on_host_%s_status' % (service.binary, service.host)

        metric(name, 'string', service_is_up, m_name='maas_nova')
示例#44
0
def get_mon_statistics(client=None, keyring=None, host=None,
                       container_name=None):
    """Publish whether the monitor named ``host`` is in the Ceph quorum.

    The Ceph status is fetched with the given client, keyring and
    container name.  The rank of the first monitor in the monmap whose
    name equals ``host`` is looked up in the quorum list and the result is
    emitted as ``mon_in_quorum``.  Raises ``IndexError`` when no monitor
    has that name.
    """
    status = get_ceph_status(client=client,
                             keyring=keyring,
                             container_name=container_name)

    matching = []
    for candidate in status['monmap']['mons']:
        if candidate['name'] == host:
            matching.append(candidate)

    rank = matching[0]['rank']
    maas_common.metric_bool('mon_in_quorum', rank in status['quorum'])
示例#45
0
def main(args):
    """Run the process check for ``args.processes`` in ``args.container``.

    When no process names were given, ``container_success`` is recorded as
    false and an error status is reported before the check is invoked.
    """
    process_names = args.processes
    if not process_names:
        # Nothing was named on the command line.
        metric_bool('container_success', False, m_name='maas_container')
        status_err('No executable names supplied', m_name='maas_container')

    check_process_running(container_name=args.container,
                          process_names=process_names)
def main(args):
    """Run the process check for ``args.processes`` in ``args.container``.

    When no process names were given, ``container_success`` is recorded as
    false and an error status is reported before the check is invoked.
    """
    process_names = args.processes
    if not process_names:
        # Nothing was named on the command line.
        metric_bool('container_success', False, m_name='maas_container')
        status_err('No executable names supplied', m_name='maas_container')

    check_process_running(container_name=args.container,
                          process_names=process_names)
def check(args):
    """Check the Neutron metadata proxy for every DHCP-enabled network.

    The first line of the ``FIND_CONTAINER`` command output names the
    container used for the checks.  For each network that has a
    ``network:dhcp`` port, ``SERVICE_CHECK`` is run inside that container
    via ``lxc-attach`` against the network's ``qdhcp-<id>`` namespace.  A
    failing command counts as a failure unless its output contains
    ``404 Not Found``.  The overall result is emitted as
    ``neutron-metadata-agent-proxy_status``.
    """
    # identify the container we will use for monitoring
    try:
        listing = subprocess.check_output(FIND_CONTAINER)
        container = listing.splitlines()[0]
    except (IndexError, subprocess.CalledProcessError):
        metric_bool('agents_found', False, m_name='maas_neutron')
        status_err('no running neutron agents containers found',
                   m_name='maas_neutron')
    else:
        metric_bool('agents_found', True, m_name='maas_neutron')

    network_endpoint = '{protocol}://{host}:{port}'.format(
        host=args.neutron_host,
        protocol=args.protocol,
        port=args.port
    )
    try:
        neutron = get_neutron_client(endpoint_url=network_endpoint)
    except Exception as err:
        # not gathering api status metric here so catch any exception
        metric_bool('client_success', False, m_name='maas_neutron')
        status_err(str(err), m_name='maas_neutron')
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # only check networks which have a port with DHCP enabled
    dhcp_ports = neutron.list_ports(device_owner='network:dhcp')['ports']
    network_ids = {port['network_id'] for port in dhcp_ports}

    # perform checks for each identified network
    failures = []
    for net_id in network_ids:
        namespace = 'qdhcp-%s' % net_id
        attach_line = 'lxc-attach -n %s -- %s' % (
            container, SERVICE_CHECK % namespace)
        try:
            subprocess.check_output(shlex.split(attach_line),
                                    stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as err:
            # HTTP 404 response indicates the service is responsive.
            # this is the expected response because the maas testing host IP
            # is used to look up metadata and no metadata exists for this IP
            if '404 Not Found' not in err.output:
                failures.append(net_id)

    is_ok = not failures
    metric_bool('neutron-metadata-agent-proxy_status', is_ok,
                m_name='maas_neutron')

    if not is_ok:
        status_err('neutron metadata agent proxies fail on host %s '
                   'net_ids: %s' % (container, ','.join(failures)),
                   m_name='maas_neutron')
    else:
        status_ok(m_name='maas_neutron')
示例#48
0
def main():
    """Publish the processor, memory and disk hardware status metrics.

    The three statuses are collected first (two through
    ``get_hpasmcli_status`` and one through ``get_drive_status``); an OK
    status is then reported and each value is emitted as a boolean metric.
    """
    status = {
        'hardware_processors_status': get_hpasmcli_status('server'),
        'hardware_memory_status': get_hpasmcli_status('dimm'),
        'hardware_disk_status': get_drive_status(),
    }

    maas_common.status_ok()
    # items() exists on both Python 2 and Python 3 dictionaries, whereas
    # viewitems() is Python 2 only.
    for name, value in status.items():
        maas_common.metric_bool(name, value)
def check(auth_ref, args):
    """Publish an up/down metric for each Cinder service.

    ``os-services`` is fetched from port 8776 on ``args.hostname`` using
    the Keystone token.  When ``args.host`` is set, only services whose
    host is ``args.host`` or starts with ``args.host@`` are reported, and
    the metric name carries the backend (the part after ``@``) when
    present.  Otherwise every service is reported with its host in the
    metric name.  A service counts as down only when it is enabled and its
    state is not ``up``.
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token

    volume_endpoint = "http://{hostname}:8776/v1/{tenant}".format(
        hostname=args.hostname, tenant=keystone.tenant_id)

    session = requests.Session()
    session.headers.update({
        "Content-type": "application/json",
        "x-auth-token": auth_token,
    })

    try:
        # We cannot do /os-services?host=X as cinder returns a hostname of
        # X@lvm for cinder-volume binary
        response = session.get("%s/os-services" % volume_endpoint,
                               verify=False, timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as err:
        status_err(str(err))

    if not response.ok:
        status_err("Could not get response from Cinder API")

    services = response.json()["services"]

    # We need to match against a host of X and X@lvm (or whatever backend)
    if args.host:
        host_prefix = "".join((args.host, "@"))
        services = [
            entry for entry in services
            if (entry["host"].startswith(host_prefix) or
                entry["host"] == args.host)
        ]

    if len(services) == 0:
        status_err("No host(s) found in the service list")

    status_ok()

    for service in services:
        service_is_up = not (service["status"] == "enabled" and
                             service["state"] != "up")

        if not args.host:
            name = "%s_on_host_%s" % (service["binary"], service["host"])
        elif "@" in service["host"]:
            _host, backend_name = service["host"].split("@")
            name = "%s-%s_status" % (service["binary"], backend_name)
        else:
            name = "%s_status" % service["binary"]

        metric_bool(name, service_is_up)
def check(auth_ref, args):
    """Probe the local Cinder API and publish volume and snapshot counts.

    ``volumes/detail`` and ``snapshots/detail`` are fetched from port 8776
    on ``args.ip`` using the Keystone token.  The API counts as up when
    both responses are OK.  ``cinder_api_local_status`` is always emitted;
    the response time of the volumes request, the totals and one counter
    per entry of ``VOLUME_STATUSES`` (for volumes and for snapshots) are
    emitted only when the API is up.
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    VOLUME_ENDPOINT = ('http://{ip}:8776/v1/{tenant}'.format
                       (ip=args.ip, tenant=keystone.tenant_id))

    s = requests.Session()

    s.headers.update(
        {'Content-type': 'application/json',
         'x-auth-token': auth_token})

    try:
        vol = s.get('%s/volumes/detail' % VOLUME_ENDPOINT,
                    verify=False,
                    timeout=10)
        milliseconds = vol.elapsed.total_seconds() * 1000
        snap = s.get('%s/snapshots/detail' % VOLUME_ENDPOINT,
                     verify=False,
                     timeout=10)
        is_up = vol.ok and snap.ok
    except (exc.ConnectionError,
            exc.HTTPError,
            exc.Timeout):
        is_up = False
    except Exception as e:
        status_err(str(e))
        # is_up was never set, so nothing below can be reported.
        return

    if is_up:
        # Only decode the bodies of successful responses: an error
        # response is not expected to carry the 'volumes' / 'snapshots'
        # keys, and decoding it would raise instead of reporting "down".
        volumes = vol.json()['volumes']
        snapshots = snap.json()['snapshots']

        # gather some metrics
        vol_status_count = collections.Counter(
            v['status'] for v in volumes)
        total_vols = len(volumes)

        snap_status_count = collections.Counter(
            v['status'] for v in snapshots)
        total_snaps = len(snapshots)

    status_ok()
    metric_bool('cinder_api_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        metric('cinder_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('cinder_total_volumes', 'uint32', total_vols, 'volumes')
        for status in VOLUME_STATUSES:
            metric('cinder_%s_volumes' % status,
                   'uint32',
                   vol_status_count[status], 'volumes')
        metric('cinder_total_snapshots', 'uint32', total_snaps, 'snapshots')
        for status in VOLUME_STATUSES:
            metric('cinder_%s_snaps' % status,
                   'uint32',
                   snap_status_count[status], 'snapshots')
示例#51
0
def get_mon_statistics(report=None, host=None):
    """Publish quorum membership and health for the monitor named ``host``.

    ``mon_in_quorum`` tells whether the rank of the first monmap entry
    named ``host`` appears in the report's quorum list.  ``mon_health`` is
    the ``STATUSES`` value for that monitor's health string, or 0 when the
    health section has no entry with that name.  Raises ``IndexError``
    when the monmap has no monitor named ``host``.
    """
    matching = []
    for candidate in report['monmap']['mons']:
        if candidate['name'] == host:
            matching.append(candidate)
    rank = matching[0]['rank']
    maas_common.metric_bool('mon_in_quorum', rank in report['quorum'])

    mon_health_entries = (
        report['health']['health']['health_services'][0]['mons'])
    # Stops at the first entry with the requested name.
    found = next(
        (entry for entry in mon_health_entries if entry['name'] == host),
        None)
    health_status = 0 if found is None else STATUSES[found['health']]
    maas_common.metric('mon_health', 'uint32', health_status)
示例#52
0
def check(args):
    """Publish whether an ironic-conductor process is running.

    The metric ``ironic-conductor_status`` is true when any process name
    reported by psutil contains ``ironic-conducto``.  ``args`` is not
    used.
    """
    # NOTE(npawelek): API calls for conductor status are only available
    # in ironic v1.49 and onward. Instead, we look for the process
    # directly until it becomes available within the API.
    conductor_running = any(
        'ironic-conducto' in proc.name()
        for proc in psutil.process_iter())
    metric_bool("ironic-conductor_status", conductor_running)
示例#53
0
def get_mon_statistics(client=None, keyring=None, host=None):
    """Publish quorum membership and health for the monitor named ``host``.

    The Ceph status is fetched with the given client and keyring.
    ``mon_in_quorum`` tells whether the rank of the first monmap entry
    named ``host`` appears in the quorum list.  ``mon_health`` is the
    ``STATUSES`` value for that monitor's health string, or 0 when the
    health section has no entry with that name.  Raises ``IndexError``
    when the monmap has no monitor named ``host``.
    """
    status = get_ceph_status(client=client, keyring=keyring)

    matching = []
    for candidate in status['monmap']['mons']:
        if candidate['name'] == host:
            matching.append(candidate)
    rank = matching[0]['rank']
    maas_common.metric_bool('mon_in_quorum', rank in status['quorum'])

    mon_health_entries = (
        status['health']['health']['health_services'][0]['mons'])
    # Stops at the first entry with the requested name.
    found = next(
        (entry for entry in mon_health_entries if entry['name'] == host),
        None)
    health_status = 0 if found is None else STATUSES[found['health']]
    maas_common.metric('mon_health', 'uint32', health_status)
示例#54
0
def check(args):
    """Check Neutron agent processes, inside LXC containers when in use.

    When ``on_lxc_container`` is true, every container whose name contains
    ``neutron_agents`` is looked up through ``lxc.Container`` and passed,
    with that container object, to ``check_process_statuses``.  Missing
    containers and failed lookups are reported through ``status_err``.
    Otherwise the process check runs against the local hostname.
    ``args`` is not used.
    """
    if on_lxc_container:
        agent_containers = []
        for candidate in lxc.list_containers():
            if 'neutron_agents' not in candidate:
                continue
            metric_bool('agents_found', True, m_name='maas_neutron')
            agent_containers.append(candidate)

        if not agent_containers:
            metric_bool('agents_found', False, m_name='maas_neutron')
            status_err('no running neutron agents containers found',
                       m_name='maas_neutron')
            return

        for container_name in agent_containers:
            # Look the container up and make sure it has an init PID.
            try:
                c = lxc.Container(container_name)
                if c.init_pid == -1:
                    # The container wasn't found.
                    metric_bool('container_success', False,
                                m_name='maas_neutron_agent_container')
                    status_err(
                        'Could not find PID for container {}'.format(
                            container_name),
                        m_name='maas_neutron_agent_container')
            except (Exception, SystemError) as err:
                metric_bool('container_success', False,
                            m_name='maas_neutron_agent_container')
                status_err(
                    'Container lookup failed on "{}". ERROR: "{}"'.format(
                        container_name, err),
                    m_name='maas_neutron_agent_container')
            else:
                metric_bool('container_success', True,
                            m_name='maas_neutron_agent_container')

            # c is the lxc container instance for container_name
            check_process_statuses(container_name, c)
    else:
        ovs_agent_host = socket.gethostname()
        check_process_statuses(ovs_agent_host)