Exemplo n.º 1
0
def recon_output(for_ring,
                 options=None,
                 swift_recon_path=None,
                 deploy_osp=False):
    """Run swift-recon and filter out extraneous printed lines.

    The command runs directly when ``get_container_name`` returns a falsy
    value, through ``docker exec`` when ``deploy_osp`` is set, and through
    ``lxc-attach ... bash -c`` otherwise.

    ::

        >>> recon_output('account', ['-r'])
        ['[2014-11-21 00:25:16] Checking on replication',
         '[replication_failure] low: 0, high: 0, avg: 0.0, total: 0, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_success] low: 2, high: 4, avg: 3.0, total: 6, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_time] low: 0, high: 0, avg: 0.0, total: 0, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_attempted] low: 1, high: 2, avg: 1.5, total: 3, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         'Oldest completion was 2014-11-21 00:24:51 (25 seconds ago) by \
                 192.168.31.1:6002.',
         'Most recent completion was 2014-11-21 00:24:56 (20 seconds ago) by \
                 192.168.31.2:6002.']

    :param str for_ring: Which ring to run swift-recon on
    :param list options: Command line options with which to run swift-recon
    :param str swift_recon_path: Directory containing the ``swift-recon``
        executable; the bare command name is used when unset
    :param bool deploy_osp: Forwarded to ``get_container_name``; when true
        and a container is found the command runs via ``docker exec``
    :returns: Strings from output that are most important; an empty list
        when the command exits with a non-zero status
    :rtype: list
    """

    # identify the container we will use for monitoring
    container = get_container_name(deploy_osp, for_ring)
    command = [os.path.join(swift_recon_path or "", 'swift-recon'), for_ring]
    command.extend(options or [])
    command_options = ' '.join(command)

    if not container:
        _full_command = command_options
    elif deploy_osp:
        _full_command = 'docker exec {} {}'.format(container,
                                                   command_options)
    else:
        # bash -c needs the whole swift-recon invocation as one argument,
        # hence the double quotes that shlex.split() keeps together.
        _full_command = 'lxc-attach -n {} -- bash -c "{}"'.format(
            container, command_options)

    full_command = shlex.split(_full_command)

    try:
        out = subprocess.check_output(full_command)
    except subprocess.CalledProcessError as error:
        # in case attach command fails we return no metrics rather than
        # letting it fail to give out red herring alarms
        status_err_no_exit("Attach container command failed: %s" % str(error),
                           m_name='maas_swift')
        return []

    # check_output() yields bytes on Python 3; splitting bytes with a str
    # separator raises TypeError, so normalise to text first. On Python 2
    # the output already is ``str`` and is left untouched.
    if not isinstance(out, str):
        out = out.decode('utf-8', 'replace')

    # Build a real list (filter() is lazy on Python 3) without the blank
    # lines and the '==' / '-' separator lines.
    return [line for line in out.split('\n')
            if line and not line.startswith(('==', '-'))]
Exemplo n.º 2
0
def check(auth_ref, args):
    """Emit one status metric per nova service found through the API.

    A compute endpoint is built from ``args.protocol``, ``args.hostname``,
    the nova API version and the keystone tenant id. If the nova client
    cannot be created, a failed ``client_success`` metric and a
    "cannot reach API" metric per entry of ``NOVA_SERVICE_TYPE_LIST`` are
    emitted and the function returns.

    :param auth_ref: Authentication reference handed to
        ``get_keystone_client``
    :param args: Parsed arguments; ``protocol``, ``hostname`` and ``host``
        are read
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    tenant_id = keystone.tenant_id
    version_parts = get_os_component_major_api_version('nova')
    nova_version = '.'.join(str(part) for part in version_parts)

    endpoint_fields = dict(protocol=args.protocol,
                           hostname=args.hostname,
                           version=nova_version,
                           tenant_id=tenant_id)
    COMPUTE_ENDPOINT = (
        '{protocol}://{hostname}:8774/v{version}/{tenant_id}'.format(
            **endpoint_fields))

    try:
        nova = get_nova_client(auth_token=auth_token,
                               bypass_url=COMPUTE_ENDPOINT)
    except Exception as e:
        # No API status metric is gathered here, so any failure is
        # reported through the per-service metrics instead.
        metric_bool('client_success', False, m_name='maas_nova')
        for nova_service_type in NOVA_SERVICE_TYPE_LIST:
            metric('%s_status' % nova_service_type,
                   'string',
                   '%s cannot reach API' % nova_service_type,
                   m_name='maas_nova')
        status_err_no_exit(str(e), m_name='maas_nova')
        return

    metric_bool('client_success', True, m_name='maas_nova')

    # Restrict the listing to a single host only when one was requested.
    list_kwargs = {'host': args.host} if args.host else {}
    services = nova.services.list(**list_kwargs)

    if not len(services):
        status_err("No host(s) found in the service list", m_name='maas_nova')

    status_ok(m_name='maas_nova')
    for service in services:
        status = service.status.lower()
        if status == 'enabled':
            # An enabled service is only unhealthy when reported down.
            is_down = service.state.lower() == 'down'
        elif status == 'disabled':
            # A disabled service counts as down only when the reason
            # mentions 'auto'.
            reason = service.disabled_reason
            is_down = bool(reason) and 'auto' in reason.lower()
        else:
            is_down = False
        service_is_up = "No" if is_down else "Yes"

        if args.host:
            name = '%s_status' % service.binary
        else:
            name = '%s_on_host_%s_status' % (service.binary, service.host)

        metric(name, 'string', service_is_up, m_name='maas_nova')
Exemplo n.º 3
0
def check(args):
    """Emit one status metric per neutron agent found through the API.

    The neutron client is pointed at port 9696 of ``args.hostname``. If
    the client cannot be created, a failed ``client_success`` metric and a
    "cannot reach API" metric per entry of ``NEUTRON_AGENT_TYPE_LIST`` are
    emitted and the function returns.

    :param args: Parsed arguments; ``protocol``, ``hostname``, ``host`` and
        ``fqdn`` are read. ``host`` takes precedence over ``fqdn`` as the
        agent filter.
    """

    NETWORK_ENDPOINT = '{protocol}://{hostname}:9696'.format(
        protocol=args.protocol, hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)

    # not gathering api status metric here so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_neutron')
        for neutron_agent_type in NEUTRON_AGENT_TYPE_LIST:
            metric('%s_status' % neutron_agent_type,
                   'string',
                   '%s cannot reach API' % neutron_agent_type,
                   m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # gather neutron service states
    if args.host:
        agents = neutron.list_agents(host=args.host)['agents']
    elif args.fqdn:
        agents = neutron.list_agents(host=args.fqdn)['agents']
    else:
        agents = neutron.list_agents()['agents']

    if len(agents) == 0:
        metric_bool('agents_found', False, m_name='maas_neutron')
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        metric_bool('agents_found', True, m_name='maas_neutron')

    # return all the things
    status_ok(m_name='maas_neutron')
    for agent in agents:
        # Only an administratively enabled agent that is not alive is
        # reported as down.
        agent_is_up = "Yes"
        if agent['admin_state_up'] and not agent['alive']:
            agent_is_up = "No"

        if args.host or args.fqdn:
            # Filtered on a single host: the binary name alone is unique.
            # (The fqdn branch previously used the invalid conversion
            # '%z', which raises ValueError at runtime.)
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')
Exemplo n.º 4
0
def check_process_statuses(container_or_host_name, container=None):
    """Emit a boolean metric per expected Open vSwitch related process.

    The process list comes from ``get_processes``; when ``container`` is
    given, its ``init_pid`` is passed as ``parent_pid``, otherwise
    ``parent_pid`` is ``None``.

    :param container_or_host_name: Name of the container or host being
        checked (not used by the function body)
    :param container: Optional object exposing ``init_pid``
    """
    process_names = ['ovsdb-server', 'ovs-vswitchd',
                     'neutron-openvswitch-agent']

    pid = None if container is None else container.init_pid

    # Get the processes within the neutron agent container (or a
    # compute host).
    procs = get_processes(parent_pid=pid)

    # Make a list of command lines from each PID. There's a
    # chance that one or more PIDs may have exited already and
    # this causes a NoSuchProcess exception.
    cmdlines = []
    for proc in procs:
        # Reset per process so the error message below can never reuse the
        # command line of a previous process or hit an unbound name.
        cmdline_check_value = None
        try:
            # In psutil 1.2.1, cmdline is an attribute, but in
            # 5.x, it's now a callable method.
            cmdline_check = getattr(proc, "cmdline", None)
            if callable(cmdline_check):
                cmdline_check_value = proc.cmdline()
            else:
                cmdline_check_value = proc.cmdline
            # Materialise a list: on Python 3 map() is a one-shot iterator
            # that supports neither len() nor indexing, both of which the
            # matching below relies on.
            cmdlines.append([os.path.basename(part)
                             for part in cmdline_check_value])
        except Exception as e:
            if cmdline_check_value is None:
                # The command line could not be read at all; identify the
                # process by its pid when the object exposes one.
                failed_process = getattr(proc, 'pid', 'unknown')
            else:
                failed_process = cmdline_check_value
            status_err_no_exit('Error while retrieving process %s, ERROR: %s'
                               % (failed_process, str(e)),
                               m_name='maas_neutron')

    # Loop through the expected process names to see if ovsdb-server,
    # ovs-vswitchd, and neutron-openvswitch-agent exist on the system or
    # in a container.
    # ovsdb-server and ovs-vswitchd are not directly in the command
    # line parsing so we use condition
    # `process_name in x or process_name in x[0]`
    # The pattern strips characters which throw MaaS off from the metric
    # name.
    pattern = re.compile(r'[^-\w]+')
    for process_name in process_names:
        found = any(
            process_name in cmdline or (
                len(cmdline) > 0 and process_name in cmdline[0]
            )
            for cmdline in cmdlines
        )

        metric_bool('%s_process_status' % pattern.sub('', process_name),
                    found)
Exemplo n.º 5
0
def recon_output(for_ring, options=None, swift_recon_path=None):
    """Run swift-recon and filter out extraneous printed lines.

    The command is executed with ``lxc-attach`` inside the first running
    container reported by ``lxc-ls`` for the pattern
    ``.*(swift_proxy|swift)``.

    ::

        >>> recon_output('account', ['-r'])
        ['[2014-11-21 00:25:16] Checking on replication',
         '[replication_failure] low: 0, high: 0, avg: 0.0, total: 0, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_success] low: 2, high: 4, avg: 3.0, total: 6, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_time] low: 0, high: 0, avg: 0.0, total: 0, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_attempted] low: 1, high: 2, avg: 1.5, total: 3, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         'Oldest completion was 2014-11-21 00:24:51 (25 seconds ago) by \
                 192.168.31.1:6002.',
         'Most recent completion was 2014-11-21 00:24:56 (20 seconds ago) by \
                 192.168.31.2:6002.']

    :param str for_ring: Which ring to run swift-recon on
    :param list options: Command line options with which to run swift-recon
    :param str swift_recon_path: Directory containing the ``swift-recon``
        executable; the bare command name is used when unset
    :returns: Strings from output that are most important; an empty list
        when the attach command exits with a non-zero status
    :rtype: list
    """

    # identify the container we will use for monitoring
    get_container = shlex.split('lxc-ls -1 --running ".*(swift_proxy|swift)"')

    try:
        containers_list = subprocess.check_output(get_container)
        # check_output() yields bytes on Python 3; decode so the container
        # name is not interpolated as "b'...'" below.
        if not isinstance(containers_list, str):
            containers_list = containers_list.decode('utf-8', 'replace')
        container = containers_list.splitlines()[0]
    except (IndexError, subprocess.CalledProcessError):
        status_err('no running swift proxy containers found',
                   m_name='maas_swift')
        # NOTE(review): status_err presumably terminates the process; if
        # it ever returns, bail out instead of using an unbound name.
        return []

    command = [os.path.join(swift_recon_path or "", 'swift-recon'), for_ring]
    command.extend(options or [])
    command_options = ' '.join(command)
    full_command = shlex.split('lxc-attach -n %s -- bash -c "%s"' %
                               (container, command_options))
    try:
        out = subprocess.check_output(full_command)
    except subprocess.CalledProcessError as error:
        # in case attach command fails we return no metrics rather than
        # letting it fail to give out red herring alarms
        status_err_no_exit("Attach container command failed: %s" % str(error),
                           m_name='maas_swift')
        return []

    # Normalise to text before splitting on a str separator (Python 3).
    if not isinstance(out, str):
        out = out.decode('utf-8', 'replace')

    # Return a real list (filter() is lazy on Python 3) without the blank
    # lines and the '==' / '-' separator lines.
    return [line for line in out.split('\n')
            if line and not line.startswith(('==', '-'))]
Exemplo n.º 6
0
def check(args):
    """Report the up/down state of neutron agents as string metrics.

    A neutron client is created for ``{protocol}://{hostname}:9696``. When
    that fails, ``client_success`` is reported as false together with a
    "cannot reach API" metric for every ``NEUTRON_AGENT_TYPE_LIST`` entry,
    and nothing else is checked.

    :param args: Parsed arguments; ``protocol``, ``hostname``, ``host`` and
        ``fqdn`` are read. When both ``host`` and ``fqdn`` are set,
        ``host`` is used to filter the agents.
    """

    NETWORK_ENDPOINT = '{protocol}://{hostname}:9696'.format(
        protocol=args.protocol, hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)

    # not gathering api status metric here so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_neutron')
        for neutron_agent_type in NEUTRON_AGENT_TYPE_LIST:
            metric('%s_status' % neutron_agent_type,
                   'string',
                   '%s cannot reach API' % neutron_agent_type,
                   m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # gather neutron service states
    if args.host:
        agents = neutron.list_agents(host=args.host)['agents']
    elif args.fqdn:
        agents = neutron.list_agents(host=args.fqdn)['agents']
    else:
        agents = neutron.list_agents()['agents']

    if len(agents) == 0:
        metric_bool('agents_found', False, m_name='maas_neutron')
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        metric_bool('agents_found', True, m_name='maas_neutron')

    # return all the things
    status_ok(m_name='maas_neutron')

    # With a host or fqdn filter the binary name alone identifies the
    # agent; otherwise the agent id and host are added to the metric name.
    single_host = bool(args.host or args.fqdn)
    for agent in agents:
        agent_is_up = "Yes"
        if agent['admin_state_up'] and not agent['alive']:
            agent_is_up = "No"

        if single_host:
            # '%s' replaces the invalid '%z' conversion that made the
            # fqdn path raise ValueError.
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'], agent['id'],
                                         agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')
Exemplo n.º 7
0
def _get_node_metrics(session, metrics, protocol, host, port, name):
    """Check RabbitMQ cluster membership and collect this node's metrics.

    The node list is fetched from ``NODES_URL``. When ``CLUSTERED`` is
    set, problems with cluster size, membership, partitions and cluster
    links are reported through ``status_err_no_exit``. The values named by
    ``NODES_METRICS`` are then copied from the first matching node into
    ``metrics``.

    :param session: Session object handed to ``_get_rabbit_json``
    :param dict metrics: Mapping updated in place with
        ``{'value': ..., 'unit': ...}`` entries
    :param protocol: Interpolated into ``NODES_URL``
    :param host: Interpolated into ``NODES_URL``
    :param port: Interpolated into ``NODES_URL``
    :param name: Node name suffix to look for; ``hostname()`` is used when
        falsy
    """
    response = _get_rabbit_json(session, NODES_URL % (protocol, host, port))

    # Either use the option provided by the commandline flag or the current
    # hostname
    name = '@' + (name or hostname())

    # Ensure this node is a member of the cluster
    nodes_matching_name = [n for n in response
                           if n['name'].endswith(name)]
    is_cluster_member = bool(nodes_matching_name)

    if CLUSTERED:
        if len(response) < CLUSTER_SIZE:
            status_err_no_exit('cluster too small', m_name='maas_rabbitmq')
        if not is_cluster_member:
            status_err_no_exit('{0} not a member of the cluster'.format(name),
                               m_name='maas_rabbitmq')
        # A node without a 'partitions' entry has no partitions; the old
        # default of 0 made len() raise TypeError for such nodes.
        if any(n.get('partitions') for n in response):
            status_err_no_exit('At least one partition found in the rabbit '
                               'cluster', m_name='maas_rabbitmq')
        if any(len(n.get('cluster_links', [])) != CLUSTER_SIZE - 1
               for n in response):
            status_err_no_exit('At least one rabbit node is missing a cluster'
                               ' link', m_name='maas_rabbitmq')

    if not is_cluster_member:
        # Nothing to read metrics from; indexing the empty list would
        # raise IndexError. The clustered path has already reported it.
        if not CLUSTERED:
            status_err_no_exit(
                '{0} not found in the rabbit node list'.format(name),
                m_name='maas_rabbitmq')
        return

    node = nodes_matching_name[0]
    for k, v in NODES_METRICS.items():
        metrics[k] = {'value': node[k], 'unit': v}
Exemplo n.º 8
0
def _get_node_metrics(session, metrics, protocol, host, port, name):
    """Collect per-node RabbitMQ metrics and flag cluster problems.

    Fetches the node list from ``NODES_URL`` and selects the nodes whose
    ``name`` ends with ``'@' + name`` (or the local ``hostname()`` when
    ``name`` is falsy). With ``CLUSTERED`` set, cluster size, membership,
    partitions and cluster links are checked and reported through
    ``status_err_no_exit``. Finally every key of ``NODES_METRICS`` is
    copied from the first selected node into ``metrics``.

    :param session: Session object handed to ``_get_rabbit_json``
    :param dict metrics: Mapping updated in place with
        ``{'value': ..., 'unit': ...}`` entries
    :param protocol: Interpolated into ``NODES_URL``
    :param host: Interpolated into ``NODES_URL``
    :param port: Interpolated into ``NODES_URL``
    :param name: Node name suffix to look for
    """
    response = _get_rabbit_json(session, NODES_URL % (protocol, host, port))

    # Either use the option provided by the commandline flag or the current
    # hostname
    name = '@' + (name or hostname())

    # Ensure this node is a member of the cluster
    nodes_matching_name = [n for n in response if n['name'].endswith(name)]
    is_cluster_member = bool(nodes_matching_name)

    if CLUSTERED:
        if len(response) < CLUSTER_SIZE:
            status_err_no_exit('cluster too small', m_name='maas_rabbitmq')
        if not is_cluster_member:
            status_err_no_exit('{0} not a member of the cluster'.format(name),
                               m_name='maas_rabbitmq')

        # Default to an empty list: len(0) raised TypeError whenever a
        # node carried no 'partitions' key.
        partition_count = sum(
            len(n.get('partitions') or []) for n in response)
        if partition_count:
            status_err_no_exit(
                'At least one partition found in the rabbit '
                'cluster',
                m_name='maas_rabbitmq')

        expected_links = CLUSTER_SIZE - 1
        if any(len(n.get('cluster_links', [])) != expected_links
               for n in response):
            status_err_no_exit(
                'At least one rabbit node is missing a cluster'
                ' link',
                m_name='maas_rabbitmq')

    if not nodes_matching_name:
        # Without a matching node there is nothing to index; report it
        # (the clustered path already did) instead of raising IndexError.
        if not CLUSTERED:
            status_err_no_exit(
                '{0} not found in the rabbit node list'.format(name),
                m_name='maas_rabbitmq')
        return

    this_node = nodes_matching_name[0]
    for k, v in NODES_METRICS.items():
        metrics[k] = {'value': this_node[k], 'unit': v}
Exemplo n.º 9
0
def check(auth_ref, args):
    """Emit one status metric per nova service using the v2.1 endpoint.

    If the nova client cannot be created, a failed ``client_success``
    metric and a "cannot reach API" metric per entry of
    ``NOVA_SERVICE_TYPE_LIST`` are emitted and the function returns.

    :param auth_ref: Authentication reference handed to
        ``get_keystone_client``
    :param args: Parsed arguments; ``protocol``, ``hostname`` and ``host``
        are read
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    tenant_id = keystone.tenant_id

    endpoint_template = '{protocol}://{hostname}:8774/v2.1/{tenant_id}'
    COMPUTE_ENDPOINT = endpoint_template.format(protocol=args.protocol,
                                                hostname=args.hostname,
                                                tenant_id=tenant_id)

    try:
        nova = get_nova_client(auth_token=auth_token,
                               bypass_url=COMPUTE_ENDPOINT)
    except Exception as e:
        # No API status metric is gathered here, so every failure is
        # surfaced through the per-service metrics.
        metric_bool('client_success', False, m_name='maas_nova')
        for nova_service_type in NOVA_SERVICE_TYPE_LIST:
            metric('%s_status' % nova_service_type,
                   'string',
                   '%s cannot reach API' % nova_service_type,
                   m_name='maas_nova')
        status_err_no_exit(str(e), m_name='maas_nova')
        return

    metric_bool('client_success', True, m_name='maas_nova')

    # Limit the listing to one host only when a host was requested.
    if args.host:
        services = nova.services.list(host=args.host)
    else:
        services = nova.services.list()

    if not len(services):
        status_err("No host(s) found in the service list", m_name='maas_nova')

    status_ok(m_name='maas_nova')
    for service in services:
        # Only an enabled service that is reported down counts as "No".
        is_down = service.status == 'enabled' and service.state == 'down'
        service_is_up = "No" if is_down else "Yes"

        if args.host:
            name = '%s_status' % service.binary
        else:
            name = '%s_on_host_%s_status' % (service.binary, service.host)

        metric(name, 'string', service_is_up, m_name='maas_nova')
Exemplo n.º 10
0
def check(args):
    """Emit one status metric per nova service via the openstack client.

    If listing the services fails, a failed ``client_success`` metric and
    a "cannot reach API" metric per entry of ``NOVA_SERVICE_TYPE_LIST``
    are emitted and the function returns.

    :param args: Parsed arguments; ``host`` is read and, when set,
        restricts the services to that host
    """
    nova = get_openstack_client('compute')

    try:
        wanted_host = args.host
        services = [svc for svc in nova.services()
                    if not wanted_host or svc.host == wanted_host]
    except Exception as e:
        # No API status metric is gathered here, so any failure is
        # reported through the per-service metrics instead.
        metric_bool('client_success', False, m_name='maas_nova')
        for nova_service_type in NOVA_SERVICE_TYPE_LIST:
            metric('%s_status' % nova_service_type,
                   'string',
                   '%s cannot reach API' % nova_service_type,
                   m_name='maas_nova')
        status_err_no_exit(str(e), m_name='maas_nova')
        return

    metric_bool('client_success', True, m_name='maas_nova')

    if not len(services):
        status_err("No host(s) found in the service list", m_name='maas_nova')

    status_ok(m_name='maas_nova')
    for service in services:
        service_is_up = "Yes"

        status = service.status.lower()
        if status == 'enabled':
            if service.state.lower() == 'down':
                service_is_up = "No"
        elif status == 'disabled':
            # The service object may not expose disabled_reason at all;
            # in that case the service keeps its "Yes" state.
            try:
                reason = service.disabled_reason
                if reason and 'auto' in reason.lower():
                    service_is_up = "No"
            except AttributeError:
                pass

        if args.host:
            name = '%s_status' % service.binary
        else:
            name = '%s_on_host_%s_status' % (service.binary, service.host)

        metric(name, 'string', service_is_up, m_name='maas_nova')
Exemplo n.º 11
0
def check(args):
    """Emit one status metric per neutron agent via the openstack client.

    If listing the agents fails, a failed ``client_success`` metric and a
    "cannot reach API" metric per entry of ``NEUTRON_AGENT_TYPE_LIST`` are
    emitted and the function returns.

    :param args: Parsed arguments; ``host`` and ``fqdn`` are read, with
        ``host`` taking precedence as the agent filter
    """
    neutron = get_openstack_client('network')

    try:
        # host wins over fqdn; with neither set every agent is listed.
        host_filter = args.host or args.fqdn
        if host_filter:
            agents = list(neutron.agents(host=host_filter))
        else:
            agents = list(neutron.agents())
    except Exception as e:
        # An API status metric is not gathered so catch any exception
        metric_bool('client_success', False, m_name='maas_neutron')
        for neutron_agent_type in NEUTRON_AGENT_TYPE_LIST:
            metric('%s_status' % neutron_agent_type,
                   'string',
                   '%s cannot reach API' % neutron_agent_type,
                   m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return

    metric_bool('client_success', True, m_name='maas_neutron')

    if not len(agents):
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')

    status_ok(m_name='maas_neutron')
    for agent in agents:
        # An agent is down only when it is administratively up but not
        # alive.
        unresponsive = agent['is_admin_state_up'] and not agent['is_alive']
        agent_is_up = "No" if unresponsive else "Yes"

        if args.host or args.fqdn:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')
Exemplo n.º 12
0
def check(args):
    """Report neutron agent health as string metrics.

    Agents are listed through the ``network`` openstack client, filtered
    by ``args.host`` or, failing that, ``args.fqdn``. Any exception while
    listing produces a failed ``client_success`` metric plus a "cannot
    reach API" metric for each ``NEUTRON_AGENT_TYPE_LIST`` entry, after
    which the function returns.

    :param args: Parsed arguments; ``host`` and ``fqdn`` are read
    """
    neutron = get_openstack_client('network')

    try:
        if args.host:
            listing = neutron.agents(host=args.host)
        elif args.fqdn:
            listing = neutron.agents(host=args.fqdn)
        else:
            listing = neutron.agents()
        # Consume the listing inside the try so iteration errors are
        # reported like any other API failure.
        agents = list(listing)
    except Exception as e:
        # An API status metric is not gathered so catch any exception
        metric_bool('client_success', False, m_name='maas_neutron')
        for neutron_agent_type in NEUTRON_AGENT_TYPE_LIST:
            metric('%s_status' % neutron_agent_type,
                   'string',
                   '%s cannot reach API' % neutron_agent_type,
                   m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return

    metric_bool('client_success', True, m_name='maas_neutron')

    if not len(agents):
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')

    status_ok(m_name='maas_neutron')
    for agent in agents:
        if agent['is_admin_state_up'] and not agent['is_alive']:
            agent_is_up = "No"
        else:
            agent_is_up = "Yes"

        # A host or fqdn filter makes the binary name sufficient; without
        # one the agent id and host are part of the metric name.
        if args.host or args.fqdn:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'], agent['id'],
                                         agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')
def check(args):
    """Report the Open vSwitch agent state and related process statuses.

    First the neutron agents are listed and the first one whose binary
    contains ``openvswitch`` is reported. Then, depending on
    ``on_lxc_container``, ``check_process_statuses`` is run either for
    every selected neutron LXC container or for the local host.

    :param args: Parsed arguments; ``host`` and ``fqdn`` are read, with
        ``host`` taking precedence as the agent filter
    """
    neutron = get_openstack_client('network')

    try:
        # Gather neutron agent states
        if args.host:
            agents = [i for i in neutron.agents(host=args.host)]
        elif args.fqdn:
            agents = [i for i in neutron.agents(host=args.fqdn)]
        else:
            agents = [i for i in neutron.agents()]

    # An API status metric is not gathered so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_neutron')
        metric('%s_status' % "neutron-openvswitch-agent",
               'string',
               '%s cannot reach API' % "neutron-openvswitch-agent",
               m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    try:
        ovs_agent = next(a for a in agents if 'openvswitch' in a['binary'])
    except StopIteration:
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        # Return all the things
        status_ok(m_name='maas_neutron')

        # Only an administratively enabled agent that is not alive is
        # reported as down.
        agent_is_up = "Yes"
        if ovs_agent['is_admin_state_up'] and not ovs_agent['is_alive']:
            agent_is_up = "No"

        if args.host or args.fqdn:
            name = '%s_status' % ovs_agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (ovs_agent['binary'], ovs_agent['id'],
                                         ovs_agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')

    if on_lxc_container:
        all_containers = lxc.list_containers()

        # NOTE(npawelek): The neutron container architecture was
        # refactored in recent versions removing all neutron containers
        # with the exception of one, or even using baremetal directly.
        # Since logic is looking for the presence of LXC, we do not need
        # to account for baremetal here.
        neutron_agent_containers_list = [
            container for container in all_containers
            if 'neutron_agents' in container]
        neutron_containers_list = [
            container for container in all_containers
            if 'neutron' in container]

        if len(neutron_containers_list) == 1 and \
                'neutron_server' in neutron_containers_list[0]:
            valid_containers = neutron_containers_list
        elif len(neutron_agent_containers_list) > 0:
            valid_containers = neutron_agent_containers_list
        else:
            # Must stay a sequence: the former ``0`` made the emptiness
            # check below raise TypeError instead of reporting the error.
            valid_containers = []

        if len(valid_containers) == 0:
            status_err('no neutron agent or server containers found',
                       m_name='maas_neutron')
            return

        for container in valid_containers:
            # Get the neutron_agent_container's init PID.
            try:
                c = lxc.Container(container)
                # If the container wasn't found, exit now.
                if c.init_pid == -1:
                    metric_bool('container_success',
                                False,
                                m_name='maas_neutron')
                    status_err('Could not find PID for container {}'.format(
                        container),
                               m_name='maas_neutron')
            except (Exception, SystemError) as e:
                metric_bool('container_success', False, m_name='maas_neutron')
                status_err(
                    'Container lookup failed on "{}". ERROR: "{}"'.format(
                        container, e),
                    m_name='maas_neutron')
            else:
                metric_bool('container_success', True, m_name='maas_neutron')

                # c is the lxc container instance of this
                # neutron_agent_container
                check_process_statuses(container, c)
    else:
        ovs_agent_host = socket.gethostname()
        check_process_statuses(ovs_agent_host)
Exemplo n.º 14
0
def check(args):
    """Check the Open vSwitch neutron agent and its supporting processes.

    The agent list is fetched through the ``network`` openstack client
    and the first agent whose binary contains ``openvswitch`` is turned
    into a status metric. Afterwards ``check_process_statuses`` runs for
    each selected neutron LXC container when ``on_lxc_container`` is
    true, or for the local host otherwise.

    :param args: Parsed arguments; ``host`` and ``fqdn`` are read
    """
    neutron = get_openstack_client('network')

    try:
        # Gather neutron agent states
        if args.host:
            agents = [i for i in neutron.agents(host=args.host)]
        elif args.fqdn:
            agents = [i for i in neutron.agents(host=args.fqdn)]
        else:
            agents = [i for i in neutron.agents()]

    # An API status metric is not gathered so catch any exception
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_neutron')
        metric('%s_status' % "neutron-openvswitch-agent",
               'string',
               '%s cannot reach API' % "neutron-openvswitch-agent",
               m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    try:
        ovs_agent = next(
            a for a in agents if 'openvswitch' in a['binary']
        )
    except StopIteration:
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        # Return all the things
        status_ok(m_name='maas_neutron')

        agent_is_up = "Yes"
        if ovs_agent['is_admin_state_up'] and not ovs_agent['is_alive']:
            agent_is_up = "No"

        # With a host or fqdn filter the binary name alone is enough;
        # otherwise the agent id and host are added to the metric name.
        if args.host or args.fqdn:
            name = '%s_status' % ovs_agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (ovs_agent['binary'],
                                         ovs_agent['id'],
                                         ovs_agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')

    if on_lxc_container:
        all_containers = lxc.list_containers()
        neutron_containers_list = []
        neutron_agent_containers_list = []

        # NOTE(npawelek): The neutron container architecture was
        # refactored in recent versions removing all neutron containers
        # with the exception of one, or even using baremetal directly.
        # Since logic is looking for the presence of LXC, we do not need
        # to account for baremetal here.
        for container in all_containers:
            if 'neutron_agents' in container:
                neutron_agent_containers_list.append(container)

            if 'neutron' in container:
                neutron_containers_list.append(container)

        if len(neutron_containers_list) == 1 and \
                'neutron_server' in neutron_containers_list[0]:
            valid_containers = neutron_containers_list
        elif neutron_agent_containers_list:
            valid_containers = neutron_agent_containers_list
        else:
            # An empty list rather than ``0``: calling len() on an int
            # raised TypeError and hid the intended error report below.
            valid_containers = []

        if not valid_containers:
            status_err('no neutron agent or server containers found',
                       m_name='maas_neutron')
            return

        for container in valid_containers:
            # Get the neutron_agent_container's init PID.
            try:
                c = lxc.Container(container)
                # If the container wasn't found, exit now.
                if c.init_pid == -1:
                    metric_bool('container_success',
                                False,
                                m_name='maas_neutron')
                    status_err(
                        'Could not find PID for container {}'.format(
                            container
                        ),
                        m_name='maas_neutron'
                    )
            except (Exception, SystemError) as e:
                metric_bool('container_success', False,
                            m_name='maas_neutron')
                status_err(
                    'Container lookup failed on "{}". ERROR: "{}"'
                    .format(
                        container,
                        e
                    ),
                    m_name='maas_neutron'
                )
            else:
                metric_bool('container_success', True,
                            m_name='maas_neutron')

                # c is the lxc container instance of this
                # neutron_agent_container
                check_process_statuses(container, c)
    else:
        ovs_agent_host = socket.gethostname()
        check_process_statuses(ovs_agent_host)
Exemplo n.º 15
0
def recon_output(for_ring, options=None, swift_recon_path=None,
                 deploy_osp=False):
    """Run swift-recon and filter out extraneous printed lines.

    Depending on what ``get_container_name`` returns and on
    ``deploy_osp``, swift-recon is run directly, via ``docker exec`` or
    via ``lxc-attach ... bash -c``.

    ::

        >>> recon_output('account', ['-r'])
        ['[2014-11-21 00:25:16] Checking on replication',
         '[replication_failure] low: 0, high: 0, avg: 0.0, total: 0, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_success] low: 2, high: 4, avg: 3.0, total: 6, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_time] low: 0, high: 0, avg: 0.0, total: 0, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         '[replication_attempted] low: 1, high: 2, avg: 1.5, total: 3, \
                 Failed: 0.0%, no_result: 0, reported: 2',
         'Oldest completion was 2014-11-21 00:24:51 (25 seconds ago) by \
                 192.168.31.1:6002.',
         'Most recent completion was 2014-11-21 00:24:56 (20 seconds ago) by \
                 192.168.31.2:6002.']

    :param str for_ring: Which ring to run swift-recon on
    :param list options: Command line options with which to run swift-recon
    :param str swift_recon_path: Directory containing the ``swift-recon``
        executable; the bare command name is used when unset
    :param bool deploy_osp: Forwarded to ``get_container_name``; selects
        ``docker exec`` over ``lxc-attach`` when a container is found
    :returns: Strings from output that are most important; an empty list
        when the command exits with a non-zero status
    :rtype: list
    """

    # identify the container we will use for monitoring
    container = get_container_name(deploy_osp, for_ring)
    command = [os.path.join(swift_recon_path or "", 'swift-recon'), for_ring]
    command.extend(options or [])
    command_options = ' '.join(command)

    if not container:
        _full_command = '{command_options}'.format(
            command_options=command_options
        )
    elif deploy_osp:
        _full_command = '{container_exec_command} {command_options}'.format(
            container_exec_command='docker exec {}'.format(
                container
            ),
            command_options=command_options
        )
    else:
        # The quotes keep the whole swift-recon invocation together as
        # the single argument expected by ``bash -c``.
        _full_command = '{container_exec_command} {command_options}'.format(
            container_exec_command='lxc-attach -n {} -- bash -c'.format(
                container
            ),
            command_options='"{}"'.format(command_options)
        )

    full_command = shlex.split(_full_command)

    try:
        out = subprocess.check_output(full_command)
    except subprocess.CalledProcessError as error:
        # in case attach command fails we return no metrics rather than
        # letting it fail to give out red herring alarms
        status_err_no_exit("Attach container command failed: %s" % str(error),
                           m_name='maas_swift')
        return []

    # On Python 3 check_output() returns bytes, which cannot be split on
    # a str separator; decode first. Python 2 output is already ``str``.
    if not isinstance(out, str):
        out = out.decode('utf-8', 'replace')

    # A list comprehension keeps the documented ``list`` return type on
    # Python 3, where filter() would hand back a lazy iterator.
    return [s for s in out.split('\n')
            if s and not s.startswith(('==', '-'))]