Example #1
0
    def test_key_function(self):
        """Filtering with a ``key`` callable should match patterns against the extracted value."""
        entries = [Item('abc'), Item('def'), Item('abcdef'), Item('ghi')]
        allowed = ['abc', 'def']

        filtered = pattern_filter(entries, whitelist=allowed, key=lambda entry: entry.name)
        assert filtered == [
            Item('abc'), Item('def'), Item('abcdef')
        ]
Example #2
0
    def test_whitelist_override(self):
        """An item matching both lists is kept: the whitelist overrides the blacklist."""
        candidates = ['abc', 'def', 'abcdef', 'ghi']
        allowed = ['def']
        denied = ['abc', 'def']

        result = pattern_filter(candidates, whitelist=allowed, blacklist=denied)
        assert result == ['def', 'abcdef', 'ghi']
Example #3
0
 def get_projects(self, include_project_name_rules, exclude_project_name_rules):
     """Fetch all projects from the API and keep only those whose name passes
     the include/exclude pattern rules. Returns a dict of name -> project."""
     all_projects = self._apis.get(self.instance_name).get_projects()
     # Index projects by their reported name (later duplicates overwrite earlier ones).
     by_name = {}
     for proj in all_projects:
         by_name[proj.get('name')] = proj
     kept_names = pattern_filter(list(by_name),
                                 whitelist=include_project_name_rules,
                                 blacklist=exclude_project_name_rules)
     return {name: proj for (name, proj) in iteritems(by_name) if name in kept_names}
    def filter_excluded_servers(self):
        """Prune the server cache: drop servers whose id matches an exclusion
        rule, then drop servers whose project name fails the project filters."""
        if self.exclude_server_id_rules:
            # Remove every cached server whose id matches an exclusion regex.
            for exclusion_rule in self.exclude_server_id_rules:
                # Iterate over a snapshot of the keys since we delete during iteration.
                for sid in list(self.server_details_by_id):
                    if re.match(exclusion_rule, sid):
                        del self.server_details_by_id[sid]

        # Collect the distinct project names of the servers that survived.
        project_names = set([])
        for _, details in iteritems(self.server_details_by_id):
            project_names.add(details.get('project_name'))

        allowed_projects = pattern_filter(
            project_names,
            whitelist=self.include_project_name_rules,
            blacklist=self.exclude_project_name_rules)

        # Keep only servers that belong to an allowed project.
        self.server_details_by_id = {
            sid: details
            for (sid, details) in iteritems(self.server_details_by_id)
            if details.get('project_name') in allowed_projects
        }
Example #5
0
    def test_multiple_matches_blacklist(self):
        """Every item matching any blacklist pattern is dropped."""
        candidates = ['abc', 'def', 'abcdef', 'ghi']
        denied = ['abc', 'def']

        assert pattern_filter(candidates, blacklist=denied) == ['ghi']
Example #6
0
    def test_multiple_matches_whitelist(self):
        """Every item matching any whitelist pattern is kept, preserving input order."""
        candidates = ['abc', 'def', 'abcdef', 'ghi']
        allowed = ['abc', 'def']

        result = pattern_filter(candidates, whitelist=allowed)
        assert result == ['abc', 'def', 'abcdef']
Example #7
0
    def test_no_patterns(self):
        """With neither whitelist nor blacklist, the very same list object is returned."""
        data = ['mock']

        assert pattern_filter(data) is data
Example #8
0
    def test_no_items(self):
        """Filtering an empty input yields an empty result, even with a whitelist."""
        data = []
        allowed = ['mock']

        result = pattern_filter(data, whitelist=allowed)
        assert result == []
Example #9
0
    def _check_linux(self, instance):
        """
        Collect Linux network metrics: connection states (via `ss` or `netstat`),
        per-interface counters (/proc/net/dev), protocol counters
        (/proc/net/netstat and /proc/net/snmp), and conntrack stats.

        _check_linux can be run inside a container and still collects the network metrics from the host
        For that procfs_path can be set to something like "/host/proc"
        When a custom procfs_path is set, the collect_connection_state option is ignored
        """
        # Base procfs path; trailing slash stripped so path joins below stay clean.
        proc_location = self.agentConfig.get('procfs_path',
                                             '/proc').rstrip('/')
        custom_tags = instance.get('tags', [])

        # In a container with a host-mounted procfs, read PID 1's view of proc.
        if Platform.is_containerized() and proc_location != "/proc":
            proc_location = "%s/1" % proc_location

        if self._is_collect_cx_state_runnable(proc_location):
            try:
                self.log.debug("Using `ss` to collect connection state")
                # Try using `ss` for increased performance over `netstat`
                for ip_version in ['4', '6']:
                    for protocol in ['tcp', 'udp']:
                        # Call `ss` for each IP version because there's no built-in way of distinguishing
                        # between the IP versions in the output
                        # Also calls `ss` for each protocol, because on some systems (e.g. Ubuntu 14.04), there is a
                        # bug that print `tcp` even if it's `udp`
                        output, _, _ = get_subprocess_output([
                            "ss", "-n", "-{0}".format(protocol[0]), "-a",
                            "-{0}".format(ip_version)
                        ], self.log)
                        lines = output.splitlines()

                        # Sample `ss` output (state is in column 0):
                        # State      Recv-Q Send-Q     Local Address:Port       Peer Address:Port
                        # UNCONN     0      0              127.0.0.1:8125                  *:*
                        # ESTAB      0      0              127.0.0.1:37036         127.0.0.1:8125
                        # UNCONN     0      0        fe80::a00:27ff:fe1c:3c4:123          :::*
                        # TIME-WAIT  0      0          90.56.111.177:56867        46.105.75.4:143
                        # LISTEN     0      0       ::ffff:127.0.0.1:33217  ::ffff:127.0.0.1:7199
                        # ESTAB      0      0       ::ffff:127.0.0.1:58975  ::ffff:127.0.0.1:2181

                        # lines[1:] skips the header row; state column index is 0.
                        metrics = self._parse_linux_cx_state(
                            lines[1:],
                            self.tcp_states['ss'],
                            0,
                            protocol=protocol,
                            ip_version=ip_version)
                        # Only send the metrics which match the loop iteration's ip version
                        for stat, metric in iteritems(self.cx_state_gauge):
                            if stat[0].endswith(ip_version) and stat[
                                    0].startswith(protocol):
                                self.gauge(metric,
                                           metrics.get(metric),
                                           tags=custom_tags)

            except OSError:
                # `ss` binary missing: fall back to a single netstat call (all
                # protocols/versions at once; state is in column 5).
                self.log.info("`ss` not found: using `netstat` as a fallback")
                output, _, _ = get_subprocess_output(
                    ["netstat", "-n", "-u", "-t", "-a"], self.log)
                lines = output.splitlines()
                # Active Internet connections (w/o servers)
                # Proto Recv-Q Send-Q Local Address           Foreign Address         State
                # tcp        0      0 46.105.75.4:80          79.220.227.193:2032     SYN_RECV
                # tcp        0      0 46.105.75.4:143         90.56.111.177:56867     ESTABLISHED
                # tcp        0      0 46.105.75.4:50468       107.20.207.175:443      TIME_WAIT
                # tcp6       0      0 46.105.75.4:80          93.15.237.188:58038     FIN_WAIT2
                # tcp6       0      0 46.105.75.4:80          79.220.227.193:2029     ESTABLISHED
                # udp        0      0 0.0.0.0:123             0.0.0.0:*
                # udp6       0      0 :::41458                :::*

                # lines[2:] skips the two header rows shown above.
                metrics = self._parse_linux_cx_state(
                    lines[2:], self.tcp_states['netstat'], 5)
                for metric, value in iteritems(metrics):
                    self.gauge(metric, value, tags=custom_tags)
            except SubprocessOutputEmptyError:
                self.log.exception("Error collecting connection stats.")

        # Per-interface traffic counters from procfs.
        proc_dev_path = "{}/net/dev".format(proc_location)
        with open(proc_dev_path, 'r') as proc:
            lines = proc.readlines()
        # Inter-|   Receive                                                 |  Transmit
        #  face |bytes     packets errs drop fifo frame compressed multicast|bytes       packets errs drop fifo colls carrier compressed # noqa: E501
        #     lo:45890956   112797   0    0    0     0          0         0    45890956   112797    0    0    0     0       0          0 # noqa: E501
        #   eth0:631947052 1042233   0   19    0   184          0      1206  1208625538  1320529    0    0    0     0       0          0 # noqa: E501
        #   eth1:       0        0   0    0    0     0          0         0           0        0    0    0    0     0       0          0 # noqa: E501
        for l in lines[2:]:
            # Split "iface: counters..." into name and the counter columns.
            cols = l.split(':', 1)
            x = cols[1].split()
            # Filter inactive interfaces
            if self._parse_value(x[0]) or self._parse_value(x[8]):
                iface = cols[0].strip()
                # Column indices follow the /proc/net/dev header above:
                # x[0]=rx bytes, x[1]=rx packets, x[2]=rx errs, x[3]=rx drop,
                # x[8]=tx bytes, x[9]=tx packets, x[10]=tx errs, x[11]=tx drop.
                metrics = {
                    'bytes_rcvd':
                    self._parse_value(x[0]),
                    'bytes_sent':
                    self._parse_value(x[8]),
                    'packets_in.count':
                    self._parse_value(x[1]),
                    'packets_in.error':
                    self._parse_value(x[2]) + self._parse_value(x[3]),
                    'packets_out.count':
                    self._parse_value(x[9]),
                    'packets_out.error':
                    self._parse_value(x[10]) + self._parse_value(x[11]),
                }
                self._submit_devicemetrics(iface, metrics, custom_tags)

        # Protocol counters: both files are alternating header/value line pairs,
        # e.g. "Tcp: RetransSegs InSegs ..." followed by "Tcp: 12 34 ...".
        netstat_data = {}
        for f in ['netstat', 'snmp']:
            proc_data_path = "{}/net/{}".format(proc_location, f)
            try:
                with open(proc_data_path, 'r') as netstat:
                    while True:
                        n_header = netstat.readline()
                        if not n_header:
                            break  # No more? Abort!
                        n_data = netstat.readline()

                        h_parts = n_header.strip().split(' ')
                        h_values = n_data.strip().split(' ')
                        # h_parts[0] is e.g. "Tcp:"; strip the trailing colon.
                        ns_category = h_parts[0][:-1]
                        netstat_data[ns_category] = {}
                        # Turn the data into a dictionary
                        for idx, hpart in enumerate(h_parts[1:]):
                            netstat_data[ns_category][hpart] = h_values[idx +
                                                                        1]
            except IOError:
                # On Openshift, /proc/net/snmp is only readable by root
                self.log.debug("Unable to read %s.", proc_data_path)

        # Mapping of procfs category -> counter name -> submitted metric name.
        nstat_metrics_names = {
            'Tcp': {
                'RetransSegs': 'system.net.tcp.retrans_segs',
                'InSegs': 'system.net.tcp.in_segs',
                'OutSegs': 'system.net.tcp.out_segs',
            },
            'TcpExt': {
                'ListenOverflows': 'system.net.tcp.listen_overflows',
                'ListenDrops': 'system.net.tcp.listen_drops',
                'TCPBacklogDrop': 'system.net.tcp.backlog_drops',
                'TCPRetransFail': 'system.net.tcp.failed_retransmits',
            },
            'Udp': {
                'InDatagrams': 'system.net.udp.in_datagrams',
                'NoPorts': 'system.net.udp.no_ports',
                'InErrors': 'system.net.udp.in_errors',
                'OutDatagrams': 'system.net.udp.out_datagrams',
                'RcvbufErrors': 'system.net.udp.rcv_buf_errors',
                'SndbufErrors': 'system.net.udp.snd_buf_errors',
                'InCsumErrors': 'system.net.udp.in_csum_errors',
            },
        }

        # Submit only the counters that were actually present in the parsed data.
        for k in nstat_metrics_names:
            for met in nstat_metrics_names[k]:
                if met in netstat_data.get(k, {}):
                    self._submit_netmetric(nstat_metrics_names[k][met],
                                           self._parse_value(
                                               netstat_data[k][met]),
                                           tags=custom_tags)

        # Get the conntrack -S information
        conntrack_path = instance.get('conntrack_path')
        if conntrack_path is not None:
            self._add_conntrack_stats_metrics(conntrack_path, custom_tags)

        # Get the rest of the metric by reading the files. Metrics available since kernel 3.6
        conntrack_files_location = os.path.join(proc_location, 'sys', 'net',
                                                'netfilter')
        # By default, only max and count are reported. However if the blacklist is set,
        # the whitelist loses its default value
        blacklisted_files = instance.get('blacklist_conntrack_metrics')
        whitelisted_files = instance.get('whitelist_conntrack_metrics')
        if blacklisted_files is None and whitelisted_files is None:
            whitelisted_files = ['max', 'count']

        available_files = []

        # Get the metrics to read
        try:
            # Files are named nf_conntrack_<metric>; keep only the suffix.
            for metric_file in os.listdir(conntrack_files_location):
                if (os.path.isfile(
                        os.path.join(conntrack_files_location, metric_file))
                        and 'nf_conntrack_' in metric_file):
                    available_files.append(metric_file[len('nf_conntrack_'):])
        except Exception as e:
            self.log.debug("Unable to list the files in {}. {}".format(
                conntrack_files_location, e))

        filtered_available_files = pattern_filter(available_files,
                                                  whitelist=whitelisted_files,
                                                  blacklist=blacklisted_files)

        for metric_name in filtered_available_files:
            metric_file_location = os.path.join(
                conntrack_files_location,
                'nf_conntrack_{}'.format(metric_name))
            try:
                with open(metric_file_location, 'r') as conntrack_file:
                    # Checking it's an integer
                    try:
                        value = int(conntrack_file.read().rstrip())
                        self.gauge(
                            'system.net.conntrack.{}'.format(metric_name),
                            value,
                            tags=custom_tags)
                    except ValueError:
                        self.log.debug(
                            "{} is not an integer".format(metric_name))
            except IOError as e:
                self.log.debug("Unable to read {}, skipping {}.".format(
                    metric_file_location, e))
    def check(self, instance):
        """
        Main entry point for a check run: honors exponential backoff, iterates
        the scope map to collect project/limit stats, then collects hypervisor,
        per-server and network stats. HTTP 5xx and connection errors trigger
        backoff; a successful run resets it.
        """
        # have we been backed off
        if not self.backoff.should_run(instance):
            self.log.info('Skipping run due to exponential backoff in effect')
            return

        projects = {}
        custom_tags = instance.get("tags", [])
        collect_limits_from_all_projects = is_affirmative(
            instance.get('collect_limits_from_all_projects', True))
        collect_hypervisor_load = is_affirmative(
            instance.get('collect_hypervisor_load', False))
        use_shortname = is_affirmative(instance.get('use_shortname', False))

        try:
            scope_map = self.get_scope_map(instance)
            for _, scope in iteritems(scope_map):
                # Store the scope on the object so we don't have to keep passing it around
                self._current_scope = scope

                self._send_api_service_checks(scope, custom_tags)

                self.log.debug("Running check with credentials: \n")
                self.log.debug("Nova Url: %s", self.get_nova_endpoint())
                self.log.debug("Neutron Url: %s", self.get_neutron_endpoint())

                project = self.get_scoped_project(scope)
                if project and project.get('name'):
                    projects[project.get('name')] = project

                # NOTE(review): i_key (and `scope` below) are only bound inside
                # this loop; if scope_map were empty they would be undefined at
                # their later uses — presumably scope_map is never empty here.
                i_key = get_instance_key(instance)

            if collect_limits_from_all_projects:
                # Uses the last `scope` from the loop above.
                scope_projects = self.get_all_projects(scope)
                if scope_projects:
                    for proj in scope_projects:
                        projects[proj['name']] = proj

            # Keep only projects whose name passes the include/exclude rules.
            proj_filtered = pattern_filter(
                [p for p in projects],
                whitelist=self.include_project_name_rules,
                blacklist=self.exclude_project_name_rules)

            projects = \
                {name: v for (name, v) in iteritems(projects) if name in proj_filtered}

            for name, project in iteritems(projects):
                self.get_stats_for_single_project(project, custom_tags)

            self.get_stats_for_all_hypervisors(
                instance,
                custom_tags=custom_tags,
                use_shortname=use_shortname,
                collect_hypervisor_load=collect_hypervisor_load)

            # This updates the server cache directly
            self.get_all_servers(i_key)
            self.filter_excluded_servers()

            # Deep copy the cache so we can remove things from the Original during the iteration
            # Allows us to remove bad servers from the cache if needbe
            server_cache_copy = copy.deepcopy(self.server_details_by_id)

            for server in server_cache_copy:
                server_tags = copy.deepcopy(custom_tags)
                server_tags.append("nova_managed_server")

                self.get_stats_for_single_server(server_cache_copy[server],
                                                 tags=server_tags,
                                                 use_shortname=use_shortname)

            # For now, monitor all networks
            self.get_network_stats(custom_tags)

            if set_external_tags is not None:
                set_external_tags(self.get_external_host_tags())

        except IncompleteConfig as e:
            if isinstance(e, IncompleteIdentity):
                self.warning(
                    "Please specify the user via the `user` variable in your init_config.\n"
                    +
                    "This is the user you would use to authenticate with Keystone v3 via password auth.\n"
                    + "The user should look like:" +
                    "{'password': '******', 'name': 'my_name', 'domain': {'id': 'my_domain_id'}}"
                )
            else:
                self.warning(
                    "Configuration Incomplete! Check your openstack.yaml file")
        except requests.exceptions.HTTPError as e:
            # Only server-side errors (5xx) are considered transient and back off.
            if e.response.status_code >= 500:
                # exponential backoff
                self.backoff.do_backoff(instance)
                self.warning(
                    "There were some problems reaching the nova API - applying exponential backoff"
                )
            else:
                self.warning("Error reaching nova API")

            return
        except (requests.exceptions.Timeout,
                requests.exceptions.ConnectionError) as e:
            # exponential backoff
            self.backoff.do_backoff(instance)
            self.warning(
                "There were some problems reaching the nova API - applying exponential backoff"
            )
            return

        # Successful run: clear any accumulated backoff state.
        self.backoff.reset_backoff(instance)