def _is_container_excluded(self, container):
    """Tell whether the filter rules marked this container as excluded.

    The lookup is done by container name against
    ``self._filtered_containers``, so ``_filter_containers`` has to run
    first.
    """
    extracted_names = DockerUtil.container_name_extractor(container)
    return extracted_names[0] in self._filtered_containers
def _format_events(self, aggregated_events, containers_by_id):
    """Turn the Docker events aggregated per image into Datadog events.

    For each image, the exec_create/exec_start events are handed to
    ``_create_dd_event`` with a 'Low' priority and all the other events
    with a 'Normal' priority. Falsy results of ``_create_dd_event`` are
    dropped; the remaining ones are returned as a list.
    """
    # health checks generate tons of exec events, so they are treated
    # separately and their priority is lowered
    exec_prefixes = ('exec_create:', 'exec_start:')
    formatted = []

    for image_name, event_group in aggregated_events.iteritems():
        tags = set()
        by_priority = {'Low': [], 'Normal': []}

        for event in event_group:
            event_id = event['id']
            # Fall back on the truncated id when the container is unknown
            name = event_id[:11]
            if event_id in containers_by_id:
                cont = containers_by_id[event_id]
                name = DockerUtil.container_name_extractor(cont)[0]
                tags.update(self._get_tags(cont, PERFORMANCE))
                tags.add('container_name:%s' % name)

            is_exec = event['status'].startswith(exec_prefixes)
            by_priority['Low' if is_exec else 'Normal'].append((event, name))

        # Low priority event first, then the normal one
        for priority in ('Low', 'Normal'):
            dd_event = self._create_dd_event(by_priority[priority], image_name, tags, priority=priority)
            if dd_event:
                formatted.append(dd_event)

    return formatted
def _report_performance_metrics(self, containers_by_id):
    """Report cgroup and network metrics for the running, non-excluded containers.

    Containers without a ``_proc_root`` entry only get their cgroup metrics
    reported; their names are collected and mentioned in a single message
    once the loop is over. A ``BogusPIDException`` raised while reporting a
    container is logged, with the short id of that container, and does not
    prevent the remaining containers from being processed.
    """
    containers_without_proc_root = []
    for container_id, container in containers_by_id.iteritems():
        if self._is_container_excluded(container) or not self._is_container_running(container):
            continue

        tags = self._get_tags(container, PERFORMANCE)

        try:
            self._report_cgroup_metrics(container, tags)
            if "_proc_root" not in container:
                containers_without_proc_root.append(DockerUtil.container_name_extractor(container)[0])
                continue
            self._report_net_metrics(container, tags)
        except BogusPIDException as e:
            # Name the offending container, otherwise the warning is not actionable
            self.log.warning('Unable to report cgroup metrics for container %s: %s', container_id[:12], e)

    if containers_without_proc_root:
        message = "Couldn't find pid directory for containers: {0}. They'll be missing network metrics".format(
            ", ".join(containers_without_proc_root))
        if not Platform.is_k8s():
            self.warning(message)
        else:
            # On kubernetes, this is kind of expected. Network metrics will be collected by the kubernetes integration anyway
            self.log.debug(message)
def _get_and_count_containers(self, custom_cgroups=False):
    """List all the containers from the API, filter and count them.

    Sends the ``docker.containers.running`` and ``docker.containers.stopped``
    gauges, grouped by container tags, and returns a dict mapping the
    container id to the container for every container that is not excluded.
    When ``custom_cgroups`` is set, the pid obtained from inspecting each
    kept container is stored under its ``_pid`` key.

    Raises an ``Exception`` (after sending a CRITICAL service check) when
    the containers cannot be listed.
    """
    # Querying the size of containers is slow, we don't do it at each run
    must_query_size = self.collect_container_size and self._latest_size_query == 0
    self._latest_size_query = (self._latest_size_query + 1) % SIZE_REFRESH_RATE

    running_containers_count = Counter()
    all_containers_count = Counter()

    try:
        containers = self.docker_client.containers(all=True, size=must_query_size)
    except Exception as e:
        message = "Unable to list Docker containers: {0}".format(e)
        self.service_check(SERVICE_CHECK_NAME, AgentCheck.CRITICAL, message=message)
        raise Exception(message)

    self.service_check(SERVICE_CHECK_NAME, AgentCheck.OK)

    # Filter containers according to the exclude/include rules
    self._filter_containers(containers)

    containers_by_id = {}
    for container in containers:
        container_name = DockerUtil.container_name_extractor(container)[0]

        # Counters are keyed by the sorted tuple of tags
        tags_key = tuple(sorted(self._get_tags(container, CONTAINER)))
        all_containers_count[tags_key] += 1
        if self._is_container_running(container):
            running_containers_count[tags_key] += 1

        # Check if the container is included/excluded via its tags
        if self._is_container_excluded(container):
            self.log.debug("Container {0} is excluded".format(container_name))
            continue

        containers_by_id[container['Id']] = container

        if not custom_cgroups:
            continue

        # grab pid via API if custom cgroups - otherwise we won't find process when
        # crawling for pids.
        try:
            inspect_dict = self.docker_client.inspect_container(container_name)
            container['_pid'] = inspect_dict['State']['Pid']
        except Exception as e:
            self.log.debug("Unable to inspect Docker container: %s", e)

    for tags, count in running_containers_count.iteritems():
        self.gauge("docker.containers.running", count, tags=list(tags))

    for tags, count in all_containers_count.iteritems():
        stopped_count = count - running_containers_count[tags]
        self.gauge("docker.containers.stopped", stopped_count, tags=list(tags))

    return containers_by_id
def _report_exit_codes(self, events, containers_by_id):
    """Send an exit-code service check for every 'die' event of a known container.

    Events whose container is not in ``containers_by_id`` or is excluded by
    the filters are skipped. The service check is OK for an exit code of 0
    and CRITICAL otherwise; a warning is logged when the event does not
    carry the exit code.
    """
    for event in events:
        container = containers_by_id.get(event.get('id'))
        if container is None:
            continue

        # Skip events related to filtered containers
        if self._is_container_excluded(container):
            continue

        # Only DIE events carry an exit code to report
        if event['status'] != 'die':
            continue

        container_name = DockerUtil.container_name_extractor(container)[0]
        container_tags = set()
        container_tags.update(self._get_tags(container, CONTAINER))
        container_tags.add('container_name:%s' % container_name)

        try:
            exit_code = int(event['Actor']['Attributes']['exitCode'])
            message = 'Container %s exited with %s' % (container_name, exit_code)
            if exit_code == 0:
                status = AgentCheck.OK
            else:
                status = AgentCheck.CRITICAL
            self.service_check(EXIT_SERVICE_CHECK_NAME, status, tags=list(container_tags), message=message)
        except KeyError:
            self.log.warning('Unable to collect the exit code for container %s' % container_name)
def _report_performance_metrics(self, containers_by_id):
    """Report cgroup and network metrics for the running, non-excluded containers.

    Network metrics are only reported for containers that have a
    ``_proc_root`` entry; the names of the other ones are gathered and
    mentioned in one message at the end (a warning, or a debug log on
    Kubernetes). A ``BogusPIDException`` is logged with the short container
    id and the loop carries on.
    """
    missing_proc_root = []

    for container_id, container in containers_by_id.iteritems():
        if self._is_container_excluded(container):
            continue
        if not self._is_container_running(container):
            continue

        tags = self._get_tags(container, PERFORMANCE)

        try:
            self._report_cgroup_metrics(container, tags)
            if "_proc_root" in container:
                self._report_net_metrics(container, tags)
            else:
                missing_proc_root.append(DockerUtil.container_name_extractor(container)[0])
        except BogusPIDException as e:
            self.log.warning('Unable to report cgroup metrics for container %s: %s', container_id[:12], e)

    if not missing_proc_root:
        return

    message = "Couldn't find pid directory for containers: {0}. They'll be missing network metrics".format(
        ", ".join(missing_proc_root))
    if Platform.is_k8s():
        # On kubernetes, this is kind of expected. Network metrics will be collected by the kubernetes integration anyway
        self.log.debug(message)
    else:
        self.warning(message)
def _format_events(self, aggregated_events, containers_by_id):
    """Turn the Docker events aggregated per image into Datadog events.

    Events whose status starts with one of ``self.filtered_event_types``
    (passed as-is to ``str.startswith``) are dropped and only counted for a
    debug log. The remaining events of an image are handed to
    ``_create_dd_event`` with a 'Normal' priority, tagged with the tags of
    the known containers plus the event attributes listed in
    ``self.event_attributes_as_tags`` (except those in
    ``EXCLUDED_ATTRIBUTES``).

    Returns the list of events for which ``_create_dd_event`` gave a
    truthy result.
    """
    events = []
    for image_name, event_group in aggregated_events.iteritems():
        container_tags = set()
        filtered_events_count = 0
        normal_prio_events = []

        for event in event_group:
            # Only keep events that are not configured to be filtered out
            if event['status'].startswith(self.filtered_event_types):
                filtered_events_count += 1
                continue

            # Fall back on the truncated id when the container is unknown
            container_name = event['id'][:11]
            if event['id'] in containers_by_id:
                cont = containers_by_id[event['id']]
                container_name = DockerUtil.container_name_extractor(cont)[0]
                container_tags.update(self._get_tags(cont, PERFORMANCE))
                container_tags.add('container_name:%s' % container_name)

            # Add additional docker event attributes as tags. Events may come
            # without an 'Actor'/'Attributes' entry; treat that as "no
            # attributes" instead of failing the whole formatting on a KeyError.
            attributes = (event.get('Actor') or {}).get('Attributes') or {}
            for attr in self.event_attributes_as_tags:
                if attr in attributes and attr not in EXCLUDED_ATTRIBUTES:
                    container_tags.add('%s:%s' % (attr, attributes[attr]))

            normal_prio_events.append((event, container_name))

        if filtered_events_count:
            self.log.debug('%d events were filtered out because of ignored event type' % filtered_events_count)

        normal_event = self._create_dd_event(normal_prio_events, image_name, container_tags, priority='Normal')
        if normal_event:
            events.append(normal_event)

    return events
def test_container_name_extraction(self):
    """Check what DockerUtil.container_name_extractor returns for sample containers."""
    # (container, expected result) pairs
    cases = [
        ({"Id": "deadbeef"}, ["deadbeef"]),
        ({"Names": ["/redis"], "Id": "deadbeef"}, ["redis"]),
        ({"Names": ["/mongo", "/redis/mongo"], "Id": "deadbeef"}, ["mongo"]),
        ({"Names": ["/redis/mongo", "/mongo"], "Id": "deadbeef"}, ["mongo"]),
    ]
    for container, expected_names in cases:
        self.assertEqual(DockerUtil.container_name_extractor(container), expected_names)
def test_container_name_extraction(self):
    """Verify the names extracted from containers with no, one or several 'Names' entries."""
    no_names = {'Id': 'deadbeef'}
    single_name = {'Names': ['/redis'], 'Id': 'deadbeef'}
    own_name_first = {'Names': ['/mongo', '/redis/mongo'], 'Id': 'deadbeef'}
    own_name_last = {'Names': ['/redis/mongo', '/mongo'], 'Id': 'deadbeef'}

    expectations = [
        (no_names, ['deadbeef']),
        (single_name, ['redis']),
        (own_name_first, ['mongo']),
        (own_name_last, ['mongo']),
    ]
    for container, expected in expectations:
        extracted = DockerUtil.container_name_extractor(container)
        self.assertEqual(extracted, expected)
def print_containers():
    """Print how many containers the Docker client lists, then one line per container.

    Each line shows the first 12 characters of the container id, its image
    and its first extracted name.
    """
    found = DockerUtil().client.containers()

    print("\nContainers info:\n")
    print("Number of containers found: %s" % len(found))

    for co in found:
        short_id = co.get('Id')[:12]
        image = co.get('Image')
        name = DockerUtil.container_name_extractor(co)[0]
        fields = ('ID: %s' % short_id, 'image: %s' % image, 'name: %s' % name)
        print("\t- %s %s %s" % fields)

    print('\n')
def _filter_containers(self, containers):
    """Rebuild ``self._filtered_containers`` with the names of the filtered containers.

    Does nothing when filtering is not enabled. Otherwise the set is reset
    and every container whose FILTERED tags match, according to
    ``_are_tags_filtered``, has its name added to it.
    """
    if not self._filtering_enabled:
        return

    filtered_names = self._filtered_containers = set()
    for container in containers:
        tags = self._get_tags(container, FILTERED)
        if not self._are_tags_filtered(tags):
            continue
        name = DockerUtil.container_name_extractor(container)[0]
        filtered_names.add(name)
        self.log.debug("Container {0} is filtered".format(name))
def _filter_containers(self, containers):
    """Rebuild ``self._filtered_containers`` with the names of the filtered containers.

    Does nothing when ``self.docker_util.filtering_enabled`` is falsy.
    Otherwise the set is reset and the name of every container whose
    FILTERED tags are matched by ``docker_util.are_tags_filtered`` is added.
    """
    docker_util = self.docker_util
    if not docker_util.filtering_enabled:
        return

    self._filtered_containers = set()
    for container in containers:
        # exclude/include patterns are stored in docker_util to share them with other container-related checks
        if not docker_util.are_tags_filtered(self._get_tags(container, FILTERED)):
            continue
        name = DockerUtil.container_name_extractor(container)[0]
        self._filtered_containers.add(name)
        self.log.debug("Container {0} is filtered".format(name))
def _format_events(self, aggregated_events, containers_by_id):
    """Build one event dict per image from the aggregated Docker events.

    Each event dict carries the latest timestamp of the group, a title and
    a body summarizing how many times each status occurred and which
    container went through which status, and the tags of the known
    containers. The alert type is "error" when the status summary contains
    one of ``ERROR_ALERT_TYPE``, None otherwise.
    """
    body_template = (
        "%%%\n"
        "{image_name} {status} on {hostname}\n"
        "```\n{status_changes}\n```\n"
        "%%%"
    )

    formatted = []
    for image_name, event_group in aggregated_events.iteritems():
        latest_ts = 0
        status_counts = defaultdict(int)
        changes = []
        tags = set()

        for event in event_group:
            latest_ts = max(latest_ts, int(event['time']))
            status_counts[event['status']] += 1

            # Fall back on the truncated id when the container is unknown
            name = event['id'][:11]
            if event['id'] in containers_by_id:
                cont = containers_by_id[event['id']]
                name = DockerUtil.container_name_extractor(cont)[0]
                tags.update(self._get_tags(cont, PERFORMANCE))
                tags.add('container_name:%s' % name)

            changes.append((name, event['status']))

        status_text = ", ".join("%d %s" % (count, st) for st, count in status_counts.iteritems())
        msg_title = "%s %s on %s" % (image_name, status_text, self.hostname)
        change_lines = ["%s \t%s" % (st.upper(), name) for name, st in changes]
        msg_body = body_template.format(
            image_name=image_name,
            status=status_text,
            hostname=self.hostname,
            status_changes="\n".join(change_lines)
        )

        is_error = any(error in status_text for error in ERROR_ALERT_TYPE)
        alert_type = "error" if is_error else None

        formatted.append({
            'timestamp': latest_ts,
            'host': self.hostname,
            'event_type': EVENT_TYPE,
            'msg_title': msg_title,
            'msg_text': msg_body,
            'source_type_name': EVENT_TYPE,
            'event_object': 'docker:%s' % image_name,
            'tags': list(tags),
            'alert_type': alert_type
        })

    return formatted
def _report_exit_codes(self, events, containers_by_id):
    """Report the exit code of the containers that died as a service check.

    Only 'die' events of containers present in ``containers_by_id`` and not
    excluded by the filters are considered. An exit code of 0 gives an OK
    service check, anything else a CRITICAL one. When the exit code is
    missing from the event, a warning is logged instead.
    """
    for event in events:
        container = containers_by_id.get(event.get('id'))
        known = container is not None

        # Skip events related to filtered containers
        if known and self._is_container_excluded(container):
            continue

        # Nothing to report unless a known container died
        if not (known and event['status'] == 'die'):
            continue

        name = DockerUtil.container_name_extractor(container)[0]
        tags = set()
        tags.update(self._get_tags(container, CONTAINER))
        tags.add('container_name:%s' % name)

        try:
            exit_code = int(event['Actor']['Attributes']['exitCode'])
            message = 'Container %s exited with %s' % (name, exit_code)
            status = AgentCheck.CRITICAL if exit_code != 0 else AgentCheck.OK
            self.service_check(EXIT_SERVICE_CHECK_NAME, status, tags=list(tags), message=message)
        except KeyError:
            self.log.warning('Unable to collect the exit code for container %s' % name)
def _report_performance_metrics(self, containers_by_id):
    """Report cgroup and network metrics for the running, non-excluded containers.

    Network metrics are skipped for the containers that have no
    ``_proc_root`` entry; their names are collected and mentioned in one
    message at the end, sent as a warning or, when ``self.is_k8s()`` is
    true, as a debug log.
    """
    skipped_names = []

    for container in containers_by_id.itervalues():
        if self._is_container_excluded(container):
            continue
        if not self._is_container_running(container):
            continue

        tags = self._get_tags(container, PERFORMANCE)
        self._report_cgroup_metrics(container, tags)

        if "_proc_root" in container:
            self._report_net_metrics(container, tags)
        else:
            skipped_names.append(DockerUtil.container_name_extractor(container)[0])

    if not skipped_names:
        return

    message = "Couldn't find pid directory for containers: {0}. They'll be missing network metrics".format(
        ", ".join(skipped_names))
    if self.is_k8s():
        self.log.debug(message)
    else:
        self.warning(message)
def _is_container_excluded(self, container):
    """Return whether the first extracted name of the container is in ``self._filtered_containers``."""
    return DockerUtil.container_name_extractor(container)[0] in self._filtered_containers
except Exception, e: message = "Unable to list Docker containers: {0}".format(e) self.service_check(SERVICE_CHECK_NAME, AgentCheck.CRITICAL, message=message) raise Exception(message) else: self.service_check(SERVICE_CHECK_NAME, AgentCheck.OK) # Filter containers according to the exclude/include rules self._filter_containers(containers) containers_by_id = {} for container in containers: container_name = DockerUtil.container_name_extractor(container)[0] container_status_tags = self._get_tags(container, CONTAINER) all_containers_count[tuple(sorted(container_status_tags))] += 1 if self._is_container_running(container): running_containers_count[tuple(sorted(container_status_tags))] += 1 # Check if the container is included/excluded via its tags if self._is_container_excluded(container): self.log.debug("Container {0} is excluded".format(container_name)) continue containers_by_id[container['Id']] = container for tags, count in running_containers_count.iteritems():
message = "Unable to list Docker containers: {0}".format(e) self.service_check(SERVICE_CHECK_NAME, AgentCheck.CRITICAL, message=message) raise Exception(message) else: self.service_check(SERVICE_CHECK_NAME, AgentCheck.OK) # Filter containers according to the exclude/include rules self._filter_containers(containers) containers_by_id = {} for container in containers: container_name = DockerUtil.container_name_extractor(container)[0] container_status_tags = self._get_tags(container, CONTAINER) all_containers_count[tuple(sorted(container_status_tags))] += 1 if self._is_container_running(container): running_containers_count[tuple( sorted(container_status_tags))] += 1 # Check if the container is included/excluded via its tags if self._is_container_excluded(container): self.log.debug( "Container {0} is excluded".format(container_name)) continue containers_by_id[container['Id']] = container