Пример #1
0
    def __init__(self, agentConfig):
        """Wire up the clients and template-variable resolvers of the backend.

        A config store client is requested for ``agentConfig``. When that
        raises, the error is logged, ``agentConfig['sd_config_backend']`` is
        overwritten with ``None`` (the caller's mapping is mutated) and the
        store is requested a second time; a failure of that second request
        propagates.

        A ``KubeUtil`` client is only attempted when ``Platform.is_k8s()`` is
        true; if its construction raises, the error is logged and
        ``self.kubeutil`` stays ``None``.

        :param agentConfig: agent configuration mapping, also forwarded to
            ``AbstractSDBackend.__init__``.
        """
        try:
            store = get_config_store(agentConfig=agentConfig)
        except Exception as store_err:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(store_err))
            # Second attempt with the configured backend cleared.
            agentConfig['sd_config_backend'] = None
            store = get_config_store(agentConfig=agentConfig)
        self.config_store = store

        self.dockerutil = DockerUtil(config_store=store)

        self.kubeutil = None
        if Platform.is_k8s():
            try:
                self.kubeutil = KubeUtil()
            except Exception as kube_err:
                kube_failure = (
                    "Couldn't instantiate the kubernetes client, "
                    "subsequent kubernetes calls will fail as well. Error: %s")
                log.error(kube_failure % str(kube_err))

        self.metadata_collector = MetadataCollector()

        # Template variable name -> bound method that resolves it.
        resolvers = (
            ('host', self._get_host_address),
            ('pid', self._get_container_pid),
            ('port', self._get_port),
            ('container-name', self._get_container_name),
            ('tags', self._get_additional_tags),
        )
        self.VAR_MAPPING = dict(resolvers)

        AbstractSDBackend.__init__(self, agentConfig)
Пример #2
0
    def init(self):
        """Read the first configured instance and prepare the check's state.

        The whole setup runs inside a single ``try`` block. Any exception is
        logged at critical level, a warning is emitted and ``init_success`` is
        left untouched. ``init_success`` is set to ``True`` only when every
        step completed without raising.
        """
        try:
            conf = self.instances[0]

            def flag(key, default=False):
                # Boolean option lookup, normalised through _is_affirmative.
                return _is_affirmative(conf.get(key, default))

            # Custom tags for service checks when docker is down
            self.custom_tags = conf.get("tags", [])

            docker = DockerUtil()
            self.docker_util = docker
            if not docker.client:
                raise Exception("Failed to initialize Docker client.")

            self.docker_gateway = DockerUtil.get_gateway()
            self.metadata_collector = MetadataCollector()

            # The kubernetes client is optional: a failure is logged and
            # kubeutil simply stays None.
            self.kubeutil = None
            if Platform.is_k8s():
                try:
                    self.kubeutil = KubeUtil()
                except Exception as kube_err:
                    self.log.error("Couldn't instantiate the kubernetes client, "
                                   "subsequent kubernetes calls will fail as well. Error: %s" % str(kube_err))

            # The cgroup settings for this host only need to be resolved once.
            self._mountpoints = docker.get_mountpoints(CGROUP_METRICS)
            self._latest_size_query = 0
            self._filtered_containers = set()
            self._disable_net_metrics = False

            # Tagging options.
            # collect_labels_as_tags is the legacy option (it only tags docker
            # metrics) superseded by docker_labels_as_tags in config.cfg; it is
            # still honoured for backward compatibility, and the attribute is
            # only assigned when the key is present.
            if "collect_labels_as_tags" in conf:
                self.collect_labels_as_tags = conf.get("collect_labels_as_tags")

            self.kube_pod_tags = {}

            self.use_histogram = flag('use_histogram')

            # self.tag_names and the local name refer to the same dict, so the
            # later additions below land on the attribute as well.
            self.tag_names = tag_names = {
                CONTAINER: conf.get("container_tags", DEFAULT_CONTAINER_TAGS),
                PERFORMANCE: conf.get("performance_tags", DEFAULT_PERFORMANCE_TAGS),
                IMAGE: conf.get('image_tags', DEFAULT_IMAGE_TAGS),
            }

            # Filtering settings
            if docker.filtering_enabled:
                tag_names[FILTERED] = docker.filtered_tag_names

            # Container network mapping cache
            self.network_mappings = {}

            # Health check whitelist: patterns stay None unless rules are set.
            self.whitelist_patterns = None
            whitelist_rules = conf.get('health_service_check_whitelist', [])
            if whitelist_rules:
                patterns, whitelist_tags = compile_filter_rules(whitelist_rules)
                self.whitelist_patterns = set(patterns)
                tag_names[HEALTHCHECK] = set(whitelist_tags)

            # Remaining collection switches. Note that collect_image_stats is
            # read from the plural key 'collect_images_stats'.
            self.collect_image_stats = flag('collect_images_stats')
            self.collect_container_size = flag('collect_container_size')
            self.collect_container_count = flag('collect_container_count')
            self.collect_volume_count = flag('collect_volume_count')
            self.collect_events = flag('collect_events', True)
            self.event_attributes_as_tags = conf.get('event_attributes_as_tags', [])
            self.collect_image_size = flag('collect_image_size')
            self.collect_disk_stats = flag('collect_disk_stats')
            self.collect_exit_codes = flag('collect_exit_codes')
            # The platform is only probed when the option itself is enabled.
            self.collect_ecs_tags = flag('ecs_tags', True) and Platform.is_ecs_instance()

            self.filtered_event_types = tuple(
                conf.get("filtered_event_types", DEFAULT_FILTERED_EVENT_TYPES))

            self.capped_metrics = conf.get('capped_metrics')

        except Exception as e:
            self.log.critical(e)
            self.warning("Initialization failed. Will retry at next iteration")
        else:
            self.init_success = True
Пример #3
0
    def _populate_payload_metadata(self,
                                   payload,
                                   check_statuses,
                                   start_event=True):
        """
        Add system, host and check metadata to ``payload`` in place.

        Four independent sections are handled, in this order:

        * startup: when ``start_event`` is true and ``self._is_first_run()``,
          the configured system stats and an 'Agent Startup' event are added;
        * host metadata, external host tags and agent check statuses: each is
          added only when ``self._should_send_additional_data(<name>)`` allows
          it for 'host_metadata', 'external_host_tags' and 'agent_checks'.

        :param payload: mapping being built; ``payload['events']`` and
            ``payload['host-tags']`` must already exist and support item
            assignment.
        :param check_statuses: iterable of check status objects exposing
            ``name``, ``source_type_name``, ``instance_statuses``,
            ``service_metadata``, ``status`` and ``init_failed_error``.
        :param start_event: set to False to suppress the startup section.
        :return: None, ``payload`` is mutated.
        """
        now = time.time()
        config = self.agentConfig

        # System stats and a newsfeed event go out with the first postback.
        if start_event and self._is_first_run():
            payload['systemStats'] = config.get('system_stats', {})
            startup_event = {
                'api_key': config['api_key'],
                'host': self.hostname,
                'timestamp': now,
                'event_type': 'Agent Startup',
                'msg_text': 'Version %s' % get_version(),
            }
            payload['events']['System'] = [startup_event]

        # Host metadata section.
        if self._should_send_additional_data('host_metadata'):
            # Metadata gathered with gohai, attached only when non-empty.
            gohai_metadata = self._run_gohai_metadata()
            if gohai_metadata:
                payload['gohai'] = gohai_metadata

            procfs_path = config.get('procfs_path', '/proc').rstrip('/')
            payload['systemStats'] = get_system_stats(proc_path=procfs_path)

            if config['collect_orchestrator_tags']:
                container_meta = MetadataCollector().get_host_metadata()
                if container_meta:
                    payload['container-meta'] = container_meta

            payload['meta'] = self._get_hostname_metadata()
            # Kept so the agent_checks section can resend it later.
            self.hostname_metadata_cache = payload['meta']

            # Static tags from the configuration file come first.
            host_tags = []
            configured_tags = config['tags']
            if configured_tags is not None:
                for raw_tag in configured_tags.split(","):
                    host_tags.append(unicode(raw_tag.strip()))

            if config['collect_ec2_tags']:
                host_tags.extend(EC2.get_tags(config))

            if config['collect_orchestrator_tags']:
                orchestrator_tags = MetadataCollector().get_host_tags()
                if orchestrator_tags:
                    host_tags.extend(orchestrator_tags)

            if host_tags:
                payload['host-tags']['system'] = host_tags

            # When requested by the user, add the dd_check:xxx host tags for
            # every initialized check and every JMX app name.
            if config['create_dd_check_tags']:
                dd_check_tags = []
                for loaded_check in self.initialized_checks_d:
                    dd_check_tags.append(DD_CHECK_TAG.format(loaded_check.name))
                for app_name in JMXFiles.get_jmx_appnames():
                    dd_check_tags.append(DD_CHECK_TAG.format(app_name))

                if 'system' not in payload['host-tags']:
                    payload['host-tags']['system'] = []

                payload['host-tags']['system'].extend(dd_check_tags)

            gce_tags = GCE.get_tags(config)
            if gce_tags is not None:
                payload['host-tags'][GCE.SOURCE_TYPE_NAME] = gce_tags

            # The collected metadata is logged on the first run only.
            if self._is_first_run():
                log.info(
                    "Hostnames: %s, tags: %s" %
                    (repr(self.hostname_metadata_cache), payload['host-tags']))

        # Tags for hosts other than the one the agent runs on (e.g. vsphere);
        # only some checks provide a getter for them.
        external_host_tags = []
        if self._should_send_additional_data('external_host_tags'):
            for loaded_check in self.initialized_checks_d:
                try:
                    external_host_tags.extend(
                        loaded_check.get_external_host_tags())
                except AttributeError:
                    # Raised when the check has no such getter; an
                    # AttributeError escaping the getter itself is skipped the
                    # same way.
                    continue

        if external_host_tags:
            payload['external_host_tags'] = external_host_tags

        # Agent check statuses with their error/warning messages.
        if self._should_send_additional_data('agent_checks'):
            agent_checks = []
            for check_status in check_statuses:
                if check_status.instance_statuses is None:
                    # No per-instance statuses: report the initialization
                    # failure instead (5-field entry).
                    agent_checks.append((
                        check_status.name,
                        check_status.source_type_name,
                        "initialization",
                        check_status.status,
                        repr(check_status.init_failed_error),
                    ))
                    continue

                for idx, instance_status in enumerate(
                        check_status.instance_statuses):
                    agent_checks.append((
                        check_status.name,
                        check_status.source_type_name,
                        instance_status.instance_id,
                        instance_status.status,
                        # The error message or the list of warnings share one
                        # field; the UI tells them apart.
                        instance_status.error or instance_status.warnings
                        or "",
                        check_status.service_metadata[idx],
                    ))
            payload['agent_checks'] = agent_checks
            # Hostname metadata is sent along with the check statuses.
            payload['meta'] = self.hostname_metadata_cache