def __init__(self, agentConfig):
    """Wire up the config store, the Docker client and the platform helpers.

    If the config store client cannot be created, the error is logged,
    ``sd_config_backend`` is reset to ``None`` in ``agentConfig`` and the
    store is requested a second time.
    """
    try:
        store = get_config_store(agentConfig=agentConfig)
    except Exception as store_err:
        log.error('Failed to instantiate the config store client. '
                  'Auto-config only will be used. %s' % str(store_err))
        # disable the configured backend and ask for a store again
        agentConfig['sd_config_backend'] = None
        store = get_config_store(agentConfig=agentConfig)
    self.config_store = store

    self.dockerutil = DockerUtil(config_store=store)
    self.docker_client = self.dockerutil.client

    if Platform.is_k8s():
        try:
            kube_client = KubeUtil()
        except Exception as kube_err:
            # keep the attribute defined so later code can test it for None
            kube_client = None
            log.error(
                "Couldn't instantiate the kubernetes client, "
                "subsequent kubernetes calls will fail as well. Error: %s" % str(kube_err))
        self.kubeutil = kube_client

    if Platform.is_nomad():
        self.nomadutil = NomadUtil()
    elif Platform.is_ecs_instance():
        self.ecsutil = ECSUtil()

    # template variable prefix -> method computing its value
    self.VAR_MAPPING = dict(
        host=self._get_host_address,
        port=self._get_port,
        tags=self._get_additional_tags,
    )

    AbstractSDBackend.__init__(self, agentConfig)
def test_detect_agent(self, mock_init, mock_inspect, mock_gw, mock_get):
    """Check the URLs requested by ``ECSUtil._detect_agent`` when every probe answers 404.

    The expected sequence is the inspected container address and port, then
    the address returned by ``mock_gw`` on port 51678, then localhost:51678.
    """
    mock_get.return_value = MockResponse({}, 404)
    mock_init.return_value = None
    mock_gw.return_value = "10.0.2.2"

    network_settings = {
        'IPAddress': '10.0.0.42',
        'Ports': {'1234/tcp': '1234/tcp'},
    }
    mock_inspect.return_value = {'NetworkSettings': network_settings}

    probed_urls = (
        'http://10.0.0.42:1234/',
        'http://10.0.2.2:51678/',
        'http://localhost:51678/',
    )
    expected_calls = [mock.call(url, timeout=1) for url in probed_urls]

    ecs_util = ECSUtil()
    # forget any request recorded while the util was being built
    mock_get.reset_mock()
    ecs_util._detect_agent()

    mock_get.assert_has_calls(expected_calls)
def test_host_tags(self, mock_init, mock_inspect, mock_get):
    """Check that ``get_host_tags`` yields an ``ecs_version`` tag from the metadata response."""
    agent_metadata = {
        "Cluster": "default-xvello",
        "Version": "Amazon ECS Agent - v1.14.1 (467c3d7)",
    }
    mock_inspect.return_value = {}
    mock_get.return_value = MockResponse(agent_metadata, 200)
    mock_init.return_value = None

    ecs_util = ECSUtil()
    ecs_util.agent_url = 'http://dummy'
    # forget any request recorded while the util was being built
    mock_get.reset_mock()

    host_tags = ecs_util.get_host_tags()

    self.assertEqual(['ecs_version:1.14.1'], host_tags)
    mock_get.assert_called_once_with('http://dummy/v1/metadata', timeout=1)
def test_extract_tags(self, mock_init, mock_get):
    """Check that ``_get_cacheable_tags`` builds task name/version tags from the tasks response."""
    mock_get.return_value = MockResponse({}, 404)
    mock_init.return_value = None

    ecs_util = ECSUtil()
    ecs_util.agent_url = 'http://dummy'
    # forget any request recorded while the util was being built
    mock_get.reset_mock()

    task = {
        "Family": "dd-agent-latest",
        "Version": "12",
        "Containers": [{"DockerId": CO_ID}],
    }
    mock_get.return_value = MockResponse({"Tasks": [task]}, 200)

    container_tags = ecs_util._get_cacheable_tags(CO_ID)

    self.assertEqual(['task_name:dd-agent-latest', 'task_version:12'], container_tags)
    mock_get.assert_called_once_with('http://dummy/v1/tasks', timeout=1)
def __init__(self, agentConfig):
    """Create the config store, Docker client and platform-specific helpers.

    A failure to create the config store client is logged; ``agentConfig``
    then gets ``sd_config_backend`` set to ``None`` and the store is
    requested again.
    """
    try:
        config_store = get_config_store(agentConfig=agentConfig)
    except Exception as exc:
        log.error('Failed to instantiate the config store client. '
                  'Auto-config only will be used. %s' % str(exc))
        # retry without the configured backend
        agentConfig['sd_config_backend'] = None
        config_store = get_config_store(agentConfig=agentConfig)
    self.config_store = config_store

    self.dockerutil = DockerUtil(config_store=config_store)
    self.docker_client = self.dockerutil.client

    on_kubernetes = Platform.is_k8s()
    if on_kubernetes:
        try:
            self.kubeutil = KubeUtil()
        except Exception as exc:
            # leave a None placeholder so callers can detect the failure
            self.kubeutil = None
            log.error("Couldn't instantiate the kubernetes client, "
                      "subsequent kubernetes calls will fail as well. Error: %s" % str(exc))

    if Platform.is_nomad():
        self.nomadutil = NomadUtil()
    elif Platform.is_ecs_instance():
        self.ecsutil = ECSUtil()

    # template variable prefix -> resolver method
    var_mapping = {}
    var_mapping['host'] = self._get_host_address
    var_mapping['port'] = self._get_port
    var_mapping['tags'] = self._get_additional_tags
    self.VAR_MAPPING = var_mapping

    AbstractSDBackend.__init__(self, agentConfig)
def test_host_tags(self, mock_init, mock_inspect, mock_get):
    """Verify the host tags computed from a successful metadata response."""
    mock_inspect.return_value = {}
    metadata_response = MockResponse(
        dict(Cluster="default-xvello",
             Version="Amazon ECS Agent - v1.14.1 (467c3d7)"),
        200)
    mock_get.return_value = metadata_response
    mock_init.return_value = None

    util = ECSUtil()
    util.agent_url = 'http://dummy'
    # only the request made by get_host_tags should be counted below
    mock_get.reset_mock()

    self.assertEqual(['ecs_version:1.14.1'], util.get_host_tags())
    mock_get.assert_called_once_with('http://dummy/v1/metadata', timeout=1)
def test_detect_agent(self, mock_init, mock_inspect, mock_gw, mock_get):
    """Verify the sequence of URLs probed by ``_detect_agent`` when all of them return 404."""
    mock_get.return_value = MockResponse({}, 404)
    mock_init.return_value = None
    mock_gw.return_value = "10.0.2.2"
    mock_inspect.return_value = {
        'NetworkSettings': {
            'IPAddress': '10.0.0.42',
            'Ports': {'1234/tcp': '1234/tcp'},
        },
    }

    # container address first, then the mocked gateway, then localhost
    first_probe = mock.call('http://10.0.0.42:1234/', timeout=1)
    second_probe = mock.call('http://10.0.2.2:51678/', timeout=1)
    third_probe = mock.call('http://localhost:51678/', timeout=1)

    util = ECSUtil()
    # only the requests made by _detect_agent should be checked below
    mock_get.reset_mock()
    util._detect_agent()

    mock_get.assert_has_calls([first_probe, second_probe, third_probe])
def test_extract_tags(self, mock_init, mock_get):
    """Verify the task tags extracted for ``CO_ID`` from a successful tasks response."""
    mock_get.return_value = MockResponse({}, 404)
    mock_init.return_value = None

    util = ECSUtil()
    util.agent_url = 'http://dummy'
    # only the request made by _get_cacheable_tags should be counted below
    mock_get.reset_mock()

    tasks_payload = {
        "Tasks": [
            dict(Family="dd-agent-latest",
                 Version="12",
                 Containers=[dict(DockerId=CO_ID)]),
        ],
    }
    mock_get.return_value = MockResponse(tasks_payload, 200)

    expected_tags = ['task_name:dd-agent-latest', 'task_version:12']
    self.assertEqual(expected_tags, util._get_cacheable_tags(CO_ID))
    mock_get.assert_called_once_with('http://dummy/v1/tasks', timeout=1)
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""

    def __init__(self, agentConfig):
        """Create the config store, the Docker client and the platform helpers.

        If the config store client cannot be created, the error is logged,
        ``sd_config_backend`` is reset to ``None`` in ``agentConfig`` and the
        store is requested again.
        """
        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        self.dockerutil = DockerUtil(config_store=self.config_store)
        self.docker_client = self.dockerutil.client
        if Platform.is_k8s():
            try:
                self.kubeutil = KubeUtil()
            except Exception as ex:
                # keep the attribute defined: the other methods test it for None
                self.kubeutil = None
                log.error("Couldn't instantiate the kubernetes client, "
                          "subsequent kubernetes calls will fail as well. Error: %s" % str(ex))

        if Platform.is_nomad():
            self.nomadutil = NomadUtil()
        elif Platform.is_ecs_instance():
            self.ecsutil = ECSUtil()

        # template variable prefix -> method computing its value
        self.VAR_MAPPING = {
            'host': self._get_host_address,
            'port': self._get_port,
            'tags': self._get_additional_tags,
        }

        AbstractSDBackend.__init__(self, agentConfig)

    def _make_fetch_state(self):
        """Build the fetch state from the container inspector and the current pod list.

        On Kubernetes the pod list is read through ``self.kubeutil``; when the
        client is missing or the retrieval fails, the problem is logged and an
        empty pod list is used.
        """
        pod_list = []
        if Platform.is_k8s():
            if not self.kubeutil:
                log.error("kubelet client not created, cannot retrieve pod list.")
            else:
                try:
                    pod_list = self.kubeutil.retrieve_pods_list().get('items', [])
                except Exception as ex:
                    log.warning("Failed to retrieve pod list: %s" % str(ex))
        return _SDDockerBackendConfigFetchState(self.docker_client.inspect_container, pod_list)

    def update_checks(self, changed_containers):
        """Record in ``self.reload_check_configs`` the checks affected by changed containers.

        :param changed_containers: iterable of container IDs that changed
        """
        state = self._make_fetch_state()

        # kubeutil is None when the kubernetes client could not be instantiated
        if Platform.is_k8s() and self.kubeutil:
            self.kubeutil.check_services_cache_freshness()

        conf_reload_set = set()
        for c_id in changed_containers:
            checks = self._get_checks_to_refresh(state, c_id)
            if checks:
                conf_reload_set.update(set(checks))

        # NOTE(review): this overwrites a ``True`` ("reload everything") that
        # _get_checks_to_refresh may have set for another container in the same
        # batch -- confirm this is intended.
        if conf_reload_set:
            self.reload_check_configs = conf_reload_set

    def _get_checks_to_refresh(self, state, c_id):
        """Get the list of checks applied to a container from the identifier_to_checks cache in the config store.
        Use the DATADOG_ID label or the image.

        Returns ``None`` after setting ``self.reload_check_configs`` to ``True``
        when the affected checks cannot be determined.
        """
        inspect = state.inspect_container(c_id)

        # If the container was removed we can't tell which check is concerned
        # so we have to reload everything.
        # Same thing if it's stopped and we're on Kubernetes in auto_conf mode
        # because the pod was deleted and its template could have been in the annotations.
        if not inspect or \
                (not inspect.get('State', {}).get('Running') and
                 Platform.is_k8s() and not self.agentConfig.get('sd_config_backend')):
            self.reload_check_configs = True
            return

        identifier = inspect.get('Config', {}).get('Labels', {}).get(DATADOG_ID) or \
            self.dockerutil.image_name_extractor(inspect)

        platform_kwargs = {}
        if Platform.is_k8s():
            kube_metadata = state.get_kube_config(c_id, 'metadata') or {}
            platform_kwargs = {
                'kube_annotations': kube_metadata.get('annotations'),
                'kube_container_name': state.get_kube_container_name(c_id),
            }

        return self.config_store.get_checks_to_refresh(identifier, **platform_kwargs)

    def _get_host_address(self, state, c_id, tpl_var):
        """Extract the container IP from a docker inspect object, or the kubelet API.

        Sources are tried in order: the per-network addresses, the top-level
        ``NetworkSettings.IPAddress`` field, the pod IP on Kubernetes, then the
        Rancher IP label. Returns ``None`` (after logging an error) when no
        source yields an address.
        """
        c_inspect = state.inspect_container(c_id)
        c_id, c_img = c_inspect.get('Id', ''), c_inspect.get('Config', {}).get('Image', '')

        networks = c_inspect.get('NetworkSettings', {}).get('Networks') or {}
        ip_dict = {}
        for net_name, net_desc in networks.items():
            ip = net_desc.get('IPAddress')
            if ip:
                ip_dict[net_name] = ip
        ip_addr = self._extract_ip_from_networks(ip_dict, tpl_var)
        if ip_addr:
            return ip_addr

        # try to get the bridge (default) IP address
        log.debug("No IP address was found in container %s (%s) "
                  "networks, trying with the IPAddress field" % (c_id[:12], c_img))
        ip_addr = c_inspect.get('NetworkSettings', {}).get('IPAddress')
        if ip_addr:
            return ip_addr

        if Platform.is_k8s():
            # kubernetes case
            log.debug("Couldn't find the IP address for container %s (%s), "
                      "using the kubernetes way." % (c_id[:12], c_img))
            # other callers treat a None result as possible, so guard it here too
            pod_status = state.get_kube_config(c_id, 'status') or {}
            pod_ip = pod_status.get('podIP')
            if pod_ip:
                return pod_ip

        if Platform.is_rancher():
            # try to get the rancher IP address
            log.debug("No IP address was found in container %s (%s) "
                      "trying with the Rancher label" % (c_id[:12], c_img))

            ip_addr = c_inspect.get('Config', {}).get('Labels', {}).get(RANCHER_CONTAINER_IP)
            if ip_addr:
                # the label value may carry a '/suffix'; keep only the address part
                return ip_addr.split('/')[0]

        log.error("No IP address was found for container %s (%s)" % (c_id[:12], c_img))
        return None

    def _extract_ip_from_networks(self, ip_dict, tpl_var):
        """Extract a single IP from a dictionary made of network names and IPs.

        The text after the first underscore of ``tpl_var`` is used as the
        network name; without it, or when that network is unknown, the
        fallback address is returned. Returns ``None`` for an empty dictionary.
        """
        if not ip_dict:
            return None
        tpl_parts = tpl_var.split('_', 1)

        # no specifier
        if len(tpl_parts) < 2:
            log.debug("No key was passed for template variable %s." % tpl_var)
            return self._get_fallback_ip(ip_dict)

        res = ip_dict.get(tpl_parts[-1])
        if res is None:
            log.warning("The key passed for template variable %s was not found." % tpl_var)
            return self._get_fallback_ip(ip_dict)
        return res

    def _get_fallback_ip(self, ip_dict):
        """try to pick the bridge key, falls back to the value of the last key"""
        if 'bridge' in ip_dict:
            log.debug("Using the bridge network.")
            return ip_dict['bridge']

        last_key = sorted(ip_dict)[-1]
        log.debug("Trying with the last (sorted) network: '%s'." % last_key)
        return ip_dict[last_key]

    def _get_port(self, state, c_id, tpl_var):
        """Extract a port from a container_inspect or the k8s API given a template variable.

        Ports are read from ``NetworkSettings.Ports``, then ``Config.ExposedPorts``,
        then (on Kubernetes) the container spec; they are sorted numerically
        before the one designated by ``tpl_var`` is selected.
        """
        container_inspect = state.inspect_container(c_id)

        try:
            ports = [p.split('/')[0] for p in container_inspect['NetworkSettings']['Ports'].keys()]
            if len(ports) == 0:
                # There might be a key Port in NetworkSettings but no ports
                # so we raise IndexError to check in ExposedPorts
                raise IndexError
        except (IndexError, KeyError, AttributeError):
            # try to get ports from the docker API. Works if the image has an EXPOSE instruction
            ports = [p.split('/')[0] for p in container_inspect['Config'].get('ExposedPorts', {}).keys()]

            # if it failed, try with the kubernetes API
            if not ports and Platform.is_k8s():
                log.debug("Didn't find the port for container %s (%s), trying the kubernetes way." %
                          (c_id[:12], container_inspect.get('Config', {}).get('Image', '')))
                spec = state.get_kube_container_spec(c_id)
                if spec:
                    ports = [str(x.get('containerPort')) for x in spec.get('ports', [])]
        ports = sorted(ports, key=int)
        return self._extract_port_from_list(ports, tpl_var)

    def _extract_port_from_list(self, ports, tpl_var):
        """Pick a port from ``ports`` using the index after the first underscore of ``tpl_var``.

        Returns ``None`` for an empty list, and the last port when no index is
        given or when the index is not an integer or is out of range.
        """
        if not ports:
            return None

        tpl_parts = tpl_var.split('_', 1)

        if len(tpl_parts) == 1:
            log.debug("No index was passed for template variable %s. "
                      "Trying with the last element." % tpl_var)
            return ports[-1]

        try:
            idx = tpl_parts[-1]
            return ports[int(idx)]
        except ValueError:
            log.error("Port index is not an integer. Using the last element instead.")
        except IndexError:
            log.error("Port index is out of range. Using the last element instead.")
        return ports[-1]

    def get_tags(self, state, c_id):
        """Extract useful tags from docker or platform APIs. These are collected by default.

        Returns an empty list on Kubernetes when the pod metadata is unavailable.
        """
        tags = []

        # inspect once and reuse the result in every platform branch
        c_inspect = state.inspect_container(c_id)
        # TODO: extend with labels, container ID, etc.
        tags.append('docker_image:%s' % self.dockerutil.image_name_extractor(c_inspect))
        tags.append('image_name:%s' % self.dockerutil.image_tag_extractor(c_inspect, 0)[0])
        tags.append('image_tag:%s' % self.dockerutil.image_tag_extractor(c_inspect, 1)[0])

        if Platform.is_k8s():
            pod_metadata = state.get_kube_config(c_id, 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." % c_id[:12])
                return []

            # get pod labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.items():
                tags.append('%s:%s' % (label, value))

            # get kubernetes namespace
            namespace = pod_metadata.get('namespace')
            tags.append('kube_namespace:%s' % namespace)

            # add creator tags
            creator_tags = self.kubeutil.get_pod_creator_tags(pod_metadata)
            tags.extend(creator_tags)

            # add services tags
            services = self.kubeutil.match_services_for_pod(pod_metadata)
            for s in services:
                if s is not None:
                    tags.append('kube_service:%s' % s)

        elif Platform.is_swarm():
            c_labels = c_inspect.get('Config', {}).get('Labels', {})
            swarm_svc = c_labels.get(SWARM_SVC_LABEL)
            if swarm_svc:
                tags.append('swarm_service:%s' % swarm_svc)

        elif Platform.is_rancher():
            c_labels = c_inspect.get('Config', {}).get('Labels', {})
            service_name = c_labels.get(RANCHER_SVC_NAME)
            stack_name = c_labels.get(RANCHER_STACK_NAME)
            container_name = c_labels.get(RANCHER_CONTAINER_NAME)
            if service_name:
                tags.append('rancher_service:%s' % service_name)
            if stack_name:
                tags.append('rancher_stack:%s' % stack_name)
            if container_name:
                tags.append('rancher_container:%s' % container_name)

        elif Platform.is_nomad():
            nomad_tags = self.nomadutil.extract_container_tags(c_inspect)
            if nomad_tags:
                tags.extend(nomad_tags)

        elif Platform.is_ecs_instance():
            ecs_tags = self.ecsutil.extract_container_tags(c_inspect)
            tags.extend(ecs_tags)

        return tags

    def _get_additional_tags(self, state, c_id, *args):
        """Return the node and pod name tags on Kubernetes, an empty list elsewhere.

        Extra positional arguments are accepted and ignored so the method
        matches the call signature used for ``VAR_MAPPING`` entries.
        """
        tags = []
        if Platform.is_k8s():
            pod_metadata = state.get_kube_config(c_id, 'metadata')
            pod_spec = state.get_kube_config(c_id, 'spec')
            if pod_metadata is None or pod_spec is None:
                log.warning("Failed to fetch pod metadata or pod spec for container %s."
                            " Additional Kubernetes tags may be missing." % c_id[:12])
                return []
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def get_configs(self):
        """Get the config for all docker containers running on the host.

        Returns a dict mapping each check name to ``(source, (init_config, instances))``.
        A failure on one container is logged and does not stop the others.
        """
        configs = {}
        state = self._make_fetch_state()
        containers = [(
            self.dockerutil.image_name_extractor(container),
            container.get('Id'), container.get('Labels')
        ) for container in self.docker_client.containers()]

        # kubeutil is None when the kubernetes client could not be instantiated
        if Platform.is_k8s() and self.kubeutil:
            self.kubeutil.check_services_cache_freshness()

        for image, cid, labels in containers:
            try:
                # value of the DATADOG_ID tag or the image name if the label is missing
                identifier = self.get_config_id(image, labels)
                check_configs = self._get_check_configs(state, cid, identifier) or []
                for conf in check_configs:
                    source, (check_name, init_config, instance) = conf

                    # build instances list if needed
                    if configs.get(check_name) is None:
                        configs[check_name] = (source, (init_config, [instance]))
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {}. ' \
                            'Keeping the first one found.'
                        if configs[check_name][1][0] != init_config:
                            log.warning(conflict_init_msg.format(check_name))
                        configs[check_name][1][1].append(instance)
            except Exception:
                log.exception('Building config for container %s based on image %s using service '
                              'discovery failed, leaving it alone.' % (cid[:12], image))
        return configs

    def get_config_id(self, image, labels):
        """Look for a DATADOG_ID label, return its value or the image name if missing"""
        # `labels` may be None when the container listing carries no labels
        return (labels or {}).get(DATADOG_ID) or image

    def _get_check_configs(self, state, c_id, identifier):
        """Retrieve configuration templates and fill them with data pulled from docker and tags.

        Returns a list of ``(source, (check_name, init_config, instance))``
        tuples, or ``None`` when no template matches ``identifier``.
        """
        platform_kwargs = {}
        if Platform.is_k8s():
            kube_metadata = state.get_kube_config(c_id, 'metadata') or {}
            platform_kwargs = {
                'kube_container_name': state.get_kube_container_name(c_id),
                'kube_annotations': kube_metadata.get('annotations'),
            }
        config_templates = self._get_config_templates(identifier, **platform_kwargs)
        if not config_templates:
            return None

        check_configs = []
        tags = self.get_tags(state, c_id)
        for config_tpl in config_templates:
            source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(state, c_id, instance_tpl, variables, tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl or {}, var_values)
            if tpl and len(tpl) == 2:
                init_config, instance = tpl
                check_configs.append((source, (check_name, init_config, instance)))

        return check_configs

    def _get_config_templates(self, identifier, **platform_kwargs):
        """Extract config templates for an identifier from a K/V store and returns it as a dict object.

        Returns a list of ``(source, (check_name, init_config_tpl, instance_tpl, variables))``
        tuples, or ``None`` as soon as one template is malformed or cannot be decoded.
        """
        # auto configuration is used when no config backend is set
        auto_conf = self.agentConfig.get('sd_config_backend') is None
        templates = []

        # format [(source, ('ident', {init_tpl}, {instance_tpl}))]
        raw_tpls = self.config_store.get_check_tpls(identifier, auto_conf=auto_conf, **platform_kwargs)
        for tpl in raw_tpls:
            # each template can come from either auto configuration or user-supplied templates
            try:
                source, (check_name, init_config_tpl, instance_tpl) = tpl
            except (TypeError, IndexError, ValueError):
                log.debug('No template was found for identifier %s, leaving it alone: %s' % (identifier, tpl))
                return None
            try:
                # build a list of all variables to replace in the template
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = [var.strip('%') for var in variables]
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except json.JSONDecodeError:
                log.exception('Failed to decode the JSON template fetched for check {0}. Its configuration'
                              ' by service discovery failed for ident {1}.'.format(check_name, identifier))
                return None

            templates.append((source, (check_name, init_config_tpl, instance_tpl, variables)))

        return templates

    def _fill_tpl(self, state, c_id, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a dict from template variable names and their values.

        ``instance_tpl['tags']`` is updated in place; ``tags`` itself is left
        untouched. Returns ``(instance_tpl, var_values)``; variables that
        cannot be resolved are logged and left out of ``var_values``.
        """
        var_values = {}
        c_image = state.inspect_container(c_id).get('Config', {}).get('Image', '')

        # add default tags to the instance
        if tags:
            tpl_tags = instance_tpl.get('tags', [])
            if not isinstance(tpl_tags, list):
                tpl_tags = [tpl_tags]
            # Build a new list instead of extending `tags`: the caller reuses the
            # same list for every template, so an in-place `+=` would leak one
            # template's tags into the next one.
            instance_tpl['tags'] = list(set(list(tags) + tpl_tags))

        for var in variables:
            # variables can be suffixed with an index in case several values are found
            var_prefix = var.split('_')[0]
            if var_prefix in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var_prefix](state, c_id, var)
                    if res is None:
                        raise ValueError("Invalid value for variable %s." % var)
                    var_values[var] = res
                except Exception as ex:
                    log.error("Could not find a value for the template variable %s for container %s "
                              "(%s): %s" % (var, c_id[:12], c_image, str(ex)))
            else:
                log.error("No method was found to interpolate template variable %s for container %s "
                          "(%s)." % (var, c_id[:12], c_image))

        return instance_tpl, var_values
class SDDockerBackend(AbstractSDBackend):
    """Docker-based service discovery"""

    def __init__(self, agentConfig):
        """Create the config store, the Docker client and the platform helpers.

        If the config store client cannot be created, the error is logged,
        ``sd_config_backend`` is reset to ``None`` in ``agentConfig`` and the
        store is requested again.
        """
        try:
            self.config_store = get_config_store(agentConfig=agentConfig)
        except Exception as e:
            log.error('Failed to instantiate the config store client. '
                      'Auto-config only will be used. %s' % str(e))
            agentConfig['sd_config_backend'] = None
            self.config_store = get_config_store(agentConfig=agentConfig)

        self.dockerutil = DockerUtil(config_store=self.config_store)
        self.docker_client = self.dockerutil.client
        if Platform.is_k8s():
            try:
                self.kubeutil = KubeUtil()
            except Exception as ex:
                # keep the attribute defined: the other methods test it for None
                self.kubeutil = None
                log.error(
                    "Couldn't instantiate the kubernetes client, "
                    "subsequent kubernetes calls will fail as well. Error: %s"
                    % str(ex))

        if Platform.is_nomad():
            self.nomadutil = NomadUtil()
        elif Platform.is_ecs_instance():
            self.ecsutil = ECSUtil()

        # template variable prefix -> method computing its value
        self.VAR_MAPPING = {
            'host': self._get_host_address,
            'port': self._get_port,
            'tags': self._get_additional_tags,
        }

        AbstractSDBackend.__init__(self, agentConfig)

    def _make_fetch_state(self):
        """Build the fetch state from the container inspector and the current pod list.

        On Kubernetes the pod list is read through ``self.kubeutil``; when the
        client is missing or the retrieval fails, the problem is logged and an
        empty pod list is used.
        """
        pod_list = []
        if Platform.is_k8s():
            if not self.kubeutil:
                log.error(
                    "kubelet client not created, cannot retrieve pod list.")
            else:
                try:
                    pod_list = self.kubeutil.retrieve_pods_list().get(
                        'items', [])
                except Exception as ex:
                    log.warning("Failed to retrieve pod list: %s" % str(ex))
        return _SDDockerBackendConfigFetchState(
            self.docker_client.inspect_container, pod_list)

    def update_checks(self, changed_containers):
        """Record in ``self.reload_check_configs`` the checks affected by changed containers.

        :param changed_containers: iterable of container IDs that changed
        """
        state = self._make_fetch_state()

        # kubeutil is None when the kubernetes client could not be instantiated
        if Platform.is_k8s() and self.kubeutil:
            self.kubeutil.check_services_cache_freshness()

        conf_reload_set = set()
        for c_id in changed_containers:
            checks = self._get_checks_to_refresh(state, c_id)
            if checks:
                conf_reload_set.update(set(checks))

        # NOTE(review): this overwrites a ``True`` ("reload everything") that
        # _get_checks_to_refresh may have set for another container in the same
        # batch -- confirm this is intended.
        if conf_reload_set:
            self.reload_check_configs = conf_reload_set

    def _get_checks_to_refresh(self, state, c_id):
        """Get the list of checks applied to a container from the identifier_to_checks cache in the config store.
        Use the DATADOG_ID label or the image.

        Returns ``None`` after setting ``self.reload_check_configs`` to ``True``
        when the affected checks cannot be determined.
        """
        inspect = state.inspect_container(c_id)

        # If the container was removed we can't tell which check is concerned
        # so we have to reload everything.
        # Same thing if it's stopped and we're on Kubernetes in auto_conf mode
        # because the pod was deleted and its template could have been in the annotations.
        if not inspect or \
                (not inspect.get('State', {}).get('Running') and
                 Platform.is_k8s() and not self.agentConfig.get('sd_config_backend')):
            self.reload_check_configs = True
            return

        identifier = inspect.get('Config', {}).get('Labels', {}).get(DATADOG_ID) or \
            self.dockerutil.image_name_extractor(inspect)

        platform_kwargs = {}
        if Platform.is_k8s():
            kube_metadata = state.get_kube_config(c_id, 'metadata') or {}
            platform_kwargs = {
                'kube_annotations': kube_metadata.get('annotations'),
                'kube_container_name': state.get_kube_container_name(c_id),
            }

        return self.config_store.get_checks_to_refresh(identifier,
                                                       **platform_kwargs)

    def _get_host_address(self, state, c_id, tpl_var):
        """Extract the container IP from a docker inspect object, or the kubelet API.

        Sources are tried in order: the per-network addresses, the top-level
        ``NetworkSettings.IPAddress`` field, the pod IP on Kubernetes, then the
        Rancher IP label. Returns ``None`` (after logging an error) when no
        source yields an address.
        """
        c_inspect = state.inspect_container(c_id)
        c_id, c_img = c_inspect.get('Id', ''), c_inspect.get('Config', {}).get('Image', '')

        networks = c_inspect.get('NetworkSettings', {}).get('Networks') or {}
        ip_dict = {}
        for net_name, net_desc in networks.items():
            ip = net_desc.get('IPAddress')
            if ip:
                ip_dict[net_name] = ip
        ip_addr = self._extract_ip_from_networks(ip_dict, tpl_var)
        if ip_addr:
            return ip_addr

        # try to get the bridge (default) IP address
        log.debug("No IP address was found in container %s (%s) "
                  "networks, trying with the IPAddress field" % (c_id[:12], c_img))
        ip_addr = c_inspect.get('NetworkSettings', {}).get('IPAddress')
        if ip_addr:
            return ip_addr

        if Platform.is_k8s():
            # kubernetes case
            log.debug("Couldn't find the IP address for container %s (%s), "
                      "using the kubernetes way." % (c_id[:12], c_img))
            # other callers treat a None result as possible, so guard it here too
            pod_status = state.get_kube_config(c_id, 'status') or {}
            pod_ip = pod_status.get('podIP')
            if pod_ip:
                return pod_ip

        if Platform.is_rancher():
            # try to get the rancher IP address
            log.debug("No IP address was found in container %s (%s) "
                      "trying with the Rancher label" % (c_id[:12], c_img))

            ip_addr = c_inspect.get('Config', {}).get('Labels', {}).get(RANCHER_CONTAINER_IP)
            if ip_addr:
                # the label value may carry a '/suffix'; keep only the address part
                return ip_addr.split('/')[0]

        log.error("No IP address was found for container %s (%s)" % (c_id[:12], c_img))
        return None

    def _extract_ip_from_networks(self, ip_dict, tpl_var):
        """Extract a single IP from a dictionary made of network names and IPs.

        The text after the first underscore of ``tpl_var`` is used as the
        network name; without it, or when that network is unknown, the
        fallback address is returned. Returns ``None`` for an empty dictionary.
        """
        if not ip_dict:
            return None
        tpl_parts = tpl_var.split('_', 1)

        # no specifier
        if len(tpl_parts) < 2:
            log.debug("No key was passed for template variable %s." % tpl_var)
            return self._get_fallback_ip(ip_dict)

        res = ip_dict.get(tpl_parts[-1])
        if res is None:
            log.warning(
                "The key passed for template variable %s was not found."
                % tpl_var)
            return self._get_fallback_ip(ip_dict)
        return res

    def _get_fallback_ip(self, ip_dict):
        """try to pick the bridge key, falls back to the value of the last key"""
        if 'bridge' in ip_dict:
            log.debug("Using the bridge network.")
            return ip_dict['bridge']

        last_key = sorted(ip_dict)[-1]
        log.debug("Trying with the last (sorted) network: '%s'." % last_key)
        return ip_dict[last_key]

    def _get_port(self, state, c_id, tpl_var):
        """Extract a port from a container_inspect or the k8s API given a template variable.

        Ports are read from ``NetworkSettings.Ports``, then ``Config.ExposedPorts``,
        then (on Kubernetes) the container spec; they are sorted numerically
        before the one designated by ``tpl_var`` is selected.
        """
        container_inspect = state.inspect_container(c_id)

        try:
            ports = [
                p.split('/')[0]
                for p in container_inspect['NetworkSettings']['Ports'].keys()
            ]
            if len(ports) == 0:
                # There might be a key Port in NetworkSettings but no ports
                # so we raise IndexError to check in ExposedPorts
                raise IndexError
        except (IndexError, KeyError, AttributeError):
            # try to get ports from the docker API. Works if the image has an EXPOSE instruction
            ports = [
                p.split('/')[0]
                for p in container_inspect['Config'].get('ExposedPorts', {}).keys()
            ]

            # if it failed, try with the kubernetes API
            if not ports and Platform.is_k8s():
                log.debug(
                    "Didn't find the port for container %s (%s), trying the kubernetes way."
                    % (c_id[:12], container_inspect.get('Config', {}).get(
                        'Image', '')))
                spec = state.get_kube_container_spec(c_id)
                if spec:
                    ports = [
                        str(x.get('containerPort'))
                        for x in spec.get('ports', [])
                    ]
        ports = sorted(ports, key=int)
        return self._extract_port_from_list(ports, tpl_var)

    def _extract_port_from_list(self, ports, tpl_var):
        """Pick a port from ``ports`` using the index after the first underscore of ``tpl_var``.

        Returns ``None`` for an empty list, and the last port when no index is
        given or when the index is not an integer or is out of range.
        """
        if not ports:
            return None

        tpl_parts = tpl_var.split('_', 1)

        if len(tpl_parts) == 1:
            log.debug("No index was passed for template variable %s. "
                      "Trying with the last element." % tpl_var)
            return ports[-1]

        try:
            idx = tpl_parts[-1]
            return ports[int(idx)]
        except ValueError:
            log.error(
                "Port index is not an integer. Using the last element instead."
            )
        except IndexError:
            log.error(
                "Port index is out of range. Using the last element instead.")
        return ports[-1]

    def get_tags(self, state, c_id):
        """Extract useful tags from docker or platform APIs. These are collected by default.

        Starts from ``dockerutil.extract_container_tags`` and appends the
        platform-specific tags. Returns an empty list on Kubernetes when the
        pod metadata is unavailable.
        """
        c_inspect = state.inspect_container(c_id)
        tags = self.dockerutil.extract_container_tags(c_inspect)

        if Platform.is_k8s():
            pod_metadata = state.get_kube_config(c_id, 'metadata')

            if pod_metadata is None:
                log.warning("Failed to fetch pod metadata for container %s."
                            " Kubernetes tags may be missing." % c_id[:12])
                return []

            # get pod labels
            kube_labels = pod_metadata.get('labels', {})
            for label, value in kube_labels.items():
                tags.append('%s:%s' % (label, value))

            # get kubernetes namespace
            namespace = pod_metadata.get('namespace')
            tags.append('kube_namespace:%s' % namespace)

            # add creator tags
            creator_tags = self.kubeutil.get_pod_creator_tags(pod_metadata)
            tags.extend(creator_tags)

            # add services tags
            services = self.kubeutil.match_services_for_pod(pod_metadata)
            for s in services:
                if s is not None:
                    tags.append('kube_service:%s' % s)

        elif Platform.is_swarm():
            c_labels = c_inspect.get('Config', {}).get('Labels', {})
            swarm_svc = c_labels.get(SWARM_SVC_LABEL)
            if swarm_svc:
                tags.append('swarm_service:%s' % swarm_svc)

        elif Platform.is_rancher():
            c_labels = c_inspect.get('Config', {}).get('Labels', {})
            service_name = c_labels.get(RANCHER_SVC_NAME)
            stack_name = c_labels.get(RANCHER_STACK_NAME)
            container_name = c_labels.get(RANCHER_CONTAINER_NAME)
            if service_name:
                tags.append('rancher_service:%s' % service_name)
            if stack_name:
                tags.append('rancher_stack:%s' % stack_name)
            if container_name:
                tags.append('rancher_container:%s' % container_name)

        elif Platform.is_nomad():
            nomad_tags = self.nomadutil.extract_container_tags(c_inspect)
            if nomad_tags:
                tags.extend(nomad_tags)

        elif Platform.is_ecs_instance():
            ecs_tags = self.ecsutil.extract_container_tags(c_inspect)
            tags.extend(ecs_tags)

        return tags

    def _get_additional_tags(self, state, c_id, *args):
        """Return the node and pod name tags on Kubernetes, an empty list elsewhere.

        Extra positional arguments are accepted and ignored so the method
        matches the call signature used for ``VAR_MAPPING`` entries.
        """
        tags = []
        if Platform.is_k8s():
            pod_metadata = state.get_kube_config(c_id, 'metadata')
            pod_spec = state.get_kube_config(c_id, 'spec')
            if pod_metadata is None or pod_spec is None:
                log.warning(
                    "Failed to fetch pod metadata or pod spec for container %s."
                    " Additional Kubernetes tags may be missing." % c_id[:12])
                return []
            tags.append('node_name:%s' % pod_spec.get('nodeName'))
            tags.append('pod_name:%s' % pod_metadata.get('name'))
        return tags

    def get_configs(self):
        """Get the config for all docker containers running on the host.

        Returns a dict mapping each check name to ``(source, (init_config, instances))``.
        A failure on one container is logged and does not stop the others.
        """
        configs = {}
        state = self._make_fetch_state()
        containers = [(self.dockerutil.image_name_extractor(container),
                       container.get('Id'), container.get('Labels'))
                      for container in self.docker_client.containers()]

        # kubeutil is None when the kubernetes client could not be instantiated
        if Platform.is_k8s() and self.kubeutil:
            self.kubeutil.check_services_cache_freshness()

        for image, cid, labels in containers:
            try:
                # value of the DATADOG_ID tag or the image name if the label is missing
                identifier = self.get_config_id(image, labels)
                check_configs = self._get_check_configs(
                    state, cid, identifier) or []
                for conf in check_configs:
                    source, (check_name, init_config, instance) = conf

                    # build instances list if needed
                    if configs.get(check_name) is None:
                        configs[check_name] = (source, (init_config,
                                                        [instance]))
                    else:
                        conflict_init_msg = 'Different versions of `init_config` found for check {}. ' \
                            'Keeping the first one found.'
                        if configs[check_name][1][0] != init_config:
                            log.warning(conflict_init_msg.format(check_name))
                        configs[check_name][1][1].append(instance)
            except Exception:
                log.exception(
                    'Building config for container %s based on image %s using service '
                    'discovery failed, leaving it alone.' % (cid[:12], image))
        return configs

    def get_config_id(self, image, labels):
        """Look for a DATADOG_ID label, return its value or the image name if missing"""
        # `labels` may be None when the container listing carries no labels
        return (labels or {}).get(DATADOG_ID) or image

    def _get_check_configs(self, state, c_id, identifier):
        """Retrieve configuration templates and fill them with data pulled from docker and tags.

        Returns a list of ``(source, (check_name, init_config, instance))``
        tuples, or ``None`` when no template matches ``identifier``.
        """
        platform_kwargs = {}
        if Platform.is_k8s():
            kube_metadata = state.get_kube_config(c_id, 'metadata') or {}
            platform_kwargs = {
                'kube_container_name': state.get_kube_container_name(c_id),
                'kube_annotations': kube_metadata.get('annotations'),
            }
        config_templates = self._get_config_templates(identifier,
                                                      **platform_kwargs)
        if not config_templates:
            return None

        check_configs = []
        tags = self.get_tags(state, c_id)
        for config_tpl in config_templates:
            source, config_tpl = config_tpl
            check_name, init_config_tpl, instance_tpl, variables = config_tpl

            # insert tags in instance_tpl and process values for template variables
            instance_tpl, var_values = self._fill_tpl(state, c_id,
                                                      instance_tpl, variables,
                                                      tags)

            tpl = self._render_template(init_config_tpl or {}, instance_tpl
                                        or {}, var_values)
            if tpl and len(tpl) == 2:
                init_config, instance = tpl
                check_configs.append(
                    (source, (check_name, init_config, instance)))

        return check_configs

    def _get_config_templates(self, identifier, **platform_kwargs):
        """Extract config templates for an identifier from a K/V store and returns it as a dict object.

        Returns a list of ``(source, (check_name, init_config_tpl, instance_tpl, variables))``
        tuples, or ``None`` as soon as one template is malformed or cannot be decoded.
        """
        # auto configuration is used when no config backend is set
        auto_conf = self.agentConfig.get('sd_config_backend') is None
        templates = []

        # format [(source, ('ident', {init_tpl}, {instance_tpl}))]
        raw_tpls = self.config_store.get_check_tpls(identifier,
                                                    auto_conf=auto_conf,
                                                    **platform_kwargs)
        for tpl in raw_tpls:
            # each template can come from either auto configuration or user-supplied templates
            try:
                source, (check_name, init_config_tpl, instance_tpl) = tpl
            except (TypeError, IndexError, ValueError):
                log.debug(
                    'No template was found for identifier %s, leaving it alone: %s'
                    % (identifier, tpl))
                return None
            try:
                # build a list of all variables to replace in the template
                variables = self.PLACEHOLDER_REGEX.findall(str(init_config_tpl)) + \
                    self.PLACEHOLDER_REGEX.findall(str(instance_tpl))
                variables = [var.strip('%') for var in variables]
                if not isinstance(init_config_tpl, dict):
                    init_config_tpl = json.loads(init_config_tpl or '{}')
                if not isinstance(instance_tpl, dict):
                    instance_tpl = json.loads(instance_tpl or '{}')
            except json.JSONDecodeError:
                log.exception(
                    'Failed to decode the JSON template fetched for check {0}. Its configuration'
                    ' by service discovery failed for ident {1}.'.format(
                        check_name, identifier))
                return None

            templates.append((source, (check_name, init_config_tpl,
                                       instance_tpl, variables)))

        return templates

    def _fill_tpl(self, state, c_id, instance_tpl, variables, tags=None):
        """Add container tags to instance templates and build a dict from template variable names and their values.

        ``instance_tpl['tags']`` is updated in place; ``tags`` itself is left
        untouched. Returns ``(instance_tpl, var_values)``; variables that
        cannot be resolved are logged and left out of ``var_values``.
        """
        var_values = {}
        c_image = state.inspect_container(c_id).get('Config', {}).get('Image', '')

        # add default tags to the instance
        if tags:
            tpl_tags = instance_tpl.get('tags', [])
            if not isinstance(tpl_tags, list):
                tpl_tags = [tpl_tags]
            # Build a new list instead of extending `tags`: the caller reuses the
            # same list for every template, so an in-place `+=` would leak one
            # template's tags into the next one.
            instance_tpl['tags'] = list(set(list(tags) + tpl_tags))

        for var in variables:
            # variables can be suffixed with an index in case several values are found
            var_prefix = var.split('_')[0]
            if var_prefix in self.VAR_MAPPING:
                try:
                    res = self.VAR_MAPPING[var_prefix](state, c_id, var)
                    if res is None:
                        raise ValueError("Invalid value for variable %s." %
                                         var)
                    var_values[var] = res
                except Exception as ex:
                    log.error(
                        "Could not find a value for the template variable %s for container %s "
                        "(%s): %s" % (var, c_id[:12], c_image, str(ex)))
            else:
                log.error(
                    "No method was found to interpolate template variable %s for container %s "
                    "(%s)." % (var, c_id[:12], c_image))

        return instance_tpl, var_values