def _create_cadvisor_prometheus_instance(self, instance):
    """
    Create a copy of the instance and set default values.
    This is so the base class can create a scraper_config with the proper values.

    :param instance: check instance (dict) as provided in the config file
    :return: deep copy of `instance` with cadvisor-specific defaults applied
    """
    cadvisor_instance = deepcopy(instance)
    cadvisor_instance.update(
        {
            'namespace': self.NAMESPACE,
            # We need to specify a prometheus_url so the base class can use it as the key for our config_map,
            # we specify a dummy url that will be replaced in the `check()` function. We append it with "cadvisor"
            # so the key is different than the kubelet scraper.
            'prometheus_url': instance.get('cadvisor_metrics_endpoint', 'dummy_url/cadvisor'),
            'ignore_metrics': [
                'container_cpu_cfs_periods_total',
                'container_fs_inodes_free',
                'container_fs_inodes_total',
                'container_fs_io_current',
                'container_fs_io_time_seconds_total',
                'container_fs_io_time_weighted_seconds_total',
                'container_fs_read_seconds_total',
                'container_fs_reads_merged_total',
                'container_fs_reads_total',
                'container_fs_sector_reads_total',
                'container_fs_sector_writes_total',
                'container_fs_write_seconds_total',
                'container_fs_writes_merged_total',
                'container_fs_writes_total',
                'container_last_seen',
                'container_start_time_seconds',
                'container_scrape_error',
            ],
            # Defaults that were set when CadvisorPrometheusScraper was based on PrometheusScraper
            'send_monotonic_counter': instance.get('send_monotonic_counter', False),
            'health_service_check': instance.get('health_service_check', False),
        }
    )
    clustername = get_clustername()
    if clustername != "":
        # BUG FIX: the previous `cadvisor_instance.get('tags', []).extend(...)` silently
        # dropped the cluster_name tag when the instance had no 'tags' key, because
        # `.get` returned a fresh list that was never stored back into the dict.
        # `setdefault` stores (and returns) the list, so the extend is kept.
        cadvisor_instance.setdefault('tags', []).extend(['cluster_name:%s' % clustername])
    return cadvisor_instance
def _create_kubelet_prometheus_instance(self, instance):
    """
    Return a deep copy of ``instance`` with kubelet-specific defaults applied,
    so the base class can create a scraper_config with the proper values.
    """
    kubelet_instance = deepcopy(instance)

    # Mapping of raw kubelet /metrics names to the names submitted to Datadog.
    metric_mapping = {
        'apiserver_client_certificate_expiration_seconds': 'apiserver.certificate.expiration',
        'rest_client_requests_total': 'rest.client.requests',
        'rest_client_request_latency_seconds': 'rest.client.latency',
        'kubelet_runtime_operations': 'kubelet.runtime.operations',
        'kubelet_runtime_operations_errors': 'kubelet.runtime.errors',
        'kubelet_network_plugin_operations_latency_microseconds': 'kubelet.network_plugin.latency',
        'kubelet_volume_stats_available_bytes': 'kubelet.volume.stats.available_bytes',
        'kubelet_volume_stats_capacity_bytes': 'kubelet.volume.stats.capacity_bytes',
        'kubelet_volume_stats_used_bytes': 'kubelet.volume.stats.used_bytes',
        'kubelet_volume_stats_inodes': 'kubelet.volume.stats.inodes',
        'kubelet_volume_stats_inodes_free': 'kubelet.volume.stats.inodes_free',
        'kubelet_volume_stats_inodes_used': 'kubelet.volume.stats.inodes_used',
    }

    defaults = {
        'namespace': self.NAMESPACE,
        # We need to specify a prometheus_url so the base class can use it as the key for our
        # config_map; a dummy url is used here and replaced in the `check()` function. The
        # "kubelet" suffix keeps the key distinct from the cadvisor scraper.
        'prometheus_url': instance.get('kubelet_metrics_endpoint', 'dummy_url/kubelet'),
        'metrics': [metric_mapping],
        # Defaults that were set when the Kubelet scraper was based on PrometheusScraper
        'send_monotonic_counter': instance.get('send_monotonic_counter', False),
        'health_service_check': instance.get('health_service_check', False),
    }
    kubelet_instance.update(defaults)

    cluster_name = get_clustername()
    if cluster_name != "":
        kubelet_instance['_metric_tags'] = [cluster_name]
    return kubelet_instance
def _create_kubernetes_state_prometheus_instance(self, instance):
    """
    Set up the kubernetes_state instance so it can be used in OpenMetricsBaseCheck.

    :param instance: check instance (dict) as provided in the config file
    :return: deep copy of `instance` with the kubernetes_state metric mapping,
        ignore list, label joins and scraper defaults applied
    :raises CheckException: if `kube_state_url` is missing from the instance
    """
    ksm_instance = deepcopy(instance)
    endpoint = instance.get('kube_state_url')
    if endpoint is None:
        raise CheckException("Unable to find kube_state_url in config file.")

    # User-provided label joins are merged back in after the defaults are set.
    extra_labels = ksm_instance.get('label_joins', {})
    hostname_override = is_affirmative(ksm_instance.get('hostname_override', True))
    join_kube_labels = is_affirmative(ksm_instance.get('join_kube_labels', False))
    join_standard_tags = is_affirmative(ksm_instance.get('join_standard_tags', False))
    ksm_instance.update(
        {
            'namespace': 'kubernetes_state',
            'metrics': [
                {
                    'kube_daemonset_status_current_number_scheduled': 'daemonset.scheduled',
                    'kube_daemonset_status_desired_number_scheduled': 'daemonset.desired',
                    'kube_daemonset_status_number_misscheduled': 'daemonset.misscheduled',
                    'kube_daemonset_status_number_ready': 'daemonset.ready',
                    'kube_daemonset_updated_number_scheduled': 'daemonset.updated',
                    'kube_deployment_spec_paused': 'deployment.paused',
                    'kube_deployment_spec_replicas': 'deployment.replicas_desired',
                    'kube_deployment_spec_strategy_rollingupdate_max_unavailable': 'deployment.rollingupdate.max_unavailable',  # noqa: E501
                    'kube_deployment_status_replicas': 'deployment.replicas',
                    'kube_deployment_status_replicas_available': 'deployment.replicas_available',
                    'kube_deployment_status_replicas_unavailable': 'deployment.replicas_unavailable',
                    'kube_deployment_status_replicas_updated': 'deployment.replicas_updated',
                    'kube_endpoint_address_available': 'endpoint.address_available',
                    'kube_endpoint_address_not_ready': 'endpoint.address_not_ready',
                    'kube_endpoint_created': 'endpoint.created',
                    'kube_hpa_spec_min_replicas': 'hpa.min_replicas',
                    'kube_hpa_spec_max_replicas': 'hpa.max_replicas',
                    'kube_hpa_status_desired_replicas': 'hpa.desired_replicas',
                    'kube_hpa_status_current_replicas': 'hpa.current_replicas',
                    'kube_hpa_status_condition': 'hpa.condition',
                    'kube_node_info': 'node.count',
                    'kube_node_status_allocatable_cpu_cores': 'node.cpu_allocatable',
                    'kube_node_status_allocatable_memory_bytes': 'node.memory_allocatable',
                    'kube_node_status_allocatable_pods': 'node.pods_allocatable',
                    'kube_node_status_capacity_cpu_cores': 'node.cpu_capacity',
                    'kube_node_status_capacity_memory_bytes': 'node.memory_capacity',
                    'kube_node_status_capacity_pods': 'node.pods_capacity',
                    'kube_node_status_allocatable_nvidia_gpu_cards': 'node.gpu.cards_allocatable',
                    'kube_node_status_capacity_nvidia_gpu_cards': 'node.gpu.cards_capacity',
                    'kube_pod_container_status_terminated': 'container.terminated',
                    'kube_pod_container_status_waiting': 'container.waiting',
                    'kube_persistentvolumeclaim_status_phase': 'persistentvolumeclaim.status',
                    'kube_persistentvolumeclaim_resource_requests_storage_bytes': 'persistentvolumeclaim.request_storage',  # noqa: E501
                    'kube_pod_container_resource_limits_cpu_cores': 'container.cpu_limit',
                    'kube_pod_container_resource_limits_memory_bytes': 'container.memory_limit',
                    'kube_pod_container_resource_requests_cpu_cores': 'container.cpu_requested',
                    'kube_pod_container_resource_requests_memory_bytes': 'container.memory_requested',
                    'kube_pod_container_status_ready': 'container.ready',
                    'kube_pod_container_status_restarts': 'container.restarts',  # up to kube-state-metrics 1.1.x
                    'kube_pod_container_status_restarts_total': 'container.restarts',  # noqa: E501, from kube-state-metrics 1.2.0
                    'kube_pod_container_status_running': 'container.running',
                    'kube_pod_container_resource_requests_nvidia_gpu_devices': 'container.gpu.request',
                    'kube_pod_container_resource_limits_nvidia_gpu_devices': 'container.gpu.limit',
                    'kube_pod_status_ready': 'pod.ready',
                    'kube_pod_status_scheduled': 'pod.scheduled',
                    'kube_pod_status_unschedulable': 'pod.unschedulable',
                    'kube_poddisruptionbudget_status_current_healthy': 'pdb.pods_healthy',
                    'kube_poddisruptionbudget_status_desired_healthy': 'pdb.pods_desired',
                    'kube_poddisruptionbudget_status_pod_disruptions_allowed': 'pdb.disruptions_allowed',
                    'kube_poddisruptionbudget_status_expected_pods': 'pdb.pods_total',
                    'kube_replicaset_spec_replicas': 'replicaset.replicas_desired',
                    'kube_replicaset_status_fully_labeled_replicas': 'replicaset.fully_labeled_replicas',
                    'kube_replicaset_status_ready_replicas': 'replicaset.replicas_ready',
                    'kube_replicaset_status_replicas': 'replicaset.replicas',
                    'kube_replicationcontroller_spec_replicas': 'replicationcontroller.replicas_desired',
                    'kube_replicationcontroller_status_available_replicas': 'replicationcontroller.replicas_available',  # noqa: E501
                    'kube_replicationcontroller_status_fully_labeled_replicas': 'replicationcontroller.fully_labeled_replicas',  # noqa: E501
                    'kube_replicationcontroller_status_ready_replicas': 'replicationcontroller.replicas_ready',
                    'kube_replicationcontroller_status_replicas': 'replicationcontroller.replicas',
                    'kube_statefulset_replicas': 'statefulset.replicas_desired',
                    'kube_statefulset_status_replicas': 'statefulset.replicas',
                    'kube_statefulset_status_replicas_current': 'statefulset.replicas_current',
                    'kube_statefulset_status_replicas_ready': 'statefulset.replicas_ready',
                    'kube_statefulset_status_replicas_updated': 'statefulset.replicas_updated',
                    'kube_verticalpodautoscaler_status_recommendation_containerrecommendations_lowerbound': (
                        'vpa.lower_bound'
                    ),
                    'kube_verticalpodautoscaler_status_recommendation_containerrecommendations_target': (
                        'vpa.target'
                    ),
                    'kube_verticalpodautoscaler_status_recommendation_containerrecommendations_uncappedtarget': (
                        'vpa.uncapped_target'
                    ),
                    'kube_verticalpodautoscaler_status_recommendation_containerrecommendations_upperbound': (
                        'vpa.upperbound'
                    ),
                    'kube_verticalpodautoscaler_spec_updatepolicy_updatemode': 'vpa.update_mode',
                }
            ],
            'ignore_metrics': [
                # _info, _labels and _created don't convey any metric
                'kube_cronjob_info',
                'kube_cronjob_created',
                'kube_daemonset_created',
                'kube_deployment_created',
                'kube_deployment_labels',
                'kube_job_created',
                'kube_job_info',
                'kube_limitrange_created',
                'kube_namespace_created',
                'kube_namespace_labels',
                'kube_node_created',
                'kube_node_labels',
                'kube_pod_created',
                'kube_pod_container_info',
                'kube_pod_info',
                'kube_pod_owner',
                'kube_pod_start_time',
                'kube_pod_labels',
                'kube_poddisruptionbudget_created',
                'kube_replicaset_created',
                'kube_replicationcontroller_created',
                'kube_resourcequota_created',
                'kube_replicaset_owner',
                'kube_service_created',
                'kube_service_info',
                'kube_service_labels',
                'kube_service_spec_external_ip',
                'kube_service_status_load_balancer_ingress',
                'kube_statefulset_labels',
                'kube_statefulset_created',
                'kube_statefulset_status_current_revision',
                'kube_statefulset_status_update_revision',
                # Already provided by the kubelet integration
                'kube_pod_container_status_last_terminated_reason',
                # _generation metrics are more metadata than metrics, no real use case for now
                'kube_daemonset_metadata_generation',
                'kube_deployment_metadata_generation',
                'kube_deployment_status_observed_generation',
                'kube_replicaset_metadata_generation',
                'kube_replicaset_status_observed_generation',
                'kube_replicationcontroller_metadata_generation',
                'kube_replicationcontroller_status_observed_generation',
                'kube_statefulset_metadata_generation',
                'kube_statefulset_status_observed_generation',
                'kube_hpa_metadata_generation',
                # kube_node_status_phase and kube_namespace_status_phase have no use case as a service check
                'kube_namespace_status_phase',
                'kube_node_status_phase',
                # These CronJob and Job metrics need use cases to determine how to implement
                'kube_cronjob_status_active',
                'kube_cronjob_status_last_schedule_time',
                'kube_cronjob_spec_suspend',
                'kube_cronjob_spec_starting_deadline_seconds',
                # BUG FIX: was 'kube_job_spec_active_dealine_seconds' (typo); the
                # kube-state-metrics metric is kube_job_spec_active_deadline_seconds,
                # so the misspelled entry never matched anything.
                'kube_job_spec_active_deadline_seconds',
                'kube_job_spec_completions',
                'kube_job_spec_parallelism',
                'kube_job_status_active',
                'kube_job_status_completion_time',  # We could compute the duration=completion-start as a gauge
                'kube_job_status_start_time',
                'kube_verticalpodautoscaler_labels',
            ],
            # Join extra labels from "info"-style metrics onto the metrics above.
            'label_joins': {
                'kube_pod_info': {'labels_to_match': ['pod', 'namespace'], 'labels_to_get': ['node']},
                'kube_pod_status_phase': {'labels_to_match': ['pod', 'namespace'], 'labels_to_get': ['phase']},
                'kube_persistentvolume_info': {
                    'labels_to_match': ['persistentvolume'],  # Persistent Volumes are not namespaced
                    'labels_to_get': ['storageclass'],
                },
                'kube_persistentvolumeclaim_info': {
                    'labels_to_match': ['persistentvolumeclaim', 'namespace'],
                    'labels_to_get': ['storageclass'],
                },
            },
            # Defaults that were set when kubernetes_state was based on PrometheusCheck
            'send_monotonic_counter': ksm_instance.get('send_monotonic_counter', False),
            'health_service_check': ksm_instance.get('health_service_check', False),
        }
    )

    # Metrics only collected when `experimental_metrics` is enabled; otherwise ignored.
    experimental_metrics_mapping = {
        'kube_hpa_spec_target_metric': 'hpa.spec_target_metric',
        'kube_verticalpodautoscaler_spec_resourcepolicy_container_policies_minallowed': (
            'vpa.spec_container_minallowed'
        ),
        'kube_verticalpodautoscaler_spec_resourcepolicy_container_policies_maxallowed': (
            'vpa.spec_container_maxallowed'
        ),
    }
    experimental_metrics = is_affirmative(ksm_instance.get('experimental_metrics', False))
    if experimental_metrics:
        ksm_instance['metrics'].append(experimental_metrics_mapping)
    else:
        ksm_instance['ignore_metrics'].extend(experimental_metrics_mapping.keys())

    ksm_instance['prometheus_url'] = endpoint

    if join_kube_labels:
        ksm_instance['label_joins'].update(
            {
                'kube_pod_labels': {'labels_to_match': ['pod', 'namespace'], 'labels_to_get': ['*']},
                'kube_deployment_labels': {'labels_to_match': ['deployment', 'namespace'], 'labels_to_get': ['*']},
                'kube_daemonset_labels': {'labels_to_match': ['daemonset', 'namespace'], 'labels_to_get': ['*']},
            }
        )

    # Standard `tags.datadoghq.com/*` labels joined onto pods and their controllers.
    labels_to_get = [
        "label_tags_datadoghq_com_env",
        "label_tags_datadoghq_com_service",
        "label_tags_datadoghq_com_version",
    ]
    if join_standard_tags:
        # NOTE: when both options are enabled, these entries override the
        # `kube_pod_labels`/`kube_deployment_labels`/`kube_daemonset_labels` joins
        # set by `join_kube_labels` above.
        ksm_instance['label_joins'].update(
            {
                "kube_pod_labels": {"labels_to_match": ["pod", "namespace"], "labels_to_get": labels_to_get},
                "kube_deployment_labels": {
                    "labels_to_match": ["deployment", "namespace"],
                    "labels_to_get": labels_to_get,
                },
                "kube_replicaset_labels": {
                    "labels_to_match": ["replicaset", "namespace"],
                    "labels_to_get": labels_to_get,
                },
                "kube_daemonset_labels": {
                    "labels_to_match": ["daemonset", "namespace"],
                    "labels_to_get": labels_to_get,
                },
                "kube_statefulset_labels": {
                    "labels_to_match": ["statefulset", "namespace"],
                    "labels_to_get": labels_to_get,
                },
                "kube_job_labels": {"labels_to_match": ["job_name", "namespace"], "labels_to_get": labels_to_get},
            }
        )
        ksm_instance.setdefault("labels_mapper", {}).update(
            {
                "label_tags_datadoghq_com_env": "env",
                "label_tags_datadoghq_com_service": "service",
                "label_tags_datadoghq_com_version": "version",
            }
        )

    # User-supplied label joins take precedence over all the defaults above.
    ksm_instance['label_joins'].update(extra_labels)
    if hostname_override:
        ksm_instance['label_to_hostname'] = 'node'
        clustername = get_clustername()
        if clustername != "":
            ksm_instance['label_to_hostname_suffix'] = "-" + clustername

    if 'labels_mapper' in ksm_instance and not isinstance(ksm_instance['labels_mapper'], dict):
        self.log.warning("Option labels_mapper should be a dictionary for %s", endpoint)

    return ksm_instance
def _create_kubernetes_state_prometheus_instance(self, instance):
    """
    Set up the kubernetes_state instance so it can be used in OpenMetricsBaseCheck.

    :param instance: check instance (dict) as provided in the config file
    :return: deep copy of `instance` with the kubernetes_state metric mapping,
        ignore list, label joins and scraper defaults applied
    :raises CheckException: if `kube_state_url` is missing from the instance
    """
    ksm_instance = deepcopy(instance)
    endpoint = instance.get('kube_state_url')
    if endpoint is None:
        raise CheckException("Unable to find kube_state_url in config file.")

    # User-provided label joins are merged back in after the defaults are set.
    extra_labels = ksm_instance.get('label_joins', {})
    hostname_override = is_affirmative(ksm_instance.get('hostname_override', True))
    ksm_instance.update(
        {
            'namespace': 'kubernetes_state',
            'metrics': [
                {
                    'kube_daemonset_status_current_number_scheduled': 'daemonset.scheduled',
                    'kube_daemonset_status_desired_number_scheduled': 'daemonset.desired',
                    'kube_daemonset_status_number_misscheduled': 'daemonset.misscheduled',
                    'kube_daemonset_status_number_ready': 'daemonset.ready',
                    'kube_deployment_spec_paused': 'deployment.paused',
                    'kube_deployment_spec_replicas': 'deployment.replicas_desired',
                    'kube_deployment_spec_strategy_rollingupdate_max_unavailable': 'deployment.rollingupdate.max_unavailable',  # noqa: E501
                    'kube_deployment_status_replicas': 'deployment.replicas',
                    'kube_deployment_status_replicas_available': 'deployment.replicas_available',
                    'kube_deployment_status_replicas_unavailable': 'deployment.replicas_unavailable',
                    'kube_deployment_status_replicas_updated': 'deployment.replicas_updated',
                    'kube_endpoint_address_available': 'endpoint.address_available',
                    'kube_endpoint_address_not_ready': 'endpoint.address_not_ready',
                    'kube_endpoint_created': 'endpoint.created',
                    'kube_hpa_spec_min_replicas': 'hpa.min_replicas',
                    'kube_hpa_spec_max_replicas': 'hpa.max_replicas',
                    'kube_hpa_status_desired_replicas': 'hpa.desired_replicas',
                    'kube_hpa_status_current_replicas': 'hpa.current_replicas',
                    'kube_node_status_allocatable_cpu_cores': 'node.cpu_allocatable',
                    'kube_node_status_allocatable_memory_bytes': 'node.memory_allocatable',
                    'kube_node_status_allocatable_pods': 'node.pods_allocatable',
                    'kube_node_status_capacity_cpu_cores': 'node.cpu_capacity',
                    'kube_node_status_capacity_memory_bytes': 'node.memory_capacity',
                    'kube_node_status_capacity_pods': 'node.pods_capacity',
                    'kube_node_status_allocatable_nvidia_gpu_cards': 'node.gpu.cards_allocatable',
                    'kube_node_status_capacity_nvidia_gpu_cards': 'node.gpu.cards_capacity',
                    'kube_pod_container_status_terminated': 'container.terminated',
                    'kube_pod_container_status_waiting': 'container.waiting',
                    'kube_persistentvolumeclaim_status_phase': 'persistentvolumeclaim.status',
                    'kube_persistentvolumeclaim_resource_requests_storage_bytes': 'persistentvolumeclaim.request_storage',  # noqa: E501
                    'kube_pod_container_resource_limits_cpu_cores': 'container.cpu_limit',
                    'kube_pod_container_resource_limits_memory_bytes': 'container.memory_limit',
                    'kube_pod_container_resource_requests_cpu_cores': 'container.cpu_requested',
                    'kube_pod_container_resource_requests_memory_bytes': 'container.memory_requested',
                    'kube_pod_container_status_ready': 'container.ready',
                    'kube_pod_container_status_restarts': 'container.restarts',  # up to kube-state-metrics 1.1.x
                    'kube_pod_container_status_restarts_total': 'container.restarts',  # from kube-state-metrics 1.2.0
                    'kube_pod_container_status_running': 'container.running',
                    'kube_pod_container_resource_requests_nvidia_gpu_devices': 'container.gpu.request',
                    'kube_pod_container_resource_limits_nvidia_gpu_devices': 'container.gpu.limit',
                    'kube_pod_status_ready': 'pod.ready',
                    'kube_pod_status_scheduled': 'pod.scheduled',
                    'kube_replicaset_spec_replicas': 'replicaset.replicas_desired',
                    'kube_replicaset_status_fully_labeled_replicas': 'replicaset.fully_labeled_replicas',
                    'kube_replicaset_status_ready_replicas': 'replicaset.replicas_ready',
                    'kube_replicaset_status_replicas': 'replicaset.replicas',
                    'kube_replicationcontroller_spec_replicas': 'replicationcontroller.replicas_desired',
                    'kube_replicationcontroller_status_available_replicas': 'replicationcontroller.replicas_available',  # noqa: E501
                    'kube_replicationcontroller_status_fully_labeled_replicas': 'replicationcontroller.fully_labeled_replicas',  # noqa: E501
                    'kube_replicationcontroller_status_ready_replicas': 'replicationcontroller.replicas_ready',
                    'kube_replicationcontroller_status_replicas': 'replicationcontroller.replicas',
                    'kube_statefulset_replicas': 'statefulset.replicas_desired',
                    'kube_statefulset_status_replicas': 'statefulset.replicas',
                    'kube_statefulset_status_replicas_current': 'statefulset.replicas_current',
                    'kube_statefulset_status_replicas_ready': 'statefulset.replicas_ready',
                    'kube_statefulset_status_replicas_updated': 'statefulset.replicas_updated',
                }
            ],
            'ignore_metrics': [
                # _info, _labels and _created don't convey any metric
                'kube_cronjob_info',
                'kube_cronjob_created',
                'kube_daemonset_created',
                'kube_deployment_created',
                'kube_deployment_labels',
                'kube_job_created',
                'kube_job_info',
                'kube_limitrange_created',
                'kube_namespace_created',
                'kube_namespace_labels',
                'kube_node_created',
                'kube_node_info',
                'kube_node_labels',
                # BUG FIX: a missing comma between the next two entries made Python
                # concatenate them into the single bogus string
                # 'kube_pod_createdkube_pod_container_info', so neither metric was ignored.
                'kube_pod_created',
                'kube_pod_container_info',
                'kube_pod_info',
                'kube_pod_owner',
                'kube_pod_start_time',
                'kube_pod_labels',
                'kube_replicaset_created',
                'kube_replicationcontroller_created',
                'kube_resourcequota_created',
                'kube_service_created',
                'kube_service_info',
                'kube_service_labels',
                'kube_statefulset_labels',
                'kube_statefulset_created',
                # _generation metrics are more metadata than metrics, no real use case for now
                'kube_daemonset_metadata_generation',
                'kube_deployment_metadata_generation',
                'kube_deployment_status_observed_generation',
                'kube_replicaset_metadata_generation',
                'kube_replicaset_status_observed_generation',
                'kube_replicationcontroller_metadata_generation',
                'kube_replicationcontroller_status_observed_generation',
                'kube_statefulset_metadata_generation',
                'kube_statefulset_status_observed_generation',
                'kube_hpa_metadata_generation',
                # kube_node_status_phase and kube_namespace_status_phase have no use case as a service check
                'kube_namespace_status_phase',
                'kube_node_status_phase',
                # These CronJob and Job metrics need use cases to determine how to implement
                'kube_cronjob_status_active',
                'kube_cronjob_status_last_schedule_time',
                'kube_cronjob_spec_suspend',
                'kube_cronjob_spec_starting_deadline_seconds',
                # BUG FIX: was 'kube_job_spec_active_dealine_seconds' (typo); the
                # kube-state-metrics metric is kube_job_spec_active_deadline_seconds,
                # so the misspelled entry never matched anything.
                'kube_job_spec_active_deadline_seconds',
                'kube_job_spec_completions',
                'kube_job_spec_parallelism',
                'kube_job_status_active',
                'kube_job_status_completion_time',  # We could compute the duration=completion-start as a gauge
                'kube_job_status_start_time',
            ],
            # Join extra labels from "info"-style metrics onto the metrics above.
            'label_joins': {
                'kube_pod_info': {'label_to_match': 'pod', 'labels_to_get': ['node']},
                'kube_pod_status_phase': {'label_to_match': 'pod', 'labels_to_get': ['phase']},
                'kube_persistentvolume_info': {
                    'label_to_match': 'persistentvolume',
                    'labels_to_get': ['storageclass'],
                },
                'kube_persistentvolumeclaim_info': {
                    'label_to_match': 'persistentvolumeclaim',
                    'labels_to_get': ['storageclass'],
                },
            },
            # Defaults that were set when kubernetes_state was based on PrometheusCheck
            'send_monotonic_counter': ksm_instance.get('send_monotonic_counter', False),
            'health_service_check': ksm_instance.get('health_service_check', False),
        }
    )

    ksm_instance['prometheus_url'] = endpoint
    # User-supplied label joins take precedence over the defaults above.
    ksm_instance['label_joins'].update(extra_labels)
    if hostname_override:
        ksm_instance['label_to_hostname'] = 'node'
        clustername = get_clustername()
        if clustername != "":
            ksm_instance['label_to_hostname_suffix'] = "-" + clustername

    if 'labels_mapper' in ksm_instance and not isinstance(ksm_instance['labels_mapper'], dict):
        # Lazy %-style args keep message rendering out of the hot path and match
        # the logging style used by the newer variant of this method.
        self.log.warning("Option labels_mapper should be a dictionary for %s", endpoint)

    return ksm_instance
def __init__(self, *args, **kwargs):
    """
    Accepted positional args, in order: `name`, `init_config`,
    `agentConfig` (deprecated), `instances`.
    """
    self.metrics = defaultdict(list)
    self.check_id = ''
    self.warnings = []
    self.metric_limiter = None

    # Keyword arguments supply the defaults; positional arguments override them below.
    self.name = kwargs.get('name', '')
    self.init_config = kwargs.get('init_config', {})
    self.agentConfig = kwargs.get('agentConfig', {})
    self.instances = kwargs.get('instances', [])

    argc = len(args)
    if argc > 0:
        self.name = args[0]
    if argc > 1:
        self.init_config = args[1]
    if argc > 2:
        if argc > 3 or 'instances' in kwargs:
            # old-style init: the 3rd argument is `agentConfig`
            self.agentConfig = args[2]
            if argc > 3:
                self.instances = args[3]
        else:
            # new-style init: the 3rd argument is `instances`
            self.instances = args[2]

    # Agent 6+ will only have one instance
    self.instance = self.instances[0] if self.instances else None

    # `self.hostname` is deprecated, use `datadog_agent.get_hostname()` instead
    self.hostname = datadog_agent.get_hostname()

    # returns the cluster name if the check is running in Kubernetes / OpenShift
    self.cluster_name = datadog_agent.get_clustername()

    # the agent5 'AgentCheck' setup a log attribute.
    self.log = logging.getLogger('{}.{}'.format(__name__, self.name))

    # Set proxy settings
    self.proxies = self._get_requests_proxy()
    self._use_agent_proxy = (
        True if not self.init_config else is_affirmative(self.init_config.get('use_agent_proxy', True))
    )

    self.default_integration_http_timeout = float(self.agentConfig.get('default_integration_http_timeout', 9))

    # One-shot deprecation flags: [already_warned, message].
    self._deprecations = {
        'increment': [
            False,
            'DEPRECATION NOTICE: `AgentCheck.increment`/`AgentCheck.decrement` are deprecated, please '
            'use `AgentCheck.gauge` or `AgentCheck.count` instead, with a different metric name',
        ],
        'device_name': [
            False,
            'DEPRECATION NOTICE: `device_name` is deprecated, please use a `device:` '
            'tag in the `tags` list instead',
        ],
        'in_developer_mode': [
            False,
            'DEPRECATION NOTICE: `in_developer_mode` is deprecated, please stop using it.',
        ],
        'no_proxy': [
            False,
            'DEPRECATION NOTICE: The `no_proxy` config option has been renamed '
            'to `skip_proxy` and will be removed in a future release.',
        ],
    }

    # Setup metric limits
    try:
        metric_limit = self.instances[0].get('max_returned_metrics', self.DEFAULT_METRIC_LIMIT)
        # Do not allow to disable limiting if the class has set a non-zero default value
        if metric_limit == 0 and self.DEFAULT_METRIC_LIMIT > 0:
            metric_limit = self.DEFAULT_METRIC_LIMIT
            self.warning(
                'Setting max_returned_metrics to zero is not allowed, reverting '
                'to the default of {} metrics'.format(self.DEFAULT_METRIC_LIMIT)
            )
    except Exception:
        metric_limit = self.DEFAULT_METRIC_LIMIT
    if metric_limit > 0:
        self.metric_limiter = Limiter(self.name, 'metrics', metric_limit, self.warning)
def __init__(self, name, init_config, agentConfig, instances=None):
    """
    Initialize the kubernetes_state check from the single configured instance.

    :param name: check name
    :param init_config: init_config section of the check configuration
    :param agentConfig: deprecated agent configuration dict
    :param instances: list of check instances; only the first one is used
    """
    # We do not support more than one instance of kube-state-metrics
    instance = instances[0]

    clustername = get_clustername()
    if clustername != "":
        # BUG FIX: the previous `instance.get('tags', []).extend(...)` silently dropped
        # the cluster_name tag when the instance had no 'tags' key, because `.get`
        # returned a fresh list that was never stored back. `setdefault` stores the list.
        instance.setdefault('tags', []).extend(['cluster_name:%s' % clustername])

    kubernetes_state_instance = self._create_kubernetes_state_prometheus_instance(instance)
    generic_instances = [kubernetes_state_instance]
    super(KubernetesState, self).__init__(name, init_config, agentConfig, instances=generic_instances)

    # Mapping of KSM condition label values to service check statuses.
    self.condition_to_status_positive = {'true': self.OK, 'false': self.CRITICAL, 'unknown': self.UNKNOWN}
    self.condition_to_status_negative = {'true': self.CRITICAL, 'false': self.OK, 'unknown': self.UNKNOWN}

    # Parameters for the count_objects_by_tags method
    self.object_count_params = {
        'kube_persistentvolume_status_phase': {
            'metric_name': 'persistentvolumes.by_phase',
            'allowed_labels': ['storageclass', 'phase'],
        },
        'kube_service_spec_type': {'metric_name': 'service.count', 'allowed_labels': ['namespace', 'type']},
    }

    # Raw metric name -> custom handler, applied instead of the generic mapping.
    self.METRIC_TRANSFORMERS = {
        'kube_pod_status_phase': self.kube_pod_status_phase,
        'kube_pod_container_status_waiting_reason': self.kube_pod_container_status_waiting_reason,
        'kube_pod_container_status_terminated_reason': self.kube_pod_container_status_terminated_reason,
        'kube_cronjob_next_schedule_time': self.kube_cronjob_next_schedule_time,
        'kube_job_complete': self.kube_job_complete,
        'kube_job_failed': self.kube_job_failed,
        'kube_job_status_failed': self.kube_job_status_failed,
        'kube_job_status_succeeded': self.kube_job_status_succeeded,
        'kube_node_status_condition': self.kube_node_status_condition,
        'kube_node_status_ready': self.kube_node_status_ready,
        'kube_node_status_out_of_disk': self.kube_node_status_out_of_disk,
        'kube_node_status_memory_pressure': self.kube_node_status_memory_pressure,
        'kube_node_status_disk_pressure': self.kube_node_status_disk_pressure,
        'kube_node_status_network_unavailable': self.kube_node_status_network_unavailable,
        'kube_node_spec_unschedulable': self.kube_node_spec_unschedulable,
        'kube_resourcequota': self.kube_resourcequota,
        'kube_limitrange': self.kube_limitrange,
        'kube_persistentvolume_status_phase': self.count_objects_by_tags,
        'kube_service_spec_type': self.count_objects_by_tags,
    }

    # Handling jobs succeeded/failed counts
    self.failed_job_counts = defaultdict(KubernetesState.JobCount)
    self.succeeded_job_counts = defaultdict(KubernetesState.JobCount)