def __init__(self, name, init_config, agentConfig, instances=None):
    """
    Set up the check from the first configured kube-state-metrics instance.

    The cluster name tag (when a cluster name is available) is added to the
    instance tags before the instance is converted by
    `_create_kubernetes_state_prometheus_instance` and handed to the base class.

    :param name: check name, forwarded to the base class.
    :param init_config: init configuration, forwarded to the base class.
    :param agentConfig: agent configuration, forwarded to the base class.
    :param instances: list of instance dicts. Only the first one is used; it is
        read unconditionally, so a non-empty list is required despite the
        `None` default.
    """
    # We do not support more than one instance of kube-state-metrics
    instance = instances[0]

    self.cluster_name = AgentCheck.get_cluster_name()
    if self.cluster_name != "":
        # Store the list on the instance when 'tags' is missing (or None):
        # extending the throwaway default of `.get('tags', [])` would silently
        # drop the cluster_name tag.
        tags = instance.get('tags')
        if tags is None:
            tags = instance['tags'] = []
        tags.extend(['cluster_name:%s' % self.cluster_name])

    kubernetes_state_instance = self._create_kubernetes_state_prometheus_instance(instance)

    generic_instances = [kubernetes_state_instance]
    super(KubernetesState, self).__init__(name, init_config, agentConfig, instances=generic_instances)

    # Lookup tables from a condition value to a service check status; the
    # "negative" table swaps OK and CRITICAL relative to the "positive" one.
    self.condition_to_status_positive = {'true': self.OK, 'false': self.CRITICAL, 'unknown': self.UNKNOWN}
    self.condition_to_status_negative = {'true': self.CRITICAL, 'false': self.OK, 'unknown': self.UNKNOWN}

    # Parameters for the count_objects_by_tags method
    self.object_count_params = {
        'kube_persistentvolume_status_phase': {
            'metric_name': 'persistentvolumes.by_phase',
            'allowed_labels': ['storageclass', 'phase'],
        },
        'kube_service_spec_type': {'metric_name': 'service.count', 'allowed_labels': ['namespace', 'type']},
    }

    # Metric name -> bound method used to handle that metric.
    self.METRIC_TRANSFORMERS = {
        'kube_pod_status_phase': self.kube_pod_status_phase,
        'kube_pod_container_status_waiting_reason': self.kube_pod_container_status_waiting_reason,
        'kube_pod_container_status_terminated_reason': self.kube_pod_container_status_terminated_reason,
        'kube_cronjob_next_schedule_time': self.kube_cronjob_next_schedule_time,
        'kube_job_complete': self.kube_job_complete,
        'kube_job_failed': self.kube_job_failed,
        'kube_job_status_failed': self.kube_job_status_failed,
        'kube_job_status_succeeded': self.kube_job_status_succeeded,
        'kube_node_status_condition': self.kube_node_status_condition,
        'kube_node_status_ready': self.kube_node_status_ready,
        'kube_node_status_out_of_disk': self.kube_node_status_out_of_disk,
        'kube_node_status_memory_pressure': self.kube_node_status_memory_pressure,
        'kube_node_status_disk_pressure': self.kube_node_status_disk_pressure,
        'kube_node_status_network_unavailable': self.kube_node_status_network_unavailable,
        'kube_node_spec_unschedulable': self.kube_node_spec_unschedulable,
        'kube_resourcequota': self.kube_resourcequota,
        'kube_limitrange': self.kube_limitrange,
        'kube_persistentvolume_status_phase': self.count_objects_by_tags,
        'kube_service_spec_type': self.count_objects_by_tags,
    }

    # Handling jobs succeeded/failed counts
    self.failed_job_counts = defaultdict(KubernetesState.JobCount)
    self.succeeded_job_counts = defaultdict(KubernetesState.JobCount)
def _create_cadvisor_prometheus_instance(self, instance):
    """
    Create a copy of the instance and set default values.
    This is so the base class can create a scraper_config with the proper values.

    :param instance: instance configuration dict; it is deep-copied and never
        modified by this method.
    :return: the copied dict with the cadvisor defaults applied and, when a
        cluster name is available, a `cluster_name:<name>` entry appended to
        its `tags` list (the list is created if the instance had none).
    """
    cadvisor_instance = deepcopy(instance)
    cadvisor_instance.update(
        {
            'namespace': self.NAMESPACE,
            # We need to specify a prometheus_url so the base class can use it as the key for our config_map,
            # we specify a dummy url that will be replaced in the `check()` function. We append it with "cadvisor"
            # so the key is different than the kubelet scraper.
            'prometheus_url': instance.get('cadvisor_metrics_endpoint', 'dummy_url/cadvisor'),
            'ignore_metrics': [
                'container_cpu_cfs_periods_total',
                'container_fs_inodes_free',
                'container_fs_inodes_total',
                'container_fs_io_current',
                'container_fs_io_time_seconds_total',
                'container_fs_io_time_weighted_seconds_total',
                'container_fs_read_seconds_total',
                'container_fs_reads_merged_total',
                'container_fs_reads_total',
                'container_fs_sector_reads_total',
                'container_fs_sector_writes_total',
                'container_fs_write_seconds_total',
                'container_fs_writes_merged_total',
                'container_fs_writes_total',
                'container_last_seen',
                'container_start_time_seconds',
                'container_scrape_error',
            ],
            # Defaults that were set when CadvisorPrometheusScraper was based on PrometheusScraper
            'send_monotonic_counter': instance.get('send_monotonic_counter', False),
            'health_service_check': instance.get('health_service_check', False),
        }
    )

    clustername = AgentCheck.get_cluster_name()
    if clustername != "":
        # Store the list on the copy when 'tags' is missing (or None):
        # extending the throwaway default of `.get('tags', [])` would silently
        # drop the cluster_name tag.
        tags = cadvisor_instance.get('tags')
        if tags is None:
            tags = cadvisor_instance['tags'] = []
        tags.extend(['cluster_name:%s' % clustername])

    return cadvisor_instance