def _process_pods(self, pods, kubernetes_labels, dimensions, container_dimension_map,
                  pod_dimensions_map, memory_limit_map):
    for pod in pods:
        pod_status = pod['status']
        pod_spec = pod['spec']
        pod_containers = pod_spec.get('containers', None)
        container_statuses = pod_status.get('containerStatuses', None)
        if not pod_containers or not container_statuses:
            # Pod does not have any containers assigned to it; skip to the next pod
            continue
        pod_dimensions = dimensions.copy()
        pod_dimensions.update(
            utils.get_pod_dimensions(self.kubernetes_connector, pod['metadata'],
                                     kubernetes_labels))
        pod_key = pod_dimensions['pod_name'] + pod_dimensions['namespace']
        pod_dimensions_map[pod_key] = pod_dimensions
        pod_retry_count = 0
        name2id = {}
        for container_status in container_statuses:
            container_restart_count = container_status['restartCount']
            container_dimensions = pod_dimensions.copy()
            container_name = container_status['name']
            container_dimensions['container_name'] = container_name
            container_dimensions['image'] = container_status['image']
            # Strip the runtime prefix (e.g. 'docker://') from the container ID
            container_id = container_status.get('containerID', '').split('//')[-1]
            name2id[container_name] = container_id
            container_dimension_map[container_id] = container_dimensions
            if self.report_container_metrics:
                container_ready = 0 if container_status['ready'] else 1
                self.gauge("container.ready_status", container_ready, container_dimensions,
                           hostname="SUPPRESS")
                self.gauge("container.restart_count", container_restart_count,
                           container_dimensions, hostname="SUPPRESS")
            # Aggregate per-container restart counts into a pod-level value
            pod_retry_count += container_restart_count

        # Report limit/request metrics
        if self.report_container_metrics or self.report_container_mem_percent:
            self._report_container_limits(pod_containers, container_dimension_map,
                                          name2id, memory_limit_map)

        self.gauge("pod.restart_count", pod_retry_count, pod_dimensions, hostname="SUPPRESS")
        self.gauge("pod.phase", POD_PHASE.get(pod_status['phase']), pod_dimensions,
                   hostname="SUPPRESS")
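
# The pod.phase gauge above relies on a module-level POD_PHASE mapping that is not part of
# this excerpt. A minimal sketch of such a phase-to-number mapping is given below; the
# numeric values are an illustrative assumption, not taken from the module itself.
POD_PHASE_EXAMPLE = {
    'Succeeded': 0,  # all containers terminated successfully
    'Running': 1,    # pod is bound to a node and at least one container is running
    'Pending': 2,    # pod accepted but not all containers have started
    'Failed': 3,     # all containers terminated and at least one failed
    'Unknown': 4,    # pod state could not be determined
}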
def _get_metric_endpoints_by_pod(self, dimensions):
    scrape_endpoints = {}
    # Grab running pods from the local kubelet
    try:
        pods = requests.get(self.kubelet_url, timeout=self.connection_timeout).json()
    except Exception as e:
        exception_message = "Could not get pods from local kubelet with error - {}".format(e)
        self.log.exception(exception_message)
        raise Exception(exception_message)

    # Iterate through each pod and check if it exposes a scrape endpoint
    for pod in pods['items']:
        try:
            pod_metadata = pod['metadata']
            pod_spec = pod['spec']
            pod_status = pod['status']
            if "annotations" not in pod_metadata or not (
                    'containers' in pod_spec and 'podIP' in pod_status):
                # No annotations, containers, or pod IP; skip this pod
                continue

            # Check pod annotations to decide whether we should scrape this pod
            pod_annotations = pod_metadata['annotations']
            prometheus_scrape = pod_annotations.get("prometheus.io/scrape", "false").lower()
            if prometheus_scrape != "true":
                continue

            # Collect the container ports exposed by the pod
            pod_ports = []
            pod_containers = pod_spec['containers']
            for container in pod_containers:
                if "ports" in container:
                    pod_ports += container['ports']

            pod_name = pod_metadata['name']
            endpoints = self._get_prometheus_endpoint(pod_annotations, pod_ports, pod_name)
            if not endpoints:
                continue

            # Add the pod's endpoints to the scrape endpoints
            pod_ip = pod_status['podIP']
            pod_dimensions = dimensions.copy()
            pod_dimensions.update(utils.get_pod_dimensions(
                self.kubernetes_connector, pod['metadata'], self.kubernetes_labels))
            # Loop through the list of ports and build the list of endpoints
            for endpoint in endpoints:
                scrape_endpoint = "http://{}:{}".format(pod_ip, endpoint)
                scrape_endpoints[scrape_endpoint] = pod_dimensions
                self.log.info("Detected pod endpoint - {} with metadata "
                              "of {}".format(scrape_endpoint, pod_dimensions))
        except Exception as e:
            self.log.warn("Error parsing {} to detect for scraping - {}".format(pod, e))
            continue
    return scrape_endpoints
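
# For reference, a sketch of the pod annotations the discovery loop above expects. Only
# prometheus.io/scrape is read directly in this method; the port and path keys follow the
# usual Prometheus conventions and are assumed to be resolved by a helper such as
# _get_prometheus_endpoint, so treat them as an assumption rather than a documented contract.
EXAMPLE_SCRAPE_ANNOTATIONS = {
    "prometheus.io/scrape": "true",   # anything other than "true" causes the pod to be skipped
    "prometheus.io/port": "9100",     # port to scrape, assumed to be resolved by the helper
    "prometheus.io/path": "/metrics"  # metrics path, if the helper supports overriding it
}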
def _process_pods(self, pods, kubernetes_labels, dimensions, container_dimension_map,
                  pod_dimensions_map, memory_limit_map):
    for pod in pods:
        pod_status = pod['status']
        pod_spec = pod['spec']
        pod_containers = pod_spec.get('containers', None)
        container_statuses = pod_status.get('containerStatuses', None)
        if not pod_containers or not container_statuses:
            # Pod does not have any containers assigned to it; skip to the next pod
            continue
        pod_dimensions = dimensions.copy()
        pod_dimensions.update(
            utils.get_pod_dimensions(
                self.kubernetes_connector, pod['metadata'], kubernetes_labels))
        pod_key = pod_dimensions['pod_name'] + pod_dimensions['namespace']
        pod_dimensions_map[pod_key] = pod_dimensions
        pod_retry_count = 0
        name2id = {}
        for container_status in container_statuses:
            container_restart_count = container_status['restartCount']
            container_dimensions = pod_dimensions.copy()
            container_name = container_status['name']
            container_dimensions['container_name'] = container_name
            container_dimensions['image'] = container_status['image']
            # Strip the runtime prefix (e.g. 'docker://') from the container ID
            container_id = container_status.get('containerID', '').split('//')[-1]
            name2id[container_name] = container_id
            container_dimension_map[container_id] = container_dimensions
            if self.report_container_metrics:
                container_ready = 0 if container_status['ready'] else 1
                self.gauge(
                    "container.ready_status", container_ready, container_dimensions,
                    hostname="SUPPRESS")
                self.gauge(
                    "container.restart_count", container_restart_count,
                    container_dimensions, hostname="SUPPRESS")
            # Aggregate per-container restart counts into a pod-level value
            pod_retry_count += container_restart_count

        # Report limit/request metrics
        if self.report_container_metrics or self.report_container_mem_percent:
            self._report_container_limits(
                pod_containers, container_dimension_map, name2id, memory_limit_map)

        self.gauge("pod.restart_count", pod_retry_count, pod_dimensions, hostname="SUPPRESS")
        self.gauge(
            "pod.phase", POD_PHASE.get(pod_status['phase']), pod_dimensions,
            hostname="SUPPRESS")
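
# Illustration only: the shapes the shared maps take after _process_pods has run. The keys
# and values below are invented examples, not output from a real cluster.
example_pod_dimensions_map = {
    # key is pod_name + namespace concatenated, as built above; per-pod labels may be added
    'web-1default': {'pod_name': 'web-1', 'namespace': 'default'},
}
example_container_dimension_map = {
    # key is the container id with the runtime prefix (e.g. 'docker://') stripped
    '3f2a9c...': {'pod_name': 'web-1', 'namespace': 'default',
                  'container_name': 'nginx', 'image': 'nginx:1.25'},
}
# memory_limit_map is presumably populated by _report_container_limits, which is not shown here.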
def _get_metric_endpoints_by_pod(self, dimensions):
    prometheus_endpoints = []
    # Grab running pods from the local kubelet
    try:
        pods = requests.get(self.kubelet_url, timeout=self.connection_timeout).json()
    except Exception as e:
        exception_message = "Could not get pods from local kubelet with error - {}".format(e)
        self.log.exception(exception_message)
        raise Exception(exception_message)

    # Iterate through each pod and check if it exposes a scrape endpoint
    for pod in pods['items']:
        try:
            pod_metadata = pod['metadata']
            pod_spec = pod['spec']
            pod_status = pod['status']
            if "annotations" not in pod_metadata or not (
                    'containers' in pod_spec and 'podIP' in pod_status):
                # No annotations, containers, or pod IP; skip this pod
                continue

            # Check pod annotations to decide whether we should scrape this pod
            pod_annotations = pod_metadata['annotations']
            prometheus_scrape = pod_annotations.get("prometheus.io/scrape", "false").lower()
            if prometheus_scrape != "true":
                continue

            # Collect the container ports exposed by the pod
            pod_ports = []
            pod_containers = pod_spec['containers']
            for container in pod_containers:
                if "ports" in container:
                    pod_ports += container['ports']

            pod_name = pod_metadata['name']
            endpoints = self._get_prometheus_endpoint(pod_annotations, pod_ports, pod_name)
            if not endpoints:
                continue

            # Add the pod's endpoints to the scrape endpoints
            pod_ip = pod_status['podIP']
            pod_dimensions = dimensions.copy()
            try:
                use_k8s_labels, whitelist, metric_types, report_pod_label_owner = \
                    self._get_monasca_settings(pod_name, pod_annotations)
            except Exception as e:
                error_message = "Error parsing monasca annotations on endpoints {} " \
                                "with error - {}. " \
                                "Skipping scraping metrics".format(endpoints, e)
                self.log.error(error_message)
                continue
            # Lazily initialize the shared pod cache the first time owner labels are requested
            if report_pod_label_owner and self.k8s_pod_cache is None:
                self.k8s_pod_cache = {}
                self.initialize_pod_cache()
            if use_k8s_labels:
                pod_dimensions.update(utils.get_pod_dimensions(
                    self.kubernetes_connector, pod['metadata'], self.kubernetes_labels))
            # Loop through the list of ports and build the list of endpoints
            for endpoint in endpoints:
                scrape_endpoint = "http://{}:{}".format(pod_ip, endpoint)
                prometheus_endpoint = PrometheusEndpoint(
                    scrape_endpoint, pod_dimensions, whitelist, metric_types,
                    report_pod_label_owner)
                prometheus_endpoints.append(prometheus_endpoint)
                self.log.info("Detected pod endpoint - {} with metadata "
                              "of {}".format(scrape_endpoint, pod_dimensions))
        except Exception as e:
            self.log.warn("Error parsing {} to detect for scraping - {}".format(pod, e))
            continue
    return prometheus_endpoints
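
# PrometheusEndpoint is referenced above but defined elsewhere in the module. Below is a
# minimal sketch of a value object with the five fields used by the call above; it is an
# assumption about the real class's shape, which may carry additional behaviour.
class PrometheusEndpointSketch(object):
    def __init__(self, scrape_endpoint, dimensions, whitelist, metric_types,
                 report_pod_label_owner):
        # URL to scrape, e.g. "http://10.0.0.5:9100"
        self.scrape_endpoint = scrape_endpoint
        # Monasca dimensions attached to every metric from this endpoint
        self.dimensions = dimensions
        # Optional whitelist of metric names to report
        self.whitelist = whitelist
        # Optional per-metric type overrides
        self.metric_types = metric_types
        # Whether to resolve and attach the owning workload's labels
        self.report_pod_label_owner = report_pod_label_owner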