def _container_in_pod(gs, container, pod):
  """Tells whether 'container' belongs to 'pod'.

  A container belongs to a pod when its parent pod ID (as recorded in the
  container's annotations) equals the pod's ID.

  Args:
    gs: global state.
    container: a wrapped container object.
    pod: a wrapped pod object.

  Raises:
    CollectorError: if the 'container' or the 'pod' are missing essential
      attributes.

  Returns:
    True iff container 'container' is a part of 'pod'.
  """
  assert isinstance(gs, global_state.GlobalState)
  assert utilities.is_wrapped_object(container, 'Container')
  assert utilities.is_wrapped_object(pod, 'Pod')

  pod_id_of_container = utilities.get_parent_pod_id(container)
  if utilities.valid_string(pod_id_of_container):
    return pod_id_of_container == pod['id']

  # The container does not carry a usable parent pod ID; treat this as a
  # data error rather than silently answering False.
  msg = 'could not find parent pod ID in container %s' % container['id']
  gs.logger_error(msg)
  raise collector_error.CollectorError(msg)
def _do_compute_node(gs, input_queue, cluster_guid, node, g):
  """Adds 'node', its pods, and their containers to the context graph 'g'.

  The node resource and its containment relation to the cluster are added
  directly. Pods are processed sequentially; containers are normally handed
  to worker threads via 'input_queue' (but computed inline in test mode so
  the output order stays deterministic).

  Args:
    gs: global state.
    input_queue: priority queue of work items for the worker threads.
    cluster_guid: the GUID of the cluster containing this node.
    node: a wrapped Node object.
    g: the context graph being built.
  """
  assert isinstance(gs, global_state.GlobalState)
  assert isinstance(input_queue, Queue.PriorityQueue)
  assert utilities.valid_string(cluster_guid)
  assert utilities.is_wrapped_object(node, 'Node')
  assert isinstance(g, ContextGraph)

  node_id = node['id']
  node_guid = 'Node:' + node_id
  g.add_resource(node_guid, node['annotations'], 'Node', node['timestamp'],
                 node['properties'])
  g.add_relation(cluster_guid, node_guid, 'contains')  # Cluster contains Node

  # Pods in a Node
  pod_ids = set()
  docker_hosts = set()

  # Process pods sequentially because calls to _do_compute_pod() do not call
  # lower-level services or wait.
  for pod in kubernetes.get_pods(gs, node_id):
    _do_compute_pod(gs, cluster_guid, node_guid, pod, g)
    pod_ids.add(pod['id'])
    # pod.properties.spec.nodeName may be missing if the pod is waiting.
    docker_host = utilities.get_attribute(
        pod, ['properties', 'spec', 'nodeName'])
    if utilities.valid_string(docker_host):
      docker_hosts.add(docker_host)

  # 'docker_hosts' should contain a single Docker host, because all of
  # the pods run in the same Node. However, if it is not the case, we
  # cannot fix the situation, so we just log an error message and continue.
  if len(docker_hosts) != 1:
    gs.logger_error(
        'corrupt pod data in node=%s: '
        '"docker_hosts" is empty or contains more than one entry: %s',
        node_guid, str(docker_hosts))

  # Process containers concurrently.
  for docker_host in docker_hosts:
    for container in docker.get_containers_with_metrics(gs, docker_host):
      parent_pod_id = utilities.get_parent_pod_id(container)
      if utilities.valid_string(parent_pod_id) and (parent_pod_id in pod_ids):
        # This container is contained in a pod.
        parent_guid = 'Pod:' + parent_pod_id
      else:
        # This container is not contained in a pod.
        parent_guid = node_guid

      # Do not compute the containers by worker threads in test mode
      # because the order of the output will be different than the golden
      # files due to the effects of queuing the work.
      if gs.get_testing():
        _do_compute_container(gs, docker_host, parent_guid, container, g)
      else:
        # Each work item is (priority, callable, keyword arguments).
        input_queue.put((
            gs.get_random_priority(),
            _do_compute_container,
            {'gs': gs, 'docker_host': docker_host, 'parent_guid': parent_guid,
             'container': container, 'g': g}))
def test_container_to_pod(self):
  """Tests the operation of utilities.get_parent_pod_id().

  Reads a captured containers blob from testdata and verifies that each
  container maps to its expected parent pod ID, in input order.
  """
  # 'with' guarantees the file is closed even if JSON parsing raises.
  with open('testdata/containers.output.json') as f:
    containers_blob = json.load(f)

  assert isinstance(containers_blob.get('resources'), types.ListType)

  # Map every container to its parent pod ID, preserving input order.
  pod_ids_list = [utilities.get_parent_pod_id(container)
                  for container in containers_blob['resources']]

  self.assertEqual(
      ['guestbook-controller-14zj2', 'redis-master',
       'guestbook-controller-myab8', 'redis-worker-controller-4qg33'],
      pod_ids_list)
def get_containers_with_metrics(gs, docker_host):
  """Gets the list of all containers in 'docker_host' with metric annotations.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  containers_list = get_containers(gs, docker_host)
  if not containers_list:
    return []

  # Build a pod-ID -> pod lookup table (needed for annotating the
  # containers with metrics) and derive the project name from the first
  # pod that carries a valid host name.
  pods_by_id = {}
  project_id = '_unknown_'
  for pod in kubernetes.get_pods(gs, docker_host):
    assert utilities.is_wrapped_object(pod, 'Pod')
    pods_by_id[pod['id']] = pod
    if project_id == '_unknown_':
      hostname = utilities.get_attribute(pod, ['properties', 'spec', 'host'])
      if utilities.valid_string(hostname):
        project_id = utilities.node_id_to_project_id(hostname)

  # We know that there are containers in this docker_host, so a host
  # without any pods indicates corrupt data.
  if not pods_by_id:
    msg = 'Docker host %s has containers but no pods' % docker_host
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  # Attach metric annotations to every container via its parent pod.
  for container in containers_list:
    assert utilities.is_wrapped_object(container, 'Container')
    pod_id = utilities.get_parent_pod_id(container)
    if not utilities.valid_string(pod_id):
      msg = ('missing or invalid parent pod ID in container %s' %
             container['id'])
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)
    if pod_id not in pods_by_id:
      msg = ('could not locate parent pod %s for container %s' %
             (pod_id, container['id']))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)
    # Note that the project ID may be '_unknown_'. This is not a big deal,
    # because the aggregator knows the project ID.
    metrics.annotate_container(project_id, container, pods_by_id[pod_id])

  return containers_list
def get_containers(gs, docker_host):
  """Gets the list of all containers in 'docker_host'.

  Results are served from (and stored into) the containers cache keyed by
  'docker_host'. Each container returned by the Docker API is inspected,
  validated, wrapped, and given a human-friendly label.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  containers, timestamp = gs.get_containers_cache().lookup(docker_host)
  if timestamp is not None:
    gs.logger_info(
        'get_containers(docker_host=%s) cache hit returns '
        '%d containers', docker_host, len(containers))
    return containers

  url = 'http://{docker_host}:{port}/containers/json'.format(
      docker_host=docker_host, port=gs.get_docker_port())
  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-separated element for the test file name.
  fname = '{host}-containers'.format(host=docker_host.split('.')[0])

  try:
    containers_list = fetch_data(gs, url, fname)
  except collector_error.CollectorError:
    raise
  # BUGFIX: was a bare 'except:', which also converted KeyboardInterrupt
  # and SystemExit into CollectorError. Catch only real errors.
  except Exception:
    msg = ('fetching %s or %s failed with exception %s' %
           (url, fname, sys.exc_info()[0]))
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  if not isinstance(containers_list, types.ListType):
    msg = 'invalid response from fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  containers = []
  timestamps = []
  for container_info in containers_list:
    # NOTE: container 'Name' is stable across container re-starts whereas
    # container 'Id' is not.
    # This may be because Kubernetes assigns the Name while Docker assigns
    # the Id (?)
    # The container Name is the only element of the array 'Names' -
    # why is Names an array here?
    # skip the leading / in the Name
    if not (isinstance(container_info.get('Names'), types.ListType) and
            container_info['Names'] and
            utilities.valid_string(container_info['Names'][0]) and
            container_info['Names'][0][0] == '/'):
      msg = 'invalid containers data format. docker_host=%s' % docker_host
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    container_id = container_info['Names'][0][1:]
    container, ts = _inspect_container(gs, docker_host, container_id)
    if container is None:
      continue

    # Sanity checks: the inspected container must carry a Name that
    # matches the ID we asked for.
    if not utilities.valid_string(container.get('Name')):
      msg = ('missing or invalid Name attribute in container %s' %
             container_id)
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)
    if container['Name'] != ('/' + container_id):
      msg = ('container %s\'s Name attribute is "%s"; expecting "%s"' %
             (container_id, container['Name'], '/' + container_id))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    short_hex_id = utilities.object_to_hex_id(container)
    if short_hex_id is None:
      msg = 'Could not compute short hex ID of container %s' % container_id
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    wrapped_container = utilities.wrap_object(
        container, 'Container', container_id, ts, label=short_hex_id)
    containers.append(wrapped_container)
    timestamps.append(ts)

    # Modify the container's label after the wrapped container was added
    # to the containers list.
    # Compute the container's short name to create a better container label:
    # short_container_name/short_hex_id.
    # For example: "cassandra/d85b599c17d8".
    parent_pod_id = utilities.get_parent_pod_id(wrapped_container)
    if parent_pod_id is None:
      continue
    parent_pod = kubernetes.get_one_pod(gs, docker_host, parent_pod_id)
    if parent_pod is None:
      continue
    short_container_name = utilities.get_short_container_name(
        wrapped_container, parent_pod)
    if not utilities.valid_string(short_container_name):
      continue
    wrapped_container['annotations']['label'] = (short_container_name + '/' +
                                                short_hex_id)

  # Cache with the oldest observed timestamp so the cache expires no later
  # than the stalest inspected container.
  ret_value = gs.get_containers_cache().update(
      docker_host, containers,
      min(timestamps) if timestamps else time.time())
  gs.logger_info(
      'get_containers(docker_host=%s) returns %d containers',
      docker_host, len(containers))
  return ret_value
def get_containers_with_metrics(gs, docker_host):
  """Gets the list of all containers in 'docker_host' with metric annotations.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  # Create a lookup table from pod IDs to pods.
  # This lookup table is needed when annotating containers with
  # metrics. Also compute the project's name.
  containers_list = get_containers(gs, docker_host)
  if not containers_list:
    return []

  pod_id_to_pod = {}
  project_id = '_unknown_'

  # Populate the pod ID to pod lookup table.
  # Compute the project_id from the name of the first pod (that is, the
  # first pod with a valid 'spec.host' attribute).
  for pod in kubernetes.get_pods(gs, docker_host):
    assert utilities.is_wrapped_object(pod, 'Pod')
    pod_id_to_pod[pod['id']] = pod
    if project_id != '_unknown_':
      # Project ID already determined; only keep filling the lookup table.
      continue
    pod_hostname = utilities.get_attribute(pod, ['properties', 'spec', 'host'])
    if utilities.valid_string(pod_hostname):
      project_id = utilities.node_id_to_project_id(pod_hostname)

  # We know that there are containers in this docker_host.
  if not pod_id_to_pod:
    # there are no pods in this docker_host.
    msg = 'Docker host %s has containers but no pods' % docker_host
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  # Annotate the containers with their metrics, using each container's
  # parent pod to locate the right metrics.
  for container in containers_list:
    assert utilities.is_wrapped_object(container, 'Container')
    parent_pod_id = utilities.get_parent_pod_id(container)
    if not utilities.valid_string(parent_pod_id):
      msg = ('missing or invalid parent pod ID in container %s' %
             container['id'])
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)
    if parent_pod_id not in pod_id_to_pod:
      msg = ('could not locate parent pod %s for container %s' %
             (parent_pod_id, container['id']))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)
    # Note that the project ID may be '_unknown_'.
    # This is not a big deal, because the aggregator knows the project ID.
    metrics.annotate_container(project_id, container,
                               pod_id_to_pod[parent_pod_id])

  return containers_list
def get_containers(gs, docker_host):
  """Gets the list of all containers in 'docker_host'.

  Results are served from (and stored into) the containers cache keyed by
  'docker_host'.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  containers, timestamp = gs.get_containers_cache().lookup(docker_host)
  if timestamp is not None:
    gs.logger_info(
        'get_containers(docker_host=%s) cache hit returns '
        '%d containers', docker_host, len(containers))
    return containers

  url = 'http://{docker_host}:{port}/containers/json'.format(
      docker_host=docker_host, port=gs.get_docker_port())
  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-separated element for the test file name.
  fname = '{host}-containers'.format(host=docker_host.split('.')[0])
  try:
    containers_list = fetch_data(gs, url, fname)
  except collector_error.CollectorError:
    raise
  # NOTE(review): bare 'except:' also catches KeyboardInterrupt/SystemExit
  # and converts them into CollectorError — consider 'except Exception:'.
  except:
    msg = ('fetching %s or %s failed with exception %s' %
           (url, fname, sys.exc_info()[0]))
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  if not isinstance(containers_list, types.ListType):
    msg = 'invalid response from fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  containers = []
  timestamps = []
  for container_info in containers_list:
    # NOTE: container 'Name' is stable across container re-starts whereas
    # container 'Id' is not.
    # This may be because Kubernetes assigns the Name while Docker assigns
    # the Id (?)
    # The container Name is the only element of the array 'Names' -
    # why is Names an array here?
    # skip the leading / in the Name
    if not (isinstance(container_info.get('Names'), types.ListType) and
            container_info['Names'] and
            utilities.valid_string(container_info['Names'][0]) and
            container_info['Names'][0][0] == '/'):
      msg = 'invalid containers data format. docker_host=%s' % docker_host
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    container_id = container_info['Names'][0][1:]
    container, ts = _inspect_container(gs, docker_host, container_id)
    if container is None:
      # The container disappeared between listing and inspection; skip it.
      continue

    # Sanity checks: the inspected container must carry a Name matching
    # the ID we asked for.
    if not utilities.valid_string(container.get('Name')):
      msg = ('missing or invalid Name attribute in container %s' %
             container_id)
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)
    if container['Name'] != ('/' + container_id):
      msg = ('container %s\'s Name attribute is "%s"; expecting "%s"' %
             (container_id, container['Name'], '/' + container_id))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    short_hex_id = utilities.object_to_hex_id(container)
    if short_hex_id is None:
      msg = 'Could not compute short hex ID of container %s' % container_id
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    wrapped_container = utilities.wrap_object(container, 'Container',
                                              container_id, ts,
                                              label=short_hex_id)
    containers.append(wrapped_container)
    timestamps.append(ts)

    # Modify the container's label after the wrapped container was added
    # to the containers list.
    # Compute the container's short name to create a better container label:
    # short_container_name/short_hex_id.
    # For example: "cassandra/d85b599c17d8".
    parent_pod_id = utilities.get_parent_pod_id(wrapped_container)
    if parent_pod_id is None:
      continue
    parent_pod = kubernetes.get_one_pod(gs, docker_host, parent_pod_id)
    if parent_pod is None:
      continue
    short_container_name = utilities.get_short_container_name(
        wrapped_container, parent_pod)
    if not utilities.valid_string(short_container_name):
      continue
    wrapped_container['annotations']['label'] = (short_container_name + '/' +
                                                short_hex_id)

  # Cache with the oldest observed timestamp so the cached entry is no
  # fresher than its stalest container.
  ret_value = gs.get_containers_cache().update(
      docker_host, containers,
      min(timestamps) if timestamps else time.time())
  gs.logger_info('get_containers(docker_host=%s) returns %d containers',
                 docker_host, len(containers))
  return ret_value