Example No. 1
def _container_in_pod(gs, container, pod):
  """Returns True when 'container' is a part of 'pod'.

  Args:
    gs: global state.
    container: a wrapped container object.
    pod: a wrapped pod object.

  Raises:
    CollectorError: if the 'container' or the 'pod' are missing essential
    attributes.

  Returns:
  True iff container 'container' is a part of 'pod'.
  """
  assert isinstance(gs, global_state.GlobalState)
  assert utilities.is_wrapped_object(container, 'Container')
  assert utilities.is_wrapped_object(pod, 'Pod')

  parent_pod_id = utilities.get_parent_pod_id(container)
  if not utilities.valid_string(parent_pod_id):
    msg = 'could not find parent pod ID in container %s' % container['id']
    gs.logger_error(msg)
    raise collector_error.CollectorError(msg)

  return parent_pod_id == pod['id']
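
Both assertions above and the container['id'] / pod['id'] lookups depend on the wrapped-object convention used throughout these examples. The exact structure produced by utilities.wrap_object() is not shown on this page, so the sketch below only illustrates a shape that is consistent with the fields these examples actually read ('id', 'annotations', 'timestamp', 'properties', and a type tag); everything else in it is an assumption, not the real helper.

import time


def wrap_object_sketch(obj, obj_type, obj_id, timestamp=None, label=None):
  """A minimal sketch of the wrapped-object shape assumed by these examples."""
  return {
      'id': obj_id,                    # e.g. 'guestbook-controller-14zj2'
      'type': obj_type,                # e.g. 'Pod', 'Node', or 'Container'
      'timestamp': timestamp if timestamp is not None else time.time(),
      'annotations': {'label': label if label is not None else obj_id},
      'properties': obj,               # the raw object as fetched from the API
  }


def is_wrapped_object_sketch(obj, expected_type):
  """Checks the minimal invariants that the assertions above rely on."""
  return (isinstance(obj, dict) and obj.get('type') == expected_type and
          'id' in obj and 'properties' in obj)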
Example No. 2
def _do_compute_node(gs, input_queue, cluster_guid, node, g):
  assert isinstance(gs, global_state.GlobalState)
  assert isinstance(input_queue, Queue.PriorityQueue)
  assert utilities.valid_string(cluster_guid)
  assert utilities.is_wrapped_object(node, 'Node')
  assert isinstance(g, ContextGraph)

  node_id = node['id']
  node_guid = 'Node:' + node_id
  g.add_resource(node_guid, node['annotations'], 'Node', node['timestamp'],
                 node['properties'])
  g.add_relation(cluster_guid, node_guid, 'contains')  # Cluster contains Node
  # Pods in a Node
  pod_ids = set()
  docker_hosts = set()

  # Process pods sequentially because calls to _do_compute_pod() do not call
  # lower-level services or wait.
  for pod in kubernetes.get_pods(gs, node_id):
    _do_compute_pod(gs, cluster_guid, node_guid, pod, g)
    pod_ids.add(pod['id'])
    # pod.properties.spec.nodeName may be missing if the pod is waiting.
    docker_host = utilities.get_attribute(
        pod, ['properties', 'spec', 'nodeName'])
    if utilities.valid_string(docker_host):
      docker_hosts.add(docker_host)

  # 'docker_hosts' should contain a single Docker host, because all of
  # the pods run in the same Node. However, if that is not the case, we
  # cannot fix the situation, so we just log an error message and continue.
  if len(docker_hosts) != 1:
    gs.logger_error(
        'corrupt pod data in node=%s: '
        '"docker_hosts" is empty or contains more than one entry: %s',
        node_guid, str(docker_hosts))

  # Process containers concurrently.
  for docker_host in docker_hosts:
    for container in docker.get_containers_with_metrics(gs, docker_host):
      parent_pod_id = utilities.get_parent_pod_id(container)
      if utilities.valid_string(parent_pod_id) and (parent_pod_id in pod_ids):
        # This container is contained in a pod.
        parent_guid = 'Pod:' + parent_pod_id
      else:
        # This container is not contained in a pod.
        parent_guid = node_guid

      # Do not process the containers with worker threads in test mode,
      # because queuing the work would make the output order differ from
      # that of the golden files.
      if gs.get_testing():
        _do_compute_container(gs, docker_host, parent_guid, container, g)
      else:
        input_queue.put((
            gs.get_random_priority(),
            _do_compute_container,
            {'gs': gs, 'docker_host': docker_host, 'parent_guid': parent_guid,
             'container': container, 'g': g}))
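
In the non-test branch above, each work item pushed onto 'input_queue' is a (priority, function, kwargs) tuple; the priority comes from gs.get_random_priority(), presumably to avoid a fixed processing order. The worker loop that consumes the queue is not part of this page, so the sketch below (the name drain_work_items_sketch is hypothetical) is only one plausible way such entries could be drained.

import Queue  # named 'queue' on Python 3


def drain_work_items_sketch(input_queue):
  """Runs queued work items until the queue is empty."""
  while True:
    try:
      _priority, func, kwargs = input_queue.get(block=False)
    except Queue.Empty:
      return
    try:
      func(**kwargs)
    finally:
      input_queue.task_done()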
Example No. 3
  def test_container_to_pod(self):
    """Tests the operation of utilities.get_parent_pod_id()."""
    f = open('testdata/containers.output.json')
    containers_blob = json.loads(f.read())
    f.close()

    assert isinstance(containers_blob.get('resources'), types.ListType)
    pod_ids_list = []
    for container in containers_blob['resources']:
      pod_id = utilities.get_parent_pod_id(container)
      pod_ids_list.append(pod_id)

    self.assertEqual(
        ['guestbook-controller-14zj2',
         'redis-master',
         'guestbook-controller-myab8',
         'redis-worker-controller-4qg33'],
        pod_ids_list)
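
The test reads the fixture with an explicit open()/read()/close() sequence. Below is a sketch of the same loading step written with a context manager and json.load(); it is an equivalent formulation, not part of the original test suite, and the helper name load_containers_blob is hypothetical.

import json


def load_containers_blob(path='testdata/containers.output.json'):
  """Loads the containers fixture and returns the parsed JSON blob."""
  with open(path) as f:
    blob = json.load(f)
  # types.ListType in the original test is just the built-in 'list' type.
  assert isinstance(blob.get('resources'), list)
  return blob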
Example No. 4
def get_containers_with_metrics(gs, docker_host):
  """Gets the list of all containers in 'docker_host' with metric annotations.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  # Create a lookup table from pod IDs to pods.
  # This lookup table is needed when annotating containers with
  # metrics. Also compute the project's name.
  containers_list = get_containers(gs, docker_host)
  if not containers_list:
    return []

  pod_id_to_pod = {}
  project_id = '_unknown_'

  # Populate the pod ID to pod lookup table.
  # Compute the project_id from the name of the first pod.
  for pod in kubernetes.get_pods(gs, docker_host):
    assert utilities.is_wrapped_object(pod, 'Pod')
    pod_id_to_pod[pod['id']] = pod
    if project_id != '_unknown_':
      continue
    pod_hostname = utilities.get_attribute(
        pod, ['properties', 'spec', 'host'])
    if utilities.valid_string(pod_hostname):
      project_id = utilities.node_id_to_project_id(pod_hostname)

  # We know that there are containers in this docker_host.
  if not pod_id_to_pod:
    # there are no pods in this docker_host.
    msg = 'Docker host %s has containers but no pods' % docker_host
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  # Annotate the containers with their metrics.
  for container in containers_list:
    assert utilities.is_wrapped_object(container, 'Container')

    parent_pod_id = utilities.get_parent_pod_id(container)
    if not utilities.valid_string(parent_pod_id):
      msg = ('missing or invalid parent pod ID in container %s' %
             container['id'])
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    if parent_pod_id not in pod_id_to_pod:
      msg = ('could not locate parent pod %s for container %s' %
             (parent_pod_id, container['id']))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    # Note that the project ID may be '_unknown_'.
    # This is not a big deal, because the aggregator knows the project ID.
    metrics.annotate_container(
        project_id, container, pod_id_to_pod[parent_pod_id])

  return containers_list
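
Note the recurring error-handling pattern in these examples: build a message, log it through the global state, then raise a CollectorError carrying the same message. A small helper along the lines of the sketch below could factor that out; this is only a sketch, it assumes the same gs object and collector_error module that the surrounding code already imports, and the helper name is hypothetical.

def log_and_raise_sketch(gs, msg):
  """Logs 'msg' via the global state and raises a CollectorError with it."""
  gs.logger_error(msg)
  raise collector_error.CollectorError(msg)

# Hypothetical usage inside get_containers_with_metrics():
#   if parent_pod_id not in pod_id_to_pod:
#     log_and_raise_sketch(gs, 'could not locate parent pod %s for container %s'
#                          % (parent_pod_id, container['id']))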
Example No. 5
def get_containers(gs, docker_host):
  """Gets the list of all containers in 'docker_host'.

  Args:
    gs: global state.
    docker_host: the Docker host running the containers.

  Returns:
    list of wrapped container objects.
    Each element in the list is the result of
    utilities.wrap_object(container, 'Container', ...)

  Raises:
    CollectorError: in case of failure to fetch data from Docker.
    Other exceptions may be raised due to execution errors.
  """
  containers, timestamp = gs.get_containers_cache().lookup(docker_host)
  if timestamp is not None:
    gs.logger_info(
        'get_containers(docker_host=%s) cache hit returns '
        '%d containers', docker_host, len(containers))
    return containers

  url = 'http://{docker_host}:{port}/containers/json'.format(
      docker_host=docker_host, port=gs.get_docker_port())
  # A typical value of 'docker_host' is:
  # k8s-guestbook-node-3.c.rising-apricot-840.internal
  # Use only the first period-separated element for the test file name.
  fname = '{host}-containers'.format(host=docker_host.split('.')[0])
  try:
    containers_list = fetch_data(gs, url, fname)
  except collector_error.CollectorError:
    raise
  except:
    msg = ('fetching %s or %s failed with exception %s' %
           (url, fname, sys.exc_info()[0]))
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  if not isinstance(containers_list, types.ListType):
    msg = 'invalid response from fetching %s' % url
    gs.logger_exception(msg)
    raise collector_error.CollectorError(msg)

  containers = []
  timestamps = []
  for container_info in containers_list:
    # NOTE: container 'Name' is stable across container restarts, whereas
    # container 'Id' is not.
    # This may be because Kubernetes assigns the Name while Docker assigns
    # the Id (?)
    # The container Name is the only element of the array 'Names' -
    # why is Names an array here?
    # Skip the leading '/' in the Name.
    if not (isinstance(container_info.get('Names'), types.ListType) and
            container_info['Names'] and
            utilities.valid_string(container_info['Names'][0]) and
            container_info['Names'][0][0] == '/'):
      msg = 'invalid containers data format. docker_host=%s' % docker_host
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    container_id = container_info['Names'][0][1:]
    container, ts = _inspect_container(gs, docker_host, container_id)
    if container is None:
      continue

    if not utilities.valid_string(container.get('Name')):
      msg = ('missing or invalid Name attribute in container %s' %
             container_id)
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    if container['Name'] != ('/' + container_id):
      msg = ('container %s\'s Name attribute is "%s"; expecting "%s"' %
             (container_id, container['Name'], '/' + container_id))
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    short_hex_id = utilities.object_to_hex_id(container)
    if short_hex_id is None:
      msg = 'Could not compute short hex ID of container %s' % container_id
      gs.logger_error(msg)
      raise collector_error.CollectorError(msg)

    wrapped_container = utilities.wrap_object(
        container, 'Container', container_id, ts, label=short_hex_id)
    containers.append(wrapped_container)
    timestamps.append(ts)

    # Modify the container's label after the wrapped container was added
    # to the containers list.
    # Compute the container's short name to create a better container label:
    # short_container_name/short_hex_id.
    # For example: "cassandra/d85b599c17d8".
    parent_pod_id = utilities.get_parent_pod_id(wrapped_container)
    if parent_pod_id is None:
      continue
    parent_pod = kubernetes.get_one_pod(gs, docker_host, parent_pod_id)
    if parent_pod is None:
      continue
    short_container_name = utilities.get_short_container_name(
        wrapped_container, parent_pod)
    if not utilities.valid_string(short_container_name):
      continue
    wrapped_container['annotations']['label'] = (short_container_name + '/' +
                                                 short_hex_id)

  ret_value = gs.get_containers_cache().update(
      docker_host, containers,
      min(timestamps) if timestamps else time.time())
  gs.logger_info(
      'get_containers(docker_host=%s) returns %d containers',
      docker_host, len(containers))
  return ret_value
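
get_containers() is fronted by a per-Docker-host cache: lookup() returns a (value, timestamp) pair whose timestamp is None on a miss, and update() stores the freshly fetched list and returns the value that is handed back to the caller. The cache class behind gs.get_containers_cache() is not shown on this page, so the sketch below is only a minimal stand-in matching that calling convention; the real cache presumably also expires stale entries.

import time


class ContainersCacheSketch(object):
  """A minimal cache stand-in matching the lookup()/update() calls above."""

  def __init__(self):
    self._data = {}  # key -> (value, timestamp)

  def lookup(self, key):
    """Returns (value, timestamp); the timestamp is None on a cache miss."""
    return self._data.get(key, (None, None))

  def update(self, key, value, timestamp=None):
    """Stores 'value' under 'key' and returns it to the caller."""
    if timestamp is None:
      timestamp = time.time()
    self._data[key] = (value, timestamp)
    return value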