예제 #1
0
    @staticmethod
    def _cpu_quantity_to_cores(quantity) -> float:
        """Convert a Kubernetes CPU quantity into a number of cores.

        The quantity is either a plain core count ('2', '0.1') or a value in
        millicores carrying an 'm' suffix ('100m').

        :param quantity: the CPU quantity as found in the Kubernetes object
        :return: the equivalent number of cores
        :raises ValueError: if the numeric part cannot be parsed
        """
        text = str(quantity).strip()
        if text.endswith('m'):
            return float(text[:-1]) / 1000
        return float(text)

    def info(self) -> ClusterStats:  # pylint: disable=too-many-locals
        """Retrieve Kubernetes cluster statistics.

        One NodeStats entry is built for every node returned by the API, keyed
        by the IP address the node name resolves to. Every pod that reports a
        host IP is then accounted on the matching node: the pod counter is
        incremented and the memory/CPU requests of all its containers are added
        to the reserved totals.

        :return: a ClusterStats object whose ``nodes`` list holds one entry per node
        """
        pl_status = ClusterStats()

        node_list = pykube.Node.objects(
            self.api).filter(namespace=pykube.all).iterator()
        node_dict = {}

        # Get basic information from nodes
        for node in node_list:
            nss = NodeStats(node.name)
            allocatable = node.obj['status']['allocatable']
            # Parse the whole quantity: it may be a core count ('4') or
            # millicores ('940m'), so indexing a single character is wrong.
            nss.cores_total = self._cpu_quantity_to_cores(allocatable['cpu'])
            nss.memory_total = humanfriendly.parse_size(allocatable['memory'])
            nss.labels = node.obj['metadata']['labels']
            nss.status = 'online'
            node_dict[str(socket.gethostbyname(node.name))] = nss

        # Get information from all running pods, then accumulate to nodes
        pod_list = pykube.Pod.objects(
            self.api).filter(namespace=pykube.all).iterator()
        for pod in pod_list:
            try:
                host_ip = pod.obj['status']['hostIP']
            except KeyError:
                # No host IP reported for this pod: nothing to account for
                continue
            nss = node_dict.get(host_ip)
            if nss is None:
                # The host IP does not match any resolved node address; skip
                # the pod rather than failing the whole statistics query.
                continue
            nss.container_count += 1
            # Sum the requests of every container in the pod, not just the first
            for spec_cont in pod.obj['spec']['containers']:
                requests = (spec_cont.get('resources') or {}).get('requests') or {}
                if 'memory' in requests:
                    nss.memory_reserved = nss.memory_reserved + humanfriendly.parse_size(
                        requests['memory'])
                if 'cpu' in requests:
                    # ex: cpu could be 100m or 0.1
                    cpu_float = self._cpu_quantity_to_cores(requests['cpu'])
                    nss.cores_reserved = round(
                        nss.cores_reserved + cpu_float, 3)

        pl_status.nodes.extend(node_dict.values())

        return pl_status
예제 #2
0
파일: threads.py 프로젝트: www3838438/zoe
    def _host_subthread(self, host_config: DockerHostConfig):
        """Periodically synchronise the statistics of one Docker host.

        Runs until ``self.stop.wait()`` returns a true value. On each
        iteration the Docker engine of the host is queried and the NodeStats
        entry stored in ``self.host_stats`` under the host name is refreshed:
        status, container count, total/allocated/reserved resources, labels,
        per-service limits and the list of images. Containers without a
        matching service are reported, and removed when they are in the
        ``Service.BACKEND_DIE_STATUS`` state.

        :param host_config: configuration of the Docker host to monitor
        """
        log.info("Synchro thread for host {} started".format(host_config.name))

        self.host_stats[host_config.name] = NodeStats(host_config.name)

        while True:
            time_start = time.time()
            try:
                my_engine = DockerClient(host_config)
                container_list = my_engine.list(
                    only_label={
                        'zoe_deployment_name': get_conf().deployment_name
                    })
                info = my_engine.info()
            except ZoeException as e:
                self.host_stats[host_config.name].status = 'offline'
                log.error(str(e))
                log.info('Node {} is offline'.format(host_config.name))
            else:
                node_stats = self.host_stats[host_config.name]
                if node_stats.status == 'offline':
                    log.info('Node {} is now online'.format(host_config.name))
                    node_stats.status = 'online'

                node_stats.container_count = info['Containers']
                node_stats.cores_total = info['NCPU']
                node_stats.memory_total = info['MemTotal']

                # Build a fresh set: set.union() returns a new set, so its
                # result must be kept, and the host configuration must not be
                # modified by the merge.
                labels = set(host_config.labels)
                if info['Labels'] is not None:
                    labels.update(info['Labels'])
                node_stats.labels = labels

                # A soft limit equal to the host memory is not counted
                node_stats.memory_allocated = sum(
                    cont['memory_soft_limit'] for cont in container_list
                    if cont['memory_soft_limit'] != info['MemTotal'])
                node_stats.cores_allocated = sum(
                    cont['cpu_quota'] / cont['cpu_period']
                    for cont in container_list if cont['cpu_period'] != 0)

                stats = {}
                node_stats.memory_reserved = 0
                node_stats.cores_reserved = 0
                for cont in container_list:
                    service = self.state.services.select(
                        only_one=True,
                        backend_host=host_config.name,
                        backend_id=cont['id'])
                    if service is None:
                        log.warning(
                            'Container {} on host {} has no corresponding service'
                            .format(cont['name'], host_config.name))
                        if cont['state'] == Service.BACKEND_DIE_STATUS:
                            log.warning(
                                'Terminating dead and orphan container {}'.
                                format(cont['name']))
                            my_engine.terminate_container(cont['id'],
                                                          delete=True)
                        continue
                    self._update_service_status(service, cont)
                    node_stats.memory_reserved += service.resource_reservation.memory.min
                    node_stats.cores_reserved += service.resource_reservation.cores.min
                    # A zero period would divide by zero; report 0, matching
                    # how such containers are left out of cores_allocated.
                    if cont['cpu_period'] != 0:
                        core_limit = cont['cpu_quota'] / cont['cpu_period']
                    else:
                        core_limit = 0
                    stats[service.id] = {
                        'core_limit': core_limit,
                        'mem_limit': cont['memory_soft_limit']
                    }
                node_stats.service_stats = stats

                images = []
                for dk_image in my_engine.list_images():
                    # Copy the tags so the alias added below does not alter the image object
                    names = list(dk_image.tags)
                    for name in dk_image.tags:
                        if name.endswith(':latest'):
                            # add an image with the name without 'latest' to fake Docker image lookup algorithm
                            names.append(name[:-7])
                            break
                    images.append({
                        'id': dk_image.attrs['Id'],
                        'size': dk_image.attrs['Size'],
                        'names': names
                    })
                node_stats.images = images

            sleep_time = CHECK_INTERVAL - (time.time() - time_start)
            if sleep_time <= 0:
                log.warning(
                    'synchro thread for host {} is late by {:.2f} seconds'.
                    format(host_config.name, sleep_time * -1))
                sleep_time = 0
            if self.stop.wait(timeout=sleep_time):
                break

        log.info("Synchro thread for host {} stopped".format(host_config.name))
예제 #3
0
파일: backend.py 프로젝트: bquocminh/zoe
    def _update_node_state(self, host_conf: DockerHostConfig, node_stats: NodeStats, get_usage_stats: bool):
        """Refresh ``node_stats`` with the current state of one Docker host.

        The node is marked 'offline' and left otherwise untouched when the
        Docker client cannot be created. When listing containers or querying
        the engine information fails, the method returns with only the labels
        and status updated.

        :param host_conf: configuration of the Docker host to query
        :param node_stats: statistics object updated in place
        :param get_usage_stats: when true, also collect per-container memory and
            CPU usage (from KairosDB if enabled in the configuration, otherwise
            from the Docker stats API); when false, usage is reported as 0
        """
        # Work on a copy: the engine labels merged below must not end up in the
        # host configuration, where they would pile up at every call.
        # NOTE(review): assumes host_conf.labels is a list, as implied by the
        # original in-place ``+=`` - confirm against DockerHostConfig.
        node_stats.labels = list(host_conf.labels)
        try:
            my_engine = DockerClient(host_conf)
        except ZoeException as e:
            log.error(str(e))
            node_stats.status = 'offline'
            log.info('Node {} is offline'.format(host_conf.name))
            return
        node_stats.status = 'online'

        try:
            container_list = my_engine.list(only_label={'zoe_deployment_name': get_conf().deployment_name})
            info = my_engine.info()
        except ZoeException:
            return

        node_stats.container_count = len(container_list)
        node_stats.cores_total = info['NCPU']
        node_stats.memory_total = info['MemTotal']
        if info['Labels'] is not None:
            for label in info['Labels']:
                if label not in node_stats.labels:
                    node_stats.labels.append(label)

        # A soft limit equal to the host memory is not counted as a reservation
        node_stats.memory_reserved = sum(cont['memory_soft_limit'] for cont in container_list if cont['memory_soft_limit'] != node_stats.memory_total)
        node_stats.cores_reserved = sum(cont['cpu_quota'] / cont['cpu_period'] for cont in container_list if cont['cpu_period'] != 0)

        stats = {}
        for cont in container_list:
            # A zero period would divide by zero; report 0, matching how such
            # containers are left out of cores_reserved above.
            if cont['cpu_period'] != 0:
                core_limit = cont['cpu_quota'] / cont['cpu_period']
            else:
                core_limit = 0
            stats[cont['id']] = {
                'core_limit': core_limit,
                'mem_limit': cont['memory_soft_limit'],
            }
        node_stats.service_stats = stats

        if not get_usage_stats:
            node_stats.memory_in_use = 0
            node_stats.cores_in_use = 0
            return

        if get_conf().kairosdb_enable:
            kdb = KairosDBInMetrics()
            for cont in container_list:
                stats[cont['id']].update(kdb.get_service_usage(cont['name']))
        else:
            for cont in container_list:
                try:
                    aux = my_engine.stats(cont['id'], stream=False)  # this call is very slow (>~1sec)
                    stats[cont['id']]['mem_usage'] = aux['memory_stats'].get('usage', 0)
                    stats[cont['id']]['cpu_usage'] = self._get_core_usage(aux)
                except ZoeException:
                    continue

        # Containers whose usage could not be retrieved count as 0 instead of
        # raising KeyError on the missing entry.
        node_stats.memory_in_use = sum(stat.get('mem_usage', 0) for stat in stats.values())
        node_stats.cores_in_use = sum(stat.get('cpu_usage', 0) for stat in stats.values())
예제 #4
0
    def info(self) -> ClusterStats:
        """Collect cluster statistics from the Swarm manager.

        The ``SystemStatus`` entry returned by the Docker API is a flat list of
        [label, value] rows. Row 0 is the role (unused), rows 1 to 3 carry the
        placement strategy, the active filters and the node count. From row 4
        onwards every node takes nine consecutive rows: a header row with the
        node name and endpoint, followed by ID, status, containers, CPUs,
        memory, labels, last update and Docker version.

        :return: a ClusterStats object filled with one NodeStats per node
        """
        raw = self.cli.info()
        cluster = ClusterStats()
        rows = raw["SystemStatus"]

        # Cluster-wide rows; row 0 (role) is skipped
        assert 'Strategy' in rows[1][0]
        cluster.placement_strategy = rows[1][1]
        assert 'Filters' in rows[2][0]
        cluster.active_filters = [item.strip() for item in rows[2][1].split(", ")]
        assert 'Nodes' in rows[3][0]
        total_nodes = int(rows[3][1])

        first_node_row = 4
        rows_per_node = 9  # header row plus eight detail rows
        for position in range(total_nodes):
            base = first_node_row + position * rows_per_node

            entry = NodeStats(rows[base][0].strip())
            entry.docker_endpoint = rows[base][1]
            # base + 1 is the node ID, which is not used
            entry.status = 'online' if rows[base + 2][1] == 'Healthy' else 'offline'
            entry.container_count = int(rows[base + 3][1].split(' ')[0])

            # CPUs row has the form "<reserved> / <total>"
            cpu_parts = rows[base + 4][1].split(' / ')
            entry.cores_reserved = int(cpu_parts[0])
            entry.cores_total = int(cpu_parts[1])

            # Memory row has the same form; sizes are converted once all rows are read
            mem_parts = rows[base + 5][1].split(' / ')
            raw_mem_reserved = mem_parts[0]
            raw_mem_total = mem_parts[1]

            entry.labels = rows[base + 6][1].split(', ')
            entry.last_update = rows[base + 7][1]
            entry.server_version = rows[base + 8][1]

            entry.memory_reserved = humanfriendly.parse_size(raw_mem_reserved)
            entry.memory_total = humanfriendly.parse_size(raw_mem_total)

            cluster.nodes.append(entry)

        cluster.timestamp = time.time()
        return cluster