Exemplo n.º 1
0
    def list_available_images(self, node_name):
        """Return the images stored on the node called ``node_name``.

        Each entry of the returned list is a dict holding the image ``id``,
        its ``size`` and the list of ``names`` (tags). When no configured
        host has the requested name an error is logged and an empty list is
        returned.
        """
        node_conf = next(
            (candidate for candidate in self.docker_config if candidate.name == node_name),
            None)
        if node_conf is None:
            log.error('Unknown node {}, returning empty image list'.format(
                node_name))
            return []

        client = DockerClient(node_conf)

        latest_suffix = ':latest'
        found_images = []
        for docker_image in client.list_images():
            entry = {
                'id': docker_image.attrs['Id'],
                'size': docker_image.attrs['Size'],
                'names': docker_image.tags
            }
            tags = entry['names']
            # Mimic the Docker image lookup algorithm: the first tag ending in
            # ':latest' is also registered under its bare name.
            bare_name = next(
                (tag[:-len(latest_suffix)] for tag in tags if tag.endswith(latest_suffix)),
                None)
            if bare_name is not None:
                tags.append(bare_name)
            found_images.append(entry)
        return found_images
Exemplo n.º 2
0
 def preload_image(self, image_name):
     """Pull an image from a Docker registry into every configured host.

     Hosts are visited in the order of ``self.docker_config``. A failed pull
     on one host is logged and the remaining hosts are still tried.

     :param image_name: image reference; it must carry a version tag or digest
     :raises ZoeException: if the name cannot be parsed, if it has no version
         tag, or if the pull did not succeed on any host
     """
     parsed_name = re.search(
         r'^(?:([^/]+)/)?(?:([^/]+)/)?([^@:/]+)(?:[@:](.+))?$', image_name)
     if parsed_name is None:
         # The pattern accepts at most two '/'-separated prefixes; anything
         # else used to crash with AttributeError on the .group() call below.
         raise ZoeException(
             'Image name {} is not valid'.format(image_name))
     if parsed_name.group(4) is None:
         raise ZoeException(
             'Image {} does not have a version tag'.format(image_name))
     one_success = False
     for host_conf in self.docker_config:
         log.debug('Pre-loading image {} on host {}'.format(
             image_name, host_conf.name))
         time_start = time.time()
         my_engine = DockerClient(host_conf)
         try:
             my_engine.pull_image(image_name)
         except ZoeException:
             log.error('Image {} pre-loading failed on host {}'.format(
                 image_name, host_conf.name))
             continue
         one_success = True
         log.debug('Image {} pre-loaded on host {} in {:.2f}s'.format(
             image_name, host_conf.name,
             time.time() - time_start))
     if not one_success:
         raise ZoeException('Cannot pull image {}'.format(image_name))
Exemplo n.º 3
0
    def _update_node_state(self, host_conf: DockerHostConfig, node_stats: NodeStats, get_usage_stats: bool):
        """Refresh ``node_stats`` with the state of the Docker engine described by ``host_conf``.

        The node is marked 'offline' when the engine cannot be reached and
        'online' otherwise. If the container list or the engine info cannot be
        retrieved, the error is logged and the remaining fields are left
        untouched.

        :param host_conf: configuration of the Docker host to query
        :param node_stats: statistics object updated in place
        :param get_usage_stats: when true, per-container memory and CPU usage
            are collected (from KairosDB if enabled in the configuration,
            otherwise from the engine); when false, usage is reported as 0
        """
        node_stats.labels = host_conf.labels
        try:
            my_engine = DockerClient(host_conf)
        except ZoeException as e:
            log.error(str(e))
            node_stats.status = 'offline'
            log.info('Node {} is offline'.format(host_conf.name))
            return
        node_stats.status = 'online'

        try:
            container_list = my_engine.list(only_label={'zoe_deployment_name': get_conf().deployment_name})
            info = my_engine.info()
        except ZoeException as e:
            # Do not fail silently: the node stays 'online' but its figures are stale.
            log.error('Cannot update the state of node {}: {}'.format(host_conf.name, e))
            return

        node_stats.container_count = len(container_list)
        node_stats.cores_total = info['NCPU']
        node_stats.memory_total = info['MemTotal']
        if info['Labels'] is not None:
            # Build a new collection instead of extending host_conf.labels in
            # place: node_stats.labels aliases the host configuration, so an
            # in-place update would grow the configured labels on every call
            # (and "+=" is not even defined between two sets).
            merged_labels = set(host_conf.labels).union(info['Labels'])
            # Keep whatever container type the host configuration uses.
            node_stats.labels = type(host_conf.labels)(merged_labels)

        node_stats.memory_reserved = sum(cont['memory_soft_limit'] for cont in container_list if cont['memory_soft_limit'] != node_stats.memory_total)
        node_stats.cores_reserved = sum(cont['cpu_quota'] / cont['cpu_period'] for cont in container_list if cont['cpu_period'] != 0)

        stats = {}
        for cont in container_list:
            # A zero CPU period is skipped by the reservation sum above; report
            # a core limit of 0 for it instead of dividing by zero.
            if cont['cpu_period'] != 0:
                core_limit = cont['cpu_quota'] / cont['cpu_period']
            else:
                core_limit = 0
            stats[cont['id']] = {
                'core_limit': core_limit,
                'mem_limit': cont['memory_soft_limit']
            }
        node_stats.service_stats = stats

        if not get_usage_stats:
            node_stats.memory_in_use = 0
            node_stats.cores_in_use = 0
            return

        if get_conf().kairosdb_enable:
            kdb = KairosDBInMetrics()
            for cont in container_list:
                stats[cont['id']].update(kdb.get_service_usage(cont['name']))
        else:
            for cont in container_list:
                try:
                    aux = my_engine.stats(cont['id'], stream=False)  # this call is very slow (>~1sec)
                    if 'usage' in aux['memory_stats']:
                        stats[cont['id']]['mem_usage'] = aux['memory_stats']['usage']
                    else:
                        stats[cont['id']]['mem_usage'] = 0
                    stats[cont['id']]['cpu_usage'] = self._get_core_usage(aux)
                except ZoeException:
                    continue

        # Containers whose usage could not be collected have no usage keys;
        # count them as 0 rather than raising KeyError.
        node_stats.memory_in_use = sum(stat.get('mem_usage', 0) for stat in stats.values())
        node_stats.cores_in_use = sum(stat.get('cpu_usage', 0) for stat in stats.values())
Exemplo n.º 4
0
 def terminate_service(self, service: Service) -> None:
     """Stop and remove the container backing ``service``.

     The service is always moved to the destroy backend status, even when it
     has no backend ID and therefore nothing can be terminated.
     """
     docker_host = self._get_config(service.backend_host)
     client = DockerClient(docker_host)
     if service.backend_id is None:
         log.error('Cannot terminate service {}, since it has no backend ID'.format(service.name))
     else:
         client.terminate_container(service.backend_id, delete=True)
     service.set_backend_status(service.BACKEND_DESTROY_STATUS)
Exemplo n.º 5
0
    def spawn_service(self, service_instance: ServiceInstance):
        """Spawn a service, translating a Zoe Service into a Docker container.

        :param service_instance: description of the service to start; its
            image name must carry a version tag or digest
        :return: a tuple with the container ID, its external address and its ports
        :raises ZoeStartExecutionFatalException: if the image name cannot be
            parsed or has no version tag, or if the container cannot be created
        :raises ZoeStartExecutionRetryException: if the host does not have
            enough free resources for the service
        """
        parsed_name = re.search(r'^(?:([^/]+)/)?(?:([^/]+)/)?([^@:/]+)(?:[@:](.+))?$', service_instance.image_name)
        if parsed_name is None:
            # The pattern accepts at most two '/'-separated prefixes; without
            # this check an unparsable name crashed with AttributeError below.
            raise ZoeStartExecutionFatalException('Image name {} is not valid'.format(service_instance.image_name))
        if parsed_name.group(4) is None:
            raise ZoeStartExecutionFatalException('Image {} does not have a version tag'.format(service_instance.image_name))
        conf = self._get_config(service_instance.backend_host)
        try:
            engine = DockerClient(conf)
            cont_info = engine.spawn_container(service_instance)
        except ZoeNotEnoughResourcesException as e:
            raise ZoeStartExecutionRetryException('Not enough free resources to satisfy reservation request for service {}'.format(service_instance.name)) from e
        except ZoeException as e:
            raise ZoeStartExecutionFatalException(str(e)) from e

        return cont_info["id"], cont_info['external_address'], cont_info['ports']
Exemplo n.º 6
0
 def update_service(self, service, cores=None, memory=None):
     """Update a service reservation.

     Requested values are capped to the totals reported by the Docker engine.
     Errors connecting to the engine, or a service without a backend ID, are
     logged and the method returns without updating anything.

     :param service: the service whose container is updated
     :param cores: new core reservation, or None to leave the CPU quota alone
     :param memory: new memory reservation, or None
     """
     conf = self._get_config(service.backend_host)
     try:
         engine = DockerClient(conf)
     except ZoeException as e:
         log.error(str(e))
         return
     if service.backend_id is None:
         log.error('Cannot update reservations for service {} ({}), since it has no backend ID'.format(service.name, service.id))
         return

     info = engine.info()
     if cores is not None and cores > info['NCPU']:
         cores = info['NCPU']
     if memory is not None and memory > info['MemTotal']:
         memory = info['MemTotal']

     # mem_reservation is forwarded as-is (possibly None), as before.
     limits = {'mem_reservation': memory}
     if cores is not None:
         # The quota is only computed when cores were requested: multiplying
         # the None default used to raise TypeError on memory-only updates.
         # NOTE(review): relies on engine.update() having a default for
         # cpu_quota - confirm against DockerClient.update.
         limits['cpu_quota'] = int(cores * 100000)
     engine.update(service.backend_id, **limits)
Exemplo n.º 7
0
    def _host_subthread(self, host_config: DockerHostConfig):
        """Periodically synchronise the state of one Docker host until ``self.stop`` is set.

        On every iteration the engine is queried for its containers, info and
        images, and the ``NodeStats`` entry of the host in ``self.host_stats``
        is refreshed. Containers without a matching service are reported, and
        removed when they are in the dead state. The loop runs once every
        ``CHECK_INTERVAL`` seconds and logs a warning when an iteration takes
        longer than that.

        :param host_config: configuration of the Docker host to monitor
        """
        log.info("Synchro thread for host {} started".format(host_config.name))

        self.host_stats[host_config.name] = NodeStats(host_config.name)

        while True:
            time_start = time.time()
            node_stats = self.host_stats[host_config.name]
            try:
                my_engine = DockerClient(host_config)
                container_list = my_engine.list(
                    only_label={
                        'zoe_deployment_name': get_conf().deployment_name
                    })
                info = my_engine.info()
            except ZoeException as e:
                node_stats.status = 'offline'
                log.error(str(e))
                log.info('Node {} is offline'.format(host_config.name))
            else:
                if node_stats.status == 'offline':
                    log.info('Node {} is now online'.format(host_config.name))
                    node_stats.status = 'online'

                node_stats.container_count = info['Containers']
                node_stats.cores_total = info['NCPU']
                node_stats.memory_total = info['MemTotal']
                if info['Labels'] is not None:
                    # union() returns a new set; the result has to be stored,
                    # otherwise the engine labels are silently dropped.
                    node_stats.labels = host_config.labels.union(
                        set(info['Labels']))
                else:
                    node_stats.labels = host_config.labels

                node_stats.memory_allocated = sum(
                    cont['memory_soft_limit'] for cont in container_list
                    if cont['memory_soft_limit'] != info['MemTotal'])
                node_stats.cores_allocated = sum(
                    cont['cpu_quota'] / cont['cpu_period']
                    for cont in container_list if cont['cpu_period'] != 0)

                stats = {}
                node_stats.memory_reserved = 0
                node_stats.cores_reserved = 0
                for cont in container_list:
                    service = self.state.services.select(
                        only_one=True,
                        backend_host=host_config.name,
                        backend_id=cont['id'])
                    if service is None:
                        log.warning(
                            'Container {} on host {} has no corresponding service'
                            .format(cont['name'], host_config.name))
                        if cont['state'] == Service.BACKEND_DIE_STATUS:
                            log.warning(
                                'Terminating dead and orphan container {}'.
                                format(cont['name']))
                            my_engine.terminate_container(cont['id'],
                                                          delete=True)
                        continue
                    self._update_service_status(service, cont)
                    node_stats.memory_reserved += service.resource_reservation.memory.min
                    node_stats.cores_reserved += service.resource_reservation.cores.min
                    # A zero CPU period is skipped by the allocation sum above;
                    # report a core limit of 0 for it instead of letting a
                    # ZeroDivisionError kill the thread.
                    if cont['cpu_period'] != 0:
                        core_limit = cont['cpu_quota'] / cont['cpu_period']
                    else:
                        core_limit = 0
                    stats[service.id] = {
                        'core_limit': core_limit,
                        'mem_limit': cont['memory_soft_limit']
                    }
                node_stats.service_stats = stats

                node_stats.images = []
                for dk_image in my_engine.list_images():
                    image = {
                        'id': dk_image.attrs['Id'],
                        'size': dk_image.attrs['Size'],
                        'names': dk_image.tags  # type: list
                    }
                    for name in image['names']:
                        if name[-7:] == ':latest':  # add an image with the name without 'latest' to fake Docker image lookup algorithm
                            image['names'].append(name[:-7])
                            break
                    node_stats.images.append(image)

            sleep_time = CHECK_INTERVAL - (time.time() - time_start)
            if sleep_time <= 0:
                log.warning(
                    'synchro thread for host {} is late by {:.2f} seconds'.
                    format(host_config.name, sleep_time * -1))
                sleep_time = 0
            # wait() doubles as the sleep and as the shutdown check.
            if self.stop.wait(timeout=sleep_time):
                break

        log.info("Synchro thread for host {} stopped".format(host_config.name))
Exemplo n.º 8
0
 def service_log(self, service: Service):
     """Fetch the log of the container backing ``service`` from its Docker host."""
     docker_host = self._get_config(service.backend_host)
     return DockerClient(docker_host).logs(service.backend_id, True, False)