Exemplo n.º 1
0
 def preload_image(self, image_name):
     """Pull an image from a Docker registry into each configured host.

     Hosts are visited in the order of ``self.docker_config``. A failure on
     one host is logged and the remaining hosts are still tried; the call
     succeeds as soon as at least one host managed to pull the image.

     :param image_name: image reference; it must carry an explicit tag or digest
     :raises ZoeException: if the name cannot be parsed, has no tag/digest, or the pull failed on every host
     """
     parsed_name = re.search(
         r'^(?:([^/]+)/)?(?:([^/]+)/)?([^@:/]+)(?:[@:](.+))?$', image_name)
     # re.search() returns None when the pattern does not match at all (empty
     # name, more than two '/'-separated prefixes, ...): report that as a
     # ZoeException instead of failing with AttributeError on .group().
     if parsed_name is None:
         raise ZoeException(
             'Image name {} cannot be parsed'.format(image_name))
     if parsed_name.group(4) is None:
         raise ZoeException(
             'Image {} does not have a version tag'.format(image_name))
     one_success = False
     for host_conf in self.docker_config:
         log.debug('Pre-loading image {} on host {}'.format(
             image_name, host_conf.name))
         time_start = time.time()
         try:
             # The client is created inside the try block so that a
             # ZoeException raised while setting it up is handled like a
             # failed pull: this host is skipped, the others are still tried.
             my_engine = DockerClient(host_conf)
             my_engine.pull_image(image_name)
         except ZoeException:
             log.error('Image {} pre-loading failed on host {}'.format(
                 image_name, host_conf.name))
             continue
         one_success = True
         log.debug('Image {} pre-loaded on host {} in {:.2f}s'.format(
             image_name, host_conf.name,
             time.time() - time_start))
     if not one_success:
         raise ZoeException('Cannot pull image {}'.format(image_name))
Exemplo n.º 2
0
    def list(self, only_label=None) -> List[dict]:
        """
        List running or defined containers.

        :param only_label: optional mapping of label name to required value; a container is kept only if it carries every listed label with exactly that value
        :return: a list of container summaries, as built by ``_container_summary``
        :raises ZoeException: if the Docker API call fails
        """
        try:
            ret = self.cli.containers.list(all=True)
        except (docker.errors.APIError, requests.exceptions.RequestException) as ex:
            raise ZoeException(str(ex))
        if only_label is None:
            only_label = {}
        conts = []
        for cont_info in ret:
            # "Labels" can be null/missing for a container without labels;
            # treat that as "no labels" instead of failing on the `in` test.
            labels = cont_info.attrs['Config'].get('Labels') or {}
            # Explicit membership test: a missing label must never match,
            # even when the requested value is None.
            match = all(key in labels and labels[key] == value
                        for key, value in only_label.items())
            if match:
                conts.append(self._container_summary(cont_info))

        return conts
Exemplo n.º 3
0
 def pull_image(self, image_name):
     """Download ``image_name`` through this client's Docker connection.

     :param image_name: the image reference handed to ``images.pull``
     :raises ZoeException: if the Docker API reports an error (the error is also logged)
     """
     try:
         self.cli.images.pull(image_name)
     except docker.errors.APIError as api_error:
         # The same text goes to the log and into the exception.
         failure = 'Cannot download image {}: {}'.format(image_name, api_error)
         log.error(failure)
         raise ZoeException(failure)
Exemplo n.º 4
0
    def loop(self):
        """The API loop.

        Runs forever: receives one JSON message from ``self.zmq_s``,
        dispatches on its ``command`` field and sends exactly one reply
        through ``_reply_ok`` / ``_reply_error``.

        ``self.debug_has_replied`` is reset before each dispatch and checked
        afterwards; it is expected to be set by the ``_reply_*`` helpers
        (not visible in this block).

        :raises ZoeException: if a command branch finished without replying
        """
        while True:
            message = self.zmq_s.recv_json()
            self.debug_has_replied = False
            start_time = time.time()
            command = message['command']
            if command == 'execution_start':
                exec_id = message['exec_id']
                execution = self.state.executions.select(id=exec_id,
                                                         only_one=True)
                if execution is None:
                    self._reply_error(
                        'Execution ID {} not found'.format(exec_id))
                else:
                    # Reply first, then do the work.
                    self._reply_ok()
                    zoe_master.preprocessing.execution_submit(
                        self.state, self.scheduler, execution)
            elif command == 'execution_terminate':
                exec_id = message['exec_id']
                execution = self.state.executions.select(id=exec_id,
                                                         only_one=True)
                if execution is None:
                    self._reply_error(
                        'Execution ID {} not found'.format(exec_id))
                else:
                    self._reply_ok()
                    zoe_master.preprocessing.execution_terminate(
                        self.scheduler, execution)
            elif command == 'execution_delete':
                exec_id = message['exec_id']
                execution = self.state.executions.select(id=exec_id,
                                                         only_one=True)
                # Deleting an unknown execution is not an error.
                if execution is not None:
                    zoe_master.preprocessing.execution_delete(execution)
                self._reply_ok()
            elif command == 'scheduler_stats':
                try:
                    data = self.scheduler.stats()
                    if self.metrics.current_stats is None:
                        data['platform_stats'] = {}
                    else:
                        data['platform_stats'] = \
                            self.metrics.current_stats.serialize()
                except ZoeException as e:
                    log.error(str(e))
                    self._reply_error(str(e))
                else:
                    self._reply_ok(data=data)
            else:
                log.error('Unknown command: {}'.format(command))
                self._reply_error('unknown command')

            if not self.debug_has_replied:
                self._reply_error('bug')
                # The original message left the '{}' placeholder unfilled.
                raise ZoeException(
                    'BUG: command {} does not fill a reply'.format(command))

            log.debug('API call {} took {:.2f}s'.format(
                command,
                time.time() - start_time))
Exemplo n.º 5
0
 def inspect_container(self, docker_id: str) -> Dict[str, Any]:
     """Look up a container by ID and return its summary.

     :param docker_id: the container identifier passed to ``containers.get``
     :return: the dictionary built by ``_container_summary``
     :raises ZoeException: if the lookup fails for any reason
     """
     try:
         container = self.cli.containers.get(docker_id)
     except Exception as lookup_error:
         # Any lookup failure is reported as a ZoeException.
         raise ZoeException(str(lookup_error))
     else:
         return self._container_summary(container)
Exemplo n.º 6
0
    def __init__(self,
                 docker_config: DockerHostConfig,
                 mock_client=None) -> None:
        """Set up a client for one Docker host.

        :param docker_config: host settings; ``name``, ``address``, ``tls`` and the ``tls_*`` paths are read here
        :param mock_client: when given, it is stored as ``self.cli`` and no real connection is made
        :raises ZoeException: if the Docker client cannot be created
        """
        self.name = docker_config.name
        self.docker_config = docker_config

        # The TLS settings are built before the mock check.
        tls = None
        if docker_config.tls:
            certificate_pair = (docker_config.tls_cert, docker_config.tls_key)
            tls = docker.tls.TLSConfig(client_cert=certificate_pair,
                                       verify=docker_config.tls_ca)

        if mock_client is not None:
            # Simplify testing
            self.cli = mock_client
        else:
            try:
                self.cli = docker.DockerClient(base_url=docker_config.address,
                                               version="auto",
                                               tls=tls)
            except docker.errors.DockerException as e:
                error_text = "Cannot connect to Docker host {} at address {}: {}".format(
                    docker_config.name, docker_config.address, str(e))
                raise ZoeException(error_text)
Exemplo n.º 7
0
    def stats(self, docker_id: str, stream: bool):
        """Return the resource usage statistics of a container.

        :param docker_id: the container identifier passed to ``containers.get``
        :param stream: forwarded unchanged to the container's ``stats`` call
        :return: whatever the container's ``stats(stream=...)`` call returns
        :raises ZoeException: if the container is not found, or on a Docker API error, read timeout or decoding error
        """
        # Step 1: resolve the container object.
        try:
            container = self.cli.containers.get(docker_id)
        except docker.errors.NotFound:
            raise ZoeException('Container not found')
        except docker.errors.APIError as api_error:
            raise ZoeException('Docker API error: {}'.format(api_error))

        # Step 2: ask it for its statistics.
        try:
            usage = container.stats(stream=stream)
        except docker.errors.APIError as api_error:
            raise ZoeException('Docker API error: {}'.format(api_error))
        except requests.exceptions.ReadTimeout:
            raise ZoeException('Read timeout')
        except ValueError:
            raise ZoeException('Docker API decoding error')
        return usage
Exemplo n.º 8
0
def _get_backend() -> Union[BaseBackend, None]:
    """Build the backend selected by the ``backend`` configuration option.

    'Swarm' passes the assertion below but has no branch of its own, so it
    ends up in the fall-through: an error is logged and None is returned.

    :return: a backend instance, or None when no branch handles the configured name
    :raises ZoeException: if the selected backend class is None
    """
    backend_name = get_conf().backend
    assert backend_name in ['Kubernetes', 'Swarm', 'DockerEngine']

    if backend_name == 'Kubernetes':
        if KubernetesBackend is None:
            raise ZoeException(
                'The Kubernetes backend requires the pykube module')
        return KubernetesBackend(get_conf())

    if backend_name == 'DockerEngine':
        if DockerEngineBackend is None:
            raise ZoeException(
                'The Docker Engine backend requires docker python version >= 2.0.2'
            )
        return DockerEngineBackend(get_conf())

    log.error('Unknown backend selected')
    return None
Exemplo n.º 9
0
    def loop(self):
        """Serve API requests from the ZeroMQ socket, forever.

        Each iteration receives one JSON message from ``self.zmq_s``,
        dispatches on its ``command`` field and sends exactly one reply
        through ``_reply_ok`` / ``_reply_error``.

        ``self.debug_has_replied`` is reset before each dispatch and checked
        afterwards; it is expected to be set by the ``_reply_*`` helpers
        (not visible in this block).

        :raises ZoeException: if a command branch finished without replying
        """
        assert isinstance(config.singletons['sql_manager'],
                          zoe_lib.sql_manager.SQLManager)
        while True:
            message = self.zmq_s.recv_json()
            self.debug_has_replied = False
            start_time = time.time()
            command = message['command']
            if command == 'execution_start':
                exec_id = message['exec_id']
                execution = config.singletons['sql_manager'].execution_list(
                    id=exec_id, only_one=True)
                if execution is None:
                    self._reply_error(
                        'Execution ID {} not found'.format(exec_id))
                else:
                    execution.set_scheduled()
                    # Reply first, then do the work.
                    self._reply_ok()
                    zoe_master.execution_manager.execution_submit(execution)
            elif command == 'execution_terminate':
                exec_id = message['exec_id']
                execution = config.singletons['sql_manager'].execution_list(
                    id=exec_id, only_one=True)
                if execution is None:
                    self._reply_error(
                        'Execution ID {} not found'.format(exec_id))
                else:
                    execution.set_cleaning_up()
                    self._reply_ok()
                    zoe_master.execution_manager.execution_terminate(execution)
            elif command == 'execution_delete':
                exec_id = message['exec_id']
                execution = config.singletons['sql_manager'].execution_list(
                    id=exec_id, only_one=True)
                # Deleting an unknown execution is not an error.
                if execution is not None:
                    zoe_master.execution_manager.execution_delete(execution)
                self._reply_ok()
            elif command == 'service_inspect':
                service_id = message['service_id']
                service = config.singletons['sql_manager'].service_list(
                    id=service_id, only_one=True)
                if service is None:
                    self._reply_error('no such service')
                else:
                    try:
                        swarm = SwarmClient(config.get_conf())
                        info = swarm.inspect_container(service.docker_id)
                    except ZoeException as e:
                        # Answer the request with an error instead of letting
                        # the exception leave the loop before any reply.
                        log.error(str(e))
                        self._reply_error(str(e))
                    else:
                        self._reply_ok(info)
            else:
                log.error('Unknown command: {}'.format(command))
                self._reply_error('unknown command')

            if not self.debug_has_replied:
                self._reply_error('bug')
                # The original message left the '{}' placeholder unfilled.
                raise ZoeException(
                    'BUG: command {} does not fill a reply'.format(command))

            config.singletons['metric'].metric_api_call(start_time, command)
Exemplo n.º 10
0
    def loop(self):
        """The API loop.

        Runs forever: receives one JSON message from ``self.zmq_s``,
        dispatches on its ``command`` field and sends exactly one reply
        through ``_reply_ok`` / ``_reply_error``.

        ``self.debug_has_replied`` is reset before each dispatch and checked
        afterwards; it is expected to be set by the ``_reply_*`` helpers
        (not visible in this block).

        :raises ZoeException: if a command branch finished without replying
        """
        while True:
            message = self.zmq_s.recv_json()
            self.debug_has_replied = False
            start_time = time.time()
            command = message['command']
            if command == 'execution_start':
                exec_id = message['exec_id']
                execution = self.state.execution_list(id=exec_id,
                                                      only_one=True)
                if execution is None:
                    self._reply_error(
                        'Execution ID {} not found'.format(exec_id))
                else:
                    execution.set_scheduled()
                    # Reply first, then do the work.
                    self._reply_ok()
                    zoe_master.execution_manager.execution_submit(
                        self.state, self.scheduler, execution)
            elif command == 'execution_terminate':
                exec_id = message['exec_id']
                execution = self.state.execution_list(id=exec_id,
                                                      only_one=True)
                if execution is None:
                    self._reply_error(
                        'Execution ID {} not found'.format(exec_id))
                else:
                    execution.set_cleaning_up()
                    self._reply_ok()
                    zoe_master.execution_manager.execution_terminate(
                        self.scheduler, execution)
            elif command == 'execution_delete':
                exec_id = message['exec_id']
                execution = self.state.execution_list(id=exec_id,
                                                      only_one=True)
                # Deleting an unknown execution is not an error.
                if execution is not None:
                    zoe_master.execution_manager.execution_delete(
                        self.scheduler, execution)
                self._reply_ok()
            elif command == 'scheduler_stats':
                data = self.scheduler.stats()
                self._reply_ok(data=data)
            else:
                log.error('Unknown command: {}'.format(command))
                self._reply_error('unknown command')

            if not self.debug_has_replied:
                self._reply_error('bug')
                # The original message left the '{}' placeholder unfilled.
                raise ZoeException(
                    'BUG: command {} does not fill a reply'.format(command))

            self.metrics.metric_api_call(start_time, command)
Exemplo n.º 11
0
    def list(self, only_label=None, status=None) -> List[dict]:
        """
        List running or defined containers.

        :param only_label: when not None, passed to Docker as the ``label`` filter
        :param status: when not None, passed to Docker as the ``status`` filter (one of restarting, running, paused, exited)
        :return: a list of container summaries, as built by ``_container_summary``
        :raises ZoeException: if the Docker API call fails
        """
        # Only criteria that were actually given become Docker filters.
        requested = {'label': only_label, 'status': status}
        filters = {name: value
                   for name, value in requested.items()
                   if value is not None}
        try:
            found = self.cli.containers.list(all=True, filters=filters)
        except docker.errors.APIError as ex:
            raise ZoeException(str(ex))
        except requests.exceptions.RequestException as ex:
            raise ZoeException(str(ex))

        return [self._container_summary(cont_info) for cont_info in found]
Exemplo n.º 12
0
 def __init__(self) -> None:
     """Connect to the Swarm manager named by the ``backend_swarm_url`` option.

     The URL scheme selects how the manager address is obtained:
     ``zk://`` and ``consul://`` resolve it through ``zookeeper_swarm`` /
     ``consul_swarm``, while ``http://`` and ``https://`` use the URL
     itself (``https://`` also loads the configured TLS material).

     :raises ZoeException: on an unsupported scheme, a missing optional package, or a Docker connection failure
     """
     url = get_conf().backend_swarm_url
     tls = False
     # The scheme must be a prefix: the slices below strip it from the front,
     # so a substring match elsewhere in the URL would cut the wrong text.
     if url.startswith('zk://'):
         if KazooClient is None:
             raise ZoeException('ZooKeeper URL for Swarm, but the kazoo package is not installed')
         url = url[len('zk://'):]
         manager = zookeeper_swarm(url, get_conf().backend_swarm_zk_path)
     elif url.startswith('consul://'):
         if Consul is None:
             raise ZoeException('Consul URL for Swarm, but the consul package is not installed')
         url = url[len('consul://'):]
         manager = consul_swarm(url)
     elif url.startswith('http://'):
         manager = url
     elif url.startswith('https://'):
         tls = docker.tls.TLSConfig(client_cert=(get_conf().backend_swarm_tls_cert, get_conf().backend_swarm_tls_key), verify=get_conf().backend_swarm_tls_ca)
         manager = url
     else:
         raise ZoeException('Unsupported URL scheme for Swarm')
     try:
         self.cli = docker.DockerClient(base_url=manager, version="auto", tls=tls)
     except docker.errors.DockerException:
         raise ZoeException("Cannot connect to Docker")
Exemplo n.º 13
0
    def spawn_container(self, service_instance: ServiceInstance) -> Dict[str, Any]:
        """Create and start a new container.

        :param service_instance: describes the container to run (image, command, ports, environment, volumes, limits, labels)
        :return: the summary of the new container, as built by ``_container_summary``
        :raises ZoeNotEnoughResourcesException: if Docker reports that no resources are available to schedule the container
        :raises ZoeException: if the image is not found or for any other failure while starting the container
        """
        run_args = {
            'detach': True,
            'ports': {},
            'environment': {},
            'volumes': {},
            'working_dir': service_instance.work_dir,
            'mem_limit': 0,
            'mem_reservation': 0,
            'memswap_limit': 0,
            'name': service_instance.name,
            'network_disabled': False,
            'network_mode': get_conf().overlay_network_name,
            'image': service_instance.image_name,
            'command': service_instance.command,
            'hostname': service_instance.hostname,
            'labels': service_instance.labels,
            'cpu_period': 100000,
            'cpu_quota': 100000,
            'log_config': {
                "type": "json-file",
                "config": {}
            }
        }
        for port in service_instance.ports:
            run_args['ports'][str(port.number) + '/' + port.proto] = None

        for name, value in service_instance.environment:
            run_args['environment'][name] = value

        for volume in service_instance.volumes:
            if volume.type == "host_directory":
                assert isinstance(volume, VolumeDescriptionHostPath)
                run_args['volumes'][volume.path] = {'bind': volume.mount_point, 'mode': ("ro" if volume.readonly else "rw")}
            else:
                # Unsupported volume types are logged and skipped.
                log.error('Swarm backend does not support volume type {}'.format(volume.type))

        if service_instance.memory_limit is not None:
            run_args['mem_limit'] = service_instance.memory_limit.max
            run_args['mem_reservation'] = service_instance.memory_limit.min
            # Keep the reservation strictly below the limit when both are equal.
            if service_instance.memory_limit.max == service_instance.memory_limit.min:
                run_args['mem_reservation'] -= 1

        if service_instance.core_limit is not None:
            # The quota is expressed relative to the 100000 'cpu_period' above.
            run_args['cpu_quota'] = int(100000 * service_instance.core_limit.min)

        if get_conf().gelf_address != '':
            run_args['log_config'] = {
                "type": "gelf",
                "config": {
                    'gelf-address': get_conf().gelf_address,
                    'labels': ",".join(service_instance.labels)
                }
            }

        # When run() raises, no container object has been returned, so there
        # is nothing this method can remove in the handlers below.
        try:
            cont = self.cli.containers.run(**run_args)
        except docker.errors.ImageNotFound:
            raise ZoeException(message='Image not found')
        except docker.errors.APIError as e:
            # The explanation may be bytes or text; normalise it so the
            # "no resources" case is recognised in both forms.
            explanation = e.explanation
            if isinstance(explanation, bytes):
                explanation = explanation.decode('utf-8', errors='replace')
            if explanation == 'no resources available to schedule container':
                raise ZoeNotEnoughResourcesException(message=str(e))
            raise ZoeException(message=str(e))
        except Exception as e:
            raise ZoeException(str(e))

        # Fetch the container again before summarising it.
        cont = self.cli.containers.get(cont.id)
        return self._container_summary(cont)
Exemplo n.º 14
0
    def spawn_container(self, service_instance: ServiceInstance) -> Dict[str, Any]:
        """Create and start a new container.

        :param service_instance: describes the container to run (image, command, ports, environment, volumes, memory limit, labels)
        :return: the summary of the new container, as built by ``_container_summary``
        :raises ZoeNotEnoughResourcesException: if Docker reports that no resources are available to schedule the container
        :raises ZoeException: if the image is not found or for any other failure while starting the container
        """
        port_bindings = {}  # type: Dict[str, Any]
        for port in service_instance.ports:
            port_bindings[str(port.number) + '/' + port.proto] = None

        environment = {}
        for name, value in service_instance.environment:
            environment[name] = value

        volumes = {}
        for volume in service_instance.volumes:
            if volume.type == "host_directory":
                assert isinstance(volume, VolumeDescriptionHostPath)
                volumes[volume.path] = {'bind': volume.mount_point, 'mode': ("ro" if volume.readonly else "rw")}
            else:
                # Unsupported volume types are logged and skipped.
                log.error('Swarm backend does not support volume type {}'.format(volume.type))

        if service_instance.memory_limit is not None:
            mem_limit = service_instance.memory_limit.max
        else:
            mem_limit = 0
        # Swarm backend does not support cores in a consistent way, see https://github.com/docker/swarm/issues/475

        if get_conf().gelf_address != '':
            log_config = {
                "type": "gelf",
                "config": {
                    'gelf-address': get_conf().gelf_address,
                    'labels': ",".join(service_instance.labels)
                }
            }
        else:
            log_config = {
                "type": "json-file",
                "config": {}
            }

        # When run() raises, no container object has been returned, so there
        # is nothing this method can remove in the handlers below.
        try:
            cont = self.cli.containers.run(image=service_instance.image_name,
                                           command=service_instance.command,
                                           detach=True,
                                           environment=environment,
                                           hostname=service_instance.hostname,
                                           labels=service_instance.labels,
                                           log_config=log_config,
                                           mem_limit=mem_limit,
                                           memswap_limit=0,
                                           name=service_instance.name,
                                           network_disabled=False,
                                           network_mode=get_conf().overlay_network_name,
                                           ports=port_bindings,
                                           working_dir=service_instance.work_dir,
                                           volumes=volumes)
        except docker.errors.ImageNotFound:
            raise ZoeException(message='Image not found')
        except docker.errors.APIError as e:
            # The explanation may be bytes or text; normalise it so the
            # "no resources" case is recognised in both forms.
            explanation = e.explanation
            if isinstance(explanation, bytes):
                explanation = explanation.decode('utf-8', errors='replace')
            if explanation == 'no resources available to schedule container':
                raise ZoeNotEnoughResourcesException(message=str(e))
            raise ZoeException(message=str(e))
        except Exception as e:
            raise ZoeException(str(e))

        # Fetch the container again before summarising it.
        cont = self.cli.containers.get(cont.id)
        return self._container_summary(cont)