Пример #1
0
    def logs(self, task_id: str):
        """ Stream the logs of the container backing a task.

        Looks the container up by ``task_id``, requests its log output in
        streaming mode and hands that stream to ``json_stream``.

        Raises:
            ProviderError: if no container with that id exists, or if the
                docker engine cannot be reached.
        """
        try:
            task_container = self.docker.containers.get(task_id)
            raw_stream = task_container.logs(stream=True)
            return json_stream(raw_stream)

        except docker.errors.NotFound:
            raise ProviderError(f'No such task: {task_id}')

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #2
0
    def wait_until_ready(self, task_id: str, poll_interval: float = 1):
        """ Block until the pod of a task reports ready.

        Sleeps ``poll_interval`` seconds before every check (including the
        first one). After a check that finds the pod not ready the interval
        is set to 1 second; while the pod is unschedulable it is set to 10
        seconds and a warning is printed.

        Raises:
            ProviderError: if no pod exists for ``task_id``.
            TaskCreationError: if the pod terminated, its image could not be
                pulled (the task is killed first), or its configuration is
                invalid (the pod is left in place).
        """
        while True:
            time.sleep(poll_interval)

            pod = self.get_task_pod(task_id)
            if not pod:
                raise ProviderError(f'No such task: {task_id}')

            try:
                ready = pod_is_ready(pod)

            except PodUnschedulableError as err:
                # back off while the scheduler cannot place the pod
                poll_interval = 10
                print('warning: task', task_id, 'is unschedulable:', str(err))
                continue

            except PodTerminatedError:
                raise TaskCreationError('Task terminated') from None

            except ImagePullError:
                # the pod can never start, so clean it up before reporting
                self.kill(task_id)
                raise TaskCreationError('Image pull failed') from None

            except PodConfigError:
                # TODO: check pod events to figure out what went wrong, and
                # report it back. For now the pod is left running so the user
                # may inspect it.
                raise TaskCreationError('Pod configuration error') from None

            if ready:
                return

            poll_interval = 1
Пример #3
0
def create_env(cluster, taskdef):
    """ Build the string-only environment for a task.

    Starts from ``base_environment(cluster, taskdef)`` and converts every
    value to ``str`` in place. Dict-valued ("complex") settings are not
    supported here and are resolved in this order: the host's variable of
    the same name, the setting's ``'fallback'`` entry, or an empty string
    (with a printed warning).

    Raises:
        ProviderError: if the combined length of all keys and values
            exceeds ``MAX_ENV_LENGTH``.
    """
    env = base_environment(cluster, taskdef)

    total_length = 0
    for key, setting in env.items():
        if not isinstance(setting, dict):
            resolved = setting
        elif key in os.environ:
            # inherit the setting from the host
            resolved = os.environ[key]
        elif 'fallback' in setting:
            resolved = setting['fallback']
        else:
            source = setting.get('source', '<unset>')
            print(
                f'Warning: unset environment variable {key} with source "{source}"'
            )
            resolved = ''

        text = str(resolved)
        # only existing keys are reassigned, so mutating during iteration is safe
        env[key] = text
        total_length += len(str(key)) + len(text)

    if total_length > MAX_ENV_LENGTH:
        raise ProviderError(
            f'Task environment too long. Was {total_length}, max: {MAX_ENV_LENGTH}')

    return env
Пример #4
0
    def destroy_all(self) -> None:
        """ Delete every pod in the namespace that carries the task-id label.

        Issues a single collection delete using ``LABEL_TASK_ID`` as the
        label selector. Nothing is returned.

        Raises:
            ProviderError: if the request fails with a urllib3
                ``MaxRetryError``.
        """
        try:
            self.core.delete_collection_namespaced_pod(
                namespace=self.namespace,
                label_selector=LABEL_TASK_ID,
            )

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
Пример #5
0
    def list_all(self) -> list:
        """ Returns a list of task definitions for all running tasks.

        Lists the containers carrying the task-id label and converts each
        one with ``extract_container_taskdef``.

        Raises:
            ProviderError: if the docker engine cannot be reached.
        """
        label_filter = {'label': LABEL_TASK_ID}
        try:
            taskdefs = []
            for container in self.docker.containers.list(filters=label_filter):
                taskdefs.append(extract_container_taskdef(container))
            return taskdefs

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #6
0
    def get_task_child_pods(self, task_id: str):
        """ Return the pods whose parent-id label equals ``task_id``.

        Returns the ``items`` of the namespaced pod listing.

        Raises:
            ProviderError: if the request fails with a urllib3
                ``MaxRetryError``.
        """
        selector = f'{LABEL_PARENT_ID}={task_id}'
        try:
            listing = self.core.list_namespaced_pod(
                namespace=self.namespace,
                label_selector=selector,
            )
            return listing.items

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
Пример #7
0
    def get_task_pod(self, task_id):
        """ Return the first pod labelled with ``task_id``, or None.

        Raises:
            ProviderError: if the request fails with a urllib3
                ``MaxRetryError``.
        """
        selector = f'{LABEL_TASK_ID}={task_id}'
        try:
            listing = self.core.list_namespaced_pod(
                namespace=self.namespace,
                label_selector=selector,
            )
            # first match if there is one, otherwise None
            return next(iter(listing.items), None)

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
Пример #8
0
    def spawn(self, taskdef: TaskDefinition, deploy: bool = False) -> DockerTask:
        """ Start a docker container for a task definition.

        Ensures the provider network exists, emits ``prepare``, runs a
        detached container named after the task id and emits ``spawn`` with
        the resulting task.

        Args:
            taskdef: definition of the task to run.
            deploy: when true, the container gets the ``always`` restart
                policy; otherwise no restart policy is set.

        Returns:
            A ``DockerTask`` wrapping the created container.

        Raises:
            ProviderError: with the API error explanation if docker rejects
                the request, or if the docker engine cannot be reached.
        """
        try:
            self.ensure_network()
            self.emit_sync('prepare', taskdef=taskdef)

            # the cpu limit is scaled by a fixed period; no limit yields 0
            period = 100000
            quota = float(taskdef.cpu_limit or 0) * period

            # keys are evaluated in this order, matching the call's
            # original argument order
            run_options = dict(
                detach=True,
                image=taskdef.image,
                name=taskdef.id,
                hostname=taskdef.id,
                network=self.network,
                ports=create_ports(taskdef),
                environment=create_env(self, taskdef),
                mounts=create_volumes(taskdef.volumes),
                cpu_quota=int(quota),
                cpu_period=period,
                mem_reservation=str(taskdef.memory or 0),
                mem_limit=str(taskdef.memory_limit or 0),
                restart_policy={'Name': 'always'} if deploy else None,
                labels={
                    LABEL_TASK_ID: taskdef.id,
                    LABEL_PARENT_ID: taskdef.parent,
                    **taskdef.meta,
                },
            )
            container = self.docker.containers.run(**run_options)

            spawned = DockerTask(self, taskdef, container)
            self.emit_sync('spawn', task=spawned)
            return spawned

        except docker.errors.APIError as err:
            raise ProviderError(err.explanation)

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #9
0
    def find_child_containers(self, parent_id: str) -> list:
        """ Finds all child containers of a given task id.

        Returns the containers whose parent-id label equals ``parent_id``.

        Raises:
            ProviderError: if the docker engine cannot be reached.
        """
        label_filter = {'label': f'{LABEL_PARENT_ID}={parent_id}'}
        try:
            return self.docker.containers.list(filters=label_filter)

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #10
0
    def kill(self, task_id):
        """ Delete the pod(s) labelled with ``task_id`` and emit ``kill``.

        Returns:
            The ``task_id`` that was passed in.

        Raises:
            ProviderError: if the request fails with a urllib3
                ``MaxRetryError``.
        """
        selector = f'{LABEL_TASK_ID}={task_id}'
        try:
            self.core.delete_collection_namespaced_pod(
                namespace=self.namespace,
                label_selector=selector,
            )
            self.emit_sync('kill', task_id=task_id)

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')

        return task_id
Пример #11
0
    def destroy_children(self, parent_id: str) -> list:
        """ Destroy all child tasks of a given task id.

        Finds the containers labelled as children of ``parent_id`` and
        destroys each one through ``destroy``.

        Returns:
            The concatenation of the lists returned by ``destroy`` for every
            child.

        Raises:
            ProviderError: if the docker engine cannot be reached.
        """
        try:
            tasks = []
            for child in self.find_child_containers(parent_id):
                # destroy() can return None instead of a list; treat that
                # as "nothing reported" rather than failing on `+=`.
                tasks += self.destroy(child.labels[LABEL_TASK_ID]) or []

            return tasks

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #12
0
    def list_all(self) -> list:
        """ Return a ``KubernetesTask`` for every running task pod.

        Lists the pods carrying the task-id label and keeps those whose
        status phase is ``'Running'``.

        Raises:
            ProviderError: if the request fails with a urllib3
                ``MaxRetryError``.
        """
        try:
            listing = self.core.list_namespaced_pod(
                namespace=self.namespace,
                label_selector=LABEL_TASK_ID,
            )

            tasks = []
            for pod in listing.items:
                if pod.status.phase != 'Running':
                    continue
                tasks.append(
                    KubernetesTask(self, extract_pod_taskdef(pod), pod))
            return tasks

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
Пример #13
0
    def ensure_network(self):
        """ Make sure the provider's docker network exists.

        Looks up ``self.network``; if docker reports it as not found, a
        bridge network with that name and a ``cowait`` label is created.

        Raises:
            ProviderError: if the lookup cannot reach the docker engine.
        """
        network_name = self.network
        try:
            self.docker.networks.get(network_name)

        except docker.errors.NotFound:
            print('~~ creating docker network', self.network)
            network_labels = {'cowait': '1'}
            self.docker.networks.create(
                name=self.network,
                check_duplicate=False,
                driver='bridge',
                labels=network_labels,
            )

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #14
0
    def destroy_all(self) -> None:
        """ Destroys all tasks.

        Lists every container carrying the task-id label (the listing is
        requested with ``all=True``) and force-removes each of them.

        Raises:
            ProviderError: if the docker engine cannot be reached.
        """
        label_filter = {'label': LABEL_TASK_ID}
        try:
            labelled = self.docker.containers.list(
                all=True,
                filters=label_filter,
            )
            for task_container in labelled:
                task_container.remove(force=True)

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #15
0
    def find_agent(self):
        """ Locate the running agent container and return its remote url.

        Returns:
            The result of ``get_remote_url`` built from the ``http_token``
            label of the container named ``agent``, or None if that
            container does not exist or is not running.

        Raises:
            ProviderError: if the docker engine cannot be reached.
        """
        try:
            agent = self.docker.containers.get('agent')
            if agent.status == 'running':
                return get_remote_url('agent', agent.labels['http_token'])
            return None

        except (docker.errors.NotFound,
                requests.exceptions.ChunkedEncodingError):
            # ChunkedEncodingError is a workaround for a bug in docker on mac:
            # https://github.com/docker/docker-py/issues/2696
            return None

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #16
0
    def destroy(self, task_id):
        """ Destroy a specific task id and all its descendants.

        Fetches the container for ``task_id`` and force-removes it together
        with every container found (recursively) through
        ``find_child_containers``. Descendants are removed before their
        parent.

        Returns:
            A list with the task id of every container that was processed,
            descendants first and ``task_id`` last. ``[task_id]`` if the
            container does not exist. None if docker raised a
            ``ChunkedEncodingError`` (see the workaround below).

        Raises:
            ProviderError: if the docker engine cannot be reached.
            docker.errors.APIError: if removing a container fails for a
                reason other than it being missing or already being removed.
        """

        # TODO: optimization: grab a list of all tasks at once, instead of
        # querying for every child.

        def kill_family(container):
            container_task_id = container.labels[LABEL_TASK_ID]

            kills = []
            for child in self.find_child_containers(container_task_id):
                kills += kill_family(child)

            try:
                container.remove(force=True)
            except docker.errors.NotFound:
                # already gone, nothing left to remove
                pass
            except docker.errors.APIError as e:
                # a concurrent removal of the same container is fine
                if 'already in progress' not in str(e):
                    raise

            # record the id of the container handled here, not the id of
            # the root task the whole operation started from
            kills.append(container_task_id)
            return kills

        try:
            container = self.docker.containers.get(task_id)
            return kill_family(container)

        except docker.errors.NotFound:
            return [task_id]

        except requests.exceptions.ChunkedEncodingError:
            # workaround for a bug in docker on mac:
            # https://github.com/docker/docker-py/issues/2696
            return None

        except requests.exceptions.ConnectionError:
            raise ProviderError('Docker engine unavailable')
Пример #17
0
    def destroy_children(self, parent_id: str) -> list:
        """ Delete every pod whose parent-id label equals ``parent_id``.

        The matching pods are listed first, then removed with a single
        collection delete using the same selector.

        Returns:
            The task-id label of each pod found by the initial listing.

        Raises:
            ProviderError: if a request fails with a urllib3
                ``MaxRetryError``.
        """
        selector = f'{LABEL_PARENT_ID}={parent_id}'
        try:
            # snapshot the children before deleting them
            children = self.core.list_namespaced_pod(
                namespace=self.namespace,
                label_selector=selector,
            )

            self.core.delete_collection_namespaced_pod(
                namespace=self.namespace,
                label_selector=selector,
            )

            killed_ids = []
            for child in children.items:
                killed_ids.append(child.metadata.labels[LABEL_TASK_ID])
            return killed_ids

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')
Пример #18
0
    def spawn(self,
              taskdef: TaskDefinition,
              deploy: bool = False) -> KubernetesTask:
        """ Create a pod for a task definition.

        Emits ``prepare``, builds a single-container pod named after the
        task id, submits it to the namespace and emits ``spawn`` with the
        resulting task.

        Args:
            taskdef: definition of the task to run.
            deploy: when true, any existing pod for the task id is killed
                and awaited first, and the pod restart policy is ``Always``
                instead of ``Never``.

        Returns:
            A ``KubernetesTask`` wrapping the created pod.

        Raises:
            ProviderError: if a request fails with a urllib3
                ``MaxRetryError``.
        """
        try:
            self.emit_sync('prepare', taskdef=taskdef)

            if deploy:
                # if deploying, destroy any existing pod
                self.kill(taskdef.id)
                self.wait_until_deleted(taskdef.id)

            volumes, mounts = create_volumes(taskdef.volumes)

            # container definition
            env = create_env(self, taskdef)
            ports = create_ports(taskdef.ports)
            resources = client.V1ResourceRequirements(
                requests={
                    'cpu': str(taskdef.cpu or '0'),
                    'memory': str(taskdef.memory or '0'),
                },
                limits={
                    'cpu': str(taskdef.cpu_limit or '0'),
                    'memory': str(taskdef.memory_limit or '0'),
                },
            )
            container = client.V1Container(
                name=taskdef.id,
                image=taskdef.image,
                env=env,
                ports=ports,
                # TODO: should the pull policy be a taskdef field?
                image_pull_policy='Always',
                resources=resources,
                volume_mounts=mounts,
            )

            # pod definition
            metadata = client.V1ObjectMeta(
                name=taskdef.id,
                namespace=self.namespace,
                labels={
                    LABEL_TASK_ID: taskdef.id,
                    LABEL_PARENT_ID: taskdef.parent,
                    **taskdef.meta,
                },
            )
            spec = client.V1PodSpec(
                hostname=taskdef.id,
                restart_policy='Always' if deploy else 'Never',
                image_pull_secrets=self.get_pull_secrets(),
                volumes=volumes,
                node_selector=taskdef.nodes,
                containers=[container],
                service_account_name=self.service_account,
                affinity=create_affinity(taskdef.affinity),
            )

            pod = self.core.create_namespaced_pod(
                namespace=self.namespace,
                body=client.V1Pod(metadata=metadata, spec=spec),
            )

            # wrap & return task
            spawned = KubernetesTask(self, taskdef, pod)
            self.emit_sync('spawn', task=spawned)
            return spawned

        except urllib3.exceptions.MaxRetryError:
            raise ProviderError('Kubernetes engine unavailable')