def logs(self, task_id: str):
    """
    Stream the logs of the container that backs a task.

    The container is looked up by task id and its streaming log output is
    passed through json_stream().

    Raises ProviderError if no such container exists or if the docker
    engine cannot be reached.
    """
    try:
        target = self.docker.containers.get(task_id)
        raw_stream = target.logs(stream=True)
        return json_stream(raw_stream)
    except docker.errors.NotFound:
        raise ProviderError(f'No such task: {task_id}')
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def wait_until_ready(self, task_id: str, poll_interval: float = 1):
    """
    Block until the pod backing a task is ready.

    The pod is re-fetched and checked after every sleep. While the pod is
    reported as unschedulable a warning is printed and polling slows down
    to 10 seconds (or poll_interval, if that is longer). As soon as a check
    completes without error, the caller-supplied poll_interval is used
    again.

    Raises ProviderError if no pod exists for the task. Raises
    TaskCreationError if the pod terminated, if its image could not be
    pulled (the task is killed first), or if its configuration is invalid
    (the pod is left in place).
    """
    # keep the caller's interval intact; only the current delay varies
    backoff_interval = max(poll_interval, 10)
    delay = poll_interval

    while True:
        time.sleep(delay)

        pod = self.get_task_pod(task_id)
        if not pod:
            raise ProviderError(f'No such task: {task_id}')

        try:
            if pod_is_ready(pod):
                break
            # not ready yet: go back to the caller's interval rather than
            # a hard-coded value
            delay = poll_interval

        except PodUnschedulableError as e:
            # the scheduler may need a while; poll less aggressively
            delay = backoff_interval
            print('warning: task', task_id, 'is unschedulable:', str(e))

        except PodTerminatedError:
            raise TaskCreationError('Task terminated') from None

        except ImagePullError:
            self.kill(task_id)
            raise TaskCreationError('Image pull failed') from None

        except PodConfigError:
            # todo: check pod events to figure out what went wrong, and
            # report it back.
            # for now, leave the pod running so the user may inspect it
            raise TaskCreationError('Pod configuration error') from None
def create_env(cluster, taskdef):
    """
    Build the string-only environment for a task container.

    Starts from base_environment() and converts every value to a string.
    Dict-valued ("complex") settings are not supported by DockerProvider:
    each one is replaced by the host variable of the same name, else by its
    'fallback' entry, else by an empty string (a warning is printed).

    Raises ProviderError when the combined length of all keys and values
    exceeds MAX_ENV_LENGTH.
    """
    def resolve(name, setting):
        # plain values pass through untouched
        if not isinstance(setting, dict):
            return setting
        # try to inherit the setting from the host
        if name in os.environ:
            return os.environ[name]
        if 'fallback' in setting:
            return setting['fallback']
        origin = setting.get('source', '<unset>')
        print(
            f'Warning: unset environment variable {name} with source "{origin}"'
        )
        return ''

    env = base_environment(cluster, taskdef)

    # accumulate the total size of the environment data while flattening
    total = 0
    for name, setting in env.items():
        text = str(resolve(name, setting))
        total += len(str(name)) + len(text)
        env[name] = text

    if total > MAX_ENV_LENGTH:
        raise ProviderError(
            f'Task environment too long. Was {total}, max: {MAX_ENV_LENGTH}')

    return env
def destroy_all(self) -> None:
    """
    Delete every pod in the provider's namespace that matches the
    LABEL_TASK_ID label selector.

    Nothing is returned.

    Raises ProviderError if the kubernetes API cannot be reached.
    """
    try:
        self.core.delete_collection_namespaced_pod(
            namespace=self.namespace,
            label_selector=LABEL_TASK_ID,
        )
    except urllib3.exceptions.MaxRetryError:
        raise ProviderError('Kubernetes engine unavailable')
def list_all(self) -> list:
    """
    Returns the task definitions of all running tasks.

    Containers are selected by the LABEL_TASK_ID label and each one is
    converted with extract_container_taskdef().

    Raises ProviderError if the docker engine cannot be reached.
    """
    try:
        task_filter = {'label': LABEL_TASK_ID}
        taskdefs = []
        for container in self.docker.containers.list(filters=task_filter):
            taskdefs.append(extract_container_taskdef(container))
        return taskdefs
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def get_task_child_pods(self, task_id: str):
    """
    Return the pods whose parent label equals the given task id.

    Raises ProviderError if the kubernetes API cannot be reached.
    """
    selector = f'{LABEL_PARENT_ID}={task_id}'
    try:
        listing = self.core.list_namespaced_pod(
            namespace=self.namespace,
            label_selector=selector,
        )
        return listing.items
    except urllib3.exceptions.MaxRetryError:
        raise ProviderError('Kubernetes engine unavailable')
def get_task_pod(self, task_id):
    """
    Return the first pod labelled with the given task id, or None when
    no pod matches.

    Raises ProviderError if the kubernetes API cannot be reached.
    """
    selector = f'{LABEL_TASK_ID}={task_id}'
    try:
        listing = self.core.list_namespaced_pod(
            namespace=self.namespace,
            label_selector=selector,
        )
        if len(listing.items) > 0:
            return listing.items[0]
        return None
    except urllib3.exceptions.MaxRetryError:
        raise ProviderError('Kubernetes engine unavailable')
def spawn(self, taskdef: TaskDefinition, deploy: bool = False) -> DockerTask:
    """
    Start a detached docker container for a task definition.

    Ensures the provider network exists, emits 'prepare', runs the
    container and emits 'spawn' with the wrapped DockerTask, which is also
    returned. With deploy set, the container gets an 'always' restart
    policy; otherwise it has none.

    Raises ProviderError on docker API errors (carrying the API's
    explanation) or when the docker engine cannot be reached.
    """
    try:
        self.ensure_network()
        self.emit_sync('prepare', taskdef=taskdef)

        # the cpu limit is handed to docker as a quota within a fixed period
        period = 100000
        quota = float(taskdef.cpu_limit or 0) * period

        # dict entries are evaluated top to bottom, so the helper calls run
        # in the same order as the keyword arguments they feed
        options = {
            'detach': True,
            'image': taskdef.image,
            'name': taskdef.id,
            'hostname': taskdef.id,
            'network': self.network,
            'ports': create_ports(taskdef),
            'environment': create_env(self, taskdef),
            'mounts': create_volumes(taskdef.volumes),
            'cpu_quota': int(quota),
            'cpu_period': int(period),
            'mem_reservation': str(taskdef.memory or 0),
            'mem_limit': str(taskdef.memory_limit or 0),
            'restart_policy': {'Name': 'always'} if deploy else None,
            'labels': {
                LABEL_TASK_ID: taskdef.id,
                LABEL_PARENT_ID: taskdef.parent,
                **taskdef.meta,
            },
        }
        container = self.docker.containers.run(**options)

        spawned = DockerTask(self, taskdef, container)
        self.emit_sync('spawn', task=spawned)
        return spawned

    except docker.errors.APIError as e:
        raise ProviderError(e.explanation)
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def find_child_containers(self, parent_id: str) -> list:
    """
    Finds the containers whose parent label equals the given task id.

    Raises ProviderError if the docker engine cannot be reached.
    """
    try:
        parent_filter = {'label': f'{LABEL_PARENT_ID}={parent_id}'}
        return self.docker.containers.list(filters=parent_filter)
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def kill(self, task_id):
    """
    Delete the pods labelled with the given task id, then emit 'kill'.

    Returns the task id that was passed in.

    Raises ProviderError if the kubernetes API cannot be reached.
    """
    selector = f'{LABEL_TASK_ID}={task_id}'
    try:
        self.core.delete_collection_namespaced_pod(
            namespace=self.namespace,
            label_selector=selector,
        )
        self.emit_sync('kill', task_id=task_id)
    except urllib3.exceptions.MaxRetryError:
        raise ProviderError('Kubernetes engine unavailable')
    return task_id
def destroy_children(self, parent_id: str) -> list:
    """
    Destroy all child tasks of a given task id.

    Each child container found by find_child_containers() is passed to
    destroy() by its task id label. The ids reported by those calls are
    concatenated and returned.

    Raises ProviderError if the docker engine cannot be reached.
    """
    try:
        destroyed = []
        for child in self.find_child_containers(parent_id):
            result = self.destroy(child.labels[LABEL_TASK_ID])
            # destroy() may come back with None (its docker-on-mac
            # workaround path); treat that as "nothing reported" instead of
            # failing on list concatenation
            destroyed.extend(result or [])
        return destroyed
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def list_all(self) -> list:
    """
    Return a KubernetesTask for every task pod whose phase is 'Running'.

    Pods are selected by the LABEL_TASK_ID label within the provider's
    namespace.

    Raises ProviderError if the kubernetes API cannot be reached.
    """
    try:
        listing = self.core.list_namespaced_pod(
            namespace=self.namespace,
            label_selector=LABEL_TASK_ID,
        )
        tasks = []
        for pod in listing.items:
            # skip pods that are pending, finished or failed
            if pod.status.phase != 'Running':
                continue
            tasks.append(KubernetesTask(self, extract_pod_taskdef(pod), pod))
        return tasks
    except urllib3.exceptions.MaxRetryError:
        raise ProviderError('Kubernetes engine unavailable')
def ensure_network(self):
    """
    Make sure the provider's docker network exists.

    Looks the network up by name. When docker reports it as missing, the
    network is created as a bridge network carrying the label cowait=1.

    Raises ProviderError if the docker engine cannot be reached during the
    lookup.
    """
    try:
        self.docker.networks.get(self.network)
    except docker.errors.NotFound:
        print('~~ creating docker network', self.network)
        settings = {
            'name': self.network,
            'check_duplicate': False,
            'driver': 'bridge',
            'labels': {'cowait': '1'},
        }
        self.docker.networks.create(**settings)
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def destroy_all(self) -> None:
    """
    Force-removes every container carrying the LABEL_TASK_ID label.

    The listing is requested with all=True before removal.

    Raises ProviderError if the docker engine cannot be reached.
    """
    try:
        task_filter = {'label': LABEL_TASK_ID}
        matches = self.docker.containers.list(all=True, filters=task_filter)
        for match in matches:
            match.remove(force=True)
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def find_agent(self):
    """
    Locate the agent container and return its remote url.

    The url is built by get_remote_url() from the container's 'http_token'
    label. Returns None when the 'agent' container does not exist, is not
    running, or when the lookup hits the docker-on-mac chunked encoding
    bug.

    Raises ProviderError if the docker engine cannot be reached.
    """
    try:
        agent = self.docker.containers.get('agent')
        if agent.status == 'running':
            http_token = agent.labels['http_token']
            return get_remote_url('agent', http_token)
        return None
    except docker.errors.NotFound:
        return None
    except requests.exceptions.ChunkedEncodingError:
        # workaround for a bug in docker on mac:
        # https://github.com/docker/docker-py/issues/2696
        return None
    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def destroy(self, task_id):
    """
    Destroy a specific task id and all its descendants.

    Descendants are discovered recursively through find_child_containers()
    and removed before their parent. A container that is already gone, or
    whose removal is already in progress, is treated as removed.

    Returns the list of task ids that were removed, children first. If no
    container exists for task_id, returns [task_id]. If the lookup hits the
    docker-on-mac chunked encoding bug, returns an empty list.

    Raises ProviderError if the docker engine cannot be reached, and
    re-raises any other docker API error from the removal.
    """
    def kill_family(container):
        container_task_id = container.labels[LABEL_TASK_ID]

        # children are queried per container and removed depth-first
        kills = []
        for child in self.find_child_containers(container_task_id):
            kills += kill_family(child)

        try:
            container.remove(force=True)
        except docker.errors.NotFound:
            # already removed by someone else
            pass
        except docker.errors.APIError as e:
            # a concurrent removal is fine; anything else is a real error
            if 'already in progress' not in str(e):
                raise

        # report this container's own id, not the id of the root task
        kills.append(container_task_id)
        return kills

    try:
        container = self.docker.containers.get(task_id)
        return kill_family(container)

    except docker.errors.NotFound:
        return [task_id]

    except requests.exceptions.ChunkedEncodingError:
        # workaround for a bug in docker on mac:
        # https://github.com/docker/docker-py/issues/2696
        # nothing can be confirmed as removed; keep the return type a list
        return []

    except requests.exceptions.ConnectionError:
        raise ProviderError('Docker engine unavailable')
def destroy_children(self, parent_id: str) -> list:
    """
    Delete all pods whose parent label equals the given task id.

    Returns the task ids of the child pods that were listed just before
    the deletion request.

    Raises ProviderError if the kubernetes API cannot be reached.
    """
    selector = f'{LABEL_PARENT_ID}={parent_id}'
    try:
        # take a snapshot of the children so their ids can be reported
        listing = self.core.list_namespaced_pod(
            namespace=self.namespace,
            label_selector=selector,
        )

        # remove every pod matching the same selector
        self.core.delete_collection_namespaced_pod(
            namespace=self.namespace,
            label_selector=selector,
        )

        killed = []
        for pod in listing.items:
            killed.append(pod.metadata.labels[LABEL_TASK_ID])
        return killed

    except urllib3.exceptions.MaxRetryError:
        raise ProviderError('Kubernetes engine unavailable')
def spawn(self, taskdef: TaskDefinition, deploy: bool = False) -> KubernetesTask:
    """
    Create a single-container pod for a task definition.

    Emits 'prepare', builds the container and pod specs, creates the pod in
    the provider's namespace and emits 'spawn' with the wrapped
    KubernetesTask, which is also returned. With deploy set, any existing
    pod for the task is killed and awaited first, and the pod restarts
    'Always' instead of 'Never'.

    Raises ProviderError if the kubernetes API cannot be reached.
    """
    try:
        self.emit_sync('prepare', taskdef=taskdef)

        if deploy:
            # a deployment replaces whatever pod currently holds this id
            self.kill(taskdef.id)
            self.wait_until_deleted(taskdef.id)

        volumes, mounts = create_volumes(taskdef.volumes)

        # pieces of the container definition
        env = create_env(self, taskdef)
        ports = create_ports(taskdef.ports)
        resources = client.V1ResourceRequirements(
            requests={
                'cpu': str(taskdef.cpu or '0'),
                'memory': str(taskdef.memory or '0'),
            },
            limits={
                'cpu': str(taskdef.cpu_limit or '0'),
                'memory': str(taskdef.memory_limit or '0'),
            },
        )
        container = client.V1Container(
            name=taskdef.id,
            image=taskdef.image,
            env=env,
            ports=ports,
            image_pull_policy='Always',  # taskdef field??
            resources=resources,
            volume_mounts=mounts,
        )

        # pod metadata and spec
        metadata = client.V1ObjectMeta(
            name=taskdef.id,
            namespace=self.namespace,
            labels={
                LABEL_TASK_ID: taskdef.id,
                LABEL_PARENT_ID: taskdef.parent,
                **taskdef.meta,
            },
        )
        spec = client.V1PodSpec(
            hostname=taskdef.id,
            restart_policy='Always' if deploy else 'Never',
            image_pull_secrets=self.get_pull_secrets(),
            volumes=volumes,
            node_selector=taskdef.nodes,
            containers=[container],
            service_account_name=self.service_account,
            affinity=create_affinity(taskdef.affinity),
        )

        pod = self.core.create_namespaced_pod(
            namespace=self.namespace,
            body=client.V1Pod(metadata=metadata, spec=spec),
        )

        # wrap & return task
        spawned = KubernetesTask(self, taskdef, pod)
        self.emit_sync('spawn', task=spawned)
        return spawned

    except urllib3.exceptions.MaxRetryError:
        raise ProviderError('Kubernetes engine unavailable')