async def run_on_docker(
    self,
    docker_client: aiodocker.Docker,
    session: aiohttp.ClientSession,
    task_id: str,
    *,
    run_config: Dict[str, Any],
) -> Optional[str]:
    """Runs the container image defined in the step's properties.

    Running is done asynchronously. The container is awaited so that,
    when running a dependency graph (like a pipeline), a step only
    starts once all its proper ancestors have completed.

    Args:
        docker_client: Docker environment to run containers (async).
        session: HTTP session used to report status updates.
        task_id: UUID of the (pipeline) run this step belongs to.
        run_config: Configuration of the run, e.g. the run endpoint
            and the path to the project directory.
    """
    if not all(parent._status == "SUCCESS" for parent in self.parents):
        # The step cannot be run yet.
        return self._status

    if self._status != "PENDING":
        # The step has already been started. Each parent attempts to
        # start its children when it finishes. When all parents finish
        # simultaneously (with all their _status attributes being
        # "SUCCESS"), not checking whether the child has already
        # started would lead to multiple start attempts of the child,
        # resulting in errors.
        return self._status

    # TODO: better error handling?
    self._status = "STARTED"
    await update_status(
        self._status,
        task_id,
        session,
        type="step",
        run_endpoint=run_config["run_endpoint"],
        uuid=self.properties["uuid"],
    )

    orchest_mounts = get_orchest_mounts(
        project_dir=_config.PROJECT_DIR,
        host_project_dir=run_config["project_dir"],
        mount_form="docker-engine",
    )

    # Add the volume mount.
    orchest_mounts += get_volume_mounts(run_config, task_id)

    device_requests = get_device_requests(
        self.properties["environment"],
        run_config["project_uuid"],
        form="docker-engine",
    )

    # The working directory relative to the project directory is
    # based on the location of the pipeline, e.g. if the pipeline is
    # in /project-dir/my/project/path/mypipeline.orchest the working
    # directory will be my/project/path/.
    working_dir = os.path.split(run_config["pipeline_path"])[0]

    config = {
        "Image": run_config["env_uuid_docker_id_mappings"][
            self.properties["environment"]
        ],
        "Env": [
            f'ORCHEST_STEP_UUID={self.properties["uuid"]}',
            f'ORCHEST_PIPELINE_UUID={run_config["pipeline_uuid"]}',
            f'ORCHEST_PIPELINE_PATH={run_config["pipeline_path"]}',
            f'ORCHEST_PROJECT_UUID={run_config["project_uuid"]}',
            # ORCHEST_MEMORY_EVICTION is never present when running
            # notebooks interactively and otherwise always present.
            # This means eviction of objects from memory can never be
            # triggered when running notebooks interactively. The
            # presence of this environment variable implies that the
            # Orchest SDK will always emit an eviction message given
            # the choice. This, however, does not imply that eviction
            # will actually take place, since the memory server
            # manager will check the pipeline definition settings to
            # decide whether object eviction should take place or not.
            "ORCHEST_MEMORY_EVICTION=1",
        ],
        "HostConfig": {
            "Binds": orchest_mounts,
            "DeviceRequests": device_requests,
            "GroupAdd": [os.environ.get("ORCHEST_HOST_GID")],
        },
        "Cmd": [
            "/orchest/bootscript.sh",
            "runnable",
            working_dir,
            self.properties["file_path"],
        ],
        "NetworkingConfig": {
            # TODO: should not be hardcoded.
            "EndpointsConfig": {"orchest": {}}
        },
        # NOTE: the `'tests-uuid'` key is only used for tests and
        # gets ignored by the `docker_client`.
        "tests-uuid": self.properties["uuid"],
    }

    # Start the container asynchronously. Unlike the `docker run` CLI
    # command, this does not wait for the container to complete, so
    # the container is awaited explicitly below.
    try:
        container = await docker_client.containers.run(
            config=config,
            name=_config.PIPELINE_STEP_CONTAINER_NAME.format(
                run_uuid=task_id, step_uuid=self.properties["uuid"]
            ),
        )

        data = await container.wait()

        # The status code will be 0 for "SUCCESS" and -N otherwise.
        # A negative value -N indicates that the child was terminated
        # by signal N (POSIX only).
        if data.get("StatusCode") != 0:
            self._status = "FAILURE"
            logging.error(
                "Docker container for step %s failed with output:\n%s"
                % (
                    self.properties["uuid"],
                    "".join(await container.log(stdout=True, stderr=True)),
                )
            )
        else:
            self._status = "SUCCESS"
    except Exception as e:
        logging.error("Failed to run Docker container: %s" % e)
        self._status = "FAILURE"
    finally:
        await update_status(
            self._status,
            task_id,
            session,
            type="step",
            run_endpoint=run_config["run_endpoint"],
            uuid=self.properties["uuid"],
        )

    return self._status
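
# For context, a minimal sketch of how a caller could drive `run_on_docker`
# over a pipeline's steps. The sequential topological traversal below is an
# illustrative assumption, not the module's actual scheduler; `run_on_docker`
# itself still guards against running before every parent reports "SUCCESS".
import asyncio

import aiodocker
import aiohttp


async def run_pipeline(steps_in_topological_order, task_id, run_config):
    # Hypothetical driver: steps are visited in topological order, so by
    # the time a step is offered to run, all of its parents have already
    # been awaited.
    docker_client = aiodocker.Docker()
    try:
        async with aiohttp.ClientSession() as session:
            for step in steps_in_topological_order:
                status = await step.run_on_docker(
                    docker_client, session, task_id, run_config=run_config
                )
                if status == "FAILURE":
                    # Descendants would refuse to start anyway, since
                    # their parents did not reach "SUCCESS".
                    break
    finally:
        await docker_client.close()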
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE", None)
    if image_name is None:
        sys.exit(
            "ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!"
        )

    # The container name is composed of KERNEL_USERNAME and KERNEL_ID.
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine the network. If EG_DOCKER_NETWORK has not been
    # propagated, fall back to 'bridge'.
    docker_network = os.environ.get("EG_DOCKER_NETWORK", "bridge")

    # Build labels - these are modeled on Kubernetes: kernel_id,
    # component, app, ...
    labels = dict()
    labels["kernel_id"] = kernel_id
    labels["component"] = "kernel"
    labels["app"] = "enterprise-gateway"

    # Capture env parameters...
    param_env = dict()
    param_env["EG_RESPONSE_ADDRESS"] = response_addr
    param_env["KERNEL_SPARK_CONTEXT_INIT_MODE"] = spark_context_init_mode

    # Since the environment is specific to the kernel (per the env
    # stanza of the kernelspec, KERNEL_ and ENV_WHITELIST), just add
    # the env here.
    param_env.update(os.environ)
    # Let the image's PATH be used. Since PATH is relative to images,
    # we're probably safe.
    param_env.pop("PATH")

    user = param_env.get("KERNEL_UID")
    group = param_env.get("KERNEL_GID")

    # Set up common args.
    kwargs = dict()
    kwargs["name"] = container_name
    kwargs["user"] = user
    kwargs["labels"] = labels

    client = DockerClient.from_env()
    # NOTE: `swarm_mode` and `remove_container` are expected to be
    # module-level settings (e.g. derived from environment variables).
    if swarm_mode:
        print("Starting Jupyter kernel in swarm mode")
        networks = list()
        networks.append(docker_network)
        mounts = list()
        mounts.append(
            "/usr/local/share/jupyter/kernels:/usr/local/share/jupyter/kernels:ro"
        )
        endpoint_spec = EndpointSpec(mode="dnsrr")
        restart_policy = RestartPolicy(condition="none")

        # Finish args setup.
        kwargs["env"] = param_env
        kwargs["endpoint_spec"] = endpoint_spec
        kwargs["restart_policy"] = restart_policy
        kwargs["container_labels"] = labels
        kwargs["networks"] = networks
        kwargs["groups"] = [group, "100"]
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["workdir"] = param_env.get("KERNEL_WORKING_DIR")
        # kwargs['mounts'] = mounts  # Enable if necessary.
        # print("service args: {}".format(kwargs))  # Useful for debugging.
        kernel_service = client.services.create(image_name, **kwargs)
    else:
        print("Starting Jupyter kernel in normal docker mode")
        # NOTE: it seems that the kernels do not need to be mounted on a
        # container that runs a single kernel. Mount the kernel working
        # directory from EG into the kernel container.

        # Finish args setup.
        kwargs["hostname"] = container_name
        kwargs["environment"] = param_env
        kwargs["remove"] = remove_container
        kwargs["network"] = docker_network
        # NOTE: "group_add" is for newer versions of docker.
        kwargs["group_add"] = [group, "100"]
        kwargs["detach"] = True
        if param_env.get("KERNEL_WORKING_DIR"):
            kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")
        # print("container args: {}".format(kwargs))  # Useful for debugging.

        orchest_mounts = get_orchest_mounts(
            project_dir=param_env.get("KERNEL_WORKING_DIR"),
            host_project_dir=param_env.get("ORCHEST_HOST_PROJECT_DIR"),
        )
        orchest_mounts += [
            get_volume_mount(
                param_env.get("ORCHEST_PIPELINE_UUID"),
                param_env.get("ORCHEST_PROJECT_UUID"),
            )
        ]

        # Extract environment_uuid from the image name (last 36 characters).
        extracted_environment_uuid = image_name[-36:]

        device_requests = get_device_requests(
            extracted_environment_uuid, param_env.get("ORCHEST_PROJECT_UUID")
        )

        kernel_container = client.containers.run(
            image_name,
            mounts=orchest_mounts,
            device_requests=device_requests,
            **kwargs,
        )
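
# A minimal sketch of a CLI entry point for the launcher above. The flag
# names and defaults are illustrative assumptions, not Enterprise Gateway's
# actual invocation contract.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # Hypothetical flags; the real launcher may spell these differently.
    parser.add_argument("--kernel-id", dest="kernel_id", required=True)
    parser.add_argument("--response-address", dest="response_addr", required=True)
    parser.add_argument(
        "--spark-context-init-mode",
        dest="spark_context_init_mode",
        default="none",
    )
    args = parser.parse_args()

    launch_docker_kernel(
        args.kernel_id, args.response_addr, args.spark_context_init_mode
    )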
async def run_on_docker(
    self,
    docker_client: aiodocker.Docker,
    session: aiohttp.ClientSession,
    task_id: str,
    *,
    run_config: Dict[str, Any],
) -> Optional[str]:
    """Runs the container image defined in the step's properties.

    Running is done asynchronously. The container is awaited, which
    is required when running a dependency graph (like a pipeline),
    because one step can only be executed once all its proper
    ancestors have completed.

    Args:
        docker_client: Docker environment to run containers (async).
        session: HTTP session used to report status updates.
        task_id: UUID of the (pipeline) run this step belongs to.
        run_config: Configuration of the run, e.g. the run endpoint
            and the path to the project directory.
    """
    if not all(parent._status == "SUCCESS" for parent in self.parents):
        # The step cannot be run yet.
        return self._status

    orchest_mounts = get_orchest_mounts(
        project_dir=_config.PROJECT_DIR,
        host_project_dir=run_config["project_dir"],
        mount_form="docker-engine",
    )

    # Add the volume mount.
    orchest_mounts += get_volume_mounts(run_config, task_id)

    device_requests = get_device_requests(
        self.properties["environment"],
        run_config["project_uuid"],
        form="docker-engine",
    )

    # The working directory relative to the project directory is
    # based on the location of the pipeline, e.g. if the pipeline is
    # in /project-dir/my/project/path/mypipeline.orchest the working
    # directory will be my/project/path/.
    working_dir = os.path.split(run_config["pipeline_path"])[0]

    config = {
        "Image": _config.ENVIRONMENT_IMAGE_NAME.format(
            project_uuid=run_config["project_uuid"],
            environment_uuid=self.properties["environment"],
        ),
        "Env": [
            f'ORCHEST_STEP_UUID={self.properties["uuid"]}',
            f'ORCHEST_PIPELINE_UUID={run_config["pipeline_uuid"]}',
            f'ORCHEST_PIPELINE_PATH={run_config["pipeline_path"]}',
            f'ORCHEST_PROJECT_UUID={run_config["project_uuid"]}',
            "ORCHEST_MEMORY_EVICTION=1",
        ],
        "HostConfig": {
            "Binds": orchest_mounts,
            "DeviceRequests": device_requests,
        },
        "Cmd": [
            "/orchest/bootscript.sh",
            "runnable",
            working_dir,
            self.properties["file_path"],
        ],
        "NetworkingConfig": {
            # TODO: should not be hardcoded.
            "EndpointsConfig": {"orchest": {}}
        },
        # NOTE: the `'tests-uuid'` key is only used for tests and
        # gets ignored by the `docker_client`.
        "tests-uuid": self.properties["uuid"],
    }

    # Start the container asynchronously. Unlike the `docker run` CLI
    # command, this does not wait for the container to complete, so
    # the container is awaited explicitly below.
    try:
        container = await docker_client.containers.run(config=config)
    except Exception as e:
        # TODO: error handling? Without returning here, `container`
        # would be unbound below.
        print("Exception", e)
        self._status = "FAILURE"
        await update_status(
            self._status,
            task_id,
            session,
            type="step",
            run_endpoint=run_config["run_endpoint"],
            uuid=self.properties["uuid"],
        )
        return self._status

    self._status = "STARTED"
    await update_status(
        self._status,
        task_id,
        session,
        type="step",
        run_endpoint=run_config["run_endpoint"],
        uuid=self.properties["uuid"],
    )

    data = await container.wait()

    # The status code will be 0 for "SUCCESS" and -N otherwise. A
    # negative value -N indicates that the child was terminated by
    # signal N (POSIX only).
    self._status = "FAILURE" if data.get("StatusCode") else "SUCCESS"
    await update_status(
        self._status,
        task_id,
        session,
        type="step",
        run_endpoint=run_config["run_endpoint"],
        uuid=self.properties["uuid"],
    )

    # TODO: get the logs (errors are piped to stdout, thus running
    #       "docker logs" should get them). Find the appropriate way
    #       to return them.
    if self._status == "FAILURE":
        pass

    return self._status
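
# The step runners above lean on an `update_status` helper that is not
# shown. A minimal sketch of what such a helper could look like, assuming
# a REST endpoint that accepts a JSON status payload; the URL layout is an
# assumption for illustration, not the actual Orchest API.
from typing import Optional

import aiohttp


async def update_status(
    status: str,
    task_id: str,
    session: aiohttp.ClientSession,
    type: str,
    run_endpoint: str,
    uuid: Optional[str] = None,
) -> None:
    # Hypothetical URL scheme: step statuses are nested under their run.
    if type == "step":
        url = f"{run_endpoint}/{task_id}/{uuid}"
    else:
        url = f"{run_endpoint}/{task_id}"

    async with session.put(url, json={"status": status}) as response:
        # Surface HTTP errors to the caller instead of failing silently.
        response.raise_for_status()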
def launch_docker_kernel(kernel_id, response_addr, spark_context_init_mode):
    # Launches a containerized kernel.

    # Can't proceed if no image was specified.
    image_name = os.environ.get("KERNEL_IMAGE", None)
    if image_name is None:
        sys.exit(
            "ERROR - KERNEL_IMAGE not found in environment - kernel launch terminating!"
        )

    # The container name is composed of KERNEL_USERNAME and KERNEL_ID.
    container_name = os.environ.get("KERNEL_USERNAME", "") + "-" + kernel_id

    # Determine the network. If EG_DOCKER_NETWORK has not been
    # propagated, fall back to 'bridge'.
    docker_network = os.environ.get("EG_DOCKER_NETWORK", "bridge")

    # Build labels - these are modeled on Kubernetes: kernel_id,
    # component, app, ...
    labels = dict()
    labels["kernel_id"] = kernel_id
    labels["component"] = "kernel"
    labels["app"] = "enterprise-gateway"

    # Capture env parameters...
    param_env = dict()
    param_env["EG_RESPONSE_ADDRESS"] = response_addr
    param_env["KERNEL_SPARK_CONTEXT_INIT_MODE"] = spark_context_init_mode

    # Since the environment is specific to the kernel (per the env
    # stanza of the kernelspec, KERNEL_ and ENV_WHITELIST), just add
    # the env here.
    param_env.update(os.environ)
    # Let the image's PATH be used. Since PATH is relative to images,
    # we're probably safe.
    param_env.pop("PATH")

    # Set up common args.
    kwargs = dict()
    kwargs["name"] = container_name
    kwargs["labels"] = labels

    client = DockerClient.from_env()

    print("Starting Jupyter kernel in normal docker mode")

    # NOTE: it seems that the kernels do not need to be mounted on a
    # container that runs a single kernel. Mount the kernel working
    # directory from EG into the kernel container.

    # Finish args setup. `remove_container` is expected to be a
    # module-level setting (e.g. derived from an environment variable).
    kwargs["hostname"] = container_name
    kwargs["environment"] = param_env
    kwargs["remove"] = remove_container
    kwargs["network"] = docker_network
    kwargs["group_add"] = [param_env.get("ORCHEST_HOST_GID")]
    kwargs["detach"] = True
    if param_env.get("KERNEL_WORKING_DIR"):
        kwargs["working_dir"] = param_env.get("KERNEL_WORKING_DIR")
    # print("container args: {}".format(kwargs))  # Useful for debugging.

    orchest_mounts = get_orchest_mounts(
        project_dir=_config.PROJECT_DIR,
        host_user_dir=os.path.join(
            param_env.get("ORCHEST_HOST_PROJECT_DIR"), os.pardir, os.pardir, "data"
        ),
        host_project_dir=param_env.get("ORCHEST_HOST_PROJECT_DIR"),
    )

    volume_source, volume_spec = get_volume_mount(
        param_env.get("ORCHEST_PIPELINE_UUID"),
        param_env.get("ORCHEST_PROJECT_UUID"),
    )
    orchest_mounts[volume_source] = volume_spec

    # Extract environment_uuid from the image name (last 36 characters).
    extracted_environment_uuid = image_name[-36:]

    device_requests = get_device_requests(
        extracted_environment_uuid, param_env.get("ORCHEST_PROJECT_UUID")
    )

    client.containers.run(
        image_name,
        volumes=orchest_mounts,
        device_requests=device_requests,
        **kwargs,
    )
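
# One way to exercise this launcher against a local Docker daemon is to stub
# the environment it reads before calling it. Every value below is a
# placeholder assumption; note that the image name must end in the 36-char
# environment UUID, since the launcher slices it off with `image_name[-36:]`.
import os

os.environ.update(
    {
        # Placeholder image tag ending in a placeholder 36-char UUID.
        "KERNEL_IMAGE": "orchest-env-aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee",
        "KERNEL_USERNAME": "jovyan",
        "EG_DOCKER_NETWORK": "orchest",
        "ORCHEST_PIPELINE_UUID": "00000000-0000-0000-0000-000000000000",
        "ORCHEST_PROJECT_UUID": "00000000-0000-0000-0000-000000000000",
        "ORCHEST_HOST_PROJECT_DIR": "/home/user/project",
        "ORCHEST_HOST_GID": "1000",
    }
)

launch_docker_kernel(
    kernel_id="11111111-1111-1111-1111-111111111111",
    response_addr="127.0.0.1:8877",
    spark_context_init_mode="none",
)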