def generate_pod_spec_for_task():
    # Primary containers do not require us to specify an image; the default
    # image built for flyte tasks will get used.
    primary_container = V1Container(name="primary")

    # Note: for non-primary containers we must specify an image.
    secondary_container = V1Container(name="secondary", image="alpine")
    # Assign directly rather than calling .extend(): the kubernetes client
    # initializes command/args to None, so .extend() would raise AttributeError.
    secondary_container.command = ["/bin/sh"]
    secondary_container.args = ["-c", "echo hi pod world > {}".format(_SHARED_DATA_PATH)]

    resources = V1ResourceRequirements(
        requests={"cpu": "1", "memory": "100Mi"},
        limits={"cpu": "1", "memory": "100Mi"},
    )
    primary_container.resources = resources
    secondary_container.resources = resources

    # The kubernetes Python client uses snake_case attribute names (volume_mounts).
    shared_volume_mount = V1VolumeMount(name="shared-data", mount_path="/data")
    secondary_container.volume_mounts = [shared_volume_mount]
    primary_container.volume_mounts = [shared_volume_mount]

    pod_spec = V1PodSpec(
        containers=[primary_container, secondary_container],
        volumes=[
            V1Volume(
                name="shared-data", empty_dir=V1EmptyDirVolumeSource(medium="Memory")
            )
        ],
    )
    return pod_spec
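
# Usage sketch (an illustration, not part of the original snippet): wiring the
# generated spec into a Flyte pod task. This assumes the flytekitplugins-pod
# plugin is installed; the task body is a hypothetical example.
from flytekit import task
from flytekitplugins.pod import Pod


@task(
    task_config=Pod(
        pod_spec=generate_pod_spec_for_task(),
        primary_container_name="primary",
    )
)
def my_pod_task() -> str:
    # The primary container runs this body; the secondary "alpine" container
    # writes to the shared emptyDir volume mounted at /data.
    with open(_SHARED_DATA_PATH) as f:
        return f.read()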
def get_container(train_op, train_env, train_num_gpus, drive='coco-headset-vol-1'):
    (train_op.container
     .set_memory_request('56Gi')
     .set_memory_limit('56Gi')
     .set_cpu_request('7.5')
     .set_cpu_limit('7.5')
     .set_gpu_limit(str(train_num_gpus))
     .add_volume_mount(V1VolumeMount(name='tensorboard', mount_path='/shared/tensorboard'))
     .add_volume_mount(V1VolumeMount(name='data', mount_path='/data/'))
     .add_volume_mount(V1VolumeMount(name='shm', mount_path='/dev/shm')))

    (add_env(add_ssh_volume(train_op), train_env)
     .add_toleration(V1Toleration(key='nvidia.com/gpu',
                                  operator='Exists',
                                  effect='NoSchedule'))
     .add_node_selector_constraint('beta.kubernetes.io/instance-type',
                                   f'p3.{2 * train_num_gpus}xlarge')
     .add_volume(V1Volume(
         name='tensorboard',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name='tensorboard-research-kf')))
     .add_volume(V1Volume(
         name='data',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name=drive)))
     # .add_volume(V1Volume(name='shm', host_path=V1HostPathVolumeSource(path='/dev/shm')))
     .add_volume(V1Volume(name='shm',
                          empty_dir=V1EmptyDirVolumeSource(medium='Memory'))))
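
# Usage sketch (assumed, not in the original source): get_container factors out
# the container/volume setup that train_eval_epic below duplicates inline, so a
# training op can be built like this. The component path and argument names
# mirror train_eval_epic.
def make_train_op(owner, project, experiment, model, git_rev, pretrained_s3,
                  mode, train_env, train_num_gpus, additional_args='',
                  drive='coco-headset-vol-1'):
    train_op = components.load_component_from_file('components/train.yaml')(
        owner=owner,
        project=project,
        experiment=experiment,
        model=model,
        git_rev=git_rev,
        pretrained_s3=pretrained_s3,
        mode=mode,
        additional_args=additional_args)
    get_container(train_op, train_env, train_num_gpus, drive=drive)
    return train_op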
def train_eval_epic(owner, project, experiment, model, git_rev, pretrained_s3,
                    mode, train_additional_args='', eval_additional_args=''):
    train_env = {}
    train_num_gpus = 1
    train_op = components.load_component_from_file('components/train.yaml')(
        owner=owner,
        project=project,
        experiment=experiment,
        model=model,
        git_rev=git_rev,
        pretrained_s3=pretrained_s3,
        mode=mode,
        additional_args=train_additional_args)

    (train_op.container
     .set_memory_request('56Gi')
     .set_memory_limit('56Gi')
     .set_cpu_request('7.5')
     .set_cpu_limit('7.5')
     .set_gpu_limit(str(train_num_gpus))
     .add_volume_mount(V1VolumeMount(name='tensorboard', mount_path='/shared/tensorboard'))
     .add_volume_mount(V1VolumeMount(name='data', mount_path='/data/'))
     .add_volume_mount(V1VolumeMount(name='shm', mount_path='/dev/shm')))

    (add_env(add_ssh_volume(train_op), train_env)
     .add_toleration(V1Toleration(key='nvidia.com/gpu',
                                  operator='Exists',
                                  effect='NoSchedule'))
     .add_node_selector_constraint('beta.kubernetes.io/instance-type',
                                   f'p3.{2 * train_num_gpus}xlarge')
     .add_volume(V1Volume(
         name='tensorboard',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name='tensorboard-research-kf')))
     .add_volume(V1Volume(
         name='data',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name='dataset-epic-kitchen')))
     # .add_volume(V1Volume(name='shm', host_path=V1HostPathVolumeSource(path='/dev/shm')))
     .add_volume(V1Volume(name='shm',
                          empty_dir=V1EmptyDirVolumeSource(medium='Memory'))))
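
# Compilation sketch (assumed): wrapping train_eval_epic in a kfp v1 pipeline
# and compiling it to a workflow package. The decorator and Compiler calls are
# standard kfp v1 APIs; the pipeline name and output path are placeholders.
import kfp
from kfp import dsl


@dsl.pipeline(name='train-eval-epic',
              description='Train and evaluate a model on EPIC-Kitchens')
def train_eval_epic_pipeline(owner: str, project: str, experiment: str,
                             model: str, git_rev: str, pretrained_s3: str,
                             mode: str):
    train_eval_epic(owner, project, experiment, model, git_rev, pretrained_s3, mode)


if __name__ == '__main__':
    kfp.compiler.Compiler().compile(train_eval_epic_pipeline, 'train_eval_epic.yaml')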
def __init__(self,
             pipeline_name: str,
             experiment_name: str,
             notebook: str,
             cos_endpoint: str,
             cos_bucket: str,
             cos_directory: str,
             cos_dependencies_archive: str,
             pipeline_version: Optional[str] = '',
             pipeline_source: Optional[str] = None,
             pipeline_outputs: Optional[List[str]] = None,
             pipeline_inputs: Optional[List[str]] = None,
             pipeline_envs: Optional[Dict[str, str]] = None,
             requirements_url: Optional[str] = None,
             bootstrap_script_url: Optional[str] = None,
             emptydir_volume_size: Optional[str] = None,
             cpu_request: Optional[str] = None,
             mem_request: Optional[str] = None,
             gpu_limit: Optional[str] = None,
             workflow_engine: Optional[str] = 'argo',
             **kwargs):
    """Create a new instance of ContainerOp.

    Args:
        pipeline_name: pipeline that this op belongs to
        experiment_name: the experiment where pipeline_name is executed
        notebook: name of the notebook that will be executed per this operation
        cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
        cos_bucket: bucket to retrieve archive from
        cos_directory: name of the directory in the object storage bucket to pull
        cos_dependencies_archive: archive file name to get from object storage bucket, e.g. archive1.tar.gz
        pipeline_version: optional version identifier
        pipeline_source: pipeline source
        pipeline_outputs: comma-delimited list of files produced by the notebook
        pipeline_inputs: comma-delimited list of files to be consumed by/required by the notebook
        pipeline_envs: dictionary of environment variables to set in the container prior to execution
        requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
        bootstrap_script_url: URL to a custom python bootstrap script to run
        emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-O container runtime
        cpu_request: number of CPUs requested for the operation
        mem_request: memory requested for the operation (in Gi)
        gpu_limit: maximum number of GPUs allowed for the operation
        workflow_engine: Kubeflow workflow engine, defaults to 'argo'
        kwargs: additional key/value pairs to pass, e.g. name, image, sidecars & is_exit_handler.
                See the Kubeflow Pipelines ContainerOp definition for more parameters or how to use:
                https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
    """
    self.pipeline_name = pipeline_name
    self.pipeline_version = pipeline_version
    self.pipeline_source = pipeline_source
    self.experiment_name = experiment_name
    self.notebook = notebook
    self.notebook_name = os.path.basename(notebook)
    self.cos_endpoint = cos_endpoint
    self.cos_bucket = cos_bucket
    self.cos_directory = cos_directory
    self.cos_dependencies_archive = cos_dependencies_archive
    self.container_work_dir_root_path = "./"
    self.container_work_dir_name = "jupyter-work-dir/"
    self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
    self.bootstrap_script_url = bootstrap_script_url
    self.requirements_url = requirements_url
    self.pipeline_outputs = pipeline_outputs
    self.pipeline_inputs = pipeline_inputs
    self.pipeline_envs = pipeline_envs
    self.cpu_request = cpu_request
    self.mem_request = mem_request
    self.gpu_limit = gpu_limit

    argument_list = []

    # CRI-O support for kfp pipelines:
    # we need to attach an emptydir volume for each notebook that runs, since the
    # CRI-O runtime does not allow us to write to the base image layer file
    # system, only to volumes.
    self.emptydir_volume_name = "workspace"
    self.emptydir_volume_size = emptydir_volume_size
    self.python_user_lib_path = ''
    self.python_user_lib_path_target = ''
    self.python_pip_config_url = ''

    if self.emptydir_volume_size:
        self.container_work_dir_root_path = "/opt/app-root/src/"
        self.container_python_dir_name = "python3/"
        self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
        self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
        self.python_user_lib_path_target = '--target=' + self.python_user_lib_path
        self.python_pip_config_url = ELYRA_PIP_CONFIG_URL

    if not self.bootstrap_script_url:
        self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL

    if not self.requirements_url:
        self.requirements_url = ELYRA_REQUIREMENTS_URL

    if 'name' not in kwargs:
        raise TypeError("You need to provide a name for the operation.")
    elif not kwargs.get('name'):
        raise ValueError("You need to provide a name for the operation.")

    if 'image' not in kwargs:
        raise ValueError("You need to provide an image.")

    if not notebook:
        raise ValueError("You need to provide a notebook.")

    if 'arguments' not in kwargs:
        # If no arguments are passed, we use our own.
        # If ['arguments'] are set, we assume the container's ENTRYPOINT is set
        # and dependencies are installed.
        # NOTE: images being pulled must have python3 available on PATH and the cURL utility.
        argument_list.append('mkdir -p {container_work_dir} && cd {container_work_dir} && '
                             'curl -H "Cache-Control: no-cache" -L {bootscript_url} --output bootstrapper.py && '
                             'curl -H "Cache-Control: no-cache" -L {reqs_url} --output requirements-elyra.txt && '
                             .format(container_work_dir=self.container_work_dir,
                                     bootscript_url=self.bootstrap_script_url,
                                     reqs_url=self.requirements_url)
                             )

        if self.emptydir_volume_size:
            argument_list.append('mkdir {container_python_dir} && cd {container_python_dir} && '
                                 'curl -H "Cache-Control: no-cache" -L {python_pip_config_url} '
                                 '--output pip.conf && cd .. &&'
                                 .format(python_pip_config_url=self.python_pip_config_url,
                                         container_python_dir=self.container_python_dir_name)
                                 )

        argument_list.append('python3 -m pip install {python_user_lib_path_target} packaging && '
                             'python3 -m pip freeze > requirements-current.txt && '
                             'python3 bootstrapper.py '
                             '--cos-endpoint {cos_endpoint} '
                             '--cos-bucket {cos_bucket} '
                             '--cos-directory "{cos_directory}" '
                             '--cos-dependencies-archive "{cos_dependencies_archive}" '
                             '--file "{notebook}" '
                             .format(cos_endpoint=self.cos_endpoint,
                                     cos_bucket=self.cos_bucket,
                                     cos_directory=self.cos_directory,
                                     cos_dependencies_archive=self.cos_dependencies_archive,
                                     notebook=self.notebook,
                                     python_user_lib_path_target=self.python_user_lib_path_target)
                             )

        if self.pipeline_inputs:
            inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
            argument_list.append('--inputs "{}" '.format(inputs_str))

        if self.pipeline_outputs:
            outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
            argument_list.append('--outputs "{}" '.format(outputs_str))

        if self.emptydir_volume_size:
            argument_list.append('--user-volume-path "{}" '.format(self.python_user_lib_path))

        kwargs['command'] = ['sh', '-c']
        kwargs['arguments'] = "".join(argument_list)

    super().__init__(**kwargs)

    # We must deal with the envs after the superclass initialization since these
    # amend the container attribute, which isn't available until now.
    if self.pipeline_envs:
        for key, value in self.pipeline_envs.items():  # Convert dict entries to the format kfp needs
            self.container.add_env_variable(V1EnvVar(name=key, value=value))

    # If a CRI-O volume size is set, assume the Kubeflow Pipelines environment
    # is using CRI-O as its container runtime
    if self.emptydir_volume_size:
        self.add_volume(V1Volume(empty_dir=V1EmptyDirVolumeSource(
            medium="",
            size_limit=self.emptydir_volume_size),
            name=self.emptydir_volume_name))

        self.container.add_volume_mount(V1VolumeMount(mount_path=self.container_work_dir_root_path,
                                                      name=self.emptydir_volume_name))

        # Append the location of the elyra dependencies installed in the volume to PYTHONPATH
        self.container.add_env_variable(V1EnvVar(name='PYTHONPATH',
                                                 value=self.python_user_lib_path))

    if self.cpu_request:
        self.container.set_cpu_request(cpu=str(cpu_request))

    if self.mem_request:
        self.container.set_memory_request(memory=str(mem_request) + "G")

    if self.gpu_limit:
        # pipeline_envs may be None; fall back to an empty dict to avoid AttributeError
        gpu_vendor = (self.pipeline_envs or {}).get('GPU_VENDOR', 'nvidia')
        self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor)

    # Generate a unique ELYRA_RUN_NAME value and expose it as an environment
    # variable in the container
    if workflow_engine and workflow_engine.lower() == 'argo':
        run_name_placeholder = '{{workflow.annotations.pipelines.kubeflow.org/run_name}}'
        self.container.add_env_variable(V1EnvVar(name='ELYRA_RUN_NAME',
                                                 value=run_name_placeholder))
    else:
        # For Tekton derive the value from the specified pod annotation
        annotation = 'pipelines.kubeflow.org/run_name'
        field_path = f"metadata.annotations['{annotation}']"
        self.container.add_env_variable(
            V1EnvVar(name='ELYRA_RUN_NAME',
                     value_from=V1EnvVarSource(
                         field_ref=V1ObjectFieldSelector(field_path=field_path))))

    # Attach metadata to the pod
    # Node type (a static type for this op)
    self.add_pod_label('elyra/node-type',
                       NotebookOp._normalize_label_value('notebook-script'))
    # Pipeline name
    self.add_pod_label('elyra/pipeline-name',
                       NotebookOp._normalize_label_value(self.pipeline_name))
    # Pipeline version
    self.add_pod_label('elyra/pipeline-version',
                       NotebookOp._normalize_label_value(self.pipeline_version))
    # Experiment name
    self.add_pod_label('elyra/experiment-name',
                       NotebookOp._normalize_label_value(self.experiment_name))
    # Pipeline node name
    self.add_pod_label('elyra/node-name',
                       NotebookOp._normalize_label_value(kwargs.get('name')))
    # Pipeline node file
    self.add_pod_annotation('elyra/node-file-name', self.notebook)

    # Identify the pipeline source, which can be a pipeline file
    # (mypipeline.pipeline), a Python script, or a notebook that was submitted
    if self.pipeline_source is not None:
        self.add_pod_annotation('elyra/pipeline-source', self.pipeline_source)
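
# Usage sketch (assumed, not from the original source): constructing the op
# inside a kfp pipeline. All argument values are illustrative placeholders;
# name and image are forwarded to ContainerOp via kwargs.
from kfp import dsl


@dsl.pipeline(name="notebook-pipeline")
def notebook_pipeline():
    notebook_op = NotebookOp(
        name="run-analysis",
        pipeline_name="analysis.pipeline",
        experiment_name="analysis-experiment",
        notebook="analysis.ipynb",
        cos_endpoint="http://minio-service:9000",
        cos_bucket="pipelines",
        cos_directory="analysis",
        cos_dependencies_archive="analysis-deps.tar.gz",
        pipeline_envs={"GPU_VENDOR": "nvidia"},
        image="amancevice/pandas:1.1.1",
        workflow_engine="argo",
    )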
def __init__(
    self,
    pipeline_name: str,
    experiment_name: str,
    notebook: str,
    cos_endpoint: str,
    cos_bucket: str,
    cos_directory: str,
    cos_dependencies_archive: str,
    pipeline_version: Optional[str] = "",
    pipeline_source: Optional[str] = None,
    pipeline_outputs: Optional[List[str]] = None,
    pipeline_inputs: Optional[List[str]] = None,
    pipeline_envs: Optional[Dict[str, str]] = None,
    requirements_url: Optional[str] = None,
    bootstrap_script_url: Optional[str] = None,
    emptydir_volume_size: Optional[str] = None,
    cpu_request: Optional[str] = None,
    mem_request: Optional[str] = None,
    gpu_limit: Optional[str] = None,
    workflow_engine: Optional[str] = "argo",
    volume_mounts: Optional[List[VolumeMount]] = None,
    kubernetes_secrets: Optional[List[KubernetesSecret]] = None,
    **kwargs,
):
    """Create a new instance of ContainerOp.

    Args:
        pipeline_name: pipeline that this op belongs to
        experiment_name: the experiment where pipeline_name is executed
        notebook: name of the notebook that will be executed per this operation
        cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
        cos_bucket: bucket to retrieve archive from
        cos_directory: name of the directory in the object storage bucket to pull
        cos_dependencies_archive: archive file name to get from object storage bucket, e.g. archive1.tar.gz
        pipeline_version: optional version identifier
        pipeline_source: pipeline source
        pipeline_outputs: comma-delimited list of files produced by the notebook
        pipeline_inputs: comma-delimited list of files to be consumed by/required by the notebook
        pipeline_envs: dictionary of environment variables to set in the container prior to execution
        requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
        bootstrap_script_url: URL to a custom python bootstrap script to run
        emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-O container runtime
        cpu_request: number of CPUs requested for the operation
        mem_request: memory requested for the operation (in Gi)
        gpu_limit: maximum number of GPUs allowed for the operation
        workflow_engine: Kubeflow workflow engine, defaults to 'argo'
        volume_mounts: data volumes to be mounted
        kubernetes_secrets: secrets to be made available as environment variables
        kwargs: additional key/value pairs to pass, e.g. name, image, sidecars & is_exit_handler.
            See the Kubeflow Pipelines ContainerOp definition for more parameters or how to use:
            https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
    """
    self.pipeline_name = pipeline_name
    self.pipeline_version = pipeline_version
    self.pipeline_source = pipeline_source
    self.experiment_name = experiment_name
    self.notebook = notebook
    self.notebook_name = os.path.basename(notebook)
    self.cos_endpoint = cos_endpoint
    self.cos_bucket = cos_bucket
    self.cos_directory = cos_directory
    self.cos_dependencies_archive = cos_dependencies_archive
    self.container_work_dir_root_path = "./"
    self.container_work_dir_name = "jupyter-work-dir/"
    self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
    self.bootstrap_script_url = bootstrap_script_url
    self.requirements_url = requirements_url
    self.pipeline_outputs = pipeline_outputs
    self.pipeline_inputs = pipeline_inputs
    self.pipeline_envs = pipeline_envs
    self.cpu_request = cpu_request
    self.mem_request = mem_request
    self.gpu_limit = gpu_limit
    self.volume_mounts = volume_mounts  # optional data volumes to be mounted to the pod
    self.kubernetes_secrets = kubernetes_secrets  # optional secrets to be made available as env vars

    argument_list = []

    # CRI-O support for kfp pipelines:
    # we need to attach an emptydir volume for each notebook that runs, since the
    # CRI-O runtime does not allow us to write to the base image layer file
    # system, only to volumes.
    self.emptydir_volume_name = "workspace"
    self.emptydir_volume_size = emptydir_volume_size
    self.python_user_lib_path = ""
    self.python_user_lib_path_target = ""
    self.python_pip_config_url = ""

    if self.emptydir_volume_size:
        self.container_work_dir_root_path = "/opt/app-root/src/"
        self.container_python_dir_name = "python3/"
        self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
        self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
        self.python_user_lib_path_target = "--target=" + self.python_user_lib_path
        self.python_pip_config_url = ELYRA_PIP_CONFIG_URL

    if not self.bootstrap_script_url:
        self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL

    if not self.requirements_url:
        self.requirements_url = ELYRA_REQUIREMENTS_URL

    if "name" not in kwargs:
        raise TypeError("You need to provide a name for the operation.")
    elif not kwargs.get("name"):
        raise ValueError("You need to provide a name for the operation.")

    if "image" not in kwargs:
        raise ValueError("You need to provide an image.")

    if not notebook:
        raise ValueError("You need to provide a notebook.")

    if "arguments" not in kwargs:
        # If no arguments are passed, we use our own.
        # If ['arguments'] are set, we assume the container's ENTRYPOINT is set
        # and dependencies are installed.
        # NOTE: images being pulled must have python3 available on PATH and the cURL utility.
        common_curl_options = '--fail -H "Cache-Control: no-cache"'

        argument_list.append(
            f"mkdir -p {self.container_work_dir} && cd {self.container_work_dir} && "
            f"echo 'Downloading {self.bootstrap_script_url}' && "
            f"curl {common_curl_options} -L {self.bootstrap_script_url} --output bootstrapper.py && "
            f"echo 'Downloading {self.requirements_url}' && "
            f"curl {common_curl_options} -L {self.requirements_url} --output requirements-elyra.txt && "
            f"echo 'Downloading {ELYRA_REQUIREMENTS_URL_PY37}' && "
            f"curl {common_curl_options} -L {ELYRA_REQUIREMENTS_URL_PY37} --output requirements-elyra-py37.txt && "
        )

        if self.emptydir_volume_size:
            argument_list.append(
                f"mkdir {self.container_python_dir_name} && cd {self.container_python_dir_name} && "
                f"echo 'Downloading {self.python_pip_config_url}' && "
                f"curl {common_curl_options} -L {self.python_pip_config_url} --output pip.conf && cd .. &&"
            )

        argument_list.append(
            f"python3 -m pip install {self.python_user_lib_path_target} packaging && "
            "python3 -m pip freeze > requirements-current.txt && "
            "python3 bootstrapper.py "
            f'--pipeline-name "{self.pipeline_name}" '
            f"--cos-endpoint {self.cos_endpoint} "
            f"--cos-bucket {self.cos_bucket} "
            f'--cos-directory "{self.cos_directory}" '
            f'--cos-dependencies-archive "{self.cos_dependencies_archive}" '
            f'--file "{self.notebook}" '
        )

        if self.pipeline_inputs:
            inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
            argument_list.append(f'--inputs "{inputs_str}" ')

        if self.pipeline_outputs:
            outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
            argument_list.append(f'--outputs "{outputs_str}" ')

        if self.emptydir_volume_size:
            argument_list.append(f'--user-volume-path "{self.python_user_lib_path}" ')

        kwargs["command"] = ["sh", "-c"]
        kwargs["arguments"] = "".join(argument_list)

    super().__init__(**kwargs)

    # Add user-specified volume mounts: the referenced PVCs must exist,
    # or this generic operation will fail
    if self.volume_mounts:
        unique_pvcs = []
        for volume_mount in self.volume_mounts:
            if volume_mount.pvc_name not in unique_pvcs:
                self.add_volume(
                    V1Volume(
                        name=volume_mount.pvc_name,
                        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                            claim_name=volume_mount.pvc_name),
                    ))
                unique_pvcs.append(volume_mount.pvc_name)
            self.container.add_volume_mount(
                V1VolumeMount(mount_path=volume_mount.path,
                              name=volume_mount.pvc_name))

    # We must deal with the envs after the superclass initialization since these
    # amend the container attribute, which isn't available until now.
    if self.pipeline_envs:
        for key, value in self.pipeline_envs.items():  # Convert dict entries to the format kfp needs
            self.container.add_env_variable(V1EnvVar(name=key, value=value))

    if self.kubernetes_secrets:
        for secret in self.kubernetes_secrets:  # Convert tuple entries to the format kfp needs
            self.container.add_env_variable(
                V1EnvVar(
                    name=secret.env_var,
                    value_from=V1EnvVarSource(
                        secret_key_ref=V1SecretKeySelector(name=secret.name,
                                                           key=secret.key)),
                ))

    # If a CRI-O volume size is set, assume the Kubeflow Pipelines environment
    # is using CRI-O as its container runtime
    if self.emptydir_volume_size:
        self.add_volume(
            V1Volume(
                empty_dir=V1EmptyDirVolumeSource(
                    medium="", size_limit=self.emptydir_volume_size),
                name=self.emptydir_volume_name,
            ))

        self.container.add_volume_mount(
            V1VolumeMount(mount_path=self.container_work_dir_root_path,
                          name=self.emptydir_volume_name))

        # Append the location of the elyra dependencies installed in the volume to PYTHONPATH
        self.container.add_env_variable(
            V1EnvVar(name="PYTHONPATH", value=self.python_user_lib_path))

    if self.cpu_request:
        self.container.set_cpu_request(cpu=str(cpu_request))

    if self.mem_request:
        self.container.set_memory_request(memory=str(mem_request) + "G")

    if self.gpu_limit:
        # pipeline_envs may be None; fall back to an empty dict to avoid AttributeError
        gpu_vendor = (self.pipeline_envs or {}).get("GPU_VENDOR", "nvidia")
        self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor)

    # Generate a unique ELYRA_RUN_NAME value and expose it as an environment
    # variable in the container
    if not workflow_engine:
        raise ValueError("workflow_engine is missing and needs to be specified.")
    if workflow_engine.lower() == "argo":
        # Attach RUN_ID_PLACEHOLDER as run name; the
        # '{{workflow.annotations.pipelines.kubeflow.org/run_name}}' variable
        # cannot be resolved by Argo in KF 1.4
        run_name_placeholder = RUN_ID_PLACEHOLDER
        self.container.add_env_variable(
            V1EnvVar(name="ELYRA_RUN_NAME", value=run_name_placeholder))
    elif workflow_engine.lower() == "tekton":
        try:
            from kfp_tekton import TektonClient  # noqa: F401
        except ImportError:
            raise ValueError(
                "kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine."
            )

        # For Tekton derive the value from the specified pod annotation
        annotation = "pipelines.kubeflow.org/run_name"
        field_path = f"metadata.annotations['{annotation}']"
        self.container.add_env_variable(
            V1EnvVar(
                name="ELYRA_RUN_NAME",
                value_from=V1EnvVarSource(
                    field_ref=V1ObjectFieldSelector(field_path=field_path)),
            ))
    else:
        raise ValueError(f"{workflow_engine} is not a supported workflow engine.")

    # Attach metadata to the pod
    # Node type (a static type for this op)
    self.add_pod_label("elyra/node-type",
                       ExecuteFileOp._normalize_label_value("notebook-script"))
    # Pipeline name
    self.add_pod_label("elyra/pipeline-name",
                       ExecuteFileOp._normalize_label_value(self.pipeline_name))
    # Pipeline version
    self.add_pod_label("elyra/pipeline-version",
                       ExecuteFileOp._normalize_label_value(self.pipeline_version))
    # Experiment name
    self.add_pod_label("elyra/experiment-name",
                       ExecuteFileOp._normalize_label_value(self.experiment_name))
    # Pipeline node name
    self.add_pod_label("elyra/node-name",
                       ExecuteFileOp._normalize_label_value(kwargs.get("name")))
    # Pipeline node file
    self.add_pod_annotation("elyra/node-file-name", self.notebook)

    # Identify the pipeline source, which can be a pipeline file
    # (mypipeline.pipeline), a Python script, or a notebook that was submitted
    if self.pipeline_source is not None:
        self.add_pod_annotation("elyra/pipeline-source", self.pipeline_source)
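
# Usage sketch (assumed): mounting an existing PVC and exposing a secret as an
# environment variable. VolumeMount and KubernetesSecret are the elyra helper
# types referenced in the signature above; the field names (pvc_name/path and
# env_var/name/key) follow their usage inside this constructor. All values are
# placeholders.
op = ExecuteFileOp(
    name="train-notebook",
    pipeline_name="training.pipeline",
    experiment_name="training-experiment",
    notebook="train.ipynb",
    cos_endpoint="http://minio-service:9000",
    cos_bucket="pipelines",
    cos_directory="training",
    cos_dependencies_archive="train-deps.tar.gz",
    image="tensorflow/tensorflow:2.8.0",
    workflow_engine="argo",
    volume_mounts=[VolumeMount(path="/mnt/datasets", pvc_name="datasets-pvc")],
    kubernetes_secrets=[
        KubernetesSecret(env_var="AWS_SECRET_ACCESS_KEY",
                         name="cos-credentials",
                         key="secret_key")
    ],
)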
def __init__(self,
             notebook: str,
             cos_endpoint: str,
             cos_bucket: str,
             cos_directory: str,
             cos_dependencies_archive: str,
             pipeline_outputs: Optional[List[str]] = None,
             pipeline_inputs: Optional[List[str]] = None,
             pipeline_envs: Optional[Dict[str, str]] = None,
             requirements_url: str = None,
             bootstrap_script_url: str = None,
             emptydir_volume_size: str = None,
             **kwargs):
    """Create a new instance of ContainerOp.

    Args:
        notebook: name of the notebook that will be executed per this operation
        cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
        cos_bucket: bucket to retrieve archive from
        cos_directory: name of the directory in the object storage bucket to pull
        cos_dependencies_archive: archive file name to get from object storage bucket, e.g. archive1.tar.gz
        pipeline_outputs: comma-delimited list of files produced by the notebook
        pipeline_inputs: comma-delimited list of files to be consumed by/required by the notebook
        pipeline_envs: dictionary of environment variables to set in the container prior to execution
        requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
        bootstrap_script_url: URL to a custom python bootstrap script to run
        emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-O container runtime
        kwargs: additional key/value pairs to pass, e.g. name, image, sidecars & is_exit_handler.
                See the Kubeflow Pipelines ContainerOp definition for more parameters or how to use:
                https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
    """
    self.notebook = notebook
    self.notebook_name = self._get_file_name_with_extension(notebook, 'ipynb')
    self.cos_endpoint = cos_endpoint
    self.cos_bucket = cos_bucket
    self.cos_directory = cos_directory
    self.cos_dependencies_archive = cos_dependencies_archive
    self.container_work_dir_root_path = "./"
    self.container_work_dir_name = "jupyter-work-dir/"
    self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
    self.bootstrap_script_url = bootstrap_script_url
    self.requirements_url = requirements_url
    self.pipeline_outputs = pipeline_outputs
    self.pipeline_inputs = pipeline_inputs
    self.pipeline_envs = pipeline_envs

    argument_list = []

    # CRI-O support for kfp pipelines:
    # we need to attach an emptydir volume for each notebook that runs, since the
    # CRI-O runtime does not allow us to write to the base image layer file
    # system, only to volumes.
    self.emptydir_volume_name = "workspace"
    self.emptydir_volume_size = emptydir_volume_size
    self.python_user_lib_path = ''
    self.python_user_lib_path_target = ''
    self.python_pip_config_url = ''

    if self.emptydir_volume_size:
        self.container_work_dir_root_path = "/opt/app-root/src/"
        self.container_python_dir_name = "python3/"
        self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
        self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
        self.python_user_lib_path_target = '--target=' + self.python_user_lib_path
        self.python_pip_config_url = 'https://raw.githubusercontent.com/{org}/' \
                                     'kfp-notebook/{branch}/etc/pip.conf'. \
                                     format(org=KFP_NOTEBOOK_ORG, branch=KFP_NOTEBOOK_BRANCH)

    if not self.bootstrap_script_url:
        self.bootstrap_script_url = 'https://raw.githubusercontent.com/{org}/' \
                                    'kfp-notebook/{branch}/etc/docker-scripts/bootstrapper.py'. \
                                    format(org=KFP_NOTEBOOK_ORG, branch=KFP_NOTEBOOK_BRANCH)

    if not self.requirements_url:
        self.requirements_url = 'https://raw.githubusercontent.com/{org}/' \
                                'kfp-notebook/{branch}/etc/requirements-elyra.txt'. \
                                format(org=KFP_NOTEBOOK_ORG, branch=KFP_NOTEBOOK_BRANCH)

    if 'image' not in kwargs:
        raise ValueError("You need to provide an image.")

    if not notebook:
        raise ValueError("You need to provide a notebook.")

    if 'arguments' not in kwargs:
        # If no arguments are passed, we use our own.
        # If ['arguments'] are set, we assume the container's ENTRYPOINT is set
        # and dependencies are installed.
        # NOTE: images being pulled must have python3 available on PATH and the cURL utility.
        argument_list.append('mkdir -p {container_work_dir} && cd {container_work_dir} && '
                             'curl -H "Cache-Control: no-cache" -L {bootscript_url} --output bootstrapper.py && '
                             'curl -H "Cache-Control: no-cache" -L {reqs_url} --output requirements-elyra.txt && '
                             .format(container_work_dir=self.container_work_dir,
                                     bootscript_url=self.bootstrap_script_url,
                                     reqs_url=self.requirements_url)
                             )

        if self.emptydir_volume_size:
            argument_list.append('mkdir {container_python_dir} && cd {container_python_dir} && '
                                 'curl -H "Cache-Control: no-cache" -L {python_pip_config_url} '
                                 '--output pip.conf && cd .. &&'
                                 .format(python_pip_config_url=self.python_pip_config_url,
                                         container_python_dir=self.container_python_dir_name)
                                 )

        argument_list.append('python3 -m pip install {python_user_lib_path_target} packaging && '
                             'python3 -m pip freeze > requirements-current.txt && '
                             'python3 bootstrapper.py '
                             '--cos-endpoint {cos_endpoint} '
                             '--cos-bucket {cos_bucket} '
                             '--cos-directory "{cos_directory}" '
                             '--cos-dependencies-archive "{cos_dependencies_archive}" '
                             '--file "{notebook}" '
                             .format(cos_endpoint=self.cos_endpoint,
                                     cos_bucket=self.cos_bucket,
                                     cos_directory=self.cos_directory,
                                     cos_dependencies_archive=self.cos_dependencies_archive,
                                     notebook=self.notebook,
                                     python_user_lib_path_target=self.python_user_lib_path_target)
                             )

        if self.pipeline_inputs:
            inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
            argument_list.append('--inputs "{}" '.format(inputs_str))

        if self.pipeline_outputs:
            outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
            argument_list.append('--outputs "{}" '.format(outputs_str))

        if self.emptydir_volume_size:
            argument_list.append('--user-volume-path "{}" '.format(self.python_user_lib_path))

        kwargs['command'] = ['sh', '-c']
        kwargs['arguments'] = "".join(argument_list)

    super().__init__(**kwargs)

    # We must deal with the envs after the superclass initialization since these
    # amend the container attribute, which isn't available until now.
    if self.pipeline_envs:
        for key, value in self.pipeline_envs.items():  # Convert dict entries to the format kfp needs
            self.container.add_env_variable(V1EnvVar(name=key, value=value))

    # If a CRI-O volume size is set, assume the Kubeflow Pipelines environment
    # is using CRI-O as its container runtime
    if self.emptydir_volume_size:
        self.add_volume(V1Volume(empty_dir=V1EmptyDirVolumeSource(
            medium="",
            size_limit=self.emptydir_volume_size),
            name=self.emptydir_volume_name))

        self.container.add_volume_mount(V1VolumeMount(mount_path=self.container_work_dir_root_path,
                                                      name=self.emptydir_volume_name))

        # Append the location of the elyra dependencies installed in the volume to PYTHONPATH
        self.container.add_env_variable(V1EnvVar(name='PYTHONPATH',
                                                 value=self.python_user_lib_path))
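
# Illustrative sketch (an assumption, not the library's actual implementation)
# of the _artifact_list_to_str helper referenced above: it joins the artifact
# names into the comma-delimited string that bootstrapper.py expects for the
# --inputs and --outputs flags.
def _artifact_list_to_str(self, pipeline_array):
    # Strip surrounding whitespace from each artifact name before joining.
    trimmed = [artifact.strip() for artifact in pipeline_array]
    return ",".join(trimmed)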