Example No. 1
from kubernetes.client.models import (
    V1Container,
    V1EmptyDirVolumeSource,
    V1PodSpec,
    V1ResourceRequirements,
    V1Volume,
    V1VolumeMount,
)


def generate_pod_spec_for_task():

    # Primary containers do not require us to specify an image; the default
    # image built for Flyte tasks will be used.
    primary_container = V1Container(name="primary")

    # Note: for non-primary containers we must specify an image.
    secondary_container = V1Container(name="secondary", image="alpine")
    # command and args default to None on V1Container, so assign lists rather
    # than calling .extend() on them. _SHARED_DATA_PATH is assumed to be
    # defined at module level.
    secondary_container.command = ["/bin/sh"]
    secondary_container.args = [
        "-c", "echo hi pod world > {}".format(_SHARED_DATA_PATH)
    ]

    resources = V1ResourceRequirements(
        requests={"cpu": "1", "memory": "100Mi"}, limits={"cpu": "1", "memory": "100Mi"}
    )
    primary_container.resources = resources
    secondary_container.resources = resources

    shared_volume_mount = V1VolumeMount(name="shared-data", mount_path="/data")
    # The Python kubernetes client uses snake_case attribute names.
    secondary_container.volume_mounts = [shared_volume_mount]
    primary_container.volume_mounts = [shared_volume_mount]

    pod_spec = V1PodSpec(
        containers=[primary_container, secondary_container],
        volumes=[
            V1Volume(
                name="shared-data", empty_dir=V1EmptyDirVolumeSource(medium="Memory")
            )
        ],
    )

    return pod_spec
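
A pod spec like this can then be attached to a Flyte task. A minimal, hedged sketch, assuming the flytekitplugins-pod plugin is installed; the task body and name below are illustrative and not part of the original example:

from flytekit import task
from flytekitplugins.pod import Pod

@task(
    task_config=Pod(
        pod_spec=generate_pod_spec_for_task(),
        # Must match the name of the V1Container designated as primary above.
        primary_container_name="primary",
    )
)
def my_pod_task() -> None:
    ...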
Example No. 2
def get_container(train_op,
                  train_env,
                  train_num_gpus,
                  drive='coco-headset-vol-1'):
    # Container-level settings: resource requests/limits and volume mounts.
    (train_op.container
     .set_memory_request('56Gi')
     .set_memory_limit('56Gi')
     .set_cpu_request('7.5')
     .set_cpu_limit('7.5')
     .set_gpu_limit(str(train_num_gpus))
     .add_volume_mount(V1VolumeMount(name='tensorboard',
                                     mount_path='/shared/tensorboard'))
     .add_volume_mount(V1VolumeMount(name='data', mount_path='/data/'))
     .add_volume_mount(V1VolumeMount(name='shm', mount_path='/dev/shm')))

    # Pod-level settings: environment, SSH volume, GPU toleration, a node
    # selector pinning the instance type to the GPU count, and the volumes
    # backing the mounts above.
    (add_env(add_ssh_volume(train_op), train_env)
     .add_toleration(V1Toleration(key='nvidia.com/gpu',
                                  operator='Exists',
                                  effect='NoSchedule'))
     .add_node_selector_constraint('beta.kubernetes.io/instance-type',
                                   f'p3.{2 * train_num_gpus}xlarge')
     .add_volume(V1Volume(
         name='tensorboard',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name='tensorboard-research-kf')))
     .add_volume(V1Volume(
         name='data',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name=drive)))
     # .add_volume(V1Volume(name='shm', host_path=V1HostPathVolumeSource(path='/dev/shm')))
     .add_volume(V1Volume(name='shm',
                          empty_dir=V1EmptyDirVolumeSource(medium='Memory'))))
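
The add_env and add_ssh_volume helpers are defined elsewhere in this codebase. As a hedged sketch under that assumption, add_env plausibly just copies a dict onto the op's container as environment variables and returns the op so calls can chain, mirroring the env handling in the Elyra examples below:

from kubernetes.client.models import V1EnvVar

def add_env(op, env_vars):
    # Hypothetical helper: one V1EnvVar per dict entry, then return the op
    # so that further configuration calls can be chained onto it.
    for name, value in env_vars.items():
        op.container.add_env_variable(V1EnvVar(name=name, value=value))
    return op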
Example No. 3
def train_eval_epic(owner,
                    project,
                    experiment,
                    model,
                    git_rev,
                    pretrained_s3,
                    mode,
                    train_additional_args='',
                    eval_additional_args=''):
    train_env = {}

    train_num_gpus = 1
    train_op = components.load_component_from_file('components/train.yaml')(
        owner=owner,
        project=project,
        experiment=experiment,
        model=model,
        git_rev=git_rev,
        pretrained_s3=pretrained_s3,
        mode=mode,
        additional_args=train_additional_args)
    # Container-level settings: resource requests/limits and volume mounts.
    (train_op.container
     .set_memory_request('56Gi')
     .set_memory_limit('56Gi')
     .set_cpu_request('7.5')
     .set_cpu_limit('7.5')
     .set_gpu_limit(str(train_num_gpus))
     .add_volume_mount(V1VolumeMount(name='tensorboard',
                                     mount_path='/shared/tensorboard'))
     .add_volume_mount(V1VolumeMount(name='data', mount_path='/data/'))
     .add_volume_mount(V1VolumeMount(name='shm', mount_path='/dev/shm')))

    # Pod-level settings: environment, SSH volume, GPU toleration, node
    # selector, and the volumes backing the mounts above.
    (add_env(add_ssh_volume(train_op), train_env)
     .add_toleration(V1Toleration(key='nvidia.com/gpu',
                                  operator='Exists',
                                  effect='NoSchedule'))
     .add_node_selector_constraint('beta.kubernetes.io/instance-type',
                                   f'p3.{2 * train_num_gpus}xlarge')
     .add_volume(V1Volume(
         name='tensorboard',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name='tensorboard-research-kf')))
     .add_volume(V1Volume(
         name='data',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             claim_name='dataset-epic-kitchen')))
     # .add_volume(V1Volume(name='shm', host_path=V1HostPathVolumeSource(path='/dev/shm')))
     .add_volume(V1Volume(name='shm',
                          empty_dir=V1EmptyDirVolumeSource(medium='Memory'))))
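
A function like this is typically wrapped in a kfp pipeline and compiled to an Argo workflow. A hedged sketch using the KFP v1 SDK; the pipeline decorator, wrapper name, and output filename are illustrative:

import kfp
import kfp.dsl as dsl

@dsl.pipeline(name='train-eval-epic',
              description='Train and evaluate a model on EPIC data')
def train_eval_epic_pipeline(owner, project, experiment, model, git_rev,
                             pretrained_s3, mode):
    train_eval_epic(owner, project, experiment, model, git_rev,
                    pretrained_s3, mode)

# Compile to YAML that can be uploaded to a Kubeflow Pipelines instance.
kfp.compiler.Compiler().compile(train_eval_epic_pipeline, 'train_eval_epic.yaml')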
Example No. 4
    def __init__(self,
                 pipeline_name: str,
                 experiment_name: str,
                 notebook: str,
                 cos_endpoint: str,
                 cos_bucket: str,
                 cos_directory: str,
                 cos_dependencies_archive: str,
                 pipeline_version: Optional[str] = '',
                 pipeline_source: Optional[str] = None,
                 pipeline_outputs: Optional[List[str]] = None,
                 pipeline_inputs: Optional[List[str]] = None,
                 pipeline_envs: Optional[Dict[str, str]] = None,
                 requirements_url: Optional[str] = None,
                 bootstrap_script_url: Optional[str] = None,
                 emptydir_volume_size: Optional[str] = None,
                 cpu_request: Optional[str] = None,
                 mem_request: Optional[str] = None,
                 gpu_limit: Optional[str] = None,
                 workflow_engine: Optional[str] = 'argo',
                 **kwargs):
        """Create a new instance of ContainerOp.
        Args:
          pipeline_name: pipeline that this op belongs to
          experiment_name: the experiment where pipeline_name is executed
          notebook: name of the notebook that will be executed per this operation
          cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
          cos_bucket: bucket to retrieve archive from
          cos_directory: name of the directory in the object storage bucket to pull
          cos_dependencies_archive: archive file name to get from the object storage bucket, e.g. archive1.tar.gz
          pipeline_version: optional version identifier
          pipeline_source: pipeline source
          pipeline_outputs: comma delimited list of files produced by the notebook
          pipeline_inputs: comma delimited list of files to be consumed/are required by the notebook
          pipeline_envs: dictionary of environmental variables to set in the container prior to execution
          requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
          bootstrap_script_url: URL to a custom python bootstrap script to run
          emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-O container runtime
          cpu_request: number of CPUs requested for the operation
          mem_request: memory requested for the operation (in Gi)
          gpu_limit: maximum number of GPUs allowed for the operation
          workflow_engine: Kubeflow workflow engine, defaults to 'argo'
          kwargs: additional key value pairs to pass e.g. name, image, sidecars & is_exit_handler.
                  See Kubeflow pipelines ContainerOp definition for more parameters or how to use
                  https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
        """

        self.pipeline_name = pipeline_name
        self.pipeline_version = pipeline_version
        self.pipeline_source = pipeline_source
        self.experiment_name = experiment_name
        self.notebook = notebook
        self.notebook_name = os.path.basename(notebook)
        self.cos_endpoint = cos_endpoint
        self.cos_bucket = cos_bucket
        self.cos_directory = cos_directory
        self.cos_dependencies_archive = cos_dependencies_archive
        self.container_work_dir_root_path = "./"
        self.container_work_dir_name = "jupyter-work-dir/"
        self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
        self.bootstrap_script_url = bootstrap_script_url
        self.requirements_url = requirements_url
        self.pipeline_outputs = pipeline_outputs
        self.pipeline_inputs = pipeline_inputs
        self.pipeline_envs = pipeline_envs
        self.cpu_request = cpu_request
        self.mem_request = mem_request
        self.gpu_limit = gpu_limit

        argument_list = []

        """ CRI-o support for kfp pipelines
            We need to attach an emptydir volume for each notebook that runs since CRI-o runtime does not allow
            us to write to the base image layer file system, only to volumes.
        """
        self.emptydir_volume_name = "workspace"
        self.emptydir_volume_size = emptydir_volume_size
        self.python_user_lib_path = ''
        self.python_user_lib_path_target = ''
        self.python_pip_config_url = ''

        if self.emptydir_volume_size:
            self.container_work_dir_root_path = "/opt/app-root/src/"
            self.container_python_dir_name = "python3/"
            self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
            self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
            self.python_user_lib_path_target = '--target=' + self.python_user_lib_path
            self.python_pip_config_url = ELYRA_PIP_CONFIG_URL

        if not self.bootstrap_script_url:
            self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL

        if not self.requirements_url:
            self.requirements_url = ELYRA_REQUIREMENTS_URL

        if 'name' not in kwargs:
            raise TypeError("You need to provide a name for the operation.")
        elif not kwargs.get('name'):
            raise ValueError("You need to provide a name for the operation.")

        if 'image' not in kwargs:
            raise ValueError("You need to provide an image.")

        if not notebook:
            raise ValueError("You need to provide a notebook.")

        if 'arguments' not in kwargs:
            """ If no arguments are passed, we use our own.
                If ['arguments'] are set, we assume container's ENTRYPOINT is set and dependencies are installed
                NOTE: Images being pulled must have python3 available on PATH and cURL utility
            """

            argument_list.append('mkdir -p {container_work_dir} && cd {container_work_dir} && '
                                 'curl -H "Cache-Control: no-cache" -L {bootscript_url} --output bootstrapper.py && '
                                 'curl -H "Cache-Control: no-cache" -L {reqs_url} --output requirements-elyra.txt && '
                                 .format(container_work_dir=self.container_work_dir,
                                         bootscript_url=self.bootstrap_script_url,
                                         reqs_url=self.requirements_url)
                                 )

            if self.emptydir_volume_size:
                argument_list.append('mkdir {container_python_dir} && cd {container_python_dir} && '
                                     'curl -H "Cache-Control: no-cache" -L {python_pip_config_url} '
                                     '--output pip.conf && cd .. &&'
                                     .format(python_pip_config_url=self.python_pip_config_url,
                                             container_python_dir=self.container_python_dir_name)
                                     )

            argument_list.append('python3 -m pip install {python_user_lib_path_target} packaging && '
                                 'python3 -m pip freeze > requirements-current.txt && '
                                 'python3 bootstrapper.py '
                                 '--cos-endpoint {cos_endpoint} '
                                 '--cos-bucket {cos_bucket} '
                                 '--cos-directory "{cos_directory}" '
                                 '--cos-dependencies-archive "{cos_dependencies_archive}" '
                                 '--file "{notebook}" '
                                 .format(cos_endpoint=self.cos_endpoint,
                                         cos_bucket=self.cos_bucket,
                                         cos_directory=self.cos_directory,
                                         cos_dependencies_archive=self.cos_dependencies_archive,
                                         notebook=self.notebook,
                                         python_user_lib_path_target=self.python_user_lib_path_target)
                                 )

            if self.pipeline_inputs:
                inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
                argument_list.append('--inputs "{}" '.format(inputs_str))

            if self.pipeline_outputs:
                outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
                argument_list.append('--outputs "{}" '.format(outputs_str))

            if self.emptydir_volume_size:
                argument_list.append('--user-volume-path "{}" '.format(self.python_user_lib_path))

            kwargs['command'] = ['sh', '-c']
            kwargs['arguments'] = "".join(argument_list)

        super().__init__(**kwargs)

        # We must deal with the envs after the superclass initialization, since they amend
        # the container attribute, which isn't available until now.
        if self.pipeline_envs:
            # Convert dict entries to the format kfp needs.
            for key, value in self.pipeline_envs.items():
                self.container.add_env_variable(V1EnvVar(name=key, value=value))

        # If a CRI-O volume size is given, assume the Kubeflow Pipelines environment is using
        # CRI-O as its container runtime
        if self.emptydir_volume_size:
            self.add_volume(V1Volume(empty_dir=V1EmptyDirVolumeSource(
                                     medium="",
                                     size_limit=self.emptydir_volume_size),
                            name=self.emptydir_volume_name))

            self.container.add_volume_mount(V1VolumeMount(mount_path=self.container_work_dir_root_path,
                                                          name=self.emptydir_volume_name))

            # Append the location of the elyra dependencies installed in the volume to PYTHONPATH
            self.container.add_env_variable(V1EnvVar(name='PYTHONPATH',
                                                     value=self.python_user_lib_path))

        if self.cpu_request:
            self.container.set_cpu_request(cpu=str(cpu_request))

        if self.mem_request:
            self.container.set_memory_request(memory=str(mem_request) + "G")

        if self.gpu_limit:
            gpu_vendor = self.pipeline_envs.get('GPU_VENDOR', 'nvidia')
            self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor)

        # Generate unique ELYRA_RUN_NAME value and expose it as an environment
        # variable in the container
        if workflow_engine and workflow_engine.lower() == 'argo':
            run_name_placeholder = '{{workflow.annotations.pipelines.kubeflow.org/run_name}}'
            self.container.add_env_variable(V1EnvVar(name='ELYRA_RUN_NAME',
                                                     value=run_name_placeholder))
        else:
            # For Tekton derive the value from the specified pod annotation
            annotation = 'pipelines.kubeflow.org/run_name'
            field_path = f"metadata.annotations['{annotation}']"
            self.container.add_env_variable(V1EnvVar(name='ELYRA_RUN_NAME',
                                                     value_from=V1EnvVarSource(
                                                         field_ref=V1ObjectFieldSelector(field_path=field_path))))

        # Attach metadata to the pod
        # Node type (a static type for this op)
        self.add_pod_label('elyra/node-type',
                           NotebookOp._normalize_label_value(
                               'notebook-script'))
        # Pipeline name
        self.add_pod_label('elyra/pipeline-name',
                           NotebookOp._normalize_label_value(self.pipeline_name))
        # Pipeline version
        self.add_pod_label('elyra/pipeline-version',
                           NotebookOp._normalize_label_value(self.pipeline_version))
        # Experiment name
        self.add_pod_label('elyra/experiment-name',
                           NotebookOp._normalize_label_value(self.experiment_name))
        # Pipeline node name
        self.add_pod_label('elyra/node-name',
                           NotebookOp._normalize_label_value(kwargs.get('name')))
        # Pipeline node file
        self.add_pod_annotation('elyra/node-file-name',
                                self.notebook)

        # Identify the pipeline source, which can be a
        # pipeline file (mypipeline.pipeline), a Python
        # script or notebook that was submitted
        if self.pipeline_source is not None:
            self.add_pod_annotation('elyra/pipeline-source',
                                    self.pipeline_source)
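
A hedged instantiation sketch; every value below is an illustrative placeholder, and name and image arrive through **kwargs, as the checks above require:

notebook_op = NotebookOp(
    name='analyze-data',                      # required via kwargs
    image='elyra/kf-notebook:latest',         # required via kwargs; illustrative tag
    pipeline_name='my-pipeline',
    experiment_name='my-experiment',
    notebook='notebooks/analyze.ipynb',
    cos_endpoint='minio-service:9000',
    cos_bucket='pipelines',
    cos_directory='my-pipeline-run',
    cos_dependencies_archive='analyze-deps.tar.gz',
    pipeline_envs={'GPU_VENDOR': 'nvidia'},
    cpu_request='2',
    mem_request='4',
    gpu_limit='1',
)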
Example No. 5
    def __init__(
        self,
        pipeline_name: str,
        experiment_name: str,
        notebook: str,
        cos_endpoint: str,
        cos_bucket: str,
        cos_directory: str,
        cos_dependencies_archive: str,
        pipeline_version: Optional[str] = "",
        pipeline_source: Optional[str] = None,
        pipeline_outputs: Optional[List[str]] = None,
        pipeline_inputs: Optional[List[str]] = None,
        pipeline_envs: Optional[Dict[str, str]] = None,
        requirements_url: Optional[str] = None,
        bootstrap_script_url: Optional[str] = None,
        emptydir_volume_size: Optional[str] = None,
        cpu_request: Optional[str] = None,
        mem_request: Optional[str] = None,
        gpu_limit: Optional[str] = None,
        workflow_engine: Optional[str] = "argo",
        volume_mounts: Optional[List[VolumeMount]] = None,
        kubernetes_secrets: Optional[List[KubernetesSecret]] = None,
        **kwargs,
    ):
        """Create a new instance of ContainerOp.
        Args:
          pipeline_name: pipeline that this op belongs to
          experiment_name: the experiment where pipeline_name is executed
          notebook: name of the notebook that will be executed per this operation
          cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
          cos_bucket: bucket to retrieve archive from
          cos_directory: name of the directory in the object storage bucket to pull
          cos_dependencies_archive: archive file name to get from the object storage bucket, e.g. archive1.tar.gz
          pipeline_version: optional version identifier
          pipeline_source: pipeline source
          pipeline_outputs: comma delimited list of files produced by the notebook
          pipeline_inputs: comma delimited list of files to be consumed/are required by the notebook
          pipeline_envs: dictionary of environmental variables to set in the container prior to execution
          requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
          bootstrap_script_url: URL to a custom python bootstrap script to run
          emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-O container runtime
          cpu_request: number of CPUs requested for the operation
          mem_request: memory requested for the operation (in Gi)
          gpu_limit: maximum number of GPUs allowed for the operation
          workflow_engine: Kubeflow workflow engine, defaults to 'argo'
          volume_mounts: data volumes to be mounted
          kubernetes_secrets: secrets to be made available as environment variables
          kwargs: additional key value pairs to pass e.g. name, image, sidecars & is_exit_handler.
                  See Kubeflow pipelines ContainerOp definition for more parameters or how to use
                  https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
        """

        self.pipeline_name = pipeline_name
        self.pipeline_version = pipeline_version
        self.pipeline_source = pipeline_source
        self.experiment_name = experiment_name
        self.notebook = notebook
        self.notebook_name = os.path.basename(notebook)
        self.cos_endpoint = cos_endpoint
        self.cos_bucket = cos_bucket
        self.cos_directory = cos_directory
        self.cos_dependencies_archive = cos_dependencies_archive
        self.container_work_dir_root_path = "./"
        self.container_work_dir_name = "jupyter-work-dir/"
        self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
        self.bootstrap_script_url = bootstrap_script_url
        self.requirements_url = requirements_url
        self.pipeline_outputs = pipeline_outputs
        self.pipeline_inputs = pipeline_inputs
        self.pipeline_envs = pipeline_envs
        self.cpu_request = cpu_request
        self.mem_request = mem_request
        self.gpu_limit = gpu_limit
        self.volume_mounts = volume_mounts  # optional data volumes to be mounted to the pod
        self.kubernetes_secrets = kubernetes_secrets  # optional secrets to be made available as env vars

        argument_list = []
        """ CRI-o support for kfp pipelines
            We need to attach an emptydir volume for each notebook that runs since CRI-o runtime does not allow
            us to write to the base image layer file system, only to volumes.
        """
        self.emptydir_volume_name = "workspace"
        self.emptydir_volume_size = emptydir_volume_size
        self.python_user_lib_path = ""
        self.python_user_lib_path_target = ""
        self.python_pip_config_url = ""

        if self.emptydir_volume_size:
            self.container_work_dir_root_path = "/opt/app-root/src/"
            self.container_python_dir_name = "python3/"
            self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
            self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
            self.python_user_lib_path_target = "--target=" + self.python_user_lib_path
            self.python_pip_config_url = ELYRA_PIP_CONFIG_URL

        if not self.bootstrap_script_url:
            self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL

        if not self.requirements_url:
            self.requirements_url = ELYRA_REQUIREMENTS_URL

        if "name" not in kwargs:
            raise TypeError("You need to provide a name for the operation.")
        elif not kwargs.get("name"):
            raise ValueError("You need to provide a name for the operation.")

        if "image" not in kwargs:
            raise ValueError("You need to provide an image.")

        if not notebook:
            raise ValueError("You need to provide a notebook.")

        if "arguments" not in kwargs:
            """If no arguments are passed, we use our own.
            If ['arguments'] are set, we assume container's ENTRYPOINT is set and dependencies are installed
            NOTE: Images being pulled must have python3 available on PATH and cURL utility
            """

            common_curl_options = '--fail -H "Cache-Control: no-cache"'

            argument_list.append(
                f"mkdir -p {self.container_work_dir} && cd {self.container_work_dir} && "
                f"echo 'Downloading {self.bootstrap_script_url}' && "
                f"curl {common_curl_options} -L {self.bootstrap_script_url} --output bootstrapper.py && "
                f"echo 'Downloading {self.requirements_url}' && "
                f"curl {common_curl_options} -L {self.requirements_url} --output requirements-elyra.txt && "
                f"echo 'Downloading {ELYRA_REQUIREMENTS_URL_PY37}' && "
                f"curl {common_curl_options} -L {ELYRA_REQUIREMENTS_URL_PY37} --output requirements-elyra-py37.txt && "
            )

            if self.emptydir_volume_size:
                argument_list.append(
                    f"mkdir {self.container_python_dir_name} && cd {self.container_python_dir_name} && "
                    f"echo 'Downloading {self.python_pip_config_url}' && "
                    f"curl {common_curl_options} -L {self.python_pip_config_url} --output pip.conf && cd .. &&"
                )

            argument_list.append(
                f"python3 -m pip install {self.python_user_lib_path_target} packaging && "
                "python3 -m pip freeze > requirements-current.txt && "
                "python3 bootstrapper.py "
                f'--pipeline-name "{self.pipeline_name}" '
                f"--cos-endpoint {self.cos_endpoint} "
                f"--cos-bucket {self.cos_bucket} "
                f'--cos-directory "{self.cos_directory}" '
                f'--cos-dependencies-archive "{self.cos_dependencies_archive}" '
                f'--file "{self.notebook}" ')

            if self.pipeline_inputs:
                inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
                argument_list.append(f'--inputs "{inputs_str}" ')

            if self.pipeline_outputs:
                outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
                argument_list.append(f'--outputs "{outputs_str}" ')

            if self.emptydir_volume_size:
                argument_list.append(
                    f'--user-volume-path "{self.python_user_lib_path}" ')

            kwargs["command"] = ["sh", "-c"]
            kwargs["arguments"] = "".join(argument_list)

        super().__init__(**kwargs)

        # add user-specified volume mounts: the referenced PVCs must exist
        # or this generic operation will fail
        if self.volume_mounts:
            unique_pvcs = []
            for volume_mount in self.volume_mounts:
                if volume_mount.pvc_name not in unique_pvcs:
                    self.add_volume(
                        V1Volume(
                            name=volume_mount.pvc_name,
                            persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                                claim_name=volume_mount.pvc_name),
                        ))
                    unique_pvcs.append(volume_mount.pvc_name)
                self.container.add_volume_mount(
                    V1VolumeMount(mount_path=volume_mount.path,
                                  name=volume_mount.pvc_name))

        # We must deal with the envs after the superclass initialization, since they amend
        # the container attribute, which isn't available until now.
        if self.pipeline_envs:
            # Convert dict entries to the format kfp needs.
            for key, value in self.pipeline_envs.items():
                self.container.add_env_variable(V1EnvVar(name=key, value=value))

        if self.kubernetes_secrets:
            for secret in self.kubernetes_secrets:  # Convert tuple entries to the format kfp needs
                self.container.add_env_variable(
                    V1EnvVar(
                        name=secret.env_var,
                        value_from=V1EnvVarSource(
                            secret_key_ref=V1SecretKeySelector(
                                name=secret.name, key=secret.key)),
                    ))

        # If a CRI-O volume size is given, assume the Kubeflow Pipelines environment is using
        # CRI-O as its container runtime
        if self.emptydir_volume_size:
            self.add_volume(
                V1Volume(
                    empty_dir=V1EmptyDirVolumeSource(
                        medium="", size_limit=self.emptydir_volume_size),
                    name=self.emptydir_volume_name,
                ))

            self.container.add_volume_mount(
                V1VolumeMount(mount_path=self.container_work_dir_root_path,
                              name=self.emptydir_volume_name))

            # Append the location of the elyra dependencies installed in the volume to PYTHONPATH
            self.container.add_env_variable(
                V1EnvVar(name="PYTHONPATH", value=self.python_user_lib_path))

        if self.cpu_request:
            self.container.set_cpu_request(cpu=str(cpu_request))

        if self.mem_request:
            self.container.set_memory_request(memory=str(mem_request) + "G")

        if self.gpu_limit:
            gpu_vendor = self.pipeline_envs.get("GPU_VENDOR", "nvidia")
            self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor)

        # Generate unique ELYRA_RUN_NAME value and expose it as an environment
        # variable in the container
        if not workflow_engine:
            raise ValueError(
                "workflow_engine is missing and needs to be specified.")
        if workflow_engine.lower() == "argo":
            # attach RUN_ID_PLACEHOLDER as run name
            # '{{workflow.annotations.pipelines.kubeflow.org/run_name}}' variable
            # cannot be resolved by Argo in KF 1.4
            run_name_placeholder = RUN_ID_PLACEHOLDER
            self.container.add_env_variable(
                V1EnvVar(name="ELYRA_RUN_NAME", value=run_name_placeholder))
        elif workflow_engine.lower() == "tekton":
            try:
                from kfp_tekton import TektonClient  # noqa: F401
            except ImportError:
                raise ValueError(
                    "kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine."
                )

            # For Tekton derive the value from the specified pod annotation
            annotation = "pipelines.kubeflow.org/run_name"
            field_path = f"metadata.annotations['{annotation}']"
            self.container.add_env_variable(
                V1EnvVar(
                    name="ELYRA_RUN_NAME",
                    value_from=V1EnvVarSource(field_ref=V1ObjectFieldSelector(
                        field_path=field_path)),
                ))
        else:
            raise ValueError(
                f"{workflow_engine} is not a supported workflow engine.")

        # Attach metadata to the pod
        # Node type (a static type for this op)
        self.add_pod_label(
            "elyra/node-type",
            ExecuteFileOp._normalize_label_value("notebook-script"))
        # Pipeline name
        self.add_pod_label(
            "elyra/pipeline-name",
            ExecuteFileOp._normalize_label_value(self.pipeline_name))
        # Pipeline version
        self.add_pod_label(
            "elyra/pipeline-version",
            ExecuteFileOp._normalize_label_value(self.pipeline_version))
        # Experiment name
        self.add_pod_label(
            "elyra/experiment-name",
            ExecuteFileOp._normalize_label_value(self.experiment_name))
        # Pipeline node name
        self.add_pod_label(
            "elyra/node-name",
            ExecuteFileOp._normalize_label_value(kwargs.get("name")))
        # Pipeline node file
        self.add_pod_annotation("elyra/node-file-name", self.notebook)

        # Identify the pipeline source, which can be a
        # pipeline file (mypipeline.pipeline), a Python
        # script or notebook that was submitted
        if self.pipeline_source is not None:
            self.add_pod_annotation("elyra/pipeline-source",
                                    self.pipeline_source)
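
VolumeMount and KubernetesSecret are Elyra's own types. Judging from the attribute accesses above (pvc_name/path and env_var/name/key), a hedged usage sketch might look like this; the constructor keywords and all values are illustrative:

op = ExecuteFileOp(
    name='train-model',                        # required via kwargs
    image='elyra/kf-notebook:latest',          # required via kwargs; illustrative tag
    pipeline_name='my-pipeline',
    experiment_name='my-experiment',
    notebook='notebooks/train.ipynb',
    cos_endpoint='minio-service:9000',
    cos_bucket='pipelines',
    cos_directory='my-pipeline-run',
    cos_dependencies_archive='train-deps.tar.gz',
    workflow_engine='argo',
    volume_mounts=[VolumeMount(pvc_name='training-data', path='/mnt/data')],
    kubernetes_secrets=[KubernetesSecret(env_var='AWS_SECRET_ACCESS_KEY',
                                         name='cos-credentials',
                                         key='secret_key')],
)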
Example No. 6
    def __init__(self,
                 notebook: str,
                 cos_endpoint: str,
                 cos_bucket: str,
                 cos_directory: str,
                 cos_dependencies_archive: str,
                 pipeline_outputs: Optional[List[str]] = None,
                 pipeline_inputs: Optional[List[str]] = None,
                 pipeline_envs: Optional[Dict[str, str]] = None,
                 requirements_url: str = None,
                 bootstrap_script_url: str = None,
                 emptydir_volume_size: str = None,
                 **kwargs):
        """Create a new instance of ContainerOp.
        Args:
          notebook: name of the notebook that will be executed per this operation
          cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
          cos_bucket: bucket to retrieve archive from
          cos_directory: name of the directory in the object storage bucket to pull
          cos_dependencies_archive: archive file name to get from the object storage bucket, e.g. archive1.tar.gz
          pipeline_outputs: comma delimited list of files produced by the notebook
          pipeline_inputs: comma delimited list of files to be consumed/are required by the notebook
          pipeline_envs: dictionary of environmental variables to set in the container prior to execution
          requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
          bootstrap_script_url: URL to a custom python bootstrap script to run
          emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-O container runtime
          kwargs: additional key value pairs to pass e.g. name, image, sidecars & is_exit_handler.
                  See Kubeflow pipelines ContainerOp definition for more parameters or how to use
                  https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
        """

        self.notebook = notebook
        self.notebook_name = self._get_file_name_with_extension(notebook, 'ipynb')
        self.cos_endpoint = cos_endpoint
        self.cos_bucket = cos_bucket
        self.cos_directory = cos_directory
        self.cos_dependencies_archive = cos_dependencies_archive
        self.container_work_dir_root_path = "./"
        self.container_work_dir_name = "jupyter-work-dir/"
        self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
        self.bootstrap_script_url = bootstrap_script_url
        self.requirements_url = requirements_url
        self.pipeline_outputs = pipeline_outputs
        self.pipeline_inputs = pipeline_inputs
        self.pipeline_envs = pipeline_envs

        argument_list = []

        """ CRI-o support for kfp pipelines
            We need to attach an emptydir volume for each notebook that runs since CRI-o runtime does not allow
            us to write to the base image layer file system, only to volumes.
        """
        self.emptydir_volume_name = "workspace"
        self.emptydir_volume_size = emptydir_volume_size
        self.python_user_lib_path = ''
        self.python_user_lib_path_target = ''
        self.python_pip_config_url = ''

        if self.emptydir_volume_size:
            self.container_work_dir_root_path = "/opt/app-root/src/"
            self.container_python_dir_name = "python3/"
            self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
            self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
            self.python_user_lib_path_target = '--target=' + self.python_user_lib_path
            self.python_pip_config_url = (
                f'https://raw.githubusercontent.com/{KFP_NOTEBOOK_ORG}/'
                f'kfp-notebook/{KFP_NOTEBOOK_BRANCH}/etc/pip.conf')

        if not self.bootstrap_script_url:
            self.bootstrap_script_url = (
                f'https://raw.githubusercontent.com/{KFP_NOTEBOOK_ORG}/'
                f'kfp-notebook/{KFP_NOTEBOOK_BRANCH}/etc/docker-scripts/bootstrapper.py')

        if not self.requirements_url:
            self.requirements_url = (
                f'https://raw.githubusercontent.com/{KFP_NOTEBOOK_ORG}/'
                f'kfp-notebook/{KFP_NOTEBOOK_BRANCH}/etc/requirements-elyra.txt')

        if 'image' not in kwargs:
            raise ValueError("You need to provide an image.")

        if not notebook:
            raise ValueError("You need to provide a notebook.")

        if 'arguments' not in kwargs:
            """ If no arguments are passed, we use our own.
                If ['arguments'] are set, we assume container's ENTRYPOINT is set and dependencies are installed
                NOTE: Images being pulled must have python3 available on PATH and cURL utility
            """

            argument_list.append('mkdir -p {container_work_dir} && cd {container_work_dir} && '
                                 'curl -H "Cache-Control: no-cache" -L {bootscript_url} --output bootstrapper.py && '
                                 'curl -H "Cache-Control: no-cache" -L {reqs_url} --output requirements-elyra.txt && '
                                 .format(container_work_dir=self.container_work_dir,
                                         bootscript_url=self.bootstrap_script_url,
                                         reqs_url=self.requirements_url)
                                 )

            if self.emptydir_volume_size:
                argument_list.append('mkdir {container_python_dir} && cd {container_python_dir} && '
                                     'curl -H "Cache-Control: no-cache" -L {python_pip_config_url} '
                                     '--output pip.conf && cd .. &&'
                                     .format(python_pip_config_url=self.python_pip_config_url,
                                             container_python_dir=self.container_python_dir_name)
                                     )

            argument_list.append('python3 -m pip install {python_user_lib_path_target} packaging && '
                                 'python3 -m pip freeze > requirements-current.txt && '
                                 'python3 bootstrapper.py '
                                 '--cos-endpoint {cos_endpoint} '
                                 '--cos-bucket {cos_bucket} '
                                 '--cos-directory "{cos_directory}" '
                                 '--cos-dependencies-archive "{cos_dependencies_archive}" '
                                 '--file "{notebook}" '
                                 .format(cos_endpoint=self.cos_endpoint,
                                         cos_bucket=self.cos_bucket,
                                         cos_directory=self.cos_directory,
                                         cos_dependencies_archive=self.cos_dependencies_archive,
                                         notebook=self.notebook,
                                         python_user_lib_path_target=self.python_user_lib_path_target)
                                 )

            if self.pipeline_inputs:
                inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
                argument_list.append('--inputs "{}" '.format(inputs_str))

            if self.pipeline_outputs:
                outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
                argument_list.append('--outputs "{}" '.format(outputs_str))

            if self.emptydir_volume_size:
                argument_list.append('--user-volume-path "{}" '.format(self.python_user_lib_path))

            kwargs['command'] = ['sh', '-c']
            kwargs['arguments'] = "".join(argument_list)

        super().__init__(**kwargs)

        # We must deal with the envs after the superclass initialization, since they amend
        # the container attribute, which isn't available until now.
        if self.pipeline_envs:
            # Convert dict entries to the format kfp needs.
            for key, value in self.pipeline_envs.items():
                self.container.add_env_variable(V1EnvVar(name=key, value=value))

        # If a CRI-O volume size is given, assume the Kubeflow Pipelines environment is using
        # CRI-O as its container runtime
        if self.emptydir_volume_size:
            self.add_volume(V1Volume(empty_dir=V1EmptyDirVolumeSource(
                                     medium="",
                                     size_limit=self.emptydir_volume_size),
                            name=self.emptydir_volume_name))

            self.container.add_volume_mount(V1VolumeMount(mount_path=self.container_work_dir_root_path,
                                                          name=self.emptydir_volume_name))

            # Append the location of the elyra dependencies installed in the volume to PYTHONPATH
            self.container.add_env_variable(V1EnvVar(name='PYTHONPATH',
                                                     value=self.python_user_lib_path))
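
A hedged sketch of the CRI-O path, assuming this __init__ belongs to a NotebookOp subclass of ContainerOp as in Example No. 4: passing emptydir_volume_size switches the working directory onto the emptyDir-backed workspace volume and redirects pip installs into it (all values illustrative):

op = NotebookOp(
    name='crio-notebook',
    image='elyra/kf-notebook:latest',          # illustrative tag
    notebook='analysis.ipynb',
    cos_endpoint='minio-service:9000',
    cos_bucket='pipelines',
    cos_directory='run-1',
    cos_dependencies_archive='deps.tar.gz',
    emptydir_volume_size='20Gi',               # enables the CRI-O code path above
)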