Example #1
def get_container(train_op,
                  train_env,
                  train_num_gpus,
                  drive='coco-headset-vol-1'):
    (train_op.container
     .set_memory_request('56Gi')
     .set_memory_limit('56Gi')
     .set_cpu_request('7.5')
     .set_cpu_limit('7.5')
     .set_gpu_limit(str(train_num_gpus))
     .add_volume_mount(V1VolumeMount(name='tensorboard',
                                     mount_path='/shared/tensorboard'))
     .add_volume_mount(V1VolumeMount(name='data', mount_path='/data/'))
     .add_volume_mount(V1VolumeMount(name='shm', mount_path='/dev/shm')))
    (add_env(add_ssh_volume(train_op), train_env)
     .add_toleration(V1Toleration(key='nvidia.com/gpu',
                                  operator='Exists',
                                  effect='NoSchedule'))
     .add_node_selector_constraint('beta.kubernetes.io/instance-type',
                                   f'p3.{2 * train_num_gpus}xlarge')
     .add_volume(V1Volume(
         name='tensorboard',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             'tensorboard-research-kf')))
     .add_volume(V1Volume(
         name='data',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(drive)))
     # .add_volume(V1Volume(name='shm', host_path=V1HostPathVolumeSource(path='/dev/shm')))
     .add_volume(V1Volume(name='shm',
                          empty_dir=V1EmptyDirVolumeSource(medium='Memory'))))
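The snippet above assumes the Kubernetes client models are already imported; the project-local helpers add_env and add_ssh_volume are not shown. An import block like the following (an assumption based on the names referenced, not part of the original example) would cover the model classes used here and in the next examples:

# Kubernetes client models referenced by the snippets in this section.
from kubernetes.client.models import (
    V1EmptyDirVolumeSource,
    V1HostPathVolumeSource,
    V1PersistentVolumeClaimVolumeSource,
    V1Toleration,
    V1Volume,
    V1VolumeMount,
)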
Example #2
def train_eval_epic(owner,
                    project,
                    experiment,
                    model,
                    git_rev,
                    pretrained_s3,
                    mode,
                    train_additional_args='',
                    eval_additional_args=''):
    train_env = {}

    train_num_gpus = 1
    train_op = components.load_component_from_file('components/train.yaml')(
        owner=owner,
        project=project,
        experiment=experiment,
        model=model,
        git_rev=git_rev,
        pretrained_s3=pretrained_s3,
        mode=mode,
        additional_args=train_additional_args)
    (train_op.container
     .set_memory_request('56Gi')
     .set_memory_limit('56Gi')
     .set_cpu_request('7.5')
     .set_cpu_limit('7.5')
     .set_gpu_limit(str(train_num_gpus))
     .add_volume_mount(V1VolumeMount(name='tensorboard',
                                     mount_path='/shared/tensorboard'))
     .add_volume_mount(V1VolumeMount(name='data', mount_path='/data/'))
     .add_volume_mount(V1VolumeMount(name='shm', mount_path='/dev/shm')))
    (add_env(add_ssh_volume(train_op), train_env)
     .add_toleration(V1Toleration(key='nvidia.com/gpu',
                                  operator='Exists',
                                  effect='NoSchedule'))
     .add_node_selector_constraint('beta.kubernetes.io/instance-type',
                                   f'p3.{2 * train_num_gpus}xlarge')
     .add_volume(V1Volume(
         name='tensorboard',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             'tensorboard-research-kf')))
     .add_volume(V1Volume(
         name='data',
         persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
             'dataset-epic-kitchen')))
     # .add_volume(V1Volume(name='shm', host_path=V1HostPathVolumeSource(path='/dev/shm')))
     .add_volume(V1Volume(name='shm',
                          empty_dir=V1EmptyDirVolumeSource(medium='Memory'))))
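In the original project, train_eval_epic is presumably registered as a pipeline and compiled elsewhere. A minimal compile sketch for kfp v1, assuming the function carries (or is wrapped by) a @kfp.dsl.pipeline decorator and with a placeholder output filename:

import kfp

# Minimal sketch: compile the pipeline function above to an Argo YAML package.
if __name__ == '__main__':
    kfp.compiler.Compiler().compile(train_eval_epic, 'train_eval_epic.yaml')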
Example #3
        def my_pipeline(param1, param2):
            vol = VolumeOp(name="myvol_creation",
                           resource_name=param1,
                           size=param2,
                           annotations={"test": "annotation"})

            self.assertCountEqual([x.name for x in vol.inputs],
                                  ["param1", "param2"])
            self.assertEqual(vol.k8s_resource.metadata.name,
                             "{{workflow.name}}-%s" % str(param1))
            expected_attribute_outputs = {
                "manifest": "{}",
                "name": "{.metadata.name}",
                "size": "{.status.capacity.storage}"
            }
            self.assertEqual(vol.attribute_outputs, expected_attribute_outputs)
            expected_outputs = {
                "manifest": PipelineParam(name="manifest", op_name=vol.name),
                "name": PipelineParam(name="name", op_name=vol.name),
                "size": PipelineParam(name="size", op_name=vol.name)
            }
            self.assertEqual(vol.outputs, expected_outputs)
            self.assertEqual(vol.output,
                             PipelineParam(name="name", op_name=vol.name))
            self.assertEqual(vol.dependent_names, [])
            expected_volume = PipelineVolume(
                name="myvol-creation",
                persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                    claim_name=PipelineParam(name="name", op_name=vol.name)))
            self.assertEqual(vol.volume, expected_volume)
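Outside of tests, the same VolumeOp output is usually consumed through the pvolumes argument, which wires up both the volume and the volume mount. A minimal sketch, assuming kfp v1 and placeholder names, sizes, and image:

from kfp import dsl

# Minimal sketch: VolumeOp.volume is a PipelineVolume backed by the PVC it
# creates; pvolumes= mounts it at the given path and records the dependency.
@dsl.pipeline(name='volumeop-usage-sketch')
def volumeop_usage():
    vop = dsl.VolumeOp(name='create-volume',
                       resource_name='my-pvc',
                       size='1Gi',
                       modes=dsl.VOLUME_MODE_RWO)
    dsl.ContainerOp(name='consumer',
                    image='busybox',
                    command=['sh', '-c', 'ls -la /mnt/data'],
                    pvolumes={'/mnt/data': vop.volume})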
Example #4
  def __init__(self, pvc: str = None, volume: V1Volume = None, **kwargs):
    if volume and kwargs:
      raise ValueError("You can't pass a volume along with other " "kwargs.")

    name_provided = True
    init_volume = {}
    if volume:
      init_volume = {
          attr: getattr(volume, attr) for attr in self.attribute_map.keys()
      }
    else:
      if "name" in kwargs:
        if len(kwargs["name"]) > 63:
          raise ValueError("PipelineVolume name must be no more than"
                           " 63 characters")
        init_volume = {"name": kwargs.pop("name")}
      else:
        name_provided = False
        init_volume = {"name": "pvolume-placeholder"}
      if pvc and kwargs:
        raise ValueError("You can only pass 'name' along with 'pvc'.")
      elif pvc and not kwargs:
        pvc_volume_source = V1PersistentVolumeClaimVolumeSource(
            claim_name=str(pvc))
        init_volume["persistent_volume_claim"] = pvc_volume_source
    super().__init__(**init_volume, **kwargs)
    if not name_provided:
      volume_dict = prune_none_dict_values(self.to_dict())
      hash_value = hashlib.sha256(
          bytes(json.dumps(volume_dict, sort_keys=True), "utf-8")).hexdigest()
      name = "pvolume-{}".format(hash_value)
      self.name = name[0:63] if len(name) > 63 else name
    self.dependent_names = []
Example #5
def pipeline_mount_pvc():
    pvc_name = "kfp-pvc"
    volume_name = 'pipeline'
    volume_mount_path = '/mnt/pipeline'

    dsl.ContainerOp(
        name='mnist_pvc',
        image='kangwoo/kfp-mnist-storage:0.0.1',
        arguments=['--model', '/mnt/pipeline/kfp/mnist/model']
    ).add_volume(V1Volume(name=volume_name, persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(claim_name=pvc_name))) \
        .add_volume_mount(V1VolumeMount(mount_path=volume_mount_path, name=volume_name))
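kfp also ships a convenience modifier that builds the same volume/volume-mount pair. A minimal sketch of the equivalent wiring, assuming kfp v1 and reusing the names and paths from the example above:

from kfp import dsl, onprem

# Minimal sketch: onprem.mount_pvc constructs the V1Volume and V1VolumeMount
# shown above and applies them to the op.
def pipeline_mount_pvc_via_modifier():
    op = dsl.ContainerOp(
        name='mnist_pvc',
        image='kangwoo/kfp-mnist-storage:0.0.1',
        arguments=['--model', '/mnt/pipeline/kfp/mnist/model'])
    op.apply(onprem.mount_pvc('kfp-pvc',
                              volume_name='pipeline',
                              volume_mount_path='/mnt/pipeline'))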
Example #6
    def __init__(self,
                 pvc: str = None,
                 volume: V1Volume = None,
                 **kwargs):
        """Create a new instance of PipelineVolume.

        Args:
            pvc: The name of an existing PVC
            volume: Create a deep copy out of a V1Volume or PipelineVolume
                with no deps
        Raises:
            ValueError: if volume is not None and kwargs is not None
                        if pvc is not None and kwargs.pop("name") is not None
        """
        if volume and kwargs:
            raise ValueError("You can't pass a volume along with other "
                             "kwargs.")

        name_provided = True
        init_volume = {}
        if volume:
            init_volume = {attr: getattr(volume, attr)
                           for attr in self.attribute_map.keys()}
        else:
            if "name" in kwargs:
                init_volume = {"name": kwargs.pop("name")}
            else:
                name_provided = False
                init_volume = {"name": "pvolume-placeholder"}
            if pvc and kwargs:
                raise ValueError("You can only pass 'name' along with 'pvc'.")
            elif pvc and not kwargs:
                pvc_volume_source = V1PersistentVolumeClaimVolumeSource(
                    claim_name=str(pvc)
                )
                init_volume["persistent_volume_claim"] = pvc_volume_source
        super().__init__(**init_volume, **kwargs)
        if not name_provided:
            self.name = "pvolume-%s" % hashlib.sha256(
                bytes(json.dumps(self.to_dict(), sort_keys=True), "utf-8")
            ).hexdigest()
        self.dependent_names = []
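A small illustration of the fallback branch above: when neither a volume nor a name is supplied, the instance ends up with a deterministic, content-hashed name. The PVC name here is a placeholder, and the exact naming behavior depends on the installed kfp version:

from kfp import dsl

# Hypothetical usage: no 'name' kwarg, so the final name is derived from a
# sha256 of the serialized volume ("pvolume-<hash>").
vol = dsl.PipelineVolume(pvc='my-existing-claim')
print(vol.name)                      # e.g. pvolume-<sha256 hex digest>
print(vol.persistent_volume_claim)   # references claim_name='my-existing-claim'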
Example #7
    def __init__(self,
                 pvc: str = None,
                 volume: V1Volume = None,
                 **kwargs):
        """Create a new instance of PipelineVolume.

        Args:
            pvc: The name of an existing PVC
            volume: Create a deep copy out of a V1Volume or PipelineVolume
                with no deps
        Raises:
            ValueError: if pvc is not None and name is None
                        if volume is not None and kwargs is not None
                        if pvc is not None and kwargs.pop("name") is not None
        """
        if pvc and "name" not in kwargs:
            raise ValueError("Please provide name.")
        elif volume and kwargs:
            raise ValueError("You can't pass a volume along with other "
                             "kwargs.")

        init_volume = {}
        if volume:
            init_volume = {attr: getattr(volume, attr)
                           for attr in self.attribute_map.keys()}
        else:
            init_volume = {"name": kwargs.pop("name")
                           if "name" in kwargs else None}
            if pvc and kwargs:
                raise ValueError("You can only pass 'name' along with 'pvc'.")
            elif pvc and not kwargs:
                pvc_volume_source = V1PersistentVolumeClaimVolumeSource(
                    claim_name=pvc
                )
                init_volume["persistent_volume_claim"] = pvc_volume_source
        super().__init__(**init_volume, **kwargs)
        self.dependent_names = []
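With this variant, an existing claim must be paired with an explicit name. A minimal pipeline sketch, assuming kfp v1 and placeholder claim, image, and paths:

from kfp import dsl

# Minimal sketch: build a PipelineVolume from an existing PVC (this variant
# requires 'name' alongside 'pvc') and mount it via pvolumes=.
@dsl.pipeline(name='existing-pvc-sketch')
def existing_pvc_pipeline():
    data_vol = dsl.PipelineVolume(pvc='my-existing-claim', name='data')
    dsl.ContainerOp(name='reader',
                    image='busybox',
                    command=['sh', '-c', 'ls -la /data'],
                    pvolumes={'/data': data_vol})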
Example #8
    def _splice_volumes(self, vollist):
        namespace = self.get_user_namespace()
        already_vols = []
        if self.volumes:
            already_vols = [x["name"] for x in self.volumes]
            self.log.debug("Already_vols: %r" % already_vols)
        for vol in vollist:
            mountpoint = vol["mountpoint"]
            if not mountpoint:
                self.log.error(
                    "Mountpoint not specified for volume '{}'!".format(vol))
                continue
            volname = self._get_volume_name_for_mountpoint(mountpoint)
            shortname = mountpoint[1:].replace("/", "-")
            if shortname in already_vols:
                self.log.info(
                    "Volume '{}' already exists for pod.".format(volname))
                continue
            k8s_vol = vol["k8s_vol"]
            hostpath = vol["hostpath"]
            if k8s_vol and not hostpath:
                # If hostpath is set, k8s_vol should NOT be, but...
                # Create shadow PV and namespaced PVC for volume
                kvol = self._get_nfs_volume(k8s_vol)
                ns_vol = self._replicate_nfs_pv_with_suffix(kvol, namespace)
                self._create_pvc_for_pv(ns_vol)
            mode = "ReadOnlyMany"
            vmro = True
            if vol["mode"] == "rw":
                mode = "ReadWriteMany"
                vmro = False
            vvol = {
                "name": shortname,
            }
            if hostpath:
                vsrc = V1HostPathVolumeSource(path=hostpath, type="Directory")
                vvol["host_path"] = vsrc
            elif k8s_vol:
                pvcvs = V1PersistentVolumeClaimVolumeSource(
                    claim_name=ns_vol.metadata.name, read_only=vmro)
                vvol["persistent_volume_claim"] = pvcvs
            else:
                vvol["nfs"] = {
                    "server": vol["host"],
                    "path": vol["export"],
                    "accessModes": [mode]
                }

            self.volumes.append(vvol)
            options = vol.get("options")
            if options:
                optlist = options.split(',')
                # This does not work.
                # To get NFS with mount_options, you need to specify the
                #  'kubernetes-volume' parameter and create your PV with the
                #  appropriate volumes in the first place.
                vvol["nfs"]["mount_options"] = optlist
            vmount = {"name": shortname, "mountPath": mountpoint}
            if vmro:
                vmount["readOnly"] = True
            self.volume_mounts.append(vmount)
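The expected shape of each vollist entry is only implied by the reads above. Hypothetical entries, reconstructed from those accesses (hosts, paths, and modes are placeholders, not values from the source):

# Hypothetical vollist entries inferred from the keys read in _splice_volumes.
vollist = [
    {   # plain NFS export, mounted read-only via the in-line "nfs" source
        "mountpoint": "/project/shared",
        "k8s_vol": None,
        "hostpath": None,
        "mode": "ro",
        "host": "nfs.example.com",
        "export": "/exports/shared",
    },
    {   # host directory, mounted read-write through V1HostPathVolumeSource
        "mountpoint": "/scratch",
        "k8s_vol": None,
        "hostpath": "/mnt/scratch",
        "mode": "rw",
    },
]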
Example #9
    def __init__(
        self,
        pipeline_name: str,
        experiment_name: str,
        notebook: str,
        cos_endpoint: str,
        cos_bucket: str,
        cos_directory: str,
        cos_dependencies_archive: str,
        pipeline_version: Optional[str] = "",
        pipeline_source: Optional[str] = None,
        pipeline_outputs: Optional[List[str]] = None,
        pipeline_inputs: Optional[List[str]] = None,
        pipeline_envs: Optional[Dict[str, str]] = None,
        requirements_url: Optional[str] = None,
        bootstrap_script_url: Optional[str] = None,
        emptydir_volume_size: Optional[str] = None,
        cpu_request: Optional[str] = None,
        mem_request: Optional[str] = None,
        gpu_limit: Optional[str] = None,
        workflow_engine: Optional[str] = "argo",
        volume_mounts: Optional[List[VolumeMount]] = None,
        kubernetes_secrets: Optional[List[KubernetesSecret]] = None,
        **kwargs,
    ):
        """Create a new instance of ContainerOp.
        Args:
          pipeline_name: pipeline that this op belongs to
          experiment_name: the experiment where pipeline_name is executed
          notebook: name of the notebook that will be executed per this operation
          cos_endpoint: object storage endpoint, e.g. weaikish1.fyre.ibm.com:30442
          cos_bucket: bucket to retrieve archive from
          cos_directory: name of the directory in the object storage bucket to pull
          cos_dependencies_archive: archive file name to get from object storage bucket, e.g. archive1.tar.gz
          pipeline_version: optional version identifier
          pipeline_source: pipeline source
          pipeline_outputs: comma delimited list of files produced by the notebook
          pipeline_inputs: comma delimited list of files to be consumed/are required by the notebook
          pipeline_envs: dictionary of environmental variables to set in the container prior to execution
          requirements_url: URL to a python requirements.txt file to be installed prior to running the notebook
          bootstrap_script_url: URL to a custom python bootstrap script to run
          emptydir_volume_size: size (GB) of the volume to create for the workspace when using the CRI-O container runtime
          cpu_request: number of CPUs requested for the operation
          mem_request: memory requested for the operation (in Gi)
          gpu_limit: maximum number of GPUs allowed for the operation
          workflow_engine: Kubeflow workflow engine, defaults to 'argo'
          volume_mounts: data volumes to be mounted
          kubernetes_secrets: secrets to be made available as environment variables
          kwargs: additional key value pairs to pass e.g. name, image, sidecars & is_exit_handler.
                  See Kubeflow pipelines ContainerOp definition for more parameters or how to use
                  https://kubeflow-pipelines.readthedocs.io/en/latest/source/kfp.dsl.html#kfp.dsl.ContainerOp
        """

        self.pipeline_name = pipeline_name
        self.pipeline_version = pipeline_version
        self.pipeline_source = pipeline_source
        self.experiment_name = experiment_name
        self.notebook = notebook
        self.notebook_name = os.path.basename(notebook)
        self.cos_endpoint = cos_endpoint
        self.cos_bucket = cos_bucket
        self.cos_directory = cos_directory
        self.cos_dependencies_archive = cos_dependencies_archive
        self.container_work_dir_root_path = "./"
        self.container_work_dir_name = "jupyter-work-dir/"
        self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
        self.bootstrap_script_url = bootstrap_script_url
        self.requirements_url = requirements_url
        self.pipeline_outputs = pipeline_outputs
        self.pipeline_inputs = pipeline_inputs
        self.pipeline_envs = pipeline_envs
        self.cpu_request = cpu_request
        self.mem_request = mem_request
        self.gpu_limit = gpu_limit
        self.volume_mounts = volume_mounts  # optional data volumes to be mounted to the pod
        self.kubernetes_secrets = kubernetes_secrets  # optional secrets to be made available as env vars

        argument_list = []
        """ CRI-o support for kfp pipelines
            We need to attach an emptydir volume for each notebook that runs since CRI-o runtime does not allow
            us to write to the base image layer file system, only to volumes.
        """
        self.emptydir_volume_name = "workspace"
        self.emptydir_volume_size = emptydir_volume_size
        self.python_user_lib_path = ""
        self.python_user_lib_path_target = ""
        self.python_pip_config_url = ""

        if self.emptydir_volume_size:
            self.container_work_dir_root_path = "/opt/app-root/src/"
            self.container_python_dir_name = "python3/"
            self.container_work_dir = self.container_work_dir_root_path + self.container_work_dir_name
            self.python_user_lib_path = self.container_work_dir + self.container_python_dir_name
            self.python_user_lib_path_target = "--target=" + self.python_user_lib_path
            self.python_pip_config_url = ELYRA_PIP_CONFIG_URL

        if not self.bootstrap_script_url:
            self.bootstrap_script_url = ELYRA_BOOTSTRAP_SCRIPT_URL

        if not self.requirements_url:
            self.requirements_url = ELYRA_REQUIREMENTS_URL

        if "name" not in kwargs:
            raise TypeError("You need to provide a name for the operation.")
        elif not kwargs.get("name"):
            raise ValueError("You need to provide a name for the operation.")

        if "image" not in kwargs:
            raise ValueError("You need to provide an image.")

        if not notebook:
            raise ValueError("You need to provide a notebook.")

        if "arguments" not in kwargs:
            """If no arguments are passed, we use our own.
            If ['arguments'] are set, we assume container's ENTRYPOINT is set and dependencies are installed
            NOTE: Images being pulled must have python3 available on PATH and cURL utility
            """

            common_curl_options = '--fail -H "Cache-Control: no-cache"'

            argument_list.append(
                f"mkdir -p {self.container_work_dir} && cd {self.container_work_dir} && "
                f"echo 'Downloading {self.bootstrap_script_url}' && "
                f"curl {common_curl_options} -L {self.bootstrap_script_url} --output bootstrapper.py && "
                f"echo 'Downloading {self.requirements_url}' && "
                f"curl {common_curl_options} -L {self.requirements_url} --output requirements-elyra.txt && "
                f"echo 'Downloading {ELYRA_REQUIREMENTS_URL_PY37}' && "
                f"curl {common_curl_options} -L {ELYRA_REQUIREMENTS_URL_PY37} --output requirements-elyra-py37.txt && "
            )

            if self.emptydir_volume_size:
                argument_list.append(
                    f"mkdir {self.container_python_dir_name} && cd {self.container_python_dir_name} && "
                    f"echo 'Downloading {self.python_pip_config_url}' && "
                    f"curl {common_curl_options} -L {self.python_pip_config_url} --output pip.conf && cd .. &&"
                )

            argument_list.append(
                f"python3 -m pip install {self.python_user_lib_path_target} packaging && "
                "python3 -m pip freeze > requirements-current.txt && "
                "python3 bootstrapper.py "
                f'--pipeline-name "{self.pipeline_name}" '
                f"--cos-endpoint {self.cos_endpoint} "
                f"--cos-bucket {self.cos_bucket} "
                f'--cos-directory "{self.cos_directory}" '
                f'--cos-dependencies-archive "{self.cos_dependencies_archive}" '
                f'--file "{self.notebook}" ')

            if self.pipeline_inputs:
                inputs_str = self._artifact_list_to_str(self.pipeline_inputs)
                argument_list.append(f'--inputs "{inputs_str}" ')

            if self.pipeline_outputs:
                outputs_str = self._artifact_list_to_str(self.pipeline_outputs)
                argument_list.append(f'--outputs "{outputs_str}" ')

            if self.emptydir_volume_size:
                argument_list.append(
                    f'--user-volume-path "{self.python_user_lib_path}" ')

            kwargs["command"] = ["sh", "-c"]
            kwargs["arguments"] = "".join(argument_list)

        super().__init__(**kwargs)

        # add user-specified volume mounts: the referenced PVCs must exist
        # or this generic operation will fail
        if self.volume_mounts:
            unique_pvcs = []
            for volume_mount in self.volume_mounts:
                if volume_mount.pvc_name not in unique_pvcs:
                    self.add_volume(
                        V1Volume(
                            name=volume_mount.pvc_name,
                            persistent_volume_claim=
                            V1PersistentVolumeClaimVolumeSource(
                                claim_name=volume_mount.pvc_name),
                        ))
                    unique_pvcs.append(volume_mount.pvc_name)
                self.container.add_volume_mount(
                    V1VolumeMount(mount_path=volume_mount.path,
                                  name=volume_mount.pvc_name))

        # We must deal with the envs after the superclass initialization since these amend the
        # container attribute that isn't available until now.
        if self.pipeline_envs:
            # Convert dict entries to the format kfp needs
            for key, value in self.pipeline_envs.items():
                self.container.add_env_variable(
                    V1EnvVar(name=key, value=value))

        if self.kubernetes_secrets:
            for secret in self.kubernetes_secrets:  # Convert tuple entries to format kfp needs
                self.container.add_env_variable(
                    V1EnvVar(
                        name=secret.env_var,
                        value_from=V1EnvVarSource(
                            secret_key_ref=V1SecretKeySelector(
                                name=secret.name, key=secret.key)),
                    ))

        # If crio volume size is found then assume kubeflow pipelines environment is using CRI-o as
        # its container runtime
        if self.emptydir_volume_size:
            self.add_volume(
                V1Volume(
                    empty_dir=V1EmptyDirVolumeSource(
                        medium="", size_limit=self.emptydir_volume_size),
                    name=self.emptydir_volume_name,
                ))

            self.container.add_volume_mount(
                V1VolumeMount(mount_path=self.container_work_dir_root_path,
                              name=self.emptydir_volume_name))

            # Expose the location of the elyra dependencies installed in the volume via PYTHONPATH
            self.container.add_env_variable(
                V1EnvVar(name="PYTHONPATH", value=self.python_user_lib_path))

        if self.cpu_request:
            self.container.set_cpu_request(cpu=str(cpu_request))

        if self.mem_request:
            self.container.set_memory_request(memory=str(mem_request) + "G")

        if self.gpu_limit:
            gpu_vendor = self.pipeline_envs.get("GPU_VENDOR", "nvidia")
            self.container.set_gpu_limit(gpu=str(gpu_limit), vendor=gpu_vendor)

        # Generate unique ELYRA_RUN_NAME value and expose it as an environment
        # variable in the container
        if not workflow_engine:
            raise ValueError(
                "workflow_engine is missing and needs to be specified.")
        if workflow_engine.lower() == "argo":
            # attach RUN_ID_PLACEHOLDER as run name
            # '{{workflow.annotations.pipelines.kubeflow.org/run_name}}' variable
            # cannot be resolved by Argo in KF 1.4
            run_name_placeholder = RUN_ID_PLACEHOLDER
            self.container.add_env_variable(
                V1EnvVar(name="ELYRA_RUN_NAME", value=run_name_placeholder))
        elif workflow_engine.lower() == "tekton":
            try:
                from kfp_tekton import TektonClient  # noqa: F401
            except ImportError:
                raise ValueError(
                    "kfp-tekton not installed. Please install using elyra[kfp-tekton] to use Tekton engine."
                )

            # For Tekton derive the value from the specified pod annotation
            annotation = "pipelines.kubeflow.org/run_name"
            field_path = f"metadata.annotations['{annotation}']"
            self.container.add_env_variable(
                V1EnvVar(
                    name="ELYRA_RUN_NAME",
                    value_from=V1EnvVarSource(field_ref=V1ObjectFieldSelector(
                        field_path=field_path)),
                ))
        else:
            raise ValueError(
                f"{workflow_engine} is not a supported workflow engine.")

        # Attach metadata to the pod
        # Node type (a static type for this op)
        self.add_pod_label(
            "elyra/node-type",
            ExecuteFileOp._normalize_label_value("notebook-script"))
        # Pipeline name
        self.add_pod_label(
            "elyra/pipeline-name",
            ExecuteFileOp._normalize_label_value(self.pipeline_name))
        # Pipeline version
        self.add_pod_label(
            "elyra/pipeline-version",
            ExecuteFileOp._normalize_label_value(self.pipeline_version))
        # Experiment name
        self.add_pod_label(
            "elyra/experiment-name",
            ExecuteFileOp._normalize_label_value(self.experiment_name))
        # Pipeline node name
        self.add_pod_label(
            "elyra/node-name",
            ExecuteFileOp._normalize_label_value(kwargs.get("name")))
        # Pipeline node file
        self.add_pod_annotation("elyra/node-file-name", self.notebook)

        # Identify the pipeline source, which can be a
        # pipeline file (mypipeline.pipeline), a Python
        # script or notebook that was submitted
        if self.pipeline_source is not None:
            self.add_pod_annotation("elyra/pipeline-source",
                                    self.pipeline_source)
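A hypothetical instantiation of the op above, typically called from inside a @dsl.pipeline function. Every value is a placeholder; name and image are passed through kwargs to the underlying ContainerOp, as the constructor requires:

# Hypothetical usage of ExecuteFileOp; all values are placeholders and the
# image is assumed to have python3 and curl available, as noted above.
notebook_op = ExecuteFileOp(
    pipeline_name='analysis-pipeline',
    experiment_name='analysis-experiment',
    notebook='notebooks/train.ipynb',
    cos_endpoint='http://minio-service:9000',
    cos_bucket='pipelines',
    cos_directory='analysis-pipeline-run',
    cos_dependencies_archive='train-deps.tar.gz',
    pipeline_envs={'GPU_VENDOR': 'nvidia'},
    cpu_request='1',
    mem_request='4',
    gpu_limit='1',
    workflow_engine='argo',
    name='execute-train-notebook',
    image='quay.io/example/notebook-runtime:latest',
)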
    producer_task = producer_op()
    processor_task = processor_op(producer_task.outputs['output_1'],
                                  producer_task.outputs['output_2'])
    consumer_task = consumer_op(processor_task.outputs['output_1'],
                                processor_task.outputs['output_2'])

    markdown_task = create_component_from_func(func=metadata_and_metrics)()
    # This line is only needed for compiling using dsl-compile to work
    kfp.dsl.get_pipeline_conf().data_passing_method = volume_based_data_passing_method


from kubernetes.client.models import V1Volume, V1PersistentVolumeClaimVolumeSource
from kfp.dsl import data_passing_methods

volume_based_data_passing_method = data_passing_methods.KubernetesVolume(
    volume=V1Volume(
        name='data',
        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
            claim_name='data-volume', ),
    ),
    path_prefix='artifact_data/',
)

if __name__ == '__main__':
    pipeline_conf = kfp.dsl.PipelineConf()
    pipeline_conf.data_passing_method = volume_based_data_passing_method
    kfp.compiler.Compiler().compile(artifact_passing_pipeline,
                                    __file__ + '.yaml',
                                    pipeline_conf=pipeline_conf)