Example #1
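Runs a Dask function as a step in a Kubeflow pipeline, compiles the pipeline to YAML, executes it, and then verifies the resulting run's metadata and spec against the MLRun run DB.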
    def test_run_pipeline(self):
        @kfp.dsl.pipeline(name="dask_pipeline")
        def dask_pipe(x=1, y=10):

            # use_db=True makes the step reference the function by its DB pointer instead of embedding the full function spec in the compiled YAML
            self.dask_function.as_step(
                new_task(handler="main",
                         name="dask_pipeline",
                         params={
                             "x": x,
                             "y": y
                         }),
                use_db=True,
            )

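        # compile the pipeline to daskpipe.yaml; type_check=False skips KFP's static parameter type checking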
        kfp.compiler.Compiler().compile(dask_pipe,
                                        "daskpipe.yaml",
                                        type_check=False)
        arguments = {"x": 4, "y": -5}
        artifact_path = "/User/test"
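        # submit the pipeline for execution; run_pipeline returns the workflow run ID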
        workflow_run_id = run_pipeline(
            dask_pipe,
            arguments,
            artifact_path=artifact_path,
            run="DaskExamplePipeline",
            experiment="dask pipe",
        )

        wait_for_pipeline_completion(workflow_run_id)

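        # the workflow ID is attached as a label to every run the pipeline produces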
        runs = self._run_db.list_runs(project=self.project_name,
                                      labels=f"workflow={workflow_run_id}")
        assert len(runs) == 1

        run = runs[0]
        run_uid = run["metadata"]["uid"]
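        # check that the run carries the expected name, project, and user labels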
        self._verify_run_metadata(
            run["metadata"],
            uid=run_uid,
            name="mydask-main",
            project=self.project_name,
            labels={
                "v3io_user": self._test_env["V3IO_USERNAME"],
                "owner": self._test_env["V3IO_USERNAME"],
            },
        )
        self._verify_run_spec(
            run["spec"],
            parameters={
                "x": 4,
                "y": -5
            },
            outputs=["run_id"],
            output_path="/User/test",
            data_stores=[],
        )

        # remove compiled dask.yaml file
        os.remove("daskpipe.yaml")
Example #2
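Builds an MLRun project from two job functions, registers a workflow on it, runs the workflow, and lists the resulting runs.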
# NOTE: the original snippet is truncated here; the imports and the opening of
# this code_to_function() call are reconstructed, and the filename/name values
# below are assumptions (project_path, image, skproj, project_name, and
# artifact_path are defined earlier in the original script).
from mlrun import code_to_function, get_run_db
from mlrun.run import wait_for_pipeline_completion

data_fn = code_to_function(
    filename=f"{project_path}/data_prep.py",  # assumed filename
    name="prep-data",  # assumed function name
    kind="job",
    image=f".mlrun/{image}",
    handler="handler",
)
train_fn = code_to_function(
    filename=f"{project_path}/training.py",
    name="train-model",
    kind="job",
    image=f".mlrun/{image}",
    handler="handler",
)
skproj.set_function(data_fn)
skproj.set_function(train_fn)

# Set Pipeline
skproj.set_workflow("main", "pipeline.py")
skproj.save(f"{project_path}/project.yaml")

# Run Pipeline
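# dirty=True allows running even when the project's git repo has uncommitted changes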
run_id = skproj.run(
    "main",
    arguments={},
    artifact_path=artifact_path,
    dirty=True,
)

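# wait for the workflow to finish, then list its runs from the run DB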
wait_for_pipeline_completion(run_id)
db = get_run_db().connect()
db.list_runs(project=project_name, labels=f"workflow={run_id}").show()
Example #3
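Defines a two-step pipeline in which a validation step consumes the model artifact produced by a training step, then verifies both recorded runs.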
    def test_run_kubeflow_pipeline(self):
        @kfp.dsl.pipeline(name="job test",
                          description="demonstrating mlrun usage")
        def job_pipeline(p1: int = 9) -> None:
            """Define our pipeline.

            :param p1: A model parameter.
            """

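            # the training step produces "mymodel", which the validation step consumes below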
            train = self._trainer.as_step(handler="training",
                                          params={"p1": p1},
                                          outputs=["mymodel"])

            self._trainer.as_step(
                handler="validation",
                inputs={"model": train.outputs["mymodel"]},
                outputs=["validation"],
            )

        kfp.compiler.Compiler().compile(job_pipeline, "jobpipe.yaml")
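        # {{workflow.uid}} in the artifact path is resolved to the workflow run ID at runtime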
        artifact_path = "v3io:///users/admin/kfp/{{workflow.uid}}/"
        arguments = {"p1": 8}
        workflow_run_id = run_pipeline(job_pipeline,
                                       arguments,
                                       experiment="my-job",
                                       artifact_path=artifact_path)

        wait_for_pipeline_completion(workflow_run_id)

        # TODO: understand why a single db instantiation isn't enough, and fix the bug in the db
        self._run_db = get_run_db()
        runs = self._run_db.list_runs(project=self.project_name,
                                      labels=f"workflow={workflow_run_id}")
        assert len(runs) == 2

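        # runs are listed most-recent-first, so validation (the later step) comes before training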
        validation_run = runs[0]
        training_run = runs[1]
        self._verify_run_metadata(
            training_run["metadata"],
            uid=training_run["metadata"]["uid"],
            name="my-trainer-training",
            project=self.project_name,
            labels={
                "v3io_user": self._test_env["V3IO_USERNAME"],
                "owner": self._test_env["V3IO_USERNAME"],
                "kind": "job",
                "category": "tests",
            },
        )
        self._verify_run_metadata(
            validation_run["metadata"],
            uid=validation_run["metadata"]["uid"],
            name="my-trainer-validation",
            project=self.project_name,
            labels={
                "v3io_user": self._test_env["V3IO_USERNAME"],
                "owner": self._test_env["V3IO_USERNAME"],
                "kind": "job",
            },
        )
        self._verify_run_spec(
            training_run["spec"],
            parameters={"p1": 8},
            outputs=["mymodel", "run_id"],
            output_path=f"v3io:///users/admin/kfp/{workflow_run_id}/",
            inputs={},
            data_stores=[],
        )
        self._verify_run_spec(
            validation_run["spec"],
            parameters={},
            outputs=["validation", "run_id"],
            output_path=f"v3io:///users/admin/kfp/{workflow_run_id}/",
            inputs={
                "model":
                f"store://artifacts/{self.project_name}/my-trainer-training_mymodel:{workflow_run_id}",
            },
            data_stores=[],
        )

        # remove compiled jobpipe.yaml file
        os.remove("jobpipe.yaml")