Exemplo n.º 1
0
 def test_store_big_run(self):
     """
     Regression test for storing and reading back a run with a very large body.

     Runs with artifacts (inputs or outputs) can carry big previews (bounded,
     but still large). After the move to MySQL, a run like the one stored here
     failed to be read back after insert with
     ``_pickle.UnpicklingError: pickle data was truncated``; the fix was
     switching the BLOB columns to ``sqlalchemy.dialects.mysql.MEDIUMBLOB``.
     This test guards that fix by round-tripping the big run through the DB.
     """
     project = "runs-project"
     mlrun.new_project(project)
     run_uid = "some-uid"
     with open(str(self.assets_path / "big-run.json")) as body_file:
         big_run_body = json.load(body_file)
     db = mlrun.get_run_db()
     db.store_run(big_run_body, run_uid, project)
     # Reading back is the actual regression check - it used to raise here
     db.read_run(run_uid, project)
Exemplo n.º 2
0
    def create_demo_project(self) -> mlrun.projects.MlrunProject:
        """Build the sklearn demo project: a local iris-generator job plus a
        set of hub functions, wired to the ``main`` workflow.

        :returns: the configured (not yet run) MlrunProject
        """
        self._logger.debug("Creating sklearn project")
        demo_project = mlrun.new_project(
            self.project_name, str(self.assets_path), init_git=True
        )

        self._logger.debug("Creating iris-generator function")
        iris_generator_function = mlrun.code_to_function(
            name="gen-iris",
            kind="job",
            filename=str(self.assets_path / "iris_generator_function.py"),
            image="mlrun/mlrun",
        )
        # Run remotely as a single replica, exposed via NodePort
        function_spec = iris_generator_function.spec
        function_spec.remote = True
        function_spec.replicas = 1
        function_spec.service_type = "NodePort"
        function_spec.build.commands.append("pip install pandas sklearn pyarrow")

        self._logger.debug("Setting project functions")
        demo_project.set_function(iris_generator_function)
        # Remaining pipeline steps come from the function hub
        hub_functions = {
            "describe": "hub://describe",
            "train": "hub://sklearn_classifier",
            "test": "hub://test_classifier",
            "serving": "hub://model_server",
            "live_tester": "hub://model_server_tester",
        }
        for function_name, function_url in hub_functions.items():
            demo_project.set_function(function_url, function_name)

        self._logger.debug("Setting project workflow")
        demo_project.set_workflow(
            "main", str(self.assets_path / "workflow.py"), embed=True
        )

        return demo_project
Exemplo n.º 3
0
    def create_demo_project(self) -> mlrun.projects.MlrunProject:
        """Build the horovod demo project: uploads the training script to the
        user's v3io store, registers a utils job, an MPI trainer, a hub serving
        function, a sample-images artifact, and the ``main`` workflow.

        :returns: the configured (not yet run) MlrunProject
        """
        self._logger.debug("Creating horovod project")
        demo_project = mlrun.new_project(
            self.project_name, str(self.assets_path), init_git=True
        )

        # NOTE(review): the return value of mount_v3io() is discarded here; it
        # is normally applied to a function spec - confirm this call is intended.
        mlrun.mount_v3io()

        self._logger.debug("Uploading training file")
        trainer_dest_path = pathlib.Path("/assets/horovod_training.py")
        stores = mlrun.datastore.store_manager.set()
        datastore, subpath = stores.get_or_create_store(
            self._get_v3io_user_store_path(trainer_dest_path)
        )
        datastore.upload(subpath, str(self.assets_path / "horovod_training.py"))

        # NOTE(review): log message says iris-generator but this builds the
        # utils function - likely a copy/paste leftover.
        self._logger.debug("Creating iris-generator function")
        utils_path = str(self.assets_path / "utils_functions.py")
        utils = mlrun.code_to_function(
            name="utils", kind="job", filename=utils_path, image="mlrun/mlrun",
        )
        utils.spec.remote = True
        utils.spec.replicas = 1
        utils.spec.service_type = "NodePort"
        utils.spec.command = utils_path

        self._logger.debug("Setting project functions")
        demo_project.set_function(utils)

        # Trainer runs the uploaded script as a 4-replica MPI job
        trainer = mlrun.new_function(
            name="trainer",
            kind="mpijob",
            command=self._get_v3io_user_store_path(trainer_dest_path, remote=False),
            image="mlrun/ml-models",
        )
        trainer.spec.remote = True
        trainer.spec.replicas = 4
        trainer.spec.service_type = "NodePort"

        demo_project.set_function(trainer)
        demo_project.set_function("hub://tf2_serving", "serving")

        demo_project.log_artifact(
            "images",
            target_path="http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip",
            artifact_path=mlrun.mlconf.artifact_path,
        )

        self._logger.debug("Setting project workflow")
        demo_project.set_workflow(
            "main", str(self.assets_path / "workflow.py"), embed=True
        )

        return demo_project
Exemplo n.º 4
0
 def test_create_project(self):
     """A project created and saved to the DB shows up in list_projects."""
     name = "some-project"
     mlrun.new_project(name).save_to_db()
     listed = mlrun.get_run_db().list_projects()
     assert len(listed) == 1
     assert listed[0].metadata.name == name
Exemplo n.º 5
0
 def _create_project(self, project_name, with_repo=False):
     """Build and save a demo project with prep/train/test/serving functions,
     a data artifact, and two workflows.

     :param project_name: name of the project to create
     :param with_repo:    whether the prep-data function loads code from the repo
     :returns: the saved MlrunProject
     """
     project = mlrun.new_project(project_name, str(self.assets_path))
     project.set_function(
         "prep_data.py",
         "prep-data",
         image="mlrun/mlrun",
         handler="prep_data",
         with_repo=with_repo,
     )
     # Remaining steps come from the function hub
     for set_function_args in (
         ("hub://describe",),
         ("hub://sklearn_classifier", "train"),
         ("hub://test_classifier", "test"),
         ("hub://v2_model_server", "serving"),
     ):
         project.set_function(*set_function_args)
     project.set_artifact("data", Artifact(target_path=data_url))
     project.spec.params = {"label_column": "label"}
     model_pkg_arg = EntrypointParam(
         "model_pkg_class",
         type="str",
         default=model_pkg_class,
         doc="model package/algorithm",
     )
     project.set_workflow("main", "./kflow.py", args_schema=[model_pkg_arg])
     project.set_workflow("newflow", "./newflow.py", handler="newpipe")
     project.save()
     return project
Exemplo n.º 6
0
def test_get_set_params():
    """Params set directly on project.params are readable via get_param,
    and get_param falls back to the supplied default for missing keys."""
    project = mlrun.new_project("project-name")
    project.params["param-key"] = "param-value"
    assert project.get_param("param-key") == "param-value"
    # Missing key -> default is returned
    assert project.get_param("not-exist", "default-value") == "default-value"
Exemplo n.º 7
0
def test_vault_end_to_end():
    """End-to-end Vault flow: create project secrets, run a job that reads
    them, and verify the secret values appear in the run log."""
    # This requires an MLRun API server to run and work with Vault. This port
    # should be configured to allow access to the server.
    _set_vault_mlrun_configuration(57764)

    project_name = "abc"
    aws_key_value = "1234567890"
    github_key_value = "proj1Key!!!"

    project = new_project(project_name)
    # This call will initialize Vault infrastructure and add the given secrets
    # It executes on the API server
    project.create_vault_secrets(
        {"aws_key": aws_key_value, "github_key": github_key_value}
    )

    # This API executes on the client side
    assert project.get_vault_secret_keys() == [
        "aws_key",
        "github_key",
    ], "secrets not created"

    # Create function and set container configuration
    function = code_to_function(
        name="vault-function",
        filename="{}/vault_function.py".format(examples_path),
        handler="vault_func",
        project=project_name,
        kind="job",
    )
    function.spec.image = "saarcoiguazio/mlrun:unstable"

    # Create context for the execution
    task = new_task(
        project=project_name,
        name="vault_test_run",
        handler="vault_func",
        out_path=out_path,
        params={"secrets": ["password", "path", "github_key", "aws_key"]},
    )
    task.with_secrets("vault", [])

    result = function.run(task)
    verify_state(result)

    db = get_run_db().connect()
    state, log = db.get_log(result.metadata.uid, project=project_name)
    log = str(log)
    print(state)

    # Both secret values must have been printed by the function
    for secret_value in (aws_key_value, github_key_value):
        assert (
            log.find("value: {}".format(secret_value)) != -1
        ), "secret value not detected in function output"
Exemplo n.º 8
0
    def test_zip_template(self):
        """Creating a project from a zip template loads its yaml metadata and
        copies its files into the project directory."""
        shutil.rmtree(project_dir, ignore_errors=True)
        template_path = str(self.assets_path / "project.zip")
        project = mlrun.new_project(
            "newproj2", project_dir, from_template=template_path
        )
        assert project.spec.description == "test", "failed to load yaml template"

        copied_file = os.path.join(project_dir, "prep_data.py")
        assert os.path.isfile(copied_file), "file not copied"
Exemplo n.º 9
0
    def test_git_template(self):
        """Creating a project from a git template loads its yaml metadata and
        copies its files into the project directory."""
        shutil.rmtree(project_dir, ignore_errors=True)
        template_url = "git://github.com/mlrun/project-demo.git"
        project = mlrun.new_project(
            "newproj3", project_dir, from_template=template_url
        )
        assert project.spec.description == "test", "failed to load yaml template"

        copied_file = os.path.join(project_dir, "prep_data.py")
        assert os.path.isfile(copied_file), "file not copied"
Exemplo n.º 10
0
def test_sync_functions():
    """Exporting a project drops its in-memory function objects; syncing the
    re-imported project restores them."""
    project = mlrun.new_project("project-name")
    project.set_function("hub://describe")
    original_function_objects = project.spec._function_objects
    exported_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    project.export(str(exported_path))
    imported_project = mlrun.load_project(None, str(exported_path))
    # Function objects are not serialized - they start empty after import
    assert imported_project.spec._function_objects == {}
    imported_project.sync_functions()
    _assert_project_function_objects(imported_project, original_function_objects)
Exemplo n.º 11
0
 def _create_project(self, project_name):
     """Build a project wired to the local-pipeline test function, with a data
     artifact and a label-column param.

     :param project_name: name of the project (also used as its subdirectory)
     :returns: the configured MlrunProject
     """
     project = mlrun.new_project(project_name, f"{project_dir}/{project_name}")
     project.set_function(
         str(self.assets_path / "localpipe.py"),
         "tstfunc",
         image="mlrun/mlrun",
         # kind="job"
     )
     project.set_artifact(
         "data", mlrun.artifacts.Artifact(target_path=data_url)
     )
     project.spec.params = {"label_column": "label"}
     return project
Exemplo n.º 12
0
def test_user_project():
    """
    Verify user-scoped project naming: the (lower-cased, dasherized) username
    from ``V3IO_USERNAME`` is appended to the project name.

    Fix: the environment restore is now in a ``finally`` block so it also runs
    when an assertion fails, and when ``V3IO_USERNAME`` was not set before the
    test it is removed afterwards instead of being left behind (previously the
    last test value leaked into the process environment).
    """
    project_name = "project-name"
    original_username = os.environ.get("V3IO_USERNAME")
    usernames = ["valid-username", "require_Normalization"]
    try:
        for username in usernames:
            os.environ["V3IO_USERNAME"] = username
            project = mlrun.new_project(project_name, user_project=True)
            # Username is normalized (lower-case + dasherize) before appending
            assert (
                project.metadata.name
                == f"{project_name}-{inflection.dasherize(username.lower())}"
            ), "project name doesnt include user name"
    finally:
        if original_username is None:
            # Was unset before the test - remove it again
            os.environ.pop("V3IO_USERNAME", None)
        else:
            os.environ["V3IO_USERNAME"] = original_username
Exemplo n.º 13
0
    def create_demo_project(self) -> mlrun.projects.MlrunProject:
        """Build the churn demo project: raw-data artifact, a clean-data job,
        a set of hub functions, and the ``main`` workflow.

        :returns: the configured (not yet run) MlrunProject
        """
        self._logger.debug("Creating churn project")
        demo_project = mlrun.new_project(
            self.project_name, str(self.assets_path), init_git=True
        )

        data_url = (
            "https://raw.githubusercontent.com/mlrun/demos/master/customer-churn-prediction/WA_Fn-UseC_-Telco-"
            "Customer-Churn.csv"
        )
        demo_project.log_artifact("raw-data", target_path=data_url)

        self._logger.debug("Creating clean-data function")
        function_path = str(self.assets_path / "data_clean_function.py")
        # GPU runs use the GPU flavor of the ml-models image
        image = "mlrun/ml-models-gpu" if self.use_gpus else "mlrun/ml-models"
        clean_data_function = mlrun.code_to_function(
            name="clean_data",
            kind="job",
            filename=function_path,
            image=image,
            description="clean and encode raw data",
            categories=["data-prep"],
            labels={"author": "yasha", "framework": "xgboost"},
        ).apply(mlrun.mount_v3io())

        clean_data_function.spec.remote = True
        clean_data_function.spec.replicas = 1
        clean_data_function.spec.service_type = "NodePort"
        clean_data_function.spec.command = function_path

        self._logger.debug("Setting project functions")
        demo_project.set_function(clean_data_function)
        # Remaining pipeline steps come from the function hub
        for function_name, function_url in (
            ("describe", "hub://describe"),
            ("classify", "hub://xgb_trainer"),
            ("xgbtest", "hub://xgb_test"),
            ("survive", "hub://coxph_trainer"),
            ("coxtest", "hub://coxph_test"),
            ("server", "hub://churn_server"),
        ):
            demo_project.set_function(function_url, function_name)

        self._logger.debug("Setting project workflow")
        demo_project.set_workflow(
            "main", str(self.assets_path / "workflow.py"), embed=True
        )

        return demo_project
Exemplo n.º 14
0
def test_sync_functions():
    """Exported project functions can be re-synced after import, and functions
    are retrievable via ``project.func`` - including ones only saved to the DB."""
    project = mlrun.new_project("project-name")
    project.set_function("hub://describe", "describe")
    original_function_objects = project.spec._function_objects
    exported_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    project.export(str(exported_path))
    imported_project = mlrun.load_project("./", str(exported_path))
    # Function objects are not serialized - they start empty after import
    assert imported_project.spec._function_objects == {}
    imported_project.sync_functions()
    _assert_project_function_objects(imported_project, original_function_objects)

    describe_function = project.func("describe")
    assert describe_function.metadata.name == "describe", "func did not return"

    # test that functions can be fetched from the DB (w/o set_function)
    mlrun.import_function("hub://sklearn_classifier", new_name="train").save()
    train_function = project.func("train")
    assert train_function.metadata.name == "train", "train func did not return"
Exemplo n.º 15
0
 def test_list_artifact_tags(self):
     """Tags of artifacts stored under a project are returned by
     list_artifact_tags (empty list before any artifact is stored)."""
     project_name = "some-project"
     mlrun.new_project(project_name).save_to_db()
     db = mlrun.get_run_db()
     # No artifacts yet -> no tags
     assert db.list_artifact_tags(project_name) == []
     key = "some-key"
     artifact = mlrun.artifacts.dataset.DatasetArtifact(
         key, pandas.DataFrame({"x": [1, 2]})
     )
     # Store the same artifact twice under different uids/tags
     for uid, tag in (("some-uid", "some-tag"), ("some-uid-2", "some-tag-2")):
         db.store_artifact(
             key, artifact.to_dict(), uid, tag=tag, project=project_name
         )
     artifact_tags = db.list_artifact_tags(project_name)
     assert (
         deepdiff.DeepDiff(artifact_tags, ["some-tag", "some-tag-2"], ignore_order=True,)
         == {}
     )
Exemplo n.º 16
0
def test_model_update():
    """Log a model artifact, update its spec via ``update_model``, and verify
    the updated fields round-trip through ``get_model``."""
    model = ModelArtifact("my-model", model_file="a.pkl")
    target_path = results_dir + "model/"

    project = mlrun.new_project("test-proj")
    artifact = project.log_artifact(
        model, upload=False, artifact_path=target_path
    )

    artifact_uri = f"store://artifacts/{artifact.project}/{artifact.db_key}"
    updated_model_spec = update_model(
        artifact_uri,
        parameters={"a": 1},
        metrics={"b": 2},
        inputs=[Feature(name="f1")],
        outputs=[Feature(name="f2")],
        feature_vector="vec",
        feature_weights=[1, 2],
        key_prefix="test-",
        labels={"lbl": "tst"},
        write_spec_copy=False,
    )
    print(updated_model_spec.to_yaml())

    # Re-read the model from the store and check every updated field
    model_path, model, extra_dataitems = get_model(artifact_uri)
    # print(model_spec.to_yaml())
    assert model_path.endswith(f"model/{model.model_file}"), "illegal model path"
    assert model.parameters == {"a": 1}, "wrong parameters"
    # key_prefix is applied to metric names
    assert model.metrics == {"test-b": 2}, "wrong metrics"
    assert model.inputs[0].name == "f1", "wrong inputs"
    assert model.outputs[0].name == "f2", "wrong outputs"
    assert model.feature_vector == "vec", "wrong feature_vector"
    assert model.feature_weights == [1, 2], "wrong feature_weights"
    assert model.labels == {"lbl": "tst"}, "wrong labels"
Exemplo n.º 17
0
def test_user_project():
    """A user-scoped project gets the current username appended to its name
    (from V3IO_USERNAME when set, otherwise the OS login name)."""
    project_name = "project-name"
    expected_user = os.environ.get("V3IO_USERNAME") or getpass.getuser()
    project = mlrun.new_project(project_name, user_project=True)
    assert (
        project.metadata.name == f"{project_name}-{expected_user}"
    ), "project name doesnt include user name"
Exemplo n.º 18
0
 def test_yaml_template(self):
     """Creating a project from a yaml template loads its metadata."""
     template_path = str(self.assets_path / "project.yaml")
     project = mlrun.new_project("newproj", "./", from_template=template_path)
     assert project.spec.description == "test", "failed to load yaml template"
Exemplo n.º 19
0
 def test_load_project_from_db(self):
     """A project saved to the DB can be loaded back via a db:// URL."""
     name = "some-project"
     mlrun.new_project(name).save_to_db()
     mlrun.load_project(".", f"db://{name}")
Exemplo n.º 20
0
    import_function,
    mlconf,
    mount_v3io,
    new_function,
    new_project,
    run_local,
    wait_for_pipeline_completion,
)

# Load environment variables
load_dotenv()

# Setup Project
# All locations (project name/path, artifact path, MLRun DB, docker image) are
# driven by environment variables so the same script can target different
# MLRun deployments without code changes.
project_name = os.getenv("MLRUN_PROJECT_NAME")
project_path = os.getenv("MLRUN_PROJECT_PATH")
skproj = new_project(name=project_name, context=project_path)
artifact_path = os.getenv("MLRUN_ARTIFACT_PATH")
mlconf.dbpath = os.getenv("MLRUN_DBPATH")
image = os.getenv("DOCKER_IMAGE")

# Echo the resolved configuration for operator visibility
print(f"Project name: {project_name}")
print(f"Artifacts path: {artifact_path}\nMLRun DB path: {mlconf.dbpath}")
print("Docker Image:", image)

# # Build Docker Image (only needs to be run once)
# build_image = new_function(name="build-image", kind="job")
# build_image.build_config(
#     image=f".mlrun/{image}", base_image="mlrun/mlrun", commands=["pip install pyhive"]
# )
# build_image.deploy(with_mlrun=False)