def create_demo_project(self) -> mlrun.projects.MlrunProject:
    self._logger.debug("Creating horovod project")
    demo_project = mlrun.new_project(
        self.project_name, str(self.assets_path), init_git=True
    )
    mlrun.mount_v3io()

    self._logger.debug("Uploading training file")
    trainer_src_path = str(self.assets_path / "horovod_training.py")
    trainer_dest_path = pathlib.Path("/assets/horovod_training.py")
    stores = mlrun.datastore.store_manager.set()
    datastore, subpath = stores.get_or_create_store(
        self._get_v3io_user_store_path(trainer_dest_path)
    )
    datastore.upload(subpath, trainer_src_path)

    self._logger.debug("Creating iris-generator function")
    function_path = str(self.assets_path / "utils_functions.py")
    utils = mlrun.code_to_function(
        name="utils",
        kind="job",
        filename=function_path,
        image="mlrun/mlrun",
    )
    utils.spec.remote = True
    utils.spec.replicas = 1
    utils.spec.service_type = "NodePort"
    utils.spec.command = function_path

    self._logger.debug("Setting project functions")
    demo_project.set_function(utils)

    trainer = mlrun.new_function(
        name="trainer",
        kind="mpijob",
        command=self._get_v3io_user_store_path(trainer_dest_path, remote=False),
        image="mlrun/ml-models",
    )
    trainer.spec.remote = True
    trainer.spec.replicas = 4
    trainer.spec.service_type = "NodePort"

    demo_project.set_function(trainer)
    demo_project.set_function("hub://tf2_serving", "serving")

    demo_project.log_artifact(
        "images",
        target_path="http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip",
        artifact_path=mlrun.mlconf.artifact_path,
    )

    self._logger.debug("Setting project workflow")
    demo_project.set_workflow(
        "main", str(self.assets_path / "workflow.py"), embed=True
    )

    return demo_project


def test_with_params():
    spec = tag_test(base_spec, "test_with_params")
    result = new_function().run(spec, handler=my_func)

    assert result.output("accuracy") == 16, "failed to run"
    assert result.status.artifacts[0].get("key") == "chart", "failed to run"
    assert result.artifact("chart").url, "failed to return artifact data item"


def enrich_function_from_dict(function, function_dict):
    override_function = mlrun.new_function(runtime=function_dict, kind=function.kind)
    for attribute in [
        "volumes",
        "volume_mounts",
        "env",
        "resources",
        "image_pull_policy",
        "replicas",
        "node_name",
        "node_selector",
        "affinity",
        "priority_class_name",
    ]:
        override_value = getattr(override_function.spec, attribute, None)
        if override_value:
            if attribute == "env":
                for env_dict in override_value:
                    function.set_env(env_dict["name"], env_dict["value"])
            elif attribute == "volumes":
                function.spec.update_vols_and_mounts(override_value, [])
            elif attribute == "volume_mounts":
                # volume mounts don't have a well-defined identifier (like name for a volume),
                # so we can't merge, only override
                function.spec.volume_mounts = override_value
            elif attribute == "resources":
                # don't override if both limits and requests are empty
                if override_value.get("limits", {}) or override_value.get(
                    "requests", {}
                ):
                    setattr(function.spec, attribute, override_value)
            else:
                setattr(function.spec, attribute, override_value)
    return function


def test_async_nested():
    function = mlrun.new_function("tests", kind="serving")
    graph = function.set_topology("flow", engine="async")
    graph.add_step(name="s1", class_name="Echo")
    graph.add_step(name="s2", handler="multiply_input", after="s1")
    graph.add_step(name="s3", class_name="Echo", after="s2")

    router_step = graph.add_step("*", name="ensemble", after="s2")
    router_step.add_route("m1", class_name="ModelClass", model_path=".", multiplier=100)
    router_step.add_route("m2", class_name="ModelClass", model_path=".", multiplier=200)
    router_step.add_route("m3:v1", class_name="ModelClass", model_path=".", multiplier=300)

    graph.add_step(name="final", class_name="Echo", after="ensemble").respond()

    logger.info(graph.to_yaml())
    server = function.to_mock_server()

    # plot the graph for test & debug
    graph.plot(f"{results}/serving/nested.png")

    resp = server.test("/v2/models/m2/infer", body={"inputs": [5]})
    server.wait_for_completion()
    # resp should be input (5) * multiply_input (2) * m2 multiplier (200)
    assert resp["outputs"] == 5 * 2 * 200, f"unexpected inference response {resp}"


def test_basic_flow():
    fn = mlrun.new_function("tests", kind="serving")
    graph = fn.set_topology("flow", engine="sync")
    graph.add_step(name="s1", class_name="Chain")
    graph.add_step(name="s2", class_name="Chain", after="$prev")
    graph.add_step(name="s3", class_name="Chain", after="$prev")

    server = fn.to_mock_server()
    # graph.plot("flow.png")
    print("\nFlow1:\n", graph.to_yaml())
    resp = server.test(body=[])
    assert resp == ["s1", "s2", "s3"], "flow1 result is incorrect"

    graph = fn.set_topology("flow", exist_ok=True, engine="sync")
    graph.add_step(name="s2", class_name="Chain")
    graph.add_step(
        name="s1", class_name="Chain", before="s2"
    )  # should place s1 first and s2 after it
    graph.add_step(name="s3", class_name="Chain", after="s2")

    server = fn.to_mock_server()
    logger.info(f"flow: {graph.to_yaml()}")
    resp = server.test(body=[])
    assert resp == ["s1", "s2", "s3"], "flow2 result is incorrect"

    graph = fn.set_topology("flow", exist_ok=True, engine="sync")
    graph.add_step(name="s1", class_name="Chain")
    graph.add_step(name="s3", class_name="Chain", after="$prev")
    graph.add_step(name="s2", class_name="Chain", after="s1", before="s3")

    server = fn.to_mock_server()
    logger.info(f"flow: {graph.to_yaml()}")
    resp = server.test(body=[])
    assert resp == ["s1", "s2", "s3"], "flow3 result is incorrect"


def test_noparams():
    # Since we're executing the function without inputs, it will try to use the input name as the file path
    result = new_function().run(
        params={"input_name": str(input_file_path)}, handler=my_func
    )

    assert result.output("accuracy") == 2, "failed to run"
    assert result.status.artifacts[0].get("key") == "chart", "failed to run"


def _generate_runtime(self):
    # This is following the steps in
    # https://docs.mlrun.org/en/latest/runtimes/dask-mlrun.html#set-up-the-environment
    mlconf.remote_host = "http://remote_host"
    os.environ["V3IO_USERNAME"] = self.v3io_user
    mlrun.set_environment(
        project=self.project,
        access_key=self.v3io_access_key,
        artifact_path=self.artifact_path,
    )

    dask_cluster = mlrun.new_function(
        self.name, project=self.project, kind="dask", image=self.image_name
    )
    dask_cluster.apply(auto_mount())
    dask_cluster.spec.min_replicas = 1
    dask_cluster.spec.max_replicas = 4
    dask_cluster.spec.remote = True
    dask_cluster.spec.service_type = "NodePort"
    return dask_cluster


def test_run_mlbase_sklearn_classification():
    sklearn_run = new_function().run(
        artifact_path="./temp", handler=run_mlbase_sklearn_classification
    )
    assert sklearn_run.artifact("model").meta.to_dict()["metrics"]["accuracy"] > 0
    assert sklearn_run.artifact("model").meta.to_dict()["model_file"] == "model.pkl"


def test_dask_local_hyper():
    task = NewRun().with_hyper_params({'p1': [5, 2, 3]}, 'max.accuracy')
    spec = tag_test(task, 'test_dask_local_hyper')
    run = new_function(command='dask://').run(spec, handler=my_func)
    verify_state(run)
    assert len(run.status.iterations) == 3 + 1, 'hyper parameters test failed'
    pprint(run.to_dict())


def test_submit_job_auto_mount(db: Session, client: TestClient, pod_create_mock) -> None:
    mlconf.storage.auto_mount_type = "v3io_credentials"
    api_url = "https://api/url"
    # Set different auto-mount params, to ensure the auth info is overridden
    mlconf.storage.auto_mount_params = (
        f"api={api_url},user=invalid-user,access_key=invalid-access-key"
    )
    project = "my-proj1"
    function_name = "test-function"
    function_tag = "latest"
    function = mlrun.new_function(
        name=function_name,
        project=project,
        tag=function_tag,
        kind="job",
        image="mlrun/mlrun",
    )

    submit_job_body = _create_submit_job_body(function, project)
    resp = client.post("/api/submit_job", json=submit_job_body)
    assert resp

    expected_env_vars = {
        "V3IO_API": api_url,
        "V3IO_USERNAME": username,
        "V3IO_ACCESS_KEY": access_key,
    }
    _assert_pod_env_vars(pod_create_mock, expected_env_vars)


def test_parallel_remote_retry(httpserver):
    # test calling multiple http clients with failure and one retry
    from mlrun.serving.remote import BatchHttpRequests

    retries = 1
    tester = RetryTester(retries)
    httpserver.expect_request(re.compile("^/.*"), method="POST").respond_with_handler(
        tester.handler
    )
    url = httpserver.url_for("/")

    function = mlrun.new_function("test2", kind="serving")
    flow = function.set_topology("flow", engine="async")
    flow.to(
        BatchHttpRequests(
            url_expression="event['url']",
            body_expression="event['data']",
            method="POST",
            input_path="req",
            result_path="resp",
            retries=1,
        )
    ).respond()

    server = function.to_mock_server()
    items = list(range(2))
    request = [{"url": f"{url}{i}", "data": i} for i in items]
    try:
        resp = server.test(body={"req": request})
    finally:
        server.wait_for_completion()
    assert resp["resp"] == items, "unexpected response"
    assert tester.retries_dict == {
        "/1": retries + 1,
        "/0": retries + 1,
    }, "didn't retry properly"


def test_handler_hyperlist():
    run_spec = tag_test(base_spec, 'test_handler_hyperlist')
    run_spec.spec.param_file = '{}/param_file.csv'.format(here)
    result = new_function().run(run_spec, handler=my_func)
    print(result)
    assert len(result.status.iterations) == 3 + 1, 'hyper parameters test failed'
    verify_state(result)


def test_remote_advance(httpserver, engine):
    from mlrun.serving.remote import RemoteStep

    httpserver.expect_request("/dog", method="POST", json={"x": 5}).respond_with_json(
        {"post": "ok"}
    )

    function = mlrun.new_function("test2", kind="serving")
    flow = function.set_topology("flow", engine=engine)
    flow.to(name="s1", handler="echo").to(
        RemoteStep(
            name="remote_echo",
            url=httpserver.url_for("/"),
            url_expression="endpoint + event['url']",
            body_expression="event['data']",
            input_path="req",
            result_path="resp",
        )
    ).to(name="s3", handler="echo").respond()

    server = function.to_mock_server()
    resp = server.test(body={"req": {"url": "/dog", "data": {"x": 5}}})
    server.wait_for_completion()
    assert resp == {
        "req": {"url": "/dog", "data": {"x": 5}},
        "resp": {"post": "ok"},
    }


def test_remote_class_no_header_propagation(httpserver, engine):
    from mlrun.serving.remote import RemoteStep

    httpserver.expect_request(
        "/cat", method="GET", headers={"X-dont-propagate": "me"}
    ).respond_with_json({"cat": "ok"})

    function = mlrun.new_function("test2", kind="serving")
    flow = function.set_topology("flow", engine=engine)
    flow.to(name="s1", handler="echo").to(
        RemoteStep(
            name="remote_echo",
            url=httpserver.url_for("/cat"),
            method="GET",
            input_path="req",
            result_path="resp",
            retries=0,
        )
    ).to(name="s3", handler="echo").respond()

    server = function.to_mock_server()
    try:
        server.test(body={"req": {"x": 5}}, headers={"X-dont-propagate": "me"})
        assert False
    except RuntimeError:
        pass
    finally:
        try:
            server.wait_for_completion()
        except RuntimeError:
            pass


def test_async_basic():
    function = mlrun.new_function("tests", kind="serving")
    flow = function.set_topology("flow", engine="async")

    queue = flow.to(name="s1", class_name="ChainWithContext").to("$queue", "q1", path="")
    s2 = queue.to(name="s2", class_name="ChainWithContext")
    s2.to(name="s4", class_name="ChainWithContext")
    s2.to(name="s5", class_name="ChainWithContext").respond()  # this state returns the resp
    queue.to(name="s3", class_name="ChainWithContext")

    # plot the graph for test & debug
    flow.plot(f"{results}/serving/async.png")

    server = function.to_mock_server()
    server.context.visits = {}
    logger.info(f"\nAsync Flow:\n{flow.to_yaml()}")

    resp = server.test(body=[])
    server.wait_for_completion()
    assert resp == ["s1", "s2", "s5"], "flow result is incorrect"
    assert server.context.visits == {
        "s1": 1,
        "s2": 1,
        "s4": 1,
        "s3": 1,
        "s5": 1,
    }, "flow didn't visit expected states"


def test_mount_v3io_legacy():
    username = "******"
    access_key = "access-key"
    os.environ["V3IO_USERNAME"] = username
    os.environ["V3IO_ACCESS_KEY"] = access_key
    function = mlrun.new_function(
        "function-name", "function-project", kind=mlrun.runtimes.RuntimeKinds.job
    )
    function.apply(mlrun.mount_v3io_legacy())
    expected_volume = {
        "flexVolume": {
            "driver": "v3io/fuse",
            "options": {
                "accessKey": access_key,
                "container": "users",
                "subPath": f"/{username}",
            },
        },
        "name": "v3io",
    }
    expected_volume_mount = {"mountPath": "/User", "name": "v3io", "subPath": ""}
    assert (
        deepdiff.DeepDiff(
            [expected_volume],
            function.spec.volumes,
            ignore_order=True,
        )
        == {}
    )
    assert (
        deepdiff.DeepDiff(
            [expected_volume_mount],
            function.spec.volume_mounts,
            ignore_order=True,
        )
        == {}
    )


def test_handler_hyperlist():
    run_spec = tag_test(base_spec, "test_handler_hyperlist")
    run_spec.spec.param_file = f"{tests_root_directory}/param_file.csv"
    result = new_function().run(run_spec, handler=my_func)
    print(result)
    assert len(result.status.iterations) == 3 + 1, "hyper parameters test failed"
    verify_state(result)


def test_create_project_from_file_with_legacy_structure():
    project_name = "project-name"
    description = "project description"
    params = {"param_key": "param value"}
    artifact_path = "/tmp"
    legacy_project = mlrun.projects.project.MlrunProjectLegacy(
        project_name, description, params, artifact_path=artifact_path
    )
    function_name = "trainer-function"
    function = mlrun.new_function(function_name, project_name)
    legacy_project.set_function(function, function_name)
    legacy_project.set_function("hub://describe", "describe")
    workflow_name = "workflow-name"
    workflow_file_path = (
        pathlib.Path(tests.conftest.tests_root_directory) / "projects" / "workflow.py"
    )
    legacy_project.set_workflow(workflow_name, str(workflow_file_path))
    artifact_dict = {
        "key": "raw-data",
        "kind": "",
        "iter": 0,
        "tree": "latest",
        "target_path": "https://raw.githubusercontent.com/mlrun/demos/master/customer-churn-prediction/WA_Fn-UseC_-Telc"
        "o-Customer-Churn.csv",
        "db_key": "raw-data",
    }
    legacy_project.artifacts = [artifact_dict]
    legacy_project_file_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    legacy_project.save(str(legacy_project_file_path))

    project = mlrun.load_project("./", str(legacy_project_file_path))
    assert project.kind == "project"
    assert project.metadata.name == project_name
    assert project.spec.description == description
    # assert accessible from the project as well
    assert project.description == description
    assert project.spec.artifact_path == artifact_path
    # assert accessible from the project as well
    assert project.artifact_path == artifact_path
    assert deepdiff.DeepDiff(params, project.spec.params, ignore_order=True) == {}
    # assert accessible from the project as well
    assert deepdiff.DeepDiff(params, project.params, ignore_order=True) == {}
    assert (
        deepdiff.DeepDiff(
            legacy_project.functions,
            project.functions,
            ignore_order=True,
        )
        == {}
    )
    assert (
        deepdiff.DeepDiff(
            legacy_project.workflows,
            project.workflows,
            ignore_order=True,
        )
        == {}
    )
    assert (
        deepdiff.DeepDiff(
            legacy_project.artifacts,
            project.artifacts,
            ignore_order=True,
        )
        == {}
    )


def test_handler_project():
    spec = tag_test(base_spec, 'test_handler_project')
    spec.metadata.project = 'myproj'
    spec.metadata.labels = {'owner': 'yaronh'}
    result = new_function().run(spec, handler=my_func)
    print(result)
    assert result.output('accuracy') == 16, 'failed to run'
    verify_state(result)


def test_init_class():
    fn = mlrun.new_function("tests", kind="serving")
    graph = fn.set_topology("flow", engine="sync")
    graph.to(name="s1", class_name="Echo").to(name="s2", class_name="RespName")

    server = fn.to_mock_server()
    resp = server.test(body=5)
    assert resp == [5, "s2"], f"got unexpected result {resp}"


def test_handler_hyper():
    run_spec = tag_test(base_spec, 'test_handler_hyper')
    run_spec.with_hyper_params({'p1': [1, 5, 3]}, selector='max.accuracy')
    result = new_function().run(run_spec, handler=my_func)
    print(result)
    assert len(result.status.iterations) == 3 + 1, 'hyper parameters test failed'
    assert result.status.results['best_iteration'] == 2, 'failed to select best iteration'
    verify_state(result)


def test_handler_project():
    spec = tag_test(base_spec, "test_handler_project")
    spec.metadata.project = "myproj"
    spec.metadata.labels = {"owner": "yaronh"}
    result = new_function().run(spec, handler=my_func)
    print(result)
    assert result.output("accuracy") == 16, "failed to run"
    verify_state(result)


def _create_dask_func(uri):
    dask_cluster_name = "dask-cluster"
    dask_cluster = new_function(dask_cluster_name, kind='dask', image='mlrun/ml-models')
    dask_cluster.spec.remote = False
    dask_uri = uri
    dask_cluster.export(dask_uri)


def test_hyper_custom():
    run_spec = tag_test(base_spec, "test_hyper_custom")
    run = new_function().run(run_spec, handler=custom_hyper_func)
    verify_state(run)
    assert len(run.status.iterations) == 1 + 4, "wrong number of iterations"

    results = [line[3] for line in run.status.iterations[1:]]
    print(results)
    assert run.output("best_iteration") == 3, "wrong best iteration"


def test_run_mlbase_xgboost_regression():
    xgb_run = new_function().run(
        artifact_path="./temp", handler=run_mlbase_xgboost_regression
    )
    assert xgb_run.artifact("model").meta.to_dict()["metrics"]["accuracy"] > 0
    assert "confusion matrix" not in xgb_run.artifact("model").meta.to_dict()["extra_data"]
    assert xgb_run.artifact("model").meta.to_dict()["model_file"] == "model.pkl"


def test_simple_function():
    # Thread(target=create_function, args=(myfunction, 4444)).start()
    _thread.start_new_thread(create_function, (myfunction, 4444))
    time.sleep(2)

    spec = tag_test(base_spec, 'simple_function')
    result = new_function(command='http://localhost:4444').run(spec)
    print(result)
    verify_state(result)


def test_hyper_random():
    grid_params = {"p2": [2, 1, 3], "p3": [10, 20, 30]}
    run_spec = tag_test(base_spec, "test_hyper_random")
    run_spec.with_hyper_params(grid_params, selector="r1", strategy="random")
    run_spec.spec.parameters["MAX_RANDOM_EVALS"] = 5
    run = new_function().run(run_spec, handler=hyper_func)
    verify_state(run)
    assert len(run.status.iterations) == 1 + 5, "wrong number of iterations"


def test_handler_hyper():
    run_spec = tag_test(base_spec, "test_handler_hyper")
    run_spec.with_hyper_params({"p1": [1, 5, 3]}, selector="max.accuracy")
    result = new_function().run(run_spec, handler=my_func)
    print(result)
    assert len(result.status.iterations) == 3 + 1, "hyper parameters test failed"
    assert result.status.results["best_iteration"] == 2, "failed to select best iteration"
    verify_state(result)


def test_hyper_function():
    # Thread(target=create_function, args=(myfunction, 4444))
    _thread.start_new_thread(create_function, (myfunction, 4444))
    time.sleep(2)

    spec = tag_test(base_spec, 'hyper_function')
    spec.spec.hyperparams = {'p1': [1, 2, 3]}
    result = new_function(command='http://localhost:4444').run(spec)
    print(result)
    verify_state(result)


def test_handler_with_context():
    fn = mlrun.new_function("tests", kind="serving")
    graph = fn.set_topology("flow", engine="sync")
    graph.to(name="s1", handler=myfunc1).to(name="s2", handler=myfunc2).to(
        name="s3", handler=myfunc1
    )
    server = fn.to_mock_server()
    resp = server.test(body=5)
    # expect 5 * 2 * 2 * 2 = 40
    assert resp == 40, f"got unexpected result {resp}"