def test_local_xgb_trainer_import_function():
    # importing data preparation function locally
    fn = import_function("../gen_class_data/function.yaml")
    fn.run(params={
        "n_samples": 10_000,
        "m_features": 5,
        "k_classes": 2,
        "weight": [0.5, 0.5],
        "sk_params": {"n_informative": 2},
        "file_ext": "csv"},
        local=True,
        artifact_path="./artifacts/inputs")

    fn = import_function("function.yaml")
    fn.run(params={
        "model_type": "classifier",
        "CLASS_tree_method": "hist",
        "CLASS_objective": "binary:logistic",
        "CLASS_booster": "gbtree",
        "FIT_verbose": 0,
        "label_column": "labels",
        "test_set": "./artifacts/test-set"},
        local=True,
        inputs={"dataset": "./artifacts/inputs/classifier-data.csv"})

    assert os.path.exists(os.getcwd() + "/models/model.pkl")

def test_local_xgb_test_import_local_function():
    # importing data preparation function (gen_class_data) locally
    fn = import_function("../gen_class_data/function.yaml")
    fn.run(params={
        "n_samples": 10_000,
        "m_features": 5,
        "k_classes": 2,
        "weight": [0.5, 0.5],
        "sk_params": {"n_informative": 2},
        "file_ext": "csv"},
        local=True,
        artifact_path="./artifacts/inputs")

    # importing model training function (xgb_trainer) locally
    fn = import_function("../xgb_trainer/function.yaml")
    fn.run(params={
        "model_type": "classifier",
        "CLASS_tree_method": "hist",
        "CLASS_objective": "binary:logistic",
        "CLASS_booster": "gbtree",
        "FIT_verbose": 0,
        "label_column": "labels",
        "test_set": "./artifacts/test-set"},
        local=True,
        inputs={"dataset": "./artifacts/inputs/classifier-data.csv"})

    # importing xgb_test function.yaml and running tests
    fn = import_function("function.yaml")
    fn.run(params={
        "label_column": "labels",
        "plots_dest": "plots/xgb_test"},
        local=True,
        inputs={
            "test_set": "./artifacts/inputs/classifier-data.csv",
            "models_path": os.getcwd() + "/models/model.pkl"})

    # tests for gen_class_data
    assert os.path.exists("./artifacts/inputs/classifier-data.csv")
    df = pd.read_csv("artifacts/inputs/classifier-data.csv")
    assert df["labels"].sum() == 5008

    # tests for xgb_trainer
    assert os.path.exists(os.getcwd() + "/models/model.pkl")

def init_context(context):
    setattr(context, "batch", [])
    setattr(context, "window", int(os.getenv("window", 10)))
    setattr(context, "save_to", os.getenv("save_to", "/bigdata/inference_pq/"))
    os.makedirs(context.save_to, exist_ok=True)

    mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or "http://mlrun-api:8080"
    artifact_path = os.getenv("artifact_path", None)
    if artifact_path:
        mlrun.mlconf.artifact_path = artifact_path
    if "hub_url" in os.environ:
        mlrun.mlconf.hub_url = os.environ["hub_url"]

    virtual_drift_fn = mlrun.import_function("hub://virtual_drift")
    virtual_drift_fn.apply(mlrun.auto_mount())
    setattr(context, "virtual_drift_fn", virtual_drift_fn)

    predictions_col = os.getenv("predictions", None)
    label_col = os.getenv("label_col", None)
    setattr(context, "base_dataset", os.getenv("base_dataset", ""))
    setattr(context, "indexes", json.loads(os.environ.get("indexes", "[]")))
    setattr(context, "predictions_col", predictions_col)
    setattr(context, "label_col", label_col)
    setattr(context, "results_tsdb_container", os.getenv("results_tsdb_container", None))
    setattr(context, "results_tsdb_table", os.getenv("results_tsdb_table", None))

def test_optimize_help():
    """
    Test the 'optimize' handler, passing "help" in the 'optimizations' parameter.
    """
    # Set up the test environment:
    artifact_path = _setup_environment()

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function, passing "help" in 'optimizations', and verify that no exception is raised:
    is_test_passed = True
    try:
        onnx_function.run(
            handler="optimize",
            artifact_path=artifact_path,
            params={
                "model_path": "",
                "optimizations": "help",
            },
            local=True,
        )
    except TypeError as exception:
        print(
            f"The test failed, the help was not handled properly and raised the following error: {exception}"
        )
        is_test_passed = False

    # Clean up the test environment:
    _cleanup_environment(artifact_path=artifact_path)

    assert is_test_passed

def to_function(self, default_kind=None):
    if self.url and "://" not in self.url:
        if not os.path.isfile(self.url):
            raise OSError("{} not found".format(self.url))

    kind = self.kind or default_kind
    if self.spec:
        func = mlrun.new_function(self.name, runtime=self.spec)
    elif (self.url.endswith(".yaml")
          or self.url.startswith("db://")
          or self.url.startswith("hub://")):
        func = mlrun.import_function(self.url)
        if self.image:
            func.spec.image = self.image
    elif self.url.endswith(".ipynb"):
        func = mlrun.code_to_function(self.name, filename=self.url,
                                      image=self.image, kind=kind)
    elif self.url.endswith(".py"):
        # todo: support code text as input (for UI)
        if not self.image:
            raise ValueError(
                "image must be provided with py code files, "
                "use function object for more control/settings")
        func = mlrun.code_to_function(self.name, filename=self.url,
                                      image=self.image, kind=kind)
    else:
        raise ValueError("unsupported function url {} or no spec".format(self.url))

    if self.requirements:
        func.with_requirements(self.requirements)
    self._function = func
    return func

def function_to_item(function_yaml: Path) -> Item:
    model = import_function(str(function_yaml.absolute()))
    item = Item(
        name=model.metadata.name or "",
        version=model.metadata.tag or "0.1",
        mlrun_version="",
        platform_version="",
        description=model.spec.description or "",
        doc="",
        example="",
        icon="",
        url="",
        generationDate=str(datetime.utcnow()),
        categories=model.metadata.categories or [],
        labels=model.metadata.labels or {},
        spec=Spec(
            filename=locate_py_file(function_yaml.parent) or "",
            handler=model.spec.default_handler or "",
            requirements=[],
            kind=model.kind or "",
            image=get_image(model),
        ),
        maintainers=[],
    )
    return item

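# A minimal usage sketch for function_to_item(), assuming a hub-style layout
# where each function directory holds a function.yaml beside its Python file;
# the "describe" path below is an illustrative placeholder, not a path from
# the original test suite.
def example_function_to_item():
    item = function_to_item(Path("describe/function.yaml"))
    print(item.name, item.version, item.spec.kind)
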
def test_model_monitoring_voting_ensemble(self):
    simulation_time = 60 * 5  # 5 minutes
    project = mlrun.get_run_db().get_project(self.project_name)
    project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

    iris = load_iris()
    train_set = pd.DataFrame(
        iris["data"],
        columns=[
            "sepal_length_cm",
            "sepal_width_cm",
            "petal_length_cm",
            "petal_width_cm",
        ],
    )

    # Deploy Model Servers
    # Use the following code to deploy a model server in the Iguazio instance.
    # Import the serving function from the function hub
    serving_fn = mlrun.import_function(
        "hub://v2_model_server", project=self.project_name
    ).apply(mlrun.auto_mount())
    serving_fn.set_topology(
        "router", "mlrun.serving.VotingEnsemble", name="VotingEnsemble"
    )

    # Enable model monitoring
    serving_fn.set_tracking()

    model_names = [
        "sklearn_RandomForestClassifier",
        "sklearn_LogisticRegression",
        "sklearn_AdaBoostClassifier",
    ]

    for name in model_names:
        # Log the model through the projects API so that it is available through the feature store API
        project.log_model(
            name,
            model_file=os.path.relpath(str(self.assets_path / "model.pkl")),
            training_set=train_set,
        )
        # Add the model to the serving function's routing spec
        serving_fn.add_model(
            name, model_path=f"store://models/{self.project_name}/{name}:latest"
        )

    # Deploy the serving function
    serving_fn.deploy()

    iris_data = iris["data"].tolist()
    t_end = monotonic() + simulation_time
    while monotonic() < t_end:
        data_point = choice(iris_data)
        serving_fn.invoke(
            "v2/models/VotingEnsemble/infer", json.dumps({"inputs": [data_point]})
        )
        sleep(uniform(0.2, 1.7))

def test_run_local_arc_to_parquet():
    import os
    fn = import_function("function.yaml")
    fn.run(params={"key": "higgs-sample"},
           handler="arc_to_parquet",
           inputs={"archive_url": DATA_URL},
           artifact_path=os.getcwd() + '/artifacts',
           local=True)

def to_function(self, default_kind=None):
    """generate a function object from the ref definitions"""
    if self.url and "://" not in self.url:
        if not os.path.isfile(self.url):
            raise OSError(f"{self.url} not found")

    kind = self.kind or default_kind
    if self.url:
        if (
            self.url.endswith(".yaml")
            or self.url.startswith("db://")
            or self.url.startswith("hub://")
        ):
            func = mlrun.import_function(self.url)
            if self.image:
                func.spec.image = self.image
        elif self.url.endswith(".ipynb"):
            func = mlrun.code_to_function(
                self.name, filename=self.url, image=self.image, kind=kind
            )
        elif self.url.endswith(".py"):
            # todo: support code text as input (for UI)
            if not self.image:
                raise ValueError(
                    "image must be provided with py code files, "
                    "use function object for more control/settings"
                )
            func = mlrun.code_to_function(
                self.name, filename=self.url, image=self.image, kind=kind
            )
        else:
            raise ValueError(f"unsupported function url {self.url} or no spec")
        if self.spec:
            func = enrich_function_from_dict(func, self.spec)
    elif self.code is not None:
        code = self.code
        if kind == mlrun.runtimes.RuntimeKinds.serving:
            code = code + mlrun_footer.format(
                mlrun.runtimes.serving.serving_subkind
            )
        func = mlrun.new_function(self.name, kind=kind, image=self.image)
        data = b64encode(code.encode("utf-8")).decode("utf-8")
        func.spec.build.functionSourceCode = data
        if kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
            func.spec.default_handler = "handler"
        if self.spec:
            func = enrich_function_from_dict(func, self.spec)
    elif self.spec:
        func = mlrun.new_function(self.name, runtime=self.spec)
    else:
        raise ValueError("url or spec or code must be specified")

    if self.requirements:
        func.with_requirements(self.requirements)
    self._function = func
    return func

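# A hedged usage sketch for to_function(), assuming the method above belongs
# to mlrun's FunctionReference (as in mlrun.model); the file name, image, and
# handler below are illustrative placeholders, not values from the original
# tests.
def example_function_reference_to_function():
    from mlrun.model import FunctionReference

    ref = FunctionReference(name="my-func", url="my_code.py",
                            image="mlrun/mlrun", kind="job")
    fn = ref.to_function(default_kind="job")
    fn.run(handler="handler", local=True)
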
def test_sync_functions():
    project_name = "project-name"
    project = mlrun.new_project(project_name)
    project.set_function("hub://describe", "describe")
    project_function_object = project.spec._function_objects
    project_file_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    project.export(str(project_file_path))
    imported_project = mlrun.load_project("./", str(project_file_path))
    assert imported_project.spec._function_objects == {}
    imported_project.sync_functions()
    _assert_project_function_objects(imported_project, project_function_object)

    fn = project.func("describe")
    assert fn.metadata.name == "describe", "func did not return"

    # test that functions can be fetched from the DB (w/o set_function)
    mlrun.import_function("hub://sklearn_classifier", new_name="train").save()
    fn = project.func("train")
    assert fn.metadata.name == "train", "train func did not return"

def test_xgb_serving():
    model = os.getcwd() + "/models/model.pkl"
    set_mlrun_hub_url()
    fn = import_function('hub://xgb_serving')
    fn.add_model('mymodel', model_path=model, class_name='XGBoostModel')
    server = fn.to_mock_server()

    # Testing the model
    xtest = pd.read_csv('./artifacts/inputs/classifier-data.csv')
    preds = server.predict({"instances": xtest.values[:10, :-1].tolist()})
    assert preds == [1, 0, 0, 0, 0, 0, 1, 1, 0, 1]

def test_rnn_serving():
    model_path = os.path.join(os.path.abspath('./'), 'models')
    model = model_path + '/bert_classifier_v1.h5'
    if not path.exists(model):
        download_pretrained_model(model_path)

    fn = import_function('function.yaml')
    fn.add_model('mymodel', model_path=model, class_name='RNN_Model_Serving')
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()
    # should add assert
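    # A hedged sketch of the missing assertion, mirroring the rnn_model test
    # below; the inputs and the expected output depend on the pretrained
    # model, so only the response structure would be checked here:
    # resp = server.test("/v2/models/mymodel/infer", {"inputs": DATASET})
    # assert "outputs" in resp
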
def test_rnn_serving():
    model_path = os.path.join(os.path.abspath('./'), 'models')
    model = model_path + '/rnn_model.h5'
    if not path.exists(model):
        download_pretrained_model(model_path)

    fn = import_function('function.yaml')
    fn.add_model('rnn_model', model_path=model, class_name='RNN_Model_Serving')
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()
    resp = server.test("/v2/models/rnn_model/infer", {"inputs": DATASET})
    assert resp['outputs'] == '[[0.453309565782547]]'

def test_basic_model_monitoring(self):
    simulation_time = 60 * 5  # 5 minutes

    # Deploy Model Servers
    project = mlrun.get_run_db().get_project(self.project_name)
    project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

    iris = load_iris()
    train_set = pd.DataFrame(
        iris["data"],
        columns=[
            "sepal_length_cm",
            "sepal_width_cm",
            "petal_length_cm",
            "petal_width_cm",
        ],
    )

    # Import the serving function from the function hub
    serving_fn = mlrun.import_function(
        "hub://v2_model_server", project=self.project_name
    ).apply(mlrun.auto_mount())

    # enable model monitoring
    serving_fn.set_tracking()

    model_name = "sklearn_RandomForestClassifier"

    # Log the model through the projects API so that it is available through the feature store API
    project.log_model(
        model_name,
        model_file=os.path.relpath(str(self.assets_path / "model.pkl")),
        training_set=train_set,
    )

    # Add the model to the serving function's routing spec
    serving_fn.add_model(
        model_name,
        model_path=f"store://models/{self.project_name}/{model_name}:latest",
    )

    # Deploy the function
    serving_fn.deploy()

    # Simulating requests
    iris_data = iris["data"].tolist()
    t_end = monotonic() + simulation_time
    while monotonic() < t_end:
        data_point = choice(iris_data)
        serving_fn.invoke(
            f"v2/models/{model_name}/infer", json.dumps({"inputs": [data_point]})
        )
        sleep(uniform(0.2, 1.7))

def test_local_xgb_serving():
    # importing data preparation function (gen_class_data) locally
    fn = import_function("../gen_class_data/function.yaml")
    fn.run(params={
        "n_samples": 10_000,
        "m_features": 5,
        "k_classes": 2,
        "weight": [0.5, 0.5],
        "sk_params": {"n_informative": 2},
        "file_ext": "csv"},
        local=True,
        artifact_path="./artifacts/inputs")

    # importing model training function (xgb_trainer) locally
    fn = import_function("../xgb_trainer/function.yaml")
    fn.run(params={
        "model_type": "classifier",
        "CLASS_tree_method": "hist",
        "CLASS_objective": "binary:logistic",
        "CLASS_booster": "gbtree",
        "FIT_verbose": 0,
        "label_column": "labels",
        "test_set": "./artifacts/test-set"},
        local=True,
        inputs={"dataset": "./artifacts/inputs/classifier-data.csv"})

    # this class is implemented with MLModelServer, so instantiate it directly
    # instead of using to_mock_server() (the V2 model server path)
    model = os.getcwd() + "/models/model.pkl"
    my_server = XGBoostModel("my-model", model_dir=model)
    my_server.load()

    # Testing the model
    xtest = pd.read_csv('./artifacts/inputs/classifier-data.csv')
    preds = my_server.predict({"instances": xtest.values[:10, :-1].tolist()})
    assert preds == [1, 0, 0, 0, 0, 0, 1, 1, 0, 1]

def generate_data():
    data_url = "https://raw.githubusercontent.com/parrt/random-forest-importances/master/notebooks/data/rent.csv"
    fn = import_function("../arc_to_parquet/function.yaml")
    acquire_run = fn.run(params={
        "key": "price",
        "stats": True,
        "file_ext": "csv"},
        inputs={"archive_url": data_url},
        handler="arc_to_parquet",
        local=True,
        artifact_path="artifacts")
    return acquire_run

def test_pytorch_to_onnx():
    """
    Test the 'to_onnx' handler, giving it a pytorch model.
    """
    # Set up the test environment:
    artifact_path = _setup_environment()

    # Create the model-logging function by parsing this test file's code with 'code_to_function':
    log_model_function = mlrun.code_to_function(
        filename="test_onnx_utils.py",
        name="log_model",
        kind="job",
        image="mlrun/ml-models",
    )

    # Run the function to log the model:
    log_model_run = log_model_function.run(
        handler="_log_pytorch_model",
        artifact_path=artifact_path,
        params={"model_name": MODEL_NAME},
        local=True,
    )

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function to convert our model to ONNX:
    onnx_function.run(
        handler="to_onnx",
        artifact_path=artifact_path,
        params={
            # Take the logged model from the previous function:
            "model_path": log_model_run.outputs[MODEL_NAME],
            "onnx_model_name": ONNX_MODEL_NAME,
            "framework_kwargs": {"input_signature": [((3, 224, 224), "float32")]},
        },
        local=True,
    )

    # Get the artifacts list:
    artifacts_list = os.listdir(artifact_path)
    print(f"Produced artifacts: {artifacts_list}")

    # Clean up the test environment:
    _cleanup_environment(artifact_path=artifact_path)

    # Verify the '.onnx' model was created:
    assert "{}.onnx".format(ONNX_MODEL_NAME) in artifacts_list

def arc_to_parquet():
    from mlrun import import_function
    from mlrun.platforms import auto_mount

    archive_func = import_function('hub://arc_to_parquet')
    archive_run = archive_func.run(handler="arc_to_parquet",
                                   params={
                                       "key": "rent",
                                       "stats": True,
                                       "file_ext": "csv"},
                                   inputs={"archive_url": DATA_URL},
                                   artifact_path=os.getcwd() + '/artifacts',
                                   local=True)

def test_to_onnx_help():
    """
    Test the 'to_onnx' handler, passing "help" in the 'framework_kwargs' parameter.
    """
    # Set up the test environment:
    artifact_path = _setup_environment()

    # Create the model-logging function by parsing this test file's code with 'code_to_function':
    log_model_function = mlrun.code_to_function(
        filename="test_onnx_utils.py",
        name="log_model",
        kind="job",
        image="mlrun/ml-models",
    )

    # Run the function to log the model:
    log_model_run = log_model_function.run(
        handler="_log_tf_keras_model",
        artifact_path=artifact_path,
        params={"model_name": MODEL_NAME},
        local=True,
    )

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function, passing "help" in 'framework_kwargs', and verify that no exception is raised:
    is_test_passed = True
    try:
        onnx_function.run(
            handler="to_onnx",
            artifact_path=artifact_path,
            params={
                # Take the logged model from the previous function:
                "model_path": log_model_run.outputs[MODEL_NAME],
                "framework_kwargs": "help",
            },
            local=True,
        )
    except TypeError as exception:
        print(
            f"The test failed, the help was not handled properly and raised the following error: {exception}"
        )
        is_test_passed = False

    # Clean up the test environment:
    _cleanup_environment(artifact_path=artifact_path)

    assert is_test_passed

def test_local_sentiment_analysis_serving():
    set_mlrun_hub_url()
    model_path = os.path.join(os.path.abspath('./'), 'models')
    model = model_path + '/model.pt'
    if not path.exists(model):
        download_pretrained_model(model_path)

    fn = import_function('hub://sentiment_analysis_serving')
    fn.add_model('mymodel', model_path=model, class_name='SentimentClassifierServing')
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()

    instances = ['I had a pleasure to work with such dedicated team. '
                 'Looking forward to cooperate with each and every one of them again.']
    result = server.test("/v2/models/mymodel/infer", {"instances": instances})
    assert result[0] == 2

def test_optimize():
    """
    Test the 'optimize' handler, giving it a model from the ONNX zoo git repository.
    """
    # Set up the test environment:
    artifact_path = _setup_environment()

    # Create the model-logging function by parsing this test file's code with 'code_to_function':
    log_model_function = mlrun.code_to_function(
        filename="test_onnx_utils.py",
        name="log_model",
        kind="job",
        image="mlrun/ml-models",
    )

    # Run the function to log the model:
    log_model_run = log_model_function.run(
        handler="_log_onnx_model",
        artifact_path=artifact_path,
        params={"model_name": MODEL_NAME},
        local=True,
    )

    # Import the ONNX Utils function:
    onnx_function = mlrun.import_function("function.yaml")

    # Run the function to optimize our model:
    onnx_function.run(
        handler="optimize",
        artifact_path=artifact_path,
        params={
            # Take the logged model from the previous function:
            "model_path": log_model_run.outputs[MODEL_NAME],
            "optimized_model_name": OPTIMIZED_ONNX_MODEL_NAME,
        },
        local=True,
    )

    # Get the artifacts list:
    artifacts_list = os.listdir(artifact_path)
    print(f"Produced artifacts: {artifacts_list}")

    # Clean up the test environment:
    _cleanup_environment(artifact_path=artifact_path)

    # Verify the '.onnx' model was created:
    assert "{}.onnx".format(OPTIMIZED_ONNX_MODEL_NAME) in artifacts_list

def test_import_function_aggregate():
    fn = import_function("function.yaml")
    fn.run(params={
        'metrics': ['cpu_utilization'],
        'labels': ['is_error'],
        'metric_aggs': ['mean', 'sum'],
        'label_aggs': ['max'],
        'suffix': 'daily',
        'inplace': False,
        'window': 5,
        'center': True,
        'save_to': AGGREGATE_PATH,
        'files_to_select': 2},
        local=True,
        inputs={'df_artifact': DATA})
    assert Path(AGGREGATE_PATH).is_file()

def test_local_churn_server():
    set_mlrun_hub_url()
    model_path = os.path.join(os.path.abspath("./"), "models")
    model = model_path + "/model.pt"
    if not path.exists(model):
        raise NotImplementedError

    fn = import_function("hub://churn_server")
    fn.add_model("mymodel", model_path=model, class_name="ChurnModel")
    # create an emulator (mock server) from the function configuration
    server = fn.to_mock_server()

    instances = ["I had a pleasure to work with such dedicated team. "
                 "Looking forward to cooperate with each and every one of them again."]
    result = server.test("/v2/models/mymodel/infer", {"instances": instances})
    assert result[0] == 2

def test_feature_perms_import_function():
    arc_to_parquet()
    train_model()
    data = "artifacts/rent.csv"
    labels = "interest_level"
    model = "model/model.pkl"
    fi_perms = import_function("function.yaml")
    fi_perms.run(params={
        "labels": labels,
        "plots_dest": "plots"},
        inputs={
            "model": model,
            "dataset": data},
        artifact_path=os.getcwd() + '/artifacts',
        local=True)
    assert Path(FEATURE_OUTPUT).is_file()

def test_import_function_describe_dask():
    dask_uri = "dask_func.yaml"
    _create_dask_func(dask_uri)
    fn = import_function('function.yaml')
    fn.run(inputs={"dataset": DATA_URL},
           params={
               'update_dataset': True,
               'label_column': 'label',
               'dask_function': dask_uri,
           },
           handler="summarize",
           artifact_path=os.getcwd() + '/artifacts',
           local=True)
    _validate_paths(base_path=PLOTS_PATH,
                    paths={
                        'corr.html',
                        'correlation-matrix.csv',
                        'hist.html',
                        'imbalance.html',
                        'imbalance-weights-vec.csv',
                        'violin.html'})

def train_model():
    from mlrun import import_function
    from mlrun.platforms import auto_mount

    train = import_function('hub://sklearn_classifier')  # .apply(auto_mount())
    train_run = train.run(
        inputs={"dataset": "artifacts/rent.csv"},
        params={
            "sample": -5_000,  # 5k random rows
            "model_pkg_class": "sklearn.ensemble.RandomForestClassifier",
            "label_column": "interest_level",
            "CLASS_n_estimators": 100,
            "CLASS_min_samples_leaf": 1,
            "CLASS_n_jobs": -1,
            "CLASS_oob_score": True},
        local=True)

def test_import_sklearn_classifier():
    acquire_run = generate_data()
    fn = import_function("function.yaml")
    # define model params
    params = {
        "sample": -5_000,  # 5k random rows
        "model_pkg_class": "sklearn.ensemble.RandomForestClassifier",
        "label_column": "interest_level",
        "CLASS_n_estimators": 100,
        "CLASS_min_samples_leaf": 1,
        "CLASS_n_jobs": -1,
        "CLASS_oob_score": True}
    train_run = fn.run(params=params,
                       inputs={"dataset": acquire_run.outputs["price"]},
                       local=True,
                       artifact_path="artifacts")
    assert Path(CLASSIFIER_PATH).is_file()

def test_local_xgb_custom():
    fn = import_function("function.yaml")
    fn.run(params={
        "nrows": 8192,
        "label_type": "float",
        "local_path": "./artifacts/inputs/xgb_custom"},
        handler="gen_outliers",
        local=True)

    fn.run(params={
        "num_boost_round": 40,
        "verbose_eval": False,
        "XGB_max_depth": 2,
        "XGB_subsample": 0.9,
        "test_set_key": "./artifacts/inputs/test-set"},
        inputs={"dataset": "./artifacts/inputs/xgb_custom.parquet"},
        handler="fit",
        local=True)

    assert os.path.exists(os.getcwd() + "/plots/learning-curves.html")

def verify_auth_parameters_and_configure_env(auth_method):
    if not config["env"].get("AZURE_CONTAINER"):
        return False

    for k, env_vars in AUTH_METHODS_AND_REQUIRED_PARAMS.items():
        for env_var in env_vars:
            os.environ.pop(env_var, None)

    test_params = AUTH_METHODS_AND_REQUIRED_PARAMS.get(auth_method)
    if not test_params:
        return False

    for env_var in test_params:
        env_value = config["env"].get(env_var)
        if not env_value:
            return False
        os.environ[env_var] = env_value

    logger.info(f"Testing auth method {auth_method}")

    logger.info("Creating Dask Client")
    dask_cluster = os.getenv("DASK_CLUSTER")
    if dask_cluster:
        if dask_cluster.startswith("db://"):
            fn = mlrun.import_function(dask_cluster)
            client = fn._get_dask_client
        elif dask_cluster.startswith("tcp://"):
            from dask.distributed import Client
            client = Client(dask_cluster)
        else:
            from dask.distributed import Client
            client = Client()  # noqa: F841
    return True

def test_local_coxph_train():
    ctx = get_or_create_ctx(name="tasks survive trainer")
    data_url = "https://raw.githubusercontent.com/mlrun/demos/0.6.x/customer-churn-prediction/WA_Fn-UseC_-Telco-Customer-Churn.csv"
    src = mlrun.get_dataitem(data_url)
    data_clean(context=ctx,
               src=src,
               cleaned_key="artifacts/inputs/cleaned-data",
               encoded_key="artifacts/inputs/encoded-data")
    fn = import_function("function.yaml")
    fn.run(params={
        "strata_cols": ['InternetService', 'StreamingMovies', 'StreamingTV', 'PhoneService'],
        "encode_cols": {"Contract": "Contract", "PaymentMethod": "Payment"},
        "models_dest": 'models/cox'},
        inputs={"dataset": "artifacts/inputs/encoded-data.csv"},
        local=True)
    model = load(open("models/cox/km/model.pkl", "rb"))
    ans = model.predict([1, 10, 30, 100, 200])
    assert list(np.around(ans, 3)) == [0.969, 0.869, 0.781, 0.668, 0.668]