def test_model_log(h2o_iris_model):
    h2o_model = h2o_iris_model.model
    old_uri = kiwi.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                artifact_path = "gbm_model"
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()
                kiwi.h2o.log_model(h2o_model=h2o_model, artifact_path=artifact_path)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Load the model back and verify that its predictions match the original's
                h2o_model_loaded = kiwi.h2o.load_model(model_uri=model_uri)
                assert all(
                    h2o_model_loaded.predict(h2o_iris_model.inference_data).as_data_frame() ==
                    h2o_model.predict(h2o_iris_model.inference_data).as_data_frame())
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(old_uri)
def test_sparkml_estimator_model_log(tmpdir, spark_model_estimator):
    old_tracking_uri = kiwi.get_tracking_uri()
    cnt = 0
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        for dfs_tmp_dir in [None, os.path.join(str(tmpdir), "test")]:
            print("should_start_run =", should_start_run, "dfs_tmp_dir =", dfs_tmp_dir)
            try:
                tracking_dir = os.path.abspath(str(tmpdir.join("mlruns")))
                kiwi.set_tracking_uri("file://%s" % tracking_dir)
                if should_start_run:
                    kiwi.start_run()
                artifact_path = "model%d" % cnt
                cnt += 1
                sparkm.log_model(
                    artifact_path=artifact_path,
                    spark_model=spark_model_estimator.model,
                    dfs_tmpdir=dfs_tmp_dir)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=kiwi.active_run().info.run_id,
                    artifact_path=artifact_path)

                # Verify that the reloaded model produces the expected predictions
                reloaded_model = sparkm.load_model(model_uri=model_uri, dfs_tmpdir=dfs_tmp_dir)
                preds_df = reloaded_model.transform(spark_model_estimator.spark_df)
                preds = [x.prediction for x in preds_df.select("prediction").collect()]
                assert spark_model_estimator.predictions == preds
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(old_tracking_uri)
                x = dfs_tmp_dir or sparkm.DFS_TMP
                shutil.rmtree(x)
                shutil.rmtree(tracking_dir)
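# The `spark_model_estimator` fixture used above is not defined in this snippet;
# its fields can be inferred from usage (.model, .spark_df, .predictions).
# A hypothetical minimal shape, for illustration only:
from collections import namedtuple

SparkModelWithData = namedtuple(
    "SparkModelWithData", ["model", "spark_df", "predictions"])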
def test_docker_project_tracking_uri_propagation(
        ProfileConfigProvider, tmpdir, tracking_uri, expected_command_segment,
        docker_example_base_image):  # pylint: disable=unused-argument
    # NB: `ProfileConfigProvider` is assumed to be injected by a mock.patch decorator,
    # and `tracking_uri`/`expected_command_segment` by pytest parametrization.
    mock_provider = mock.MagicMock()
    mock_provider.get_config.return_value = \
        DatabricksConfig("host", "user", "pass", None, insecure=True)
    ProfileConfigProvider.return_value = mock_provider

    # Create and mock a local tracking directory
    local_tracking_dir = os.path.join(tmpdir.strpath, "mlruns")
    if tracking_uri is None:
        tracking_uri = local_tracking_dir
    old_uri = kiwi.get_tracking_uri()
    try:
        kiwi.set_tracking_uri(tracking_uri)
        with mock.patch("mlflow.tracking._tracking_service.utils._get_store") as _get_store_mock:
            _get_store_mock.return_value = file_store.FileStore(local_tracking_dir)
            kiwi.projects.run(
                TEST_DOCKER_PROJECT_DIR,
                experiment_id=file_store.FileStore.DEFAULT_EXPERIMENT_ID)
    finally:
        kiwi.set_tracking_uri(old_uri)
def pyfunc_serve_and_score_model(model_uri, data, content_type,
                                 activity_polling_timeout_seconds=500,
                                 extra_args=None, stdout=sys.stdout):
    """
    :param model_uri: URI of the model to be served.
    :param data: The data to send to the pyfunc server for testing. This is either a
                 Pandas DataFrame or a string of the format specified by `content_type`.
    :param content_type: The type of the data to send to the pyfunc server for testing.
                         This is one of `mlflow.pyfunc.scoring_server.CONTENT_TYPES`.
    :param activity_polling_timeout_seconds: The amount of time, in seconds, to wait
                                             before declaring the scoring process to have
                                             failed.
    :param extra_args: A list of extra arguments to pass to the pyfunc scoring server
                       command. For example, passing ``extra_args=["--no-conda"]`` will
                       pass the ``--no-conda`` flag to the scoring server to ensure that
                       conda environment activation is skipped.
    """
    env = dict(os.environ)
    env.update(LC_ALL="en_US.UTF-8", LANG="en_US.UTF-8")
    env.update(MLFLOW_TRACKING_URI=kiwi.get_tracking_uri())
    env.update(MLFLOW_HOME=_get_mlflow_home())
    port = get_safe_port()
    scoring_cmd = ['mlflow', 'models', 'serve', '-m', model_uri,
                   "-p", str(port), "--install-mlflow"]
    if extra_args is not None:
        scoring_cmd += extra_args
    proc = _start_scoring_proc(cmd=scoring_cmd, env=env, stdout=stdout, stderr=stdout)
    return _evaluate_scoring_proc(proc, port, data, content_type,
                                  activity_polling_timeout_seconds)
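# A minimal usage sketch for `pyfunc_serve_and_score_model`, assuming the caller
# supplies an sklearn model object and a feature matrix (both hypothetical here),
# and assuming the helper returns a `requests.Response`-like object:
def _example_serve_and_score(sk_model, x):
    import pandas as pd

    # Log the model so it can be served from a runs:/ URI
    with kiwi.start_run() as run:
        kiwi.sklearn.log_model(sk_model, "model")
        model_uri = "runs:/{}/model".format(run.info.run_id)
    # Serve the model and score the data over HTTP using the pandas-split JSON format
    response = pyfunc_serve_and_score_model(
        model_uri=model_uri,
        data=pd.DataFrame(x),
        content_type="application/json; format=pandas-split",
        extra_args=["--no-conda"])
    return pd.read_json(response.content.decode("utf-8"), orient="records")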
predicted_qualities = lr.predict(test_x)

(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
print("  RMSE: %s" % rmse)
print("  MAE: %s" % mae)
print("  R2: %s" % r2)

kiwi.log_param("alpha", alpha)
kiwi.log_param("l1_ratio", l1_ratio)
kiwi.log_metric("rmse", rmse)
kiwi.log_metric("r2", r2)
kiwi.log_metric("mae", mae)

tracking_url_type_store = urlparse(kiwi.get_tracking_uri()).scheme

# The model registry does not work with a file store
if tracking_url_type_store != "file":
    # Register the model.
    # There are other ways to use the Model Registry, depending on the use case;
    # please refer to the documentation for more information:
    # https://mlflow.org/docs/latest/model-registry.html#api-workflow
    kiwi.sklearn.log_model(lr, "model", registered_model_name="ElasticnetWineModel")
else:
    kiwi.sklearn.log_model(lr, "model")
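# `eval_metrics` is assumed to be defined elsewhere in this script; a minimal
# sketch of the three metrics it is expected to compute, using sklearn.metrics:
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


def eval_metrics(actual, pred):
    # Root mean squared error, mean absolute error, and coefficient of determination
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2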
def test_predict(iris_data, sk_model):
    with TempDir(chdr=True) as tmp:
        with kiwi.start_run() as active_run:
            kiwi.sklearn.log_model(sk_model, "model",
                                   registered_model_name="impredicting")
            model_uri = "runs:/{run_id}/model".format(run_id=active_run.info.run_id)
        model_registry_uri = "models:/{name}/{stage}".format(
            name="impredicting", stage="None")
        input_json_path = tmp.path("input.json")
        input_csv_path = tmp.path("input.csv")
        output_json_path = tmp.path("output.json")
        x, _ = iris_data
        pd.DataFrame(x).to_json(input_json_path, orient="split")
        pd.DataFrame(x).to_csv(input_csv_path, index=False)

        # Test with no conda & a model registry URI
        env_with_tracking_uri = os.environ.copy()
        env_with_tracking_uri.update(MLFLOW_TRACKING_URI=kiwi.get_tracking_uri())
        p = subprocess.Popen(["mlflow", "models", "predict", "-m", model_registry_uri,
                              "-i", input_json_path, "-o", output_json_path, "--no-conda"],
                             stderr=subprocess.PIPE,
                             env=env_with_tracking_uri)
        assert p.wait() == 0
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)

        # With conda + --install-mlflow.
        # NB: `extra_options` is assumed to be defined at module scope (e.g., a list of
        # flags such as ["--install-mlflow"] selected by the test configuration).
        p = subprocess.Popen(["mlflow", "models", "predict", "-m", model_uri,
                              "-i", input_json_path, "-o", output_json_path] + extra_options,
                             env=env_with_tracking_uri)
        assert 0 == p.wait()
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)

        # Explicit json format with the default orient (should be split)
        p = subprocess.Popen(["mlflow", "models", "predict", "-m", model_uri,
                              "-i", input_json_path, "-o", output_json_path,
                              "-t", "json"] + extra_options,
                             env=env_with_tracking_uri)
        assert 0 == p.wait()
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)

        # Explicit json format with orient == split
        p = subprocess.Popen(["mlflow", "models", "predict", "-m", model_uri,
                              "-i", input_json_path, "-o", output_json_path,
                              "-t", "json", "--json-format", "split"] + extra_options,
                             env=env_with_tracking_uri)
        assert 0 == p.wait()
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)

        # Read from stdin, write to stdout
        p = subprocess.Popen(["mlflow", "models", "predict", "-m", model_uri,
                              "-t", "json", "--json-format", "split"] + extra_options,
                             universal_newlines=True,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=sys.stderr,
                             env=env_with_tracking_uri)
        with open(input_json_path, "r") as f:
            stdout, _ = p.communicate(f.read())
        assert 0 == p.wait()
        actual = pd.read_json(StringIO(stdout), orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)

        # NB: We do not test orient=records here because records may lose column
        # ordering. orient == records is tested in another test with a simpler model.

        # csv
        p = subprocess.Popen(["mlflow", "models", "predict", "-m", model_uri,
                              "-i", input_csv_path, "-o", output_json_path,
                              "-t", "csv"] + extra_options,
                             env=env_with_tracking_uri)
        assert 0 == p.wait()
        actual = pd.read_json(output_json_path, orient="records")
        actual = actual[actual.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)
import os
import shutil
import sys
import random
import tempfile

import kiwi
from kiwi import log_metric, log_param, log_artifacts, get_artifact_uri, active_run, \
    get_tracking_uri, log_artifact

if __name__ == "__main__":
    print("Running {} with tracking URI {}".format(sys.argv[0], get_tracking_uri()))
    log_param("param1", 5)
    log_metric("foo", 5)
    log_metric("foo", 6)
    log_metric("foo", 7)
    log_metric("random_int", random.randint(0, 100))
    run_id = active_run().info.run_id
    # Get run metadata & data from the tracking server
    service = kiwi.tracking.MlflowClient()
    run = service.get_run(run_id)
    print("Metadata & data for run with UUID %s: %s" % (run_id, run))
    local_dir = tempfile.mkdtemp()
    message = "test artifact written during run %s within artifact URI %s\n" \
              % (active_run().info.run_id, get_artifact_uri())
    try:
        file_path = os.path.join(local_dir, "some_output_file.txt")
        with open(file_path, "w") as handle:
            handle.write(message)
        log_artifacts(local_dir, "some_subdir")
    finally:
        # Clean up the temporary scratch directory (assumed cleanup step;
        # `shutil` is imported above for this purpose)
        shutil.rmtree(local_dir)