def test_model_log(prophet_model):
    old_uri = mlflow.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()
                artifact_path = "prophet"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["pystan", "prophet"])
                model_info = mlflow.prophet.log_model(
                    pr_model=prophet_model.model,
                    artifact_path=artifact_path,
                    conda_env=conda_env,
                )
                model_uri = f"runs:/{mlflow.active_run().info.run_id}/{artifact_path}"
                assert model_info.model_uri == model_uri

                reloaded_prophet_model = mlflow.prophet.load_model(model_uri=model_uri)
                np.testing.assert_array_equal(
                    generate_forecast(prophet_model.model, FORECAST_HORIZON),
                    generate_forecast(reloaded_prophet_model, FORECAST_HORIZON),
                )

                model_path = _download_artifact_from_uri(artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)

def test_model_load_succeeds_with_missing_data_key_when_data_exists_at_default_path(
    h2o_iris_model, model_path
):
    """
    This is a backwards compatibility test to ensure that models saved in MLflow version <= 0.7.0
    can be loaded successfully. These models are missing the `data` flavor configuration key.
    """
    h2o_model = h2o_iris_model.model
    mlflow.h2o.save_model(h2o_model=h2o_model, path=model_path)

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    flavor_conf = model_conf.flavors.get(mlflow.h2o.FLAVOR_NAME, None)
    assert flavor_conf is not None
    del flavor_conf["data"]
    model_conf.save(model_conf_path)

    h2o_model_loaded = mlflow.h2o.load_model(model_path)
    assert all(
        h2o_model_loaded.predict(h2o_iris_model.inference_data).as_data_frame()
        == h2o_model.predict(h2o_iris_model.inference_data).as_data_frame()
    )

def test_log_model_built_in_high_level_api(
    pd_model_built_in_high_level_api, model_path, tmpdir, get_dataset_built_in_high_level_api
):
    model = pd_model_built_in_high_level_api.model
    test_dataset = get_dataset_built_in_high_level_api[1]
    try:
        artifact_path = "model"
        conda_env = os.path.join(tmpdir, "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["paddle"])
        mlflow.paddle.log_model(
            pd_model=model, artifact_path=artifact_path, conda_env=conda_env, training=True
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )

        model_retrain = paddle.Model(UCIHousing())
        optim = paddle.optimizer.Adam(learning_rate=0.015, parameters=model.parameters())
        model_retrain.prepare(optim, paddle.nn.MSELoss())
        model_retrain = mlflow.paddle.load_model(model_uri=model_uri, model=model_retrain)

        np.testing.assert_array_almost_equal(
            np.array(model.predict(test_dataset)).squeeze(),
            np.array(model_retrain.predict(test_dataset)).squeeze(),
            decimal=5,
        )

        model_path = _download_artifact_from_uri(artifact_uri=model_uri)
        model_config = Model.load(os.path.join(model_path, "MLmodel"))
        assert pyfunc.FLAVOR_NAME in model_config.flavors
        assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
        env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
        assert os.path.exists(os.path.join(model_path, env_path))
    finally:
        mlflow.end_run()

def test_log_model_with_signature_and_examples(iris_df, spark_model_iris):
    _, _, iris_spark_df = iris_df
    signature_ = infer_signature(iris_spark_df)
    example_ = iris_spark_df.toPandas().head(3)
    artifact_path = "model"
    for signature in (None, signature_):
        for example in (None, example_):
            with mlflow.start_run():
                sparkm.log_model(
                    spark_model_iris.model,
                    artifact_path=artifact_path,
                    signature=signature,
                    input_example=example,
                )
                artifact_uri = mlflow.get_artifact_uri()
                model_path = os.path.join(artifact_uri, artifact_path)
                mlflow_model = Model.load(model_path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model, model_path) == example).all())

def test_mleap_model_log(spark_model_iris):
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with mlflow.start_run(), register_model_patch:
        sparkm.log_model(
            spark_model=spark_model_iris.model,
            sample_input=spark_model_iris.spark_df,
            artifact_path=artifact_path,
            registered_model_name="Model1",
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )
        mlflow.register_model.assert_called_once_with(
            model_uri, "Model1", await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS
        )

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    config_path = os.path.join(model_path, "MLmodel")
    mlflow_model = Model.load(config_path)
    assert sparkm.FLAVOR_NAME in mlflow_model.flavors
    assert mleap.FLAVOR_NAME in mlflow_model.flavors

def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    model_config = Model(run_id="test", artifact_path="testtest")
    mlflow.pyfunc.save_model(
        path=model_path,
        data_path=sk_model_path,
        loader_module=__name__,
        code_path=[__file__],
        mlflow_model=model_config,
    )

    reloaded_model_config = Model.load(os.path.join(model_path, "MLmodel"))
    assert model_config.__dict__ == reloaded_model_config.__dict__
    assert mlflow.pyfunc.FLAVOR_NAME in reloaded_model_config.flavors
    assert mlflow.pyfunc.PY_VERSION in reloaded_model_config.flavors[mlflow.pyfunc.FLAVOR_NAME]

    reloaded_model = mlflow.pyfunc.load_pyfunc(model_path)
    np.testing.assert_array_equal(
        sklearn_knn_model.predict(iris_data[0]), reloaded_model.predict(iris_data[0])
    )

def test_schema_and_examples_are_saved_correctly(saved_tf_iris_model):
    train_x, train_y = iris_data_utils.load_data()[0]
    X = pd.DataFrame(train_x)
    y = pd.Series(train_y)
    for signature in (None, infer_signature(X, y)):
        for example in (None, X.head(3)):
            with TempDir() as tmp:
                path = tmp.path("model")
                mlflow.tensorflow.save_model(
                    tf_saved_model_dir=saved_tf_iris_model.path,
                    tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
                    tf_signature_def_key=saved_tf_iris_model.signature_def_key,
                    path=path,
                    signature=signature,
                    input_example=example,
                )
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model, path) == example).all())

def test_model_log(pd_model, model_path):
    old_uri = mlflow.get_tracking_uri()
    model = pd_model.model
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()
                artifact_path = "model"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["paddle"])
                mlflow.paddle.log_model(
                    pd_model=model, artifact_path=artifact_path, conda_env=conda_env
                )
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
                )

                reloaded_pd_model = mlflow.paddle.load_model(model_uri=model_uri)
                np.testing.assert_array_almost_equal(
                    model(pd_model.inference_dataframe),
                    reloaded_pd_model(pd_model.inference_dataframe),
                )

                model_path = _download_artifact_from_uri(artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)

def test_model_log_without_pyfunc_flavor():
    artifact_path = "model"
    nlp = spacy.blank("en")

    # Add a component not compatible with pyfunc
    if IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0:
        nlp.add_pipe("ner", last=True)
    else:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)

    # Ensure the pyfunc flavor is not present after logging and loading the model
    with mlflow.start_run():
        mlflow.spacy.log_model(spacy_model=nlp, artifact_path=artifact_path)
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

        loaded_model = Model.load(model_path)
        assert loaded_model.flavors.keys() == {"spacy"}

def test_model_load_succeeds_with_missing_data_key_when_data_exists_at_default_path(
    tf_keras_model, model_path, data
):
    """
    This is a backwards compatibility test to ensure that models saved in MLflow version <= 0.8.0
    can be loaded successfully. These models are missing the `data` flavor configuration key.
    """
    mlflow.keras.save_model(keras_model=tf_keras_model, path=model_path, save_format="h5")
    shutil.move(
        os.path.join(model_path, "data", "model.h5"), os.path.join(model_path, "model.h5")
    )

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    flavor_conf = model_conf.flavors.get(mlflow.keras.FLAVOR_NAME, None)
    assert flavor_conf is not None
    del flavor_conf["data"]
    model_conf.save(model_conf_path)

    model_loaded = mlflow.keras.load_model(model_path)
    assert all(model_loaded.predict(data[0].values) == tf_keras_model.predict(data[0].values))

def test_signature_and_examples_are_saved_correctly(onnx_model, data, onnx_custom_env):
    import mlflow.onnx

    model = onnx_model
    signature_ = infer_signature(*data)
    example_ = data[0].head(3)
    for signature in (None, signature_):
        for example in (None, example_):
            with TempDir() as tmp:
                path = tmp.path("model")
                mlflow.onnx.save_model(
                    model,
                    path=path,
                    conda_env=onnx_custom_env,
                    signature=signature,
                    input_example=example,
                )
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model, path) == example).all())

def load_model(model_uri: str, suppress_warnings: bool = True) -> PyFuncModel:
    """
    Load a model stored in Python function format.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts
                      <https://www.mlflow.org/docs/latest/concepts.html#artifact-locations>`_.
    :param suppress_warnings: If ``True``, non-fatal warning messages associated with the model
                              loading process will be suppressed. If ``False``, these warning
                              messages will be emitted.
    """
    local_path = _download_artifact_from_uri(artifact_uri=model_uri)
    model_meta = Model.load(os.path.join(local_path, "MLmodel"))

    conf = model_meta.flavors.get(FLAVOR_NAME)
    if conf is None:
        raise MlflowException(
            'Model does not have the "{flavor_name}" flavor'.format(flavor_name=FLAVOR_NAME),
            RESOURCE_DOES_NOT_EXIST,
        )
    model_py_version = conf.get(PY_VERSION)
    if not suppress_warnings:
        _warn_potentially_incompatible_py_version_if_necessary(model_py_version=model_py_version)
    if CODE in conf and conf[CODE]:
        code_path = os.path.join(local_path, conf[CODE])
        mlflow.pyfunc.utils._add_code_to_system_path(code_path=code_path)
    data_path = os.path.join(local_path, conf[DATA]) if (DATA in conf) else local_path
    model_impl = importlib.import_module(conf[MAIN])._load_pyfunc(data_path)
    return PyFuncModel(model_meta=model_meta, model_impl=model_impl)

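# A minimal usage sketch for ``load_model`` above (illustrative only): the run ID
# placeholder and the input DataFrame are hypothetical, and assume a pyfunc-flavored
# model was previously logged under the artifact path "model".
#
#     import pandas as pd
#     import mlflow.pyfunc
#
#     model = mlflow.pyfunc.load_model("runs:/<mlflow_run_id>/model")
#     predictions = model.predict(pd.DataFrame({"x": [1.0, 2.0], "y": [3.0, 4.0]}))
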
def _load_model(path, **kwargs):
    mlflow_model_path = os.path.join(path, "MLmodel")
    if not os.path.exists(mlflow_model_path):
        raise RuntimeError("MLmodel is not found at '{}'".format(path))

    mlflow_model = Model.load(mlflow_model_path)

    if FLAVOR_NAME not in mlflow_model.flavors:
        raise ValueError(
            "Could not find flavor '{}' amongst available flavors {}, "
            "unable to load stored model".format(FLAVOR_NAME, list(mlflow_model.flavors.keys()))
        )

    # This may be replaced by a warning and then try/except torch.load
    flavor = mlflow_model.flavors[FLAVOR_NAME]
    if torch.__version__ != flavor["pytorch_version"]:
        raise ValueError(
            "Stored model version '{}' does not match installed PyTorch version '{}'".format(
                flavor["pytorch_version"], torch.__version__
            )
        )

    path = os.path.abspath(path)
    path = os.path.join(path, mlflow_model.flavors[FLAVOR_NAME]["model_data"])
    return torch.load(path, **kwargs)

def test_load_model_succeeds_when_data_is_model_file_instead_of_directory(
    module_scoped_subclassed_model, model_path, data
):
    """
    This test verifies that PyTorch models saved in older versions of MLflow are loaded
    successfully by ``mlflow.pytorch.load_model``. The ``data`` path associated with these
    older models is a serialized PyTorch model file, as opposed to the current format:
    a directory containing a serialized model file and pickle module information.
    """
    artifact_path = "pytorch_model"
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            artifact_path=artifact_path, pytorch_model=module_scoped_subclassed_model
        )
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    assert pyfunc_conf is not None
    model_data_path = os.path.join(model_path, pyfunc_conf[pyfunc.DATA])
    assert os.path.exists(model_data_path)
    assert mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME in os.listdir(model_data_path)
    pyfunc_conf[pyfunc.DATA] = os.path.join(
        model_data_path, mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME
    )
    model_conf.save(model_conf_path)

    loaded_pyfunc = pyfunc.load_pyfunc(model_path)

    np.testing.assert_array_almost_equal(
        loaded_pyfunc.predict(data[0]),
        pd.DataFrame(_predict(model=module_scoped_subclassed_model, data=data)),
        decimal=4,
    )

def test_log_model(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment("Log models")
    with TempDir(chdr=True):
        mlflow.set_experiment("Log models")
        model_paths = ["model/path/{}".format(i) for i in range(3)]
        with mlflow.start_run(experiment_id=experiment_id) as run:
            for i, m in enumerate(model_paths):
                mlflow.pyfunc.log_model(m, loader_module="mlflow.pyfunc")
                mlflow.pyfunc.save_model(
                    m,
                    mlflow_model=Model(artifact_path=m, run_id=run.info.run_id),
                    loader_module="mlflow.pyfunc",
                )
                model = Model.load(os.path.join(m, "MLmodel"))
                run = mlflow.get_run(run.info.run_id)
                tag = run.data.tags["mlflow.log-model.history"]
                models = json.loads(tag)
                model.utc_time_created = models[i]["utc_time_created"]
                assert models[i] == model.to_dict()
                assert len(models) == i + 1
                for j in range(0, i + 1):
                    assert models[j]["artifact_path"] == model_paths[j]

def load_model(path, run_id=None, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param path: Local filesystem path or run-relative artifact path to the model.
    :param run_id: Run ID. If provided, combined with ``path`` to identify the model.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or
                       local filesystem if running in local mode. The model will be loaded from
                       this destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    >>> from mlflow import spark
    >>> model = mlflow.spark.load_model("spark-model")
    >>> # Prepare test documents, which are unlabeled (id, text) tuples.
    >>> test = spark.createDataFrame([
    ...     (4, "spark i j k"),
    ...     (5, "l m n"),
    ...     (6, "spark hadoop spark"),
    ...     (7, "apache hadoop")], ["id", "text"])
    >>> # Make predictions on test documents.
    >>> prediction = model.transform(test)
    """
    dfs_tmpdir = dfs_tmpdir if dfs_tmpdir is not None else DFS_TMP
    if run_id is not None:
        path = mlflow.tracking.utils._get_model_log_dir(model_name=path, run_id=run_id)
    m = Model.load(os.path.join(path, "MLmodel"))
    if FLAVOR_NAME not in m.flavors:
        raise Exception("Model does not have {} flavor".format(FLAVOR_NAME))
    conf = m.flavors[FLAVOR_NAME]
    model_path = os.path.join(path, conf["model_data"])
    tmp_path = _tmp_path(dfs_tmpdir)
    # Spark ML expects the model to be stored on DFS.
    # Copy the model to a temp DFS location first. We cannot delete this file, as
    # Spark may read from it at any point.
    _HadoopFileSystem.copy_from_local_file(model_path, tmp_path, removeSrc=False)
    pipeline_model = PipelineModel.load(tmp_path)
    eprint("Copied SparkML model to %s" % tmp_path)
    return pipeline_model

def create_deployment(self, name, model_uri, flavor=None, config=None):
    # ``config`` defaults to None; treat a missing config as an empty dict so the
    # ``get`` calls below do not raise AttributeError.
    config = config or {}
    device = config.get("device", "CPU")
    autobatch_size = config.get("batchsize")
    tag = config.get("tag")
    path = Path(_download_artifact_from_uri(model_uri))
    model_config = path / "MLmodel"
    if not model_config.exists():
        raise MlflowException(
            message=(
                "Failed to find MLmodel configuration within the specified model's"
                " root directory."
            ),
            error_code=INVALID_PARAMETER_VALUE,
        )
    model_config = Model.load(model_config)

    if flavor is None:
        flavor = get_preferred_deployment_flavor(model_config)
    else:
        validate_deployment_flavor(model_config, flavor)
    logger.info("Using the {} flavor for deployment!".format(flavor))

    if flavor == "tensorflow":
        # TODO: test this for tf1.x and tf2.x
        tags = model_config.flavors[flavor]["meta_graph_tags"]
        signaturedef = model_config.flavors[flavor]["signature_def_key"]
        model_dir = path / model_config.flavors[flavor]["saved_model_dir"]
        model, inputs, outputs = ml2rt.load_model(model_dir, tags, signaturedef)
    else:
        model_path = None
        for file in path.iterdir():
            if file.suffix == ".pt":
                model_path = file
        if model_path is None:
            raise RuntimeError("Model file does not have a valid suffix. Expected ``.pt``")
        model = ml2rt.load_model(model_path)
        inputs = outputs = None

    backend = flavor2backend[flavor]
    self.con.modelset(
        name,
        backend,
        device,
        model,
        inputs=inputs,
        outputs=outputs,
        batch=autobatch_size,
        tag=tag,
    )
    return {"name": name, "flavor": flavor}

def test_model_save_load():
    m = Model(
        artifact_path="some/path",
        run_id="123",
        flavors={"flavor1": {"a": 1, "b": 2}, "flavor2": {"x": 1, "y": 2}},
        signature=ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        ),
        saved_input_example_info={"x": 1, "y": 2},
    )
    assert m.get_input_schema() == m.signature.inputs
    assert m.get_output_schema() == m.signature.outputs
    x = Model(artifact_path="some/other/path", run_id="1234")
    assert x.get_input_schema() is None
    assert x.get_output_schema() is None

    n = Model(
        artifact_path="some/path",
        run_id="123",
        flavors={"flavor1": {"a": 1, "b": 2}, "flavor2": {"x": 1, "y": 2}},
        signature=ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        ),
        saved_input_example_info={"x": 1, "y": 2},
    )
    n.utc_time_created = m.utc_time_created
    n.model_uuid = m.model_uuid
    assert m == n

    n.signature = None
    assert m != n

    with TempDir() as tmp:
        m.save(tmp.path("model"))
        o = Model.load(tmp.path("model"))
    assert m == o
    assert m.to_json() == o.to_json()
    assert m.to_yaml() == o.to_yaml()

def test_log_model(cb_model, tmpdir):
    model, inference_dataframe = cb_model
    with mlflow.start_run():
        artifact_path = "model"
        conda_env = os.path.join(tmpdir.strpath, "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["catboost"])

        model_info = mlflow.catboost.log_model(model, artifact_path, conda_env=conda_env)
        model_uri = "runs:/{}/{}".format(mlflow.active_run().info.run_id, artifact_path)
        assert model_info.model_uri == model_uri

        loaded_model = mlflow.catboost.load_model(model_uri)
        np.testing.assert_array_almost_equal(
            model.predict(inference_dataframe),
            loaded_model.predict(inference_dataframe),
        )

        local_path = _download_artifact_from_uri(model_uri)
        model_config = Model.load(os.path.join(local_path, "MLmodel"))
        assert pyfunc.FLAVOR_NAME in model_config.flavors
        assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
        env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
        assert os.path.exists(os.path.join(local_path, env_path))

def _serve():
    """
    Serve the model. Read the MLmodel configuration, initialize the Conda environment if
    needed, and start the Python server.
    """
    model_config_path = os.path.join(MODEL_PATH, MLMODEL_FILE_NAME)
    m = Model.load(model_config_path)

    if DEPLOYMENT_CONFIG_KEY_FLAVOR_NAME in os.environ:
        serving_flavor = os.environ[DEPLOYMENT_CONFIG_KEY_FLAVOR_NAME]
    else:
        # Older versions of mlflow may not specify a deployment configuration
        serving_flavor = pyfunc.FLAVOR_NAME

    if serving_flavor == mleap.FLAVOR_NAME:
        _serve_mleap()
    elif pyfunc.FLAVOR_NAME in m.flavors:
        _serve_pyfunc(m)
    else:
        raise Exception("This container only supports models with the MLeap or PyFunc flavors.")

def test_model_log_load(sklearn_knn_model, iris_data, tmpdir):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        mlflow.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            data_path=sk_model_path,
            loader_module=os.path.basename(__file__)[:-3],
            code_path=[__file__],
        )
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=pyfunc_artifact_path
            )
        )

    model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))
    assert mlflow.pyfunc.FLAVOR_NAME in model_config.flavors
    assert mlflow.pyfunc.PY_VERSION in model_config.flavors[mlflow.pyfunc.FLAVOR_NAME]

    reloaded_model = mlflow.pyfunc.load_pyfunc(pyfunc_model_path)
    np.testing.assert_array_equal(
        sklearn_knn_model.predict(iris_data[0]), reloaded_model.predict(iris_data[0])
    )

def test_requirements_file_save_model(create_requirements_file, sequential_model):
    requirements_file, content_expected = create_requirements_file
    with TempDir(remove_on_exit=True) as tmp:
        model_path = os.path.join(tmp.path(), "models")
        mlflow.pytorch.save_model(
            pytorch_model=sequential_model,
            path=model_path,
            requirements_file=requirements_file,
        )

        model_config_path = os.path.join(model_path, "MLmodel")
        model_config = Model.load(model_config_path)
        flavor_config = model_config.flavors["pytorch"]

        assert "requirements_file" in flavor_config
        loaded_requirements_file = flavor_config["requirements_file"]

        assert "path" in loaded_requirements_file
        requirements_file_path = loaded_requirements_file["path"]
        requirements_file_path = os.path.join(model_path, requirements_file_path)

        with open(requirements_file_path) as fp:
            assert fp.read() == content_expected

def test_model_log_with_input_example_succeeds():
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema(
                [
                    ColSpec("integer", "a"),
                    ColSpec("string", "b"),
                    ColSpec("boolean", "c"),
                    ColSpec("string", "d"),
                    ColSpec("datetime", "e"),
                ]
            ),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = pd.DataFrame(
            {
                "a": np.int32(1),
                "b": "test string",
                "c": True,
                "d": date.today(),
                "e": np.datetime64("2020-01-01T00:00:00"),
            },
            index=[0],
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path, schema=sig.inputs)

        # date column will get deserialized into string
        input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
        assert x.equals(input_example)

        loaded_example = loaded_model.load_input_example(local_path)
        assert isinstance(loaded_example, pd.DataFrame)
        assert loaded_example.equals(input_example)

def test_load_model_with_differing_cloudpickle_version_at_micro_granularity_logs_warning(
    model_path,
):
    class TestModel(mlflow.pyfunc.PythonModel):
        def predict(self, context, model_input):
            return model_input

    mlflow.pyfunc.save_model(path=model_path, python_model=TestModel())
    saver_cloudpickle_version = "0.5.8"
    model_config_path = os.path.join(model_path, "MLmodel")
    model_config = Model.load(model_config_path)
    model_config.flavors[mlflow.pyfunc.FLAVOR_NAME][
        mlflow.pyfunc.model.CONFIG_KEY_CLOUDPICKLE_VERSION
    ] = saver_cloudpickle_version
    model_config.save(model_config_path)

    log_messages = []

    def custom_warn(message_text, *args, **kwargs):
        log_messages.append(message_text % args % kwargs)

    loader_cloudpickle_version = "0.5.7"
    with mock.patch("mlflow.pyfunc._logger.warning") as warn_mock, mock.patch(
        "cloudpickle.__version__"
    ) as cloudpickle_version_mock:
        cloudpickle_version_mock.__str__ = lambda *args, **kwargs: loader_cloudpickle_version
        warn_mock.side_effect = custom_warn
        mlflow.pyfunc.load_pyfunc(model_uri=model_path)

    assert any(
        [
            "differs from the version of CloudPickle that is currently running" in log_message
            and saver_cloudpickle_version in log_message
            and loader_cloudpickle_version in log_message
            for log_message in log_messages
        ]
    )

def test_model_log(sklearn_logreg_model, model_path):
    old_uri = mlflow.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()
                artifact_path = "linear"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["scikit-learn"])
                mlflow.sklearn.log_model(
                    sk_model=sklearn_logreg_model.model,
                    artifact_path=artifact_path,
                    conda_env=conda_env,
                )
                run_id = mlflow.active_run().info.run_uuid

                reloaded_logreg_model = mlflow.sklearn.load_model(artifact_path, run_id)
                np.testing.assert_array_equal(
                    sklearn_logreg_model.model.predict(sklearn_logreg_model.inference_data),
                    reloaded_logreg_model.predict(sklearn_logreg_model.inference_data),
                )

                model_path = _get_model_log_dir(artifact_path, run_id=run_id)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)

def test_model_log():
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}
        local_path, r = _log_model_with_signature_and_example(tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.signature == sig
        path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example
        assert not hasattr(loaded_model, "databricks_runtime")

        loaded_example = loaded_model.load_input_example(local_path)
        assert isinstance(loaded_example, pd.DataFrame)
        assert loaded_example.to_dict(orient="records")[0] == input_example

        assert Version(loaded_model.mlflow_version) == Version(mlflow.version.VERSION)

def get_module_loader_src(src_path, dst_path):
    """
    Generate Python source of the model loader.

    The model loader contains a ``load_pyfunc`` method with no parameters. It hardcodes the
    loading of the given model into a Python source. This is done so that the exported model
    has no unnecessary dependencies on MLflow or any other configuration file format or
    parsing library.

    :param src_path: Current path to the model.
    :param dst_path: Relative or absolute path where the model will be stored in the
                     deployment environment.
    :return: Python source code of the model loader as string.
    """
    conf_path = os.path.join(src_path, "MLmodel")
    model = Model.load(conf_path)
    if FLAVOR_NAME not in model.flavors:
        raise Exception(
            "Format '{format}' not found in {path}.".format(format=FLAVOR_NAME, path=conf_path)
        )
    conf = model.flavors[FLAVOR_NAME]
    update_path = ""
    if CODE in conf and conf[CODE]:
        src_code_path = os.path.join(src_path, conf[CODE])
        dst_code_path = os.path.join(dst_path, conf[CODE])
        code_path = [
            "os.path.abspath('%s')" % x
            for x in [dst_code_path]
            + mlflow.pyfunc.utils._get_code_dirs(src_code_path, dst_code_path)
        ]
        update_path = "sys.path = {} + sys.path; ".format("[%s]" % ",".join(code_path))

    data_path = os.path.join(dst_path, conf[DATA]) if (DATA in conf) else dst_path
    return loader_template.format(update_path=update_path, main=conf[MAIN], data_path=data_path)

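# A brief usage sketch for ``get_module_loader_src`` above (illustrative; the paths and the
# output filename are hypothetical). The returned string is self-contained Python source
# that can be written out and shipped alongside the exported model:
#
#     loader_src = get_module_loader_src("/path/to/local/model", "model")
#     with open("model_loader.py", "w") as f:
#         f.write(loader_src)
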
def test_load_model_with_missing_cloudpickle_version_logs_warning(model_path):
    class TestModel(mlflow.pyfunc.PythonModel):
        def predict(self, context, model_input):
            return model_input

    mlflow.pyfunc.save_model(path=model_path, python_model=TestModel())
    model_config_path = os.path.join(model_path, "MLmodel")
    model_config = Model.load(model_config_path)
    del model_config.flavors[mlflow.pyfunc.FLAVOR_NAME][
        mlflow.pyfunc.model.CONFIG_KEY_CLOUDPICKLE_VERSION
    ]
    model_config.save(model_config_path)

    log_messages = []

    def custom_warn(message_text, *args, **kwargs):
        log_messages.append(message_text % args % kwargs)

    with mock.patch("mlflow.pyfunc._logger.warning") as warn_mock:
        warn_mock.side_effect = custom_warn
        mlflow.pyfunc.load_pyfunc(model_uri=model_path)

    assert any(
        [
            (
                "The version of CloudPickle used to save the model could not be found"
                " in the MLmodel configuration"
            )
            in log_message
            for log_message in log_messages
        ]
    )

def test_lgb_autolog_gets_input_example(bst_params):
    # we need to check the example input against the initial input given to train function.
    # we can't use the train_set fixture for this as it defines free_raw_data=False but this
    # feature should work even if it is True
    iris = datasets.load_iris()
    X = pd.DataFrame(iris.data[:, :2], columns=iris.feature_names[:2])
    y = iris.target
    dataset = lgb.Dataset(X, y, free_raw_data=True)

    mlflow.lightgbm.autolog(log_input_examples=True)
    lgb.train(bst_params, dataset)
    run = get_latest_run()

    model_path = os.path.join(run.info.artifact_uri, "model")
    model_conf = Model.load(os.path.join(model_path, "MLmodel"))

    input_example = _read_example(model_conf, model_path)
    assert input_example.equals(X[:5])

    pyfunc_model = mlflow.pyfunc.load_model(os.path.join(run.info.artifact_uri, "model"))

    # make sure reloading the input_example and predicting on it does not error
    pyfunc_model.predict(input_example)

def test_xgb_autolog_gets_input_example(bst_params):
    mlflow.xgboost.autolog(log_input_examples=True)

    # we cannot use dtrain fixture, as the dataset must be constructed
    # after the call to autolog() in order to get the input example
    iris = datasets.load_iris()
    X = pd.DataFrame(iris.data[:, :2], columns=iris.feature_names[:2])
    y = iris.target
    dataset = xgb.DMatrix(X, y)

    xgb.train(bst_params, dataset)
    run = get_latest_run()

    model_path = os.path.join(run.info.artifact_uri, "model")
    model_conf = Model.load(os.path.join(model_path, "MLmodel"))

    input_example = _read_example(model_conf, model_path)
    assert input_example.equals(X[:5])

    pyfunc_model = mlflow.pyfunc.load_model(os.path.join(run.info.artifact_uri, "model"))

    # make sure reloading the input_example and predicting on it does not error
    pyfunc_model.predict(input_example)