import os
from unittest import mock

import numpy as np
import pandas as pd
import pytest
import yaml

import mlflow
import mlflow.statsmodels
from mlflow import pyfunc
import mlflow.pyfunc.scoring_server as pyfunc_scoring_server
from mlflow.models import Model
from mlflow.models.signature import infer_signature
from mlflow.models.utils import _read_example
from mlflow.tracking import MlflowClient
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS
from mlflow.tracking.artifact_utils import _download_artifact_from_uri
from mlflow.utils.environment import _mlflow_conda_env
from mlflow.utils.file_utils import TempDir
from mlflow.utils.model_utils import _get_flavor_configuration

# Shared helpers and fixtures used below (`ols_model`, `failing_logit_model`,
# `get_latest_run`, `_assert_pip_requirements`, `_compare_conda_env_requirements`,
# `_compare_logged_code_paths`, `pyfunc_serve_and_score_model`,
# `EXTRA_PYFUNC_SERVING_TEST_ARGS`, and the `log_models`, `model_path`, and
# `statsmodels_custom_env` fixtures) are assumed to be provided by the
# surrounding test suite.


def test_statsmodels_autolog_respects_log_models_flag(log_models):
    mlflow.statsmodels.autolog(log_models=log_models)
    ols_model()
    run = get_latest_run()
    client = mlflow.tracking.MlflowClient()
    artifact_paths = [artifact.path for artifact in client.list_artifacts(run.info.run_id)]
    assert ("model" in artifact_paths) == log_models


def test_statsmodels_autolog_logs_basic_metrics():
    mlflow.statsmodels.autolog()
    ols_model()
    run = get_latest_run()
    metrics = run.data.metrics
    assert set(metrics.keys()) == set(mlflow.statsmodels._autolog_metric_allowlist)


def test_statsmodels_autolog_failed_metrics_warning():
    mlflow.statsmodels.autolog()

    @property
    def metric_raise_error(_):
        raise RuntimeError()

    class MockSummary:
        def as_text(self):
            return "mock summary."

    with mock.patch(
        "statsmodels.regression.linear_model.OLSResults.f_pvalue", metric_raise_error
    ), mock.patch(
        "statsmodels.regression.linear_model.OLSResults.fvalue", metric_raise_error
    ), mock.patch(
        # Prevent `OLSResults.summary` from calling `fvalue` and `f_pvalue` that raise an exception
        "statsmodels.regression.linear_model.OLSResults.summary",
        return_value=MockSummary(),
    ), mock.patch("mlflow.statsmodels._logger.warning") as mock_warning:
        ols_model()
        mock_warning.assert_called_once_with("Failed to autolog metrics: f_pvalue, fvalue.")


def test_autolog_registering_model():
    registered_model_name = "test_autolog_registered_model"
    mlflow.statsmodels.autolog(registered_model_name=registered_model_name)
    with mlflow.start_run():
        ols_model()

        registered_model = MlflowClient().get_registered_model(registered_model_name)
        assert registered_model.name == registered_model_name


def test_statsmodels_autolog_logs_specified_params():
    mlflow.statsmodels.autolog()
    ols_model(method="qr")

    expected_params = {"method": "qr"}

    run = get_latest_run()
    params = run.data.params

    for key, val in expected_params.items():
        assert key in params
        assert params[key] == str(val)

    mlflow.end_run()


def test_log_model_with_extra_pip_requirements(tmpdir):
    ols = ols_model()
    default_reqs = mlflow.statsmodels.get_default_pip_requirements()

    # Path to a requirements file
    req_file = tmpdir.join("requirements.txt")
    req_file.write("a")
    with mlflow.start_run():
        mlflow.statsmodels.log_model(ols.model, "model", extra_pip_requirements=req_file.strpath)
        _assert_pip_requirements(mlflow.get_artifact_uri("model"), ["mlflow", *default_reqs, "a"])

    # List of requirements
    with mlflow.start_run():
        mlflow.statsmodels.log_model(
            ols.model, "model", extra_pip_requirements=[f"-r {req_file.strpath}", "b"]
        )
        _assert_pip_requirements(
            mlflow.get_artifact_uri("model"), ["mlflow", *default_reqs, "a", "b"]
        )

    # Constraints file
    with mlflow.start_run():
        mlflow.statsmodels.log_model(
            ols.model, "model", extra_pip_requirements=[f"-c {req_file.strpath}", "b"]
        )
        _assert_pip_requirements(
            mlflow.get_artifact_uri("model"),
            ["mlflow", *default_reqs, "b", "-c constraints.txt"],
            ["a"],
        )


def test_model_log_without_specified_conda_env_uses_default_env_with_expected_dependencies():
    ols = ols_model()
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.statsmodels.log_model(statsmodels_model=ols.model, artifact_path=artifact_path)
        model_uri = mlflow.get_artifact_uri(artifact_path)

    _assert_pip_requirements(model_uri, mlflow.statsmodels.get_default_pip_requirements())


def test_model_log_persists_specified_conda_env_in_mlflow_model_directory(statsmodels_custom_env):
    ols = ols_model()
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.statsmodels.log_model(
            statsmodels_model=ols.model,
            artifact_path=artifact_path,
            conda_env=statsmodels_custom_env,
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != statsmodels_custom_env

    with open(statsmodels_custom_env, "r") as f:
        statsmodels_custom_env_parsed = yaml.safe_load(f)
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == statsmodels_custom_env_parsed


def test_model_save_persists_requirements_in_mlflow_model_directory(
    model_path, statsmodels_custom_env
):
    ols = ols_model()
    mlflow.statsmodels.save_model(
        statsmodels_model=ols.model, path=model_path, conda_env=statsmodels_custom_env
    )

    saved_pip_req_path = os.path.join(model_path, "requirements.txt")
    _compare_conda_env_requirements(statsmodels_custom_env, saved_pip_req_path)


def test_statsmodels_autolog_logs_default_params():
    mlflow.statsmodels.autolog()
    ols_model()
    run = get_latest_run()
    params = run.data.params

    expected_params = {
        "cov_kwds": "None",
        "cov_type": "nonrobust",
        "method": "pinv",
        "use_t": "None",
    }

    for key, val in expected_params.items():
        assert key in params
        assert params[key] == str(val)

    mlflow.end_run()


def test_statsmodels_autolog_logs_summary_artifact():
    mlflow.statsmodels.autolog()
    with mlflow.start_run():
        model = ols_model().model
        summary_path = mlflow.get_artifact_uri("model_summary.txt").replace("file://", "")
        with open(summary_path, "r") as f:
            saved_summary = f.read()

    # don't compare the whole summary text because it includes a "Time" field which may change.
    assert model.summary().as_text().split("\n")[:4] == saved_summary.split("\n")[:4]


def test_statsmodels_autolog_emit_warning_when_model_is_large():
    mlflow.statsmodels.autolog()

    with mock.patch(
        "mlflow.statsmodels._model_size_threshold_for_emitting_warning", float("inf")
    ), mock.patch("mlflow.statsmodels._logger.warning") as mock_warning:
        ols_model()
        assert all(
            not call_args[0][0].startswith("The fitted model is larger than")
            for call_args in mock_warning.call_args_list
        )

    with mock.patch(
        "mlflow.statsmodels._model_size_threshold_for_emitting_warning", 1
    ), mock.patch("mlflow.statsmodels._logger.warning") as mock_warning:
        ols_model()
        assert any(
            call_args[0][0].startswith("The fitted model is larger than")
            for call_args in mock_warning.call_args_list
        )


def test_log_model_no_registered_model_name():
    ols = ols_model()
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with mlflow.start_run(), register_model_patch, TempDir(chdr=True, remove_on_exit=True) as tmp:
        conda_env = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["statsmodels"])
        mlflow.statsmodels.log_model(
            statsmodels_model=ols.model, artifact_path=artifact_path, conda_env=conda_env
        )
        mlflow.register_model.assert_not_called()


def test_model_save_accepts_conda_env_as_dict(model_path):
    ols = ols_model()
    conda_env = dict(mlflow.statsmodels.get_default_conda_env())
    conda_env["dependencies"].append("pytest")
    mlflow.statsmodels.save_model(statsmodels_model=ols.model, path=model_path, conda_env=conda_env)

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)

    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == conda_env


def test_log_model_with_code_paths():
    artifact_path = "model"
    ols = ols_model()
    with mlflow.start_run(), mock.patch(
        "mlflow.statsmodels._add_code_from_conf_to_system_path"
    ) as add_mock:
        mlflow.statsmodels.log_model(ols.model, artifact_path, code_paths=[__file__])
        model_uri = mlflow.get_artifact_uri(artifact_path)
        _compare_logged_code_paths(__file__, model_uri, mlflow.statsmodels.FLAVOR_NAME)
        mlflow.statsmodels.load_model(model_uri)
        add_mock.assert_called()


def test_pyfunc_serve_and_score():
    model, _, inference_dataframe = ols_model()
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.statsmodels.log_model(model, artifact_path)
        model_uri = mlflow.get_artifact_uri(artifact_path)

    resp = pyfunc_serve_and_score_model(
        model_uri,
        data=pd.DataFrame(inference_dataframe),
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
    )
    scores = pd.read_json(resp.content, orient="records").values.squeeze()
    np.testing.assert_array_almost_equal(scores, model.predict(inference_dataframe))


def test_statsmodels_autolog_works_after_exception():
    mlflow.statsmodels.autolog()
    # We first fit a model known to raise an exception
    with pytest.raises(Exception, match=r".+"):
        failing_logit_model()
    # and then fit another one that should go well
    model_with_results = ols_model()

    run = get_latest_run()
    run_id = run.info.run_id
    loaded_model = mlflow.statsmodels.load_model("runs:/{}/model".format(run_id))

    model_predictions = model_with_results.model.predict(model_with_results.inference_dataframe)
    loaded_model_predictions = loaded_model.predict(model_with_results.inference_dataframe)
    np.testing.assert_array_almost_equal(model_predictions, loaded_model_predictions)


def test_model_log_persists_requirements_in_mlflow_model_directory(statsmodels_custom_env):
    ols = ols_model()
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.statsmodels.log_model(
            statsmodels_model=ols.model,
            artifact_path=artifact_path,
            conda_env=statsmodels_custom_env,
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    saved_pip_req_path = os.path.join(model_path, "requirements.txt")
    _compare_conda_env_requirements(statsmodels_custom_env, saved_pip_req_path)


def test_signature_and_examples_are_saved_correctly():
    model, _, X = ols_model()
    signature_ = infer_signature(X)
    example_ = X[0:3, :]
    for signature in (None, signature_):
        for example in (None, example_):
            with TempDir() as tmp:
                path = tmp.path("model")
                mlflow.statsmodels.save_model(
                    model, path=path, signature=signature, input_example=example
                )
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert np.array_equal(_read_example(mlflow_model, path), example)


def test_model_save_persists_specified_conda_env_in_mlflow_model_directory(
    model_path, statsmodels_custom_env
):
    ols = ols_model()
    mlflow.statsmodels.save_model(
        statsmodels_model=ols.model, path=model_path, conda_env=statsmodels_custom_env
    )

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != statsmodels_custom_env

    with open(statsmodels_custom_env, "r") as f:
        statsmodels_custom_env_parsed = yaml.safe_load(f)
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == statsmodels_custom_env_parsed


def test_log_model_calls_register_model():
    # Adapted from lightgbm tests
    ols = ols_model()
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with mlflow.start_run(), register_model_patch, TempDir(chdr=True, remove_on_exit=True) as tmp:
        conda_env = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["statsmodels"])
        mlflow.statsmodels.log_model(
            statsmodels_model=ols.model,
            artifact_path=artifact_path,
            conda_env=conda_env,
            registered_model_name="OLSModel1",
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )
        mlflow.register_model.assert_called_once_with(
            model_uri, "OLSModel1", await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS
        )


def test_statsmodels_autolog_persists_manually_created_run():
    mlflow.statsmodels.autolog()
    with mlflow.start_run() as run:
        ols_model()
        assert mlflow.active_run()
        assert mlflow.active_run().info.run_id == run.info.run_id


def test_model_save_without_specified_conda_env_uses_default_env_with_expected_dependencies(
    model_path,
):
    ols = ols_model()
    mlflow.statsmodels.save_model(statsmodels_model=ols.model, path=model_path)
    _assert_pip_requirements(model_path, mlflow.statsmodels.get_default_pip_requirements())


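# ---------------------------------------------------------------------------
# The tests above depend on helpers such as `ols_model()` and `get_latest_run()`
# that live elsewhere in the test suite. The function below is only a hedged,
# self-contained sketch of the round trip those tests exercise: fit a small
# statsmodels OLS model on synthetic (hypothetical) data, log it with the
# statsmodels flavor, load it back, and compare predictions. It is illustrative
# and is not the actual fixture implementation.
# ---------------------------------------------------------------------------
def _ols_log_and_load_round_trip_sketch():
    import statsmodels.api as sm

    # Synthetic regression data with an intercept column (assumed shapes/values).
    rng = np.random.RandomState(0)
    X = sm.add_constant(rng.uniform(size=(50, 2)))
    y = X @ np.array([1.0, 2.0, -0.5]) + rng.normal(scale=0.1, size=50)
    fitted = sm.OLS(y, X).fit()

    # Log the fitted results object under the "model" artifact path, then load it
    # back through the statsmodels flavor and check that predictions agree.
    with mlflow.start_run():
        mlflow.statsmodels.log_model(fitted, "model")
        model_uri = mlflow.get_artifact_uri("model")
    loaded = mlflow.statsmodels.load_model(model_uri)
    np.testing.assert_array_almost_equal(fitted.predict(X), loaded.predict(X))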