def test_statsmodels_autolog_respects_log_models_flag(log_models):
    """Check that a "model" artifact is present exactly when ``log_models`` says so.

    NOTE(review): ``log_models`` has to come from a fixture or a
    ``pytest.mark.parametrize`` decorator that is not visible in this chunk -- confirm.
    """
    mlflow.statsmodels.autolog(log_models=log_models)
    ols_model()
    run_id = get_latest_run().info.run_id

    # Collect the top-level artifact paths recorded for the latest run.
    logged_paths = []
    for artifact in mlflow.tracking.MlflowClient().list_artifacts(run_id):
        logged_paths.append(artifact.path)

    model_was_logged = "model" in logged_paths
    assert model_was_logged == log_models
def test_statsmodels_autolog_logs_basic_metrics():
    """Check that the latest run's metric names equal the autolog metric allowlist."""
    mlflow.statsmodels.autolog()
    ols_model()

    logged_metric_names = set(get_latest_run().data.metrics.keys())
    allowed_metric_names = set(mlflow.statsmodels._autolog_metric_allowlist)
    assert logged_metric_names == allowed_metric_names
def test_statsmodels_autolog_failed_metrics_warning():
    """Check that one warning lists the metrics whose accessors raised.

    ``f_pvalue`` and ``fvalue`` on ``OLSResults`` are replaced by a property that
    always raises ``RuntimeError``; the logger's ``warning`` is mocked so the
    emitted message can be asserted.
    """
    mlflow.statsmodels.autolog()

    def _always_raise(_):
        raise RuntimeError()

    # Property object whose getter raises on every access.
    metric_raise_error = property(_always_raise)

    class MockSummary:
        def as_text(self):
            return "mock summary."

    f_pvalue_patch = mock.patch(
        "statsmodels.regression.linear_model.OLSResults.f_pvalue", metric_raise_error
    )
    fvalue_patch = mock.patch(
        "statsmodels.regression.linear_model.OLSResults.fvalue", metric_raise_error
    )
    # Prevent `OLSResults.summary` from calling `fvalue` and `f_pvalue` that raise an exception
    summary_patch = mock.patch(
        "statsmodels.regression.linear_model.OLSResults.summary",
        return_value=MockSummary(),
    )
    warning_patch = mock.patch("mlflow.statsmodels._logger.warning")

    # The patches are entered in the same order as they were declared above.
    with f_pvalue_patch, fvalue_patch, summary_patch, warning_patch as mock_warning:
        ols_model()
        mock_warning.assert_called_once_with("Failed to autolog metrics: f_pvalue, fvalue.")
def test_autolog_registering_model():
    """Check that autolog with ``registered_model_name`` yields a registered model of that name."""
    model_name = "test_autolog_registered_model"
    mlflow.statsmodels.autolog(registered_model_name=model_name)
    with mlflow.start_run():
        ols_model()

        # Look the model up while the run is still active, as the registry
        # entry is expected to exist as soon as fitting has finished.
        client = MlflowClient()
        fetched = client.get_registered_model(model_name)
        assert fetched.name == model_name
def test_statsmodels_autolog_logs_specified_params():
    """Check that an explicitly passed fit argument is logged as a run parameter.

    ``ols_model`` is called with ``method="qr"`` and the latest run's params are
    expected to contain ``"method"`` with the stringified value.
    """
    mlflow.statsmodels.autolog()
    expected_params = {"method": "qr"}

    try:
        ols_model(method="qr")

        params = get_latest_run().data.params
        for key, val in expected_params.items():
            assert key in params
            assert params[key] == str(val)
    finally:
        # Run the cleanup even when an assertion above fails, so a run that is
        # still active cannot leak into whichever test executes next.
        mlflow.end_run()
# Example #6
# 0
def test_log_model_with_extra_pip_requirements(tmpdir):
    """Check ``extra_pip_requirements`` given as a file path, a list, and a constraints reference.

    In every case the logged requirements are expected to be ``"mlflow"`` plus the
    flavor's default requirements plus the extras.
    """
    ols = ols_model()
    default_reqs = mlflow.statsmodels.get_default_pip_requirements()

    # A requirements file holding the single requirement "a"; reused by all cases.
    req_file = tmpdir.join("requirements.txt")
    req_file.write("a")
    req_path = req_file.strpath

    # Case 1: the extras are given as a path to a requirements file.
    with mlflow.start_run():
        mlflow.statsmodels.log_model(ols.model, "model", extra_pip_requirements=req_path)
        logged_uri = mlflow.get_artifact_uri("model")
        _assert_pip_requirements(logged_uri, ["mlflow", *default_reqs, "a"])

    # Case 2: the extras are a list that pulls the file in via "-r".
    with mlflow.start_run():
        extras = [f"-r {req_path}", "b"]
        mlflow.statsmodels.log_model(ols.model, "model", extra_pip_requirements=extras)
        logged_uri = mlflow.get_artifact_uri("model")
        _assert_pip_requirements(logged_uri, ["mlflow", *default_reqs, "a", "b"])

    # Case 3: the file is referenced as a constraints file via "-c".
    with mlflow.start_run():
        extras = [f"-c {req_path}", "b"]
        mlflow.statsmodels.log_model(ols.model, "model", extra_pip_requirements=extras)
        logged_uri = mlflow.get_artifact_uri("model")
        _assert_pip_requirements(
            logged_uri,
            ["mlflow", *default_reqs, "b", "-c constraints.txt"],
            ["a"],
        )
# Example #7
# 0
def test_model_log_without_specified_conda_env_uses_default_env_with_expected_dependencies():
    """Check that logging without ``conda_env`` records the flavor's default pip requirements."""
    fitted = ols_model()
    with mlflow.start_run():
        mlflow.statsmodels.log_model(statsmodels_model=fitted.model, artifact_path="model")
        # Resolve the URI while the run is still active.
        logged_uri = mlflow.get_artifact_uri("model")

    expected_reqs = mlflow.statsmodels.get_default_pip_requirements()
    _assert_pip_requirements(logged_uri, expected_reqs)
def test_model_log_persists_specified_conda_env_in_mlflow_model_directory(
        statsmodels_custom_env):
    """Check that a logged model carries a copy of the supplied conda env file.

    The saved env file must exist, must be a different path than the input file,
    and must parse to the same YAML content.
    """

    def _load_yaml(path):
        # Parse a YAML file into plain Python objects.
        with open(path, "r") as handle:
            return yaml.safe_load(handle)

    fitted = ols_model()
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.statsmodels.log_model(
            statsmodels_model=fitted.model,
            artifact_path=artifact_path,
            conda_env=statsmodels_custom_env,
        )
        run_id = mlflow.active_run().info.run_id
        model_uri = f"runs:/{run_id}/{artifact_path}"

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_env_path)
    assert saved_env_path != statsmodels_custom_env

    expected_env = _load_yaml(statsmodels_custom_env)
    saved_env = _load_yaml(saved_env_path)
    assert saved_env == expected_env
def test_model_save_persists_requirements_in_mlflow_model_directory(
        model_path, statsmodels_custom_env):
    """Check the saved ``requirements.txt`` against the supplied conda env."""
    fitted = ols_model()
    mlflow.statsmodels.save_model(
        statsmodels_model=fitted.model,
        path=model_path,
        conda_env=statsmodels_custom_env,
    )

    requirements_file = os.path.join(model_path, "requirements.txt")
    _compare_conda_env_requirements(statsmodels_custom_env, requirements_file)
def test_statsmodels_autolog_logs_default_params():
    """Check that a fit with no explicit arguments logs the expected default params.

    Each expected key must be present in the latest run's params with the
    stringified expected value.
    """
    mlflow.statsmodels.autolog()
    expected_params = {
        "cov_kwds": "None",
        "cov_type": "nonrobust",
        "method": "pinv",
        "use_t": "None",
    }

    try:
        ols_model()

        params = get_latest_run().data.params
        for key, val in expected_params.items():
            assert key in params
            assert params[key] == str(val)
    finally:
        # Run the cleanup even when an assertion above fails, so a run that is
        # still active cannot leak into whichever test executes next.
        mlflow.end_run()
def test_statsmodels_autolog_logs_summary_artifact():
    """Check that the logged ``model_summary.txt`` starts like the model's own summary."""
    mlflow.statsmodels.autolog()
    with mlflow.start_run():
        model = ols_model().model
        summary_uri = mlflow.get_artifact_uri("model_summary.txt")
        # Drop the "file://" scheme so the URI can be opened as a local path.
        local_path = summary_uri.replace("file://", "")
        with open(local_path, "r") as summary_file:
            saved_summary = summary_file.read()

    # Only the first four lines are compared: the full summary text includes a
    # "Time" field which may change between the fit and this check.
    expected_head = model.summary().as_text().split("\n")[:4]
    saved_head = saved_summary.split("\n")[:4]
    assert expected_head == saved_head
def test_statsmodels_autolog_emit_warning_when_model_is_large():
    """Check the size warning against an infinite threshold and a threshold of 1.

    With the threshold patched to infinity no size warning may be logged; with the
    threshold patched to 1 at least one must be.
    """
    mlflow.statsmodels.autolog()

    threshold_target = "mlflow.statsmodels._model_size_threshold_for_emitting_warning"
    warning_target = "mlflow.statsmodels._logger.warning"

    def _size_warning_flags(warning_mock):
        # Lazily yield, per recorded call, whether its first positional argument
        # is the size warning.
        return (
            call_args[0][0].startswith("The fitted model is larger than")
            for call_args in warning_mock.call_args_list
        )

    with mock.patch(threshold_target, float("inf")), mock.patch(warning_target) as mock_warning:
        ols_model()
        assert not any(_size_warning_flags(mock_warning))

    with mock.patch(threshold_target, 1), mock.patch(warning_target) as mock_warning:
        ols_model()
        assert any(_size_warning_flags(mock_warning))
# Example #13
# 0
def test_log_model_no_registered_model_name():
    """Check that ``log_model`` without ``registered_model_name`` never calls ``mlflow.register_model``."""
    fitted = ols_model()
    with mlflow.start_run(), mock.patch("mlflow.register_model") as register_mock, TempDir(
        chdr=True, remove_on_exit=True
    ) as tmp:
        env_file = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(env_file, additional_pip_deps=["statsmodels"])
        mlflow.statsmodels.log_model(
            statsmodels_model=fitted.model,
            artifact_path="model",
            conda_env=env_file,
        )
        # `register_mock` is the object installed at `mlflow.register_model`.
        register_mock.assert_not_called()
# Example #14
# 0
def test_model_save_accepts_conda_env_as_dict(model_path):
    """Check that a conda env passed as a dict is written out unchanged with the model."""
    fitted = ols_model()

    # Start from the default env and add one extra dependency to it.
    env_spec = dict(mlflow.statsmodels.get_default_conda_env())
    env_spec["dependencies"].append("pytest")
    mlflow.statsmodels.save_model(
        statsmodels_model=fitted.model,
        path=model_path,
        conda_env=env_spec,
    )

    flavor_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_env_path = os.path.join(model_path, flavor_conf[pyfunc.ENV])
    assert os.path.exists(saved_env_path)

    with open(saved_env_path, "r") as env_file:
        saved_env = yaml.safe_load(env_file)
    assert saved_env == env_spec
def test_log_model_with_code_paths():
    """Check that ``code_paths`` are logged and that loading invokes the code-path hook."""
    fitted = ols_model()
    artifact_path = "model"
    hook_target = "mlflow.statsmodels._add_code_from_conf_to_system_path"

    with mlflow.start_run(), mock.patch(hook_target) as add_mock:
        mlflow.statsmodels.log_model(fitted.model, artifact_path, code_paths=[__file__])
        logged_uri = mlflow.get_artifact_uri(artifact_path)
        _compare_logged_code_paths(__file__, logged_uri, mlflow.statsmodels.FLAVOR_NAME)

        # Loading the model is what is expected to trigger the patched hook.
        mlflow.statsmodels.load_model(logged_uri)
        add_mock.assert_called()
# Example #16
# 0
def test_pyfunc_serve_and_score():
    """Check that scores returned by the served model match the model's direct predictions."""
    model, _, inference_dataframe = ols_model()
    with mlflow.start_run():
        mlflow.statsmodels.log_model(model, "model")
        served_uri = mlflow.get_artifact_uri("model")

    response = pyfunc_serve_and_score_model(
        served_uri,
        data=pd.DataFrame(inference_dataframe),
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        extra_args=EXTRA_PYFUNC_SERVING_TEST_ARGS,
    )

    served_scores = pd.read_json(response.content, orient="records").values.squeeze()
    direct_scores = model.predict(inference_dataframe)
    np.testing.assert_array_almost_equal(served_scores, direct_scores)
def test_statsmodels_autolog_works_after_exception():
    """Check that a fit which raises does not stop a later fit from being autologged."""
    mlflow.statsmodels.autolog()

    # First fit: expected to raise an exception with a non-empty message.
    with pytest.raises(Exception, match=r".+"):
        failing_logit_model()

    # Second fit: expected to succeed and be logged as usual.
    fitted = ols_model()

    latest_run_id = get_latest_run().info.run_id
    reloaded = mlflow.statsmodels.load_model(f"runs:/{latest_run_id}/model")

    # The reloaded model must predict the same values as the in-memory one.
    direct_predictions = fitted.model.predict(fitted.inference_dataframe)
    reloaded_predictions = reloaded.predict(fitted.inference_dataframe)
    np.testing.assert_array_almost_equal(direct_predictions, reloaded_predictions)
# Example #18
# 0
def test_model_log_persists_requirements_in_mlflow_model_directory(statsmodels_custom_env):
    """Check the logged model's ``requirements.txt`` against the supplied conda env."""
    fitted = ols_model()
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.statsmodels.log_model(
            statsmodels_model=fitted.model,
            artifact_path=artifact_path,
            conda_env=statsmodels_custom_env,
        )
        # Build the runs:/ URI while the run is still active.
        active_run_id = mlflow.active_run().info.run_id
        model_uri = f"runs:/{active_run_id}/{artifact_path}"

    downloaded_dir = _download_artifact_from_uri(artifact_uri=model_uri)
    requirements_file = os.path.join(downloaded_dir, "requirements.txt")
    _compare_conda_env_requirements(statsmodels_custom_env, requirements_file)
# Example #19
# 0
def test_signature_and_examples_are_saved_correctly():
    """Check every combination of (no) signature and (no) input example on save and reload."""
    model, _, X = ols_model()
    inferred_signature = infer_signature(X)
    first_rows = X[0:3, :]

    # All four (signature, example) pairings, signature varying slowest.
    combinations = [
        (sig, ex) for sig in (None, inferred_signature) for ex in (None, first_rows)
    ]

    for signature, example in combinations:
        with TempDir() as tmp:
            path = tmp.path("model")
            mlflow.statsmodels.save_model(
                model, path=path, signature=signature, input_example=example
            )
            reloaded = Model.load(path)
            assert signature == reloaded.signature
            if example is not None:
                assert np.array_equal(_read_example(reloaded, path), example)
            else:
                assert reloaded.saved_input_example_info is None
# Example #20
# 0
def test_model_save_persists_specified_conda_env_in_mlflow_model_directory(
    model_path, statsmodels_custom_env
):
    """Check that a saved model carries a copy of the supplied conda env file.

    The saved env file must exist, must be a different path than the input file,
    and must parse to the same YAML content.
    """

    def _load_yaml(path):
        # Parse a YAML file into plain Python objects.
        with open(path, "r") as handle:
            return yaml.safe_load(handle)

    fitted = ols_model()
    mlflow.statsmodels.save_model(
        statsmodels_model=fitted.model,
        path=model_path,
        conda_env=statsmodels_custom_env,
    )

    flavor_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_env_path = os.path.join(model_path, flavor_conf[pyfunc.ENV])
    assert os.path.exists(saved_env_path)
    assert saved_env_path != statsmodels_custom_env

    expected_env = _load_yaml(statsmodels_custom_env)
    saved_env = _load_yaml(saved_env_path)
    assert saved_env == expected_env
# Example #21
# 0
def test_log_model_calls_register_model():
    """Check that ``registered_model_name`` makes ``log_model`` call ``mlflow.register_model`` once."""
    # Adapted from lightgbm tests
    fitted = ols_model()
    artifact_path = "model"
    with mlflow.start_run(), mock.patch("mlflow.register_model") as register_mock, TempDir(
        chdr=True, remove_on_exit=True
    ) as tmp:
        env_file = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(env_file, additional_pip_deps=["statsmodels"])
        mlflow.statsmodels.log_model(
            statsmodels_model=fitted.model,
            artifact_path=artifact_path,
            conda_env=env_file,
            registered_model_name="OLSModel1",
        )

        active_run_id = mlflow.active_run().info.run_id
        expected_uri = f"runs:/{active_run_id}/{artifact_path}"
        # `register_mock` is the object installed at `mlflow.register_model`.
        register_mock.assert_called_once_with(
            expected_uri, "OLSModel1", await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS
        )
def test_statsmodels_autolog_persists_manually_created_run():
    """Check that a manually started run is still the active run after fitting."""
    mlflow.statsmodels.autolog()
    with mlflow.start_run() as run:
        manual_run_id = run.info.run_id
        ols_model()

        # A run must still be active, and it must be the one started above.
        assert mlflow.active_run()
        assert mlflow.active_run().info.run_id == manual_run_id
# Example #23
# 0
def test_model_save_without_specified_conda_env_uses_default_env_with_expected_dependencies(
    model_path,
):
    """Check that saving without ``conda_env`` records the flavor's default pip requirements."""
    fitted = ols_model()
    mlflow.statsmodels.save_model(statsmodels_model=fitted.model, path=model_path)

    expected_reqs = mlflow.statsmodels.get_default_pip_requirements()
    _assert_pip_requirements(model_path, expected_reqs)