Example #1
def test_model_log(prophet_model):
    old_uri = mlflow.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()
                artifact_path = "prophet"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env,
                                  additional_pip_deps=["pystan", "prophet"])

                model_info = mlflow.prophet.log_model(
                    pr_model=prophet_model.model,
                    artifact_path=artifact_path,
                    conda_env=conda_env)
                model_uri = f"runs:/{mlflow.active_run().info.run_id}/{artifact_path}"
                assert model_info.model_uri == model_uri
                reloaded_prophet_model = mlflow.prophet.load_model(
                    model_uri=model_uri)

                np.testing.assert_array_equal(
                    generate_forecast(prophet_model.model, FORECAST_HORIZON),
                    generate_forecast(reloaded_prophet_model,
                                      FORECAST_HORIZON),
                )

                model_path = _download_artifact_from_uri(
                    artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))

            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
Example #2
def test_model_load_succeeds_with_missing_data_key_when_data_exists_at_default_path(
    h2o_iris_model, model_path
):
    """
    This is a backwards compatibility test to ensure that models saved in MLflow version <= 0.7.0
    can be loaded successfully. These models are missing the `data` flavor configuration key.
    """
    h2o_model = h2o_iris_model.model
    mlflow.h2o.save_model(h2o_model=h2o_model, path=model_path)

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    flavor_conf = model_conf.flavors.get(mlflow.h2o.FLAVOR_NAME, None)
    assert flavor_conf is not None
    del flavor_conf["data"]
    model_conf.save(model_conf_path)

    h2o_model_loaded = mlflow.h2o.load_model(model_path)
    assert all(
        h2o_model_loaded.predict(h2o_iris_model.inference_data).as_data_frame()
        == h2o_model.predict(h2o_iris_model.inference_data).as_data_frame()
    )
Example #3
def test_log_model_built_in_high_level_api(
        pd_model_built_in_high_level_api, model_path, tmpdir,
        get_dataset_built_in_high_level_api):
    model = pd_model_built_in_high_level_api.model
    test_dataset = get_dataset_built_in_high_level_api[1]

    try:
        artifact_path = "model"
        conda_env = os.path.join(tmpdir, "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["paddle"])

        mlflow.paddle.log_model(pd_model=model,
                                artifact_path=artifact_path,
                                conda_env=conda_env,
                                training=True)
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id,
            artifact_path=artifact_path)

        model_retrain = paddle.Model(UCIHousing())
        optim = paddle.optimizer.Adam(learning_rate=0.015,
                                      parameters=model.parameters())
        model_retrain.prepare(optim, paddle.nn.MSELoss())
        model_retrain = mlflow.paddle.load_model(model_uri=model_uri,
                                                 model=model_retrain)

        np.testing.assert_array_almost_equal(
            np.array(model.predict(test_dataset)).squeeze(),
            np.array(model_retrain.predict(test_dataset)).squeeze(),
            decimal=5,
        )
        model_path = _download_artifact_from_uri(artifact_uri=model_uri)
        model_config = Model.load(os.path.join(model_path, "MLmodel"))
        assert pyfunc.FLAVOR_NAME in model_config.flavors
        assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
        env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
        assert os.path.exists(os.path.join(model_path, env_path))
    finally:
        mlflow.end_run()
Example #4
def test_log_model_with_signature_and_examples(iris_df, spark_model_iris):
    _, _, iris_spark_df = iris_df
    signature_ = infer_signature(iris_spark_df)
    example_ = iris_spark_df.toPandas().head(3)
    artifact_path = "model"
    for signature in (None, signature_):
        for example in (None, example_):
            with mlflow.start_run():
                sparkm.log_model(
                    spark_model_iris.model,
                    artifact_path=artifact_path,
                    signature=signature,
                    input_example=example,
                )
                artifact_uri = mlflow.get_artifact_uri()
                model_path = os.path.join(artifact_uri, artifact_path)
                mlflow_model = Model.load(model_path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model, model_path) == example).all())
Example #5
def test_mleap_model_log(spark_model_iris):
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with mlflow.start_run(), register_model_patch:
        sparkm.log_model(
            spark_model=spark_model_iris.model,
            sample_input=spark_model_iris.spark_df,
            artifact_path=artifact_path,
            registered_model_name="Model1",
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )
        mlflow.register_model.assert_called_once_with(
            model_uri, "Model1", await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS
        )

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    config_path = os.path.join(model_path, "MLmodel")
    mlflow_model = Model.load(config_path)
    assert sparkm.FLAVOR_NAME in mlflow_model.flavors
    assert mleap.FLAVOR_NAME in mlflow_model.flavors
Example #6
def test_model_save_load(sklearn_knn_model, iris_data, tmpdir, model_path):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    model_config = Model(run_id="test", artifact_path="testtest")
    mlflow.pyfunc.save_model(
        path=model_path,
        data_path=sk_model_path,
        loader_module=__name__,
        code_path=[__file__],
        mlflow_model=model_config,
    )

    reloaded_model_config = Model.load(os.path.join(model_path, "MLmodel"))
    assert model_config.__dict__ == reloaded_model_config.__dict__
    assert mlflow.pyfunc.FLAVOR_NAME in reloaded_model_config.flavors
    assert mlflow.pyfunc.PY_VERSION in reloaded_model_config.flavors[
        mlflow.pyfunc.FLAVOR_NAME]
    reloaded_model = mlflow.pyfunc.load_pyfunc(model_path)
    np.testing.assert_array_equal(sklearn_knn_model.predict(iris_data[0]),
                                  reloaded_model.predict(iris_data[0]))
Example #7
def test_schema_and_examples_are_saved_correctly(saved_tf_iris_model):
    train_x, train_y = iris_data_utils.load_data()[0]
    X = pd.DataFrame(train_x)
    y = pd.Series(train_y)
    for signature in (None, infer_signature(X, y)):
        for example in (None, X.head(3)):
            with TempDir() as tmp:
                path = tmp.path("model")
                mlflow.tensorflow.save_model(
                    tf_saved_model_dir=saved_tf_iris_model.path,
                    tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
                    tf_signature_def_key=saved_tf_iris_model.signature_def_key,
                    path=path,
                    signature=signature,
                    input_example=example,
                )
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model, path) == example).all())
Example #8
def test_model_log(pd_model, model_path):
    old_uri = mlflow.get_tracking_uri()
    model = pd_model.model
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "model"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["paddle"])

                mlflow.paddle.log_model(pd_model=model,
                                        artifact_path=artifact_path,
                                        conda_env=conda_env)
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=mlflow.active_run().info.run_id,
                    artifact_path=artifact_path)

                reloaded_pd_model = mlflow.paddle.load_model(
                    model_uri=model_uri)
                np.testing.assert_array_almost_equal(
                    model(pd_model.inference_dataframe),
                    reloaded_pd_model(pd_model.inference_dataframe),
                )

                model_path = _download_artifact_from_uri(
                    artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))

            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
Example #9
def test_model_log_without_pyfunc_flavor():
    artifact_path = "model"
    nlp = spacy.blank("en")

    # Add a component not compatible with pyfunc
    if IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0:
        nlp.add_pipe("ner", last=True)
    else:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)

    # Ensure the pyfunc flavor is not present after logging and loading the model
    with mlflow.start_run():
        mlflow.spacy.log_model(spacy_model=nlp, artifact_path=artifact_path)
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

        loaded_model = Model.load(model_path)
        assert loaded_model.flavors.keys() == {"spacy"}
Example #10
def test_model_load_succeeds_with_missing_data_key_when_data_exists_at_default_path(
        tf_keras_model, model_path, data):
    """
    This is a backwards compatibility test to ensure that models saved in MLflow version <= 0.8.0
    can be loaded successfully. These models are missing the `data` flavor configuration key.
    """
    mlflow.keras.save_model(keras_model=tf_keras_model,
                            path=model_path,
                            save_format="h5")
    shutil.move(os.path.join(model_path, "data", "model.h5"),
                os.path.join(model_path, "model.h5"))
    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    flavor_conf = model_conf.flavors.get(mlflow.keras.FLAVOR_NAME, None)
    assert flavor_conf is not None
    del flavor_conf["data"]
    model_conf.save(model_conf_path)

    model_loaded = mlflow.keras.load_model(model_path)
    assert all(
        model_loaded.predict(data[0].values) == tf_keras_model.predict(
            data[0].values))
Example #11
def test_signature_and_examples_are_saved_correctly(onnx_model, data,
                                                    onnx_custom_env):
    import mlflow.onnx
    model = onnx_model
    signature_ = infer_signature(*data)
    example_ = data[0].head(3)
    for signature in (None, signature_):
        for example in (None, example_):
            with TempDir() as tmp:
                path = tmp.path("model")
                mlflow.onnx.save_model(model,
                                       path=path,
                                       conda_env=onnx_custom_env,
                                       signature=signature,
                                       input_example=example)
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model,
                                              path) == example).all())
Example #12
def load_model(model_uri: str, suppress_warnings: bool = True) -> PyFuncModel:
    """
    Load a model stored in Python function format.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param suppress_warnings: If ``True``, non-fatal warning messages associated with the model
                              loading process will be suppressed. If ``False``, these warning
                              messages will be emitted.
    """
    local_path = _download_artifact_from_uri(artifact_uri=model_uri)
    model_meta = Model.load(os.path.join(local_path, "MLmodel"))

    conf = model_meta.flavors.get(FLAVOR_NAME)
    if conf is None:
        raise MlflowException(
            "Model does not have the \"{flavor_name}\" flavor".format(
                flavor_name=FLAVOR_NAME), RESOURCE_DOES_NOT_EXIST)
    model_py_version = conf.get(PY_VERSION)
    if not suppress_warnings:
        _warn_potentially_incompatible_py_version_if_necessary(
            model_py_version=model_py_version)
    if CODE in conf and conf[CODE]:
        code_path = os.path.join(local_path, conf[CODE])
        mlflow.pyfunc.utils._add_code_to_system_path(code_path=code_path)
    data_path = os.path.join(local_path,
                             conf[DATA]) if (DATA in conf) else local_path
    model_impl = importlib.import_module(conf[MAIN])._load_pyfunc(data_path)
    return PyFuncModel(model_meta=model_meta, model_impl=model_impl)
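A minimal usage sketch for this loader; the run ID, the artifact path "model", and the pandas DataFrame `batch` are assumptions for illustration, not part of the source above:

import mlflow.pyfunc

# Load the logged model back as a generic pyfunc and run inference.
model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")  # run_id: an existing run that logged the model
predictions = model.predict(batch)  # batch: a pandas DataFrame matching the model's input schema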
Example #13
def _load_model(path, **kwargs):
    mlflow_model_path = os.path.join(path, "MLmodel")
    if not os.path.exists(mlflow_model_path):
        raise RuntimeError("MLmodel is not found at '{}'".format(path))

    mlflow_model = Model.load(mlflow_model_path)

    if FLAVOR_NAME not in mlflow_model.flavors:
        raise ValueError("Could not find flavor '{}' amongst available flavors {}, "
                         "unable to load stored model"
                         .format(FLAVOR_NAME, list(mlflow_model.flavors.keys())))

    # This may be replaced by a warning followed by a try/except around torch.load
    flavor = mlflow_model.flavors[FLAVOR_NAME]
    if torch.__version__ != flavor["pytorch_version"]:
        raise ValueError("Stored model version '{}' does not match "
                         "installed PyTorch version '{}'"
                         .format(flavor["pytorch_version"], torch.__version__))

    path = os.path.abspath(path)
    path = os.path.join(path, mlflow_model.flavors[FLAVOR_NAME]['model_data'])
    return torch.load(path, **kwargs)
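A brief sketch of invoking this helper on a locally available model directory; `local_dir` is a hypothetical path, and keyword arguments such as `map_location` are forwarded to ``torch.load``:

import torch

# local_dir is assumed to contain an MLmodel file with a matching pytorch flavor
model = _load_model(local_dir, map_location=torch.device("cpu"))
model.eval()  # switch the module to inference mode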
Example #14
def test_load_model_succeeds_when_data_is_model_file_instead_of_directory(
    module_scoped_subclassed_model, model_path, data
):
    """
    This test verifies that PyTorch models saved in older versions of MLflow are loaded successfully
    by ``mlflow.pytorch.load_model``. The ``data`` path associated with these older models is
    a serialized PyTorch model file, as opposed to the current format: a directory containing a
    serialized model file and pickle module information.
    """
    artifact_path = "pytorch_model"
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            artifact_path=artifact_path, pytorch_model=module_scoped_subclassed_model
        )
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    assert pyfunc_conf is not None
    model_data_path = os.path.join(model_path, pyfunc_conf[pyfunc.DATA])
    assert os.path.exists(model_data_path)
    assert mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME in os.listdir(model_data_path)
    pyfunc_conf[pyfunc.DATA] = os.path.join(
        model_data_path, mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME
    )
    model_conf.save(model_conf_path)

    loaded_pyfunc = pyfunc.load_pyfunc(model_path)

    np.testing.assert_array_almost_equal(
        loaded_pyfunc.predict(data[0]),
        pd.DataFrame(_predict(model=module_scoped_subclassed_model, data=data)),
        decimal=4,
    )
Example #15
def test_log_model(mlflow_client, backend_store_uri):
    experiment_id = mlflow_client.create_experiment("Log models")
    with TempDir(chdr=True):
        mlflow.set_experiment("Log models")
        model_paths = ["model/path/{}".format(i) for i in range(3)]
        with mlflow.start_run(experiment_id=experiment_id) as run:
            for i, m in enumerate(model_paths):
                mlflow.pyfunc.log_model(m, loader_module="mlflow.pyfunc")
                mlflow.pyfunc.save_model(
                    m,
                    mlflow_model=Model(artifact_path=m, run_id=run.info.run_id),
                    loader_module="mlflow.pyfunc",
                )
                model = Model.load(os.path.join(m, "MLmodel"))
                run = mlflow.get_run(run.info.run_id)
                tag = run.data.tags["mlflow.log-model.history"]
                models = json.loads(tag)
                model.utc_time_created = models[i]["utc_time_created"]
                assert models[i] == model.to_dict()
                assert len(models) == i + 1
                for j in range(0, i + 1):
                    assert models[j]["artifact_path"] == model_paths[j]
Example #16
def load_model(path, run_id=None, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param path: Local filesystem path or run-relative artifact path to the model.
    :param run_id: Run ID. If provided, combined with ``path`` to identify the model.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model will be loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    >>> import mlflow.spark
    >>> model = mlflow.spark.load_model("spark-model")
    >>> # Prepare test documents, which are unlabeled (id, text) tuples;
    >>> # `spark` below is an active SparkSession.
    >>> test = spark.createDataFrame([
    ...   (4, "spark i j k"),
    ...   (5, "l m n"),
    ...   (6, "spark hadoop spark"),
    ...   (7, "apache hadoop")], ["id", "text"])
    >>> # Make predictions on test documents.
    >>> prediction = model.transform(test)
    """
    dfs_tmpdir = dfs_tmpdir if dfs_tmpdir is not None else DFS_TMP
    if run_id is not None:
        path = mlflow.tracking.utils._get_model_log_dir(model_name=path, run_id=run_id)
    m = Model.load(os.path.join(path, 'MLmodel'))
    if FLAVOR_NAME not in m.flavors:
        raise Exception("Model does not have {} flavor".format(FLAVOR_NAME))
    conf = m.flavors[FLAVOR_NAME]
    model_path = os.path.join(path, conf['model_data'])
    tmp_path = _tmp_path(dfs_tmpdir)
    # Spark ML expects the model to be stored on DFS
    # Copy the model to a temp DFS location first. We cannot delete this file, as
    # Spark may read from it at any point.
    _HadoopFileSystem.copy_from_local_file(model_path, tmp_path, removeSrc=False)
    pipeline_model = PipelineModel.load(tmp_path)
    eprint("Copied SparkML model to %s" % tmp_path)
    return pipeline_model
Example #17
    def create_deployment(self, name, model_uri, flavor=None, config=None):
        device = config.get('device', 'CPU')
        autobatch_size = config.get('batchsize')
        tag = config.get('tag')
        path = Path(_download_artifact_from_uri(model_uri))
        model_config = path / 'MLmodel'
        if not model_config.exists():
            raise MlflowException(
                message=(
                    "Failed to find MLmodel configuration within the specified model's"
                    " root directory."),
                error_code=INVALID_PARAMETER_VALUE)
        model_config = Model.load(model_config)

        if flavor is None:
            flavor = get_preferred_deployment_flavor(model_config)
        else:
            validate_deployment_flavor(model_config, flavor)
        logger.info("Using the {} flavor for deployment!".format(flavor))

        if flavor == 'tensorflow':
            # TODO: test this for tf1.x and tf2.x
            tags = model_config.flavors[flavor]['meta_graph_tags']
            signaturedef = model_config.flavors[flavor]['signature_def_key']
            model_dir = path / model_config.flavors[flavor]['saved_model_dir']
            model, inputs, outputs = ml2rt.load_model(model_dir, tags, signaturedef)
        else:
            model_path = None
            for file in path.iterdir():
                if file.suffix == '.pt':
                    model_path = file
            if model_path is None:
                raise RuntimeError("Model file does not have a valid suffix. Expected ``.pt``")
            model = ml2rt.load_model(model_path)
            inputs = outputs = None
        backend = flavor2backend[flavor]
        self.con.modelset(name, backend, device, model, inputs=inputs,
                          outputs=outputs, batch=autobatch_size, tag=tag)
        return {'name': name, 'flavor': flavor}
Example #18
def test_model_save_load():
    m = Model(
        artifact_path="some/path",
        run_id="123",
        flavors={"flavor1": {"a": 1, "b": 2}, "flavor2": {"x": 1, "y": 2}},
        signature=ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        ),
        saved_input_example_info={"x": 1, "y": 2},
    )
    assert m.get_input_schema() == m.signature.inputs
    assert m.get_output_schema() == m.signature.outputs
    x = Model(artifact_path="some/other/path", run_id="1234")
    assert x.get_input_schema() is None
    assert x.get_output_schema() is None

    n = Model(
        artifact_path="some/path",
        run_id="123",
        flavors={"flavor1": {"a": 1, "b": 2}, "flavor2": {"x": 1, "y": 2}},
        signature=ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        ),
        saved_input_example_info={"x": 1, "y": 2},
    )
    n.utc_time_created = m.utc_time_created
    n.model_uuid = m.model_uuid
    assert m == n
    n.signature = None
    assert m != n
    with TempDir() as tmp:
        m.save(tmp.path("model"))
        o = Model.load(tmp.path("model"))
    assert m == o
    assert m.to_json() == o.to_json()
    assert m.to_yaml() == o.to_yaml()
Example #19
def test_log_model(cb_model, tmpdir):
    model, inference_dataframe = cb_model
    with mlflow.start_run():
        artifact_path = "model"
        conda_env = os.path.join(tmpdir.strpath, "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["catboost"])

        model_info = mlflow.catboost.log_model(model, artifact_path, conda_env=conda_env)
        model_uri = "runs:/{}/{}".format(mlflow.active_run().info.run_id, artifact_path)
        assert model_info.model_uri == model_uri

        loaded_model = mlflow.catboost.load_model(model_uri)
        np.testing.assert_array_almost_equal(
            model.predict(inference_dataframe),
            loaded_model.predict(inference_dataframe),
        )

        local_path = _download_artifact_from_uri(model_uri)
        model_config = Model.load(os.path.join(local_path, "MLmodel"))
        assert pyfunc.FLAVOR_NAME in model_config.flavors
        assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
        env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
        assert os.path.exists(os.path.join(local_path, env_path))
Example #20
def _serve():
    """
    Serve the model.

    Read the MLmodel config, initialize the Conda environment if needed and start python server.
    """
    model_config_path = os.path.join(MODEL_PATH, MLMODEL_FILE_NAME)
    m = Model.load(model_config_path)

    if DEPLOYMENT_CONFIG_KEY_FLAVOR_NAME in os.environ:
        serving_flavor = os.environ[DEPLOYMENT_CONFIG_KEY_FLAVOR_NAME]
    else:
        # Older versions of mlflow may not specify a deployment configuration
        serving_flavor = pyfunc.FLAVOR_NAME

    if serving_flavor == mleap.FLAVOR_NAME:
        _serve_mleap()
    elif pyfunc.FLAVOR_NAME in m.flavors:
        _serve_pyfunc(m)
    else:
        raise Exception(
            "This container only supports models with the MLeap or PyFunc flavors."
        )
Example #21
def test_model_log_load(sklearn_knn_model, iris_data, tmpdir):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        mlflow.pyfunc.log_model(artifact_path=pyfunc_artifact_path,
                                data_path=sk_model_path,
                                loader_module=os.path.basename(__file__)[:-3],
                                code_path=[__file__])
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id,
                artifact_path=pyfunc_artifact_path))

    model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))
    assert mlflow.pyfunc.FLAVOR_NAME in model_config.flavors
    assert mlflow.pyfunc.PY_VERSION in model_config.flavors[
        mlflow.pyfunc.FLAVOR_NAME]
    reloaded_model = mlflow.pyfunc.load_pyfunc(pyfunc_model_path)
    np.testing.assert_array_equal(sklearn_knn_model.predict(iris_data[0]),
                                  reloaded_model.predict(iris_data[0]))
Example #22
def test_requirements_file_save_model(create_requirements_file,
                                      sequential_model):
    requirements_file, content_expected = create_requirements_file
    with TempDir(remove_on_exit=True) as tmp:
        model_path = os.path.join(tmp.path(), "models")
        mlflow.pytorch.save_model(
            pytorch_model=sequential_model,
            path=model_path,
            requirements_file=requirements_file,
        )
        model_config_path = os.path.join(model_path, "MLmodel")
        model_config = Model.load(model_config_path)
        flavor_config = model_config.flavors["pytorch"]

        assert "requirements_file" in flavor_config
        loaded_requirements_file = flavor_config["requirements_file"]

        assert "path" in loaded_requirements_file
        requirements_file_path = loaded_requirements_file["path"]
        requirements_file_path = os.path.join(model_path,
                                              requirements_file_path)
        with open(requirements_file_path) as fp:
            assert fp.read() == content_expected
Example #23
def test_model_log_with_input_example_succeeds():
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema(
                [
                    ColSpec("integer", "a"),
                    ColSpec("string", "b"),
                    ColSpec("boolean", "c"),
                    ColSpec("string", "d"),
                    ColSpec("datetime", "e"),
                ]
            ),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = pd.DataFrame(
            {
                "a": np.int32(1),
                "b": "test string",
                "c": True,
                "d": date.today(),
                "e": np.datetime64("2020-01-01T00:00:00"),
            },
            index=[0],
        )

        local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path, schema=sig.inputs)

        # the date column is deserialized into a string
        input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
        assert x.equals(input_example)

        loaded_example = loaded_model.load_input_example(local_path)
        assert isinstance(loaded_example, pd.DataFrame)
        assert loaded_example.equals(input_example)
Example #24
def test_load_model_with_differing_cloudpickle_version_at_micro_granularity_logs_warning(
    model_path,
):
    class TestModel(mlflow.pyfunc.PythonModel):
        def predict(self, context, model_input):
            return model_input

    mlflow.pyfunc.save_model(path=model_path, python_model=TestModel())
    saver_cloudpickle_version = "0.5.8"
    model_config_path = os.path.join(model_path, "MLmodel")
    model_config = Model.load(model_config_path)
    model_config.flavors[mlflow.pyfunc.FLAVOR_NAME][
        mlflow.pyfunc.model.CONFIG_KEY_CLOUDPICKLE_VERSION
    ] = saver_cloudpickle_version
    model_config.save(model_config_path)

    log_messages = []

    def custom_warn(message_text, *args, **kwargs):
        log_messages.append(message_text % args % kwargs)

    loader_cloudpickle_version = "0.5.7"
    with mock.patch("mlflow.pyfunc._logger.warning") as warn_mock, mock.patch(
        "cloudpickle.__version__"
    ) as cloudpickle_version_mock:
        cloudpickle_version_mock.__str__ = lambda *args, **kwargs: loader_cloudpickle_version
        warn_mock.side_effect = custom_warn
        mlflow.pyfunc.load_pyfunc(model_uri=model_path)

    assert any(
        [
            "differs from the version of CloudPickle that is currently running" in log_message
            and saver_cloudpickle_version in log_message
            and loader_cloudpickle_version in log_message
            for log_message in log_messages
        ]
    )
Example #25
def test_model_log(sklearn_logreg_model, model_path):
    old_uri = mlflow.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "linear"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env,
                                  additional_pip_deps=["scikit-learn"])

                mlflow.sklearn.log_model(sk_model=sklearn_logreg_model.model,
                                         artifact_path=artifact_path,
                                         conda_env=conda_env)
                run_id = mlflow.active_run().info.run_uuid

                reloaded_logreg_model = mlflow.sklearn.load_model(
                    artifact_path, run_id)
                np.testing.assert_array_equal(
                    sklearn_logreg_model.model.predict(
                        sklearn_logreg_model.inference_data),
                    reloaded_logreg_model.predict(
                        sklearn_logreg_model.inference_data))

                model_path = _get_model_log_dir(artifact_path, run_id=run_id)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))

            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
Example #26
def test_model_log():
    with TempDir(chdr=True) as tmp:
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"),
                           ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}
        local_path, r = _log_model_with_signature_and_example(
            tmp, sig, input_example)

        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {
                "a": 1,
                "b": 2
            },
            "flavor2": {
                "x": 1,
                "y": 2
            },
        }
        assert loaded_model.signature == sig
        path = os.path.join(
            local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example
        assert not hasattr(loaded_model, "databricks_runtime")

        loaded_example = loaded_model.load_input_example(local_path)
        assert isinstance(loaded_example, pd.DataFrame)
        assert loaded_example.to_dict(orient="records")[0] == input_example

        assert Version(loaded_model.mlflow_version) == Version(
            mlflow.version.VERSION)
Example #27
def get_module_loader_src(src_path, dst_path):
    """
    Generate Python source of the model loader.

    The model loader contains a ``load_pyfunc`` method with no parameters. It hardcodes the
    loading of the given model into Python source. This is done so that the exported model has no
    unnecessary dependencies on MLflow or any other configuration file format or parsing library.

    :param src_path: Current path to the model.
    :param dst_path: Relative or absolute path where the model will be stored in the deployment
                     environment.
    :return: Python source code of the model loader as string.

    """
    conf_path = os.path.join(src_path, "MLmodel")
    model = Model.load(conf_path)
    if FLAVOR_NAME not in model.flavors:
        raise Exception("Format '{format}' not found not in {path}.".format(
            format=FLAVOR_NAME, path=conf_path))
    conf = model.flavors[FLAVOR_NAME]
    update_path = ""
    if CODE in conf and conf[CODE]:
        src_code_path = os.path.join(src_path, conf[CODE])
        dst_code_path = os.path.join(dst_path, conf[CODE])
        code_path = [
            "os.path.abspath('%s')" % x for x in [dst_code_path] +
            mlflow.pyfunc.utils._get_code_dirs(src_code_path, dst_code_path)
        ]
        update_path = "sys.path = {} + sys.path; ".format("[%s]" %
                                                          ",".join(code_path))

    data_path = os.path.join(dst_path, conf[DATA]) if (DATA
                                                       in conf) else dst_path
    return loader_template.format(update_path=update_path,
                                  main=conf[MAIN],
                                  data_path=data_path)
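A short sketch of materializing the generated loader source into a deployable file; the paths here are hypothetical:

# Write the self-contained loader next to the exported model.
loader_src = get_module_loader_src(src_path="/local/path/to/model", dst_path="model")
with open("model_loader.py", "w") as f:
    f.write(loader_src)
# The deployment environment can then run: from model_loader import load_pyfunc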
Example #28
def test_load_model_with_missing_cloudpickle_version_logs_warning(model_path):
    class TestModel(mlflow.pyfunc.PythonModel):
        def predict(self, context, model_input):
            return model_input

    mlflow.pyfunc.save_model(path=model_path, python_model=TestModel())
    model_config_path = os.path.join(model_path, "MLmodel")
    model_config = Model.load(model_config_path)
    del model_config.flavors[mlflow.pyfunc.FLAVOR_NAME][
        mlflow.pyfunc.model.CONFIG_KEY_CLOUDPICKLE_VERSION]
    model_config.save(model_config_path)

    log_messages = []

    def custom_warn(message_text, *args, **kwargs):
        log_messages.append(message_text % args % kwargs)

    with mock.patch("mlflow.pyfunc._logger.warning") as warn_mock:
        warn_mock.side_effect = custom_warn
        mlflow.pyfunc.load_pyfunc(model_uri=model_path)

    assert any([(
        "The version of CloudPickle used to save the model could not be found in the MLmodel"
        " configuration") in log_message for log_message in log_messages])
Example #29
def test_lgb_autolog_gets_input_example(bst_params):
    # We need to check the example input against the initial input given to the train function.
    # We can't use the train_set fixture for this, as it sets free_raw_data=False, but this
    # feature should work even when it is True.
    iris = datasets.load_iris()
    X = pd.DataFrame(iris.data[:, :2], columns=iris.feature_names[:2])
    y = iris.target
    dataset = lgb.Dataset(X, y, free_raw_data=True)

    mlflow.lightgbm.autolog(log_input_examples=True)
    lgb.train(bst_params, dataset)
    run = get_latest_run()

    model_path = os.path.join(run.info.artifact_uri, "model")
    model_conf = Model.load(os.path.join(model_path, "MLmodel"))

    input_example = _read_example(model_conf, model_path)

    assert input_example.equals(X[:5])

    pyfunc_model = mlflow.pyfunc.load_model(os.path.join(run.info.artifact_uri, "model"))

    # make sure reloading the input_example and predicting on it does not error
    pyfunc_model.predict(input_example)
Example #30
def test_xgb_autolog_gets_input_example(bst_params):
    mlflow.xgboost.autolog(log_input_examples=True)

    # We cannot use the dtrain fixture, as the dataset must be constructed
    #   after the call to autolog() in order to capture the input example
    iris = datasets.load_iris()
    X = pd.DataFrame(iris.data[:, :2], columns=iris.feature_names[:2])
    y = iris.target
    dataset = xgb.DMatrix(X, y)

    xgb.train(bst_params, dataset)
    run = get_latest_run()

    model_path = os.path.join(run.info.artifact_uri, "model")
    model_conf = Model.load(os.path.join(model_path, "MLmodel"))

    input_example = _read_example(model_conf, model_path)

    assert input_example.equals(X[:5])

    pyfunc_model = mlflow.pyfunc.load_model(os.path.join(run.info.artifact_uri, "model"))

    # make sure reloading the input_example and predicting on it does not error
    pyfunc_model.predict(input_example)