Example #1
def test_load_model_loads_torch_model_using_pickle_module_specified_at_save_time(
        module_scoped_subclassed_model):
    custom_pickle_module = pickle

    artifact_path = "pytorch_model"
    with kiwi.start_run():
        kiwi.pytorch.log_model(
            artifact_path=artifact_path,
            pytorch_model=module_scoped_subclassed_model,
            conda_env=None,
            pickle_module=custom_pickle_module)
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=artifact_path)

    import_module_fn = importlib.import_module
    imported_modules = []

    def track_module_imports(module_name):
        imported_modules.append(module_name)
        return import_module_fn(module_name)

    with mock.patch("importlib.import_module") as import_mock,\
            mock.patch("torch.load") as torch_load_mock:
        import_mock.side_effect = track_module_imports
        pyfunc.load_pyfunc(model_uri=model_uri)

    torch_load_mock.assert_called_with(mock.ANY, pickle_module=custom_pickle_module)
    assert custom_pickle_module.__name__ in imported_modules
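The import-tracking idiom above is reusable on its own: save a reference to the real importlib.import_module, then install a mock whose side_effect records each module name before delegating. A minimal standalone sketch (the "json" import is just an illustration):

import importlib
from unittest import mock

real_import_module = importlib.import_module
imported_modules = []

def track_module_imports(module_name, *args, **kwargs):
    # Record the module name, then delegate to the real importer.
    imported_modules.append(module_name)
    return real_import_module(module_name, *args, **kwargs)

with mock.patch("importlib.import_module") as import_mock:
    import_mock.side_effect = track_module_imports
    importlib.import_module("json")  # any import made under the patch is recorded

assert "json" in imported_modules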
Example #2
def test_load_model_succeeds_when_data_is_model_file_instead_of_directory(
        module_scoped_subclassed_model, model_path, data):
    """
    This test verifies that PyTorch models saved in older versions of MLflow are loaded successfully
    by ``mlflow.pytorch.load_model``. The ``data`` path associated with these older models is a
    serialized PyTorch model file, as opposed to the current format: a directory containing a
    serialized model file and pickle module information.
    """
    artifact_path = "pytorch_model"
    with kiwi.start_run():
        kiwi.pytorch.log_model(
            artifact_path=artifact_path,
            pytorch_model=module_scoped_subclassed_model,
            conda_env=None)
        model_path = _download_artifact_from_uri("runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id, artifact_path=artifact_path))

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    assert pyfunc_conf is not None
    model_data_path = os.path.join(model_path, pyfunc_conf[pyfunc.DATA])
    assert os.path.exists(model_data_path)
    assert kiwi.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME in os.listdir(model_data_path)
    pyfunc_conf[pyfunc.DATA] = os.path.join(
        model_data_path, kiwi.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME)
    model_conf.save(model_conf_path)

    loaded_pyfunc = pyfunc.load_pyfunc(model_path)

    np.testing.assert_array_almost_equal(
        loaded_pyfunc.predict(data[0]),
        pd.DataFrame(_predict(model=module_scoped_subclassed_model, data=data)),
        decimal=4)
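For context, the rewrite above only changes the pyfunc flavor's ``data`` entry in the MLmodel file (a YAML document). An illustrative before/after, with the file name assumed rather than taken from the project:

# Before (current layout): ``data`` names a directory
flavors:
  python_function:
    data: data
# After (older layout being emulated): ``data`` names the model file itself
flavors:
  python_function:
    data: data/model.pth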
Example #3
def test_load_pyfunc_succeeds_for_older_models_with_pyfunc_data_field(
        sklearn_knn_model, model_path):
    """
    This test verifies that scikit-learn models saved in older versions of MLflow are loaded
    successfully by ``mlflow.pyfunc.load_model``. These older models specify a pyfunc ``data``
    field referring directly to a serialized scikit-learn model file. In contrast, newer models
    omit the ``data`` field.
    """
    kiwi.sklearn.save_model(
        sk_model=sklearn_knn_model.model,
        path=model_path,
        serialization_format=kiwi.sklearn.SERIALIZATION_FORMAT_PICKLE)

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    sklearn_conf = model_conf.flavors.get(kiwi.sklearn.FLAVOR_NAME)
    assert sklearn_conf is not None
    assert pyfunc_conf is not None
    pyfunc_conf[pyfunc.DATA] = sklearn_conf["pickled_model"]

    reloaded_knn_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_equal(
        sklearn_knn_model.model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_pyfunc.predict(sklearn_knn_model.inference_data))
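A quick way to see what this test manipulates is to inspect the flavors recorded in the MLmodel file. A minimal sketch, assuming ``Model`` is importable from ``kiwi.models`` as it is from ``mlflow.models`` in upstream MLflow (``model_path`` is the fixture used above):

import os
from kiwi.models import Model
from kiwi import pyfunc

model_conf = Model.load(os.path.join(model_path, "MLmodel"))
print(list(model_conf.flavors))                # e.g. ['python_function', 'sklearn']
print(model_conf.flavors[pyfunc.FLAVOR_NAME])  # the pyfunc flavor configuration dict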
Example #4
def test_estimator_model_export(spark_model_estimator, model_path, spark_custom_env):
    sparkm.save_model(spark_model_estimator.model, path=model_path, conda_env=spark_custom_env)
    # 1. score and compare the reloaded sparkml model
    reloaded_model = sparkm.load_model(model_uri=model_path)
    preds_df = reloaded_model.transform(spark_model_estimator.spark_df)
    preds = [x.prediction for x in preds_df.select("prediction").collect()]
    assert spark_model_estimator.predictions == preds
    # 2. score and compare reloaded pyfunc
    m = pyfunc.load_pyfunc(model_path)
    preds2 = m.predict(spark_model_estimator.spark_df.toPandas())
    assert spark_model_estimator.predictions == preds2
Example #5
def test_model_save_load(xgb_model, model_path):
    model = xgb_model.model

    kiwi.xgboost.save_model(xgb_model=model, path=model_path)
    reloaded_model = kiwi.xgboost.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_almost_equal(
        model.predict(xgb_model.inference_dmatrix),
        reloaded_model.predict(xgb_model.inference_dmatrix))

    np.testing.assert_array_almost_equal(
        reloaded_model.predict(xgb_model.inference_dmatrix),
        reloaded_pyfunc.predict(xgb_model.inference_dataframe))
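Note the two input types above: the native XGBoost flavor predicts on a ``DMatrix``, while the pyfunc wrapper accepts a pandas DataFrame. A minimal sketch with made-up data:

import numpy as np
import pandas as pd
import xgboost as xgb

X = pd.DataFrame(np.random.rand(4, 3), columns=["f0", "f1", "f2"])
dmatrix = xgb.DMatrix(X)       # what the native Booster API expects
# booster.predict(dmatrix)     # native flavor input
# pyfunc_model.predict(X)      # pyfunc flavor takes the DataFrame directly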
Example #6
def test_model_save_load(lgb_model, model_path):
    model = lgb_model.model

    kiwi.lightgbm.save_model(lgb_model=model, path=model_path)
    reloaded_model = kiwi.lightgbm.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_almost_equal(
        model.predict(lgb_model.inference_dataframe),
        reloaded_model.predict(lgb_model.inference_dataframe))

    np.testing.assert_array_almost_equal(
        reloaded_model.predict(lgb_model.inference_dataframe),
        reloaded_pyfunc.predict(lgb_model.inference_dataframe))
Example #7
def test_model_save_load(sklearn_knn_model, model_path):
    knn_model = sklearn_knn_model.model

    kiwi.sklearn.save_model(sk_model=knn_model, path=model_path)
    reloaded_knn_model = kiwi.sklearn.load_model(model_uri=model_path)
    reloaded_knn_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_equal(
        knn_model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_model.predict(sklearn_knn_model.inference_data))

    np.testing.assert_array_equal(
        reloaded_knn_model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_pyfunc.predict(sklearn_knn_model.inference_data))
Example #8
def test_load_pyfunc_loads_torch_model_using_pickle_module_specified_at_save_time(
        module_scoped_subclassed_model, model_path):
    custom_pickle_module = pickle

    kiwi.pytorch.save_model(
        path=model_path,
        pytorch_model=module_scoped_subclassed_model,
        conda_env=None,
        pickle_module=custom_pickle_module)

    import_module_fn = importlib.import_module
    imported_modules = []

    def track_module_imports(module_name):
        imported_modules.append(module_name)
        return import_module_fn(module_name)

    with mock.patch("importlib.import_module") as import_mock,\
            mock.patch("torch.load") as torch_load_mock:
        import_mock.side_effect = track_module_imports
        pyfunc.load_pyfunc(model_path)

    torch_load_mock.assert_called_with(mock.ANY, pickle_module=custom_pickle_module)
    assert custom_pickle_module.__name__ in imported_modules
Example #9
def test_model_export(spark_model_iris, model_path, spark_custom_env):
    sparkm.save_model(spark_model_iris.model, path=model_path,
                      conda_env=spark_custom_env)
    # 1. score and compare reloaded sparkml model
    reloaded_model = sparkm.load_model(model_uri=model_path)
    preds_df = reloaded_model.transform(spark_model_iris.spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    assert spark_model_iris.predictions == preds1
    # 2. score and compare the reloaded pyfunc model
    m = pyfunc.load_pyfunc(model_path)
    preds2 = m.predict(spark_model_iris.pandas_df)
    assert spark_model_iris.predictions == preds2
    # 3. score and compare reloaded pyfunc Spark udf
    preds3 = score_model_as_udf(model_uri=model_path, pandas_df=spark_model_iris.pandas_df)
    assert spark_model_iris.predictions == preds3
    assert os.path.exists(sparkm.DFS_TMP)
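Step 3 goes through the test helper ``score_model_as_udf``; this style of scoring is backed by a pyfunc Spark UDF. A hedged sketch of direct usage, assuming kiwi mirrors MLflow's ``mlflow.pyfunc.spark_udf`` API (``model_path`` and ``spark_model_iris`` are the fixtures above):

from pyspark.sql import SparkSession
import kiwi.pyfunc

spark = SparkSession.builder.getOrCreate()
predict_udf = kiwi.pyfunc.spark_udf(spark, model_uri=model_path)
df = spark.createDataFrame(spark_model_iris.pandas_df)
scored = df.withColumn("prediction", predict_udf(*df.columns))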
Example #10
def test_sagemaker_docker_model_scoring_with_default_conda_env(
        xgb_model, model_path):
    kiwi.xgboost.save_model(xgb_model=xgb_model.model,
                            path=model_path,
                            conda_env=None)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    scoring_response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=xgb_model.inference_dataframe,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=kiwi.pyfunc.FLAVOR_NAME)
    deployed_model_preds = pd.DataFrame(json.loads(scoring_response.content))

    pandas.testing.assert_frame_equal(
        deployed_model_preds,
        pd.DataFrame(reloaded_pyfunc.predict(xgb_model.inference_dataframe)),
        check_dtype=False,
        check_less_precise=6)
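The ``CONTENT_TYPE_JSON_SPLIT_ORIENTED`` content type corresponds to pandas' built-in ``split`` orientation. A minimal sketch of how such a payload is produced:

import pandas as pd

df = pd.DataFrame({"x": [1, 2], "y": [3, 4]})
payload = df.to_json(orient="split")
# -> {"columns":["x","y"],"index":[0,1],"data":[[1,3],[2,4]]}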
Example #11
    def get_or_load(archive_path):
        """Given a path returned by add_local_model(), this method will return the loaded model.
        If this Python process ever loaded the model before, we will reuse that copy.
        """
        if archive_path in SparkModelCache._models:
            SparkModelCache._cache_hits += 1
            return SparkModelCache._models[archive_path]

        # BUG: Despite the documentation of SparkContext.addFile() and SparkFiles.get() in Scala
        # and Python, it turns out that we actually need to use the basename as the input to
        # SparkFiles.get(), as opposed to the (absolute) path.
        archive_path_basename = os.path.basename(archive_path)
        local_path = SparkFiles.get(archive_path_basename)
        temp_dir = tempfile.mkdtemp()
        with zipfile.ZipFile(local_path, 'r') as zip_ref:
            zip_ref.extractall(temp_dir)

        # We must rely on a supposed cyclic import here because we want this behavior
        # on the Spark Executors (i.e., don't try to pickle the load_model function).
        from kiwi.pyfunc import load_pyfunc  # pylint: disable=cyclic-import
        SparkModelCache._models[archive_path] = load_pyfunc(temp_dir)
        return SparkModelCache._models[archive_path]
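The basename quirk flagged in the BUG comment is easy to reproduce in isolation. A minimal sketch with a placeholder file (paths are illustrative):

import os
import tempfile
from pyspark import SparkFiles
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
path = os.path.join(tempfile.mkdtemp(), "archive.zip")
open(path, "wb").close()                 # empty placeholder file for illustration
spark.sparkContext.addFile(path)
# Passing the absolute path that was added would fail; the basename works:
local_copy = SparkFiles.get(os.path.basename(path))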
Example #12
File: wsgi.py Project: iPieter/kiwi
from kiwi.pyfunc import scoring_server
from kiwi import pyfunc
app = scoring_server.init(pyfunc.load_pyfunc("/opt/ml/model/"))
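``/opt/ml/model/`` is the directory where SageMaker mounts model artifacts inside a serving container. A module like this is then handed to a WSGI server; a hypothetical invocation (the server choice and port are assumptions, not taken from the project):

gunicorn --bind 0.0.0.0:8080 wsgi:app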