def test_predict_with_old_mlflow_in_conda_and_with_orient_records(iris_data):
    """Run ``mlflow models predict`` on a model whose env pins an older mlflow.

    The test is skipped when ``no_conda`` is truthy, and is currently skipped
    unconditionally by the second ``pytest.skip`` call below; the remaining
    body documents the intended check.

    :param iris_data: Fixture unpacked as ``(x, _)``; only ``x`` is used.
    """
    if no_conda:
        pytest.skip("This test needs conda.")
    # TODO: Enable this test after 1.0 is out to ensure we do not break the serve / predict
    # TODO: Also add a test for serve, not just predict.
    pytest.skip("TODO: enable this after 1.0 release is out.")

    features, _ = iris_data
    with TempDir() as workdir:
        records_in = workdir.path("input_records.json")
        pd.DataFrame(features).to_json(records_in, orient="records")
        predictions_out = workdir.path("output.json")
        saved_model_dir = workdir.path("test_model")
        env_yaml = workdir.path("conda.yml")

        # create env with old mlflow!
        pinned_mlflow = "mlflow=={}".format(test_pyfunc.MLFLOW_VERSION)
        _mlflow_conda_env(path=env_yaml, additional_pip_deps=[pinned_mlflow])
        pyfunc.save_model(
            path=saved_model_dir,
            loader_module=test_pyfunc.__name__.split(".")[-1],
            code_path=[test_pyfunc.__file__],
            conda_env=env_yaml,
        )

        # explicit json format with orient records
        predict_cmd = [
            "mlflow", "models", "predict",
            "-m", path_to_local_file_uri(saved_model_dir),
            "-i", records_in,
            "-o", predictions_out,
            "-t", "json",
            "--json-format", "records",
        ] + no_conda
        proc = subprocess.Popen(predict_cmd)
        assert 0 == proc.wait()

        # Predictions are read back from the first column of the output file.
        output_frame = pd.read_json(predictions_out, orient="records")
        actual = output_frame[output_frame.columns[0]].values
        reference_model = test_pyfunc.PyFuncTestModel(check_version=False)
        expected = reference_model.predict(df=pd.DataFrame(features))
        assert all(expected == actual)
def onnx_custom_env(tmpdir):
    """Generate a conda env spec for ONNX tests and return its file path.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    conda_packages = ["pytest", "keras"]
    pip_packages = ["onnx", "onnxmltools"]
    _mlflow_conda_env(
        env_file,
        additional_conda_deps=conda_packages,
        additional_pip_deps=pip_packages,
    )
    return env_file
def pytorch_custom_env(tmpdir):
    """Generate a conda env spec for PyTorch tests and return its file path.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    conda_packages = ["pytorch", "torchvision", "pytest"]
    # The extra channel requested alongside the conda packages above.
    channels = ["pytorch"]
    _mlflow_conda_env(
        env_file,
        additional_conda_deps=conda_packages,
        additional_conda_channels=channels,
    )
    return env_file
def pyfunc_custom_env_file(tmpdir):
    """Generate a conda env spec for pyfunc tests and return its file path.

    The pip section requests an editable (``-e``) install of the directory
    that contains the ``kiwi`` package.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    package_parent_dir = os.path.dirname(kiwi.__path__[0])
    editable_install = "-e " + package_parent_dir
    _mlflow_conda_env(
        env_file,
        additional_conda_deps=["scikit-learn", "pytest", "cloudpickle"],
        additional_pip_deps=[editable_install],
    )
    return env_file
def test_log_model_no_registered_model_name(xgb_model):
    """Logging a model without ``registered_model_name`` must not register it.

    :param xgb_model: Fixture exposing the trained model as ``.model``.
    """
    artifact_path = "model"
    # Patch the attribute on the very module object the assertion below
    # inspects. Patching by the dotted string "mlflow.register_model" only
    # works if that name resolves to the same module as ``kiwi``; otherwise
    # ``kiwi.register_model`` is never replaced by a mock and the assertion
    # cannot work.
    register_model_patch = mock.patch.object(kiwi, "register_model")
    with kiwi.start_run(), register_model_patch, TempDir(
            chdr=True, remove_on_exit=True) as tmp:
        conda_env = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["xgboost"])
        kiwi.xgboost.log_model(
            xgb_model=xgb_model.model,
            artifact_path=artifact_path,
            conda_env=conda_env,
        )
        kiwi.register_model.assert_not_called()
def test_log_model_calls_register_model(xgb_model):
    """Logging with ``registered_model_name`` must register the logged model.

    The expected call is ``register_model(<runs:/ URI of the artifact>,
    "AdsModel1")``, made exactly once.

    :param xgb_model: Fixture exposing the trained model as ``.model``.
    """
    artifact_path = "model"
    # Patch the attribute on the very module object the assertion below
    # inspects. Patching by the dotted string "mlflow.register_model" only
    # works if that name resolves to the same module as ``kiwi``; otherwise
    # ``kiwi.register_model`` is never replaced by a mock and the assertion
    # cannot work.
    register_model_patch = mock.patch.object(kiwi, "register_model")
    with kiwi.start_run(), register_model_patch, TempDir(
            chdr=True, remove_on_exit=True) as tmp:
        conda_env = os.path.join(tmp.path(), "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["xgboost"])
        kiwi.xgboost.log_model(
            xgb_model=xgb_model.model,
            artifact_path=artifact_path,
            conda_env=conda_env,
            registered_model_name="AdsModel1",
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=kiwi.active_run().info.run_id,
            artifact_path=artifact_path,
        )
        kiwi.register_model.assert_called_once_with(model_uri, "AdsModel1")
def get_default_conda_env(include_cloudpickle=False, keras_module=None):
    """
    :param include_cloudpickle: If truthy, pin the installed ``cloudpickle``
                                version as a pip dependency.
    :param keras_module: Keras module in use; when ``None`` the standalone
                         ``keras`` package is imported and used.
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`.
    """
    import tensorflow as tf

    if keras_module is None:
        import keras
        keras_module = keras

    conda_requirements = []
    pip_requirements = []

    # If we use tf.keras we only need to declare a dependency on tensorflow,
    # so keras itself is pinned only for the standalone ``keras`` module.
    if keras_module.__name__ == "keras":
        # Temporary fix: the created conda environment has issues installing
        # keras >= 2.3.1, so those versions are requested through pip instead.
        keras_from_conda = (
            LooseVersion(keras_module.__version__) < LooseVersion('2.3.1'))
        keras_target = conda_requirements if keras_from_conda else pip_requirements
        keras_target.append("keras=={}".format(keras_module.__version__))

    if include_cloudpickle:
        import cloudpickle
        pip_requirements.append(
            "cloudpickle=={}".format(cloudpickle.__version__))

    # The Keras pyfunc representation requires the TensorFlow backend for
    # Keras. Therefore, the conda environment must include TensorFlow.
    # Temporary fix: conda-forge currently does not have tensorflow > 1.14,
    # so newer versions are requested through pip.
    tf_from_conda = LooseVersion(tf.__version__) <= LooseVersion('1.13.2')
    tf_target = conda_requirements if tf_from_conda else pip_requirements
    tf_target.append("tensorflow=={}".format(tf.__version__))

    return _mlflow_conda_env(
        additional_conda_deps=conda_requirements,
        additional_pip_deps=pip_requirements,
        additional_conda_channels=None,
    )
def test_mlflow_conda_env_returns_expected_env_dict_when_output_path_is_not_specified():
    """With ``path=None`` the returned env lists every requested conda dep."""
    requested = ["conda-dep-1=0.0.1", "conda-dep-2"]
    env = _mlflow_conda_env(path=None, additional_conda_deps=requested)
    for dependency in requested:
        assert dependency in env["dependencies"]
def test_mlflow_conda_env_returns_none_when_output_path_is_specified(conda_env_path):
    """When an output path is supplied, ``_mlflow_conda_env`` returns ``None``.

    :param conda_env_path: Fixture providing the output path for the env spec.
    """
    conda_packages = ["conda-dep-1=0.0.1", "conda-dep-2"]
    pip_packages = ["pip-dep-1", "pip-dep2==0.1.0"]
    result = _mlflow_conda_env(
        path=conda_env_path,
        additional_conda_deps=conda_packages,
        additional_pip_deps=pip_packages,
    )
    assert result is None
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`. It pins the
             currently imported ``mxnet`` version as a pip dependency.
    """
    mxnet_requirement = "mxnet=={}".format(mx.__version__)
    return _mlflow_conda_env(additional_pip_deps=[mxnet_requirement])
def test_model_log(fastai_model, model_path):
    """Log a fastai learner, reload it, and compare predictions and metadata.

    The scenario runs twice: once without an explicitly started run and once
    after calling ``kiwi.start_run()``. Each iteration ends the run and
    restores the original tracking URI, even on failure.

    :param fastai_model: Fixture exposing ``.model`` and
                         ``.inference_dataframe``.
    :param model_path: Fixture argument; not read by this test.
    """
    original_tracking_uri = kiwi.get_tracking_uri()
    learner = fastai_model.model
    with TempDir(chdr=True, remove_on_exit=True) as workdir:
        for should_start_run in (False, True):
            try:
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()

                artifact_path = "model"
                conda_env = os.path.join(workdir.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["fastai"])

                kiwi.fastai.log_model(
                    fastai_learner=learner,
                    artifact_path=artifact_path,
                    conda_env=conda_env,
                )
                active_run_id = kiwi.active_run().info.run_id
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=active_run_id, artifact_path=artifact_path)

                # The reloaded learner must predict like the original one.
                reloaded_learner = kiwi.fastai.load_model(model_uri=model_uri)
                original_wrapper = kiwi.fastai._FastaiModelWrapper(learner)
                reloaded_wrapper = kiwi.fastai._FastaiModelWrapper(
                    reloaded_learner)
                original_predictions = original_wrapper.predict(
                    fastai_model.inference_dataframe)
                reloaded_predictions = reloaded_wrapper.predict(
                    fastai_model.inference_dataframe)
                compare_wrapper_results(
                    original_predictions, reloaded_predictions)

                # The logged MLmodel must declare a pyfunc flavor whose env
                # file exists among the downloaded artifacts.
                local_model_dir = _download_artifact_from_uri(
                    artifact_uri=model_uri)
                model_config = Model.load(
                    os.path.join(local_model_dir, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                pyfunc_flavor = model_config.flavors[pyfunc.FLAVOR_NAME]
                assert pyfunc.ENV in pyfunc_flavor
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(local_model_dir, env_path))
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(original_tracking_uri)
def test_mlflow_conda_env_includes_pip_dependencies_but_pip_is_not_specified(conda_deps):
    """``pip`` is listed as a dependency whenever pip deps are requested.

    :param conda_deps: Parametrized conda dependency list, or ``None``.
    """
    pip_packages = ["pip-dep==0.0.1"]
    env = _mlflow_conda_env(
        path=None,
        additional_conda_deps=conda_deps,
        additional_pip_deps=pip_packages,
    )
    # ``None`` means no conda deps were requested, so there is nothing to look up.
    for dependency in (conda_deps if conda_deps is not None else ()):
        assert dependency in env["dependencies"]
    assert "pip" in env["dependencies"]
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`. It pins the
             currently imported ``tensorflow`` version as a conda dependency.
    """
    tensorflow_requirement = "tensorflow={}".format(tensorflow.__version__)
    return _mlflow_conda_env(
        additional_conda_deps=[tensorflow_requirement],
        additional_pip_deps=None,
        additional_conda_channels=None,
    )
def _conda_env():
    """Build the conda env used by these tests, with the local package installed.

    :return: Whatever ``_mlflow_conda_env`` returns when no output path is given.
    """
    # NB: We need mlflow as a dependency in the environment. The default
    # install is disabled (``install_mlflow=False``) and the directory holding
    # the ``kiwi`` package is requested as an editable pip install instead.
    local_package = "-e " + os.path.dirname(kiwi.__path__[0])
    pip_requirements = [
        local_package,
        "cloudpickle=={}".format(cloudpickle.__version__),
        "scikit-learn=={}".format(sklearn.__version__),
    ]
    return _mlflow_conda_env(
        additional_conda_deps=None,
        install_mlflow=False,
        additional_pip_deps=pip_requirements,
        additional_conda_channels=None,
    )
def test_mlflow_conda_env_includes_pip_dependencies_and_pip_is_specified(pip_specification):
    """An explicit pip spec among the conda deps is kept in the resulting env.

    :param pip_specification: Parametrized pip requirement placed among the
                              conda dependencies.
    """
    requested_conda = ["conda-dep-1=0.0.1", "conda-dep-2", pip_specification]
    requested_pip = ["pip-dep==0.0.1"]
    env = _mlflow_conda_env(
        path=None,
        additional_conda_deps=requested_conda,
        additional_pip_deps=requested_pip,
    )
    for dependency in requested_conda:
        assert dependency in env["dependencies"]
    assert pip_specification in env["dependencies"]

    # A bare "pip" spec is expected to appear twice, any other spec leaves
    # exactly one bare "pip" entry.
    if pip_specification == "pip":
        expected_bare_pip_entries = 2
    else:
        expected_bare_pip_entries = 1
    assert env["dependencies"].count("pip") == expected_bare_pip_entries
def test_model_log(sklearn_logreg_model, model_path):
    """Log a scikit-learn model, reload it, and compare predictions and metadata.

    The scenario runs twice: once without an explicitly started run and once
    after calling ``kiwi.start_run()``. Each iteration ends the run and
    restores the original tracking URI, even on failure.

    :param sklearn_logreg_model: Fixture exposing ``.model`` and
                                 ``.inference_data``.
    :param model_path: Fixture argument; not read by this test.
    """
    original_tracking_uri = kiwi.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as workdir:
        for should_start_run in (False, True):
            try:
                kiwi.set_tracking_uri("test")
                if should_start_run:
                    kiwi.start_run()

                artifact_path = "linear"
                conda_env = os.path.join(workdir.path(), "conda_env.yaml")
                _mlflow_conda_env(
                    conda_env, additional_pip_deps=["scikit-learn"])

                kiwi.sklearn.log_model(
                    sk_model=sklearn_logreg_model.model,
                    artifact_path=artifact_path,
                    conda_env=conda_env,
                )
                active_run_id = kiwi.active_run().info.run_id
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=active_run_id, artifact_path=artifact_path)

                # The reloaded model must predict like the original one.
                reloaded_logreg_model = kiwi.sklearn.load_model(
                    model_uri=model_uri)
                original_predictions = sklearn_logreg_model.model.predict(
                    sklearn_logreg_model.inference_data)
                reloaded_predictions = reloaded_logreg_model.predict(
                    sklearn_logreg_model.inference_data)
                np.testing.assert_array_equal(
                    original_predictions, reloaded_predictions)

                # The logged MLmodel must declare a pyfunc flavor whose env
                # file exists among the downloaded artifacts.
                local_model_dir = _download_artifact_from_uri(
                    artifact_uri=model_uri)
                model_config = Model.load(
                    os.path.join(local_model_dir, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                pyfunc_flavor = model_config.flavors[pyfunc.FLAVOR_NAME]
                assert pyfunc.ENV in pyfunc_flavor
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(local_model_dir, env_path))
            finally:
                kiwi.end_run()
                kiwi.set_tracking_uri(original_tracking_uri)
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model() <mlflow.pyfunc.save_model>`
             and :func:`log_model() <mlflow.pyfunc.log_model>` when a user-defined subclass of
             :class:`PythonModel` is provided.
    """
    cloudpickle_requirement = "cloudpickle=={}".format(cloudpickle.__version__)
    return _mlflow_conda_env(
        additional_conda_deps=None,
        additional_pip_deps=[cloudpickle_requirement],
        additional_conda_channels=None,
    )
def test_mlflow_is_not_installed_unless_specified():
    """An env created with ``install_mlflow=False`` must not provide mlflow.

    The ``mlflow models predict`` command is expected to exit non-zero and to
    report the missing module on stderr. Skipped when ``no_conda`` is truthy.
    """
    if no_conda:
        pytest.skip("This test requires conda.")
    with TempDir(chdr=True) as workdir:
        fake_model_path = workdir.path("fake_model")
        fake_env_path = workdir.path("fake_env.yaml")
        _mlflow_conda_env(path=fake_env_path, install_mlflow=False)
        kiwi.pyfunc.save_model(
            fake_model_path, loader_module=__name__, conda_env=fake_env_path)

        # The following should fail because there should be no mlflow in the env:
        predict_cmd = ["mlflow", "models", "predict", "-m", fake_model_path]
        proc = subprocess.Popen(
            predict_cmd, stderr=subprocess.PIPE, cwd=workdir.path(""))
        _, raw_stderr = proc.communicate()
        stderr = raw_stderr.decode("utf-8")
        print(stderr)
        assert proc.wait() != 0

        # The import failure is worded differently on Python 2 and Python 3.
        if PYTHON_VERSION.startswith("3"):
            expected_error = "ModuleNotFoundError: No module named 'mlflow'"
        else:
            expected_error = "ImportError: No module named mlflow.pyfunc.scoring_server"
        assert expected_error in stderr
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`. It pins the
             installed ``xgboost`` version as a pip dependency.
    """
    import xgboost as xgb

    # XGBoost is not yet available via the default conda channels, so we install it via pip
    xgboost_requirement = "xgboost=={}".format(xgb.__version__)
    return _mlflow_conda_env(
        additional_conda_deps=None,
        additional_pip_deps=[xgboost_requirement],
        additional_conda_channels=None,
    )
def get_default_conda_env(include_cloudpickle=False):
    """
    :param include_cloudpickle: If truthy, pin the installed ``cloudpickle``
                                version as a pip dependency; otherwise no pip
                                dependencies are requested.
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`.
    """
    import sklearn

    if include_cloudpickle:
        import cloudpickle
        pip_requirements = ["cloudpickle=={}".format(cloudpickle.__version__)]
    else:
        pip_requirements = None

    sklearn_requirement = "scikit-learn={}".format(sklearn.__version__)
    return _mlflow_conda_env(
        additional_conda_deps=[sklearn_requirement],
        additional_pip_deps=pip_requirements,
        additional_conda_channels=None,
    )
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`. It pins the
             installed ``onnx`` and ``onnxruntime`` versions as pip
             dependencies.
    """
    import onnx
    import onnxruntime

    onnx_requirement = "onnx=={}".format(onnx.__version__)
    # The ONNX pyfunc representation requires the OnnxRuntime
    # inference engine. Therefore, the conda environment must
    # include OnnxRuntime
    runtime_requirement = "onnxruntime=={}".format(onnxruntime.__version__)
    return _mlflow_conda_env(
        additional_conda_deps=None,
        additional_pip_deps=[onnx_requirement, runtime_requirement],
        additional_conda_channels=None,
    )
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`. This Conda environment
             contains the current version of PySpark that is installed on the caller's
             system. ``dev`` versions of PySpark are replaced with stable versions in
             the resulting Conda environment (e.g., if you are running PySpark version
             ``2.4.5.dev0``, invoking this method produces a Conda environment with a
             dependency on PySpark version ``2.4.5``).
    """
    import pyspark

    # Strip the suffix from `dev` versions of PySpark, which are not
    # available for installation from Anaconda or PyPI
    installed_version = pyspark.__version__
    stable_version = re.sub(r"(\.?)dev.*", "", installed_version)
    pyspark_requirement = "pyspark={}".format(stable_version)
    return _mlflow_conda_env(
        additional_conda_deps=[pyspark_requirement],
        additional_pip_deps=None,
        additional_conda_channels=None,
    )
def get_default_conda_env():
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`. It pins the
             installed ``torch`` and ``torchvision`` versions as conda
             dependencies from the ``pytorch`` channel.
    """
    import torch
    import torchvision

    conda_requirements = [
        "pytorch={}".format(torch.__version__),
        "torchvision={}".format(torchvision.__version__),
    ]
    # We include CloudPickle in the default environment because
    # it's required by the default pickle module used by `save_model()`
    # and `log_model()`: `mlflow.pytorch.pickle_module`.
    pip_requirements = ["cloudpickle=={}".format(cloudpickle.__version__)]
    return _mlflow_conda_env(
        additional_conda_deps=conda_requirements,
        additional_pip_deps=pip_requirements,
        additional_conda_channels=["pytorch"],
    )
def gluon_custom_env(tmpdir):
    """Generate a conda env spec for Gluon tests and return its file path.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    conda_packages = ["mxnet", "pytest"]
    _mlflow_conda_env(env_file, additional_conda_deps=conda_packages)
    return env_file
def tf_custom_env(tmpdir):
    """Generate a conda env spec for TensorFlow tests and return its file path.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    conda_packages = ["tensorflow", "pytest"]
    _mlflow_conda_env(env_file, additional_conda_deps=conda_packages)
    return env_file
def pyfunc_custom_env_dict():
    """Return a conda env for pyfunc tests without writing it to a file.

    The pip section requests an editable (``-e``) install of the directory
    that contains the ``kiwi`` package.

    :return: Whatever ``_mlflow_conda_env`` returns when no output path is given.
    """
    package_parent_dir = os.path.dirname(kiwi.__path__[0])
    conda_packages = ["scikit-learn", "pytest", "cloudpickle"]
    pip_packages = ["-e " + package_parent_dir]
    return _mlflow_conda_env(
        additional_conda_deps=conda_packages,
        additional_pip_deps=pip_packages,
    )
def sklearn_custom_env(tmpdir):
    """Generate a conda env spec for scikit-learn tests and return its file path.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    conda_packages = ["scikit-learn", "pytest"]
    _mlflow_conda_env(env_file, additional_conda_deps=conda_packages)
    return env_file
def xgb_custom_env(tmpdir):
    """Generate a conda env spec for XGBoost tests and return its file path.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    pip_packages = ["xgboost", "pytest"]
    _mlflow_conda_env(env_file, additional_pip_deps=pip_packages)
    return env_file
def h2o_custom_env(tmpdir):
    """Generate a conda env spec for H2O tests and return its file path.

    :param tmpdir: Directory (converted with ``str``) that receives
                   ``conda_env.yml``.
    :return: Path of the ``conda_env.yml`` file passed to ``_mlflow_conda_env``.
    """
    env_file = os.path.join(str(tmpdir), "conda_env.yml")
    conda_packages = ["pytest"]
    pip_packages = ["h2o"]
    _mlflow_conda_env(
        env_file,
        additional_conda_deps=conda_packages,
        additional_pip_deps=pip_packages,
    )
    return env_file