def test_build_image_passes_model_conda_environment_to_azure_image_creation_routine(
    sklearn_model, model_path
):
    """Check that ``build_image`` hands the model's Conda environment to Azure ML.

    A scikit-learn model is saved together with a custom Conda environment file, an
    image is built from it while the Azure ML routines are mocked, and the
    ``conda_file`` of the image configuration passed to the (mocked) image-creation
    call is compared against the environment that was written.
    """
    sklearn_conda_env_text = """\
    name: sklearn-env
    dependencies:
        - scikit-learn
    """
    with TempDir(chdr=True) as tmp:
        sklearn_conda_env_path = tmp.path("conda.yaml")
        with open(sklearn_conda_env_path, "w") as env_file:
            env_file.write(sklearn_conda_env_text)

        mlflow.sklearn.save_model(
            sk_model=sklearn_model, path=model_path, conda_env=sklearn_conda_env_path
        )

        # Mock the TempDir.__exit__ function to ensure that the enclosing temporary
        # directory is not deleted
        path_patch = mock.patch("mlflow.utils.file_utils.TempDir.path")
        exit_patch = mock.patch("mlflow.utils.file_utils.TempDir.__exit__")
        with AzureMLMocks() as aml_mocks, path_patch as tmpdir_path_mock, exit_patch:
            # Our current working directory is a temporary directory. Therefore, it is
            # safe to directly return the specified subpath.
            tmpdir_path_mock.side_effect = lambda subpath: subpath

            workspace = get_azure_workspace()
            mlflow.azureml.build_image(model_uri=model_path, workspace=workspace)

            recorded_calls = aml_mocks["create_image"].call_args_list
            assert len(recorded_calls) == 1
            # A recorded call is an ``(args, kwargs)`` pair; only the kwargs matter here.
            call_kwargs = recorded_calls[0][1]
            image_config = call_kwargs["image_config"]
            assert image_config.conda_file is not None
            with open(image_config.conda_file, "r") as conda_file:
                logged_env = yaml.safe_load(conda_file.read())
            assert logged_env == yaml.safe_load(sklearn_conda_env_text)
def test_raise_exception(sequential_model):
    """Exercise the error paths of ``mlflow.pytorch.save_model`` / ``load_model``.

    Covers: loading from a path that does not exist, saving an object that is not a
    PyTorch module, saving twice to the same path, and loading a model directory that
    was written by the scikit-learn flavor.
    """
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        model_dir = tmp.path("model")

        # Nothing has been saved yet.
        with pytest.raises(IOError):
            mlflow.pytorch.load_model(model_dir)

        # A plain list is not a torch module.
        with pytest.raises(TypeError):
            mlflow.pytorch.save_model([1, 2, 3], model_dir)

        # The first save succeeds; a second save to the same path must be rejected.
        mlflow.pytorch.save_model(sequential_model, model_dir)
        with pytest.raises(RuntimeError):
            mlflow.pytorch.save_model(sequential_model, model_dir)

        # Function-scope imports under explicit aliases so the MLflow flavor module and
        # the scikit-learn estimator class cannot be confused with each other.
        from mlflow import sklearn as mlflow_sklearn
        from sklearn.neighbors import KNeighborsClassifier

        classifier = KNeighborsClassifier()
        with open(tmp.path("knn.pkl"), "wb") as pickle_file:
            pickle.dump(classifier, pickle_file)

        sklearn_model_dir = tmp.path("knn")
        mlflow_sklearn.save_model(classifier, path=sklearn_model_dir)
        # The saved directory holds a scikit-learn model, not a PyTorch one.
        with pytest.raises(MlflowException):
            mlflow.pytorch.load_model(sklearn_model_dir)
def test_run():
    """Run the ``test_tracking`` entry point synchronously and validate what it logged.

    The project is executed once for each value of the ``use_start_run`` parameter
    ("0" and "1"), with the tracking URI patched to a fresh temporary directory; the
    resulting run is then read back through a ``FileStore`` rooted at that directory.
    """
    for use_start_run in ("0", "1"):
        tracking_uri_patch = mock.patch("mlflow.tracking.get_tracking_uri")
        with TempDir() as tmp, tracking_uri_patch as get_tracking_uri_mock:
            store_root = tmp.path()
            get_tracking_uri_mock.return_value = store_root
            project_params = {"use_start_run": use_start_run}
            submitted_run = mlflow.projects.run(
                TEST_PROJECT_DIR,
                entry_point="test_tracking",
                parameters=project_params,
                use_conda=False,
                experiment_id=0,
            )
            # Blocking runs should be finished when they return
            validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
            # Test that we can call wait() on a synchronous run & that the run has the
            # correct status after calling wait().
            submitted_run.wait()
            validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)

            # Validate run contents in the FileStore
            submitted_run_id = submitted_run.run_id
            store = FileStore(store_root)
            stored_infos = store.list_run_infos(experiment_id=0)
            assert len(stored_infos) == 1
            assert submitted_run_id == stored_infos[0].run_uuid

            stored_run = store.get_run(submitted_run_id)
            assert stored_run.info.status == RunStatus.FINISHED

            expected_params = {"use_start_run": use_start_run}
            logged_params = stored_run.data.params
            assert len(logged_params) == len(expected_params)
            for logged_param in logged_params:
                assert logged_param.value == expected_params[logged_param.key]

            expected_metrics = {"some_key": 3}
            for logged_metric in stored_run.data.metrics:
                assert logged_metric.value == expected_metrics[logged_metric.key]
def test_delete_artifact(artifact_path):
    """Log one artifact through the SFTP repository, then delete the whole store.

    :param artifact_path: Sub-directory under the repository root to log into, or
                          ``None`` to log directly at the root.
    """
    file_content = f"A simple test artifact\nThe artifact is located in: {artifact_path}"
    with NamedTemporaryFile(mode="w") as local, TempDir() as remote:
        local.write(file_content)
        # Flush so the content is on disk before the repository reads the file by name.
        local.flush()

        # ``remote.path`` is a method: it has to be called, otherwise the URI would
        # contain the repr of a bound method instead of the directory.
        remote_root = remote.path()
        sftp_path = f"sftp://{remote_root}"
        store = SFTPArtifactRepository(sftp_path)
        store.log_artifact(local.name, artifact_path)

        remote_file = posixpath.join(
            remote_root,
            "." if artifact_path is None else artifact_path,
            os.path.basename(local.name),
        )
        assert posixpath.isfile(remote_file)

        # "utf8" is a valid codec alias; the previous "uft8" raised LookupError.
        with open(remote_file, "r", encoding="utf8") as remote_content:
            assert remote_content.read() == file_content

        store.delete_artifacts(remote_root)

        assert not posixpath.exists(remote_file)
        assert not posixpath.exists(remote_root)
def test_prepare_env_fails(sk_model):
    """``mlflow models prepare-env`` must fail when the Conda environment is unsolvable.

    The model is logged with a dependency pin that cannot be satisfied. Preparing the
    environment with the ``local`` env manager is expected to succeed, whereas the
    default invocation is expected to exit with a non-zero status.
    """
    if no_conda:
        pytest.skip("This test requires conda.")

    broken_conda_env = {"dependencies": ["mlflow-does-not-exist-dep==abc"]}
    with TempDir(chdr=True):
        with mlflow.start_run() as active_run:
            mlflow.sklearn.log_model(sk_model, "model", conda_env=broken_conda_env)
            logged_run_id = active_run.info.run_id
            model_uri = "runs:/{run_id}/model".format(run_id=logged_run_id)

        prepare_env_cmd = ["mlflow", "models", "prepare-env", "-m", model_uri]

        # Test with no conda
        local_proc = subprocess.Popen(prepare_env_cmd + ["--env-manager", "local"])
        assert local_proc.wait() == 0

        # With conda - should fail due to bad conda environment.
        conda_proc = subprocess.Popen(prepare_env_cmd)
        assert conda_proc.wait() != 0
def test_serve_gunicorn_opts(iris_data, sk_model):
    """Serve a model with extra gunicorn options and check they reach the command line.

    The same model is addressed through a ``models:/`` URI and a ``runs:/`` URI. For
    each, the server's stdout is captured to a file, the scoring response is compared
    with the model's own predictions, and the captured output is searched for a
    gunicorn command carrying ``-w 3``.
    """
    if sys.platform == "win32":
        pytest.skip("This test requires gunicorn which is not available on windows.")

    with mlflow.start_run() as active_run:
        mlflow.sklearn.log_model(sk_model, "model", registered_model_name="imlegit")
        run_id = active_run.info.run_id

    registry_uri = "models:/{name}/{stage}".format(name="imlegit", stage="None")
    run_uri = "runs:/{run_id}/model".format(run_id=run_id)

    for model_uri in (registry_uri, run_uri):
        with TempDir() as tpm:
            captured_stdout_path = tpm.path("stoudt")
            with open(captured_stdout_path, "w") as stdout_sink:
                x, _ = iris_data
                scoring_response = pyfunc_serve_and_score_model(
                    model_uri,
                    pd.DataFrame(x),
                    content_type=CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                    stdout=stdout_sink,
                    extra_args=["-w", "3"],
                )
            # Read the capture back before the temporary directory disappears.
            with open(captured_stdout_path, "r") as stdout_source:
                stdout = stdout_source.read()

        response_text = scoring_response.content.decode("utf-8")
        response_frame = pd.read_json(response_text, orient="records")
        actual = response_frame[response_frame.columns[0]].values
        expected = sk_model.predict(x)
        assert all(expected == actual)

        expected_command_pattern = re.compile(
            "gunicorn.*-w 3.*mlflow.pyfunc.scoring_server.wsgi:app"
        )
        assert expected_command_pattern.search(stdout) is not None
def test_model_log(model, data, predicted):
    """Log a Keras model and load it back both natively and as a pyfunc model.

    The scenario runs twice: once relying on ``log_model()`` to start a run by itself
    and once with a run started explicitly beforehand.
    """
    x, _ = data
    previous_uri = tracking.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in (False, True):
        with TempDir(chdr=True, remove_on_exit=True):
            try:
                tracking.set_tracking_uri("test")
                if should_start_run:
                    tracking.start_run()
                mlflow.keras.log_model(model, artifact_path="keras_model")

                # Load model
                native_run_id = tracking.active_run().info.run_uuid
                model_loaded = mlflow.keras.load_model("keras_model", run_id=native_run_id)
                assert all(model_loaded.predict(x) == predicted)

                # Loading pyfunc model
                pyfunc_run_id = tracking.active_run().info.run_uuid
                pyfunc_loaded = mlflow.pyfunc.load_pyfunc("keras_model", run_id=pyfunc_run_id)
                assert all(pyfunc_loaded.predict(x).values == predicted)
            finally:
                # Always close the run and put the original tracking URI back.
                tracking.end_run()
                tracking.set_tracking_uri(previous_uri)
def _upload_s3(local_model_path, bucket, prefix):
    """
    Upload dir to S3 as .tar.gz.

    The directory is archived into a temporary ``model.tar.gz``, uploaded under
    ``<prefix>/model.tar.gz`` and the uploaded object is tagged ``SageMaker=true``.

    :param local_model_path: local path to a dir.
    :param bucket: S3 bucket where to store the data.
    :param prefix: path within the bucket.
    :return: s3 path of the uploaded artifact
    """
    # Function-scope import: S3 object keys always use "/" as the separator, so the
    # key must not be built with the platform-dependent ``os.path.join`` (which would
    # produce backslashes on Windows).
    import posixpath

    archive_name = "model.tar.gz"
    sess = boto3.Session()
    with TempDir() as tmp:
        model_data_file = tmp.path(archive_name)
        _make_tarfile(model_data_file, local_model_path)
        s3 = boto3.client('s3')
        with open(model_data_file, 'rb') as fobj:
            key = posixpath.join(prefix, archive_name)
            obj = sess.resource('s3').Bucket(bucket).Object(key)
            obj.upload_fileobj(fobj)
            response = s3.put_object_tagging(
                Bucket=bucket,
                Key=key,
                Tagging={'TagSet': [{'Key': 'SageMaker', 'Value': 'true'}]},
            )
            eprint('tag response', response)
            return '{}/{}/{}'.format(s3.meta.endpoint_url, bucket, key)
def pytorch_model_with_callback(patience):
    """Train the Iris classifier with early-stopping and checkpoint callbacks.

    PyTorch autologging is enabled before training. The early-stopping callback uses a
    very large ``min_delta`` so that it always triggers.

    :param patience: ``patience`` value given to the ``EarlyStopping`` callback.
    :return: Tuple ``(trainer, run)`` with the fitted trainer and the first run listed
             for experiment ``"0"``.
    """
    mlflow.pytorch.autolog()
    model = IrisClassification()
    data_module = IrisDataModule()
    data_module.prepare_data()
    data_module.setup(stage="fit")

    early_stopping = EarlyStopping(
        monitor="val_loss",
        mode="min",
        min_delta=99999999,  # forces early stopping
        patience=patience,
        verbose=True,
    )

    with TempDir() as tmp:
        # The name of ModelCheckpoint's directory argument depends on the installed
        # pytorch-lightning version.
        if LooseVersion(pl.__version__) >= LooseVersion("1.2.0"):
            keyword = "dirpath"
        else:
            keyword = "filepath"
        checkpoint_kwargs = {
            keyword: tmp.path(),
            "save_top_k": 1,
            "verbose": True,
            "monitor": "val_loss",
            "mode": "min",
        }
        checkpoint_callback = ModelCheckpoint(**checkpoint_kwargs)

        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[early_stopping, checkpoint_callback],
        )
        trainer.fit(model, data_module)

        client = mlflow.tracking.MlflowClient()
        first_run_info = client.list_run_infos(experiment_id="0")[0]
        run = client.get_run(first_run_info.run_id)

    return trainer, run
def test_pytorch_with_early_stopping_autolog_log_models_configuration_with(
    log_models, patience
):
    """``restored_model_checkpoint`` must be logged exactly when ``log_models`` is set.

    The Iris classifier is trained with early-stopping and checkpoint callbacks while
    autologging runs with the given ``log_models`` flag; afterwards the artifacts of
    the first run listed for experiment ``"0"`` are inspected.
    """
    mlflow.pytorch.autolog(log_models=log_models)
    model = IrisClassification()
    data_module = IrisDataModule()
    data_module.prepare_data()
    data_module.setup(stage="fit")

    early_stopping = EarlyStopping(
        monitor="val_loss", mode="min", patience=patience, verbose=True
    )

    with TempDir() as tmp:
        # The name of ModelCheckpoint's directory argument depends on the installed
        # pytorch-lightning version.
        if LooseVersion(pl.__version__) >= LooseVersion("1.2.0"):
            keyword = "dirpath"
        else:
            keyword = "filepath"
        checkpoint_kwargs = {
            keyword: tmp.path(),
            "save_top_k": 1,
            "verbose": True,
            "monitor": "val_loss",
            "mode": "min",
        }
        checkpoint_callback = ModelCheckpoint(**checkpoint_kwargs)

        trainer = pl.Trainer(
            max_epochs=NUM_EPOCHS * 2,
            callbacks=[early_stopping, checkpoint_callback],
        )
        trainer.fit(model, data_module)

        client = mlflow.tracking.MlflowClient()
        first_run_info = client.list_run_infos(experiment_id="0")[0]
        run = client.get_run(first_run_info.run_id)

    run_id = run.info.run_id
    client = mlflow.tracking.MlflowClient()
    artifacts = [artifact.path for artifact in client.list_artifacts(run_id)]
    checkpoint_logged = "restored_model_checkpoint" in artifacts
    assert checkpoint_logged == log_models
def log_model(
    spark_model,
    artifact_path,
    conda_env=None,
    dfs_tmpdir=None,
    sample_input=None,
    registered_model_name=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    pip_requirements=None,
    extra_pip_requirements=None,
):
    """
    Log a Spark MLlib model as an MLflow artifact for the current run. This uses the
    MLlib persistence format and produces an MLflow Model with the Spark flavor.

    Note: If no run is active, it will instantiate a run to obtain a run_id.

    :param spark_model: Spark model to be saved - MLflow can only save descendants of
                        pyspark.ml.Model which implement MLReadable and MLWritable.
    :param artifact_path: Run relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to a
                      Conda environment yaml file. If provided, this describes the environment
                      this model should be run in. At minimum, it should specify the dependencies
                      contained in :func:`get_default_conda_env()`. If `None`, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pyspark=2.3.0'
                            ]
                        }
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is written in this
                       destination and then copied into the model's artifact directory. This is
                       necessary as Spark ML models read from and write to DFS if running on a
                       cluster. If this operation completes successfully, all temporary files
                       created on the DFS are removed. Defaults to ``/tmp/mlflow``.
    :param sample_input: A sample input used to add the MLeap flavor to the model.
                         This must be a PySpark DataFrame that the model can evaluate. If
                         ``sample_input`` is ``None``, the MLeap flavor is not added.
    :param registered_model_name: If given, create a model version under
                                  ``registered_model_name``, also creating a registered model if one
                                  with the given name does not exist.

    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example will be converted to a Pandas DataFrame and then
                          serialized to json using the Pandas split-oriented format. Bytes are
                          base64-encoded.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and is in ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :return: A :py:class:`ModelInfo <mlflow.models.model.ModelInfo>` instance that contains the
             metadata of the logged model.

    .. code-block:: python
        :caption: Example

        from pyspark.ml import Pipeline
        from pyspark.ml.classification import LogisticRegression
        from pyspark.ml.feature import HashingTF, Tokenizer

        training = spark.createDataFrame([
            (0, "a b c d e spark", 1.0),
            (1, "b d", 0.0),
            (2, "spark f g h", 1.0),
            (3, "hadoop mapreduce", 0.0) ], ["id", "text", "label"])
        tokenizer = Tokenizer(inputCol="text", outputCol="words")
        hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
        lr = LogisticRegression(maxIter=10, regParam=0.001)
        pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
        model = pipeline.fit(training)
        mlflow.spark.log_model(model, "spark-model")
    """
    from py4j.protocol import Py4JError

    _validate_model(spark_model)
    from pyspark.ml import PipelineModel

    # Anything that is not already a pipeline is wrapped in a single-stage pipeline.
    if not isinstance(spark_model, PipelineModel):
        spark_model = PipelineModel([spark_model])
    run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
    run_root_artifact_uri = mlflow.get_artifact_uri()

    def _log_with_default_behavior():
        """Persist the model through the generic ``Model.log()`` code path."""
        return Model.log(
            artifact_path=artifact_path,
            flavor=mlflow.spark,
            spark_model=spark_model,
            conda_env=conda_env,
            dfs_tmpdir=dfs_tmpdir,
            sample_input=sample_input,
            registered_model_name=registered_model_name,
            signature=signature,
            input_example=input_example,
            await_registration_for=await_registration_for,
            pip_requirements=pip_requirements,
            extra_pip_requirements=extra_pip_requirements,
        )

    # If the artifact URI is a local filesystem path, defer to Model.log() to persist the model,
    # since Spark may not be able to write directly to the driver's filesystem. For example,
    # writing to `file:/uri` will write to the local filesystem from each executor, which will
    # be incorrect on multi-node clusters - to avoid such issues we just use the Model.log() path
    # here.
    if is_local_uri(run_root_artifact_uri):
        return _log_with_default_behavior()

    # The root is a non-local URI at this point, so its components are joined with "/"
    # regardless of the platform the driver runs on.
    model_dir = posixpath.join(run_root_artifact_uri, artifact_path)
    # Try to write directly to the artifact repo via Spark. If this fails, defer to Model.log()
    # to persist the model
    try:
        spark_model.save(posixpath.join(model_dir, _SPARK_MODEL_PATH_SUB))
    except Py4JError:
        return _log_with_default_behavior()

    # Otherwise, override the default model log behavior and save model directly to artifact repo
    mlflow_model = Model(artifact_path=artifact_path, run_id=run_id)
    with TempDir() as tmp:
        tmp_model_metadata_dir = tmp.path()
        # NOTE(review): ``pip_requirements`` / ``extra_pip_requirements`` are not forwarded
        # on this code path, unlike the Model.log() path above - confirm whether
        # ``_save_model_metadata`` accepts them and whether this omission is intended.
        _save_model_metadata(
            tmp_model_metadata_dir,
            spark_model,
            mlflow_model,
            sample_input,
            conda_env,
            signature=signature,
            input_example=input_example,
        )
        mlflow.tracking.fluent.log_artifacts(tmp_model_metadata_dir, artifact_path)
        if registered_model_name is not None:
            mlflow.register_model(
                "runs:/%s/%s" % (run_id, artifact_path),
                registered_model_name,
                await_registration_for,
            )
        return mlflow_model.get_model_info()
def save_model(
    pytorch_model,
    path,
    conda_env=None,
    mlflow_model=None,
    code_paths=None,
    pickle_module=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    requirements_file=None,
    extra_files=None,
    pip_requirements=None,
    extra_pip_requirements=None,
    **kwargs,
):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Can be either an eager model (subclass of
                          ``torch.nn.Module``) or scripted model prepared via ``torch.jit.script``
                          or ``torch.jit.trace``.

                          The model should accept a single ``torch.FloatTensor`` as
                          input and produce a single output tensor.

                          If saving an eager model, any code dependencies of the
                          model's class, including the class definition itself, should be
                          included in one of the following locations:

                          - The package(s) listed in the model's Conda environment, specified
                            by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths`` parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: {{ conda_env }}
    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or directories
                       containing file dependencies). These files are *prepended* to the system
                       path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the specified
                          ``pytorch_model``. This is passed as the ``pickle_module`` parameter
                          to ``torch.save()``. By default, this module is also used to
                          deserialize ("unpickle") the PyTorch model at load time.

    :param signature: :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output :py:class:`Schema <mlflow.types.Schema>`.
                      The model signature can be :py:func:`inferred <mlflow.models.infer_signature>`
                      from datasets with valid model input (e.g. the training dataset with target
                      column omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)
    :param input_example: Input example provides one or several instances of valid
                          model input. The example can be used as a hint of what data to feed the
                          model. The given example can be a Pandas DataFrame where the given
                          example will be serialized to json using the Pandas split-oriented
                          format, or a numpy array where the example will be serialized to json
                          by converting it to a list. Bytes are base64-encoded.

    :param requirements_file:

        .. warning::

            ``requirements_file`` has been deprecated. Please use ``pip_requirements`` instead.

        A string containing the path to requirements file. Remote URIs are resolved to absolute
        filesystem paths. For example, consider the following ``requirements_file`` string:

        .. code-block:: python

            requirements_file = "s3://my-bucket/path/to/my_file"

        In this case, the ``"my_file"`` requirements file is downloaded from S3. If ``None``,
        no requirements file is added to the model.

    :param extra_files: A list containing the paths to corresponding extra files. Remote URIs
                      are resolved to absolute filesystem paths.
                      For example, consider the following ``extra_files`` list -

                      extra_files = ["s3://my-bucket/path/to/my_file1",
                                    "s3://my-bucket/path/to/my_file2"]

                      In this case, the ``"my_file1 & my_file2"`` extra file is downloaded from S3.

                      If ``None``, no extra files are added to the model.
    :param pip_requirements: {{ pip_requirements }}
    :param extra_pip_requirements: {{ extra_pip_requirements }}
    :param kwargs: kwargs to pass to ``torch.save`` method.

    :raises TypeError: If ``pytorch_model`` is not a ``torch.nn.Module``, or if
                       ``code_paths`` / ``extra_files`` is not a list, or if
                       ``requirements_file`` is not a string.
    :raises RuntimeError: If ``path`` already exists.

    .. code-block:: python
        :caption: Example

        import os

        import torch
        import mlflow.pytorch

        # Class defined here
        class LinearNNModel(torch.nn.Module):
            ...

        # Initialize our model, criterion and optimizer
        ...

        # Training loop
        ...

        # Save PyTorch models to current working directory
        with mlflow.start_run() as run:
            mlflow.pytorch.save_model(model, "model")

            # Convert to a scripted model and save it
            scripted_pytorch_model = torch.jit.script(model)
            mlflow.pytorch.save_model(scripted_pytorch_model, "scripted_model")

        # Load each saved model for inference
        for model_path in ["model", "scripted_model"]:
            model_uri = "{}/{}".format(os.getcwd(), model_path)
            loaded_model = mlflow.pytorch.load_model(model_uri)
            print("Loaded {}:".format(model_path))
            for x in [6.0, 8.0, 12.0, 30.0]:
                X = torch.Tensor([[x]])
                y_pred = loaded_model(X)
                print("predict X: {}, y_pred: {:.2f}".format(x, y_pred.data.item()))
            print("--")

    .. code-block:: text
        :caption: Output

        Loaded model:
        predict X: 6.0, y_pred: 11.90
        predict X: 8.0, y_pred: 15.92
        predict X: 12.0, y_pred: 23.96
        predict X: 30.0, y_pred: 60.13
        --
        Loaded scripted_model:
        predict X: 6.0, y_pred: 11.90
        predict X: 8.0, y_pred: 15.92
        predict X: 12.0, y_pred: 23.96
        predict X: 30.0, y_pred: 60.13
    """
    import torch

    _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements)

    pickle_module = pickle_module or mlflow_pytorch_pickle_module

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")
    if code_paths is not None and not isinstance(code_paths, list):
        raise TypeError("Argument code_paths should be a list, not {}".format(type(code_paths)))
    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))

    # Validate the remaining arguments before anything is written. Rejecting them only
    # after the model has been serialized would leave a half-written directory at
    # ``path`` behind, and a corrected retry would then fail with "already exists".
    if extra_files and not isinstance(extra_files, list):
        raise TypeError("Extra files argument should be a list")
    if requirements_file and not isinstance(requirements_file, str):
        raise TypeError("Path to requirements file should be a string")

    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)
    # Persist the pickle module name as a file in the model's `data` directory. This is necessary
    # because the `data` directory is the only available parameter to `_load_pyfunc`, and it
    # does not contain the MLmodel configuration; therefore, it is not sufficient to place
    # the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)
    # Save pytorch model
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    if isinstance(pytorch_model, torch.jit.ScriptModule):
        torch.jit.ScriptModule.save(pytorch_model, model_path)
    else:
        torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    torchserve_artifacts_config = {}

    if extra_files:
        torchserve_artifacts_config[_EXTRA_FILES_KEY] = []
        with TempDir() as tmp_extra_files_dir:
            for extra_file in extra_files:
                _download_artifact_from_uri(
                    artifact_uri=extra_file, output_path=tmp_extra_files_dir.path()
                )
                rel_path = posixpath.join(_EXTRA_FILES_KEY, os.path.basename(extra_file))
                torchserve_artifacts_config[_EXTRA_FILES_KEY].append({"path": rel_path})
            # Move the whole download directory into the model directory in one step.
            shutil.move(
                tmp_extra_files_dir.path(),
                posixpath.join(path, _EXTRA_FILES_KEY),
            )

    if requirements_file:
        warnings.warn(
            "`requirements_file` has been deprecated. Please use `pip_requirements` instead.",
            FutureWarning,
            stacklevel=2,
        )

        with TempDir() as tmp_requirements_dir:
            _download_artifact_from_uri(
                artifact_uri=requirements_file, output_path=tmp_requirements_dir.path()
            )
            rel_path = os.path.basename(requirements_file)
            torchserve_artifacts_config[_REQUIREMENTS_FILE_KEY] = {"path": rel_path}
            shutil.move(tmp_requirements_dir.path(rel_path), path)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        model_data=model_data_subpath,
        pytorch_version=str(torch.__version__),
        **torchserve_artifacts_config,
    )
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.pytorch",
        data=model_data_subpath,
        pickle_module_name=pickle_module.__name__,
        code=code_dir_subpath,
        env=_CONDA_ENV_FILE_NAME,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))

    if conda_env is None:
        if pip_requirements is None:
            default_reqs = get_default_pip_requirements()
            # To ensure `_load_pyfunc` can successfully load the model during the dependency
            # inference, `mlflow_model.save` must be called beforehand to save an MLmodel file.
            inferred_reqs = mlflow.models.infer_pip_requirements(
                model_data_path,
                FLAVOR_NAME,
                fallback=default_reqs,
            )
            default_reqs = sorted(set(inferred_reqs).union(default_reqs))
        else:
            default_reqs = None
        conda_env, pip_requirements, pip_constraints = _process_pip_requirements(
            default_reqs,
            pip_requirements,
            extra_pip_requirements,
        )
    else:
        conda_env, pip_requirements, pip_constraints = _process_conda_env(conda_env)

    with open(os.path.join(path, _CONDA_ENV_FILE_NAME), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    # Save `constraints.txt` if necessary
    if pip_constraints:
        write_to(os.path.join(path, _CONSTRAINTS_FILE_NAME), "\n".join(pip_constraints))

    # A user-supplied requirements file has already been moved into the model directory
    # above; `requirements.txt` is only generated when none was given.
    if not requirements_file:
        # Save `requirements.txt`
        write_to(os.path.join(path, _REQUIREMENTS_FILE_NAME), "\n".join(pip_requirements))
def spark_udf(spark, model_uri, result_type="double"):
    """
    A Spark UDF that can be used to invoke the Python function formatted model.

    Parameters passed to the UDF are forwarded to the model as a DataFrame where the column names
    are ordinals (0, 1, ...). On some versions of Spark, it is also possible to wrap the input in a
    struct. In that case, the data will be passed as a DataFrame with column names given by the
    struct definition (e.g. when invoked as my_udf(struct('x', 'y'), the model will get the data as
    a pandas DataFrame with 2 columns 'x' and 'y').

    The predictions are filtered to contain only the columns that can be represented as the
    ``result_type``. If the ``result_type`` is string or array of strings, all predictions are
    converted to string. If the result type is not an array type, the left most column with
    matching type is returned.

    .. code-block:: python
        :caption: Example

        predict = mlflow.pyfunc.spark_udf(spark, "/my/local/model")
        df.withColumn("prediction", predict("name", "age")).show()

    :param spark: A SparkSession object.
    :param model_uri: The location, in URI format, of the MLflow model with the
                      :py:mod:`mlflow.pyfunc` flavor. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.

    :param result_type: the return type of the user-defined function. The value can be either a
        ``pyspark.sql.types.DataType`` object or a DDL-formatted type string. Only a primitive
        type or an array ``pyspark.sql.types.ArrayType`` of primitive type are allowed.
        The following classes of result type are supported:

        - "int" or ``pyspark.sql.types.IntegerType``: The leftmost integer that can fit in an
          ``int32`` or an exception if there is none.

        - "long" or ``pyspark.sql.types.LongType``: The leftmost long integer that can fit in an
          ``int64`` or an exception if there is none.

        - ``ArrayType(IntegerType|LongType)``: All integer columns that can fit into the requested
          size.

        - "float" or ``pyspark.sql.types.FloatType``: The leftmost numeric result cast to
          ``float32`` or an exception if there is none.

        - "double" or ``pyspark.sql.types.DoubleType``: The leftmost numeric result cast to
          ``double`` or an exception if there is none.

        - ``ArrayType(FloatType|DoubleType)``: All numeric columns cast to the requested type or
          an exception if there are no numeric columns.

        - "string" or ``pyspark.sql.types.StringType``: The leftmost column converted to ``string``.

        - ``ArrayType(StringType)``: All columns converted to ``string``.

    :return: Spark UDF that applies the model's ``predict`` method to the data and returns a
             type specified by ``result_type``, which by default is a double.
    """

    # Scope Spark import to this method so users don't need pyspark to use non-Spark-related
    # functionality.
    from mlflow.pyfunc.spark_model_cache import SparkModelCache
    from pyspark.sql.functions import pandas_udf
    from pyspark.sql.types import _parse_datatype_string
    from pyspark.sql.types import ArrayType, DataType as SparkDataType
    from pyspark.sql.types import DoubleType, IntegerType, FloatType, LongType, StringType

    if not isinstance(result_type, SparkDataType):
        result_type = _parse_datatype_string(result_type)

    # For array results the element type decides which model outputs are eligible.
    elem_type = result_type
    if isinstance(elem_type, ArrayType):
        elem_type = elem_type.elementType

    supported_types = [IntegerType, LongType, FloatType, DoubleType, StringType]

    if not isinstance(elem_type, tuple(supported_types)):
        raise MlflowException(
            message="Invalid result_type '{}'. Result type can only be one of or an array of one "
            "of the following types types: {}".format(str(elem_type), str(supported_types)),
            error_code=INVALID_PARAMETER_VALUE,
        )

    with TempDir() as local_tmpdir:
        local_model_path = _download_artifact_from_uri(
            artifact_uri=model_uri, output_path=local_tmpdir.path()
        )
        archive_path = SparkModelCache.add_local_model(spark, local_model_path)

    def predict(*args):
        model = SparkModelCache.get_or_load(archive_path)
        input_schema = model.metadata.get_input_schema()
        pdf = None

        # A DataFrame argument (struct column input) is used as the model input as-is.
        for x in args:
            if isinstance(x, pandas.DataFrame):
                if len(args) != 1:
                    raise Exception(
                        "If passing a StructType column, there should be only one "
                        "input column, but got %d" % len(args)
                    )
                pdf = x
        if pdf is None:
            args = list(args)
            if input_schema is None:
                # Without a schema the columns are named by their ordinal position.
                names = [str(i) for i in range(len(args))]
            else:
                names = input_schema.column_names()
                # Surplus unnamed columns are dropped; missing ones are an error.
                if len(args) > len(names):
                    args = args[: len(names)]
                if len(args) < len(names):
                    message = (
                        "Model input is missing columns. Expected {0} input columns {1},"
                        " but the model received only {2} unnamed input columns"
                        " (Since the columns were passed unnamed they are expected to be in"
                        " the order specified by the schema).".format(len(names), names, len(args))
                    )
                    raise MlflowException(message)
            pdf = pandas.DataFrame(data=dict(zip(names, args)), columns=names)

        result = model.predict(pdf)

        if not isinstance(result, pandas.DataFrame):
            result = pandas.DataFrame(data=result)

        elem_type = result_type.elementType if isinstance(result_type, ArrayType) else result_type

        if isinstance(elem_type, IntegerType):
            result = result.select_dtypes(
                [np.byte, np.ubyte, np.short, np.ushort, np.int32]
            ).astype(np.int32)
        elif isinstance(elem_type, LongType):
            # ``np.int`` / ``np.long`` were plain aliases of the builtin ``int`` and were
            # removed in NumPy 1.24; the builtin selects the same columns.
            result = result.select_dtypes([np.byte, np.ubyte, np.short, np.ushort, int])
        elif isinstance(elem_type, FloatType):
            result = result.select_dtypes(include=(np.number,)).astype(np.float32)
        elif isinstance(elem_type, DoubleType):
            result = result.select_dtypes(include=(np.number,)).astype(np.float64)

        if len(result.columns) == 0:
            raise MlflowException(
                message="The the model did not produce any values compatible with the requested "
                "type '{}'. Consider requesting udf with StringType or "
                "Arraytype(StringType).".format(str(elem_type)),
                error_code=INVALID_PARAMETER_VALUE,
            )

        if isinstance(elem_type, StringType):
            result = result.applymap(str)

        if isinstance(result_type, ArrayType):
            # One list per row, containing every eligible column.
            return pandas.Series(result.to_numpy().tolist())
        else:
            # Scalar result types return the leftmost eligible column.
            return result[result.columns[0]]

    return pandas_udf(predict, result_type)
def test_model_export(self):
    """Export a pickled model as a pyfunc model and score it through a local server.

    The test saves ``self._linear_lr`` as a pyfunc model, launches
    ``mlflow sagemaker run-local`` on it, polls ``/ping`` until the server answers
    with HTTP 200, posts ``self._iris_df`` to ``/invocations`` and compares the
    response against ``self._linear_lr_predict``.  The temporary directory is
    created with ``remove_on_exit=False`` and removed manually at the end.
    """
    import json
    import shutil
    from sys import version_info

    ping_url = 'http://localhost:5000/ping'
    tmp_root = None
    try:
        with TempDir(chdr=True, remove_on_exit=False) as tmp:
            tmp_root = tmp._path
            # NOTE: Changed dir to temp dir and use relative paths to get around the way temp
            # dirs are handled in python.
            pickled_model = tmp.path("model.pkl")
            with open(pickled_model, "wb") as model_file:
                pickle.dump(self._linear_lr, model_file)
            model_dir = tmp.path("input_model")
            conda_env = "conda.env"
            python_version = "{}.{}.{}".format(
                version_info.major, version_info.minor, version_info.micro)
            with open(conda_env, "w") as env_file:
                env_file.write(CONDA_ENV.format(python_version=python_version))
            pyfunc.save_model(model_dir,
                              loader_module="test_model_export",
                              code_path=[__file__],
                              data_path=pickled_model,
                              conda_env=conda_env)
            server = Popen(['mlflow', 'sagemaker', 'run-local', '-m', model_dir],
                           stdout=PIPE,
                           stderr=STDOUT,
                           universal_newlines=True)
            try:
                # Poll the health endpoint; each attempt waits 5 seconds first.
                for attempt in range(50):
                    self.assertTrue(server.poll() is None, "scoring process died")
                    time.sleep(5)
                    # noinspection PyBroadException
                    try:
                        ping_status = requests.get(url=ping_url)
                        print('connection attempt', attempt,
                              "server is up! ping status", ping_status)
                        if ping_status.status_code == 200:
                            break
                    except Exception:
                        print('connection attempt', attempt,
                              "failed, server is not up yet")

                self.assertTrue(server.poll() is None, "scoring process died")
                ping_status = requests.get(url=ping_url)
                print("server up, ping status", ping_status)
                if ping_status.status_code != 200:
                    raise Exception("ping failed, server is not happy")

                records = self._iris_df.to_dict(orient='records')
                response = requests.post(url='http://localhost:5000/invocations',
                                         json=records)
                predictions = json.loads(response.content)
                print('expected', self._linear_lr_predict)
                print('actual ', predictions)
                np.testing.assert_array_equal(self._linear_lr_predict, predictions)
            finally:
                # Stop the server if it is still running, then dump whatever it printed.
                if server.poll() is None:
                    server.terminate()
                print("captured output of the scoring process")
                print(server.stdout.read())
    finally:
        if tmp_root:
            try:
                shutil.rmtree(tmp_root)
            except PermissionError:
                print("Failed to remove", tmp_root)
def log_model(spark_model, artifact_path, conda_env=None, jars=None, dfs_tmpdir=None,
              sample_input=None):
    """
    Log a Spark MLlib model as an MLflow artifact for the current run. This uses the
    MLlib persistence format, and the logged model will have the Spark flavor.

    :param spark_model: PipelineModel to be saved.
    :param artifact_path: Run relative artifact path.
    :param conda_env: Either a dictionary representation of a Conda environment or the path to
                      a Conda environment yaml file. If provided, this describes the
                      environment this model should be run in. At minimum, it should specify
                      the dependencies contained in ``mlflow.spark.DEFAULT_CONDA_ENV``. If
                      `None`, the default ``mlflow.spark.DEFAULT_CONDA_ENV`` environment will
                      be added to the model. The following is an *example* dictionary
                      representation of a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pyspark=2.3.0'
                            ]
                        }
    :param jars: List of JARs needed by the model.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or
                       local filesystem if running in local mode. The model will be written
                       in this destination and then copied into the model's artifact
                       directory. This is necessary as Spark ML models read from and write to
                       DFS if running on a cluster. If this operation completes successfully,
                       all temporary files created on the DFS are removed. Defaults to
                       ``/tmp/mlflow``.
    :param sample_input: A sample input used to add the MLeap flavor to the model. This must
                         be a PySpark DataFrame that the model can evaluate. If
                         ``sample_input`` is ``None``, the MLeap flavor is not added.
    :return: The result of ``Model.log()`` when the generic logging path is used; otherwise
             ``None``.

    >>> from pyspark.ml import Pipeline
    >>> from pyspark.ml.classification import LogisticRegression
    >>> from pyspark.ml.feature import HashingTF, Tokenizer
    >>> training = spark.createDataFrame([
    ...   (0, "a b c d e spark", 1.0),
    ...   (1, "b d", 0.0),
    ...   (2, "spark f g h", 1.0),
    ...   (3, "hadoop mapreduce", 0.0) ], ["id", "text", "label"])
    >>> tokenizer = Tokenizer(inputCol="text", outputCol="words")
    >>> hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features")
    >>> lr = LogisticRegression(maxIter=10, regParam=0.001)
    >>> pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
    >>> model = pipeline.fit(training)
    >>> mlflow.spark.log_model(model, "spark-model")
    """

    def _log_via_generic_path():
        # Generic path: let Model.log() save locally and upload the result.
        return Model.log(artifact_path=artifact_path, flavor=mlflow.spark,
                         spark_model=spark_model, jars=jars, conda_env=conda_env,
                         dfs_tmpdir=dfs_tmpdir, sample_input=sample_input)

    _validate_model(spark_model, jars)
    run_id = mlflow.tracking.fluent._get_or_start_run().info.run_uuid
    artifact_root = mlflow.get_artifact_uri()

    # If the artifact URI is a local filesystem path, defer to Model.log() to persist the
    # model, since Spark may not be able to write directly to the driver's filesystem. For
    # example, writing to `file:/uri` will write to the local filesystem from each executor,
    # which will be incorrect on multi-node clusters - to avoid such issues we just use the
    # Model.log() path here.
    if mlflow.tracking.utils._is_local_uri(artifact_root):
        return _log_via_generic_path()

    # If Spark cannot write directly to the artifact repo, defer to Model.log() to persist
    # the model.
    destination = os.path.join(artifact_root, artifact_path)
    try:
        spark_model.save(os.path.join(destination, _SPARK_MODEL_PATH_SUB))
    except Py4JJavaError:
        return _log_via_generic_path()

    # Otherwise, override the default model log behavior: the Spark model data is already in
    # the artifact repo, so only the model metadata still needs to be logged.
    model_config = Model(artifact_path=artifact_path, run_id=run_id)
    with TempDir() as tmp:
        metadata_dir = tmp.path()
        _save_model_metadata(metadata_dir, spark_model, model_config, sample_input, conda_env)
        mlflow.tracking.fluent.log_artifacts(metadata_dir, artifact_path)
def test_list_artifacts(self):
    """Log the same directory tree under two artifact paths and verify the listings.

    Three files (one at the top level, two in ``subdir``) are logged under
    ``new_path/path`` and ``new_path2/path``.  Listing either the logged path or
    its parent must return exactly those three files.
    """
    # NOTE(review): ``root_uri`` is not defined in this function; it is assumed to be
    # provided at module level - confirm against the rest of the file.

    def expected_paths(base):
        # Full paths the store is expected to report for a tree logged under ``base``.
        return [
            os.path.join(root_uri, os.path.normpath(base), 'file_one.txt'),
            os.path.join(root_uri, os.path.normpath(base + '/subdir'), 'file_two.txt'),
            os.path.join(root_uri, os.path.normpath(base + '/subdir'), 'file_three.txt'),
        ]

    # (path passed to list_artifacts, artifact path the files were logged under)
    listings = [
        ('new_path/path', 'new_path/path'),
        ('new_path', 'new_path/path'),
        ('new_path2/path', 'new_path2/path'),
        ('new_path2', 'new_path2/path'),
    ]

    with TempDir() as root_dir:
        with open(root_dir.path("file_one.txt"), "w") as f:
            f.write('DB store Test One')
        os.mkdir(root_dir.path("subdir"))
        with open(root_dir.path("subdir/file_two.txt"), "w") as f:
            f.write('DB store Test Two')
        with open(root_dir.path("subdir/file_three.txt"), "w") as f:
            f.write('DB store Test Three')

        self.store.log_artifacts(root_dir._path, 'new_path/path')
        self.store.log_artifacts(root_dir._path, 'new_path2/path')

        for listed_path, logged_under in listings:
            filenames = [f.path for f in self.store.list_artifacts(listed_path)]
            self.assertEqual(len(filenames), 3)
            for expected in expected_paths(logged_under):
                # assertIn reports both the missing item and the container on failure.
                self.assertIn(expected, filenames)
def test_predict(iris_data, sk_model):
    """Exercise ``mlflow models predict`` with several input/output configurations.

    A scikit-learn model is logged and registered, then scored through the CLI using a
    model-registry URI, a run URI, explicit JSON/CSV content types and stdin/stdout
    piping.  Every run must exit with status 0 and reproduce ``sk_model.predict``.

    :param iris_data: Fixture yielding an ``(x, y)`` pair; only ``x`` is used.
    :param sk_model: Fixture providing the fitted scikit-learn model.
    """
    # NOTE(review): ``extra_options`` is not defined in this function; it is assumed to be
    # a module-level list of extra CLI flags - confirm against the rest of the file.
    with TempDir(chdr=True) as tmp:
        with mlflow.start_run() as active_run:
            mlflow.sklearn.log_model(sk_model, "model", registered_model_name="impredicting")
            model_uri = "runs:/{run_id}/model".format(run_id=active_run.info.run_id)
        model_registry_uri = "models:/{name}/{stage}".format(name="impredicting", stage="None")
        input_json_path = tmp.path("input.json")
        input_csv_path = tmp.path("input.csv")
        output_json_path = tmp.path("output.json")
        x, _ = iris_data
        pd.DataFrame(x).to_json(input_json_path, orient="split")
        pd.DataFrame(x).to_csv(input_csv_path, index=False)

        env_with_tracking_uri = os.environ.copy()
        env_with_tracking_uri.update(MLFLOW_TRACKING_URI=mlflow.get_tracking_uri())
        expected = sk_model.predict(x)

        def run_predict(cli_args, stdin_text=None, **popen_kwargs):
            # communicate() drains any PIPE-backed stream, so the child can never block on
            # a full pipe the way it could with a bare wait().
            proc = subprocess.Popen(["mlflow", "models", "predict"] + cli_args,
                                    env=env_with_tracking_uri, **popen_kwargs)
            stdout, stderr = proc.communicate(stdin_text)
            assert proc.returncode == 0, stderr
            return stdout

        def assert_predictions_match(json_source):
            actual = pd.read_json(json_source, orient="records")
            actual = actual[actual.columns[0]].values
            assert all(expected == actual)

        # Test with no conda & model registry URI
        run_predict(["-m", model_registry_uri, "-i", input_json_path,
                     "-o", output_json_path, "--no-conda"],
                    stderr=subprocess.PIPE)
        assert_predictions_match(output_json_path)

        # With conda + --install-mlflow
        run_predict(["-m", model_uri, "-i", input_json_path, "-o", output_json_path]
                    + extra_options)
        assert_predictions_match(output_json_path)

        # explicit json format with default orient (should be split)
        run_predict(["-m", model_uri, "-i", input_json_path, "-o", output_json_path,
                     "-t", "json"] + extra_options)
        assert_predictions_match(output_json_path)

        # explicit json format with orient==split
        run_predict(["-m", model_uri, "-i", input_json_path, "-o", output_json_path,
                     "-t", "json", "--json-format", "split"] + extra_options)
        assert_predictions_match(output_json_path)

        # read from stdin, write to stdout.
        with open(input_json_path, "r") as f:
            stdin_text = f.read()
        stdout = run_predict(["-m", model_uri, "-t", "json", "--json-format", "split"]
                             + extra_options,
                             stdin_text=stdin_text,
                             universal_newlines=True,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=sys.stderr)
        assert_predictions_match(StringIO(stdout))

        # NB: We do not test orient=records here because records may lose column ordering.
        # orient == records is tested in other test with simpler model.

        # csv
        run_predict(["-m", model_uri, "-i", input_csv_path, "-o", output_json_path,
                     "-t", "csv"] + extra_options)
        assert_predictions_match(output_json_path)
def log(cls, artifact_path, flavor, registered_model_name=None,
        signature: ModelSignature = None, input_example: ModelInputExample = None,
        **kwargs):
    """
    Log model using supplied flavor module. If no run is active, this method will create a
    new active run.

    :param artifact_path: Run relative path identifying the model.
    :param flavor: Flavor module to save the model with. The module must have
                   the ``save_model`` function that will persist the model as a valid
                   MLflow model.
    :param registered_model_name: (Experimental) If given, create a model version under
                                  ``registered_model_name``, also creating a registered
                                  model if one with the given name does not exist.
    :param signature: (Experimental) :py:class:`ModelSignature` describes model input
                      and output :py:class:`Schema <mlflow.types.Schema>`. The model
                      signature can be :py:func:`inferred <infer_signature>` from
                      datasets representing valid model input (e.g. the training dataset)
                      and valid model output (e.g. model predictions generated on the
                      training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        signature = infer_signature(train, model.predict(train))

    :param input_example: (Experimental) Input example provides one or several examples
                          of valid model input. The example can be used as a hint of what
                          data to feed the model. The given example will be converted to
                          a Pandas DataFrame and then serialized to json using the Pandas
                          split-oriented format. Bytes are base64-encoded.
    :param kwargs: Extra args passed to the model flavor.
    """
    with TempDir() as tmp:
        staging_dir = tmp.path("model")
        run_id = mlflow.tracking.fluent._get_or_start_run().info.run_id
        model_config = cls(artifact_path=artifact_path, run_id=run_id)
        if signature is not None:
            model_config.signature = signature

        # Wrap the example up front so its metadata is recorded before the flavor saves.
        example = None
        if input_example is not None:
            example = _Example(input_example)
            model_config.input_example = example.info

        flavor.save_model(path=staging_dir, mlflow_model=model_config, **kwargs)

        # The example data itself is written next to the files the flavor produced.
        if example is not None:
            example.save(staging_dir)

        mlflow.tracking.fluent.log_artifacts(staging_dir, artifact_path)
        try:
            mlflow.tracking.fluent._record_logged_model(model_config)
        except MlflowException:
            # We need to swallow all mlflow exceptions to maintain backwards compatibility
            # with older tracking servers. Only print out a warning for now.
            _logger.warning(
                "Logging model metadata to the tracking server has failed, possibly due older "
                "server version. The model artifacts have been logged successfully under %s. "
                "In addition to exporting model artifacts, MLflow clients 1.7.0 and above "
                "attempt to record model metadata to the tracking store. If logging to a "
                "mlflow server via REST, consider upgrading the server version to MLflow "
                "1.7.0 or above.", mlflow.get_artifact_uri())

        if registered_model_name is None:
            return
        run_id = mlflow.tracking.fluent.active_run().info.run_id
        mlflow.register_model("runs:/%s/%s" % (run_id, artifact_path),
                              registered_model_name)
def _log_specialized_estimator_content(fitted_estimator, run_id, prefix, X, y_true,
                                       sample_weight=None):
    """Log estimator-type specific metrics and, for classifiers, plot artifacts.

    Metrics are computed for classifiers and regressors and logged in one batch.
    For classifiers, each artifact is rendered to ``<name>.png`` in a temporary
    directory which is then logged to the run.  Failures while computing metrics
    or artifacts are logged as warnings instead of being raised.

    :param fitted_estimator: The fitted scikit-learn estimator.
    :param run_id: ID of the run to log to.
    :param prefix: Passed through to the metric and artifact helpers.
    :param X: Passed through to the metric and artifact helpers.
    :param y_true: Passed through to the metric and artifact helpers.
    :param sample_weight: Optional; passed through to the metric and artifact helpers.
    :return: The dictionary of computed metrics (empty if none were computed).
    """
    import sklearn

    mlflow_client = MlflowClient()
    metrics = dict()
    is_classifier = sklearn.base.is_classifier(fitted_estimator)

    try:
        if is_classifier:
            metrics = _get_classifier_metrics(fitted_estimator, prefix, X, y_true,
                                              sample_weight)
        elif sklearn.base.is_regressor(fitted_estimator):
            metrics = _get_regressor_metrics(fitted_estimator, prefix, X, y_true,
                                             sample_weight)
    except Exception as err:
        msg = ("Failed to autolog metrics for " + fitted_estimator.__class__.__name__ +
               ". Logging error: " + str(err))
        _logger.warning(msg)
    else:
        # batch log all metrics
        try_mlflow_log(
            mlflow_client.log_batch,
            run_id,
            metrics=[
                Metric(key=str(key), value=value, timestamp=int(time.time() * 1000), step=0)
                for key, value in metrics.items()
            ],
        )

    if is_classifier:
        try:
            artifacts = _get_classifier_artifacts(fitted_estimator, prefix, X, y_true,
                                                  sample_weight)
        except Exception as e:
            msg = ("Failed to autolog artifacts for " + fitted_estimator.__class__.__name__ +
                   ". Logging error: " + str(e))
            _logger.warning(msg)
            # Metrics may already have been computed and logged; return them rather than
            # None so every exit path yields the same type.
            return metrics

        with TempDir() as tmp_dir:
            for artifact in artifacts:
                # One failing plot must not prevent the remaining plots from being produced.
                try:
                    display = artifact.function(**artifact.arguments)
                    display.ax_.set_title(artifact.title)
                    artifact_path = "{}.png".format(artifact.name)
                    filepath = tmp_dir.path(artifact_path)
                    display.figure_.savefig(filepath)
                    import matplotlib.pyplot as plt
                    plt.close(display.figure_)
                except Exception as e:
                    _log_warning_for_artifacts(artifact.name, artifact.function, e)

            try_mlflow_log(mlflow_client.log_artifacts, run_id, tmp_dir.path())

    return metrics
def test_basic_functions(self):
    """Exercise logging, listing and downloading on a local artifact repository.

    Covers single files, hidden files, nested destination directories, rejected
    destination paths, whole-directory logging and directory download.
    """

    def read_text(path):
        # Read through a context manager so no file handle is left open.
        with open(path) as handle:
            return handle.read()

    with TempDir() as test_root, TempDir() as tmp:
        repo = get_artifact_repository(test_root.path(), Mock())
        self.assertIsInstance(repo, LocalArtifactRepository)
        self.assertListEqual(repo.list_artifacts(), [])
        with self.assertRaises(Exception):
            read_text(repo.download_artifacts("test.txt"))

        # Create and log a test.txt file directly
        artifact_name = "test.txt"
        local_file = tmp.path(artifact_name)
        with open(local_file, "w") as f:
            f.write("Hello world!")
        repo.log_artifact(local_file)
        self.assertEqual(read_text(repo.download_artifacts(artifact_name)), "Hello world!")
        # Check that it actually got written in the expected place
        self.assertEqual(read_text(os.path.join(test_root.path(), artifact_name)),
                         "Hello world!")

        # log artifact in subdir
        repo.log_artifact(local_file, "aaa")
        text = read_text(repo.download_artifacts(os.path.join("aaa", artifact_name)))
        self.assertEqual(text, "Hello world!")

        # log a hidden artifact
        hidden_name = ".mystery"
        hidden_file = tmp.path(hidden_name)
        with open(hidden_file, 'w') as f:
            f.write("42")
        repo.log_artifact(hidden_file, "aaa")
        # Join with the bare file name: joining with the absolute ``hidden_file`` path would
        # discard "aaa" and point at the source file instead of the logged artifact.
        hidden_text = read_text(repo.download_artifacts(os.path.join("aaa", hidden_name)))
        self.assertEqual(hidden_text, "42")

        # log artifacts in deep nested subdirs
        nested_subdir = "bbb/ccc/ddd/eee/fghi"
        repo.log_artifact(local_file, nested_subdir)
        text = read_text(repo.download_artifacts(os.path.join(nested_subdir, artifact_name)))
        self.assertEqual(text, "Hello world!")

        for bad_path in ["/", "//", "/tmp", "/bad_path", ".", "../terrible_path"]:
            with self.assertRaises(Exception):
                repo.log_artifact(local_file, bad_path)

        # Create a subdirectory for log_artifacts
        os.mkdir(tmp.path("subdir"))
        os.mkdir(tmp.path("subdir", "nested"))
        with open(tmp.path("subdir", "a.txt"), "w") as f:
            f.write("A")
        with open(tmp.path("subdir", "b.txt"), "w") as f:
            f.write("B")
        with open(tmp.path("subdir", "nested", "c.txt"), "w") as f:
            f.write("C")
        repo.log_artifacts(tmp.path("subdir"))
        self.assertEqual(read_text(repo.download_artifacts("a.txt")), "A")
        self.assertEqual(read_text(repo.download_artifacts("b.txt")), "B")
        self.assertEqual(read_text(repo.download_artifacts("nested/c.txt")), "C")

        infos = self._get_contents(repo, None)
        self.assertListEqual(infos, [
            ("a.txt", False, 1),
            ("aaa", True, None),
            ("b.txt", False, 1),
            ("bbb", True, None),
            ("nested", True, None),
            ("test.txt", False, 12),
        ])

        # Verify contents of subdirectories
        self.assertListEqual(self._get_contents(repo, "nested"),
                             [("nested/c.txt", False, 1)])
        infos = self._get_contents(repo, "aaa")
        self.assertListEqual(infos, [("aaa/.mystery", False, 2),
                                     ("aaa/test.txt", False, 12)])
        self.assertListEqual(self._get_contents(repo, "bbb"), [("bbb/ccc", True, None)])
        self.assertListEqual(self._get_contents(repo, "bbb/ccc"),
                             [("bbb/ccc/ddd", True, None)])
        infos = self._get_contents(repo, "bbb/ccc/ddd/eee")
        self.assertListEqual(infos, [("bbb/ccc/ddd/eee/fghi", True, None)])
        infos = self._get_contents(repo, "bbb/ccc/ddd/eee/fghi")
        self.assertListEqual(infos, [("bbb/ccc/ddd/eee/fghi/test.txt", False, 12)])

        # Download a subdirectory
        downloaded_dir = repo.download_artifacts("nested")
        self.assertEqual(os.path.basename(downloaded_dir), "nested")
        self.assertEqual(read_text(os.path.join(downloaded_dir, "c.txt")), "C")
def download_artifacts(self, artifact_path):
    """Download ``artifact_path`` into a new temporary directory.

    The directory is created with ``remove_on_exit=False``, so it is not cleaned
    up when this method returns.

    :param artifact_path: Path of the artifact to download.
    :return: Whatever ``self._download_artifacts_into`` returns for the download.
    """
    with TempDir(remove_on_exit=False) as tmp:
        destination = tmp.path()
        downloaded = self._download_artifacts_into(artifact_path, destination)
        return downloaded
def test_path_params():
    """Check how an entry point resolves ``uri`` and ``path`` parameters.

    With no storage directory supplied, defaults and user values are passed
    through untouched and nothing is downloaded.  With a storage directory, the
    ``path`` parameter is replaced by the location returned from the (mocked)
    download helper, which must be called exactly once.
    """
    download_helper = "mlflow.tracking.artifact_utils._download_artifact_from_uri"
    constants_default = "s3://path.test/b1"
    data_file = "s3://path.test/resources/data_file.csv"
    defaults = {
        "constants": {"type": "uri", "default": constants_default},
        "data": {"type": "path", "default": data_file},
    }
    entry_point = EntryPoint("entry_point_name", defaults, "command_name script.py")

    # No user parameters, no storage dir: defaults are returned as-is.
    with mock.patch(download_helper) as download_uri_mock:
        resolved, extra = entry_point.compute_parameters({}, None)
        assert resolved == {"constants": constants_default, "data": data_file}
        assert extra == {}
        assert download_uri_mock.call_count == 0

    # User overrides one declared parameter and adds an undeclared one.
    with mock.patch(download_helper) as download_uri_mock:
        user_params = {"alpha": 0.001, "constants": "s3://path.test/b_two"}
        resolved, extra = entry_point.compute_parameters(user_params, None)
        assert resolved == {"constants": "s3://path.test/b_two", "data": data_file}
        assert extra == {"alpha": "0.001"}
        assert download_uri_mock.call_count == 0

    # Storage dir given: the default ``data`` path is downloaded.
    with mock.patch(download_helper) as download_uri_mock, TempDir() as tmp:
        dest_path = tmp.path()
        download_path = "%s/data_file.csv" % dest_path
        download_uri_mock.return_value = download_path
        user_params = {"alpha": 0.001}
        resolved, extra = entry_point.compute_parameters(user_params, dest_path)
        assert resolved == {"constants": constants_default, "data": download_path}
        assert extra == {"alpha": "0.001"}
        assert download_uri_mock.call_count == 1

    # Storage dir given: a user-supplied ``data`` path is downloaded.
    with mock.patch(download_helper) as download_uri_mock, TempDir() as tmp:
        dest_path = tmp.path()
        download_path = "%s/images.tgz" % dest_path
        download_uri_mock.return_value = download_path
        user_params = {"data": "s3://another.example.test/data_stash/images.tgz"}
        resolved, extra = entry_point.compute_parameters(user_params, dest_path)
        assert resolved == {"constants": constants_default, "data": download_path}
        assert extra == {}
        assert download_uri_mock.call_count == 1
def log_model(artifact_path, loader_module=None, data_path=None, code_path=None,
              conda_env=None, python_model=None, artifacts=None):
    """
    Create a custom Pyfunc model, incorporating custom inference logic and data
    dependencies, and log it as an artifact of the active run.

    For information about the workflows that this method supports, see :ref:`Workflows for
    creating custom pyfunc models <pyfunc-create-custom-workflows>` and
    :ref:`Which workflow is right for my use case? <pyfunc-create-custom-selecting-workflow>`.
    You cannot specify the parameters for the first workflow: ``loader_module``,
    ``data_path`` and the parameters for the second workflow: ``python_model``,
    ``artifacts`` together.

    :param artifact_path: The run-relative artifact path to which to log the Python model.
    :param loader_module: The name of the Python module that will be used to load the model
                          from ``data_path``. This module must define a method with the
                          prototype ``_load_pyfunc(data_path)``. If not *None*, this module
                          and its dependencies must be included in one of the following
                          locations:

                          - The MLflow library.
                          - Package(s) listed in the model's Conda environment, specified by
                            the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_path``
                            parameter.

    :param data_path: Path to a file or directory containing model data.
    :param code_path: A list of local filesystem paths to Python file dependencies (or
                      directories containing file dependencies). These files will be
                      *prepended* to the system path before the model is loaded.
    :param conda_env: Either a dictionary representation of a Conda environment or the path
                      to a Conda environment yaml file. This describes the environment this
                      model should be run in. If ``python_model`` is not *None*, the Conda
                      environment must at least specify the dependencies contained in
                      :data:`mlflow.pyfunc.DEFAULT_CONDA_ENV`. If `None`, the default
                      :data:`mlflow.pyfunc.DEFAULT_CONDA_ENV` environment will be added to
                      the model. The following is an *example* dictionary representation of
                      a Conda environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'cloudpickle==0.5.8'
                            ]
                        }

    :param python_model: An instance of a subclass of :class:`~PythonModel`. This class will
                         be serialized using the CloudPickle library. Any dependencies of
                         the class should be included in one of the following locations:

                         - The MLflow library.
                         - Package(s) listed in the model's Conda environment, specified by
                           the ``conda_env`` parameter.
                         - One or more of the files specified by the ``code_path``
                           parameter.

                         Note: If the class is imported from another module, as opposed to
                         being defined in the ``__main__`` scope, the defining module should
                         also be included in one of the listed locations.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries. Remote
                      artifact URIs will be resolved to absolute filesystem paths, producing
                      a dictionary of ``<name, absolute_path>`` entries. ``python_model``
                      can reference these resolved entries as the ``artifacts`` property of
                      the ``context`` parameter in
                      :func:`PythonModel.load_context() <mlflow.pyfunc.PythonModel.load_context>`
                      and :func:`PythonModel.predict() <mlflow.pyfunc.PythonModel.predict>`.
                      For example, consider the following ``artifacts`` dictionary::

                        {
                            "my_file": "s3://my-bucket/path/to/my/file"
                        }

                      In this case, the ``"my_file"`` artifact will be downloaded from S3.
                      The ``python_model`` can then refer to ``"my_file"`` as an absolute
                      filesystem path via ``context.artifacts["my_file"]``.

                      If *None*, no artifacts will be added to the model.
    """
    with TempDir() as tmp:
        # Save into a scratch directory first, then upload that directory to the run.
        staging_path = tmp.path(artifact_path)
        current_run_id = active_run().info.run_uuid
        model_config = Model(artifact_path=artifact_path, run_id=current_run_id)
        save_model(
            dst_path=staging_path,
            model=model_config,
            loader_module=loader_module,
            data_path=data_path,
            code_path=code_path,
            conda_env=conda_env,
            python_model=python_model,
            artifacts=artifacts,
        )
        log_artifacts(staging_path, artifact_path)
def save_model(
    pytorch_model,
    path,
    conda_env=None,
    mlflow_model=None,
    code_paths=None,
    pickle_module=None,
    signature: ModelSignature = None,
    input_example: ModelInputExample = None,
    requirements_file=None,
    extra_files=None,
    **kwargs
):
    """
    Save a PyTorch model to a path on the local file system.

    :param pytorch_model: PyTorch model to be saved. Can be either an eager model (subclass
                          of ``torch.nn.Module``) or scripted model prepared via
                          ``torch.jit.script`` or ``torch.jit.trace``.

                          The model accept a single ``torch.FloatTensor`` as input and
                          produce a single output tensor.

                          If saving an eager model, any code dependencies of the model's
                          class, including the class definition itself, should be included
                          in one of the following locations:

                          - The package(s) listed in the model's Conda environment,
                            specified by the ``conda_env`` parameter.
                          - One or more of the files specified by the ``code_paths``
                            parameter.

    :param path: Local path where the model is to be saved.
    :param conda_env: Either a dictionary representation of a Conda environment or the path
                      to a Conda environment yaml file. If provided, this describes the
                      environment this model should be run in. At minimum, it should
                      specify the dependencies contained in
                      :func:`get_default_conda_env()`. If ``None``, the default
                      :func:`get_default_conda_env()` environment is added to the model.
                      The following is an *example* dictionary representation of a Conda
                      environment::

                        {
                            'name': 'mlflow-env',
                            'channels': ['defaults'],
                            'dependencies': [
                                'python=3.7.0',
                                'pytorch=0.4.1',
                                'torchvision=0.2.1'
                            ]
                        }

    :param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
                       directories containing file dependencies). These files are
                       *prepended* to the system path when the model is loaded.
    :param pickle_module: The module that PyTorch should use to serialize ("pickle") the
                          specified ``pytorch_model``. This is passed as the
                          ``pickle_module`` parameter to ``torch.save()``. By default, this
                          module is also used to deserialize ("unpickle") the PyTorch model
                          at load time.
    :param signature: (Experimental) :py:class:`ModelSignature <mlflow.models.ModelSignature>`
                      describes model input and output
                      :py:class:`Schema <mlflow.types.Schema>`. The model signature can be
                      :py:func:`inferred <mlflow.models.infer_signature>` from datasets with
                      valid model input (e.g. the training dataset with target column
                      omitted) and valid model output (e.g. model predictions generated on
                      the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        predictions = ... # compute model predictions
                        signature = infer_signature(train, predictions)

    :param input_example: (Experimental) Input example provides one or several instances of
                          valid model input. The example can be used as a hint of what data
                          to feed the model. The given example will be converted to a
                          Pandas DataFrame and then serialized to json using the Pandas
                          split-oriented format. Bytes are base64-encoded.
    :param requirements_file: A string containing the path to requirements file. Remote URIs
                              are resolved to absolute filesystem paths. For example,
                              consider the following ``requirements_file`` string -

                              requirements_file = "s3://my-bucket/path/to/my_file"

                              In this case, the ``"my_file"`` requirements file is
                              downloaded from S3.

                              If ``None``, no requirements file is added to the model.
    :param extra_files: A list containing the paths to corresponding extra files. Remote
                        URIs are resolved to absolute filesystem paths. For example,
                        consider the following ``extra_files`` list -

                        extra_files = ["s3://my-bucket/path/to/my_file1",
                                       "s3://my-bucket/path/to/my_file2"]

                        In this case, the ``"my_file1 & my_file2"`` extra file is
                        downloaded from S3.

                        If ``None``, no extra files are added to the model.
    :param kwargs: kwargs to pass to ``torch.save`` method.
    :raises TypeError: If ``pytorch_model`` is not a ``torch.nn.Module``, ``code_paths`` or
                       ``extra_files`` is not a list, or ``requirements_file`` is not a
                       string.
    :raises RuntimeError: If ``path`` already exists.

    .. code-block:: python
        :caption: Example

        import torch
        import mlflow
        import mlflow.pytorch

        # Create model and set values
        pytorch_model = Model()
        pytorch_model_path = ...

        # train our model
        for epoch in range(500):
            y_pred = pytorch_model(x_data)
            ...

        # Save the model
        with mlflow.start_run() as run:
            mlflow.log_param("epochs", 500)
            mlflow.pytorch.save_model(pytorch_model, pytorch_model_path)

            # Saving scripted model
            scripted_pytorch_model = torch.jit.script(model)
            mlflow.pytorch.save_model(scripted_pytorch_model, pytorch_model_path)
    """
    import torch

    pickle_module = pickle_module or mlflow_pytorch_pickle_module

    if not isinstance(pytorch_model, torch.nn.Module):
        raise TypeError("Argument 'pytorch_model' should be a torch.nn.Module")
    if code_paths is not None:
        if not isinstance(code_paths, list):
            raise TypeError("Argument code_paths should be a list, not {}".format(type(code_paths)))
    path = os.path.abspath(path)
    if os.path.exists(path):
        raise RuntimeError("Path '{}' already exists".format(path))

    # Validate the remaining arguments before anything is written, so that a bad argument
    # cannot leave a half-written model directory behind at ``path``.
    if requirements_file and not isinstance(requirements_file, str):
        raise TypeError("Path to requirements file should be a string")
    if extra_files and not isinstance(extra_files, list):
        raise TypeError("Extra files argument should be a list")

    if mlflow_model is None:
        mlflow_model = Model()

    os.makedirs(path)
    if signature is not None:
        mlflow_model.signature = signature
    if input_example is not None:
        _save_example(mlflow_model, input_example, path)

    model_data_subpath = "data"
    model_data_path = os.path.join(path, model_data_subpath)
    os.makedirs(model_data_path)
    # Persist the pickle module name as a file in the model's `data` directory. This is
    # necessary because the `data` directory is the only available parameter to
    # `_load_pyfunc`, and it does not contain the MLmodel configuration; therefore, it is
    # not sufficient to place the module name in the MLmodel
    #
    # TODO: Stop persisting this information to the filesystem once we have a mechanism for
    # supplying the MLmodel configuration to `mlflow.pytorch._load_pyfunc`
    pickle_module_path = os.path.join(model_data_path, _PICKLE_MODULE_INFO_FILE_NAME)
    with open(pickle_module_path, "w") as f:
        f.write(pickle_module.__name__)

    # Save pytorch model
    model_path = os.path.join(model_data_path, _SERIALIZED_TORCH_MODEL_FILE_NAME)
    if isinstance(pytorch_model, torch.jit.ScriptModule):
        torch.jit.ScriptModule.save(pytorch_model, model_path)
    else:
        torch.save(pytorch_model, model_path, pickle_module=pickle_module, **kwargs)

    torchserve_artifacts_config = {}

    if requirements_file:
        # Download into a scratch directory, then move the file into the model directory.
        with TempDir() as tmp_requirements_dir:
            _download_artifact_from_uri(
                artifact_uri=requirements_file, output_path=tmp_requirements_dir.path()
            )
            rel_path = os.path.basename(requirements_file)
            torchserve_artifacts_config[_REQUIREMENTS_FILE_KEY] = {"path": rel_path}
            shutil.move(tmp_requirements_dir.path(rel_path), path)

    if extra_files:
        torchserve_artifacts_config[_EXTRA_FILES_KEY] = []
        with TempDir() as tmp_extra_files_dir:
            for extra_file in extra_files:
                _download_artifact_from_uri(
                    artifact_uri=extra_file, output_path=tmp_extra_files_dir.path()
                )
                rel_path = posixpath.join(_EXTRA_FILES_KEY, os.path.basename(extra_file),)
                torchserve_artifacts_config[_EXTRA_FILES_KEY].append({"path": rel_path})
            # The whole scratch directory becomes the model's extra-files directory.
            shutil.move(
                tmp_extra_files_dir.path(), posixpath.join(path, _EXTRA_FILES_KEY),
            )

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    if code_paths is not None:
        code_dir_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=code_dir_subpath)
    else:
        code_dir_subpath = None

    mlflow_model.add_flavor(
        FLAVOR_NAME,
        model_data=model_data_subpath,
        pytorch_version=torch.__version__,
        **torchserve_artifacts_config,
    )
    pyfunc.add_to_model(
        mlflow_model,
        loader_module="mlflow.pytorch",
        data=model_data_subpath,
        pickle_module_name=pickle_module.__name__,
        code=code_dir_subpath,
        env=conda_env_subpath,
    )
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def log(
    cls,
    artifact_path,
    flavor,
    registered_model_name=None,
    await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS,
    **kwargs,
):
    """
    Log model using supplied flavor module. If no run is active, this method will create a new
    active run.

    The model is first saved into a temporary directory by ``flavor.save_model`` and then
    uploaded as run artifacts under ``artifact_path``.

    :param artifact_path: Run relative path identifying the model.
    :param flavor: Flavor module to save the model with. The module must have
                   the ``save_model`` function that will persist the model as a valid
                   MLflow model.
    :param registered_model_name: If given, create a model version under
                                  ``registered_model_name``, also creating a registered model
                                  if one with the given name does not exist.
    :param signature: :py:class:`ModelSignature` describes model input
                      and output :py:class:`Schema <mlflow.types.Schema>`. The model signature
                      can be :py:func:`inferred <infer_signature>` from datasets representing
                      valid model input (e.g. the training dataset) and valid model output
                      (e.g. model predictions generated on the training dataset), for example:

                      .. code-block:: python

                        from mlflow.models.signature import infer_signature
                        train = df.drop_column("target_label")
                        signature = infer_signature(train, model.predict(train))

                      This is not a named parameter of this method; it is supplied through
                      ``kwargs`` and forwarded to ``flavor.save_model``.
    :param input_example: Input example provides one or several examples of
                          valid model input. The example can be used as a hint of what data to
                          feed the model. The given example will be converted to a Pandas
                          DataFrame and then serialized to json using the Pandas split-oriented
                          format. Bytes are base64-encoded. Like ``signature``, it is supplied
                          through ``kwargs``.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                                   being created and is in ``READY`` status. By default, the
                                   function waits for five minutes. Specify 0 or None to skip
                                   waiting.
    :param kwargs: Extra args passed to the model flavor.
    """
    fluent = mlflow.tracking.fluent
    with TempDir() as staging_dir:
        staged_model_path = staging_dir.path("model")
        current_run_id = fluent._get_or_start_run().info.run_id
        model_config = cls(artifact_path=artifact_path, run_id=current_run_id)

        # Persist locally with the flavor, then upload the whole directory.
        flavor.save_model(path=staged_model_path, mlflow_model=model_config, **kwargs)
        fluent.log_artifacts(staged_model_path, artifact_path)

        try:
            fluent._record_logged_model(model_config)
        except MlflowException:
            # We need to swallow all mlflow exceptions to maintain backwards compatibility with
            # older tracking servers. Only print out a warning for now.
            _logger.warning(_LOG_MODEL_METADATA_WARNING_TEMPLATE, mlflow.get_artifact_uri())

        if registered_model_name is None:
            return

        # Registration was requested: point the registry at the artifacts just logged.
        current_run_id = fluent.active_run().info.run_id
        model_source_uri = "runs:/%s/%s" % (current_run_id, artifact_path)
        mlflow.register_model(
            model_source_uri,
            registered_model_name,
            await_registration_for=await_registration_for,
        )
def deploy(
    model_uri,
    workspace,
    deployment_config=None,
    service_name=None,
    model_name=None,
    tags=None,
    mlflow_home=None,
    synchronous=True,
):
    """
    Register an MLflow model with Azure ML and deploy a webservice to Azure Container Instances
    (ACI) or Azure Kubernetes Service (AKS).

    The deployed service will contain a webserver that processes model queries.
    For information about the input data formats accepted by this webserver, see the
    :ref:`MLflow deployment tools documentation <azureml_deployment>`.

    :param model_uri: The location, in URI format, of the MLflow model used to build the Azure
                      ML deployment image. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param workspace: The AzureML workspace in which to deploy the service. This is a
                      `azureml.core.Workspace` object.
    :param deployment_config: The configuration for the Azure web service. This configuration
                              allows you to specify the resources the webservice will use and
                              the compute cluster it will be deployed in. If unspecified, the web
                              service will be deployed into a Azure Container Instance. This is a
                              `azureml.core.DeploymentConfig` object. For more information, see
                              `<https://docs.microsoft.com/python/api/azureml-core/
                              azureml.core.webservice.aks.aksservicedeploymentconfiguration>`_ and
                              `<https://docs.microsoft.com/en-us/python/api/azureml-core/azureml
                              .core.webservice.aci.aciservicedeploymentconfiguration>`_
    :param service_name: The name to assign the Azure Machine learning webservice that will be
                         created. If unspecified, a unique name will be generated.
    :param model_name: The name to assign the Azure Model that will be created. If unspecified,
                       a unique model name will be generated. Only used if the model is not
                       already registered with Azure.
    :param tags: A collection of tags, represented as a dictionary of string key-value pairs, to
                 associate with the Azure Model and Deployment that will be created.
                 These tags are added to a set of default tags that include the model uri,
                 and more. For more information, see
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model(class)?view=azure-ml-py>`_.
    :param mlflow_home: Path to a local copy of the MLflow GitHub repository. If specified, the
                        image will install MLflow from this directory. Otherwise, it will install
                        MLflow from pip.
    :param synchronous: If ``True``, this method blocks until the deployment
                        terminates before returning. If ``False``, the method returns
                        immediately, but the returned service will not be available until the
                        asynchronous creation process completes. Use the
                        ``azureml.core.Webservice.wait_for_deployment()`` function to wait
                        for the deployment process to complete.
    :return: A tuple containing the following elements in order:
            - An ``azureml.core.webservice.Webservice`` object containing metadata for the
            new service.
            - An ``azureml.core.model.Model`` object containing metadata for the new model.
    :raises MlflowException: If the model's recorded Python version is lower than 3.0.0.

    .. code-block:: python
        :caption: Example

        import mlflow.azureml
        from azureml.core import Workspace
        from azureml.core.webservice import AciWebservice, Webservice

        # Load or create an Azure ML Workspace
        workspace_name = "<Name of your Azure ML workspace>"
        subscription_id = "<Your Azure subscription ID>"
        resource_group = "<Name of the Azure resource group in which to create Azure ML resources>"
        location = "<Name of the Azure location (region) in which to create Azure ML resources>"
        azure_workspace = Workspace.create(name=workspace_name,
                                           subscription_id=subscription_id,
                                           resource_group=resource_group,
                                           location=location,
                                           create_resource_group=True,
                                           exist_ok=True)

        # Create an Azure Container Instance webservice for an MLflow model
        azure_service, azure_model = mlflow.azureml.deploy(model_uri="<model_uri>",
                                                           service_name="<deployment-name>",
                                                           workspace=azure_workspace,
                                                           synchronous=True)
    """
    # The Azure ML SDK is only compatible with Python 3. However, the `mlflow.azureml` module should
    # still be accessible for import from Python 2. Therefore, we will only import from the SDK
    # upon method invocation.
    # pylint: disable=import-error
    from azureml.core.model import Model as AzureModel, InferenceConfig
    from azureml.core import Environment as AzureEnvironment
    from azureml.core import VERSION as AZUREML_VERSION
    from azureml.core.webservice import AciWebservice

    absolute_model_path = _download_artifact_from_uri(model_uri)

    model_pyfunc_conf, model = _load_pyfunc_conf_with_model(model_path=absolute_model_path)
    model_python_version = model_pyfunc_conf.get(pyfunc.PY_VERSION, None)

    # `run_id` is used to derive resource names and always has a value; `run_id_tag` is only
    # set (and therefore only tagged) when the model actually records the run that produced it.
    run_id_tag = None
    try:
        run_id = model.run_id
        run_id_tag = run_id
    except AttributeError:
        run_id = str(uuid.uuid4())

    if model_python_version is not None and StrictVersion(model_python_version) < StrictVersion(
        "3.0.0"
    ):
        raise MlflowException(
            message=(
                "Azure ML can only deploy models trained in Python 3 and above. See"
                " the following MLflow GitHub issue for a thorough explanation of this"
                " limitation and a workaround to enable support for deploying models"
                " trained in Python 2: https://github.com/mlflow/mlflow/issues/668"
            ),
            error_code=INVALID_PARAMETER_VALUE,
        )

    tags = _build_tags(
        model_uri=model_uri,
        model_python_version=model_python_version,
        user_tags=tags,
        run_id=run_id_tag,
    )

    if service_name is None:
        service_name = _get_mlflow_azure_name(run_id)
    if model_name is None:
        model_name = _get_mlflow_azure_name(run_id)

    with TempDir(chdr=True) as tmp:
        model_directory_path = tmp.path("model")
        tmp_model_path = os.path.join(
            model_directory_path,
            _copy_file_or_tree(src=absolute_model_path, dst=model_directory_path),
        )

        registered_model = None
        azure_model_id = None

        # If we are passed a 'models' uri, we will attempt to extract a name and version which
        # can be used to retrieve an AzureML Model. This will ignore stage based model uris,
        # which is alright until we have full deployment plugin support.
        #
        # If instead we are passed a 'runs' uri while the user is using the AzureML tracking
        # and registry stores, we will be able to register the model on their behalf using
        # the AzureML plugin, which will maintain lineage between the model and the run that
        # produced it. This returns an MLFlow Model object however, so we'll still need the
        # name and ID in order to retrieve the AzureML Model object which is currently
        # needed to deploy.
        if model_uri.startswith("models:/"):
            uri_parts = model_uri.split("/")
            m_name = uri_parts[-2]
            try:
                m_version = int(uri_parts[-1])
            except ValueError:
                # The last URI segment is not a version number (e.g. a stage name such as
                # ``models:/<name>/<stage>``). No AzureML model ID can be derived from it, so
                # leave `azure_model_id` unset and let the model be registered below.
                m_version = None
            if m_version is not None:
                azure_model_id = "{}:{}".format(m_name, m_version)
        elif (
            model_uri.startswith("runs:/")
            and get_tracking_uri().startswith("azureml")
            and get_registry_uri().startswith("azureml")
        ):
            mlflow_model = mlflow_register_model(model_uri, model_name)
            azure_model_id = "{}:{}".format(mlflow_model.name, mlflow_model.version)

            _logger.info(
                "Registered an Azure Model with name: `%s` and version: `%s`",
                mlflow_model.name,
                mlflow_model.version,
            )

        # Attempt to retrieve an AzureML Model object which we intend to deploy
        if azure_model_id:
            try:
                registered_model = AzureModel(workspace, id=azure_model_id)
                _logger.info("Found registered model in AzureML with ID '%s'", azure_model_id)
            except Exception as e:  # pylint: disable=broad-except
                _logger.info(
                    "Unable to find model in AzureML with ID '%s', will register the model.\n"
                    "Exception was: %s",
                    azure_model_id,
                    e,
                )

        # If we have not found a registered model by this point, we will register it on the users'
        # behalf. It is required for a Model to be registered in some way with Azure in order to
        # deploy to Azure, so this is expected for Azure users.
        if not registered_model:
            registered_model = AzureModel.register(
                workspace=workspace, model_path=tmp_model_path, model_name=model_name, tags=tags
            )

            _logger.info(
                "Registered an Azure Model with name: `%s` and version: `%s`",
                registered_model.name,
                registered_model.version,
            )

        # Create an execution script (entry point) for the image's model server. Azure ML requires
        # the container's execution script to be located in the current working directory during
        # image creation, so we create the execution script as a temporary file in the current
        # working directory.
        execution_script_path = tmp.path("execution_script.py")
        _create_execution_script(output_path=execution_script_path, azure_model=registered_model)

        if pyfunc.ENV in model_pyfunc_conf:
            environment = AzureEnvironment.from_conda_specification(
                _get_mlflow_azure_name(run_id),
                os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV]),
            )
        else:
            environment = AzureEnvironment(_get_mlflow_azure_name(run_id))

        if mlflow_home is not None:
            path = tmp.path("dist")
            _logger.info("Bulding temporary MLFlow wheel in %s", path)
            wheel = _create_mlflow_wheel(mlflow_home, path)
            whl_url = AzureEnvironment.add_private_pip_wheel(
                workspace=workspace, file_path=wheel, exist_ok=True
            )
            environment.python.conda_dependencies.add_pip_package(whl_url)
        else:
            environment.python.conda_dependencies.add_pip_package(
                "mlflow=={}".format(mlflow_version)
            )

        # AzureML requires azureml-defaults to be installed to include
        # flask for the inference server.
        environment.python.conda_dependencies.add_pip_package(
            "azureml-defaults=={}".format(AZUREML_VERSION)
        )

        inference_config = InferenceConfig(
            entry_script=execution_script_path, environment=environment
        )

        if deployment_config is not None:
            if deployment_config.tags is not None:
                # We want more narrowly-scoped tags to win on merge
                tags.update(deployment_config.tags)
            deployment_config.tags = tags
        else:
            deployment_config = AciWebservice.deploy_configuration(tags=tags)

        # Finally, deploy the AzureML Model object to a webservice, and return back
        webservice = AzureModel.deploy(
            workspace=workspace,
            name=service_name,
            models=[registered_model],
            inference_config=inference_config,
            deployment_config=deployment_config,
        )
        _logger.info("Deploying an Azure Webservice with name: `%s`", webservice.name)
        if synchronous:
            webservice.wait_for_deployment(show_output=True)
        return webservice, registered_model
def run(training_data, max_runs, batch_size, max_p, epochs, metric, gpy_model, gpy_acquisition,
        initial_design, seed):
    """
    Search for the best ``lr`` / ``momentum`` pair with GPyOpt, training each candidate as a
    nested MLflow run of the ``train`` entry point.

    :param training_data: Forwarded to the ``train`` entry point as ``training_data``.
    :param max_runs: Passed to ``run_optimization``; a quarter of it (``max_runs >> 2``) is used
                     as the number of initial design points.
    :param batch_size: GPyOpt batch size.
    :param max_p: Passed to GPyOpt as ``num_cores``.
    :param epochs: Number of epochs for each (non-null) training run.
    :param metric: Metric suffix; the metrics ``train_<metric>``, ``val_<metric>`` and
                   ``test_<metric>`` are read from the training runs and logged.
    :param gpy_model: Passed to GPyOpt as ``model_type``.
    :param gpy_acquisition: Passed to GPyOpt as ``acquisition_type``.
    :param initial_design: Passed to GPyOpt as ``initial_design_type``.
    :param seed: Forwarded to the ``train`` entry point as ``seed``.
    """
    bounds = [
        {'name': 'lr', 'type': 'continuous', 'domain': (1e-5, 1e-1)},
        {'name': 'momentum', 'type': 'continuous', 'domain': (0.0, 1.0)},
    ]
    # Client used to read back the metrics / status of the launched training runs.
    tracking_client = mlflow.tracking.MlflowClient()

    train_key = "train_{}".format(metric)
    val_key = "val_{}".format(metric)
    test_key = "test_{}".format(metric)

    def new_eval(nepochs,
                 experiment_id,
                 null_train_loss,
                 null_valid_loss,
                 null_test_loss,
                 return_all=False):
        """
        Create a new eval function

        :param nepochs: Number of epochs to train the model.
        :param experiment_id: Experiment id for the training run
        :param null_train_loss: Loss of a null model on the training dataset
        :param null_valid_loss: Loss of a null model on the validation dataset
        :param null_test_loss: Loss of a null model on the test dataset.
        :param return_all: Return train, validation and test loss if set; otherwise only the
                           validation loss is returned.

        :return: new eval function.
        """

        def evaluate(params):
            """
            Train a model with the given parameters by invoking the ``train`` entry point as a
            nested MLflow run, then log the (capped) train / validation / test losses.

            :param params: A one-row sequence whose first row holds the values being optimized:
                           ``lr`` and ``momentum``.
            :return: The metric value evaluated on the validation data, or the
                     ``(train, validation, test)`` triple when ``return_all`` is set.
            """
            lr, momentum = params[0]
            with mlflow.start_run(nested=True) as child_run:
                p = mlflow.projects.run(
                    run_id=child_run.info.run_id,
                    uri=".",
                    entry_point="train",
                    parameters={
                        "training_data": training_data,
                        "epochs": str(nepochs),
                        "learning_rate": str(lr),
                        "momentum": str(momentum),
                        "seed": str(seed)},
                    experiment_id=experiment_id,
                    synchronous=False)
                succeeded = p.wait()
            if succeeded:
                training_run = tracking_client.get_run(p.run_id)
                metrics = training_run.data.metrics
                # cap each loss at the loss of the null model on the *same* dataset
                train_loss = min(null_train_loss, metrics[train_key])
                valid_loss = min(null_valid_loss, metrics[val_key])
                test_loss = min(null_test_loss, metrics[test_key])
            else:
                # run failed => return null loss
                tracking_client.set_terminated(p.run_id, "FAILED")
                train_loss = null_train_loss
                valid_loss = null_valid_loss
                test_loss = null_test_loss

            mlflow.log_metrics({
                train_key: train_loss,
                val_key: valid_loss,
                test_key: test_loss,
            })

            if return_all:
                return train_loss, valid_loss, test_loss
            return valid_loss

        return evaluate

    with mlflow.start_run() as run:
        experiment_id = run.info.experiment_id
        # Evaluate null model first.
        # We use null model (predict everything to the mean) as a reasonable upper bound on loss.
        # We need an upper bound to handle the failed runs (e.g. return NaNs) because GPyOpt can not
        # handle Infs.
        # Always including a null model in our results is also a good ML practice.
        train_null_loss, valid_null_loss, test_null_loss = new_eval(
            0, experiment_id, _inf, _inf, _inf, True)(params=[[0, 0]])
        myProblem = GPyOpt.methods.BayesianOptimization(
            new_eval(epochs, experiment_id, train_null_loss, valid_null_loss, test_null_loss),
            bounds,
            evaluator_type="local_penalization" if min(batch_size, max_p) > 1 else "sequential",
            batch_size=batch_size,
            num_cores=max_p,
            model_type=gpy_model,
            acquisition_type=gpy_acquisition,
            initial_design_type=initial_design,
            initial_design_numdata=max_runs >> 2,
            exact_feval=False)
        myProblem.run_optimization(max_runs)
        matplotlib.use('agg')
        plt.switch_backend('agg')
        with TempDir() as tmp:
            acquisition_plot = tmp.path("acquisition_plot.png")
            convergence_plot = tmp.path("convergence_plot.png")
            myProblem.plot_acquisition(filename=acquisition_plot)
            myProblem.plot_convergence(filename=convergence_plot)
            # Only log the plots that were actually written to disk.
            if os.path.exists(convergence_plot):
                mlflow.log_artifact(convergence_plot, "converegence_plot")
            if os.path.exists(acquisition_plot):
                mlflow.log_artifact(acquisition_plot, "acquisition_plot")

        # find the best run, log its metrics as the final metrics of this run.
        runs = tracking_client.search_runs(
            [experiment_id],
            "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=run.info.run_id))
        best_val_train = _inf
        best_val_valid = _inf
        best_val_test = _inf
        best_run = None
        for r in runs:
            # Compare on the metric that was actually optimized rather than a fixed name.
            if r.data.metrics[val_key] < best_val_valid:
                best_run = r
                best_val_train = r.data.metrics[train_key]
                best_val_valid = r.data.metrics[val_key]
                best_val_test = r.data.metrics[test_key]
        # No child run may beat the initial bound (e.g. the search returned nothing); in that
        # case there is no best run to tag and no meaningful final metrics to log.
        if best_run is not None:
            mlflow.set_tag("best_run", best_run.info.run_id)
            mlflow.log_metrics({
                train_key: best_val_train,
                val_key: best_val_valid,
                test_key: best_val_test,
            })
def test_categorical_columns(self):
    """
    This tests logging capabilities on datasets with categorical columns.
    See
    https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/get_started/regression/imports85.py
    for reference code.

    A DNNRegressor is trained on the imports-85 dataset, and its predictions on the training
    data are compared (to two decimals) against the predictions returned by ``self.helper``.
    """
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        # Downloading the data into a pandas DataFrame.
        URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
        csv_path = tf.contrib.keras.utils.get_file(URL.split("/")[-1], URL)
        # Order is important for the csv-readers, so we use an OrderedDict here.
        column_defaults = collections.OrderedDict([
            ("body-style", [""]),
            ("curb-weight", [0.0]),
            ("highway-mpg", [0.0]),
            ("price", [0.0]),
        ])
        column_types = collections.OrderedDict(
            (name, type(default[0])) for name, default in column_defaults.items())
        df = pandas.read_csv(csv_path, names=column_types.keys(), dtype=column_types,
                             na_values="?")
        df = df.dropna()

        # Extract the label from the features dataframe.
        y_train = df.pop("price")

        # Creating the input training function required.
        training_features = {column: df[column].values for column in df}
        input_train = tf.estimator.inputs.numpy_input_fn(training_features,
                                                         y_train.values,
                                                         shuffle=False,
                                                         batch_size=1)

        # Creating the feature columns required for the DNNRegressor.
        body_style = tf.feature_column.categorical_column_with_vocabulary_list(
            key="body-style",
            vocabulary_list=["hardtop", "wagon", "sedan", "hatchback", "convertible"])
        feature_columns = [
            tf.feature_column.numeric_column(key="curb-weight"),
            tf.feature_column.numeric_column(key="highway-mpg"),
            # Since this is a DNN model, convert categorical columns from sparse
            # to dense.
            # Wrap them in an `indicator_column` to create a
            # one-hot vector from the input.
            tf.feature_column.indicator_column(body_style),
        ]

        # Build a DNNRegressor, with 2x20-unit hidden layers, with the feature columns
        # defined above as input.
        estimator = tf.estimator.DNNRegressor(hidden_units=[20, 20],
                                              feature_columns=feature_columns)

        # Training the estimator.
        estimator.train(input_fn=input_train, steps=100)
        # Saving the estimator's prediction on the training data.
        estimator_preds = estimator.predict(input_train)

        # Setting the logging such that it is in the temp folder and deleted after the test.
        previous_tracking_uri = tracking.get_tracking_uri()
        tracking_dir = os.path.abspath(tmp.path("mlruns"))
        tracking.set_tracking_uri("file://%s" % tracking_dir)
        tracking.start_run()
        try:
            # Creating dict of features names (str) to placeholders (tensors)
            feature_spec = {
                "body-style": tf.placeholder("string", name="body-style", shape=[None]),
                "curb-weight": tf.placeholder("float", name="curb-weight", shape=[None]),
                "highway-mpg": tf.placeholder("float", name="highway-mpg", shape=[None]),
            }

            expected_predictions = [pred['predictions'] for pred in estimator_preds]

            loaded_predictions = self.helper(feature_spec, tmp, estimator, df)

            # Asserting that the loaded model predictions are as expected.
            # TensorFlow is known to have precision errors, hence the almost_equal.
            np.testing.assert_array_almost_equal(expected_predictions, loaded_predictions,
                                                 decimal=2)
        finally:
            # Restoring the old logging location.
            tracking.end_run()
            tracking.set_tracking_uri(previous_tracking_uri)
def _save_model_with_class_artifacts_params(path, python_model, artifacts=None, conda_env=None,
                                            code_paths=None, mlflow_model=None):
    """
    Save a ``PythonModel`` instance, its artifacts, its Conda environment and its code
    dependencies under ``path``, and write the resulting MLmodel configuration there.

    :param path: The path to which to save the Python model.
    :param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
                        defines how the model loads artifacts and how it performs inference.
    :param artifacts: A dictionary containing ``<name, artifact_uri>`` entries.
                      Remote artifact URIs
                      are resolved to absolute filesystem paths, producing a dictionary of
                      ``<name, absolute_path>`` entries. ``python_model`` can reference these
                      resolved entries as the ``artifacts`` property of the ``context``
                      attribute. If ``None``, no artifacts are added to the model.
    :param conda_env: Either a dictionary representation of a Conda environment or the
                      path to a Conda environment yaml file. If provided, this describes the
                      environment this model should be run in. At minimum, it should specify
                      the dependencies contained in :func:`get_default_conda_env()`. If
                      ``None``, the default :func:`get_default_conda_env()` environment is
                      added to the model.
    :param code_paths: A list of local filesystem paths to Python file dependencies (or
                       directories containing file dependencies). These files are *prepended*
                       to the system path before the model is loaded.
    :param mlflow_model: The model configuration to which to add the ``mlflow.pyfunc`` flavor.
                         If ``None``, a new ``Model()`` is created for this call.
    :raises MlflowException: If ``python_model`` is not an instance of ``PythonModel``.
    """
    # A `Model()` default in the signature would be created once and then shared (and mutated)
    # by every call that omits `mlflow_model`; build a fresh configuration per call instead.
    if mlflow_model is None:
        mlflow_model = Model()

    if not isinstance(python_model, PythonModel):
        raise MlflowException(
            message=(
                "`python_model` must be a subclass of `PythonModel`. Instead, found an"
                " object of type: {python_model_type}".format(
                    python_model_type=type(python_model))),
            error_code=INVALID_PARAMETER_VALUE,
        )

    custom_model_config_kwargs = {
        CONFIG_KEY_CLOUDPICKLE_VERSION: cloudpickle.__version__,
    }

    saved_python_model_subpath = "python_model.pkl"
    with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
        cloudpickle.dump(python_model, out)
    custom_model_config_kwargs[CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath

    if artifacts:
        saved_artifacts_config = {}
        with TempDir() as tmp_artifacts_dir:
            saved_artifacts_dir_subpath = "artifacts"
            for artifact_name, artifact_uri in artifacts.items():
                tmp_artifact_path = _download_artifact_from_uri(
                    artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path())
                # Record each artifact by its location relative to the model directory, using
                # POSIX separators for the recorded subpath.
                saved_artifact_subpath = posixpath.join(
                    saved_artifacts_dir_subpath,
                    os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()),
                )
                saved_artifacts_config[artifact_name] = {
                    CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
                    CONFIG_KEY_ARTIFACT_URI: artifact_uri,
                }

            # Move the whole download directory into the model directory in one step.
            shutil.move(tmp_artifacts_dir.path(),
                        os.path.join(path, saved_artifacts_dir_subpath))
        custom_model_config_kwargs[CONFIG_KEY_ARTIFACTS] = saved_artifacts_config

    conda_env_subpath = "conda.yaml"
    if conda_env is None:
        conda_env = get_default_conda_env()
    elif not isinstance(conda_env, dict):
        # Anything that is not a dict is treated as a path to a Conda yaml file.
        with open(conda_env, "r") as f:
            conda_env = yaml.safe_load(f)
    with open(os.path.join(path, conda_env_subpath), "w") as f:
        yaml.safe_dump(conda_env, stream=f, default_flow_style=False)

    saved_code_subpath = None
    if code_paths is not None:
        saved_code_subpath = "code"
        for code_path in code_paths:
            _copy_file_or_tree(src=code_path, dst=path, dst_dir=saved_code_subpath)

    mlflow.pyfunc.add_to_model(model=mlflow_model, loader_module=__name__,
                               code=saved_code_subpath, env=conda_env_subpath,
                               **custom_model_config_kwargs)
    mlflow_model.save(os.path.join(path, MLMODEL_FILE_NAME))
def build_image(model_uri, workspace, image_name=None, model_name=None,
                mlflow_home=None, description=None, tags=None, synchronous=True):
    """
    Register an MLflow model with Azure ML and build an Azure ML ContainerImage for deployment.
    The resulting image can be deployed as a web service to Azure Container Instances (ACI) or
    Azure Kubernetes Service (AKS).

    The resulting Azure ML ContainerImage will contain a webserver that processes model queries.
    For information about the input data formats accepted by this webserver, see the
    :ref:`MLflow deployment tools documentation <azureml_deployment>`.

    :param model_uri: The location, in URI format, of the MLflow model for which to build an
                      Azure ML deployment image, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see the
                      `Artifacts Documentation <https://www.mlflow.org/docs/latest/tracking.html#
                      supported-artifact-stores>`_.

    :param image_name: The name to assign the Azure Container Image that will be created. If
                       unspecified, a unique image name will be generated.
    :param model_name: The name to assign the Azure Model that will be created. If unspecified,
                       a unique model name will be generated.
    :param workspace: The AzureML workspace in which to build the image. This is a
                      `azureml.core.Workspace` object.
    :param mlflow_home: Path to a local copy of the MLflow GitHub repository. If specified, the
                        image will install MLflow from this directory. Otherwise, it will install
                        MLflow from pip.
    :param description: A string description to associate with the Azure Container Image and the
                        Azure Model that will be created. For more information, see
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.image.container.containerimageconfig>`_ and
                        `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                        azureml.core.model.model?view=azure-ml-py#register>`_.
    :param tags: A collection of tags, represented as a dictionary of string key-value pairs, to
                 associate with the Azure Container Image and the Azure Model that will be created.
                 These tags will be added to a set of default tags that include the model path,
                 the model run id (if specified), and more. For more information, see
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.image.container.containerimageconfig>`_ and
                 `<https://docs.microsoft.com/en-us/python/api/azureml-core/
                 azureml.core.model.model?view=azure-ml-py#register>`_.
    :param synchronous: If `True`, this method will block until the image creation procedure
                        terminates before returning. If `False`, the method will return immediately,
                        but the returned image will not be available until the asynchronous
                        creation process completes. The `azureml.core.Image.wait_for_creation()`
                        function can be used to wait for the creation process to complete.
    :return: A tuple containing the following elements in order:
             - An `azureml.core.image.ContainerImage` object containing metadata for the new image.
             - An `azureml.core.model.Model` object containing metadata for the new model.
    :raises MlflowException: If the model's recorded Python version is lower than 3.0.0.

    >>> import mlflow.azureml
    >>> from azureml.core import Workspace
    >>> from azureml.core.webservice import AciWebservice, Webservice
    >>>
    >>> # Load or create an Azure ML Workspace
    >>> workspace_name = "<Name of your Azure ML workspace>"
    >>> subscription_id = "<Your Azure subscription ID>"
    >>> resource_group = "<Name of the Azure resource group in which to create Azure ML resources>"
    >>> location = "<Name of the Azure location (region) in which to create Azure ML resources>"
    >>> azure_workspace = Workspace.create(name=workspace_name,
    >>>                                    subscription_id=subscription_id,
    >>>                                    resource_group=resource_group,
    >>>                                    location=location,
    >>>                                    create_resource_group=True,
    >>>                                    exist_ok=True)
    >>>
    >>> # Build an Azure ML Container Image for an MLflow model
    >>> azure_image, azure_model = mlflow.azureml.build_image(
    >>>                                 model_uri="<model_uri>",
    >>>                                 workspace=azure_workspace,
    >>>                                 synchronous=True)
    >>> # If your image build failed, you can access build logs at the following URI:
    >>> print("Access the following URI for build logs: {}".format(azure_image.image_build_log_uri))
    >>>
    >>> # Deploy the image to Azure Container Instances (ACI) for real-time serving
    >>> webservice_deployment_config = AciWebservice.deploy_configuration()
    >>> webservice = Webservice.deploy_from_image(
    >>>                    image=azure_image, workspace=azure_workspace, name="<deployment-name>")
    >>> webservice.wait_for_deployment()
    """
    # The Azure ML SDK is only compatible with Python 3. However, the `mlflow.azureml` module should
    # still be accessible for import from Python 2. Therefore, we will only import from the SDK
    # upon method invocation.
    # pylint: disable=import-error
    from azureml.core.image import ContainerImage
    from azureml.core.model import Model as AzureModel

    absolute_model_path = _download_artifact_from_uri(model_uri)

    model_pyfunc_conf = _load_pyfunc_conf(model_path=absolute_model_path)
    model_python_version = model_pyfunc_conf.get(pyfunc.PY_VERSION, None)
    trained_with_python2 = (
        model_python_version is not None
        and StrictVersion(model_python_version) < StrictVersion("3.0.0")
    )
    if trained_with_python2:
        raise MlflowException(
            message=(
                "Azure ML can only deploy models trained in Python 3 or above! Please see"
                " the following MLflow GitHub issue for a thorough explanation of this"
                " limitation and a workaround to enable support for deploying models"
                " trained in Python 2: https://github.com/mlflow/mlflow/issues/668"),
            error_code=INVALID_PARAMETER_VALUE)

    tags = _build_tags(model_uri=model_uri, model_python_version=model_python_version,
                       user_tags=tags)

    # Fall back to generated names for anything the caller did not name.
    if image_name is None:
        image_name = _get_mlflow_azure_resource_name()
    if model_name is None:
        model_name = _get_mlflow_azure_resource_name()

    with TempDir(chdr=True) as tmp:
        staged_model_dir = tmp.path("model")
        copied_model_subpath = _copy_file_or_tree(src=absolute_model_path, dst=staged_model_dir)
        tmp_model_path = os.path.join(staged_model_dir, copied_model_subpath)

        registered_model = AzureModel.register(workspace=workspace,
                                               model_path=tmp_model_path,
                                               model_name=model_name,
                                               tags=tags,
                                               description=description)
        _logger.info("Registered an Azure Model with name: `%s` and version: `%s`",
                     registered_model.name, registered_model.version)

        # Create an execution script (entry point) for the image's model server. Azure ML requires
        # the container's execution script to be located in the current working directory during
        # image creation, so we create the execution script as a temporary file in the current
        # working directory.
        execution_script_path = tmp.path("execution_script.py")
        _create_execution_script(output_path=execution_script_path, azure_model=registered_model)
        # Azure ML copies the execution script into the image's application root directory by
        # prepending "/var/azureml-app" to the specified script path. The script is then executed
        # by referencing its path relative to the "/var/azureml-app" directory. Unfortunately,
        # if the script path is an absolute path, Azure ML attempts to reference it directly,
        # resulting in a failure. To circumvent this problem, we provide Azure ML with the relative
        # script path. Because the execution script was created in the current working directory,
        # this relative path is the script path's base name.
        execution_script_path = os.path.basename(execution_script_path)

        image_file_dependencies = None
        if mlflow_home is not None:
            _logger.info(
                "Copying the specified mlflow_home directory: `%s` to a temporary location for"
                " container creation", mlflow_home)
            copied_project_subpath = _copy_project(src_path=mlflow_home, dst_path=tmp.path())
            # From here on `mlflow_home` refers to the staged copy, not the caller's directory.
            mlflow_home = os.path.join(tmp.path(), copied_project_subpath)
            image_file_dependencies = [mlflow_home]

        dockerfile_path = tmp.path("Dockerfile")
        _create_dockerfile(output_path=dockerfile_path, mlflow_path=mlflow_home)

        # Hand the model's Conda environment file to Azure ML only when the model declares one.
        if pyfunc.ENV in model_pyfunc_conf:
            conda_env_path = os.path.join(tmp_model_path, model_pyfunc_conf[pyfunc.ENV])
        else:
            conda_env_path = None

        image_configuration = ContainerImage.image_configuration(
            execution_script=execution_script_path,
            runtime="python",
            docker_file=dockerfile_path,
            dependencies=image_file_dependencies,
            conda_file=conda_env_path,
            description=description,
            tags=tags,
        )
        image = ContainerImage.create(workspace=workspace,
                                      name=image_name,
                                      image_config=image_configuration,
                                      models=[registered_model])
        _logger.info("Building an Azure Container Image with name: `%s` and version: `%s`",
                     image.name, image.version)
        if synchronous:
            image.wait_for_creation(show_output=True)
        return image, registered_model