def load_model(model_uri, dfs_tmpdir=None):
    """
    Load a Spark MLlib model saved in MLflow format from ``model_uri``.

    ``runs:/`` and ``models:/`` URIs are first translated to their underlying
    artifact URI (the translation is logged at INFO level). The flavor
    configuration is then read from the model, its ``model_data`` entry is
    appended to the URI, the resulting artifact is downloaded, and the
    downloaded path is handed to ``_add_code_from_conf_to_system_path``
    together with the flavor configuration before the model itself is loaded.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or
                       local filesystem if running in local mode. The model is loaded from
                       this destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    .. code-block:: python
        :caption: Example

        import mlflow.spark

        model = mlflow.spark.load_model("spark-model")
        # ``spark`` below is assumed to be an active SparkSession.
        # Prepare test documents, which are unlabeled (id, text) tuples.
        test = spark.createDataFrame(
            [(4, "spark i j k"), (5, "l m n"), (6, "spark hadoop spark"), (7, "apache hadoop")],
            ["id", "text"],
        )
        # Make predictions on test documents
        prediction = model.transform(test)
    """
    requested_uri = model_uri

    # Translate indirect URI schemes into the artifact URI they point at.
    if RunsArtifactRepository.is_runs_uri(requested_uri):
        resolved_uri = RunsArtifactRepository.get_underlying_uri(requested_uri)
        was_redirected = True
    elif ModelsArtifactRepository.is_models_uri(requested_uri):
        resolved_uri = ModelsArtifactRepository.get_underlying_uri(requested_uri)
        was_redirected = True
    else:
        resolved_uri = requested_uri
        was_redirected = False

    if was_redirected:
        _logger.info("'%s' resolved as '%s'", requested_uri, resolved_uri)

    flavor_conf = _get_flavor_configuration_from_uri(resolved_uri, FLAVOR_NAME)

    # The flavor configuration names the sub-path that holds the model data.
    data_uri = append_to_uri_path(resolved_uri, flavor_conf["model_data"])

    local_data_path = _download_artifact_from_uri(data_uri)
    _add_code_from_conf_to_system_path(local_data_path, flavor_conf)

    return _load_model(model_uri=data_uri, dfs_tmpdir_base=dfs_tmpdir)
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load a Spark MLlib model saved in MLflow format from ``model_uri``.

    A ``runs:/`` URI is first translated to its underlying artifact URI (the
    translation is logged at INFO level). The flavor configuration is then
    read from the model and its ``model_data`` entry is joined onto the URI
    to locate the data that is actually loaded.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
                      artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or
                       local filesystem if running in local mode. The model is loaded from
                       this destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    >>> import mlflow.spark
    >>> model = mlflow.spark.load_model("spark-model")
    >>> # ``spark`` below is assumed to be an active SparkSession.
    >>> # Prepare test documents, which are unlabeled (id, text) tuples.
    >>> test = spark.createDataFrame([
    ...     (4, "spark i j k"),
    ...     (5, "l m n"),
    ...     (6, "spark hadoop spark"),
    ...     (7, "apache hadoop")], ["id", "text"])
    >>> # Make predictions on test documents.
    >>> prediction = model.transform(test)
    """
    resolved_uri = model_uri

    # Translate a runs:/ URI into the artifact URI it points at.
    if RunsArtifactRepository.is_runs_uri(model_uri):
        resolved_uri = RunsArtifactRepository.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", model_uri, resolved_uri)

    flavor_conf = _get_flavor_configuration_from_uri(resolved_uri, FLAVOR_NAME)

    # The flavor configuration names the sub-path that holds the model data.
    return _load_model(
        model_uri=posixpath.join(resolved_uri, flavor_conf["model_data"]),
        dfs_tmpdir=dfs_tmpdir,
    )