Exemplo n.º 1
0
def _get_flavor_configuration_from_uri(model_uri, flavor_name):
    """
    Load the configuration of a single flavor from an MLflow model's
    ``MLMODEL_FILE_NAME`` file.

    The model file located under ``model_uri`` is downloaded, parsed with
    ``Model.load`` and the entry stored under ``flavor_name`` in its
    ``flavors`` mapping is returned.

    :param model_uri: The path to the root directory of the MLflow model for which to load
                      the specified flavor configuration.
    :param flavor_name: The name of the flavor configuration to load.
    :return: The flavor configuration stored under ``flavor_name``.
    :raises MlflowException: With ``RESOURCE_DOES_NOT_EXIST`` when the model file cannot be
                             downloaded, or when the model does not define ``flavor_name``.
    """
    try:
        # Building the URI stays inside the ``try`` so that any failure while
        # locating the model file is reported the same way as a failed download.
        mlmodel_uri = append_to_uri_path(model_uri, MLMODEL_FILE_NAME)
        ml_model_file = _download_artifact_from_uri(artifact_uri=mlmodel_uri)
    except Exception as ex:
        download_error = (
            'Failed to download an "{model_file}" model file from "{model_uri}": {ex}'
            .format(model_file=MLMODEL_FILE_NAME, model_uri=model_uri, ex=ex)
        )
        raise MlflowException(download_error, RESOURCE_DOES_NOT_EXIST)

    flavors = Model.load(ml_model_file).flavors
    if flavor_name in flavors:
        return flavors[flavor_name]

    missing_flavor_error = 'Model does not have the "{flavor_name}" flavor'.format(
        flavor_name=flavor_name)
    raise MlflowException(missing_flavor_error, RESOURCE_DOES_NOT_EXIST)
Exemplo n.º 2
0
    def create_run(self, experiment_id, user_id, start_time, tags):
        """
        Create a new run in the given experiment and store it in the database.

        The run is created with status ``RUNNING`` and lifecycle stage ``ACTIVE``.
        Its artifact URI is ``<experiment artifact location>/<run id>/<ARTIFACTS_FOLDER_NAME>``.

        :param experiment_id: ID of the experiment the run belongs to; the experiment is
                              checked with ``_check_experiment_is_active`` first.
        :param user_id: ID of the user recorded on the run.
        :param start_time: Start time recorded on the run.
        :param tags: Iterable of objects exposing ``key`` and ``value``. When a key occurs
                     more than once, the last value wins.
        :return: The result of ``to_mlflow_entity()`` on the newly saved run.
        """
        with self.ManagedSessionMaker() as session:
            experiment = self.get_experiment(experiment_id)
            self._check_experiment_is_active(experiment)

            new_run_id = uuid.uuid4().hex
            artifact_uri = append_to_uri_path(
                experiment.artifact_location,
                new_run_id,
                SqlAlchemyStore.ARTIFACTS_FOLDER_NAME,
            )
            run_fields = dict(
                name="",
                artifact_uri=artifact_uri,
                run_uuid=new_run_id,
                experiment_id=experiment_id,
                source_type=SourceType.to_string(SourceType.UNKNOWN),
                source_name="",
                entry_point_name="",
                user_id=user_id,
                status=RunStatus.to_string(RunStatus.RUNNING),
                start_time=start_time,
                end_time=None,
                source_version="",
                lifecycle_stage=LifecycleStage.ACTIVE,
            )
            run = SqlRun(**run_fields)

            # Collapse duplicate keys before building the tag rows: a later tag
            # overrides an earlier one with the same key.
            unique_tags = {tag.key: tag.value for tag in tags}
            run.tags = [SqlTag(key=k, value=v) for k, v in unique_tags.items()]
            self._save_to_db(objs=run, session=session)

            return run.to_mlflow_entity()
Exemplo n.º 3
0
 def _get_artifact_dir(self, experiment_id, run_uuid):
     """
     Build the artifact directory URI of a run.

     :param experiment_id: ID of the experiment whose artifact location is the base URI.
     :param run_uuid: ID of the run; checked with ``_validate_run_id`` before use.
     :return: ``<experiment artifact location>/<run_uuid>/<ARTIFACTS_FOLDER_NAME>``.
     """
     _validate_run_id(run_uuid)
     experiment_root = self.get_experiment(experiment_id).artifact_location
     path_parts = (run_uuid, FileStore.ARTIFACTS_FOLDER_NAME)
     return append_to_uri_path(experiment_root, *path_parts)
Exemplo n.º 4
0
def get_artifact_uri(run_id, artifact_path=None):
    """
    Get the absolute URI of the specified artifact in the specified run. If ``artifact_path``
    is not specified, the artifact root URI of the specified run is returned; calls to
    ``log_artifact`` and ``log_artifacts`` write artifact(s) to subdirectories of the artifact
    root URI.

    :param run_id: The ID of the run for which to obtain an absolute artifact URI.
    :param artifact_path: The run-relative artifact path. For example,
                          ``path/to/artifact``. If unspecified, the artifact root URI for the
                          specified run will be returned.
    :return: An *absolute* URI referring to the specified artifact or the specified run's artifact
             root. For example, if an artifact path is provided and the specified run uses an
             S3-backed store, this may be a URI of the form
             ``s3://<bucket_name>/path/to/artifact/root/path/to/artifact``. If an artifact path
             is not provided and the specified run uses an S3-backed store, this may be a URI of
             the form ``s3://<bucket_name>/path/to/artifact/root``.
    :raises MlflowException: With ``INVALID_PARAMETER_VALUE`` when ``run_id`` is falsy.
    """
    if not run_id:
        raise MlflowException(
            message="A run_id must be specified in order to obtain an artifact uri!",
            error_code=INVALID_PARAMETER_VALUE,
        )

    artifact_root = _get_store().get_run(run_id).info.artifact_uri
    # Maybe move this method to RunsArtifactRepository so the circular dependency is clearer.
    # A run whose own artifact root is a "runs" URI would send resolution in circles.
    root_scheme = urllib.parse.urlparse(artifact_root).scheme
    assert root_scheme != "runs"  # avoid an infinite loop

    if artifact_path is None:
        return artifact_root
    return append_to_uri_path(artifact_root, artifact_path)
Exemplo n.º 5
0
 def create_run(self, experiment_id: str, user_id: str, start_time: int,
                tags: List[RunTag]) -> Run:
     """
     Create a new run document in the given experiment and save it.

     The run is created with status ``RUNNING`` and lifecycle stage ``ACTIVE``.
     Its artifact URI is ``<experiment artifact location>/<run id>/<ARTIFACTS_FOLDER_NAME>``.

     :param experiment_id: ID of the experiment the run belongs to; the experiment is
                           checked with ``_check_experiment_is_active`` first.
     :param user_id: ID of the user recorded on the run.
     :param start_time: Start time recorded on the run.
     :param tags: Tags to attach. When a key occurs more than once, the last value wins.
     :return: The result of ``to_mlflow_entity()`` on the saved run.
     """
     new_run_id = uuid.uuid4().hex
     experiment = self._get_experiment(experiment_id)
     self._check_experiment_is_active(experiment)
     artifact_uri = append_to_uri_path(
         experiment.artifact_location,
         new_run_id,
         ElasticsearchStore.ARTIFACTS_FOLDER_NAME,
     )

     # Collapse duplicate keys: a later tag overrides an earlier one.
     unique_tags = {tag.key: tag.value for tag in tags}
     elastic_tags = [ElasticTag(key=k, value=v) for k, v in unique_tags.items()]

     run = ElasticRun(
         meta={'id': new_run_id},
         run_id=new_run_id,
         experiment_id=experiment_id,
         user_id=user_id,
         status=RunStatus.to_string(RunStatus.RUNNING),
         start_time=start_time,
         end_time=None,
         lifecycle_stage=LifecycleStage.ACTIVE,
         artifact_uri=artifact_uri,
         tags=elastic_tags,
     )
     run.save()
     return run.to_mlflow_entity()
Exemplo n.º 6
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """
     Create an experiment directory and write its metadata file.

     :param name: Name of the experiment.
     :param experiment_id: ID of the experiment; also used as the directory name.
     :param artifact_uri: Artifact location of the experiment. When falsy,
                          ``<artifact_root_uri>/<experiment_id>`` is used instead.
     :return: ``experiment_id``, unchanged.
     """
     if not artifact_uri:
         artifact_uri = append_to_uri_path(self.artifact_root_uri,
                                           str(experiment_id))
     self._check_root_dir()
     experiment_dir = mkdir(self.root_directory, str(experiment_id))
     experiment_dict = dict(
         Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE))
     # tags are added to the file system and are not written to this dict on write
     # As such, we should not include them in the meta file.
     del experiment_dict['tags']
     write_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME, experiment_dict)
     return experiment_id
Exemplo n.º 7
0
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    ``runs:/`` and ``models:/`` URIs are first resolved to their underlying artifact URI
    (the resolution is logged at INFO level). The flavor configuration is then read from
    the model, any code referenced by it is added to the system path, and the model data
    located at ``<model_uri>/<flavor_conf["model_data"]>`` is loaded.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    .. code-block:: python
        :caption: Example

        import mlflow

        model = mlflow.spark.load_model("spark-model")
        # ``spark`` below is assumed to be an active SparkSession.
        # Prepare test documents, which are unlabeled (id, text) tuples.
        test = spark.createDataFrame([
            (4, "spark i j k"),
            (5, "l m n"),
            (6, "spark hadoop spark"),
            (7, "apache hadoop")], ["id", "text"])
        # Make predictions on test documents
        prediction = model.transform(test)
    """
    # Pick the repository able to translate the URI, if it uses a special scheme.
    if RunsArtifactRepository.is_runs_uri(model_uri):
        resolver = RunsArtifactRepository
    elif ModelsArtifactRepository.is_models_uri(model_uri):
        resolver = ModelsArtifactRepository
    else:
        resolver = None

    if resolver is not None:
        original_uri = model_uri
        model_uri = resolver.get_underlying_uri(original_uri)
        _logger.info("'%s' resolved as '%s'", original_uri, model_uri)

    flavor_conf = _get_flavor_configuration_from_uri(model_uri, FLAVOR_NAME)
    model_uri = append_to_uri_path(model_uri, flavor_conf["model_data"])
    # The local copy is only used to expose bundled code on the system path;
    # the model itself is loaded from ``model_uri`` below.
    local_model_path = _download_artifact_from_uri(model_uri)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)

    return _load_model(model_uri=model_uri, dfs_tmpdir_base=dfs_tmpdir)
Exemplo n.º 8
0
def _get_flavor_backend(model_uri, **kwargs):
    """
    Select the flavor backend to use for the model at ``model_uri``.

    The model's ``MLmodel`` file is downloaded into a temporary directory and parsed;
    the parsed model and ``kwargs`` are then handed to ``get_flavor_backend``.

    :param model_uri: URI of the model. ``models:/`` URIs are resolved to their
                      underlying URI before the download.
    :param kwargs: Passed through unchanged to ``get_flavor_backend``.
    :return: The selected flavor backend.
    :raises Exception: When ``get_flavor_backend`` returns no backend.
    """
    with TempDir() as tmp:
        underlying_model_uri = (
            ModelsArtifactRepository.get_underlying_uri(model_uri)
            if ModelsArtifactRepository.is_models_uri(model_uri)
            else model_uri
        )
        mlmodel_uri = append_to_uri_path(underlying_model_uri, "MLmodel")
        local_path = _download_artifact_from_uri(mlmodel_uri,
                                                 output_path=tmp.path())
        # Parse while the temporary directory still exists.
        model = Model.load(local_path)

    flavor_name, flavor_backend = get_flavor_backend(model, **kwargs)
    if flavor_backend is not None:
        _logger.info("Selected backend for flavor '%s'", flavor_name)
        return flavor_backend
    raise Exception("No suitable flavor backend was found for the model.")
    def build_image_local_from_model_uri(self,
                                         model_uri,
                                         base_image,
                                         mlflow_home=None,
                                         **kwargs):
        """Build a PythonModel-backed service image from ``model_uri``.

        :param model_uri: directory containing the pyfunc model filesystem, see
                          `pyfunc-filename-system
                          <https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#pyfunc-filename-system>`_.
                          ``models:/`` URIs are resolved to their underlying URI to read
                          the model metadata.
        :param base_image: base image from which the model image is built.
        :param mlflow_home: local mlflow copy used to start the model service in the
                            container; if None, install from pip.
        :param kwargs: passed through unchanged to ``get_flavor_backend``.
        :return: ``True`` when the backend's ``build_image`` returns a falsy code,
                 ``False`` otherwise.
        :raises TypeError: when no flavor is selected for the model.
        :raises AttributeError: when the selected backend cannot build images.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            underlying_model_uri = (
                ModelsArtifactRepository.get_underlying_uri(model_uri)
                if ModelsArtifactRepository.is_models_uri(model_uri)
                else model_uri
            )

            mlmodel_uri = append_to_uri_path(underlying_model_uri,
                                             MLMODEL_FILE_NAME)
            local_path = _download_artifact_from_uri(mlmodel_uri,
                                                     output_path=tmp_dir)
            model_meta = Model.load(local_path)

            flavor_name, flavor_backend = get_flavor_backend(
                model_meta, **kwargs)
            if flavor_name is None:
                raise TypeError("no suitable backend was found for the model")

            if not flavor_backend.can_build_image():
                raise AttributeError(
                    'flavor {} not support build image'.format(flavor_name))

            # Always install mlflow so it overrides the official mlflow package
            # in the container. The backend receives the original ``model_uri``,
            # not the resolved one.
            build_options = dict(install_mlflow=True,
                                 mlflow_home=mlflow_home,
                                 base_image=base_image)
            return_code = flavor_backend.build_image(model_uri,
                                                     self.image_name,
                                                     **build_options)
            return not return_code
Exemplo n.º 10
0
 def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
     """
     Write a new, active experiment to the DynamoDB experiment table.

     :param name: Name of the experiment.
     :param experiment_id: ID of the experiment.
     :param artifact_uri: Artifact location of the experiment. When falsy,
                          ``<artifact_root_uri>/<experiment_id>`` is used instead.
     :return: ``experiment_id``, unchanged.
     :raises MlflowException: When the ``put_item`` response does not report HTTP 200.
     """
     if not artifact_uri:
         artifact_uri = append_to_uri_path(self.artifact_root_uri,
                                           str(experiment_id))
     dynamodb = self._get_dynamodb_resource()
     # Table name is "<table_prefix>_<EXPERIMENT_TABLE>".
     experiment_table = dynamodb.Table(
         "_".join([self.table_prefix, DynamodbStore.EXPERIMENT_TABLE]))
     experiment = Experiment(
         experiment_id=experiment_id,
         name=name,
         artifact_location=artifact_uri,
         lifecycle_stage=LifecycleStage.ACTIVE,
     )
     response = experiment_table.put_item(
         Item=_entity_to_dict(experiment),
         ReturnConsumedCapacity="TOTAL",
     )
     status_code = response["ResponseMetadata"]["HTTPStatusCode"]
     if status_code != 200:
         raise MlflowException("DynamoDB connection error")
     return experiment_id
 def _get_artifact_location(self, experiment_id):
     """
     Build the artifact location of an experiment.

     :param experiment_id: ID of the experiment; converted with ``str`` before use.
     :return: ``<artifact_root_uri>/<experiment_id>``.
     """
     experiment_segment = str(experiment_id)
     return append_to_uri_path(self.artifact_root_uri, experiment_segment)
Exemplo n.º 12
0
def validate_append_to_uri_path_test_cases(cases):
    """
    Assert that ``append_to_uri_path`` yields the expected URI for every case.

    Each case is checked twice: once with the path passed as a single argument, and once
    with the path split by ``posixpath.split`` and passed as separate arguments.

    :param cases: Iterable of ``(input_uri, input_path, expected_output_uri)`` triples.
    :raises AssertionError: When a produced URI differs from the expected one.
    """
    for base_uri, sub_path, expected_uri in cases:
        joined_whole = append_to_uri_path(base_uri, sub_path)
        assert joined_whole == expected_uri

        head_and_tail = posixpath.split(sub_path)
        joined_split = append_to_uri_path(base_uri, *head_and_tail)
        assert joined_split == expected_uri
Exemplo n.º 13
0
def log_explanation(predict_function, features, artifact_path=None):
    r"""
    Given a ``predict_function`` capable of computing ML model output on the provided ``features``,
    computes and logs explanations of an ML model's output. Explanations are logged as a directory
    of artifacts containing the following items generated by `SHAP`_ (SHapley Additive
    exPlanations).

        - Base values
        - SHAP values (computed using `shap.KernelExplainer`_)
        - Summary bar plot (shows the average impact of each feature on model output)

    Autologging is disabled while the explanation is computed and logged.

    :param predict_function:
        A function to compute the output of a model (e.g. ``predict_proba`` method of
        scikit-learn classifiers). Must have the following signature:

        .. code-block:: python

            def predict_function(X) -> pred:
                ...

        - ``X``: An array-like object whose shape should be (# samples, # features).
        - ``pred``: An array-like object whose shape should be (# samples) for
          a regressor or (# classes, # samples) for a classifier. For a classifier,
          the values in ``pred`` should correspond to the predicted probability of each class.

        Acceptable array-like object types:

            - ``numpy.array``
            - ``pandas.DataFrame``
            - ``shap.common.DenseData``
            - ``scipy.sparse matrix``

    :param features:
        A matrix of features to compute SHAP values with. The provided features should
        have shape (# samples, # features), and can be either of the array-like object
        types listed above.

        .. note::
            Background data for `shap.KernelExplainer`_ is generated by subsampling ``features``
            with `shap.kmeans`_. The background data size is limited to 100 rows for performance
            reasons.

    :param artifact_path:
        The run-relative artifact path to which the explanation is saved.
        If unspecified, defaults to "model_explanations_shap".

    :return: Artifact URI of the logged explanations.

    .. _SHAP: https://github.com/slundberg/shap

    .. _shap.KernelExplainer: https://shap.readthedocs.io/en/latest/generated
        /shap.KernelExplainer.html#shap.KernelExplainer

    .. _shap.kmeans: https://github.com/slundberg/shap/blob/v0.36.0/shap/utils/_legacy.py#L9

    .. code-block:: python
        :caption: Example

        import os

        import numpy as np
        from sklearn.datasets import load_diabetes
        from sklearn.linear_model import LinearRegression

        import mlflow

        # prepare training data
        X, y = load_diabetes(return_X_y=True, as_frame=True)
        X = X.iloc[:50, :4]
        y = y.iloc[:50]

        # train a model
        model = LinearRegression()
        model.fit(X, y)

        # log an explanation
        with mlflow.start_run() as run:
            mlflow.shap.log_explanation(model.predict, X)

        # list artifacts
        client = mlflow.tracking.MlflowClient()
        artifact_path = "model_explanations_shap"
        artifacts = [x.path for x in client.list_artifacts(run.info.run_id, artifact_path)]
        print("# artifacts:")
        print(artifacts)

        # load back the logged explanation
        dst_path = client.download_artifacts(run.info.run_id, artifact_path)
        base_values = np.load(os.path.join(dst_path, "base_values.npy"))
        shap_values = np.load(os.path.join(dst_path, "shap_values.npy"))

        print("\n# base_values:")
        print(base_values)
        print("\n# shap_values:")
        print(shap_values[:3])

    .. code-block:: text
        :caption: Output

        # artifacts:
        ['model_explanations_shap/base_values.npy',
         'model_explanations_shap/shap_values.npy',
         'model_explanations_shap/summary_bar_plot.png']

        # base_values:
        20.502000000000002

        # shap_values:
        [[ 2.09975523  0.4746513   7.63759026  0.        ]
         [ 2.00883109 -0.18816665 -0.14419184  0.        ]
         [ 2.00891772 -0.18816665 -0.14419184  0.        ]]

    .. figure:: ../_static/images/shap-ui-screenshot.png

        Logged artifacts
    """
    # Imported lazily so the plotting and SHAP packages are only needed when
    # an explanation is actually logged.
    import matplotlib.pyplot as plt
    import shap

    if artifact_path is None:
        artifact_path = _DEFAULT_ARTIFACT_PATH

    with mlflow.utils.autologging_utils.disable_autologging():
        # Summarize ``features`` into at most _MAXIMUM_BACKGROUND_DATA_SIZE
        # background samples for the kernel explainer.
        background_size = min(_MAXIMUM_BACKGROUND_DATA_SIZE, len(features))
        background_data = shap.kmeans(features, background_size)
        explainer = shap.KernelExplainer(predict_function, background_data)
        shap_values = explainer.shap_values(features)

        _log_numpy(explainer.expected_value, _BASE_VALUES_FILE_NAME, artifact_path)
        _log_numpy(shap_values, _SHAP_VALUES_FILE_NAME, artifact_path)

        # ``show=False`` leaves the plot on the current figure so it can be
        # fetched with ``gcf``, logged, and then closed.
        shap.summary_plot(shap_values, features, plot_type="bar", show=False)
        summary_figure = plt.gcf()
        summary_figure.tight_layout()
        _log_matplotlib_figure(summary_figure, _SUMMARY_BAR_PLOT_FILE_NAME,
                               artifact_path)
        plt.close(summary_figure)

    run_artifact_root = mlflow.active_run().info.artifact_uri
    return append_to_uri_path(run_artifact_root, artifact_path)