Exemplo n.º 1
0
def register_model(model_uri, name):
    """
    Register the model files at ``model_uri`` as a new model version under ``name``.

    The registered model is created first. If creation fails with
    ``RESOURCE_ALREADY_EXISTS`` the existing registered model is reused (a message is
    written with ``eprint``); any other :py:class:`MlflowException` is re-raised.
    This function assumes the model registry backend URI is the same as the tracking
    backend URI.

    :param model_uri: URI referring to the MLmodel directory. For a ``runs:/`` URI the
                      underlying artifact URI and the run ID are resolved and recorded with
                      the model version; any other URI is used as the source as-is with no
                      run ID. ``models:/`` URIs are currently not supported.
    :param name: Name of the registered model under which the version is created. It is
                 created automatically when it does not exist yet.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.
    """
    registry_client = MlflowClient()
    try:
        registry_client.create_registered_model(name)
    except MlflowException as e:
        # Only "already exists" is tolerated; everything else propagates to the caller.
        if e.error_code != ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            raise e
        eprint(
            "Registered model %s already exists. Using it to create a new version."
            % name)

    if not RunsArtifactRepository.is_runs_uri(model_uri):
        return registry_client.create_model_version(name, source=model_uri, run_id=None)

    # runs:/ URI: register the underlying artifact location together with its run ID.
    source = RunsArtifactRepository.get_underlying_uri(model_uri)
    (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
    return registry_client.create_model_version(name, source, run_id)
Exemplo n.º 2
0
def test_create_model_version_copy_not_called_to_nondb(mock_registry_store):
    """Check that no artifact upload is attempted when the registry URI is ``https://registry``."""
    upload_target = "mlflow.tracking.client._upload_artifacts_to_databricks"
    client = MlflowClient(tracking_uri="databricks://tracking", registry_uri="https://registry")
    mock_registry_store.create_model_version.return_value = _default_model_version()
    with mock.patch(upload_target) as upload_mock:
        client.create_model_version(
            "model name",
            "dbfs:/source",
            "run_12345",
            run_link="not:/important/for/test",
        )
        # The upload helper must never have been invoked.
        upload_mock.assert_not_called()
Exemplo n.º 3
0
def test_create_model_version_copy_called_db_to_db(mock_registry_store):
    """
    Check that the artifact upload helper is called exactly once, with the source, run ID and
    both URIs, when the tracking and registry URIs are both ``databricks://`` URIs.
    """
    tracking_uri = "databricks://tracking"
    registry_uri = "databricks://registry:workspace"
    upload_target = "mlflow.tracking.client._upload_artifacts_to_databricks"
    client = MlflowClient(tracking_uri=tracking_uri, registry_uri=registry_uri)
    mock_registry_store.create_model_version.return_value = ""
    with mock.patch(upload_target) as upload_mock:
        client.create_model_version(
            "model name",
            "dbfs:/source",
            "run_12345",
            run_link="not:/important/for/test",
        )
        upload_mock.assert_called_once_with(
            "dbfs:/source", "run_12345", tracking_uri, registry_uri
        )
Exemplo n.º 4
0
def test_create_model_version_non_ready_model(mock_registry_store):
    """
    Check that ``create_model_version`` raises :py:class:`MlflowException` when the store
    returns a model version whose status is ``FAILED_REGISTRATION``.
    """
    run_id = "runid"
    client = MlflowClient(tracking_uri="http://10.123.1231.11")
    failed_status = ModelVersionStatus.to_string(ModelVersionStatus.FAILED_REGISTRATION)
    failed_version = ModelVersion(
        "name", 1, 0, 1, source="source", run_id=run_id, status=failed_status
    )
    mock_registry_store.create_model_version.return_value = failed_version
    expected_message = "Model version creation failed for model name"
    with pytest.raises(MlflowException, match=expected_message):
        client.create_model_version("name", "source")
Exemplo n.º 5
0
def register_model(model_uri,
                   name,
                   await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS):
    """
    Create a new model version in model registry for the model files specified by ``model_uri``.
    Note that this method assumes the model registry backend URI is the same as that of the
    tracking backend.

    The registered model ``name`` is created first; if it already exists
    (``RESOURCE_ALREADY_EXISTS``) it is reused, and any other :py:class:`MlflowException` is
    re-raised. Progress messages are written with ``eprint``.

    :param model_uri: URI referring to the MLmodel directory. Use a ``runs:/`` URI if you want to
                      record the run ID with the model in model registry. ``models:/`` URIs are
                      currently not supported.
    :param name: Name of the registered model under which to create a new model version. If a
                 registered model with the given name does not exist, it will be created
                 automatically.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and is in ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.
                            Forwarded as ``await_creation_for`` for every kind of ``model_uri``.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.
    """
    client = MlflowClient()
    try:
        create_model_response = client.create_registered_model(name)
        eprint("Successfully registered model '%s'." %
               create_model_response.name)
    except MlflowException as e:
        if e.error_code == ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            eprint(
                "Registered model '%s' already exists. Creating a new version of this model..."
                % name)
        else:
            # Bare raise keeps the original traceback intact.
            raise

    if RunsArtifactRepository.is_runs_uri(model_uri):
        source = RunsArtifactRepository.get_underlying_uri(model_uri)
        (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
        # The wait setting must be forwarded here too; previously it was dropped for
        # runs:/ URIs, so the caller's await_registration_for was silently ignored.
        create_version_response = client.create_model_version(
            name, source, run_id, await_creation_for=await_registration_for)
    else:
        create_version_response = client.create_model_version(
            name,
            source=model_uri,
            run_id=None,
            await_creation_for=await_registration_for)
    eprint("Created version '{version}' of model '{model_name}'.".format(
        version=create_version_response.version,
        model_name=create_version_response.name))
    return create_version_response
Exemplo n.º 6
0
def test_create_model_version_run_link_with_configured_profile(
        mock_registry_store):
    """
    With ``is_in_databricks_notebook`` patched to ``False`` and the workspace info supplied by
    ``get_workspace_info_from_databricks_secrets``, check that the registry store receives
    the run URL built by ``construct_run_url``.
    """
    run_id = "runid"
    experiment_id = "test-exp-id"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    workspace_url = construct_run_url(hostname, experiment_id, run_id,
                                      workspace_id)
    tracked_run = Run(
        RunInfo(run_id, experiment_id, "userid", "status", 0, 1, None), None)
    get_run_mock = mock.MagicMock(return_value=tracked_run)

    outside_notebook = mock.patch(
        "mlflow.tracking.client.is_in_databricks_notebook",
        return_value=False)
    workspace_from_secrets = mock.patch(
        "mlflow.tracking.client.get_workspace_info_from_databricks_secrets",
        return_value=(hostname, workspace_id))
    with outside_notebook, workspace_from_secrets:
        client = MlflowClient(tracking_uri="databricks",
                              registry_uri="otherplace")
        client.get_run = get_run_mock
        stored_version = ModelVersion(
            "name", 1, 0, 1, source="source", run_id=run_id, run_link=workspace_url)
        mock_registry_store.create_model_version.return_value = stored_version
        created = client.create_model_version("name", "source", "runid")
        assert created.run_link == workspace_url
        # The store must have been handed the URL generated by the client.
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", "source", "runid", [], workspace_url, None)
Exemplo n.º 7
0
def register(run_id: str, experiment_id: int, model_name: str, block: bool):
    """
    Register the run's model artifacts as a new version of ``model_name`` and move that
    version to the "Staging" stage.

    :param run_id: Run whose ``artifacts/model`` directory is registered.
    :param experiment_id: Experiment ID used to build the DBFS source path.
    :param model_name: Registered model name; created when no listed model has this name.
    :param block: When true, poll the version status up to 60 times, sleeping 5 seconds
                  between polls, until it is ``READY``. If it never becomes ready, a timeout
                  message is printed and the version is not staged.
    """
    client = MlflowClient(tracking_uri="databricks")
    registered_names = [m.name for m in client.list_registered_models()]
    if model_name not in registered_names:
        client.create_registered_model(model_name)
    source = f"dbfs:/databricks/mlflow-tracking/{experiment_id}/{run_id}/artifacts/model"
    model_details = client.create_model_version(model_name, source, run_id)

    if block:
        for _ in range(60):
            current = client.get_model_version(
                name=model_name,
                version=model_details.version,
            )
            status = ModelVersionStatus.from_string(current.status)
            print("Model status: %s" % ModelVersionStatus.to_string(status))
            if status == ModelVersionStatus.READY:
                break
            time.sleep(5)
        else:
            # Loop ran out without ever seeing READY.
            print("Timeout waiting on registration of model.  Will not stage model.")
            return
    client.transition_model_version_stage(model_name, model_details.version, "Staging")
Exemplo n.º 8
0
def test_create_model_version_run_link_in_notebook_with_default_profile(
        mock_registry_store):
    """
    With ``is_in_databricks_notebook`` patched to ``True`` and the workspace info supplied by
    ``get_workspace_info_from_dbutils``, check that the registry store receives the run URL
    built by ``construct_run_url``.
    """
    run_id = "runid"
    experiment_id = "test-exp-id"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    workspace_url = construct_run_url(hostname, experiment_id, run_id,
                                      workspace_id)
    tracked_run = Run(
        RunInfo(run_id, experiment_id, "userid", "status", 0, 1, None), None)
    get_run_mock = mock.MagicMock(return_value=tracked_run)

    inside_notebook = mock.patch(
        "mlflow.tracking.client.is_in_databricks_notebook",
        return_value=True)
    workspace_from_dbutils = mock.patch(
        "mlflow.tracking.client.get_workspace_info_from_dbutils",
        return_value=(hostname, workspace_id))
    with inside_notebook, workspace_from_dbutils:
        client = MlflowClient(tracking_uri="databricks",
                              registry_uri="otherplace")
        client.get_run = get_run_mock
        stored_version = ModelVersion(
            "name", 1, 0, 1, source="source", run_id=run_id, run_link=workspace_url)
        mock_registry_store.create_model_version.return_value = stored_version
        created = client.create_model_version("name", "source", "runid")
        assert created.run_link == workspace_url
        # The store must have been handed the URL generated by the client.
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", "source", "runid", [], workspace_url)
Exemplo n.º 9
0
def test_create_model_version_nondatabricks_source_no_run_id(
        mock_registry_store):
    """
    Check that creating a model version without a run ID returns the store's version and
    calls the store with ``None`` as the run ID.
    """
    client = MlflowClient(tracking_uri="http://10.123.1231.11")
    stored_version = ModelVersion("name", 1, 0, 1, source="source")
    mock_registry_store.create_model_version.return_value = stored_version

    created = client.create_model_version("name", "source")

    assert created.name == "name"
    assert created.source == "source"
    assert created.run_id is None
    # The store must not have been given a run ID.
    expected_store_args = ("name", "source", None, [], None, None)
    mock_registry_store.create_model_version.assert_called_once_with(*expected_store_args)
Exemplo n.º 10
0
def test_create_model_version_nondatabricks_source_no_runlink(mock_registry_store):
    """
    Check that, for an ``http://`` tracking URI, the store is called with the run ID but
    with ``None`` as the run link.
    """
    run_id = "runid"
    client = MlflowClient(tracking_uri="http://10.123.1231.11")
    stored_version = ModelVersion("name", 1, 0, 1, source="source", run_id=run_id)
    mock_registry_store.create_model_version.return_value = stored_version

    created = client.create_model_version("name", "source", "runid")

    assert created.name == "name"
    assert created.source == "source"
    assert created.run_id == "runid"
    # The store must not have been given a run link.
    expected_store_args = ("name", "source", "runid", [], None)
    mock_registry_store.create_model_version.assert_called_once_with(*expected_store_args)
Exemplo n.º 11
0
def test_create_model_version_explicitly_set_run_link(mock_registry_store):
    """
    Check that a ``run_link`` passed explicitly to ``create_model_version`` is forwarded to
    the store unchanged, even while the notebook/workspace helpers are patched to report a
    Databricks notebook.
    """
    run_id = "runid"
    run_link = "my-run-link"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    stored_version = ModelVersion(
        "name", 1, 0, 1, source="source", run_id=run_id, run_link=run_link)
    mock_registry_store.create_model_version.return_value = stored_version

    # Pretend to be inside a notebook: the explicit run link must still be respected.
    inside_notebook = mock.patch(
        "mlflow.tracking.client.is_in_databricks_notebook", return_value=True)
    workspace_from_dbutils = mock.patch(
        "mlflow.tracking.client.get_workspace_info_from_dbutils",
        return_value=(hostname, workspace_id))
    with inside_notebook, workspace_from_dbutils:
        client = MlflowClient(tracking_uri="databricks", registry_uri="otherplace")
        created = client.create_model_version("name", "source", "runid", run_link=run_link)
        assert created.run_link == run_link
        # The store must have received the explicitly supplied run link.
        mock_registry_store.create_model_version.assert_called_once_with(
            "name", "source", "runid", [], run_link)
Exemplo n.º 12
0
def test_client_registry_operations_raise_exception_with_unsupported_registry_store():
    """
    Check that Model Registry operations on an ``MlflowClient`` fail with the
    ``FEATURE_DISABLED`` error code when the registry URI refers to a store that does not
    support Model Registry features (e.g., FileStore). A temporary directory path is used
    as the registry URI.
    """
    with TempDir() as tmp:
        client = MlflowClient(registry_uri=tmp.path())
        registry_operations = (
            client._get_registry_client,
            lambda: client.create_registered_model("test"),
            lambda: client.get_registered_model("test"),
            lambda: client.create_model_version("test", "source", "run_id"),
            lambda: client.get_model_version("test", 1),
        )
        for operation in registry_operations:
            with pytest.raises(MlflowException) as exc:
                operation()
            # Every operation must report the same "feature disabled" error code.
            assert exc.value.error_code == ErrorCode.Name(FEATURE_DISABLED)
Exemplo n.º 13
0
def test_create_model_version_explicitly_set_run_link(mock_registry_store):
    """
    Check that a ``run_link`` passed explicitly to ``create_model_version`` is forwarded to
    the store unchanged, even while the notebook/workspace helpers are patched to report a
    Databricks notebook.
    """
    run_id = "runid"
    run_link = "my-run-link"
    hostname = "https://workspace.databricks.com/"
    workspace_id = "10002"
    stored_version = ModelVersion(
        "name", 1, 0, 1, source="source", run_id=run_id, run_link=run_link
    )
    mock_registry_store.create_model_version.return_value = stored_version

    # Pretend to be inside a notebook: the explicit run link must still be respected.
    inside_notebook = mock.patch(
        "mlflow.tracking.client.is_in_databricks_notebook", return_value=True
    )
    workspace_from_dbutils = mock.patch(
        "mlflow.tracking.client.get_workspace_info_from_dbutils",
        return_value=(hostname, workspace_id),
    )
    with inside_notebook, workspace_from_dbutils:
        client = MlflowClient(tracking_uri="databricks", registry_uri="otherplace")
        created = client.create_model_version("name", "source", "runid", run_link=run_link)
        assert created.run_link == run_link
        # The store must have received the explicitly supplied run link.
        expected_store_args = ("name", "source", "runid", [], run_link, None)
        mock_registry_store.create_model_version.assert_called_once_with(*expected_store_args)
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from sklearn.ensemble import RandomForestRegressor


if __name__ == "__main__":
    # Script flow: fit a tiny random forest, log it in an MLflow run, register it, then
    # print the stages reported for the new model version.

    mlflow.set_tracking_uri("sqlite:///mlruns.db")
    params = {"n_estimators": 3, "random_state": 42}
    name = "RandomForestRegression"
    rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])

    # Log the parameters and the fitted model inside a single run
    with mlflow.start_run() as run:
        mlflow.log_params(params)
        mlflow.sklearn.log_model(rfr, artifact_path="models/sklearn-model")

    # Create the registered model entry that will hold the versions
    client = MlflowClient()
    client.create_registered_model(name)

    # Register the logged model as a new version and query its stages
    logged_run_id = run.info.run_id
    model_uri = "runs:/{}/models/sklearn-model".format(logged_run_id)
    mv = client.create_model_version(name, model_uri, logged_run_id)
    stages = client.get_model_version_stages(name, mv.version)
    print("Model list of valid stages: {}".format(stages))
Exemplo n.º 15
0
        if not os.path.exists("outputs"):
            os.makedirs("outputs")
        with open("outputs/test.txt", "w") as f:
            f.write("Looks, like I logged to the local store!")

        log_artifacts("outputs")
        shutil.rmtree('outputs')
        run_id = run.info.run_uuid
    #
    # register with model registry
    # on a local host
    #
    client = MlflowClient()
    result = mlflow.register_model(f"runs:/{run_id}/artifacts/sklearn-model",
                                   model_name)
    print(result)

    result = client.create_model_version(
        name="WeatherForecastModel",
        source=f"mlruns/0/{run_id}/artifacts/sk-model",
        run_id=run_id)
    print(result)
    [pp(dict(rm), indent=4) for rm in client.list_registered_models()]
    # Get a list of specific versions of the named models
    print(f"List of Model = {model_name} and Versions")
    print("=" * 80)
    [
        pp(dict(mv), indent=4)
        for mv in client.search_model_versions("name='WeatherForecastModel'")
    ]
# COMMAND ----------

import posixpath

# Rebuild the model source URI from the artifact URI and artifact path.
source = posixpath.join(
    artifact_uri, artifact_path
)  # we preserved the model artifact dbfs path from the source workspace
try:
    remote_client.create_registered_model(model_name)
except Exception as e:
    # Only an "already exists" failure is tolerated. getattr() is used because a
    # generic Exception is not guaranteed to carry an `error_code` attribute.
    if getattr(e, "error_code", None) == 'RESOURCE_ALREADY_EXISTS':
        print(e)
    else:
        # `throw` is not a Python construct; re-raise the original exception instead.
        raise
mv = remote_client.create_model_version(
    model_name, source,
    run_id)  # `source` must point to the DBFS location in the new workspace
print(mv)

# COMMAND ----------

# MAGIC %md At this point, if you log into the registry workspace you should see the new model version.

# COMMAND ----------

# MAGIC %md ##### Optionally, write `<SourceWorkspaceId>` and `<RunID>` as the model version description for lineage tracking.
# MAGIC The `Source Run` field will not show up on the model version page in the registry workspace because the run is not known to that workspace.

# COMMAND ----------

# Note: if you are okay with exposing the experiment ID to those who can read the model version in the registry workspace,
Exemplo n.º 17
0
    sk_learn_rfr = RandomForestRegressor(**params)

    # Log parameters and metrics using the MLflow APIs
    mlflow.log_params(params)
    mlflow.log_param("param_1", randint(0, 100))
    mlflow.log_metrics({"metric_1": random(), "metric_2": random()})

    # log the sklearn model and register as version 1
    mlflow.sklearn.log_model(sk_model=sk_learn_rfr,
                             artifact_path="sklearn-model")

# One shared name for creating, versioning and loading the registered model. The original
# code created "sk-learn-random_forest-reg-model" (underscore) but versioned and loaded
# "sk-learn-random-forest-reg-model" (hyphens), i.e. two different registry entries.
model_name = "sk-learn-random-forest-reg-model"

client = MlflowClient()
client.create_registered_model(model_name)

# NOTE(review): `source` and `run_id` are empty strings here -- presumably placeholders to be
# filled in with the logged model's artifact location and run ID; confirm before running.
client.create_model_version(name=model_name,
                            source="",
                            run_id="")

# Fetching an mlflow model from the model registry

import mlflow.pyfunc

model_version = 1

# NOTE(review): empty placeholder input for the prediction call below.
data = ""

model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{model_name}/{model_version}")

model.predict(data)
Exemplo n.º 18
0
# MAGIC https://docs.microsoft.com/fr-fr/azure/databricks/applications/machine-learning/manage-model-lifecycle/

# COMMAND ----------

# NOTE(review): "<model-path>" and "<model-name>" look like placeholders to be replaced before
# running. The URI also lacks the "/" after "runs:" that the `runs:/...` URI built further
# down in this notebook has -- confirm the intended form.
result = mlflow.register_model("runs:<model-path>", "<model-name>")

# COMMAND ----------

from mlflow.tracking import MlflowClient

# Create the registered model, then attach the run's "sklearn-model" artifact to it as a
# new, described model version.
name = "spark-lr-registered-model"
desc = "A new version of the model"

client = MlflowClient()
client.create_registered_model(name)

model_uri = "runs:/{}/sklearn-model".format(run.info.run_id)
mv = client.create_model_version(
    name, model_uri, run.info.run_id, description=desc)

# COMMAND ----------

# MAGIC %md mlflow.pyfunc

# COMMAND ----------

import mlflow.pyfunc

# Log `tunedModel.bestModel` through the pyfunc flavor, passing "spark-model" as the second
# positional argument.
# NOTE(review): verify the argument order against the `mlflow.pyfunc.log_model` signature
# (its first parameter is presumably the artifact path, not the model), and whether a
# flavor-specific `log_model` is the intended call for this model -- confirm.
mlflow.pyfunc.log_model(tunedModel.bestModel, "spark-model")
Exemplo n.º 19
0
def register_model(model_uri,
                   name,
                   await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS):
    """
    Register the model files at ``model_uri`` as a new model version in the model registry.

    The registered model ``name`` is created first; when it already exists
    (``RESOURCE_ALREADY_EXISTS``) it is reused, and any other :py:class:`MlflowException`
    is re-raised. Progress messages are written with ``eprint``. This function assumes the
    model registry backend URI is the same as the tracking backend URI.

    :param model_uri: URI referring to the MLmodel directory. Use a ``runs:/`` URI if you want to
                      record the run ID with the model in model registry. ``models:/`` URIs are
                      currently not supported.
    :param name: Name of the registered model under which to create a new model version. If a
                 registered model with the given name does not exist, it will be created
                 automatically.
    :param await_registration_for: Number of seconds to wait for the model version to finish
                            being created and reach ``READY`` status. By default, the function
                            waits for five minutes. Specify 0 or None to skip waiting.
    :return: Single :py:class:`mlflow.entities.model_registry.ModelVersion` object created by
             backend.

    .. code-block:: python
        :caption: Example

        import mlflow.sklearn
        from sklearn.ensemble import RandomForestRegressor

        mlflow.set_tracking_uri("sqlite:////tmp/mlruns.db")
        params = {"n_estimators": 3, "random_state": 42}

        # Log MLflow entities
        with mlflow.start_run() as run:
           rfr = RandomForestRegressor(**params).fit([[0, 1]], [1])
           mlflow.log_params(params)
           mlflow.sklearn.log_model(rfr, artifact_path="sklearn-model")

        model_uri = "runs:/{}/sklearn-model".format(run.info.run_id)
        mv = mlflow.register_model(model_uri, "RandomForestRegressionModel")
        print("Name: {}".format(mv.name))
        print("Version: {}".format(mv.version))

    .. code-block:: text
        :caption: Output

        Name: RandomForestRegressionModel
        Version: 1
    """
    registry_client = MlflowClient()
    try:
        registered = registry_client.create_registered_model(name)
        eprint("Successfully registered model '%s'." % registered.name)
    except MlflowException as e:
        # Only "already exists" is tolerated; everything else propagates to the caller.
        if e.error_code != ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            raise e
        eprint(
            "Registered model '%s' already exists. Creating a new version of this model..."
            % name)

    if not RunsArtifactRepository.is_runs_uri(model_uri):
        # Plain URI: use it as the source directly, with no run ID attached.
        new_version = registry_client.create_model_version(
            name,
            source=model_uri,
            run_id=None,
            await_creation_for=await_registration_for)
    else:
        # runs:/ URI: register the underlying artifact location together with its run ID.
        source = RunsArtifactRepository.get_underlying_uri(model_uri)
        (run_id, _) = RunsArtifactRepository.parse_runs_uri(model_uri)
        new_version = registry_client.create_model_version(
            name, source, run_id, await_creation_for=await_registration_for)

    created_message = "Created version '{version}' of model '{model_name}'.".format(
        version=new_version.version,
        model_name=new_version.name)
    eprint(created_message)
    return new_version
  #mlflow.log_artifact("obs_vs_predict.png")
  
  run = mlflow.active_run()
  print("Active run_id: {}".format(run.info.run_id))


# COMMAND ----------

# Register model
## DO NOT RUN (already done by the cell above)
#https://www.mlflow.org/docs/latest/model-registry.html#registering-a-model

# NOTE(review): the run ID embedded in the `source` path differs from the `run_id` argument
# -- confirm that this is intended.
client = MlflowClient()
result = client.create_model_version(
    name="ridge-registered-model",
    source="dbfs:/databricks/mlflow-tracking/1051022099699898/4a4cd9fb4a034066a4a635eedd8b5a65/artifacts/ridge-model",
    run_id="2ca9694883284c55948d49e7254f6a23",
)


# COMMAND ----------

# hyperparameters tuning (grid search)
from sklearn.model_selection import GridSearchCV

# Candidate values tried for the Ridge `alpha` parameter.
dico_param = {'alpha': [1e-3, 1e-2, 1e-1, 1]}
# Grid search over `dico_param` with cv=5, scored with 'neg_mean_squared_error'.
search_hyperp_ridge = GridSearchCV(Ridge(), dico_param, scoring='neg_mean_squared_error', cv = 5)
# NOTE(review): X_train is passed as both the features and the target here; presumably the
# second argument should be the training target -- confirm and fix.
search_hyperp_ridge.fit(X_train, X_train)
# The predictions for X_test are computed but the result is not stored.
search_hyperp_ridge.predict(X_test)

print(search_hyperp_ridge.best_params_)