Example #1
File: test_log.py  Project: tnixon/mlflow
def test_log_explanation_doesnt_create_autologged_run():
    mlflow.sklearn.autolog(disable=False, exclusive=False)
    dataset = sklearn.datasets.load_boston()
    X = pd.DataFrame(dataset.data[:50, :8], columns=dataset.feature_names[:8])
    y = dataset.target[:50]
    model = sklearn.linear_model.LinearRegression()
    model.fit(X, y)

    with mlflow.start_run() as run:
        mlflow.shap.log_explanation(model.predict, X)

    run_data = MlflowClient().get_run(run.info.run_id).data
    metrics, params, tags = run_data.metrics, run_data.params, run_data.tags
    assert not metrics
    assert not params
    assert all("mlflow." in key for key in tags)
    assert "mlflow.autologging" not in tags
Example #2
def test_client_create_run(mock_store, mock_user_id, mock_time):

    experiment_id = mock.Mock()

    MlflowClient().create_run(experiment_id)

    mock_store.create_run.assert_called_once_with(
        experiment_id=experiment_id,
        user_id=mock_user_id,
        run_name=None,
        start_time=int(mock_time * 1000),
        tags=[],
        parent_run_id=None,
        source_type=SourceType.LOCAL,
        source_name="Python Application",
        entry_point_name=None,
        source_version=None)
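This test leans on `mock_store`, `mock_user_id`, and `mock_time` fixtures that the listing does not show. A hedged sketch of two of them; the patch targets are assumptions chosen for illustration, not MLflow's actual conftest:

import time
from unittest import mock

import pytest

@pytest.fixture
def mock_store():
    # assumed patch target: the factory MLflow uses to resolve its tracking store
    with mock.patch("mlflow.tracking._tracking_service.utils._get_store") as factory:
        yield factory.return_value

@pytest.fixture
def mock_time():
    # freeze time so `start_time=int(mock_time * 1000)` is deterministic
    t = 1_600_000_000.0
    with mock.patch("time.time", return_value=t):
        yield t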
Example #3
def test_get_model_name_and_version_with_stage():
    with mock.patch.object(
        MlflowClient,
        "get_latest_versions",
        return_value=[
            ModelVersion(
                name="mv1", version="10", creation_timestamp=123, current_stage="Production"
            ),
            ModelVersion(
                name="mv2", version="15", creation_timestamp=124, current_stage="Production"
            ),
        ],
    ) as mlflow_client_mock:
        assert get_model_name_and_version(MlflowClient(), "models:/AdsModel1/Production") == (
            "AdsModel1",
            "15",
        )
        mlflow_client_mock.assert_called_once_with("AdsModel1", ["Production"])
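For orientation, a sketch of a stage-URI resolver consistent with this test; `_resolve_stage_uri` is a hypothetical helper, not MLflow's actual implementation:

def _resolve_stage_uri(client, uri):
    # "models:/<name>/<stage>" -> (<name>, highest latest version in <stage>)
    _, _, path = uri.partition("models:/")
    name, _, stage = path.rpartition("/")
    latest = client.get_latest_versions(name, [stage])
    return name, max(latest, key=lambda mv: int(mv.version)).version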
Example #4
def register_model(run, model, model_name):
    client = MlflowClient()  # needed for get_metric_history below
    result = mlflow.register_model(
        "runs:/" + run.info.run_id + "/artifacts/" + model, model_name)

    description = []
    for param in run.data.params:
        description.append("**{}:** {}\n".format(param,
                                                 run.data.params[param]))

    description.append("**Accuracy:** {}".format(
        client.get_metric_history(run.info.run_id, "accuracy")[0].value))

    description.append("**Loss:** {}".format(
        client.get_metric_history(run.info.run_id, "loss")[0].value))

    MlflowClient().update_model_version(name=model_name,
                                        version=result.version,
                                        description="".join(description))
Example #5
def test_transition_model_version_stage(mock_registry_store):
    name = "Model 1"
    version = "12"
    stage = "Production"
    expected_result = ModelVersion(name,
                                   version,
                                   creation_timestamp=123,
                                   current_stage=stage)
    mock_registry_store.transition_model_version_stage.return_value = expected_result
    actual_result = MlflowClient(
        registry_uri="sqlite:///somedb.db").transition_model_version_stage(
            name, version, stage)
    mock_registry_store.transition_model_version_stage.assert_called_once_with(
        name=name,
        version=version,
        stage=stage,
        archive_existing_versions=False)
    assert expected_result == actual_result
Example #6
def main(argv):
    parser = argparse.ArgumentParser('serve')
    parser.add_argument("--model",
                        required=True,
                        help="model name in model registry")

    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--stage",
                       default=None,
                       help="use model with this stage")
    group.add_argument("--version",
                       default=None,
                       help="use this model version")

    parser.add_argument("--auto_update_model",
                        action="store_true",
                        help="If using a model from a specific stage, "
                        "auto-update whenever a new model is registered "
                        "to that stage")

    args = parser.parse_args()

    if args.auto_update_model:
        assert args.stage, "'auto_update_model' can only be used with 'stage'"

    mlflow.set_tracking_uri("http://*****:*****@scheduler.task('interval',
                        id='do_job_1',
                        seconds=10,
                        misfire_grace_time=900)
        def maybe_update_model():
            global app
            app.model.update_model(args.model, args.stage, None)

    app.run(host='0.0.0.0', port=5050)
    return 0
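The masking also swallowed the lines that create `app` and `scheduler`. A hedged sketch of the likely wiring, assuming Flask with the flask_apscheduler extension (whose `@scheduler.task` decorator matches the usage above); the `app.model` attribute is an assumption carried over from the snippet:

from flask import Flask
from flask_apscheduler import APScheduler

app = Flask(__name__)        # `global app` above implies a module-level app
scheduler = APScheduler()    # provides the @scheduler.task decorator
scheduler.init_app(app)
scheduler.start()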
Example #7
    def optimize(self, maxevals=200, model_id=0, reuse_experiment=False):

        param_space = self.hyperparameter_space()
        objective = self.get_objective(self.lgtrain)
        objective.i = 0
        trials = Trials()
        best = fmin(fn=objective,
                    space=param_space,
                    algo=tpe.suggest,
                    max_evals=maxevals,
                    trials=trials)
        best['num_boost_round'] = self.early_stop_dict[
            trials.best_trial['tid']]
        best['num_leaves'] = int(best['num_leaves'])
        best['verbose'] = -1

        # The next few lines are the only ones related to mlflow.
        if not Path('mlruns').exists():
            # set the tracking_uri here if needed; the default is the local ./mlruns store
            client = MlflowClient()
            n_experiments = 0
        elif not reuse_experiment:
            client = MlflowClient()
            n_experiments = len(client.list_experiments())
            experiment_name = 'experiment_' + str(n_experiments)
            client.create_experiment(name=experiment_name)
        else:
            # reuse the most recent experiment; without this branch
            # `n_experiments` is undefined when `reuse_experiment` is True
            client = MlflowClient()
            n_experiments = len(client.list_experiments()) - 1
        with mlflow.start_run(experiment_id=n_experiments):
            model = lgb.LGBMClassifier(**best)
            model.fit(self.lgtrain.data,
                      self.lgtrain.label,
                      feature_name=self.colnames,
                      categorical_feature=self.categorical_columns)
            for name, value in best.items():
                mlflow.log_param(name, value)
            mlflow.log_metric('binary_logloss',
                              trials.best_trial['result']['loss'])
            mlflow.sklearn.log_model(model, "model")

        model_fname = 'model_{}_.p'.format(model_id)
        best_experiment_fname = 'best_experiment_{}_.p'.format(model_id)

        pickle.dump(model, open(self.PATH / model_fname, 'wb'))
        pickle.dump(best, open(self.PATH / best_experiment_fname, 'wb'))

        self.best = best
        self.model = model
Example #8
def test_update_model_version_compatibility_layer(mock_registry_store):
    """
    This test makes sure that old (now deprecated) APIs work as expected after the API update.
    `update_model_version` no longer accepts a stage, but the client should translate it into
    a `transition_model_version_stage` call.
    """
    expected_return_value = "some expected return value."
    mock_registry_store.update_model_version.return_value = expected_return_value
    res = MlflowClient(
        registry_uri="sqlite:///somedb.db").update_model_version(
            name="orig name", version="1", stage="Staging", description="desc")
    assert expected_return_value == res
    mock_registry_store.transition_model_version_stage.assert_called_once_with(
        name="orig name",
        version="1",
        stage="Staging",
        archive_existing_versions=False)
    mock_registry_store.update_model_version.assert_called_once_with(
        name="orig name", version="1", description="desc")
Example #9
    def _save(self, data: MetricsDict) -> None:
        """Save given MLflow metrics dataset and log it in MLflow as metrics.

        Args:
            data (MetricsDict): MLflow metrics dataset.
        """
        client = MlflowClient()
        try:
            run_id = self.run_id
        except DataSetError:
            # with no run_id, fall back to mlflow.log_metric, which logs to
            # the active run (creating one if needed)
            run_id = None

        log_metric = (partial(client.log_metric, run_id)
                      if run_id is not None else mlflow.log_metric)
        metrics = (self._build_args_list_from_metric_item(k, v)
                   for k, v in data.items())
        for k, v, i in chain.from_iterable(metrics):
            log_metric(k, v, step=i)
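For reference, a sketch of the shape `MetricsDict` appears to carry, inferred from the `{"value": ..., "step": ...}` items that Example #20's checker indexes (the key names shown are illustrative):

metrics_sample = {
    "accuracy": {"value": 0.97, "step": 0},                             # single item
    "loss": [{"value": 0.40, "step": 0}, {"value": 0.25, "step": 1}],  # per-step history
}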
Example #10
def log_production_model(config_path):

    logger = logging.getLogger()
    logger.info('Parsing the config file supplied')
    config = read_params(config_path)

    mlflow_config = config.mlflow_config

    model_name = mlflow_config.registered_model_name

    remote_server_uri = mlflow_config.remote_server_uri

    mlflow.set_tracking_uri(remote_server_uri)
    logger.info('Setting the Tracking URI')

    runs = mlflow.search_runs(experiment_ids=["1"])
    lowest = runs["metrics.mae"].min()
    print(lowest)
    lowest_run_id = runs[runs["metrics.mae"] == lowest]["run_id"].iloc[0]

    logger.info('Obtaining the model with the best metric')

    client = MlflowClient()
    for mv in client.search_model_versions(f"name='{model_name}'"):
        mv = dict(mv)

        if mv["run_id"] == lowest_run_id:
            current_version = mv["version"]
            logged_model = mv["source"]
            pprint(mv, indent=4)

            client.transition_model_version_stage(name=model_name,
                                                  version=current_version,
                                                  stage="Production")
        else:
            current_version = mv["version"]
            client.transition_model_version_stage(name=model_name,
                                                  version=current_version,
                                                  stage="Staging")

    loaded_model = mlflow.pyfunc.load_model(logged_model)
    model_dir = Path.cwd() / config.model_dir
    model_path = str(Path(model_dir / "model.joblib"))

    joblib.dump(loaded_model, model_path)
Example #11
def store_run_df(experiment_name, experiment_id):
    client = MlflowClient()
    if client.list_experiments()[0].name == experiment_name:
        run_df = pd.DataFrame([(run.run_uuid, run.start_time, run.artifact_uri)
                               for run in client.list_run_infos(experiment_id)
                               ])
        run_df.columns = ['run_uuid', 'start_time', 'artifact_uri']
        run_df['start_time'] = pd.to_datetime(run_df['start_time'], unit='ms')
        run_df = run_df.sort_values("start_time", ascending=False)
        run_df['train_accuracy'] = [
            client.get_run(
                run_df.loc[i]['run_uuid']).data.metrics['train_accuracy']
            if len(client.get_run(run_df.loc[i]['run_uuid']).data.metrics) > 0
            else 0 for i in range(len(run_df))
        ]
        run_df['test_accuracy'] = [
            client.get_run(
                run_df.loc[i]['run_uuid']).data.metrics['test_accuracy']
            if len(client.get_run(run_df.loc[i]['run_uuid']).data.metrics) > 0
            else 0 for i in range(len(run_df))
        ]
        return run_df
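As a side note, the two comprehensions above call `client.get_run` up to four times per row. A sketch of an equivalent single pass over the runs (using `dict.get` also avoids a `KeyError` when only one of the two metrics was logged):

metrics_list = [client.get_run(uuid).data.metrics for uuid in run_df['run_uuid']]
run_df['train_accuracy'] = [m.get('train_accuracy', 0) for m in metrics_list]
run_df['test_accuracy'] = [m.get('test_accuracy', 0) for m in metrics_list]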
Example #12
def test_log_explanation_doesnt_create_autologged_run():
    try:
        mlflow.sklearn.autolog(disable=False, exclusive=False)
        X, y = sklearn.datasets.load_diabetes(return_X_y=True, as_frame=True)
        X = X.iloc[:50, :4]
        y = y.iloc[:50]
        model = sklearn.linear_model.LinearRegression()
        model.fit(X, y)

        with mlflow.start_run() as run:
            mlflow.shap.log_explanation(model.predict, X)

        run_data = MlflowClient().get_run(run.info.run_id).data
        metrics, params, tags = run_data.metrics, run_data.params, run_data.tags
        assert not metrics
        assert not params
        assert all("mlflow." in key for key in tags)
        assert "mlflow.autologging" not in tags
    finally:
        mlflow.sklearn.autolog(disable=True)
Example #13
def test_update_registered_model_compatibility_layer(mock_registry_store):
    """
    This test makes sure that old (now deprecated) APIs work as expected after the API update.
    `update_registered_model` no longer accepts a new name, but the client should translate it
    into a `rename_registered_model` call.
    """
    expected_return_value = "some expected return value."
    mock_registry_store.rename_registered_model.return_value = expected_return_value
    expected_return_value_2 = "other expected return value."
    mock_registry_store.update_registered_model.return_value = expected_return_value_2
    res = MlflowClient(
        registry_uri="sqlite:///somedb.db").update_registered_model(
            name="orig name",
            new_name="new name",
            description="new description")
    assert expected_return_value_2 == res
    mock_registry_store.rename_registered_model.assert_called_once_with(
        name="orig name", new_name="new name")
    mock_registry_store.update_registered_model.assert_called_once_with(
        name="new name", description="new description")
Example #14
def test_kedro_mlflow_config_experiment_exists(kedro_project_with_mlflow_conf):

    # create an experiment with the same name
    mlflow_tracking_uri = (
        kedro_project_with_mlflow_conf / "conf" / "local" / "mlruns"
    ).as_uri()
    MlflowClient(mlflow_tracking_uri).create_experiment("exp1")
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()  # setup config
        config.setup(context)

    assert "exp1" in [
        exp.name for exp in config.server._mlflow_client.list_experiments()
    ]
Example #15
    def configure(
        self,
        run_uuid,
        experiment_name,
        tracking_uri,
        run_name=None,
        always_log_artifacts=False,
        create_run=True,
        create_experiment=True,
        nest_run=True,
    ):
        if mlflow.active_run() and not nest_run:
            logger.info('Ending previous MLflow run: {}.'.format(self.run_uuid))
            mlflow.end_run()

        self.always_log_artifacts = always_log_artifacts
        self._experiment_name = experiment_name
        self._run_name = run_name

        # MLflow specific
        if tracking_uri:
            mlflow.set_tracking_uri(tracking_uri)

        if run_uuid:
            existing_run = MlflowClient().get_run(run_uuid)
            if not existing_run and not create_run:
                raise FileNotFoundError(
                    'Run ID {} not found under {}'.format(
                        run_uuid, mlflow.get_tracking_uri()
                    )
                )

        experiment_id = self._retrieve_mlflow_experiment_id(
            experiment_name, create=create_experiment
        )
        return mlflow.start_run(
            run_uuid,
            experiment_id=experiment_id,
            run_name=run_name,
            nested=nest_run,
        )
Example #16
def test_kedro_mlflow_config_experiment_exists(mocker, kedro_project_with_mlflow_conf):

    # create an experiment with the same name
    mlflow_tracking_uri = (
        kedro_project_with_mlflow_conf / "conf" / "local" / "mlruns"
    ).as_uri()
    MlflowClient(mlflow_tracking_uri).create_experiment("exp1")
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()
    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
Example #17
def test_create_model_version(mock_registry_store):
    """
    Basic test for create model version.
    """
    mock_registry_store.create_model_version.return_value = _default_model_version(
    )
    res = MlflowClient(
        registry_uri="sqlite:///somedb.db").create_model_version(
            "orig name",
            "source",
            "run-id",
            tags={"key": "value"},
            description="desc")
    assert res == _default_model_version()
    mock_registry_store.create_model_version.assert_called_once_with(
        "orig name",
        "source",
        "run-id",
        [ModelVersionTag(key="key", value="value")],
        None,
        "desc",
    )
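The positional assertion above pins down how the client converts the `tags` dict into entities before calling the store; roughly (a sketch, not MLflow's exact code):

tags = {"key": "value"}
tag_entities = [ModelVersionTag(key=k, value=v) for k, v in tags.items()]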
Example #18
def test_kedro_mlflow_config_experiment_was_deleted(mocker, tmp_path):
    # create a ".kedro.yml" file to identify "tmp_path" as the root of a kedro project
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id)

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )
    assert "exp1" in [
        exp.name for exp in config.mlflow_client.list_experiments()
    ]
Example #19
def log_production_model(config_path):
    config = read_params(config_path)

    mlflow_config = config["mlflow_config"]

    model_name = mlflow_config["registered_model_name"]

    remote_server_uri = mlflow_config["remote_server_uri"]

    mlflow.set_tracking_uri(remote_server_uri)

    runs = mlflow.search_runs(experiment_ids=["1"])
    lowest = runs["metrics.mae"].min()
    lowest_run_id = runs[runs["metrics.mae"] == lowest]["run_id"].iloc[0]

    client = MlflowClient()
    for mv in client.search_model_versions(f"name='{model_name}'"):
        mv = dict(mv)

        if mv["run_id"] == lowest_run_id:
            current_version = mv["version"]
            logged_model = mv["source"]
            pprint(mv, indent=4)
            client.transition_model_version_stage(
                name=model_name,
                version=current_version,
                stage="Production"
            )
        else:
            current_version = mv["version"]
            client.transition_model_version_stage(
                name=model_name,
                version=current_version,
                stage="Staging"
            )

    loaded_model = mlflow.pyfunc.load_model(logged_model)

    model_path = config["webapp_model_dir"]  # "prediction_service/model"

    joblib.dump(loaded_model, model_path)
Example #20
def assert_are_metrics_logged(
    data: Dict[str, Union[Dict[str, float], List[Dict[str, float]]]],
    client: MlflowClient,
    run_id: str,
    prefix: Optional[str] = None,
) -> bool:
    """Helper function which checks if given metrics were logged.

    Args:
        data: Logged metrics, as single items or as per-step lists.
        client: MLflow client instance.
        run_id: Id of the run where the data was logged.
        prefix: Optional prefix prepended to each metric key.
    """
    for key in data.keys():
        metric_key = f"{prefix}.{key}" if prefix else key
        metric = client.get_metric_history(run_id, metric_key)
        data_len = len(data[key]) if isinstance(data[key], list) else 1
        assert len(metric) == data_len
        for idx, item in enumerate(metric):
            data_value = (data[key][idx]["value"] if isinstance(
                data[key], list) else data[key]["value"])
            assert item.value == data_value and item.key == metric_key
    return True
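A hypothetical invocation, matching the shapes the helper accepts; the metric names are illustrative and the "hello" prefix mirrors Example #26:

data = {
    "accuracy": {"value": 0.97},
    "loss": [{"value": 0.40}, {"value": 0.25}],
}
# `run_id` is assumed to identify a run where these metrics were logged
assert_are_metrics_logged(data, MlflowClient(), run_id, prefix="hello")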
Example #21
    def __init__(self,
                 experiment_name: str = 'default',
                 run_name: str = 'test',
                 tracking_uri: Optional[str] = None,
                 tags: Optional[Dict[str, Any]] = None,
                 save_dir: Optional[str] = './mlruns'):

        if not _MLFLOW_AVAILABLE:
            raise ImportError(
                'You want to use `mlflow` logger which is not installed yet,'
                ' install it with `pip install mlflow`.')
        super().__init__()
        if not tracking_uri:
            tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'

        self._experiment_name = experiment_name
        self._tracking_uri = tracking_uri
        self.tags = tags

        mlflow.set_experiment(experiment_name)
        run = mlflow.start_run(run_name=run_name)
        self._run_id = run.info.run_id
        self._experiment_id = run.info.experiment_id
        self._mlflow_client = MlflowClient(tracking_uri)
Example #22
def test_client_registry_operations_raise_exception_with_unsupported_registry_store():
    """
    This test case ensures that Model Registry operations invoked on the `MlflowClient`
    fail with an informative error message when the registry store URI refers to a
    store that does not support Model Registry features (e.g., FileStore).
    """
    with TempDir() as tmp:
        client = MlflowClient(registry_uri=tmp.path())
        expected_failure_functions = [
            client._get_registry_client,
            lambda: client.create_registered_model("test"),
            lambda: client.get_registered_model("test"),
            lambda: client.create_model_version("test", "source", "run_id"),
            lambda: client.get_model_version("test", 1),
        ]
        for func in expected_failure_functions:
            with pytest.raises(MlflowException) as exc:
                func()
            assert exc.value.error_code == ErrorCode.Name(FEATURE_DISABLED)
Example #23
    def _init(self):
        from mlflow.tracking import MlflowClient
        uri = osp.join(osp.dirname(self.logdir), 'mlruns')
        client = MlflowClient(tracking_uri=uri)
        experiments = [e.name for e in client.list_experiments()]
        exp_name = self.config.get("mlflow_experiment", "test")
        if exp_name in experiments:
            experiment_id = client.get_experiment_by_name(exp_name).experiment_id
        else:
            # create_experiment returns the new experiment's id directly
            experiment_id = client.create_experiment(exp_name)
        run = client.create_run(experiment_id,
                                tags={'mlflow.runName': self.trial.trial_id})
        self._run_id = run.info.run_id

        self.client = client
        self._log_hparams()
Example #24
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must restore properly the experiment
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()  # setup config
        config.setup(context)

    assert "exp1" in [
        exp.name for exp in config.server._mlflow_client.list_experiments()
    ]
Example #25
def test_client_can_be_serialized_with_pickle(tmpdir):
    """
    Verifies that instances of `MlflowClient` can be serialized using pickle, even if the underlying
    Tracking and Model Registry stores used by the client are not serializable using pickle
    """

    class MockUnpickleableTrackingStore(SqlAlchemyTrackingStore):
        pass

    class MockUnpickleableModelRegistryStore(SqlAlchemyModelRegistryStore):
        pass

    backend_store_path = tmpdir.join("test.db").strpath
    artifact_store_path = tmpdir.join("artfiacts").strpath

    mock_tracking_store = MockUnpickleableTrackingStore(
        "sqlite:///" + backend_store_path, artifact_store_path
    )
    mock_model_registry_store = MockUnpickleableModelRegistryStore(
        "sqlite:///" + backend_store_path
    )

    # Verify that the mock stores cannot be pickled because they are defined within a function
    # (i.e. the test function)
    with pytest.raises(AttributeError, match="<locals>.MockUnpickleableTrackingStore'"):
        pickle.dumps(mock_tracking_store)

    with pytest.raises(AttributeError, match="<locals>.MockUnpickleableModelRegistryStore'"):
        pickle.dumps(mock_model_registry_store)

    _tracking_store_registry.register("pickle", lambda *args, **kwargs: mock_tracking_store)
    _model_registry_store_registry.register(
        "pickle", lambda *args, **kwargs: mock_model_registry_store
    )

    # Create an MlflowClient with the store that cannot be pickled, perform
    # tracking & model registry operations, and verify that the client can still be pickled
    client = MlflowClient("pickle://foo")
    client.create_experiment("test_experiment")
    client.create_registered_model("test_model")
    pickle.dumps(client)
Example #26
def test_mlflow_metrics_logging_deactivation(tracking_uri, metrics):
    mlflow_metrics_dataset = MlflowMetricsDataSet(prefix="hello")

    mlflow.set_tracking_uri(tracking_uri.as_uri())
    mlflow_client = MlflowClient(tracking_uri=tracking_uri.as_uri())

    mlflow_metrics_dataset._logging_activated = False

    all_runs_id_beginning = set([
        run.run_id for k in range(len(mlflow_client.list_experiments()))
        for run in mlflow_client.list_run_infos(experiment_id=f"{k}")
    ])

    mlflow_metrics_dataset.save(metrics)

    all_runs_id_end = set([
        run.run_id for k in range(len(mlflow_client.list_experiments()))
        for run in mlflow_client.list_run_infos(experiment_id=f"{k}")
    ])

    assert all_runs_id_beginning == all_runs_id_end
Example #27
def test_artifact_dataset_logging_deactivation(tmp_path, tracking_uri):
    mlflow_pkl_dataset = MlflowArtifactDataSet(data_set=dict(
        type=PickleDataSet, filepath=(tmp_path / "df1.csv").as_posix()))

    mlflow.set_tracking_uri(tracking_uri.as_uri())
    mlflow_client = MlflowClient(tracking_uri=tracking_uri.as_uri())

    mlflow_pkl_dataset._logging_activated = False

    all_runs_id_beginning = set([
        run.run_id for k in range(len(mlflow_client.list_experiments()))
        for run in mlflow_client.list_run_infos(experiment_id=f"{k}")
    ])

    mlflow_pkl_dataset.save(2)

    all_runs_id_end = set([
        run.run_id for k in range(len(mlflow_client.list_experiments()))
        for run in mlflow_client.list_run_infos(experiment_id=f"{k}")
    ])

    assert all_runs_id_beginning == all_runs_id_end
Example #28
def test_mlflow_model_logger_logging_deactivation(tracking_uri, linreg_model):
    mlflow_model_logger_dataset = MlflowModelLoggerDataSet(
        flavor="mlflow.sklearn")

    mlflow.set_tracking_uri(tracking_uri)
    mlflow_client = MlflowClient(tracking_uri=tracking_uri)

    mlflow_model_logger_dataset._logging_activated = False

    all_runs_id_beginning = set([
        run.run_id for k in range(len(mlflow_client.list_experiments()))
        for run in mlflow_client.list_run_infos(experiment_id=f"{k}")
    ])

    mlflow_model_logger_dataset.save(linreg_model)

    all_runs_id_end = set([
        run.run_id for k in range(len(mlflow_client.list_experiments()))
        for run in mlflow_client.list_run_infos(experiment_id=f"{k}")
    ])

    assert all_runs_id_beginning == all_runs_id_end
Example #29
def stage(model_name: str, experiment_id, metric: str, highest: bool, databricks_token: str):
    client = MlflowClient(tracking_uri="databricks")
    runs = client.search_runs(experiment_ids=experiment_id)
    max_run: Tuple[Optional[str], Union[float, int]] = (None, -float('inf') if highest else float('inf'))
    for run in runs:
        if highest:
            if run.data.metrics[metric] > max_run[1]:
                max_run = (run.info.run_id, run.data.metrics[metric])
        else:
            if run.data.metrics[metric] < max_run[1]:
                max_run = (run.info.run_id, run.data.metrics[metric])
    if max_run[0] is None:
        print(f"No runs found in the provided experiment {experiment_id}")
        return
    print(f"Run {max_run[0]} with {metric} = {max_run[1]} chosen for registration.")
    for version in client.search_model_versions("name = '%s'" % model_name):
        if version.run_id == max_run[0]:
            if version.current_stage != "Staging":
                print(f"Highest performing model is versioned but not in 'Staging'. Promoting..")
                client.transition_model_version_stage(model_name, version.version, "Staging")
                return
    print("Best model not registered. Registering...")
    register(max_run[0], experiment_id, model_name, True)
Example #30
    return output_df


# COMMAND ----------

input_data = table('bank_db.bank_marketing_train_set')
pdDF = input_data.toPandas()

# COMMAND ----------

returnDF = train_xgboost(pdDF)

# COMMAND ----------

# MAGIC %md
# MAGIC ##### Extra: Use the MLflow client to analyse the runs programmatically
# MAGIC Besides using the Experiments UI or loading all the results from a run into a Spark DataFrame, the `MlflowClient` class can be used to look up runs programmatically from a given (list of) experiment(s). Below is an example of how we can retrieve the run_id with the highest area-under-the-curve score using the `client.search_runs` method.

# COMMAND ----------

from mlflow.tracking import MlflowClient
client = MlflowClient()
best_run_id = client.search_runs(experiment_ids=[experiment_id],
                                 order_by=["metrics.auc DESC"])[0].info.run_id

# COMMAND ----------

best_run_id

# COMMAND ----------