Example #1
    def __init__(
        self,
        tracking_uri: Optional[str] = None,
        registry_uri: Optional[str] = None,
        experiment_name: Optional[str] = None,
        tags: Optional[Dict] = None,
        save_artifact: bool = False,
    ):

        self.tracking_uri = tracking_uri
        self.registry_uri = registry_uri
        self.experiment_name = experiment_name
        self.tags = tags
        self.should_save_artifact = save_artifact

        self.mlflow_util = MLflowLoggerUtil()

        if ray.util.client.ray.is_connected():
            logger.warning(
                "When using MLflowLoggerCallback with Ray Client, "
                "it is recommended to use a remote tracking "
                "server. If you are using a MLflow tracking server "
                "backed by the local filesystem, then it must be "
                "setup on the server side and not on the client "
                "side."
            )
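
The warning above is simplest to address by pointing the callback at a remote tracking server. A minimal sketch, assuming a hypothetical server address and experiment name, and a train_fn defined elsewhere:

from ray import tune
from ray.tune.integration.mlflow import MLflowLoggerCallback

# Hypothetical remote server: with Ray Client, a tracking store backed by
# the local filesystem must live on the server side, so a remote URI is
# the safe choice.
callback = MLflowLoggerCallback(
    tracking_uri="http://mlflow-server:5000",
    experiment_name="client_experiment",
)

tune.run(train_fn, callbacks=[callback])  # train_fn defined elsewhere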
Example #2
    def __init__(
        self,
        tracking_uri: Optional[str] = None,
        registry_uri: Optional[str] = None,
        experiment_id: Optional[str] = None,
        experiment_name: Optional[str] = None,
        tags: Optional[Dict] = None,
        save_artifact: bool = False,
        logdir: Optional[str] = None,
        worker_to_log: int = 0,
    ):
        warnings.warn(
            _deprecation_msg,
            DeprecationWarning,
        )
        self._logdir_manager = _TrainCallbackLogdirManager(logdir=logdir)
        self.results_preprocessor = IndexedResultsPreprocessor(
            indices=worker_to_log)

        self.tracking_uri = tracking_uri
        self.registry_uri = registry_uri
        self.experiment_id = experiment_id
        self.experiment_name = experiment_name
        self.tags = tags

        self.save_artifact = save_artifact
        self.mlflow_util = MLflowLoggerUtil()
Example #3
    def setUp(self):
        self.dirpath = tempfile.mkdtemp()
        import mlflow
        mlflow.set_tracking_uri(self.dirpath)
        mlflow.create_experiment(name="existing_experiment")

        self.mlflow_util = MLflowLoggerUtil()
        self.tracking_uri = mlflow.get_tracking_uri()
Example #4
    def __init__(self, config: Dict, *args, **kwargs):
        self.mlflow_util = MLflowLoggerUtil()

        if not isinstance(self, Trainable):
            raise ValueError(
                "The `MLflowTrainableMixin` can only be used as a mixin "
                "for `tune.Trainable` classes. Please make sure your "
                "class inherits from both. For example: "
                "`class YourTrainable(MLflowTrainableMixin)`."
            )

        super().__init__(config, *args, **kwargs)
        _config = config.copy()
        try:
            mlflow_config = _config.pop("mlflow").copy()
        except KeyError as e:
            raise ValueError(
                "MLflow mixin specified but no configuration has been passed. "
                "Make sure to include a `mlflow` key in your `config` dict "
                "containing at least a `tracking_uri` and either "
                "`experiment_name` or `experiment_id` specification."
            ) from e

        tracking_uri = mlflow_config.pop("tracking_uri", None)
        if tracking_uri is None:
            raise ValueError(
                "MLflow mixin specified but no "
                "tracking_uri has been "
                "passed in. Make sure to include a `mlflow` "
                "key in your `config` dict containing at "
                "least a `tracking_uri`"
            )

        # Set the tracking token if one is passed in.
        tracking_token = mlflow_config.pop("token", None)

        experiment_id = mlflow_config.pop("experiment_id", None)

        experiment_name = mlflow_config.pop("experiment_name", None)

        # This initialization happens in each of the Trainables/workers.
        # So we have to set `create_experiment_if_not_exists` to False.
        # Otherwise there might be race conditions when each worker tries to
        # create the same experiment.
        # For the mixin, the experiment must be created beforehand.
        self.mlflow_util.setup_mlflow(
            tracking_uri=tracking_uri,
            experiment_id=experiment_id,
            experiment_name=experiment_name,
            tracking_token=tracking_token,
            create_experiment_if_not_exists=False,
        )

        run_name = self.trial_name + "_" + self.trial_id
        run_name = run_name.replace("/", "_")
        self.mlflow_util.start_run(set_active=True, run_name=run_name)
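
For reference, a minimal sketch of the config shape this mixin expects, assuming the mlflow_mixin decorator exported by the same ray.tune.integration.mlflow module; the URI and names are placeholders, and the experiment must already exist since create_experiment_if_not_exists is False here:

from ray import tune
from ray.tune.integration.mlflow import mlflow_mixin

@mlflow_mixin
def train_fn(config):
    ...  # training code; mlflow.log_metric(...) may be called here

tune.run(
    train_fn,
    config={
        "lr": tune.loguniform(1e-4, 1e-1),
        # Required by the mixin: tracking_uri plus experiment_name
        # or experiment_id.
        "mlflow": {
            "tracking_uri": "http://mlflow-server:5000",
            "experiment_name": "existing_experiment",
        },
    },
)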
Example #5
File: logging.py (Project: novahe/ray)
    def __init__(self,
                 tracking_uri: Optional[str] = None,
                 registry_uri: Optional[str] = None,
                 experiment_id: Optional[str] = None,
                 experiment_name: Optional[str] = None,
                 tags: Optional[Dict] = None,
                 save_artifact: bool = False,
                 logdir: Optional[str] = None,
                 worker_to_log: int = 0):
        super().__init__(logdir=logdir, worker_to_log=worker_to_log)

        self.tracking_uri = tracking_uri
        self.registry_uri = registry_uri
        self.experiment_id = experiment_id
        self.experiment_name = experiment_name
        self.tags = tags

        self.save_artifact = save_artifact
        self.mlflow_util = MLflowLoggerUtil()
Example #6
class MLflowLoggerCallback(LoggerCallback):
    """MLflow Logger to automatically log Tune results and config to MLflow.

    MLflow (https://mlflow.org) Tracking is an open source library for
    recording and querying experiments. This Ray Tune ``LoggerCallback``
    sends information (config parameters, training results & metrics,
    and artifacts) to MLflow for automatic experiment tracking.

    Args:
        tracking_uri (str): The tracking URI for where to manage experiments
            and runs. This can either be a local file path or a remote server.
            This arg gets passed directly to mlflow
            initialization. When using Tune in a multi-node setting, make sure
            to set this to a remote server and not a local file path.
        registry_uri (str): The registry URI that gets passed directly to
            mlflow initialization.
        experiment_name (str): The experiment name to use for this Tune run.
            If an experiment with this name already exists in MLflow, it
            will be reused. If not, a new experiment will be created with
            that name.
        tags (Dict): An optional dictionary of string keys and values to
            set as tags on the run.
        save_artifact (bool): If set to True, automatically save the entire
            contents of the Tune local_dir as an artifact to the
            corresponding run in MLflow.

    Example:

    .. code-block:: python

        from ray.tune.integration.mlflow import MLflowLoggerCallback

        tags = { "user_name" : "John",
                 "git_commit_hash" : "abc123"}

        tune.run(
            train_fn,
            config={
                # define search space here
                "parameter_1": tune.choice([1, 2, 3]),
                "parameter_2": tune.choice([4, 5, 6]),
            },
            callbacks=[MLflowLoggerCallback(
                experiment_name="experiment1",
                tags=tags,
                save_artifact=True)])

    """
    def __init__(
        self,
        tracking_uri: Optional[str] = None,
        registry_uri: Optional[str] = None,
        experiment_name: Optional[str] = None,
        tags: Optional[Dict] = None,
        save_artifact: bool = False,
    ):

        self.tracking_uri = tracking_uri
        self.registry_uri = registry_uri
        self.experiment_name = experiment_name
        self.tags = tags
        self.should_save_artifact = save_artifact

        self.mlflow_util = MLflowLoggerUtil()

        if ray.util.client.ray.is_connected():
            logger.warning("When using MLflowLoggerCallback with Ray Client, "
                           "it is recommended to use a remote tracking "
                           "server. If you are using a MLflow tracking server "
                           "backed by the local filesystem, then it must be "
                           "setup on the server side and not on the client "
                           "side.")

    def setup(self, *args, **kwargs):
        # Setup the mlflow logging util.
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri,
                                      registry_uri=self.registry_uri,
                                      experiment_name=self.experiment_name)

        if self.tags is None:
            # Create empty dictionary for tags if not given explicitly
            self.tags = {}

        self._trial_runs = {}

    def log_trial_start(self, trial: "Trial"):
        # Create a run if one does not exist yet.
        if trial not in self._trial_runs:

            # Set trial name in tags
            tags = self.tags.copy()
            tags["trial_name"] = str(trial)

            run = self.mlflow_util.start_run(tags=tags, run_name=str(trial))
            self._trial_runs[trial] = run.info.run_id

        run_id = self._trial_runs[trial]

        # Log the config parameters.
        config = trial.config
        self.mlflow_util.log_params(run_id=run_id, params_to_log=config)

    def log_trial_result(self, iteration: int, trial: "Trial", result: Dict):
        # Prefer total timesteps as the step when reported; otherwise fall
        # back to the training iteration.
        step = result.get(TIMESTEPS_TOTAL) or result[TRAINING_ITERATION]
        run_id = self._trial_runs[trial]
        self.mlflow_util.log_metrics(run_id=run_id,
                                     metrics_to_log=result,
                                     step=step)

    def log_trial_end(self, trial: "Trial", failed: bool = False):
        run_id = self._trial_runs[trial]

        # Log the artifacts if save_artifact is set to True.
        if self.should_save_artifact:
            self.mlflow_util.save_artifacts(run_id=run_id, dir=trial.logdir)

        # Stop the run once trial finishes.
        status = "FINISHED" if not failed else "FAILED"
        self.mlflow_util.end_run(run_id=run_id, status=status)
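
Once a run logged by this callback finishes, the recorded parameters and metrics can be read back through the standard MLflow client API. A sketch reusing the experiment name from the docstring example; the tracking URI is a placeholder:

from mlflow.tracking import MlflowClient

client = MlflowClient(tracking_uri="http://mlflow-server:5000")
experiment = client.get_experiment_by_name("experiment1")
for run in client.search_runs([experiment.experiment_id]):
    print(run.info.run_id, run.data.params, run.data.metrics)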
Example #7
class MLflowLoggerCallback(TrainingCallback):
    """MLflow Logger to automatically log Train results and config to MLflow.

    MLflow (https://mlflow.org) Tracking is an open source library for
    recording and querying experiments. This Ray Train callback
    sends information (config parameters, training results & metrics,
    and artifacts) to MLflow for automatic experiment tracking.

    Args:
        tracking_uri (Optional[str]): The tracking URI for where to manage
            experiments and runs. This can either be a local file path or a
            remote server. If None is passed in, the logdir of the trainer
            will be used as the tracking URI.
            This arg gets passed directly to mlflow initialization.
        registry_uri (Optional[str]): The registry URI that gets passed
            directly to mlflow initialization. If None is passed in, the
            logdir of the trainer will be used as the registry URI.
        experiment_id (Optional[str]): The experiment id of an already
            existing experiment. If not
            passed in, experiment_name will be used.
        experiment_name (Optional[str]): The experiment name to use for this
            Train run. If an experiment with this name already exists in
            MLflow, it will be used. If not, a new experiment will be
            created with this name. At least one of ``experiment_id`` or
            ``experiment_name`` must be passed in.
        tags (Optional[Dict]): An optional dictionary of string keys and
            values to set as tags on the run.
        save_artifact: If set to True, automatically save the entire
            contents of the Train local_dir as an artifact to the
            corresponding run in MLflow.
        logdir (Optional[str]): Path to the directory where the results file
            should be written. If None, it will be set by the Trainer. If no
            tracking URI or registry URI is passed in, the logdir will be
            used for both.
        worker_to_log: Worker index to log. By default, will log the
            worker with index 0.
    """
    def __init__(
        self,
        tracking_uri: Optional[str] = None,
        registry_uri: Optional[str] = None,
        experiment_id: Optional[str] = None,
        experiment_name: Optional[str] = None,
        tags: Optional[Dict] = None,
        save_artifact: bool = False,
        logdir: Optional[str] = None,
        worker_to_log: int = 0,
    ):
        warnings.warn(
            _deprecation_msg,
            DeprecationWarning,
        )
        self._logdir_manager = _TrainCallbackLogdirManager(logdir=logdir)
        self.results_preprocessor = IndexedResultsPreprocessor(
            indices=worker_to_log)

        self.tracking_uri = tracking_uri
        self.registry_uri = registry_uri
        self.experiment_id = experiment_id
        self.experiment_name = experiment_name
        self.tags = tags

        self.save_artifact = save_artifact
        self.mlflow_util = MLflowLoggerUtil()

    def start_training(self, logdir: str, config: Dict, **info):
        self._logdir_manager.setup_logdir(default_logdir=logdir)

        # Default both URIs to an "mlruns" directory under the logdir when
        # they were not passed in.
        tracking_uri = self.tracking_uri or os.path.join(
            str(self.logdir), "mlruns")
        registry_uri = self.registry_uri or os.path.join(
            str(self.logdir), "mlruns")

        self.mlflow_util.setup_mlflow(
            tracking_uri=tracking_uri,
            registry_uri=registry_uri,
            experiment_id=self.experiment_id,
            experiment_name=self.experiment_name,
            create_experiment_if_not_exists=True,
        )

        self.mlflow_util.start_run(tags=self.tags, set_active=True)
        self.mlflow_util.log_params(params_to_log=config)

    def handle_result(self, results: List[Dict], **info):
        result = results[0]

        self.mlflow_util.log_metrics(metrics_to_log=result,
                                     step=result[TRAINING_ITERATION])

    def finish_training(self, error: bool = False, **info):
        checkpoint_dir = self.logdir.joinpath(TRAIN_CHECKPOINT_SUBDIR)
        if self.save_artifact and checkpoint_dir.exists():
            self.mlflow_util.save_artifacts(dir=str(checkpoint_dir))
        self.mlflow_util.end_run(status="FAILED" if error else "FINISHED")

    @property
    def logdir(self) -> Path:
        return self._logdir_manager.logdir_path
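
A minimal usage sketch for this callback, assuming the legacy ray.train.Trainer API that this (now deprecated) callback was written against; the backend, worker count, and names are placeholders:

from ray.train import Trainer

def train_func(config):
    ...  # per-worker training loop that reports metrics via train.report()

trainer = Trainer(backend="torch", num_workers=2)
trainer.start()
trainer.run(
    train_func,
    callbacks=[MLflowLoggerCallback(
        experiment_name="train_experiment",
        save_artifact=True,
    )],
)
trainer.shutdown()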
Example #8
class MLflowTest(unittest.TestCase):
    def setUp(self):
        self.dirpath = tempfile.mkdtemp()
        import mlflow
        mlflow.set_tracking_uri(self.dirpath)
        mlflow.create_experiment(name="existing_experiment")

        self.mlflow_util = MLflowLoggerUtil()
        self.tracking_uri = mlflow.get_tracking_uri()

    def tearDown(self):
        shutil.rmtree(self.dirpath)

    def test_experiment_id(self):
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri,
                                      experiment_id="0")
        assert self.mlflow_util.experiment_id == "0"

    def test_experiment_id_env_var(self):
        os.environ["MLFLOW_EXPERIMENT_ID"] = "0"
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri)
        assert self.mlflow_util.experiment_id == "0"
        del os.environ["MLFLOW_EXPERIMENT_ID"]

    def test_experiment_name(self):
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri,
                                      experiment_name="existing_experiment")
        assert self.mlflow_util.experiment_id == "0"

    def test_experiment_name_env_var(self):
        os.environ["MLFLOW_EXPERIMENT_NAME"] = "existing_experiment"
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri)
        assert self.mlflow_util.experiment_id == "0"
        del os.environ["MLFLOW_EXPERIMENT_NAME"]

    def test_id_precedence(self):
        os.environ["MLFLOW_EXPERIMENT_ID"] = "0"
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri,
                                      experiment_name="new_experiment")
        assert self.mlflow_util.experiment_id == "0"
        del os.environ["MLFLOW_EXPERIMENT_ID"]

    def test_new_experiment(self):
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri,
                                      experiment_name="new_experiment")
        assert self.mlflow_util.experiment_id == "1"

    def test_setup_fail(self):
        with self.assertRaises(ValueError):
            self.mlflow_util.setup_mlflow(
                tracking_uri=self.tracking_uri,
                experiment_name="new_experiment2",
                create_experiment_if_not_exists=False)

    def test_log_params(self):
        params = {"a": "a"}
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri,
                                      experiment_name="new_experiment")
        run = self.mlflow_util.start_run()
        run_id = run.info.run_id
        self.mlflow_util.log_params(params_to_log=params, run_id=run_id)

        run = self.mlflow_util._mlflow.get_run(run_id=run_id)
        assert run.data.params == params

        params2 = {"b": "b"}
        self.mlflow_util.start_run(set_active=True)
        self.mlflow_util.log_params(params_to_log=params2, run_id=run_id)
        assert self.mlflow_util._mlflow.get_run(run_id=run_id).data.params == {
            **params,
            **params2
        }
        self.mlflow_util.end_run()

    def test_log_metrics(self):
        metrics = {"a": 1.0}
        self.mlflow_util.setup_mlflow(tracking_uri=self.tracking_uri,
                                      experiment_name="new_experiment")
        run = self.mlflow_util.start_run()
        run_id = run.info.run_id
        self.mlflow_util.log_metrics(metrics_to_log=metrics,
                                     run_id=run_id,
                                     step=0)

        run = self.mlflow_util._mlflow.get_run(run_id=run_id)
        assert run.data.metrics == metrics

        metrics2 = {"b": 1.0}
        self.mlflow_util.start_run(set_active=True)
        self.mlflow_util.log_metrics(metrics_to_log=metrics2,
                                     run_id=run_id,
                                     step=0)
        assert self.mlflow_util._mlflow.get_run(
            run_id=run_id).data.metrics == {
                **metrics,
                **metrics2
            }
        self.mlflow_util.end_run()
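
The test class above has no entry point in the snippet; a conventional one for running the file directly would look like the following sketch (not part of the original):

if __name__ == "__main__":
    import sys
    import pytest
    sys.exit(pytest.main(["-v", __file__]))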