Example #1
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must properly restore the experiment
    config = KedroMlflowConfig(
        project_path=kedro_project_with_mlflow_conf,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )

    project_metadata = _get_project_metadata(kedro_project_with_mlflow_conf)
    _add_src_to_path(project_metadata.source_dir, kedro_project_with_mlflow_conf)
    configure_project(project_metadata.package_name)
    with KedroSession.create(
        "fake_project", project_path=kedro_project_with_mlflow_conf
    ):
        config.setup()

    assert "exp1" in [exp.name for exp in config.mlflow_client.list_experiments()]
Example #2
    def setUp(self):
        self.tracking_uri = tempfile.mkdtemp()
        self.registry_uri = tempfile.mkdtemp()

        client = MlflowClient(tracking_uri=self.tracking_uri,
                              registry_uri=self.registry_uri)
        client.create_experiment(name="existing_experiment")
        assert client.get_experiment_by_name(
            "existing_experiment").experiment_id == "0"
Example #3
def get_exp_id(name, registry_uri, artifact_location):

    clnt = MlflowClient(registry_uri=registry_uri)
    exp_info = clnt.get_experiment_by_name(name)
    exp_id = exp_info.experiment_id if exp_info else clnt.create_experiment(
        name, artifact_location=artifact_location)
    return exp_id
Example #4
def get_run_id(client: MlflowClient, experiment_name: str,
               model_key: str) -> str:
    """
    Get an existing or create a new run for the given model_key and experiment_name.

    The model key corresponds to a unique configuration of the model. The corresponding
    run must be manually stopped using the `mlflow.tracking.MlflowClient.set_terminated`
    method.

    Parameters
    ----------
    client: mlflow.tracking.MlflowClient
        Client with tracking uri set to AzureML if configured.
    experiment_name: str
        Name of experiment to log to.
    model_key: str
        Unique ID of model configuration.

    Returns
    -------
    run_id: str
        Unique ID of MLflow run to log to.
    """
    experiment = client.get_experiment_by_name(experiment_name)

    experiment_id = (getattr(experiment, "experiment_id") if experiment else
                     client.create_experiment(experiment_name))
    return client.create_run(experiment_id, tags={
        "model_key": model_key
    }).info.run_id
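
A hypothetical usage of get_run_id as the docstring describes: log against the returned run and terminate it explicitly, since create_run leaves the run open. The experiment name and model key below are placeholders.

from mlflow.tracking import MlflowClient

client = MlflowClient()                                     # tracking URI taken from the environment, if set
run_id = get_run_id(client, experiment_name="my-experiment", model_key="cfg-42")
client.log_param(run_id, "model_key", "cfg-42")
client.log_metric(run_id, "rmse", 0.87)
client.set_terminated(run_id)                               # required, as noted in the docstring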
Example #5
def get_experiment_id(client: MlflowClient, experiment_name: str) -> int:
    experiment = client.get_experiment_by_name(experiment_name)
    if experiment is None:
        experiment_id = client.create_experiment(experiment_name)
    else:
        experiment_id = experiment.experiment_id
    return experiment_id
Example #6
class MlflowWriter:
    def __init__(self, experiment_name, **kwargs):
        self.client = MlflowClient(**kwargs)
        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except Exception:
            self.experiment_id = self.client.get_experiment_by_name(
                experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

    def log_params_from_omegaconf_dict(self, params):
        for param_name, element in params.items():
            self._explore_recursive(param_name, element)

    def _explore_recursive(self, parent_name, element):
        if isinstance(element, DictConfig):
            for k, v in element.items():
                if isinstance(v, DictConfig) or isinstance(v, ListConfig):
                    self._explore_recursive(f"{parent_name}.{k}", v)
                else:
                    self.client.log_param(self.run_id, f"{parent_name}.{k}", v)
        elif isinstance(element, ListConfig):
            for i, v in enumerate(element):
                self.client.log_param(self.run_id, f"{parent_name}.{i}", v)

    def log_torch_model(self, model):
        with mlflow.start_run(self.run_id):
            pytorch.log_model(model, "models")

    def log_param(self, key, value):
        self.client.log_param(self.run_id, key, value)

    def log_metric(self, key, value, timestamp=None, step=None):
        self.client.log_metric(self.run_id, key, value, timestamp, step)

    def log_artifact(self, local_path, artifact_path=None):
        self.client.log_artifact(self.run_id, local_path, artifact_path)

    def set_terminated(self):
        self.client.set_terminated(self.run_id)

    def move_mlruns(self):
        # copy the run
        hydra_cwd = os.getcwd()
        exp_root, exp_id = os.path.split(hydra_cwd)
        src_mlrun_dir = os.path.join(hydra_cwd, "mlruns", "1")
        src_mlrun_path = [
            file_folder for file_folder in glob.glob(f"{src_mlrun_dir}/*")
            if os.path.isdir(file_folder)
        ][0]
        run_hash = os.path.basename(src_mlrun_path)
        dst_mlrun_path = os.path.join(hydra.utils.get_original_cwd(), "mlruns",
                                      "1", run_hash)
        shutil.copytree(src_mlrun_path, dst_mlrun_path)
        overwrite_meta_yaml(dst_mlrun_path, run_hash)
        # copy the experiment
        dst_exp_path = os.path.join(hydra.utils.get_original_cwd(), "mlruns",
                                    "1")
        copy_exp_meta_yaml(src_mlrun_dir, dst_exp_path)
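
A sketch of how a writer like this is typically driven from a Hydra entry point; the config path, config name, and cfg fields are assumptions, not part of the example above.

import hydra
from omegaconf import DictConfig

@hydra.main(config_path="conf", config_name="config")       # hypothetical config layout
def main(cfg: DictConfig) -> None:
    writer = MlflowWriter(experiment_name=cfg.experiment_name)
    writer.log_params_from_omegaconf_dict(cfg)
    writer.log_metric("loss", 0.123)                        # placeholder value
    writer.set_terminated()

if __name__ == "__main__":
    main()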
Example #7
	def optimize(self, maxevals=50, model_id=0):

		param_space = self.hyperparameter_space()
		objective = self.get_objective(self.lgtrain)
		objective.i=0
		trials = Trials()
		best = fmin(fn=objective,
		            space=param_space,
		            algo=tpe.suggest,
		            max_evals=maxevals,
		            trials=trials)
		best['num_boost_round'] = self.early_stop_dict[trials.best_trial['tid']]
		best['num_leaves'] = int(best['num_leaves'])
		best['verbose'] = -1

		# The next few lines are the only ones related to mlflow. One
		# "annoying" behaviour of mlflow is that when you instantiate a client
		# it creates the 'mlruns' dir by default as well as the first
		# experiment and there does not seem to be a way I can change this
		# behaviour without changing the source code. The solution is the
		# following hack:
		if not Path('mlruns').exists():
			client = MlflowClient()
		else:
			client = MlflowClient()
			n_experiments = len(client.list_experiments())
			client.create_experiment(name=str(n_experiments))
		experiments = client.list_experiments()
		with mlflow.start_run(experiment_id=experiments[-1].experiment_id) as run:
			model = lgb.LGBMClassifier(**best)
			model.fit(self.lgtrain.data,
				self.lgtrain.label,
				feature_name=self.colnames,
				categorical_feature=self.categorical_columns)
			for name, value in best.items():
				mlflow.log_param(name, value)
			mlflow.log_metric('binary_logloss', trials.best_trial['result']['loss'])
			mlflow.sklearn.log_model(model, "model")

		model_fname = 'model_{}_.p'.format(model_id)
		best_experiment_fname = 'best_experiment_{}_.p'.format(model_id)

		pickle.dump(model, open(self.PATH/model_fname, 'wb'))
		pickle.dump(best, open(self.PATH/best_experiment_fname, 'wb'))

		self.best = best
		self.model = model
Example #8
    def optimize(self, maxevals=200, model_id=0, reuse_experiment=False):

        param_space = self.hyperparameter_space()
        objective = self.get_objective(self.lgtrain)
        objective.i = 0
        trials = Trials()
        best = fmin(fn=objective,
                    space=param_space,
                    algo=tpe.suggest,
                    max_evals=maxevals,
                    trials=trials)
        best['num_boost_round'] = self.early_stop_dict[
            trials.best_trial['tid']]
        best['num_leaves'] = int(best['num_leaves'])
        best['verbose'] = -1

        # The next few lines are the only ones related to mlflow.
        if not Path('mlruns').exists():
            # here set the tracking_uri. If None then http://localhost:5000
            client = MlflowClient()
            n_experiments = 0
        elif not reuse_experiment:
            client = MlflowClient()
            n_experiments = len(client.list_experiments())
            experiment_name = 'experiment_' + str(n_experiments)
            client.create_experiment(name=experiment_name)
        else:
            # reuse the most recent experiment rather than creating a new one,
            # so that `client` and `n_experiments` are defined in every branch
            client = MlflowClient()
            n_experiments = len(client.list_experiments()) - 1
        with mlflow.start_run(experiment_id=n_experiments):
            model = lgb.LGBMClassifier(**best)
            model.fit(self.lgtrain.data,
                      self.lgtrain.label,
                      feature_name=self.colnames,
                      categorical_feature=self.categorical_columns)
            for name, value in best.items():
                mlflow.log_param(name, value)
            mlflow.log_metric('binary_logloss',
                              trials.best_trial['result']['loss'])
            mlflow.sklearn.log_model(model, "model")

        model_fname = 'model_{}_.p'.format(model_id)
        best_experiment_fname = 'best_experiment_{}_.p'.format(model_id)

        pickle.dump(model, open(self.PATH / model_fname, 'wb'))
        pickle.dump(best, open(self.PATH / best_experiment_fname, 'wb'))

        self.best = best
        self.model = model
Example #9
    def test_fit_with_real_run_and_client(self, tmp_path, logger_cls,
                                          net_builder_cls):
        from mlflow.tracking import MlflowClient
        client = MlflowClient(tracking_uri=tmp_path.as_uri())
        experiment_name = 'foo'
        experiment_id = client.create_experiment(experiment_name)
        run = client.create_run(experiment_id)
        logger = logger_cls(run, client, create_artifact=False)
        net_builder_cls(callbacks=[logger], max_epochs=3)
        assert os.listdir(tmp_path)
Example #10
def _get_exp_id_from_name(name):
    client = MlflowClient(tracking_uri=os.environ['MLFLOW_TRACKING_URI'])

    try:
        experiment_id = client.create_experiment(name)
    except MlflowException as exc:
        if exc.error_code == 'RESOURCE_ALREADY_EXISTS':
            experiment = client.get_experiment_by_name(name)
            experiment_id = experiment.experiment_id
        else:
            raise  # any other failure would leave experiment_id undefined

    return experiment_id
Example #11
def test_kedro_mlflow_config_experiment_was_deleted(mocker, tmp_path):
    # identify "tmp_path" as the root of a kedro project by mocking the project check
    mocker.patch("kedro_mlflow.utils._is_kedro_project", lambda x: True)

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (tmp_path / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id)

    # the config must properly restore the experiment
    config = KedroMlflowConfig(
        project_path=tmp_path,
        mlflow_tracking_uri="mlruns",
        experiment_opts=dict(name="exp1"),
    )
    assert "exp1" in [
        exp.name for exp in config.mlflow_client.list_experiments()
    ]
Example #12
def test_client_can_be_serialized_with_pickle(tmpdir):
    """
    Verifies that instances of `MlflowClient` can be serialized using pickle, even if the underlying
    Tracking and Model Registry stores used by the client are not serializable using pickle
    """

    class MockUnpickleableTrackingStore(SqlAlchemyTrackingStore):
        pass

    class MockUnpickleableModelRegistryStore(SqlAlchemyModelRegistryStore):
        pass

    backend_store_path = tmpdir.join("test.db").strpath
    artifact_store_path = tmpdir.join("artfiacts").strpath

    mock_tracking_store = MockUnpickleableTrackingStore(
        "sqlite:///" + backend_store_path, artifact_store_path
    )
    mock_model_registry_store = MockUnpickleableModelRegistryStore(
        "sqlite:///" + backend_store_path
    )

    # Verify that the mock stores cannot be pickled because they are defined within a function
    # (i.e. the test function)
    with pytest.raises(AttributeError, match="<locals>.MockUnpickleableTrackingStore'"):
        pickle.dumps(mock_tracking_store)

    with pytest.raises(AttributeError, match="<locals>.MockUnpickleableModelRegistryStore'"):
        pickle.dumps(mock_model_registry_store)

    _tracking_store_registry.register("pickle", lambda *args, **kwargs: mock_tracking_store)
    _model_registry_store_registry.register(
        "pickle", lambda *args, **kwargs: mock_model_registry_store
    )

    # Create an MlflowClient with the store that cannot be pickled, perform
    # tracking & model registry operations, and verify that the client can still be pickled
    client = MlflowClient("pickle://foo")
    client.create_experiment("test_experiment")
    client.create_registered_model("test_model")
    pickle.dumps(client)
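
The property the test asserts can also be seen with a much smaller sketch: a client backed by an ordinary local file store round-trips through pickle and remains usable afterwards (a fresh mlruns directory is assumed so the experiment name does not collide).

import pickle
from mlflow.tracking import MlflowClient

client = MlflowClient("file:./mlruns")                      # fresh local store assumed
restored = pickle.loads(pickle.dumps(client))
restored.create_experiment("after-unpickling")              # the restored client is fully usable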
Example #13
def preprocess_data(catalog,
                    ml_stage,
                    parameters='default',
                    config_path=None,
                    requires=None,
                    provides=None,
                    load_run_id=None,
                    enable_tracking=None,
                    experiment=None,
                    tracking_uri=None,
                    save_dir=None,
                    tags=None):
    cfg = load_configuration(catalog=catalog,
                             parameters=parameters,
                             config_path=config_path,
                             tracking_uri=tracking_uri,
                             save_dir=save_dir)

    run_id = None
    if enable_tracking:
        client = MlflowClient(cfg.tracking_uri)
        try:
            experiment_id = client.create_experiment(experiment)
        except Exception:
            experiment_id = client.get_experiment_by_name(
                experiment).experiment_id
        mlrun = client.create_run(experiment_id=experiment_id)
        run_id = mlrun.info.run_id

    data_catalog = DataCatalog.from_config(catalog=cfg.catalog,
                                           data_dir=cfg.data_dir,
                                           load_versions=load_run_id,
                                           save_version=run_id,
                                           ml_stages=[ml_stage])
    parameters = cfg.parameters.preprocess
    rebind = cfg.catalog.rebind_names
    requires = dict() if requires is None else dict(requires)
    provides = dict() if provides is None else dict(provides)

    preprocess = PreprocessStep(data_catalog,
                                parameters,
                                ml_stage=ml_stage,
                                rebind=rebind,
                                requires=requires,
                                provides=provides,
                                run_id=run_id,
                                tracking_uri=cfg.tracking_uri,
                                experiment_name=experiment,
                                tags=tags)
    preprocess()
Example #14
def test_kedro_mlflow_config_experiment_was_deleted(kedro_project_with_mlflow_conf):

    # create an experiment with the same name and then delete it
    mlflow_tracking_uri = (kedro_project_with_mlflow_conf / "mlruns").as_uri()
    mlflow_client = MlflowClient(mlflow_tracking_uri)
    mlflow_client.create_experiment("exp1")
    mlflow_client.delete_experiment(
        mlflow_client.get_experiment_by_name("exp1").experiment_id
    )

    # the config must properly restore the experiment
    config = KedroMlflowConfig(
        server=dict(mlflow_tracking_uri="mlruns"),
        tracking=dict(experiment=dict(name="exp1")),
    )

    bootstrap_project(kedro_project_with_mlflow_conf)
    with KedroSession.create(project_path=kedro_project_with_mlflow_conf) as session:
        context = session.load_context()  # setup config
        config.setup(context)

    assert "exp1" in [
        exp.name for exp in config.server._mlflow_client.list_experiments()
    ]
Example #15
class MlflowLogger:
    def __init__(self,
                 experiment_name,
                 tracking_uri=None,
                 registry_uri=None,
                 **kwargs):
        super().__init__(**kwargs)

        self.client = MlflowClient(tracking_uri, registry_uri)

        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except MlflowException:
            self.experiment_id = self.client.get_experiment_by_name(
                experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

    def log_params_from_omegaconf_dict(self, params):
        for param_name, element in params.items():
            self._explore_recursive(param_name, element)

    def _explore_recursive(self, parent_name, element):
        if isinstance(element, DictConfig):
            for k, v in element.items():
                if isinstance(v, DictConfig) or isinstance(v, ListConfig):
                    self._explore_recursive(f'{parent_name}.{k}', v)
                else:
                    self.client.log_param(self.run_id, f'{parent_name}.{k}', v)
        elif isinstance(element, ListConfig):
            for i, v in enumerate(element):
                self.client.log_param(self.run_id, f'{parent_name}.{i}', v)

    def log_metric(self, key, value):
        self.client.log_metric(self.run_id, key, value)

    def log_torch_model(self, model, model_name):
        with mlflow.start_run(self.run_id):
            pytorch.log_model(model, model_name)

    def log_param(self, key, value):
        self.client.log_param(self.run_id, key, value)

    def log_artifact(self, local_path):
        self.client.log_artifact(self.run_id, local_path)

    def set_terminated(self):
        self.client.set_terminated(self.run_id)
Example #16
    def mlflow(mlflow_uri=None, tags=None, show_args=None):
        try:
            from mlflow.tracking import MlflowClient
        except ImportError:
            logger.error('ERROR: Failed importing mlflow')
            MlflowClient = None

        if mlflow_uri is None or MlflowClient is None:
            return None, None

        # avoid shared mutable default arguments
        tags = {} if tags is None else tags
        show_args = {} if show_args is None else show_args

        tags["git_branch"] = subprocess.check_output(
            ["git", "describe", "--all"]).decode("utf8").strip()
        tags["git_hash"] = subprocess.check_output(
            ["git", "describe", "--always"]).decode("utf8").strip()
        tags["timestamp"] = datetime.now().strftime("_%Y-%m-%d-%H-%M-%S")
        if "MY_HOSTIP" in os.environ.keys():
            tags["my_host_ip"] = os.environ["MY_HOSTIP"]
        if "MY_HOSTNAME" in os.environ.keys():
            tags["my_host_name"] = os.environ["MY_HOSTNAME"]

        for key, value in tags.items():
            show_args[f'tags-{key}'] = value

        if "exp_name" in tags:
            exp_name = tags["exp_name"]
        else:
            exp_name = "Ocr-PSEOnly"

        ml_client = MlflowClient(mlflow_uri)
        ml_exp = ml_client.get_experiment_by_name(exp_name)
        if ml_exp is None:
            ml_exp_id = ml_client.create_experiment(exp_name)
        else:
            ml_exp_id = ml_exp.experiment_id
        ml_run = ml_client.create_run(ml_exp_id)
        logger.info("ml_run: {}", ml_run)
        ml_run_id = ml_run.info.run_id
        for key, value in tags.items():
            logger.info(f'tag --- {key}: {value}')
            if ml_client is not None:
                ml_client.set_tag(ml_run_id, key, value)
        logger.info("------------")
        for key, value in show_args.items():
            logger.info(f'{key}: {value}')
            if ml_client is not None:
                ml_client.log_param(ml_run_id, key, value)

        return ml_client, ml_run_id
Example #17
class MlflowWriter():
    def __init__(self, experiment_name, task_name, **kwargs):
        self.client = MlflowClient(**kwargs)
        self.experiment_name = experiment_name
        self.task_name = task_name
        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except Exception:
            self.experiment_id = self.client.get_experiment_by_name(experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

        # tag the run created above rather than implicitly starting a fluent run
        self.client.set_tag(self.run_id, MLFLOW_RUN_NAME, task_name)

        print(f"Exp ID:{self.experiment_id}, Run ID:{self.run_id}")

    def log_params_from_omegaconf_dict(self, params):
        for param_name, element in params.items():
            self._explore_recursive(param_name, element)

    def _explore_recursive(self, parent_name, element):
        if isinstance(element, DictConfig):
            for k, v in element.items():
                if isinstance(v, DictConfig) or isinstance(v, ListConfig):
                    self._explore_recursive(f'{parent_name}.{k}', v)
                else:
                    self.client.log_param(self.run_id, f'{parent_name}.{k}', v)
        elif isinstance(element, ListConfig):
            for i, v in enumerate(element):
                self.client.log_param(self.run_id, f'{parent_name}.{i}', v)

    def log_torch_model(self, model):
        with mlflow.start_run(self.run_id):
            pytorch.log_model(model, 'models')

    def log_param(self, key, value):
        self.client.log_param(self.run_id, key, value)

    def log_metric(self, key, value):
        self.client.log_metric(self.run_id, key, value)

    def log_artifact(self, local_path):
        self.client.log_artifact(self.run_id, local_path)

    def set_terminated(self):
        self.client.set_terminated(self.run_id)
Example #18
def get_or_create_experiment(experiment_name) -> Experiment:
    """
    Creates an mlflow experiment
    :param experiment_name: str. The name of the experiment to be set in MLFlow
    :return: the experiment created if it doesn't exist, experiment if it is already created.
    """
    try:
        client = MlflowClient()
        experiment: Experiment = client.get_experiment_by_name(
            name=experiment_name)
        if experiment and experiment.lifecycle_stage != 'deleted':
            return experiment
        else:
            experiment_id = client.create_experiment(name=experiment_name)
            return client.get_experiment(experiment_id=experiment_id)
    except Exception as e:
        logger.error(
            f'Unable to get or create experiment {experiment_name}: {e}')
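
A hypothetical caller would resolve the experiment once and then start runs against it with the fluent API (the experiment name and logged values are placeholders, and the sketch assumes the lookup succeeded):

import mlflow

experiment = get_or_create_experiment("churn-model")
with mlflow.start_run(experiment_id=experiment.experiment_id):
    mlflow.log_param("lr", 0.01)
    mlflow.log_metric("auc", 0.91)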
Example #19
    def _init(self):
        from mlflow.tracking import MlflowClient
        uri = osp.join(osp.dirname(self.logdir), 'mlruns')
        client = MlflowClient(tracking_uri=uri)
        experiments = [e.name for e in client.list_experiments()]
        exp_name = self.config.get("mlflow_experiment", "test")
        if exp_name in experiments:
            # get_experiment_by_name returns an Experiment object, so take its id
            experiment_id = client.get_experiment_by_name(exp_name).experiment_id
        else:
            experiment_id = client.create_experiment(exp_name)
        run = client.create_run(experiment_id,
                                tags={'mlflow.runName': self.trial.trial_id})
        self._run_id = run.info.run_id

        self.client = client
        self._log_hparams()
Example #20
    def testLogArtifact(self):
        with open('artifacts/foo.txt',
                  'rb') as f1, open('artifacts/image.png', 'rb') as f2, open(
                      'artifacts/animation.gif', 'rb') as f3:
            client = MlflowClient(tracking_uri="http://localhost:5000")
            try:
                experiment_id = client.create_experiment("foo")
            except RestException as e:
                experiment = client.get_experiment_by_name("foo")
                experiment_id = experiment.experiment_id
            run = client.create_run(experiment_id)
            run_id = run.info.run_id
            print(experiment_id + ":" + run_id)

            files = {'file1': f1, 'file2': f2, 'file3': f3}
            data = {'run_id': run_id}
            r = requests.post('http://localhost:5001/log_artifact',
                              files=files,
                              data=data)
            print(r.text)
Example #21
class MlflowWriter(BaseMlWriter):
    def __init__(self, log_dir, experiment_name):
        super().__init__()

        mlflow_dir = log_dir / "mlflow" / "mlruns"
        self.client = MlflowClient(tracking_uri=str(mlflow_dir))
        try:
            self.experiment_id = self.client.create_experiment(experiment_name)
        except Exception:
            self.experiment_id = self.client.get_experiment_by_name(
                experiment_name).experiment_id

        self.run_id = self.client.create_run(self.experiment_id).info.run_id

    def __del__(self):
        self.client.set_terminated(self.run_id)

    def log_params(self, params: Dict):
        # from flatten_dict import flatten
        flatten_params = flatten(params)

        for key, value in flatten_params.items():
            self.log_param(key, value)

    def log_artifact(self, local_path: Path):
        if local_path.exists():
            self.client.log_artifact(self.run_id, str(local_path))
        else:
            logger.info(f"NOT Exists: {local_path}")

    def log_torch_model(self, model):
        with mlflow.start_run(self.run_id):
            mlflow.pytorch.log_model(model, 'models')

    def log_param(self, key, value):
        self.client.log_param(self.run_id, key, value)

    def log_metric(self, key, value):
        self.client.log_metric(self.run_id, key, value)
Example #22
class RemoteTracking:
    def __init__(self, tracking_uri=None, registry_uri=None):
        self.server = MlflowClient(tracking_uri, registry_uri)

    def get_experiment_id(self, name, artifact_location=None):
        experiment = self.server.get_experiment_by_name(name)
        if experiment:
            experiment_id = experiment.experiment_id
            return experiment_id
        else:
            print("Experiment not exist")
            print("Creating new experiment on tracking")
            experiment_id = self.server.create_experiment(
                name, artifact_location)
            return experiment_id

    def get_run_id(self, experiment_id):
        run = self.server.create_run(experiment_id)
        run_id = run.info.run_id
        return run_id

    def log_params(self, run_id, params):
        for key, value in params.items():
            self.server.log_param(run_id, key, value)
        print("Parameters successful logged")

    def set_tags(self, run_id, params):
        for key, value in params.items():
            self.server.set_tag(run_id, key, value)
        print("Tags successful logged")

    def log_metrics(self, run_id, params):
        for key, value in params.items():
            self.server.log_metric(run_id, key, value)
        print("Metrics successful logged")

    def log_artifacts(self, run_id, local_dir, artifact_path=None):
        self.server.log_artifacts(run_id, local_dir, artifact_path)
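
A sketch of driving RemoteTracking end to end; the server URI, bucket, experiment name, and logged values are placeholders:

tracker = RemoteTracking(tracking_uri="http://mlflow.example.com:5000")
exp_id = tracker.get_experiment_id("demo", artifact_location="s3://my-bucket/mlflow")
run_id = tracker.get_run_id(exp_id)
tracker.log_params(run_id, {"lr": 0.01, "epochs": 10})
tracker.log_metrics(run_id, {"accuracy": 0.93})
tracker.set_tags(run_id, {"stage": "dev"})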
Example #23
class Tracker():
    def __init__(self, experiment: str, tracking_uri: str):
        self.experiment_name = experiment
        self.experiment_id = None
        self.run_id = None
        self.client = MlflowClient(tracking_uri=tracking_uri)

    def log_param(self, key: str, value: Any):
        self.client.log_param(self.run_id, key, value)

    def log_params(self, params: Dict):
        for k, v in params.items():
            self.log_param(k, v)

    def log_artifacts(self, artifacts: List[str]):
        for artifact in artifacts:
            self.client.log_artifact(self.run_id, artifact)

    def log_metrics(self, metrics: Dict[str, Any]):
        for k, v in metrics.items():
            if isinstance(v, list):
                for i in range(len(v)):
                    self.client.log_metric(self.run_id, k, v[i], step=i)
            else:
                self.client.log_metric(self.run_id, k, v)

    def start_run(self):
        try:
            self.experiment_id = self.client.create_experiment(
                self.experiment_name)
        except Exception:
            self.experiment_id = self.client.get_experiment_by_name(
                self.experiment_name).experiment_id
        run = self.client.create_run(self.experiment_id)
        self.run_id = run.info.run_id

    def end_run(self):
        self.client.set_terminated(self.run_id)
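
Typical usage of Tracker (the experiment name, URI, and values are placeholders): open a run, log, then close it; list-valued metrics are logged with one point per step.

tracker = Tracker(experiment="demo", tracking_uri="file:./mlruns")
tracker.start_run()
tracker.log_params({"lr": 0.01, "batch_size": 32})
tracker.log_metrics({"loss": [0.9, 0.5, 0.3]})              # logged as three steps of "loss"
tracker.end_run()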
Example #24
class MLFlowLogger(LightningLoggerBase):
    """
    Log using `MLflow <https://mlflow.org>`_. Install it with pip:

    .. code-block:: bash

        pip install mlflow

    Example:
        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.loggers import MLFlowLogger
        >>> mlf_logger = MLFlowLogger(
        ...     experiment_name="default",
        ...     tracking_uri="file:./ml-runs"
        ... )
        >>> trainer = Trainer(logger=mlf_logger)

    Use the logger anywhere in your :class:`~pytorch_lightning.core.lightning.LightningModule` as follows:

    >>> from pytorch_lightning import LightningModule
    >>> class LitModel(LightningModule):
    ...     def training_step(self, batch, batch_idx):
    ...         # example
    ...         self.logger.experiment.whatever_ml_flow_supports(...)
    ...
    ...     def any_lightning_module_function_or_hook(self):
    ...         self.logger.experiment.whatever_ml_flow_supports(...)

    Args:
        experiment_name: The name of the experiment
        tracking_uri: Address of local or remote tracking server.
            If not provided, defaults to the service set by ``mlflow.tracking.set_tracking_uri``.
        tags: A dictionary of tags for the experiment.
        save_dir: A path to a local directory where the MLflow runs get saved;
            used to build a ``file:`` tracking URI when ``tracking_uri`` is not provided.

    """
    def __init__(self,
                 experiment_name: str = 'default',
                 tracking_uri: Optional[str] = None,
                 tags: Optional[Dict[str, Any]] = None,
                 save_dir: Optional[str] = None):
        super().__init__()
        if not tracking_uri and save_dir:
            tracking_uri = f'file:{os.sep * 2}{save_dir}'
        self._mlflow_client = MlflowClient(tracking_uri)
        self.experiment_name = experiment_name
        self._run_id = None
        self.tags = tags

    @property
    def experiment(self) -> MlflowClient:
        r"""
        Actual MLflow object. To use mlflow features in your
        :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.

        Example::

            self.logger.experiment.some_mlflow_function()

        """
        return self._mlflow_client

    @property
    def run_id(self):
        if self._run_id is not None:
            return self._run_id

        expt = self._mlflow_client.get_experiment_by_name(self.experiment_name)

        if expt:
            self._expt_id = expt.experiment_id
        else:
            log.warning(
                f'Experiment with name {self.experiment_name} not found. Creating it.'
            )
            self._expt_id = self._mlflow_client.create_experiment(
                name=self.experiment_name)

        run = self._mlflow_client.create_run(experiment_id=self._expt_id,
                                             tags=self.tags)
        self._run_id = run.info.run_id
        return self._run_id

    @rank_zero_only
    def log_hyperparams(self, params: Union[Dict[str, Any],
                                            Namespace]) -> None:
        params = self._convert_params(params)
        params = self._flatten_dict(params)
        for k, v in params.items():
            self.experiment.log_param(self.run_id, k, v)

    @rank_zero_only
    def log_metrics(self,
                    metrics: Dict[str, float],
                    step: Optional[int] = None) -> None:
        timestamp_ms = int(time() * 1000)
        for k, v in metrics.items():
            if isinstance(v, str):
                log.warning(f'Discarding metric with string value {k}={v}.')
                continue
            self.experiment.log_metric(self.run_id, k, v, timestamp_ms, step)

    @rank_zero_only
    def finalize(self, status: str = 'FINISHED') -> None:
        super().finalize(status)
        if status == 'success':
            status = 'FINISHED'
        self.experiment.set_terminated(self.run_id, status)

    @property
    def name(self) -> str:
        return self.experiment_name

    @property
    def version(self) -> str:
        return self._run_id
Example #25
File: mlflow.py  Project: xqk/ray
class MLFlowLoggerCallback(LoggerCallback):
    """MLFlow Logger to automatically log Tune results and config to MLFlow.

    MLFlow (https://mlflow.org) Tracking is an open source library for
    recording and querying experiments. This Ray Tune ``LoggerCallback``
    sends information (config parameters, training results & metrics,
    and artifacts) to MLFlow for automatic experiment tracking.

    Args:
        tracking_uri (str): The tracking URI for where to manage experiments
            and runs. This can either be a local file path or a remote server.
            This arg gets passed directly to mlflow.tracking.MlflowClient
            initialization. When using Tune in a multi-node setting, make sure
            to set this to a remote server and not a local file path.
        registry_uri (str): The registry URI that gets passed directly to
            mlflow.tracking.MlflowClient initialization.
        experiment_name (str): The experiment name to use for this Tune run.
            If None is passed in here, the Logger will automatically then
            check the MLFLOW_EXPERIMENT_NAME and then the MLFLOW_EXPERIMENT_ID
            environment variables to determine the experiment name.
            If the experiment with the name already exists with MlFlow,
            it will be reused. If not, a new experiment will be created with
            that name.
        save_artifact (bool): If set to True, automatically save the entire
            contents of the Tune local_dir as an artifact to the
            corresponding run in MlFlow.

    Example:

    .. code-block:: python

        from ray.tune.integration.mlflow import MLFlowLoggerCallback
        tune.run(
            train_fn,
            config={
                # define search space here
                "parameter_1": tune.choice([1, 2, 3]),
                "parameter_2": tune.choice([4, 5, 6]),
            },
            callbacks=[MLFlowLoggerCallback(
                experiment_name="experiment1",
                save_artifact=True)])

    """
    def __init__(self,
                 tracking_uri: Optional[str] = None,
                 registry_uri: Optional[str] = None,
                 experiment_name: Optional[str] = None,
                 save_artifact: bool = False):

        mlflow = _import_mlflow()
        if mlflow is None:
            raise RuntimeError("MLFlow has not been installed. Please `pip "
                               "install mlflow` to use the MLFlowLogger.")

        from mlflow.tracking import MlflowClient
        self.client = MlflowClient(tracking_uri=tracking_uri,
                                   registry_uri=registry_uri)

        if experiment_name is None:
            # If no name is passed in, then check env vars.
            # First check if experiment_name env var is set.
            experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME")

        if experiment_name is not None:
            # First check if experiment with name exists.
            experiment = self.client.get_experiment_by_name(experiment_name)
            if experiment is not None:
                # If it already exists then get the id.
                experiment_id = experiment.experiment_id
            else:
                # If it does not exist, create the experiment.
                experiment_id = self.client.create_experiment(
                    name=experiment_name)
        else:
            # No experiment_name is passed in and name env var is not set.
            # Now check the experiment id env var.
            experiment_id = os.environ.get("MLFLOW_EXPERIMENT_ID")
            # Confirm that an experiment with this id exists.
            if experiment_id is None or self.client.get_experiment(
                    experiment_id) is None:
                raise ValueError("No experiment_name passed, "
                                 "MLFLOW_EXPERIMENT_NAME env var is not "
                                 "set, and MLFLOW_EXPERIMENT_ID either "
                                 "is not set or does not exist. Please "
                                 "set one of these to use the "
                                 "MLFlowLoggerCallback.")

        # At this point, experiment_id should be set.
        self.experiment_id = experiment_id
        self.save_artifact = save_artifact

        self._trial_runs = {}

    def log_trial_start(self, trial: "Trial"):
        # Create a run for this trial if one does not already exist.
        if trial not in self._trial_runs:
            run = self.client.create_run(experiment_id=self.experiment_id,
                                         tags={"trial_name": str(trial)})
            self._trial_runs[trial] = run.info.run_id

        run_id = self._trial_runs[trial]

        # Log the config parameters.
        config = trial.config

        for key, value in config.items():
            self.client.log_param(run_id=run_id, key=key, value=value)

    def log_trial_result(self, iteration: int, trial: "Trial", result: Dict):
        run_id = self._trial_runs[trial]
        for key, value in result.items():
            try:
                value = float(value)
            except (ValueError, TypeError):
                logger.debug("Cannot log key {} with value {} since the "
                             "value cannot be converted to float.".format(
                                 key, value))
                continue
            self.client.log_metric(run_id=run_id,
                                   key=key,
                                   value=value,
                                   step=iteration)

    def log_trial_end(self, trial: "Trial", failed: bool = False):
        run_id = self._trial_runs[trial]

        # Log the artifacts if save_artifact is set to True.
        if self.save_artifact:
            self.client.log_artifacts(run_id, local_dir=trial.logdir)

        # Stop the run once trial finishes.
        status = "FINISHED" if not failed else "FAILED"
        self.client.set_terminated(run_id=run_id, status=status)
Example #26
class MLFlowLogger(LightningLoggerBase):
    """
    Log using `MLflow <https://mlflow.org>`_.

    Install it with pip:

    .. code-block:: bash

        pip install mlflow

    .. code-block:: python

        from pytorch_lightning import Trainer
        from pytorch_lightning.loggers import MLFlowLogger

        mlf_logger = MLFlowLogger(experiment_name="default", tracking_uri="file:./ml-runs")
        trainer = Trainer(logger=mlf_logger)

    Use the logger anywhere in your :class:`~pytorch_lightning.core.lightning.LightningModule` as follows:

    .. code-block:: python

        from pytorch_lightning import LightningModule


        class LitModel(LightningModule):
            def training_step(self, batch, batch_idx):
                # example
                self.logger.experiment.whatever_ml_flow_supports(...)

            def any_lightning_module_function_or_hook(self):
                self.logger.experiment.whatever_ml_flow_supports(...)

    Args:
        experiment_name: The name of the experiment
        run_name: Name of the new run. The `run_name` is internally stored as a ``mlflow.runName`` tag.
            If the ``mlflow.runName`` tag has already been set in `tags`, the value is overridden by the `run_name`.
        tracking_uri: Address of local or remote tracking server.
            If not provided, defaults to `MLFLOW_TRACKING_URI` environment variable if set, otherwise it falls
            back to `file:<save_dir>`.
        tags: A dictionary of tags for the experiment.
        save_dir: A path to a local directory where the MLflow runs get saved.
            Defaults to `./mlruns` if `tracking_uri` is not provided.
            Has no effect if `tracking_uri` is provided.
        prefix: A string to put at the beginning of metric keys.
        artifact_location: The location to store run artifacts. If not provided, the server picks an appropriate
            default.

    Raises:
        ImportError:
            If required MLFlow package is not installed on the device.
    """

    LOGGER_JOIN_CHAR = "-"

    def __init__(
        self,
        experiment_name: str = "default",
        run_name: Optional[str] = None,
        tracking_uri: Optional[str] = os.getenv("MLFLOW_TRACKING_URI"),
        tags: Optional[Dict[str, Any]] = None,
        save_dir: Optional[str] = "./mlruns",
        prefix: str = "",
        artifact_location: Optional[str] = None,
    ):
        if mlflow is None:
            raise ImportError(
                "You want to use `mlflow` logger which is not installed yet, install it with `pip install mlflow`."
            )
        super().__init__()
        if not tracking_uri:
            tracking_uri = f"{LOCAL_FILE_URI_PREFIX}{save_dir}"

        self._experiment_name = experiment_name
        self._experiment_id = None
        self._tracking_uri = tracking_uri
        self._run_name = run_name
        self._run_id = None
        self.tags = tags
        self._prefix = prefix
        self._artifact_location = artifact_location

        self._mlflow_client = MlflowClient(tracking_uri)

    @property
    @rank_zero_experiment
    def experiment(self) -> MlflowClient:
        r"""
        Actual MLflow object. To use MLflow features in your
        :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.

        Example::

            self.logger.experiment.some_mlflow_function()

        """
        if self._experiment_id is None:
            expt = self._mlflow_client.get_experiment_by_name(
                self._experiment_name)
            if expt is not None:
                self._experiment_id = expt.experiment_id
            else:
                log.warning(
                    f"Experiment with name {self._experiment_name} not found. Creating it."
                )
                self._experiment_id = self._mlflow_client.create_experiment(
                    name=self._experiment_name,
                    artifact_location=self._artifact_location)

        if self._run_id is None:
            if self._run_name is not None:
                self.tags = self.tags or {}
                if MLFLOW_RUN_NAME in self.tags:
                    log.warning(
                        f"The tag {MLFLOW_RUN_NAME} is found in tags. The value will be overridden by {self._run_name}."
                    )
                self.tags[MLFLOW_RUN_NAME] = self._run_name
            run = self._mlflow_client.create_run(
                experiment_id=self._experiment_id,
                tags=resolve_tags(self.tags))
            self._run_id = run.info.run_id
        return self._mlflow_client

    @property
    def run_id(self) -> str:
        """
        Create the experiment if it does not exist to get the run id.

        Returns:
            The run id.
        """
        _ = self.experiment
        return self._run_id

    @property
    def experiment_id(self) -> str:
        """
        Create the experiment if it does not exist to get the experiment id.

        Returns:
            The experiment id.
        """
        _ = self.experiment
        return self._experiment_id

    @rank_zero_only
    def log_hyperparams(self, params: Union[Dict[str, Any],
                                            Namespace]) -> None:
        params = self._convert_params(params)
        params = self._flatten_dict(params)
        for k, v in params.items():
            if len(str(v)) > 250:
                rank_zero_warn(
                    f"Mlflow only allows parameters with up to 250 characters. Discard {k}={v}",
                    RuntimeWarning)
                continue

            self.experiment.log_param(self.run_id, k, v)

    @rank_zero_only
    def log_metrics(self,
                    metrics: Dict[str, float],
                    step: Optional[int] = None) -> None:
        assert rank_zero_only.rank == 0, "experiment tried to log from global_rank != 0"

        metrics = self._add_prefix(metrics)

        timestamp_ms = int(time() * 1000)
        for k, v in metrics.items():
            if isinstance(v, str):
                log.warning(f"Discarding metric with string value {k}={v}.")
                continue

            new_k = re.sub("[^a-zA-Z0-9_/. -]+", "", k)
            if k != new_k:
                rank_zero_warn(
                    "MLFlow only allows '_', '/', '.' and ' ' special characters in metric name."
                    f" Replacing {k} with {new_k}.",
                    RuntimeWarning,
                )
                k = new_k

            self.experiment.log_metric(self.run_id, k, v, timestamp_ms, step)

    @rank_zero_only
    def finalize(self, status: str = "FINISHED") -> None:
        super().finalize(status)
        status = "FINISHED" if status == "success" else status
        if self.experiment.get_run(self.run_id):
            self.experiment.set_terminated(self.run_id, status)

    @property
    def save_dir(self) -> Optional[str]:
        """
        The root file directory in which MLflow experiments are saved.

        Return:
            Local path to the root experiment directory if the tracking uri is local.
            Otherwise returns `None`.
        """
        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
            return self._tracking_uri.lstrip(LOCAL_FILE_URI_PREFIX)

    @property
    def name(self) -> str:
        """
        Get the experiment id.

        Returns:
            The experiment id.
        """
        return self.experiment_id

    @property
    def version(self) -> str:
        """
        Get the run id.

        Returns:
            The run id.
        """
        return self.run_id
Example #27
import warnings

from mlflow.tracking import MlflowClient

if __name__ == "__main__":

    warnings.filterwarnings("ignore")

    def print_experiment_info(experiment):
        print("Name: {}".format(experiment.name))
        print("Experiment_id: {}".format(experiment.experiment_id))
        print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

    # Create an experiment with a name that is unique and case sensitive
    client = MlflowClient()
    experiment_id = client.create_experiment("Social NLP Experiments")

    # Fetch experiment metadata information
    experiment = client.get_experiment(experiment_id)
    print_experiment_info(experiment)
    print("--")

    # Rename and fetch experiment metadata information
    client.rename_experiment(experiment_id, "Social Media NLP Experiments")
    experiment = client.get_experiment(experiment_id)
    print_experiment_info(experiment)
Example #28
import mlflow
from mlflow.tracking import MlflowClient

EXPERIMENT_NAME = '[FR] [Paris] [jbt] basic'

# Tell mlflow to log to the remote tracking server
mlflow.set_tracking_uri("https://mlflow.lewagon.co/")

client = MlflowClient()

try:
    experiment_id = client.create_experiment(EXPERIMENT_NAME)
except Exception:
    experiment_id = client.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

yourname = 'jbt'

if yourname is None:
    print("please define your name, il will be used as a parameter to log")

for model in ["linear", "Randomforest"]:
    run = client.create_run(experiment_id)
    client.log_metric(run.info.run_id, "rmse", 4.5)
    client.log_param(run.info.run_id, "model", model)
    client.log_param(run.info.run_id, "student_name", yourname)
Example #29
class MLFlowLogger(LightningLoggerBase):
    """
    Log using `MLflow <https://mlflow.org>`_. Install it with pip:

    .. code-block:: bash

        pip install mlflow

    Example:
        >>> from pytorch_lightning import Trainer
        >>> from pytorch_lightning.loggers import MLFlowLogger
        >>> mlf_logger = MLFlowLogger(
        ...     experiment_name="default",
        ...     tracking_uri="file:./ml-runs"
        ... )
        >>> trainer = Trainer(logger=mlf_logger)

    Use the logger anywhere in your :class:`~pytorch_lightning.core.lightning.LightningModule` as follows:

    >>> from pytorch_lightning import LightningModule
    >>> class LitModel(LightningModule):
    ...     def training_step(self, batch, batch_idx):
    ...         # example
    ...         self.logger.experiment.whatever_ml_flow_supports(...)
    ...
    ...     def any_lightning_module_function_or_hook(self):
    ...         self.logger.experiment.whatever_ml_flow_supports(...)

    Args:
        experiment_name: The name of the experiment
        tracking_uri: Address of local or remote tracking server.
            If not provided, defaults to `file:<save_dir>`.
        tags: A dictionary of tags for the experiment.
        save_dir: A path to a local directory where the MLflow runs get saved.
            Defaults to `./mlruns` if `tracking_uri` is not provided.
            Has no effect if `tracking_uri` is provided.

    """
    def __init__(self,
                 experiment_name: str = 'default',
                 tracking_uri: Optional[str] = None,
                 tags: Optional[Dict[str, Any]] = None,
                 save_dir: Optional[str] = './mlruns'):

        if not _MLFLOW_AVAILABLE:
            raise ImportError(
                'You want to use `mlflow` logger which is not installed yet,'
                ' install it with `pip install mlflow`.')
        super().__init__()
        if not tracking_uri:
            tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'

        self._experiment_name = experiment_name
        self._experiment_id = None
        self._tracking_uri = tracking_uri
        self._run_id = None
        self.tags = tags
        self._mlflow_client = MlflowClient(tracking_uri)

    @property
    @rank_zero_experiment
    def experiment(self) -> MlflowClient:
        r"""
        Actual MLflow object. To use MLflow features in your
        :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.

        Example::

            self.logger.experiment.some_mlflow_function()

        """
        expt = self._mlflow_client.get_experiment_by_name(
            self._experiment_name)

        if expt:
            self._experiment_id = expt.experiment_id
        else:
            log.warning(
                f'Experiment with name {self._experiment_name} not found. Creating it.'
            )
            self._experiment_id = self._mlflow_client.create_experiment(
                name=self._experiment_name)

        if not self._run_id:
            run = self._mlflow_client.create_run(
                experiment_id=self._experiment_id, tags=self.tags)
            self._run_id = run.info.run_id
        return self._mlflow_client

    @property
    def run_id(self):
        # create the experiment if it does not exist to get the run id
        _ = self.experiment
        return self._run_id

    @property
    def experiment_id(self):
        # create the experiment if it does not exist to get the experiment id
        _ = self.experiment
        return self._experiment_id

    @rank_zero_only
    def log_hyperparams(self, params: Union[Dict[str, Any],
                                            Namespace]) -> None:
        params = self._convert_params(params)
        params = self._flatten_dict(params)
        for k, v in params.items():
            self.experiment.log_param(self.run_id, k, v)

    @rank_zero_only
    def log_metrics(self,
                    metrics: Dict[str, float],
                    step: Optional[int] = None) -> None:
        assert rank_zero_only.rank == 0, 'experiment tried to log from global_rank != 0'

        timestamp_ms = int(time() * 1000)
        for k, v in metrics.items():
            if isinstance(v, str):
                log.warning(f'Discarding metric with string value {k}={v}.')
                continue
            self.experiment.log_metric(self.run_id, k, v, timestamp_ms, step)

    @rank_zero_only
    def finalize(self, status: str = 'FINISHED') -> None:
        super().finalize(status)
        status = 'FINISHED' if status == 'success' else status
        if self.experiment.get_run(self.run_id):
            self.experiment.set_terminated(self.run_id, status)

    @property
    def save_dir(self) -> Optional[str]:
        """
        The root file directory in which MLflow experiments are saved.

        Return:
            Local path to the root experiment directory if the tracking uri is local.
            Otherwise returns `None`.
        """
        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
            return self._tracking_uri.lstrip(LOCAL_FILE_URI_PREFIX)

    @property
    def name(self) -> str:
        return self.experiment_id

    @property
    def version(self) -> str:
        return self.run_id
Example #30
    def optimize(self, metrics, cv_type, n_splits, maxevals=200, do_predict_proba=None):

        params = self.hyperparameter_space()
        extra_params = self.extra_setup()

        env = Environment(
            train_dataset=self.data,
            results_path='HyperparameterHunterAssets',
            # results_path=self.PATH,
            metrics=[metrics],
            do_predict_proba = do_predict_proba,
            cv_type=cv_type,
            cv_params=dict(n_splits=n_splits),
        )

        # optimizer = opt.GradientBoostedRegressionTreeOptimization(iterations=maxevals)
        optimizer = opt.BayesianOptimization(iterations=maxevals)
        optimizer.set_experiment_guidelines(
            model_initializer=lgb.LGBMClassifier,
            model_init_params=params,
            model_extra_params=extra_params
        )
        optimizer.go()

        # there are a few fixes on its way and the next few lines will soon be
        # one. At the moment, to access to the best parameters one has to read
        # from disc and access them
        best_experiment = 'HyperparameterHunterAssets/Experiments/Descriptions/'+\
            optimizer.best_experiment+'.json'
        with open(best_experiment) as best:
            best = json.loads(best.read())['hyperparameters']['model_init_params']

        # The next few lines are the only ones related to mlflow. One
        # "annoying" behaviour of mlflow is that when you instantiate a client
        # it creates the 'mlruns' dir by default as well as the first
        # experiment and there does not seem to be a way I can change this
        # behaviour without changing the source code. The solution is the
        # following hack:
        if not Path('mlruns').exists():
            client = MlflowClient()
        else:
            client = MlflowClient()
            n_experiments = len(client.list_experiments())
            client.create_experiment(name=str(n_experiments))
        experiments = client.list_experiments()
        with mlflow.start_run(experiment_id=experiments[-1].experiment_id) as run:
            model = lgb.LGBMClassifier(**best)
            X, y = self.data.drop('target',axis=1), self.data.target
            model.fit(X,y,
                feature_name=self.colnames,
                categorical_feature=self.categorical_columns
                )
            for name, value in best.items():
                mlflow.log_param(name, value)
            mlflow.log_metric('f1_score', -optimizer.optimizer_result.fun)
            mlflow.sklearn.log_model(model, "model")

        pickle.dump(model, open(self.PATH+'/HHmodel.p', 'wb'))
        pickle.dump(optimizer, open(self.PATH+'/HHoptimizer.p', 'wb'))

        return