def run_synthesis(synthesis_args):
    n, s, synth_args, d, epsilons, datasets, cat_cols, save_models_path, run_name = synthesis_args
    res = []
    with mlflow.start_run(nested=True):
        synth = s(epsilon=float(epsilons[0]), **synth_args)
        for e in epsilons:
            start_time = time.time()
            # Need to save: epochs, epsilons traversed, loss
            sampled = synth.fit_sample(datasets[d]["data"],
                                       categorical_columns=cat_cols.split(','),
                                       update_epsilon=float(e),
                                       verbose=conf.VERBOSE,
                                       mlflow=True)
            end_time = time.time()
            mlflow.set_tags({"synthesizer": type(synth),
                             "args": str(synth_args),
                             "dataset": str(d),
                             "epsilon": str(e),
                             "duration_seconds": str(end_time - start_time)})
            res.append((n, d, str(e), sampled))
            print("Epsilon " + str(e) + " finished for synthesizer " + n
                  + " in " + str(end_time - start_time) + "s")
            datapath = os.path.join(save_models_path, n + "_" + str(e) + "_" + d + "_" + run_name + "_dataset.csv")
            modelpath = os.path.join(save_models_path, n + "_" + str(e) + "_" + d + "_" + run_name + "_model.ckpt")
            sampled.to_csv(datapath)
            mlflow.log_artifact(datapath)
            synth.save(modelpath)
            mlflow.log_artifact(modelpath)
    return res

def train_model(model, X_train, y_train, name, config):
    """Train a single model.

    # Arguments
        model: Model, NN model to train.
        X_train: ndarray(number, lags), input data for training.
        y_train: ndarray(number, ), target data for training.
        name: String, name of the model.
        config: Dict, training parameters.
    """
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    tracking_uri = mlflow.get_tracking_uri()
    print("Current tracking uri: {}".format(tracking_uri))
    tags = {"usuario": "Anonymous"}
    mlflow.set_experiment("traffic_flow-saes")
    with mlflow.start_run() as run:
        mlflow.set_tags(tags)
        mlflow.keras.autolog()
        model.compile(loss="mse", optimizer="rmsprop", metrics=['mape'])
        # early = EarlyStopping(monitor='val_loss', patience=30, verbose=0, mode='auto')
        hist = model.fit(X_train, y_train,
                         batch_size=config["batch"],
                         epochs=config["epochs"],
                         validation_split=0.05)
        model.save('model/' + name + '.h5')
        df = pd.DataFrame.from_dict(hist.history)
        df.to_csv('model/' + name + ' loss.csv', encoding='utf-8', index=False)
        mlflow.log_param("Run_id", run.info.run_id)

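# Usage sketch for train_model above. The toy model, synthetic data, and
# config values are illustrative assumptions, not part of the original
# snippet; a local tracking server at 127.0.0.1:5000 and a 'model/' directory
# are also assumed.
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

def _demo_train_model():
    model = Sequential([Dense(8, activation="relu", input_shape=(12,)),
                        Dense(1)])
    X_train = np.random.rand(128, 12)
    y_train = np.random.rand(128)
    train_model(model, X_train, y_train, "demo", {"batch": 32, "epochs": 2})
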
def train_model(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
    model_dir: Optional[Path] = Path(config.MODEL_DIR),
    experiment_name: Optional[str] = "best",
    run_name: Optional[str] = "model",
) -> None:
    """Train a model using the specified parameters.

    Args:
        params_fp (Path, optional): Parameters to use for training. Defaults to `config/params.json`.
        model_dir (Path): Location of model artifacts. Defaults to `config.MODEL_DIR`.
        experiment_name (str, optional): Name of the experiment to save the run to. Defaults to `best`.
        run_name (str, optional): Name of the run. Defaults to `model`.
    """
    # Load parameters
    params = Namespace(**utils.load_dict(filepath=params_fp))

    # Set experiment and start run
    mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name):
        run_id = mlflow.active_run().info.run_id

        # Train
        artifacts = main.run(params=params)

        # Set tags
        tags = {}
        mlflow.set_tags(tags)

        # Log metrics
        performance = artifacts["performance"]
        logger.info(json.dumps(performance["overall"], indent=2))
        metrics = {
            "precision": performance["overall"]["precision"],
            "recall": performance["overall"]["recall"],
            "f1": performance["overall"]["f1"],
            "best_val_loss": artifacts["loss"],
            "behavioral_score": performance["behavioral"]["score"],
            "slices_f1": performance["slices"]["overall"]["f1"],
        }
        mlflow.log_metrics(metrics)

        # Log artifacts
        with tempfile.TemporaryDirectory() as dp:
            utils.save_dict(vars(artifacts["params"]), Path(dp, "params.json"), cls=NumpyEncoder)
            utils.save_dict(performance, Path(dp, "performance.json"))
            artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
            artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
            torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
            mlflow.log_artifacts(dp)
        mlflow.log_params(vars(artifacts["params"]))

    # Save for repo
    with open(Path(model_dir, "run_id.txt"), "w") as fp:
        fp.write(run_id)
    utils.save_dict(vars(params), Path(model_dir, "params.json"), cls=NumpyEncoder)
    utils.save_dict(performance, Path(model_dir, "performance.json"))

def log_sk_model(sk_model,
                 model_artifact_path: str = 'model',
                 registered_model_name: str = None,
                 params: dict = None,
                 metrics: dict = None,
                 tags: dict = None,
                 artifacts: dict = None) -> None:
    if params is None:
        params = {}
    if metrics is None:
        metrics = {}
    if tags is None:
        tags = {}
    if artifacts is None:
        artifacts = {}

    _logger.info("Logging Scikit-Learn model to MLflow")
    mlflow.sklearn.log_model(sk_model=sk_model,
                             artifact_path=model_artifact_path,
                             conda_env='./environment.yml',
                             registered_model_name=registered_model_name)
    mlflow.log_params(params)
    mlflow.set_tags(tags)
    mlflow.log_metrics(metrics)
    for local_path, artifact_path in artifacts.items():
        _logger.debug(f"Logging artifact to MLflow: {local_path} - {artifact_path}")
        mlflow.log_artifact(local_path, artifact_path)

def set_tags(self, tags, optuna_log=True):
    """Wrapper of the corresponding MLflow function.

    The data is also added to Optuna as a user attribute.

    Args:
        tags ([Dict]): Dict of tags.
        optuna_log (bool, optional): Internal parameter that should be
            ignored by the API user. Defaults to True.
    """
    for key, value in tags.items():
        if optuna_log:
            self._trial.set_user_attr(key, value)
        _logger.info(f"Tag: {key}: {value}")
        value = str(value)  # make sure it is a string
        if len(value) > self._max_mlflow_tag_length:
            tags[key] = textwrap.shorten(value, self._max_mlflow_tag_length)
    try:
        mlflow.set_tags(normalize_mlflow_entry_names_in_dict(tags))
    except Exception as e:
        _logger.error(
            "Exception raised during MLflow communication! Exception: {}".format(e),
            exc_info=True,
        )

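# A minimal, self-contained illustration of the truncation step used in
# set_tags above: textwrap.shorten collapses whitespace and trims the value
# to the tag-length limit (the 5000 used here is an assumed limit).
import textwrap

long_value = "word " * 2000                      # ~10000 characters
shortened = textwrap.shorten(long_value, 5000)   # ends with a '[...]' placeholder
assert len(shortened) <= 5000
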
def log_experiment(params, metrics=None, tags=None, model=None,
                   experiment_name=EXPERIMENT_NAME):
    """Logs the model and related parameters and metrics as an experiment.

    :param params: (dict): key-value pairs of named parameters used by the model
    :param metrics: (dict): key-value pairs of metrics produced by the model
    :param tags: (dict): key-value pairs of tags
    :param model: (str): file location of the saved model
    :param experiment_name: (str): name of the experiment for which data is being logged

    For permanent tracking URIs, such as a Postgres database, set the
    MLFLOW_TRACKING_URI environment variable.
    """
    try:
        mlflow.create_experiment(experiment_name)
    except MlflowException:
        logger.info("Found existing experiment. Adding new version to that.")
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run():
        mlflow.log_params(params)
        if metrics is not None:
            mlflow.log_metrics(metrics)
        if tags is not None:
            mlflow.set_tags(tags)
        if model is not None:
            mlflow.log_artifact(model)
    try:
        mlflow.end_run()
    except MlflowException:
        pass

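# Example call of log_experiment above; the parameter values, model path,
# and experiment name are illustrative assumptions.
log_experiment(
    params={"C": 1.0, "max_iter": 100},
    metrics={"f1": 0.87},
    tags={"stage": "baseline"},
    model="model/clf.joblib",
    experiment_name="census-baseline",
)
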
def log_mlflow(experiment, model, params={}, metrics={}, tags={}):
    """Log models, params, metrics, and tags to the MLflow server."""
    print("=== Logging in MLFlow Server...")
    mlflow.set_experiment(experiment)
    with mlflow.start_run():
        # Log params
        mlflow.log_params(params)
        print("Params logged.")
        # Log metrics
        mlflow.log_metrics(metrics)
        print("Metrics logged.")
        # Log tags
        mlflow.set_tags(tags)
        print("Tags logged.")
        # Log model
        if model is not None:
            mlflow.pyfunc.log_model(artifact_path="model",
                                    python_model=model,
                                    conda_env="config/conda.yaml")
            runid = mlflow.active_run().info.run_uuid
            print("Model saved in run: {}.".format(runid))

def main(args: argparse.Namespace):
    keras_model = KerasModel(args.model_dir)
    raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
        f"{args.dataset_dir}/test",
        label_mode="int",
        batch_size=args.batch_size,
        class_names=keras_model.class_names,
        seed=SEED_VALUE,
    )
    y_true, y_pred = [], []
    for x, y in raw_test_ds:
        predictions = keras_model.predict(x, return_id=True)
        # Pick the class id with the highest predicted probability.
        predictions = [max(p.keys(), key=lambda k: p[k]) for p in predictions]
        y_true.extend(y.numpy().tolist())
        y_pred.extend(predictions)
        assert len(y_true) == len(y_pred)
        if not len(y_true) % 1_000:
            logging.info(f"Tested {len(y_true)} samples.")
    scores = {
        "accuracy_score": accuracy_score(y_true, y_pred),
        "f1_score": f1_score(y_true, y_pred),
        "precision_score": precision_score(y_true, y_pred),
        "recall_score": recall_score(y_true, y_pred),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }
    logging.info(scores)
    mlflow.set_tags(scores)

def trainer(regularisation: int, max_iter: int):
    with mlflow.start_run() as _:
        mlflow.set_tags({"training_type": "Baseline"})
        mlflow.log_params({"C": regularisation, "max_iter": max_iter})
        X, y = fetch_censusdata()
        clf = make_pipeline(
            make_linear_preprocessor(),
            LogisticRegression(C=regularisation, max_iter=max_iter,
                               class_weight="balanced", random_state=0),
        )
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        precision, recall, fscore, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary")
        mlflow.log_metrics({
            "precision": precision,
            "recall": recall,
            "fscore": fscore
        })

def detailed_score(self, generation):
    """A more detailed run with mlflow.

    When using this function, it is recommended that you also use the mlflow
    tracking function for the model you're using, e.g. mlflow tracking for
    TensorFlow, Torch, scikit-learn, etc.

    TODO: Merge this function with easy_score, and pass the parameter used
    on the run as a parameter for easy_score.
    """
    if self.score_function is None:
        raise ValueError("No score function set; you can set it using "
                         "set_score(func) or by passing score=func during "
                         "class instantiation")
    for i, elem in enumerate(self.population):
        with mlflow.start_run() as run:
            tags = {
                'generation': generation + 1,
                'individue': i + 1,
            }
            mlflow.set_tags(tags)
            mlflow.log_param("Generation", generation + 1)
            score, model = self.score_function(elem)
            if score > self.top_score:
                print(f"*** New optimal model found with a score of {score} ***")
                self.top_score = score
                self.top_model = (score, model)
            elem['score'] = score
    sorted_list = sorted(self.population, key=itemgetter('score'), reverse=True)
    self.first_parent = sorted_list[0]
    self.second_parent = sorted_list[1]
    generation_top_score = self.first_parent['score']
    self.genetic_tree.append(self.first_parent)
    print(f"Better parent {self.first_parent}")
    print("DONE")

def run_experiment(self, **kwargs):
    if self.options.opt:
        self.optimize()
    seed = self.config['general']['seed']
    np.random.seed(seed)
    tf.random.set_seed(seed)
    if kwargs:
        self.adjust_pars(kwargs)
    self.data = Data(self.config, self.options, self.pars)
    if self.options.mlflow:
        mlflow.set_experiment(self.config['general']['experiment'])
        mlflow.start_run()
        self.log_pars()
        mlflow.set_tags({'seed': seed, 'mode': self.options.mode})
    if self.feature_type == 'combined' and self.fusion == 'late':
        y_true, y_pred, mae = self.train_bimodal()
    else:
        _, y_pred, _, y_true, mae = self.train_model(self.feature_type)
    if self.options.verbose:
        run_validation(y_true, y_pred)
    if self.options.mlflow:
        mlflow.end_run()
    return -mae

def before_pipeline_run(self, run_params: Dict[str, Any],
                        pipeline: Pipeline, catalog: DataCatalog) -> None:
    """Hook to be invoked before a pipeline runs.

    Args:
        run_params: The params needed for the given run. Should be identical
            to the data logged by Journal.
            # @fixme: this needs to be modelled explicitly as code, instead
            # of a comment (a TypedDict sketch follows this function)
            Schema:
                {
                    "run_id": str,
                    "project_path": str,
                    "env": str,
                    "kedro_version": str,
                    "tags": Optional[List[str]],
                    "from_nodes": Optional[List[str]],
                    "to_nodes": Optional[List[str]],
                    "node_names": Optional[List[str]],
                    "from_inputs": Optional[List[str]],
                    "load_versions": Optional[List[str]],
                    "pipeline_name": str,
                    "extra_params": Optional[Dict[str, Any]],
                }
        pipeline: The ``Pipeline`` that will be run.
        catalog: The ``DataCatalog`` to be used during the run.
    """
    self.context = load_context(
        project_path=run_params["project_path"],
        env=run_params["env"],
        extra_params=run_params["extra_params"],
    )
    mlflow_conf = get_mlflow_config(self.context)
    mlflow_conf.setup(self.context)

    run_name = (mlflow_conf.run_opts["name"]
                if mlflow_conf.run_opts["name"] is not None
                else run_params["pipeline_name"])
    mlflow.start_run(
        run_id=mlflow_conf.run_opts["id"],
        experiment_id=mlflow_conf.experiment.experiment_id,
        run_name=run_name,
        nested=mlflow_conf.run_opts["nested"],
    )
    # Set tags only for run parameters that have values.
    mlflow.set_tags({k: v for k, v in run_params.items() if v})
    # Add the git sha manually for consistency with the Journal.
    # TODO: this does not take uncommitted files into account, so it
    # does not ensure reproducibility. Define what to do.
    mlflow.set_tag("git_sha", _git_sha(run_params["project_path"]))
    mlflow.set_tag(
        "kedro_command",
        _generate_kedro_command(
            tags=run_params["tags"],
            node_names=run_params["node_names"],
            from_nodes=run_params["from_nodes"],
            to_nodes=run_params["to_nodes"],
            from_inputs=run_params["from_inputs"],
            load_versions=run_params["load_versions"],
            pipeline_name=run_params["pipeline_name"],
        ),
    )

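# One way to model the run_params schema explicitly, as the @fixme in the
# docstring above suggests. This is a sketch; the class name is hypothetical,
# but the field names and types are taken verbatim from the documented schema.
from typing import Any, Dict, List, Optional, TypedDict

class KedroRunParams(TypedDict):
    run_id: str
    project_path: str
    env: str
    kedro_version: str
    tags: Optional[List[str]]
    from_nodes: Optional[List[str]]
    to_nodes: Optional[List[str]]
    node_names: Optional[List[str]]
    from_inputs: Optional[List[str]]
    load_versions: Optional[List[str]]
    pipeline_name: str
    extra_params: Optional[Dict[str, Any]]
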
def mlflow_log(history, config, machine_type, out_path, tb_log_dir):
    mlflow.set_tracking_uri(config['IO_OPTION']['MLFLOW_PATH'] + '/mlruns')
    run_name = config['IO_OPTION']['model_name'] + '_' + machine_type
    with mlflow.start_run(run_name=run_name):
        # Log IO_OPTION etc. into mlflow
        mlflow.set_tags(config['IO_OPTION'])
        mlflow.set_tags(config['etc'])
        mlflow.set_tag('machine_type', machine_type)
        mlflow.set_tag('tb_log_dir', tb_log_dir)
        # Log spectrogram params into mlflow
        for key, value in config['mel_spectrogram_param'].items():
            mlflow.log_param(key, value)
        # Log fit params
        for key, value in config['fit'].items():
            mlflow.log_param(key, value)
        # Log other info
        mlflow.log_param('loss_type', 'MSE')
        # Log results into mlflow
        mlflow.log_metric('train_epoch_score', history['epoch_score_lists']['train'][-1])
        mlflow.log_metric('valid_epoch_score', history['epoch_score_lists']['valid'][-1])
        # Log model
        mlflow.log_artifact(out_path)

def run_synthesis(synthesis_args):
    """A parallelizable run of the synthesis step.

    :param synthesis_args: n = name of the synthesizer,
        s = synthesizer object,
        synth_args = dictionary of hyperparams for the synthesizer,
        d = name of the dataset,
        e = epsilon value,
        datasets = dataset dictionary,
        cat_cols = list of categorical columns in dataset d
    :type synthesis_args: tuple
    :return: (n, d, str(e), sampled), where sampled is the synthesized data of size len(d)
    :rtype: tuple
    """
    n, s, synth_args, d, e, datasets, cat_cols = synthesis_args
    with mlflow.start_run(nested=True):
        start_time = time.time()
        synth = s(epsilon=float(e), **synth_args)
        d_copy = datasets[d]["data"].copy()
        sampled = synth.fit_sample(d_copy, categorical_columns=cat_cols.split(','))
        end_time = time.time()
        mlflow.set_tags({"synthesizer": type(synth),
                         "args": str(synth_args),
                         "epsilon": str(e),
                         "dataset": str(d),  # tag the dataset name, not the whole dict
                         "duration_seconds": str(end_time - start_time)})
        print(datasets[d]["name"] + ' finished. Epsilon: ' + str(e))
        datasets[d][n][str(e)] = sampled
    return (n, d, str(e), sampled)

def log(self):
    """Log params, metrics, and tags to MLflow if is_dev is False."""
    if not self.is_dev:
        mlflow.log_params(self.params)
        mlflow.log_metrics(self.metrics)
        mlflow.set_tags(self.tags)
        self._save_model()

def setup_mlflow(cfg, features_cfg, data_cfg):
    import mlflow

    experiment_id = OmegaConf.select(cfg, "experiment_id", default=None)
    if experiment_id is None and cfg.experiment_name is not None:
        mlflow.set_experiment(cfg.experiment_name)
        experiment = mlflow.get_experiment_by_name(cfg.experiment_name)
        logger.debug(f"MLFlow Experiment: {cfg.experiment_name}")
        experiment_id = experiment.experiment_id

    orig_cwd = get_original_cwd()
    tracking_uri = f"file://{orig_cwd}/mlruns"
    mlflow.set_tracking_uri(tracking_uri)
    logger.info(f"MLFlow Tracking URI: {tracking_uri}")

    # if cfg.model == "xgboost":
    #     import mlflow.xgboost
    #     logger.debug("Turning on MLFlow autologging for XGBoost...")
    #     mlflow.xgboost.autolog()

    run = mlflow.start_run(experiment_id=experiment_id)

    processed_data_dir = Path(to_absolute_path(data_cfg.hydra.run.dir))
    if processed_data_dir is not None:
        data_hydra_dir = processed_data_dir / ".hydra"
        mlflow.log_artifacts(data_hydra_dir, artifact_path="processed_data_configs")
        data_cfg = OmegaConf.load(data_hydra_dir / "config.yaml")
        for name, param_name in DATA_CONFIGS_TO_LOG.items():
            param = OmegaConf.select(data_cfg, param_name)
            if param is not None:
                if isinstance(param, list):
                    param = ", ".join(str(p) for p in param)
                mlflow.log_param(name, param)

    model_hydra_dir = Path(".hydra")
    mlflow.log_artifacts(model_hydra_dir, artifact_path="model_configs")
    mlflow.log_params({
        "model": cfg.model,
        "lag": features_cfg.lag,
        "exog_lag": features_cfg.exog_lag,
        "lead": features_cfg.lead,
        "cv_method": cfg.cv.method,
    })
    mlflow.log_params(cfg.cv.params)

    tags = OmegaConf.select(cfg, "tags", default={})
    if tags:
        mlflow.set_tags(tags)

    return run

def trainer(regularisation: int, max_iter: int):
    with mlflow.start_run() as _:
        mlflow.set_tags({"training_type": "FeatureImportance"})
        mlflow.log_params({"C": regularisation, "max_iter": max_iter})
        X, y = fetch_censusdata()
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

        p1 = make_nonlinear_preprocessor()
        p2 = make_nonlinear_to_linear_preprocessor()
        clf = LogisticRegression(C=regularisation, max_iter=max_iter,
                                 class_weight="balanced", random_state=0)
        pp = make_pipeline(p1, p2, clf)
        pp.fit(X_train, y_train)

        # Log reference metrics
        y_pred = pp.predict(X_test)
        ref_precision, ref_recall, ref_fscore, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary")
        mlflow.log_metrics({
            "precision": ref_precision,
            "recall": ref_recall,
            "fscore": ref_fscore
        })

        for i in range(len(FEATURES)):
            with mlflow.start_run(nested=True) as _:
                mlflow.set_tags({"training_type": "FeatureImportance"})
                mlflow.log_params({"feature": FEATURES[i]})
                X_test_tr = p1.transform(X_test)
                # Shuffle feature i
                indexes = np.arange(X_test_tr.shape[0])
                np.random.shuffle(indexes)
                X_test_tr[:, i] = X_test_tr[indexes, i]
                X_test_tr = p2.transform(X_test_tr)
                y_pred = clf.predict(X_test_tr)
                precision, recall, fscore, _ = precision_recall_fscore_support(
                    y_test, y_pred, average="binary")
                mlflow.log_metrics({
                    "precision": precision,
                    "precision_penalty": precision - ref_precision,
                    "recall": recall,
                    "recall_penalty": recall - ref_recall,
                    "fscore": fscore,
                    "fscore_penalty": fscore - ref_fscore,
                })

def _log_xp(config, name):
    set_experiment(name)
    set_tags(config.get('tags', {}))
    config_path = "/tmp/config.json"
    with open(config_path, "w") as f:
        simplejson.dump(config, f)
    log_artifact(config_path)
    os.remove(config_path)
    return name

def __init__(self, exp_name: str, tag: Dict):
    """Initialize mlflow.

    Args:
        exp_name (str): Experiment name
        tag (Dict): Tag information
    """
    mlflow.set_experiment(exp_name)
    mlflow.set_tags(tag)

def __call__(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> None:
    # Set the tracking URI for MLflow.
    if self._tracking_uri is not None:
        mlflow.set_tracking_uri(self._tracking_uri)

    # Set the experiment for MLflow.
    mlflow.set_experiment(study.study_name)

    with mlflow.start_run(run_name=str(trial.number), nested=self._nest_trials):
        # Log the metric to MLflow.
        trial_value = trial.value if trial.value is not None else float("nan")
        mlflow.log_metric(self._metric_name, trial_value)

        # Log the params to MLflow.
        mlflow.log_params(trial.params)

        # Collect the tags for MLflow.
        tags: Dict[str, str] = {}
        tags["number"] = str(trial.number)
        tags["datetime_start"] = str(trial.datetime_start)
        tags["datetime_complete"] = str(trial.datetime_complete)

        # Set the state, convert it to str, and remove the common prefix.
        trial_state = trial.state
        if isinstance(trial_state, TrialState):
            tags["state"] = str(trial_state).split(".")[-1]

        # Set the direction, convert it to str, and remove the common prefix.
        study_direction = study.direction
        if isinstance(study_direction, StudyDirection):
            tags["direction"] = str(study_direction).split(".")[-1]

        tags.update(trial.user_attrs)
        distributions = {(k + "_distribution"): str(v)
                         for (k, v) in trial.distributions.items()}
        tags.update(distributions)
        if self._tag_study_user_attrs:
            tags.update(study.user_attrs)

        # This is a temporary fix on the Optuna side. It avoids an error with
        # user attributes that are too long. It should be fixed on the MLflow
        # side later; when it is, this code block can be removed.
        # See https://github.com/optuna/optuna/issues/1340
        # and https://github.com/mlflow/mlflow/issues/2931
        max_mlflow_tag_length = 5000
        for key, value in tags.items():
            value = str(value)  # make sure it is a string
            if len(value) > max_mlflow_tag_length:
                tags[key] = textwrap.shorten(value, max_mlflow_tag_length)

        mlflow.set_tags(tags)

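# Usage sketch for the callback above. Its attributes match the shape of
# optuna's MLflowCallback integration, which is assumed here; the objective
# and study name are illustrative.
import optuna
from optuna.integration import MLflowCallback

def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

mlflow_cb = MLflowCallback(tracking_uri="mlruns", metric_name="value")
study = optuna.create_study(study_name="demo")
study.optimize(objective, n_trials=10, callbacks=[mlflow_cb])
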
def start_run(param_prefix: Optional[str] = None,
              experiment_name: Optional[str] = None,
              run_name: Optional[str] = None,
              artifact_location: Optional[str] = None,
              **args) -> mlf.ActiveRun:
    """Close alias of mlflow.start_run. The only difference is that
    uv.start_run attempts to extract parameters from the environment and log
    those to the bound UV reporter using `report_params`.

    Note that if experiment_name is specified and refers to an existing
    experiment, then artifact_location will not be honored, as this is an
    immutable property of an mlflow experiment. This method will issue a
    warning but proceed.

    Note that the returned value can be used as a context manager:
    https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.start_run
    """
    if experiment_name is None:
        experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME")
    if run_name is None:
        run_name = os.environ.get("MLFLOW_RUN_NAME")
    if artifact_location is None:
        artifact_location = os.environ.get("MLFLOW_ARTIFACT_ROOT")

    _ensure_non_null_project(artifact_location)

    # Make sure the experiment exists before the run starts.
    if experiment_name is not None:
        if mlf.get_experiment_by_name(experiment_name) is None:
            mlf.create_experiment(experiment_name, artifact_location)
        mlf.set_experiment(experiment_name)

    ret = mlf.start_run(run_name=run_name, **args)

    env_params = ue.extract_params(prefix=param_prefix)
    mlf.set_tags(env_params)

    # For CAIP jobs, add the job id as a tag, along with a link to the
    # console page.
    cloud_ml_job_id = os.environ.get('CLOUD_ML_JOB_ID')
    if cloud_ml_job_id is not None:
        mlf.set_tag(
            'cloud_ml_job_details',
            f'https://console.cloud.google.com/ai-platform/jobs/{cloud_ml_job_id}'
        )
        mlf.set_tag('cloud_ml_job_id', cloud_ml_job_id)

    mlf_artifact_uri = mlf.get_artifact_uri()
    if mlf_artifact_uri is not None and artifact_location is not None:
        if not mlf_artifact_uri.startswith(artifact_location):
            logging.warning(
                f'requested mlflow artifact location {artifact_location} differs '
                f'from existing experiment artifact uri {mlf_artifact_uri}')

    return ret

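# As the docstring notes, the return value works as a context manager. A
# minimal sketch, assuming the surrounding module's `mlf` alias for mlflow;
# the experiment, run, and metric names are illustrative.
with start_run(experiment_name="uv-demo", run_name="trial-1") as active_run:
    mlf.log_metric("loss", 0.123)
    print(active_run.info.run_id)
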
def log_mlflow_results(model, metrics, feat_config, model_config, tags):
    TRACKING_URI = "https://mlflow.caps.dev.dp.elsevier.systems"
    mlflow.set_tracking_uri(TRACKING_URI)
    mlflow.set_experiment("cp-ml-reference-separator-evaluator")
    with mlflow.start_run():
        mlflow.log_metrics(metrics)
        mlflow.keras.log_model(model, "models")
        mlflow.log_params(feat_config)
        mlflow.log_params(model_config)
        mlflow.set_tags(tags)

def mlflow_run(self, df):
    with mlflow.start_run() as run:
        run_id = run.info.run_uuid
        experiment_id = run.info.experiment_id

        # Train/test split
        train, test = train_test_split(df, test_size=0.2, random_state=42,
                                       stratify=df[['is_profit']])
        y = train['is_profit'].copy()
        X = train.drop(columns=['is_profit']).copy()
        y_test = test['is_profit'].copy()
        X_test = test.drop(columns=['is_profit']).copy()

        # Pipeline
        float_cols = df.select_dtypes(include='float64').columns
        preprocessor = ColumnTransformer(
            [
                ('StandardScaler', StandardScaler(), float_cols),
                # ('OneHotEncoder', OneHotEncoder(), cat_cols),
            ],
            remainder='passthrough')
        full_pipe = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('model', self.model),
        ])

        # Fit
        t_start = time.time()
        full_pipe.fit(X, y)
        t_training = time.time() - t_start

        # Predict
        t_start = time.time()
        y_test_pred_proba = full_pipe.predict_proba(X_test)
        t_prediction = time.time() - t_start

        # Score
        proba_threshold = 0.75
        metrics = {
            'auroc': roc_auc_score(y_test, y_test_pred_proba[:, 1]),
            'precision': precision_score(y_test, (y_test_pred_proba[:, 1] > proba_threshold)),
            't_training': t_training,
            't_prediction': t_prediction,
        }

        # Log params, metrics, and tags
        mlflow.log_params(self.params)
        mlflow.log_metrics(metrics)
        mlflow.set_tags(self.tags)

        # Log model
        # mlflow.sklearn.log_model(full_pipe, artifact_path='model')
        # wrapped_model = SklearnModelWrapper(full_pipe)
        # mlflow.pyfunc.log_model('model', python_model=wrapped_model)

    return full_pipe

def log_tag(dry_run, model_name, data_name, suffix):
    if suffix is not None:
        suffix = suffix[1:]  # drop the leading separator character
    if not dry_run:
        # Guard against a missing suffix so the run name stays well-formed.
        run_name = f'{model_name}-{data_name}' + (f'-{suffix.upper()}' if suffix else '')
        mlflow.set_tags({
            'mlflow.runName': run_name,
            'model': model_name,
            'data': data_name,
            'suffix': suffix,
        })

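# Note: 'mlflow.runName' is one of MLflow's reserved system tags; setting it
# via set_tags renames the active run in the UI, which is what log_tag above
# relies on. A tiny sketch (the run name is illustrative):
import mlflow

with mlflow.start_run():
    mlflow.set_tags({"mlflow.runName": "resnet-cifar10-AUG"})
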
def _set_all_tags(self):
    """Collect the dagster_run_id plus all env variables/tags that have been
    specified by the user in the config_schema, and log them as tags in
    mlflow.
    """
    tags = {tag: environ.get(tag) for tag in self.env_tags_to_log}
    tags["dagster_run_id"] = self.dagster_run_id
    if self.extra_tags:
        tags.update(self.extra_tags)
    mlflow.set_tags(tags)

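# A standalone sketch of what _set_all_tags above collects; the env variable
# names, run id, and extra tags below are illustrative assumptions.
from os import environ

env_tags_to_log = ["GIT_BRANCH", "CI_PIPELINE_ID"]
tags = {tag: environ.get(tag) for tag in env_tags_to_log}
tags["dagster_run_id"] = "run-123"       # supplied by Dagster at runtime
tags.update({"team": "data-science"})    # extra_tags from the resource config
# mlflow.set_tags(tags) would then log these on the active run.
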
def mlflow_client(tmp_path_factory, resnet_model_uri: str, spark: SparkSession) -> MlflowClient:
    tmp_path = tmp_path_factory.mktemp("mlflow")
    tmp_path.mkdir(parents=True, exist_ok=True)
    tracking_uri = "sqlite:///" + str(tmp_path / "tracking.db")
    mlflow.set_tracking_uri(tracking_uri)
    experiment_id = mlflow.create_experiment("rikai-test", str(tmp_path))

    # Simplest case
    with mlflow.start_run(experiment_id=experiment_id):
        mlflow.log_param("optimizer", "Adam")
        # Fake training loop
        model = torch.load(resnet_model_uri)
        artifact_path = "model"
        schema = ("STRUCT<boxes:ARRAY<ARRAY<float>>,"
                  "scores:ARRAY<float>,labels:ARRAY<int>>")
        pre_processing = ("rikai.contrib.torch.transforms."
                          "fasterrcnn_resnet50_fpn.pre_processing")
        post_processing = ("rikai.contrib.torch.transforms."
                           "fasterrcnn_resnet50_fpn.post_processing")
        rikai.mlflow.pytorch.log_model(
            model,          # same as vanilla mlflow
            artifact_path,  # same as vanilla mlflow
            schema,
            pre_processing,
            post_processing,
            registered_model_name="rikai-test",  # same as vanilla mlflow
        )

    # Vanilla mlflow
    with mlflow.start_run():
        mlflow.pytorch.log_model(model, artifact_path,
                                 registered_model_name="vanilla-mlflow")
        mlflow.set_tags({
            "rikai.model.flavor": "pytorch",
            "rikai.output.schema": schema,
            "rikai.transforms.pre": pre_processing,
            "rikai.transforms.post": post_processing,
        })

    # Vanilla mlflow, no tags
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            model,
            artifact_path,
            registered_model_name="vanilla-mlflow-no-tags",
        )

    spark.conf.set("rikai.sql.ml.registry.mlflow.tracking_uri", tracking_uri)
    return mlflow.tracking.MlflowClient(tracking_uri)

def test_set_tags():
    exact_expected_tags = {"name_1": "c", "name_2": "b", "nested/nested/name": 5}
    approx_expected_tags = set([MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])
    with start_run() as active_run:
        run_id = active_run.info.run_id
        mlflow.set_tags(exact_expected_tags)
    finished_run = tracking.MlflowClient().get_run(run_id)
    # Validate tags
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_val in finished_run.data.tags.items():
        if tag_key not in approx_expected_tags:
            assert str(exact_expected_tags[tag_key]) == tag_val

def start_callback(self, parameters):
    try:
        mlflow.set_experiment(self._experiment_name)
        if mlflow.active_run() is not None:
            mlflow.end_run()
        mlflow.start_run()
        mlflow.set_tags(self._mlflow_tags)
        mlflow.log_params(parameters)
        mlflow.log_params(self._mlflow_parameters)
    except mlflow.exceptions.MlflowException as msg:
        self._enable_mlflow = False
        print(f"[WARNING][MlFlowHandler] - [StartCallback] {msg}")
        print("[WARNING][MlFlowHandler] - [StartCallback] mlflow is disabled")

def train_model(
    args_fp: Path = Path(config.CONFIG_DIR, "args.json"),
    experiment_name: Optional[str] = "best",
    run_name: Optional[str] = "model",
) -> None:
    """Train a model using the specified parameters.

    Args:
        args_fp (Path, optional): Location of arguments to use for training. Defaults to `config/args.json`.
        experiment_name (str, optional): Name of the experiment to save the run to. Defaults to `best`.
        run_name (str, optional): Name of the run. Defaults to `model`.
    """
    # Load arguments
    args = Namespace(**utils.load_dict(filepath=args_fp))

    # Set experiment and start run
    mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name) as run:  # NOQA: F841 (assigned to but never used)
        # Train
        artifacts = main.run(args=args)

        # Set tags
        tags = {"data_version": artifacts["data_version"]}
        mlflow.set_tags(tags)

        # Log metrics
        performance = artifacts["performance"]
        logger.info(json.dumps(performance["overall"], indent=2))
        metrics = {
            "precision": performance["overall"]["precision"],
            "recall": performance["overall"]["recall"],
            "f1": performance["overall"]["f1"],
            "best_val_loss": artifacts["loss"],
            "behavioral_score": artifacts["behavioral_report"]["score"],
            "slices_f1": performance["slices"]["f1"],
        }
        mlflow.log_metrics(metrics)

        # Log artifacts
        with tempfile.TemporaryDirectory() as dp:
            artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
            artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
            torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
            utils.save_dict(performance, Path(dp, "performance.json"))
            utils.save_dict(artifacts["behavioral_report"], Path(dp, "behavioral_report.json"))
            mlflow.log_artifacts(dp)
        mlflow.log_params(vars(artifacts["args"]))

def pMSE_test(args):
    """Parallelizable."""
    d1, d2, mlflow_step, name, epsilon, synth_name, dataset_name = args
    pmse = pmse_ratio(d1, d2)
    with mlflow.start_run(nested=True):
        mlflow.set_tags({
            'metric_name': str(name),
            'dataset': dataset_name,
            'epsilon': str(epsilon),
            'synthesizer': str(synth_name),
            'pmse_score': str(pmse),
        })
    return float(pmse)