def run_synthesis(synthesis_args):
    n, s, synth_args, d, epsilons, datasets, cat_cols, save_models_path, run_name = synthesis_args
    res = []
    with mlflow.start_run(nested=True):
        synth = s(epsilon=float(epsilons[0]), **synth_args)
        for e in epsilons:
            start_time = time.time()
            # Need to save: epochs, epsilons traversed, loss
            sampled = synth.fit_sample(datasets[d]["data"],
                                       categorical_columns=cat_cols.split(','),
                                       update_epsilon=float(e),
                                       verbose=conf.VERBOSE,
                                       mlflow=True)
            end_time = time.time()
            mlflow.set_tags({"synthesizer": type(synth),
                             "args": str(synth_args),
                             "dataset": str(d),
                             "epsilon": str(e),
                             "duration_seconds": str(end_time - start_time)})
            res.append((n, d, str(e), sampled))
            print("Epsilon " + str(e) + " finished for synthesizer " + n
                  + " in " + str(end_time - start_time) + "s")
            datapath = os.path.join(save_models_path, n + "_" + str(e) + "_" + d + "_" + run_name + "_dataset.csv")
            modelpath = os.path.join(save_models_path, n + "_" + str(e) + "_" + d + "_" + run_name + "_model.ckpt")
            sampled.to_csv(datapath)
            mlflow.log_artifact(datapath)
            synth.save(modelpath)
            mlflow.log_artifact(modelpath)
    return res

def train_model(model, X_train, y_train, name, config):
    """Train a single model.

    # Arguments
        model: Model, NN model to train.
        X_train: ndarray(number, lags), input data for training.
        y_train: ndarray(number, ), target data for training.
        name: String, name of the model.
        config: Dict, training parameters.
    """
    mlflow.set_tracking_uri("http://127.0.0.1:5000")
    tracking_uri = mlflow.get_tracking_uri()
    print("Current tracking uri: {}".format(tracking_uri))
    tags = {"usuario": "Anonymous"}
    mlflow.set_experiment("traffic_flow-saes")
    with mlflow.start_run() as run:
        mlflow.set_tags(tags)
        mlflow.keras.autolog()
        model.compile(loss="mse", optimizer="rmsprop", metrics=['mape'])
        # early = EarlyStopping(monitor='val_loss', patience=30, verbose=0, mode='auto')
        hist = model.fit(X_train, y_train,
                         batch_size=config["batch"],
                         epochs=config["epochs"],
                         validation_split=0.05)
        model.save('model/' + name + '.h5')
        df = pd.DataFrame.from_dict(hist.history)
        df.to_csv('model/' + name + ' loss.csv', encoding='utf-8', index=False)
        mlflow.log_param("Run_id", run.info.run_id)

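# Usage sketch for train_model above. The toy model, synthetic data, and
# config values are illustrative assumptions, not part of the original
# snippet; a local tracking server at 127.0.0.1:5000 and a 'model/' directory
# are also assumed.
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

def _demo_train_model():
    model = Sequential([Dense(8, activation="relu", input_shape=(12,)),
                        Dense(1)])
    X_train = np.random.rand(128, 12)
    y_train = np.random.rand(128)
    train_model(model, X_train, y_train, "demo", {"batch": 32, "epochs": 2})
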
def train_model(
    params_fp: Path = Path(config.CONFIG_DIR, "params.json"),
    model_dir: Optional[Path] = Path(config.MODEL_DIR),
    experiment_name: Optional[str] = "best",
    run_name: Optional[str] = "model",
) -> None:
    """Train a model using the specified parameters.

    Args:
        params_fp (Path, optional): Parameters to use for training. Defaults to `config/params.json`.
        model_dir (Path): Location of model artifacts. Defaults to `config.MODEL_DIR`.
        experiment_name (str, optional): Name of the experiment to save the run to. Defaults to `best`.
        run_name (str, optional): Name of the run. Defaults to `model`.
    """
    # Load parameters
    params = Namespace(**utils.load_dict(filepath=params_fp))

    # Set experiment and start run
    mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name):
        run_id = mlflow.active_run().info.run_id

        # Train
        artifacts = main.run(params=params)

        # Set tags
        tags = {}
        mlflow.set_tags(tags)

        # Log metrics
        performance = artifacts["performance"]
        logger.info(json.dumps(performance["overall"], indent=2))
        metrics = {
            "precision": performance["overall"]["precision"],
            "recall": performance["overall"]["recall"],
            "f1": performance["overall"]["f1"],
            "best_val_loss": artifacts["loss"],
            "behavioral_score": performance["behavioral"]["score"],
            "slices_f1": performance["slices"]["overall"]["f1"],
        }
        mlflow.log_metrics(metrics)

        # Log artifacts
        with tempfile.TemporaryDirectory() as dp:
            utils.save_dict(vars(artifacts["params"]), Path(dp, "params.json"), cls=NumpyEncoder)
            utils.save_dict(performance, Path(dp, "performance.json"))
            artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
            artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
            torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
            mlflow.log_artifacts(dp)
        mlflow.log_params(vars(artifacts["params"]))

    # Save for repo
    with open(Path(model_dir, "run_id.txt"), "w") as fp:
        fp.write(run_id)
    utils.save_dict(vars(params), Path(model_dir, "params.json"), cls=NumpyEncoder)
    utils.save_dict(performance, Path(model_dir, "performance.json"))

def log_sk_model(sk_model,
                 model_artifact_path: str = 'model',
                 registered_model_name: str = None,
                 params: dict = None,
                 metrics: dict = None,
                 tags: dict = None,
                 artifacts: dict = None) -> None:
    if params is None:
        params = {}
    if metrics is None:
        metrics = {}
    if tags is None:
        tags = {}
    if artifacts is None:
        artifacts = {}

    _logger.info("Logging Scikit-Learn model to MLflow")
    mlflow.sklearn.log_model(sk_model=sk_model,
                             artifact_path=model_artifact_path,
                             conda_env='./environment.yml',
                             registered_model_name=registered_model_name)
    mlflow.log_params(params)
    mlflow.set_tags(tags)
    mlflow.log_metrics(metrics)
    for local_path, artifact_path in artifacts.items():
        _logger.debug(f"Logging artifact to MLflow: {local_path} - {artifact_path}")
        mlflow.log_artifact(local_path, artifact_path)

def set_tags(self, tags, optuna_log=True):
    """Wrapper of the corresponding MLflow function.

    The data is also added to Optuna as a user attribute.

    Args:
        tags ([Dict]): Dict of tags.
        optuna_log (bool, optional): Internal parameter that should be
            ignored by the API user. Defaults to True.
    """
    for key, value in tags.items():
        if optuna_log:
            self._trial.set_user_attr(key, value)
        _logger.info(f"Tag: {key}: {value}")
        value = str(value)  # make sure it is a string
        if len(value) > self._max_mlflow_tag_length:
            tags[key] = textwrap.shorten(value, self._max_mlflow_tag_length)
    try:
        mlflow.set_tags(normalize_mlflow_entry_names_in_dict(tags))
    except Exception as e:
        _logger.error(
            "Exception raised during MLflow communication! Exception: {}".format(e),
            exc_info=True,
        )

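# A minimal, self-contained illustration of the truncation step used in
# set_tags above: textwrap.shorten collapses whitespace and trims the value
# to the tag-length limit (the 5000 used here is an assumed limit).
import textwrap

long_value = "word " * 2000                      # ~10000 characters
shortened = textwrap.shorten(long_value, 5000)   # ends with a '[...]' placeholder
assert len(shortened) <= 5000
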
def log_experiment(params, metrics=None, tags=None, model=None,
                   experiment_name=EXPERIMENT_NAME):
    """Logs the model and related parameters and metrics as an experiment.

    :param params: (dict): key-value pairs of named parameters used by the model
    :param metrics: (dict): key-value pairs of metrics produced by the model
    :param tags: (dict): key-value pairs of tags
    :param model: (str): file location of the saved model
    :param experiment_name: (str): name of the experiment for which data is being logged

    For permanent tracking URIs, such as a Postgres database, set the
    MLFLOW_TRACKING_URI environment variable.
    """
    try:
        mlflow.create_experiment(experiment_name)
    except MlflowException:
        logger.info("Found existing experiment. Adding new version to that.")
    mlflow.set_experiment(experiment_name)
    with mlflow.start_run():
        mlflow.log_params(params)
        if metrics is not None:
            mlflow.log_metrics(metrics)
        if tags is not None:
            mlflow.set_tags(tags)
        if model is not None:
            mlflow.log_artifact(model)
    try:
        mlflow.end_run()
    except MlflowException:
        pass

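# Example call of log_experiment above; the parameter values, model path,
# and experiment name are illustrative assumptions.
log_experiment(
    params={"C": 1.0, "max_iter": 100},
    metrics={"f1": 0.87},
    tags={"stage": "baseline"},
    model="model/clf.joblib",
    experiment_name="census-baseline",
)
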
def log_mlflow(experiment, model, params={}, metrics={}, tags={}):
    """Log models, params, metrics, and tags to the MLflow server."""
    print("=== Logging in MLFlow Server...")
    mlflow.set_experiment(experiment)
    with mlflow.start_run():
        # Log params
        mlflow.log_params(params)
        print("Params logged.")
        # Log metrics
        mlflow.log_metrics(metrics)
        print("Metrics logged.")
        # Log tags
        mlflow.set_tags(tags)
        print("Tags logged.")
        # Log model
        if model is not None:
            mlflow.pyfunc.log_model(artifact_path="model",
                                    python_model=model,
                                    conda_env="config/conda.yaml")
            runid = mlflow.active_run().info.run_uuid
            print("Model saved in run: {}.".format(runid))

def main(args: argparse.Namespace):
    keras_model = KerasModel(args.model_dir)
    raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
        f"{args.dataset_dir}/test",
        label_mode="int",
        batch_size=args.batch_size,
        class_names=keras_model.class_names,
        seed=SEED_VALUE,
    )
    y_true, y_pred = [], []
    for x, y in raw_test_ds:
        predictions = keras_model.predict(x, return_id=True)
        # Pick the class id with the highest predicted probability.
        predictions = [max(p.keys(), key=lambda k: p[k]) for p in predictions]
        y_true.extend(y.numpy().tolist())
        y_pred.extend(predictions)
        assert len(y_true) == len(y_pred)
        if not len(y_true) % 1_000:
            logging.info(f"Tested {len(y_true)} samples.")
    scores = {
        "accuracy_score": accuracy_score(y_true, y_pred),
        "f1_score": f1_score(y_true, y_pred),
        "precision_score": precision_score(y_true, y_pred),
        "recall_score": recall_score(y_true, y_pred),
        "confusion_matrix": confusion_matrix(y_true, y_pred),
    }
    logging.info(scores)
    mlflow.set_tags(scores)

def trainer(regularisation: int, max_iter: int):
    with mlflow.start_run() as _:
        mlflow.set_tags({"training_type": "Baseline"})
        mlflow.log_params({"C": regularisation, "max_iter": max_iter})
        X, y = fetch_censusdata()
        clf = make_pipeline(
            make_linear_preprocessor(),
            LogisticRegression(C=regularisation, max_iter=max_iter,
                               class_weight="balanced", random_state=0),
        )
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        precision, recall, fscore, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary")
        mlflow.log_metrics({
            "precision": precision,
            "recall": recall,
            "fscore": fscore
        })

def detailed_score(self, generation):
    """A more detailed run with mlflow.

    When using this function, it is recommended that you also use the mlflow
    tracking function for the model you're using, e.g. mlflow tracking for
    TensorFlow, Torch, scikit-learn, etc.

    TODO: Merge this function with easy_score, and pass the parameter used
    on the run as a parameter for easy_score.
    """
    if self.score_function is None:
        raise ValueError("No score function set; you can set it using "
                         "set_score(func) or by passing score=func during "
                         "class instantiation")
    for i, elem in enumerate(self.population):
        with mlflow.start_run() as run:
            tags = {
                'generation': generation + 1,
                'individue': i + 1,
            }
            mlflow.set_tags(tags)
            mlflow.log_param("Generation", generation + 1)
            score, model = self.score_function(elem)
            if score > self.top_score:
                print(f"*** New optimal model found with a score of {score} ***")
                self.top_score = score
                self.top_model = (score, model)
            elem['score'] = score
    sorted_list = sorted(self.population, key=itemgetter('score'), reverse=True)
    self.first_parent = sorted_list[0]
    self.second_parent = sorted_list[1]
    generation_top_score = self.first_parent['score']
    self.genetic_tree.append(self.first_parent)
    print(f"Better parent {self.first_parent}")
    print("DONE")

def run_experiment(self, **kwargs):
    if self.options.opt:
        self.optimize()
    seed = self.config['general']['seed']
    np.random.seed(seed)
    tf.random.set_seed(seed)
    if kwargs:
        self.adjust_pars(kwargs)
    self.data = Data(self.config, self.options, self.pars)
    if self.options.mlflow:
        mlflow.set_experiment(self.config['general']['experiment'])
        mlflow.start_run()
        self.log_pars()
        mlflow.set_tags({'seed': seed, 'mode': self.options.mode})
    if self.feature_type == 'combined' and self.fusion == 'late':
        y_true, y_pred, mae = self.train_bimodal()
    else:
        _, y_pred, _, y_true, mae = self.train_model(self.feature_type)
    if self.options.verbose:
        run_validation(y_true, y_pred)
    if self.options.mlflow:
        mlflow.end_run()
    return -mae

def before_pipeline_run(self, run_params: Dict[str, Any],
                        pipeline: Pipeline, catalog: DataCatalog) -> None:
    """Hook to be invoked before a pipeline runs.

    Args:
        run_params: The params needed for the given run. Should be identical
            to the data logged by Journal.
            # @fixme: this needs to be modelled explicitly as code, instead
            # of a comment (a TypedDict sketch follows this function)
            Schema:
                {
                    "run_id": str,
                    "project_path": str,
                    "env": str,
                    "kedro_version": str,
                    "tags": Optional[List[str]],
                    "from_nodes": Optional[List[str]],
                    "to_nodes": Optional[List[str]],
                    "node_names": Optional[List[str]],
                    "from_inputs": Optional[List[str]],
                    "load_versions": Optional[List[str]],
                    "pipeline_name": str,
                    "extra_params": Optional[Dict[str, Any]],
                }
        pipeline: The ``Pipeline`` that will be run.
        catalog: The ``DataCatalog`` to be used during the run.
    """
    self.context = load_context(
        project_path=run_params["project_path"],
        env=run_params["env"],
        extra_params=run_params["extra_params"],
    )
    mlflow_conf = get_mlflow_config(self.context)
    mlflow_conf.setup(self.context)

    run_name = (mlflow_conf.run_opts["name"]
                if mlflow_conf.run_opts["name"] is not None
                else run_params["pipeline_name"])
    mlflow.start_run(
        run_id=mlflow_conf.run_opts["id"],
        experiment_id=mlflow_conf.experiment.experiment_id,
        run_name=run_name,
        nested=mlflow_conf.run_opts["nested"],
    )
    # Set tags only for run parameters that have values.
    mlflow.set_tags({k: v for k, v in run_params.items() if v})
    # Add the git sha manually for consistency with the Journal.
    # TODO: this does not take uncommitted files into account, so it
    # does not ensure reproducibility. Define what to do.
    mlflow.set_tag("git_sha", _git_sha(run_params["project_path"]))
    mlflow.set_tag(
        "kedro_command",
        _generate_kedro_command(
            tags=run_params["tags"],
            node_names=run_params["node_names"],
            from_nodes=run_params["from_nodes"],
            to_nodes=run_params["to_nodes"],
            from_inputs=run_params["from_inputs"],
            load_versions=run_params["load_versions"],
            pipeline_name=run_params["pipeline_name"],
        ),
    )

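# One way to model the run_params schema explicitly, as the @fixme in the
# docstring above suggests. This is a sketch; the class name is hypothetical,
# but the field names and types are taken verbatim from the documented schema.
from typing import Any, Dict, List, Optional, TypedDict

class KedroRunParams(TypedDict):
    run_id: str
    project_path: str
    env: str
    kedro_version: str
    tags: Optional[List[str]]
    from_nodes: Optional[List[str]]
    to_nodes: Optional[List[str]]
    node_names: Optional[List[str]]
    from_inputs: Optional[List[str]]
    load_versions: Optional[List[str]]
    pipeline_name: str
    extra_params: Optional[Dict[str, Any]]
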
def mlflow_log(history, config, machine_type, out_path, tb_log_dir):
    mlflow.set_tracking_uri(config['IO_OPTION']['MLFLOW_PATH'] + '/mlruns')
    run_name = config['IO_OPTION']['model_name'] + '_' + machine_type
    with mlflow.start_run(run_name=run_name):
        # Log IO_OPTION etc. into mlflow
        mlflow.set_tags(config['IO_OPTION'])
        mlflow.set_tags(config['etc'])
        mlflow.set_tag('machine_type', machine_type)
        mlflow.set_tag('tb_log_dir', tb_log_dir)
        # Log spectrogram params into mlflow
        for key, value in config['mel_spectrogram_param'].items():
            mlflow.log_param(key, value)
        # Log fit params
        for key, value in config['fit'].items():
            mlflow.log_param(key, value)
        # Log other info
        mlflow.log_param('loss_type', 'MSE')
        # Log results into mlflow
        mlflow.log_metric('train_epoch_score', history['epoch_score_lists']['train'][-1])
        mlflow.log_metric('valid_epoch_score', history['epoch_score_lists']['valid'][-1])
        # Log model
        mlflow.log_artifact(out_path)

def run_synthesis(synthesis_args):
    """A parallelizable run of the synthesis step.

    :param synthesis_args: n = name of the synthesizer,
        s = synthesizer object,
        synth_args = dictionary of hyperparams for the synthesizer,
        d = name of the dataset,
        e = epsilon value,
        datasets = dataset dictionary,
        cat_cols = list of categorical columns in dataset d
    :type synthesis_args: tuple
    :return: (n, d, str(e), sampled), where sampled is the synthesized data of size len(d)
    :rtype: tuple
    """
    n, s, synth_args, d, e, datasets, cat_cols = synthesis_args
    with mlflow.start_run(nested=True):
        start_time = time.time()
        synth = s(epsilon=float(e), **synth_args)
        d_copy = datasets[d]["data"].copy()
        sampled = synth.fit_sample(d_copy, categorical_columns=cat_cols.split(','))
        end_time = time.time()
        mlflow.set_tags({"synthesizer": type(synth),
                         "args": str(synth_args),
                         "epsilon": str(e),
                         "dataset": str(d),  # tag the dataset name, not the whole dict
                         "duration_seconds": str(end_time - start_time)})
        print(datasets[d]["name"] + ' finished. Epsilon: ' + str(e))
        datasets[d][n][str(e)] = sampled
    return (n, d, str(e), sampled)

def log(self):
    """Log params, metrics, and tags to MLflow if is_dev is False."""
    if not self.is_dev:
        mlflow.log_params(self.params)
        mlflow.log_metrics(self.metrics)
        mlflow.set_tags(self.tags)
        self._save_model()

def setup_mlflow(cfg, features_cfg, data_cfg):
    import mlflow

    experiment_id = OmegaConf.select(cfg, "experiment_id", default=None)
    if experiment_id is None and cfg.experiment_name is not None:
        mlflow.set_experiment(cfg.experiment_name)
        experiment = mlflow.get_experiment_by_name(cfg.experiment_name)
        logger.debug(f"MLFlow Experiment: {cfg.experiment_name}")
        experiment_id = experiment.experiment_id

    orig_cwd = get_original_cwd()
    tracking_uri = f"file://{orig_cwd}/mlruns"
    mlflow.set_tracking_uri(tracking_uri)
    logger.info(f"MLFlow Tracking URI: {tracking_uri}")

    # if cfg.model == "xgboost":
    #     import mlflow.xgboost
    #     logger.debug("Turning on MLFlow autologging for XGBoost...")
    #     mlflow.xgboost.autolog()

    run = mlflow.start_run(experiment_id=experiment_id)

    processed_data_dir = Path(to_absolute_path(data_cfg.hydra.run.dir))
    if processed_data_dir is not None:
        data_hydra_dir = processed_data_dir / ".hydra"
        mlflow.log_artifacts(data_hydra_dir, artifact_path="processed_data_configs")
        data_cfg = OmegaConf.load(data_hydra_dir / "config.yaml")
        for name, param_name in DATA_CONFIGS_TO_LOG.items():
            param = OmegaConf.select(data_cfg, param_name)
            if param is not None:
                if isinstance(param, list):
                    param = ", ".join(str(p) for p in param)
                mlflow.log_param(name, param)

    model_hydra_dir = Path(".hydra")
    mlflow.log_artifacts(model_hydra_dir, artifact_path="model_configs")
    mlflow.log_params({
        "model": cfg.model,
        "lag": features_cfg.lag,
        "exog_lag": features_cfg.exog_lag,
        "lead": features_cfg.lead,
        "cv_method": cfg.cv.method,
    })
    mlflow.log_params(cfg.cv.params)

    tags = OmegaConf.select(cfg, "tags", default={})
    if tags:
        mlflow.set_tags(tags)

    return run

def trainer(regularisation: int, max_iter: int):
    with mlflow.start_run() as _:
        mlflow.set_tags({"training_type": "FeatureImportance"})
        mlflow.log_params({"C": regularisation, "max_iter": max_iter})
        X, y = fetch_censusdata()
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

        p1 = make_nonlinear_preprocessor()
        p2 = make_nonlinear_to_linear_preprocessor()
        clf = LogisticRegression(C=regularisation, max_iter=max_iter,
                                 class_weight="balanced", random_state=0)
        pp = make_pipeline(p1, p2, clf)
        pp.fit(X_train, y_train)

        # Log reference metrics
        y_pred = pp.predict(X_test)
        ref_precision, ref_recall, ref_fscore, _ = precision_recall_fscore_support(
            y_test, y_pred, average="binary")
        mlflow.log_metrics({
            "precision": ref_precision,
            "recall": ref_recall,
            "fscore": ref_fscore
        })

        for i in range(len(FEATURES)):
            with mlflow.start_run(nested=True) as _:
                mlflow.set_tags({"training_type": "FeatureImportance"})
                mlflow.log_params({"feature": FEATURES[i]})
                X_test_tr = p1.transform(X_test)
                # Shuffle feature i
                indexes = np.arange(X_test_tr.shape[0])
                np.random.shuffle(indexes)
                X_test_tr[:, i] = X_test_tr[indexes, i]
                X_test_tr = p2.transform(X_test_tr)
                y_pred = clf.predict(X_test_tr)
                precision, recall, fscore, _ = precision_recall_fscore_support(
                    y_test, y_pred, average="binary")
                mlflow.log_metrics({
                    "precision": precision,
                    "precision_penalty": precision - ref_precision,
                    "recall": recall,
                    "recall_penalty": recall - ref_recall,
                    "fscore": fscore,
                    "fscore_penalty": fscore - ref_fscore,
                })

def _log_xp(config, name):
    set_experiment(name)
    set_tags(config.get('tags', {}))
    config_path = "/tmp/config.json"
    with open(config_path, "w") as f:
        simplejson.dump(config, f)
    log_artifact(config_path)
    os.remove(config_path)
    return name

def __init__(self, exp_name: str, tag: Dict):
    """Initialize mlflow.

    Args:
        exp_name (str): Experiment name
        tag (Dict): Tag information
    """
    mlflow.set_experiment(exp_name)
    mlflow.set_tags(tag)

def __call__(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> None:
    # Set the tracking URI for MLflow.
    if self._tracking_uri is not None:
        mlflow.set_tracking_uri(self._tracking_uri)

    # Set the experiment for MLflow.
    mlflow.set_experiment(study.study_name)

    with mlflow.start_run(run_name=str(trial.number), nested=self._nest_trials):
        # Log the metric to MLflow.
        trial_value = trial.value if trial.value is not None else float("nan")
        mlflow.log_metric(self._metric_name, trial_value)

        # Log the params to MLflow.
        mlflow.log_params(trial.params)

        # Collect the tags for MLflow.
        tags: Dict[str, str] = {}
        tags["number"] = str(trial.number)
        tags["datetime_start"] = str(trial.datetime_start)
        tags["datetime_complete"] = str(trial.datetime_complete)

        # Set the state, convert it to str, and remove the common prefix.
        trial_state = trial.state
        if isinstance(trial_state, TrialState):
            tags["state"] = str(trial_state).split(".")[-1]

        # Set the direction, convert it to str, and remove the common prefix.
        study_direction = study.direction
        if isinstance(study_direction, StudyDirection):
            tags["direction"] = str(study_direction).split(".")[-1]

        tags.update(trial.user_attrs)
        distributions = {(k + "_distribution"): str(v)
                         for (k, v) in trial.distributions.items()}
        tags.update(distributions)
        if self._tag_study_user_attrs:
            tags.update(study.user_attrs)

        # This is a temporary fix on the Optuna side. It avoids an error with
        # user attributes that are too long. It should be fixed on the MLflow
        # side later; when it is, this code block can be removed.
        # See https://github.com/optuna/optuna/issues/1340
        # and https://github.com/mlflow/mlflow/issues/2931
        max_mlflow_tag_length = 5000
        for key, value in tags.items():
            value = str(value)  # make sure it is a string
            if len(value) > max_mlflow_tag_length:
                tags[key] = textwrap.shorten(value, max_mlflow_tag_length)

        mlflow.set_tags(tags)

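# Usage sketch for the callback above. Its attributes match the shape of
# optuna's MLflowCallback integration, which is assumed here; the objective
# and study name are illustrative.
import optuna
from optuna.integration import MLflowCallback

def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

mlflow_cb = MLflowCallback(tracking_uri="mlruns", metric_name="value")
study = optuna.create_study(study_name="demo")
study.optimize(objective, n_trials=10, callbacks=[mlflow_cb])
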
def start_run(param_prefix: Optional[str] = None,
              experiment_name: Optional[str] = None,
              run_name: Optional[str] = None,
              artifact_location: Optional[str] = None,
              **args) -> mlf.ActiveRun:
    """Close alias of mlflow.start_run. The only difference is that
    uv.start_run attempts to extract parameters from the environment and log
    those to the bound UV reporter using `report_params`.

    Note that if experiment_name is specified and refers to an existing
    experiment, then artifact_location will not be honored, as this is an
    immutable property of an mlflow experiment. This method will issue a
    warning but proceed.

    Note that the returned value can be used as a context manager:
    https://www.mlflow.org/docs/latest/python_api/mlflow.html#mlflow.start_run
    """
    if experiment_name is None:
        experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME")
    if run_name is None:
        run_name = os.environ.get("MLFLOW_RUN_NAME")
    if artifact_location is None:
        artifact_location = os.environ.get("MLFLOW_ARTIFACT_ROOT")

    _ensure_non_null_project(artifact_location)

    # Make sure the experiment exists before the run starts.
    if experiment_name is not None:
        if mlf.get_experiment_by_name(experiment_name) is None:
            mlf.create_experiment(experiment_name, artifact_location)
        mlf.set_experiment(experiment_name)

    ret = mlf.start_run(run_name=run_name, **args)

    env_params = ue.extract_params(prefix=param_prefix)
    mlf.set_tags(env_params)

    # For CAIP jobs, add the job id as a tag, along with a link to the
    # console page.
    cloud_ml_job_id = os.environ.get('CLOUD_ML_JOB_ID')
    if cloud_ml_job_id is not None:
        mlf.set_tag(
            'cloud_ml_job_details',
            f'https://console.cloud.google.com/ai-platform/jobs/{cloud_ml_job_id}'
        )
        mlf.set_tag('cloud_ml_job_id', cloud_ml_job_id)

    mlf_artifact_uri = mlf.get_artifact_uri()
    if mlf_artifact_uri is not None and artifact_location is not None:
        if not mlf_artifact_uri.startswith(artifact_location):
            logging.warning(
                f'requested mlflow artifact location {artifact_location} differs '
                f'from existing experiment artifact uri {mlf_artifact_uri}')

    return ret

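# As the docstring notes, the return value works as a context manager. A
# minimal sketch, assuming the surrounding module's `mlf` alias for mlflow;
# the experiment, run, and metric names are illustrative.
with start_run(experiment_name="uv-demo", run_name="trial-1") as active_run:
    mlf.log_metric("loss", 0.123)
    print(active_run.info.run_id)
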
def log_mlflow_results(model, metrics, feat_config, model_config, tags):
    TRACKING_URI = "https://mlflow.caps.dev.dp.elsevier.systems"
    mlflow.set_tracking_uri(TRACKING_URI)
    mlflow.set_experiment("cp-ml-reference-separator-evaluator")
    with mlflow.start_run():
        mlflow.log_metrics(metrics)
        mlflow.keras.log_model(model, "models")
        mlflow.log_params(feat_config)
        mlflow.log_params(model_config)
        mlflow.set_tags(tags)

def mlflow_run(self, df):
    with mlflow.start_run() as run:
        run_id = run.info.run_uuid
        experiment_id = run.info.experiment_id

        # Train/test split
        train, test = train_test_split(df, test_size=0.2, random_state=42,
                                       stratify=df[['is_profit']])
        y = train['is_profit'].copy()
        X = train.drop(columns=['is_profit']).copy()
        y_test = test['is_profit'].copy()
        X_test = test.drop(columns=['is_profit']).copy()

        # Pipeline
        float_cols = df.select_dtypes(include='float64').columns
        preprocessor = ColumnTransformer(
            [
                ('StandardScaler', StandardScaler(), float_cols),
                # ('OneHotEncoder', OneHotEncoder(), cat_cols),
            ],
            remainder='passthrough')
        full_pipe = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('model', self.model),
        ])

        # Fit
        t_start = time.time()
        full_pipe.fit(X, y)
        t_training = time.time() - t_start

        # Predict
        t_start = time.time()
        y_test_pred_proba = full_pipe.predict_proba(X_test)
        t_prediction = time.time() - t_start

        # Score
        proba_threshold = 0.75
        metrics = {
            'auroc': roc_auc_score(y_test, y_test_pred_proba[:, 1]),
            'precision': precision_score(y_test, (y_test_pred_proba[:, 1] > proba_threshold)),
            't_training': t_training,
            't_prediction': t_prediction,
        }

        # Log params, metrics, and tags
        mlflow.log_params(self.params)
        mlflow.log_metrics(metrics)
        mlflow.set_tags(self.tags)

        # Log model
        # mlflow.sklearn.log_model(full_pipe, artifact_path='model')
        # wrapped_model = SklearnModelWrapper(full_pipe)
        # mlflow.pyfunc.log_model('model', python_model=wrapped_model)

    return full_pipe

def log_tag(dry_run, model_name, data_name, suffix):
    if suffix is not None:
        suffix = suffix[1:]  # drop the leading separator character
    if not dry_run:
        # Guard against a missing suffix so the run name stays well-formed.
        run_name = f'{model_name}-{data_name}' + (f'-{suffix.upper()}' if suffix else '')
        mlflow.set_tags({
            'mlflow.runName': run_name,
            'model': model_name,
            'data': data_name,
            'suffix': suffix,
        })

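# Note: 'mlflow.runName' is one of MLflow's reserved system tags; setting it
# via set_tags renames the active run in the UI, which is what log_tag above
# relies on. A tiny sketch (the run name is illustrative):
import mlflow

with mlflow.start_run():
    mlflow.set_tags({"mlflow.runName": "resnet-cifar10-AUG"})
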
def _set_all_tags(self):
    """Collect the dagster_run_id plus all env variables/tags that have been
    specified by the user in the config_schema, and log them as tags in
    mlflow.
    """
    tags = {tag: environ.get(tag) for tag in self.env_tags_to_log}
    tags["dagster_run_id"] = self.dagster_run_id
    if self.extra_tags:
        tags.update(self.extra_tags)
    mlflow.set_tags(tags)

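# A standalone sketch of what _set_all_tags above collects; the env variable
# names, run id, and extra tags below are illustrative assumptions.
from os import environ

env_tags_to_log = ["GIT_BRANCH", "CI_PIPELINE_ID"]
tags = {tag: environ.get(tag) for tag in env_tags_to_log}
tags["dagster_run_id"] = "run-123"       # supplied by Dagster at runtime
tags.update({"team": "data-science"})    # extra_tags from the resource config
# mlflow.set_tags(tags) would then log these on the active run.
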
def mlflow_client(tmp_path_factory, resnet_model_uri: str, spark: SparkSession) -> MlflowClient:
    tmp_path = tmp_path_factory.mktemp("mlflow")
    tmp_path.mkdir(parents=True, exist_ok=True)
    tracking_uri = "sqlite:///" + str(tmp_path / "tracking.db")
    mlflow.set_tracking_uri(tracking_uri)
    experiment_id = mlflow.create_experiment("rikai-test", str(tmp_path))

    # Simplest case
    with mlflow.start_run(experiment_id=experiment_id):
        mlflow.log_param("optimizer", "Adam")
        # Fake training loop
        model = torch.load(resnet_model_uri)
        artifact_path = "model"
        schema = ("STRUCT<boxes:ARRAY<ARRAY<float>>,"
                  "scores:ARRAY<float>,labels:ARRAY<int>>")
        pre_processing = ("rikai.contrib.torch.transforms."
                          "fasterrcnn_resnet50_fpn.pre_processing")
        post_processing = ("rikai.contrib.torch.transforms."
                           "fasterrcnn_resnet50_fpn.post_processing")
        rikai.mlflow.pytorch.log_model(
            model,          # same as vanilla mlflow
            artifact_path,  # same as vanilla mlflow
            schema,
            pre_processing,
            post_processing,
            registered_model_name="rikai-test",  # same as vanilla mlflow
        )

    # Vanilla mlflow
    with mlflow.start_run():
        mlflow.pytorch.log_model(model, artifact_path,
                                 registered_model_name="vanilla-mlflow")
        mlflow.set_tags({
            "rikai.model.flavor": "pytorch",
            "rikai.output.schema": schema,
            "rikai.transforms.pre": pre_processing,
            "rikai.transforms.post": post_processing,
        })

    # Vanilla mlflow, no tags
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            model,
            artifact_path,
            registered_model_name="vanilla-mlflow-no-tags",
        )

    spark.conf.set("rikai.sql.ml.registry.mlflow.tracking_uri", tracking_uri)
    return mlflow.tracking.MlflowClient(tracking_uri)

def test_set_tags():
    exact_expected_tags = {"name_1": "c", "name_2": "b", "nested/nested/name": 5}
    approx_expected_tags = set([MLFLOW_USER, MLFLOW_SOURCE_NAME, MLFLOW_SOURCE_TYPE])
    with start_run() as active_run:
        run_id = active_run.info.run_id
        mlflow.set_tags(exact_expected_tags)
    finished_run = tracking.MlflowClient().get_run(run_id)
    # Validate tags
    assert len(finished_run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
    for tag_key, tag_val in finished_run.data.tags.items():
        if tag_key not in approx_expected_tags:
            assert str(exact_expected_tags[tag_key]) == tag_val

def start_callback(self, parameters):
    try:
        mlflow.set_experiment(self._experiment_name)
        if mlflow.active_run() is not None:
            mlflow.end_run()
        mlflow.start_run()
        mlflow.set_tags(self._mlflow_tags)
        mlflow.log_params(parameters)
        mlflow.log_params(self._mlflow_parameters)
    except mlflow.exceptions.MlflowException as msg:
        self._enable_mlflow = False
        print(f"[WARNING][MlFlowHandler] - [StartCallback] {msg}")
        print("[WARNING][MlFlowHandler] - [StartCallback] mlflow is disabled")

def train_model(
    args_fp: Path = Path(config.CONFIG_DIR, "args.json"),
    experiment_name: Optional[str] = "best",
    run_name: Optional[str] = "model",
) -> None:
    """Train a model using the specified parameters.

    Args:
        args_fp (Path, optional): Location of arguments to use for training. Defaults to `config/args.json`.
        experiment_name (str, optional): Name of the experiment to save the run to. Defaults to `best`.
        run_name (str, optional): Name of the run. Defaults to `model`.
    """
    # Load arguments
    args = Namespace(**utils.load_dict(filepath=args_fp))

    # Set experiment and start run
    mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name) as run:  # NOQA: F841 (assigned to but never used)
        # Train
        artifacts = main.run(args=args)

        # Set tags
        tags = {"data_version": artifacts["data_version"]}
        mlflow.set_tags(tags)

        # Log metrics
        performance = artifacts["performance"]
        logger.info(json.dumps(performance["overall"], indent=2))
        metrics = {
            "precision": performance["overall"]["precision"],
            "recall": performance["overall"]["recall"],
            "f1": performance["overall"]["f1"],
            "best_val_loss": artifacts["loss"],
            "behavioral_score": artifacts["behavioral_report"]["score"],
            "slices_f1": performance["slices"]["f1"],
        }
        mlflow.log_metrics(metrics)

        # Log artifacts
        with tempfile.TemporaryDirectory() as dp:
            artifacts["label_encoder"].save(Path(dp, "label_encoder.json"))
            artifacts["tokenizer"].save(Path(dp, "tokenizer.json"))
            torch.save(artifacts["model"].state_dict(), Path(dp, "model.pt"))
            utils.save_dict(performance, Path(dp, "performance.json"))
            utils.save_dict(artifacts["behavioral_report"], Path(dp, "behavioral_report.json"))
            mlflow.log_artifacts(dp)
        mlflow.log_params(vars(artifacts["args"]))

def pMSE_test(args):
    """Parallelizable."""
    d1, d2, mlflow_step, name, epsilon, synth_name, dataset_name = args
    pmse = pmse_ratio(d1, d2)
    with mlflow.start_run(nested=True):
        mlflow.set_tags({
            'metric_name': str(name),
            'dataset': dataset_name,
            'epsilon': str(epsilon),
            'synthesizer': str(synth_name),
            'pmse_score': str(pmse),
        })
    return float(pmse)