class ExperimentImporter():
    """Import a previously exported experiment directory into a destination
    Databricks experiment, run by run."""

    def __init__(self, mlflow_client=None, mlmodel_fix=True, use_src_user_id=False,
                 import_mlflow_tags=True, import_metadata_tags=False):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param mlmodel_fix: Forwarded to RunImporter.
        :param use_src_user_id: Set the destination user ID to the source user ID.
            Source user ID is ignored when importing into
        :param import_mlflow_tags: Import mlflow tags.
        :param import_metadata_tags: Import mlflow_export_import tags.
        """
        self.mlflow_client = mlflow_client or mlflow.tracking.MlflowClient()
        self.run_importer = RunImporter(
            self.mlflow_client,
            mlmodel_fix=mlmodel_fix,
            use_src_user_id=use_src_user_id,
            import_mlflow_tags=import_mlflow_tags,
            import_metadata_tags=import_metadata_tags)
        print("MLflowClient:", self.mlflow_client)
        self.dbx_client = DatabricksHttpClient()

    def import_experiment(self, exp_name, input_dir):
        """
        :param: exp_name: Destination experiment name.
        :param: input_dir: Source experiment directory.
        :return: A map of source run IDs and destination run.info.
        """
        mlflow_utils.set_experiment(self.dbx_client, exp_name)
        # Destination experiment id is currently unused; kept per the original TODO.
        dst_exp_id = self.mlflow_client.get_experiment_by_name(exp_name).experiment_id  # TODO

        # The manifest lists which source runs were exported OK and which failed.
        manifest_path = os.path.join(input_dir, "manifest.json")
        dct = utils.read_json_file(manifest_path)
        run_ids = dct["export_info"]["ok_runs"]
        failed_run_ids = dct["export_info"]["failed_runs"]
        print(f"Importing {len(run_ids)} runs into experiment '{exp_name}' from {input_dir}")

        run_ids_map = {}
        run_info_map = {}
        for src_run_id in run_ids:
            # Each exported run lives in a subdirectory named after its source run ID.
            dst_run, src_parent_run_id = self.run_importer.import_run(
                exp_name, os.path.join(input_dir, src_run_id))
            dst_run_id = dst_run.info.run_id
            run_ids_map[src_run_id] = {
                "dst_run_id": dst_run_id,
                "src_parent_run_id": src_parent_run_id
            }
            run_info_map[src_run_id] = dst_run.info
        print(f"Imported {len(run_ids)} runs into experiment '{exp_name}' from {input_dir}")

        if len(failed_run_ids) > 0:
            print(f"Warning: {len(failed_run_ids)} failed runs were not imported - see {manifest_path}")

        # Re-link parent/child (nested) run tags now that all runs exist.
        utils.nested_tags(self.mlflow_client, run_ids_map)
        return run_info_map
class ModelImporter():
    """Import a registered model from an export directory, recreating the model,
    re-importing the runs behind its latest versions, and restoring stages."""

    def __init__(self, filesystem=None):
        # Fall back to the default filesystem abstraction when none is supplied.
        self.fs = filesystem if filesystem else _filesystem.get_filesystem()
        self.client = mlflow.tracking.MlflowClient()
        self.run_importer = RunImporter(self.client, mlmodel_fix=True)

    def import_model(self, input_dir, model_name, experiment_name, delete_model=False):
        """
        :param input_dir: Export directory containing model.json and one
            subdirectory per exported run.
        :param model_name: Destination registered model name.
        :param experiment_name: Destination experiment for the imported runs.
        :param delete_model: Delete any existing destination model first.
        """
        dct = utils.read_json_file(os.path.join(input_dir, "model.json"))
        dct = dct["registered_model"]

        print("Model to import:")
        print(f" Name: {dct['name']}")
        print(f" Description: {dct.get('description','')}")
        print(f" Tags: {dct.get('tags','')}")
        print(f" {len(dct['latest_versions'])} latest versions")

        if delete_model:
            model_utils.delete_model(self.client, model_name)

        # Exported tags are a list of {"key": ..., "value": ...} entries.
        tags = {e["key"]: e["value"] for e in dct.get("tags", {})}
        self.client.create_registered_model(model_name, tags, dct.get("description"))
        mlflow.set_experiment(experiment_name)

        print("Importing latest versions:")
        for v in dct["latest_versions"]:
            run_id = v["run_id"]
            source = v["source"]
            current_stage = v["current_stage"]
            artifact_uri = v["artifact_uri"]
            run_dir = os.path.join(input_dir, run_id)

            print(f" Version {v['version']}:")
            print(f" current_stage: {v['current_stage']}:")
            print(" Run to import:")
            print(" run_id:", run_id)
            print(" artifact_uri:", artifact_uri)
            print(" source: ", source)

            # The model path is the source URI relative to the run's artifact root.
            model_path = source.replace(artifact_uri + "/", "")
            print(" model_path:", model_path)

            # Re-import the run; the destination run gets a new ID and artifact URI.
            run_id, _ = self.run_importer.import_run(experiment_name, run_dir)
            run = self.client.get_run(run_id)
            print(" Imported run:")
            print(" run_id:", run_id)
            print(" artifact_uri:", run.info.artifact_uri)

            # Rebuild the version source against the destination artifact root.
            source = os.path.join(run.info.artifact_uri, model_path)
            print(" source: ", source)

            version = self.client.create_model_version(model_name, source, run_id)
            model_utils.wait_until_version_is_ready(self.client, model_name, version, sleep_time=2)
            self.client.transition_model_version_stage(model_name, version.version, current_stage)
def test_run_import_metadata_tags():
    """Export with metadata tags enabled, re-import with both mlflow and
    metadata tags enabled, and compare the two runs' metadata tags."""
    exporter = RunExporter(export_metadata_tags=True)
    importer = RunImporter(
        mlmodel_fix=mlmodel_fix,
        import_metadata_tags=True,
        import_mlflow_tags=True)
    run1, run2 = init_run_test(exporter, importer, verbose=False)
    compare_run_import_metadata_tags(client, output, run1, run2)
class ExperimentImporter():
    """Import an exported experiment, from either a directory or a zip archive,
    into a destination experiment."""

    def __init__(self, mlflow_client=None, use_src_user_id=False,
                 import_mlflow_tags=True, import_metadata_tags=False):
        """
        :param mlflow_client: MLflow client or if None create default client.
        :param use_src_user_id: Set the destination user ID to the source user ID.
        :param import_mlflow_tags: Import mlflow tags.
        :param import_metadata_tags: Import mlflow_export_import tags.
        """
        self.client = mlflow_client or mlflow.tracking.MlflowClient()
        self.run_importer = RunImporter(
            self.client,
            use_src_user_id=use_src_user_id,
            import_mlflow_tags=import_mlflow_tags,
            import_metadata_tags=import_metadata_tags)
        print("MLflowClient:",self.client)

    def import_experiment(self, exp_name, input):
        """Dispatch on the input path: zip archive or plain directory."""
        if input.endswith(".zip"):
            self.import_experiment_from_zip(exp_name, input)
        else:
            self.import_experiment_from_dir(exp_name, input)

    def import_experiment_from_dir(self, exp_name, exp_dir):
        """Import every run listed in the directory's manifest.json."""
        mlflow.set_experiment(exp_name)
        manifest_path = os.path.join(exp_dir,"manifest.json")
        dct = utils.read_json_file(manifest_path)
        run_ids = dct["run_ids"]
        failed_run_ids = dct['failed_run_ids']
        print(f"Importing {len(run_ids)} runs into experiment '{exp_name}' from {exp_dir}")

        run_ids_mapping = {}
        for src_run_id in run_ids:
            # Each run's export lives in a subdirectory named after its source run ID.
            dst_run_id, src_parent_run_id = self.run_importer.import_run(
                exp_name, os.path.join(exp_dir,src_run_id))
            run_ids_mapping[src_run_id] = (dst_run_id,src_parent_run_id)
        print(f"Imported {len(run_ids)} runs into experiment '{exp_name}' from {exp_dir}")

        if len(failed_run_ids) > 0:
            print(f"Warning: {len(failed_run_ids)} failed runs were not imported - see {manifest_path}")

        # Restore parent/child (nested) run relationships via tags.
        utils.nested_tags(self.client, run_ids_mapping)

    def import_experiment_from_zip(self, exp_name, zip_file):
        """Unzip the archive, then delegate to the directory-based import."""
        utils.unzip_directory(zip_file, exp_name, self.import_experiment_from_dir)
def __init__(self, mlflow_client=None, mlmodel_fix=True, use_src_user_id=False,
             import_mlflow_tags=True, import_metadata_tags=False):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param mlmodel_fix: Forwarded to RunImporter.
    :param use_src_user_id: Set the destination user ID to the source user ID.
        Source user ID is ignored when importing into
    :param import_mlflow_tags: Import mlflow tags.
    :param import_metadata_tags: Import mlflow_export_import tags.
    """
    self.mlflow_client = mlflow_client if mlflow_client else mlflow.tracking.MlflowClient()
    # All tag/user options are simply forwarded to the per-run importer.
    importer_options = dict(
        mlmodel_fix=mlmodel_fix,
        use_src_user_id=use_src_user_id,
        import_mlflow_tags=import_mlflow_tags,
        import_metadata_tags=import_metadata_tags)
    self.run_importer = RunImporter(self.mlflow_client, **importer_options)
    print("MLflowClient:", self.mlflow_client)
    self.dbx_client = DatabricksHttpClient()
def __init__(self, mlflow_client=None, run_importer=None, await_creation_for=None):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param run_importer: RunImporter instance.
    :param await_creation_for: Seconds to wait for model version creation.
    """
    # NOTE: the mlflow_client argument is not used — a fresh client is always
    # created here, mirroring the original behavior.
    self.mlflow_client = mlflow.tracking.MlflowClient()
    print(">> run_importer:", run_importer)
    print(">> run_importer.type:", type(run_importer))
    if run_importer:
        self.run_importer = run_importer
    else:
        self.run_importer = RunImporter(
            self.mlflow_client, mlmodel_fix=True, import_mlflow_tags=False)
    self.await_creation_for = await_creation_for
# Databricks notebook cell: read the source run directory from a widget.
dbutils.widgets.text("Input folder", "")
input_dir = dbutils.widgets.get("Input folder")
if len(input_dir) == 0: raise Exception("ERROR: Input is required")
# NOTE(review): bare tuple below is the notebook idiom for displaying values;
# `experiment_name` is not defined in this cell — presumably set by an earlier
# cell or widget. TODO confirm.
input_dir, experiment_name
print("input_dir:", input_dir)
print("experiment_name:", experiment_name)

# COMMAND ----------

# MAGIC %md ### Import Run

# COMMAND ----------

from mlflow_export_import.run.import_run import RunImporter
# Import the exported run into the destination experiment.
importer = RunImporter()
run_id, parent_run_id = importer.import_run(experiment_name, input_dir)
run_id, parent_run_id  # notebook display of the new run IDs

# COMMAND ----------

# MAGIC %md ### Display MLflow UI URIs

# COMMAND ----------

# MAGIC %run ./Common

# COMMAND ----------

display_run_uri(run_id)
def __init__(self, filesystem=None):
    """
    :param filesystem: Filesystem abstraction; defaults to the module default.
    """
    self.fs = filesystem if filesystem else _filesystem.get_filesystem()
    self.client = mlflow.tracking.MlflowClient()
    # Run importer with the MLmodel fix always enabled.
    self.run_importer = RunImporter(self.client, mlmodel_fix=True)
def test_run_no_import_mlflow_tags():
    """Round-trip a run with mlflow tag import disabled and verify that
    mlflow tags are absent from the imported run."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=False)
    run1, run2 = init_run_test(exporter, importer)
    compare_run_no_import_mlflow_tags(run1, run2)
def test_run_basic():
    """Round-trip a run with default export/import options and compare."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix)
    run1, run2 = init_run_test(exporter, importer)
    compare_runs(run1, run2)
def __init__(self, filesystem=None, run_importer=None):
    """
    :param filesystem: Filesystem abstraction; defaults to the module default.
    :param run_importer: RunImporter instance; a default one is built if None.
    """
    self.fs = filesystem if filesystem else _filesystem.get_filesystem()
    self.client = mlflow.tracking.MlflowClient()
    # Default importer enables the MLmodel fix and mlflow tag import.
    self.run_importer = run_importer or RunImporter(
        self.client, mlmodel_fix=True, import_mlflow_tags=True)
def test_run_basic():
    """Round-trip a run with mlflow tag import enabled and compare."""
    exporter = RunExporter()
    importer = RunImporter(mlmodel_fix=mlmodel_fix, import_mlflow_tags=True)
    run1, run2 = init_run_test(exporter, importer)
    compare_runs(client, output, run1, run2)
def __init__(self, mlflow_client=None, use_src_user_id=False,
             import_mlflow_tags=True, import_metadata_tags=False):
    """
    :param mlflow_client: MLflow client or if None create default client.
    :param use_src_user_id: Set the destination user ID to the source user ID.
    :param import_mlflow_tags: Import mlflow tags.
    :param import_metadata_tags: Import mlflow_export_import tags.
    """
    self.client = mlflow_client if mlflow_client else mlflow.tracking.MlflowClient()
    # All options are forwarded directly to the per-run importer.
    self.run_importer = RunImporter(
        self.client,
        use_src_user_id=use_src_user_id,
        import_mlflow_tags=import_mlflow_tags,
        import_metadata_tags=import_metadata_tags)
    print("MLflowClient:",self.client)