import mlflow
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

if __name__ == "__main__":

    def print_run_infos(run_infos):
        """Print one line per run info: its run id and lifecycle stage."""
        line_template = "- run_id: {}, lifecycle_stage: {}"
        for info in run_infos:
            print(line_template.format(info.run_id, info.lifecycle_stage))

    # Log the same metric in two separate runs.
    with mlflow.start_run() as first_run:
        mlflow.log_metric("click_rate", 1.55)
    with mlflow.start_run() as second_run:
        mlflow.log_metric("click_rate", 2.50)

    # Delete the second (most recent) run.
    client = MlflowClient()
    client.delete_run(second_run.info.run_id)

    # List the runs of the default experiment (id "0") once per view type.
    listings = (
        ("Active runs:", {"run_view_type": ViewType.ACTIVE_ONLY}),
        ("Deleted runs:", {"run_view_type": ViewType.DELETED_ONLY}),
        (
            "All runs:",
            {
                "run_view_type": ViewType.ALL,
                "order_by": ["metric.click_rate DESC"],
            },
        ),
    )
    for heading, list_options in listings:
        print(heading)
        print_run_infos(mlflow.list_run_infos("0", **list_options))
class MlflowManager(object):
    """Singleton facade over an MLflow tracking client that treats runs as versions.

    A run without a parent-run tag is a *major* version.  A run whose
    parent-run tag equals a major version's run id is one of that version's
    *minor* versions.  A version's name is the value of its run-name tag.

    NOTE(review): run lookups go through ``self.client.store.search_runs``;
    confirm the installed MLflow release still exposes ``MlflowClient.store``.
    """

    _instance_lock = threading.Lock()
    _init_flag = False

    def __init__(self):
        """Configure the tracking URI and client the first time the singleton is built."""
        # __init__ runs on every ``MlflowManager()`` call, so configure only once.
        if self._init_flag:
            return
        tracking_uri = "file:./mlruns"
        # tracking_uri = "mysql://*****:*****@10.117.61.106:3306/qdmlflow?charset=utf8"
        artifact_location = None
        print('[MlflowManager] init start: tracking_uri: %s' % tracking_uri)
        self.artifact_location = artifact_location
        self.tracking_uri = tracking_uri
        self.client = MlflowClient(tracking_uri=self.tracking_uri)
        mlflow.set_tracking_uri(uri=self.tracking_uri)
        print('[MlflowManager] init end ...')
        self._init_flag = True

    # Implemented as a singleton.
    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it under the class lock if needed."""
        print('[MlflowManager] __new__ 1')
        if not hasattr(MlflowManager, "_instance"):
            print('[MlflowManager] __new__ 2')
            with MlflowManager._instance_lock:
                print('[MlflowManager] __new__ 3')
                # Re-check inside the lock: another thread may have created
                # the instance while this one was waiting.
                if not hasattr(MlflowManager, "_instance"):
                    print('[MlflowManager] __new__ 4')
                    MlflowManager._instance = object.__new__(cls)
        return MlflowManager._instance

    # =========================================================================
    # Private helpers

    @staticmethod
    def _run_name(run):
        """Return the run-name tag of ``run`` ("" when the tag is absent)."""
        return run.data.tags.get(mlflow_tags.MLFLOW_RUN_NAME, "")

    @staticmethod
    def _parent_run_id(run):
        """Return the parent-run-id tag of ``run`` ("" when the tag is absent)."""
        return run.data.tags.get(mlflow_tags.MLFLOW_PARENT_RUN_ID, "")

    def _find_run_id_by_name(self, runs, name):
        """Return the run id of the first run in ``runs`` named ``name``, else None."""
        for run in runs:
            if self._run_name(run) == name:
                return run.info.run_id
        return None

    def _next_free_name(self, prefix, active_runs, all_runs):
        """Return ``prefix + str(n)`` for the smallest unused ``n`` > ``len(active_runs)``.

        Names carried by any run in ``all_runs`` (deleted ones included) count
        as used, so a name is never handed out twice after runs are deleted.
        """
        taken = {self._run_name(run) for run in all_runs}
        index = len(active_runs) + 1
        while prefix + str(index) in taken:
            index += 1
        return prefix + str(index)

    # =========================================================================
    def log_param(self, run_id, key, value):
        """Log parameter ``key`` = ``value`` on the run ``run_id``."""
        self.client.log_param(run_id, key, value)

    # Check whether an experiment exists.
    def is_experiment_exist(self, experiment_name):
        """Return True when an experiment named ``experiment_name`` exists."""
        return bool(self.client.get_experiment_by_name(experiment_name))

    # Check whether a version name exists.
    def is_version_name_exist(self, experiment_id, version_name, view='active_only'):
        """Return True when any run of the experiment is named ``version_name``."""
        runs = self.get_versions(experiment_id, view)
        return any(self._run_name(run) == version_name for run in runs)

    # =======================================================
    # Get all minor versions (children) of a major version of an experiment.
    def get_minor_versions(self, experiment_id, version_id, view='active_only'):
        """Return the runs whose parent-run-id tag equals ``version_id``."""
        return [
            run for run in self.get_versions(experiment_id, view)
            if self._parent_run_id(run) == version_id
        ]

    # Get the number of minor versions (children) of a major version.
    def get_minor_versions_count(self, experiment_id, version_id, view='active_only'):
        """Return how many minor versions the major version ``version_id`` has."""
        return len(self.get_minor_versions(experiment_id, version_id, view))

    # Check whether a named minor version is a child of the major version.
    def is_minor_version(self, experiment_id, version_id, minor_version_name, view='active_only'):
        """Return True when ``version_id`` has a minor version named ``minor_version_name``."""
        children = self.get_minor_versions(experiment_id, version_id, view)
        return any(self._run_name(run) == minor_version_name for run in children)

    # Get the run id of a named minor version of a major version.
    def get_minor_version_id(self, experiment_id, version_id, minor_version_name, view='active_only'):
        """Return the run id of the named minor version, or None when not found."""
        children = self.get_minor_versions(experiment_id, version_id, view)
        return self._find_run_id_by_name(children, minor_version_name)

    # Generate a minor version name automatically, e.g. 1.1, 1.2, 2.3.
    def generate_minor_version_name(self, experiment_id, version_id, version_name):
        """Return an unused minor version name of the form ``<version_name>.<n>``.

        ``n`` starts at the number of active minor versions plus one and is
        bumped past any name already carried by an active or deleted minor
        version, so the result stays unique after old minor versions have
        been cleaned up.
        """
        active = self.get_minor_versions(experiment_id=experiment_id, version_id=version_id)
        every = self.get_minor_versions(experiment_id=experiment_id, version_id=version_id, view='all')
        return self._next_free_name(version_name + ".", active, every)

    # Remove surplus minor versions (keep the n most recently started).
    def clean_minor_versions(self, experiment_id, version_id, reserve_count=4):
        """Delete all but the ``reserve_count`` most recently started minor versions.

        Best effort: any exception is reported on stdout and not re-raised.
        """
        try:
            minor_versions = self.get_minor_versions(experiment_id=experiment_id, version_id=version_id)
            keep = max(reserve_count, 0)
            if len(minor_versions) <= keep:
                return
            # Order newest-first explicitly instead of trusting the store's
            # default ordering; the sort is stable, so an already ordered
            # list is left untouched.
            newest_first = sorted(
                minor_versions,
                key=lambda run: run.info.start_time or 0,
                reverse=True,
            )
            for i, stale in enumerate(newest_first[keep:], start=keep):
                print('[minor_version]', i, stale.info.experiment_id,
                      stale.info.run_id, stale.info.start_time)
                self.client.delete_run(run_id=stale.info.run_id)
        except Exception as e:
            content = '[clean_minor_versions][error] {}'.format(e)
            content = '[{}] {} {}'.format(experiment_id, version_id, content)
            print(content)

    # =======================================================
    # Get all major versions of an experiment.
    def get_major_versions(self, experiment_id, view='active_only'):
        """Return the runs of the experiment that have no parent-run-id tag."""
        return [
            run for run in self.get_versions(experiment_id, view)
            if self._parent_run_id(run) == ""
        ]

    # Get the number of major versions of an experiment.
    def get_major_versions_count(self, experiment_id, view='active_only'):
        """Return how many major versions the experiment has."""
        return len(self.get_major_versions(experiment_id, view))

    # Look up a major version id by experiment_id and version_name.
    # Accepted values for view: 'active_only', 'deleted_only', 'all'.
    def get_major_version_id(self, experiment_id, version_name, view='active_only'):
        """Return the run id of the major version named ``version_name``, or None."""
        majors = self.get_major_versions(experiment_id, view)
        return self._find_run_id_by_name(majors, version_name)

    # Generate a major version name automatically, e.g. 1, 2, 3.
    def generate_major_version_name(self, experiment_id):
        """Return an unused major version name of the form ``<n>``.

        ``n`` starts at the number of active major versions plus one and is
        bumped past names already carried by active or deleted major versions.
        """
        active = self.get_major_versions(experiment_id=experiment_id)
        every = self.get_major_versions(experiment_id=experiment_id, view='all')
        return self._next_free_name("", active, every)

    # =======================================================
    def get_versions(self, experiment_id, view='active_only'):
        """Return every run of the experiment visible under ``view``.

        A falsy ``view`` falls back to ``ViewType.ACTIVE_ONLY``.
        """
        view_type = ViewType.from_string(view) if view else ViewType.ACTIVE_ONLY
        return self.client.store.search_runs([experiment_id], None, view_type)

    # Look up a version id by experiment_id and version_name, major or minor alike.
    def get_version_id(self, experiment_id, version_name, view='active_only'):
        """Return the run id of the first run named ``version_name``, or None."""
        return self._find_run_id_by_name(self.get_versions(experiment_id, view), version_name)

    # Get the best version.
    def get_best_version(self, experiment_name, metrics='rmse', greater_is_better=True):
        """Return ``(version_name, run_id)`` of the run with the best ``metrics`` value.

        Args:
            experiment_name: name of the experiment to search.
            metrics: key of the metric to compare.
            greater_is_better: when True (the default, matching the historical
                behaviour) the largest value wins; pass False for metrics
                where smaller is better.

        Returns:
            ``(None, None)`` when the experiment does not exist or no active
            run logged the metric.  On ties the first run encountered wins.
        """
        experiment = self.client.get_experiment_by_name(experiment_name)
        if not experiment:
            print('[get_best_version][error] experiment not found: %s' % experiment_name)
            return None, None

        best_run = None
        best_value = None
        version_list = self.get_versions(experiment.experiment_id)
        print(len(version_list))
        for version in version_list:
            if metrics not in version.data.metrics:
                continue
            value = version.data.metrics[metrics]
            print(value)
            if value != value:
                # NaN compares unequal to itself and can never be "best".
                continue
            # Start from "no candidate" rather than 0.0 so that zero and
            # negative metric values can be selected too.
            if best_value is None:
                improved = True
            elif greater_is_better:
                improved = value > best_value
            else:
                improved = value < best_value
            if improved:
                best_run, best_value = version, value

        if best_run is None:
            return None, None
        return self._run_name(best_run), best_run.info.run_id

    # =======================================================
    # Create an experiment.
    def create_experiment(self, experiment_name):
        """Create the experiment unless it already exists; return True when created."""
        if self.is_experiment_exist(experiment_name):
            print('[create_experiment][error] experiment_name: %s is exist' % experiment_name)
            return False
        experiment_id = self.client.create_experiment(
            name=experiment_name, artifact_location=self.artifact_location)
        print('[create_experiment][success] experiment_name: %s; experiment_id: %s'
              % (experiment_name, experiment_id))
        return True

    def get_run(self, run_id='27f2872ffe3144b59200350a83ac11a5'):
        """Fetch a run, print it and its params, and return it.

        NOTE(review): the default ``run_id`` looks like a leftover sample id;
        callers should pass an explicit run id.
        """
        run = self.client.get_run(run_id)
        print(run)
        print(run.data.params)
        return run

    # =================================================================
    # Run a new major version, adding one run record.
    def create_and_run_major_version(self, experiment_name, func, args):
        """Start a new major-version run in the experiment and call ``func`` inside it.

        ``func`` is called with keyword arguments ``experiment_name``,
        ``experiment_id``, ``version_name``, ``version_id`` and ``args``.
        The run is used as a context manager, so it is ended even when
        ``func`` raises.
        """
        experiment = self.client.get_experiment_by_name(experiment_name)
        # The experiment must exist.
        if not experiment:
            print('[run_major_version_new][error] experiment not found: %s' % experiment_name)
            return

        experiment_id = experiment.experiment_id
        # Generate a major version name automatically.
        major_version_name = self.generate_major_version_name(experiment_id)
        with mlflow.start_run(experiment_id=experiment_id, run_name=major_version_name) as run:
            print('[run_major_version_new] start experiment: %s major_version: %s'
                  % (experiment_name, major_version_name))
            # Run the user function.
            func(experiment_name=experiment_name, experiment_id=experiment_id,
                 version_name=major_version_name, version_id=run.info.run_id, args=args)

    # Run a new minor version, adding one run record.
    def create_and_run_minor_version(self, experiment_name, version_name, func, args):
        """Start a new minor-version run nested under a major version and call ``func``.

        The major version named ``version_name`` is resumed as the active run
        and a nested run is started inside it.  ``func`` receives the same
        keyword arguments as in ``create_and_run_major_version``, with
        ``version_name`` / ``version_id`` describing the new minor version.
        Both runs are context-managed, so they are ended even when ``func``
        raises.
        """
        experiment = self.client.get_experiment_by_name(experiment_name)
        # The experiment must exist.
        if not experiment:
            print('[run_minor_version_new][error] experiment not found: %s' % experiment_name)
            return

        experiment_id = experiment.experiment_id
        version_id = self.get_major_version_id(experiment_id=experiment_id, version_name=version_name)
        # The major version must exist.
        if not version_id:
            print('[run_minor_version_new][error] major_version not found: %s' % version_name)
            return

        with mlflow.start_run(experiment_id=experiment_id, run_id=version_id):
            # Generate a minor version name automatically.
            minor_version_name = self.generate_minor_version_name(experiment_id, version_id, version_name)
            with mlflow.start_run(experiment_id=experiment_id, run_name=minor_version_name,
                                  nested=True) as run:
                print(
                    '[run_minor_version_new] start experiment: %s major_version: %s minor_version_name: %s'
                    % (experiment_name, version_name, minor_version_name))
                # Run the user function.
                func(experiment_name=experiment_name, experiment_id=experiment_id,
                     version_name=minor_version_name, version_id=run.info.run_id, args=args)
import warnings
from mlflow.tracking import MlflowClient

if __name__ == '__main__':
    warnings.filterwarnings("ignore")

    summary_template = "run_id: {}; lifecycle_stage: {}"

    # Create a run under the default experiment (whose ID is "0").
    client = MlflowClient()
    experiment_id = "0"
    created_run = client.create_run(experiment_id)
    run_id = created_run.info.run_id
    print(summary_template.format(run_id, created_run.info.lifecycle_stage))
    print("--")

    # Delete the run, then fetch it again to show its new lifecycle stage.
    client.delete_run(run_id)
    deleted_run = client.get_run(run_id)
    print(summary_template.format(run_id, deleted_run.info.lifecycle_stage))
k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.") } print("tags: {}".format(tags)) # Create an experiment and log two runs with metrics and tags under it experiment_id = mlflow.create_experiment("Social NLP Experiments") with mlflow.start_run(experiment_id=experiment_id) as run: mlflow.log_metric("m", 1.55) mlflow.set_tag("s.release", "1.1.0-RC") with mlflow.start_run(experiment_id=experiment_id): mlflow.log_metric("m", 2.50) mlflow.set_tag("s.release", "1.2.0-GA") # Search all runs under experiment id and order by descending value of the metric 'm' client = MlflowClient() runs = client.search_runs(experiment_id, order_by=["metrics.m DESC"]) print_run_info(runs) print("--") # Delete the first run client.delete_run(run_id=run.info.run_id) # Search only deleted runs under the experiment_id and use a case insensitive pattern # in the filter_string for the tag. filter_string = "tags.s.release ILIKE '%rc%'" runs = client.search_runs(experiment_id, run_view_type=ViewType.DELETED_ONLY, filter_string=filter_string) print_run_info(runs)