def evaluate_disentanglement_metric(model, metric_names=['mig'], dataset_name='mpi3d_toy'):
    # These imports are kept inside this function so that the code base can run on
    # systems without a proper installation of TensorFlow and libcublas.
    from aicrowd import utils_pytorch
    from aicrowd.evaluate import evaluate
    from disentanglement_lib.config.unsupervised_study_v1 import sweep as unsupervised_study_v1

    _study = unsupervised_study_v1.UnsupervisedStudyV1()
    evaluation_configs = sorted(_study.get_eval_config_files())
    evaluation_configs.append(os.path.join(os.getenv("PWD", ""),
                                           "extra_metrics_configs/irs.gin"))

    results_dict_all = dict()
    for metric_name in metric_names:
        eval_bindings = [
            "evaluation.random_seed = {}".format(0),
            "evaluation.name = '{}'".format(metric_name)
        ]

        # Get the correct config file and load it
        my_config = get_gin_config(evaluation_configs, metric_name)
        if my_config is None:
            logging.warning('metric {} not among available configs: {}'.format(
                metric_name, evaluation_configs))
            return 0
        # gin.parse_config_file(my_config)
        gin.parse_config_files_and_bindings([my_config], eval_bindings)

        # Export the trained encoder in the format expected by disentanglement_lib
        model_path = os.path.join(model.ckpt_dir, 'pytorch_model.pt')
        utils_pytorch.export_model(
            utils_pytorch.RepresentationExtractor(model.model.encoder, 'mean'),
            input_shape=(1, model.num_channels, model.image_size, model.image_size),
            path=model_path)

        output_dir = os.path.join(model.ckpt_dir, 'eval_results', metric_name)
        os.makedirs(os.path.join(model.ckpt_dir, 'results'), exist_ok=True)
        results_dict = evaluate(model.ckpt_dir, output_dir, True)
        gin.clear_config()

        # Pick out the metric value itself, skipping the bookkeeping entries
        results = 0
        for key, value in results_dict.items():
            if key not in ('elapsed_time', 'uuid', 'num_active_dims'):
                results = value
        logging.info('Evaluation {}={}'.format(metric_name, results))
        results_dict_all['eval_{}'.format(metric_name)] = results
    # print(results_dict)
    return results_dict_all
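# Usage sketch (illustrative, not part of the original code base). The call below
# assumes a trained `model` object exposing the attributes referenced above:
# `ckpt_dir`, `num_channels`, `image_size`, and a PyTorch encoder at
# `model.model.encoder`. Metric names follow the disentanglement_lib gin config
# file names (e.g. 'mig', 'sap_score', 'irs').
#
#   scores = evaluate_disentanglement_metric(
#       model, metric_names=['mig', 'sap_score', 'irs'], dataset_name='mpi3d_toy')
#   # -> {'eval_mig': ..., 'eval_sap_score': ..., 'eval_irs': ...}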
def eval_main(eval_pytorch=False):
    global base_path, experiment_name, ROOT, exp_config
    ##############################################################################
    # 0. Settings
    # By default, we save all the results in subdirectories of the following path.
    ##############################################################################
    base_path = os.getenv("AICROWD_OUTPUT_PATH", "./scratch/shared")
    experiment_name = os.getenv("AICROWD_EVALUATION_NAME", "experiment_name")
    DATASET_NAME = "auto"
    overwrite = True
    experiment_output_path = os.path.join(base_path, experiment_name)
    ROOT = os.getenv("NDC_ROOT", ".")

    # Print the configuration for reference
    if not MONKEY:
        print(f"Evaluating Experiment '{experiment_name}' from {base_path}.")
    else:
        import utils_pytorch
        exp_config = utils_pytorch.get_config()
        print(
            f"Evaluating Experiment '{exp_config.experiment_name}' "
            f"from {exp_config.base_path} on dataset {exp_config.dataset_name}"
        )

    # ----- Helpers -----
    def get_full_path(filename):
        return os.path.join(ROOT, filename)

    ##############################################################################
    # Gather Evaluation Configs | Compute Metrics
    ##############################################################################
    _study = unsupervised_study_v1.UnsupervisedStudyV1()
    evaluation_configs = sorted(_study.get_eval_config_files())
    # Add IRS
    evaluation_configs.append(get_full_path("extra_metrics_configs/irs.gin"))

    # Compute individual metrics
    expected_evaluation_metrics = [
        'dci',
        'factor_vae_metric',
        'sap_score',
        'mig',
        'irs'
    ]

    for gin_eval_config in evaluation_configs:
        metric_name = gin_eval_config.split("/")[-1].replace(".gin", "")
        if metric_name not in expected_evaluation_metrics:
            # Ignore unneeded evaluation configs
            continue
        print("Evaluating Metric : {}".format(metric_name))
        result_path = os.path.join(experiment_output_path, "metrics", metric_name)
        representation_path = os.path.join(experiment_output_path, "representation")
        eval_bindings = [
            "evaluation.random_seed = {}".format(0),
            "evaluation.name = '{}'".format(metric_name)
        ]
        evaluate.evaluate_with_gin(representation_path, result_path, overwrite,
                                   [gin_eval_config], eval_bindings, eval_pytorch)

    # Gather evaluation results
    evaluation_result_template = "{}/metrics/{}/results/aggregate/evaluation.json"
    final_scores = {}
    for _metric_name in expected_evaluation_metrics:
        evaluation_json_path = evaluation_result_template.format(
            experiment_output_path, _metric_name)
        evaluation_results = json.loads(open(evaluation_json_path, "r").read())
        if _metric_name == "factor_vae_metric":
            _score = evaluation_results["evaluation_results.eval_accuracy"]
            final_scores["factor_vae_metric"] = _score
        elif _metric_name == "dci":
            _score = evaluation_results["evaluation_results.disentanglement"]
            final_scores["dci"] = _score
        elif _metric_name == "mig":
            _score = evaluation_results["evaluation_results.discrete_mig"]
            final_scores["mig"] = _score
        elif _metric_name == "sap_score":
            _score = evaluation_results["evaluation_results.SAP_score"]
            final_scores["sap_score"] = _score
        elif _metric_name == "irs":
            _score = evaluation_results["evaluation_results.IRS"]
            final_scores["irs"] = _score
        else:
            raise Exception("Unknown metric name : {}".format(_metric_name))

    print("Final Scores : ", final_scores)

    ##############################################################################
    # (Optional) Generate Visualizations
    ##############################################################################
    # model_directory = os.path.join(experiment_output_path, "model")
    # visualize_model.visualize(model_directory, "viz_output/")
    return final_scores
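
if __name__ == "__main__":
    # Minimal invocation sketch (an assumption about how this script is launched,
    # not part of the original code): eval_main() reads its paths from the
    # AICROWD_OUTPUT_PATH, AICROWD_EVALUATION_NAME and NDC_ROOT environment
    # variables, so they should be set (or left at their defaults) beforehand.
    eval_main(eval_pytorch=True)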
_sweep_dropout = h.product((_betas, _datasets, _all_layers))
_dropout_studies = {
    f"{s['dataset']}_dropout_{'all_' if s['all_layers'] else ''}b_{s['beta']}":
        sparsity_study.DropoutStudy(**s)
    for s in _sweep_dropout
}

_code_norm = h.sweep('code_norm', (True, False))
_sweep_wae = h.product((_datasets, _code_norm))
_wae_studies = {
    f"{s['dataset']}_wae{'_norm' if s['code_norm'] else ''}":
        sparsity_study.WAEStudy(**s)
    for s in _sweep_wae
}

STUDIES = {
    "unsupervised_study_v1": unsupervised_study_v1.UnsupervisedStudyV1(),
    "abstract_reasoning_study_v1": abstract_reasoning_study_v1.AbstractReasoningStudyV1(),
    "fairness_study_v1": fairness_study_v1.FairnessStudyV1(),
    "test": tests.TestStudy(),
    **_dim_wise_studies,
    **_dim_wise_mask_studies,
    **_dim_wise_mask_studies_2,
    **_dim_wise_mask_studies_3,
    **_dim_wise_mask_studies_4,
    **_dim_wise_mask_studies_5,
    **_dim_wise_mask_studies_6,
    **_dim_wise_mask_studies_7,
    **_mask_l1_studies,
    **_mask_l1_studies_2,