def evaluate_disentanglement_metric(model, metric_names=['mig'], dataset_name='mpi3d_toy'):
    # These imports are kept inside this function so that the code base can run on
    # systems without a proper installation of TensorFlow and libcublas.
    from aicrowd import utils_pytorch
    from aicrowd.evaluate import evaluate
    from disentanglement_lib.config.unsupervised_study_v1 import sweep as unsupervised_study_v1

    _study = unsupervised_study_v1.UnsupervisedStudyV1()
    evaluation_configs = sorted(_study.get_eval_config_files())
    evaluation_configs.append(os.path.join(os.getenv("PWD", ""), "extra_metrics_configs/irs.gin"))

    results_dict_all = dict()
    for metric_name in metric_names:
        eval_bindings = [
            "evaluation.random_seed = {}".format(0),
            "evaluation.name = '{}'".format(metric_name)
        ]

        # Get the correct config file and load it
        my_config = get_gin_config(evaluation_configs, metric_name)
        if my_config is None:
            logging.warning('metric {} not among available configs: {}'.format(metric_name, evaluation_configs))
            continue  # skip metrics without a matching config
        # gin.parse_config_file(my_config)
        gin.parse_config_files_and_bindings([my_config], eval_bindings)

        model_path = os.path.join(model.ckpt_dir, 'pytorch_model.pt')
        utils_pytorch.export_model(utils_pytorch.RepresentationExtractor(model.model.encoder, 'mean'),
                                   input_shape=(1, model.num_channels, model.image_size, model.image_size),
                                   path=model_path)

        output_dir = os.path.join(model.ckpt_dir, 'eval_results', metric_name)
        os.makedirs(os.path.join(model.ckpt_dir, 'results'), exist_ok=True)

        results_dict = evaluate(model.ckpt_dir, output_dir, True)
        gin.clear_config()
        # Pull out the metric value itself, skipping the bookkeeping entries.
        results = 0
        for key, value in results_dict.items():
            if key not in ('elapsed_time', 'uuid', 'num_active_dims'):
                results = value
        logging.info('Evaluation   {}={}'.format(metric_name, results))
        results_dict_all['eval_{}'.format(metric_name)] = results
    # print(results_dict)
    return results_dict_all
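
A minimal usage sketch for evaluate_disentanglement_metric; train_vae and trained_model below are hypothetical stand-ins for any training helper and model wrapper that exposes the attributes the function reads (ckpt_dir, num_channels, image_size, model.encoder):

# Hypothetical usage: train_vae is an assumed training helper, and trained_model
# stands in for any object with ckpt_dir, num_channels, image_size and model.encoder.
trained_model = train_vae(dataset_name='mpi3d_toy')
scores = evaluate_disentanglement_metric(trained_model, metric_names=['mig', 'sap_score'])
for metric, value in scores.items():
    print(metric, value)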
Example #2
    exp_config = utils_pytorch.get_config()
    print("Evaluating Experiment '{exp_config.experiment_name}' "
          "from {exp_config.base_path} on dataset {exp_config.dataset_name}")

# ----- Helpers -----


def get_full_path(filename):
    return os.path.join(ROOT, filename)


##############################################################################
# Gather Evaluation Configs | Compute Metrics
##############################################################################
_study = unsupervised_study_v1.UnsupervisedStudyV1()
evaluation_configs = sorted(_study.get_eval_config_files())
# Add IRS
evaluation_configs.append(get_full_path("extra_metrics_configs/irs.gin"))

# Compute individual metrics
expected_evaluation_metrics = [
    'dci', 'factor_vae_metric', 'sap_score', 'mig', 'irs'
]

for gin_eval_config in evaluation_configs:
    metric_name = gin_eval_config.split("/")[-1].replace(".gin", "")
    if metric_name not in expected_evaluation_metrics:
        # Ignore unneeded evaluation configs
        continue
    print("Evaluating Metric : {}".format(metric_name))
Example #3
def eval_main(eval_pytorch=False):
    global base_path, experiment_name, ROOT, exp_config
    ##############################################################################
    # 0. Settings
    # By default, we save all the results in subdirectories of the following path.
    ##############################################################################
    base_path = os.getenv("AICROWD_OUTPUT_PATH", "./scratch/shared")
    experiment_name = os.getenv("AICROWD_EVALUATION_NAME", "experiment_name")
    DATASET_NAME = "auto"
    overwrite = True
    experiment_output_path = os.path.join(base_path, experiment_name)
    ROOT = os.getenv("NDC_ROOT", ".")
    # Print the configuration for reference
    if not MONKEY:
        print(f"Evaluating Experiment '{experiment_name}' from {base_path}.")
    else:
        import utils_pytorch
        exp_config = utils_pytorch.get_config()
        print(
            f"Evaluating Experiment '{exp_config.experiment_name}' "
            f"from {exp_config.base_path} on dataset {exp_config.dataset_name}"
        )

    # ----- Helpers -----
    def get_full_path(filename):
        return os.path.join(ROOT, filename)

    ##############################################################################
    # Gather Evaluation Configs | Compute Metrics
    ##############################################################################
    _study = unsupervised_study_v1.UnsupervisedStudyV1()
    evaluation_configs = sorted(_study.get_eval_config_files())
    # Add IRS
    evaluation_configs.append(get_full_path("extra_metrics_configs/irs.gin"))
    # Compute individual metrics
    expected_evaluation_metrics = [
        'dci', 'factor_vae_metric', 'sap_score', 'mig', 'irs'
    ]
    for gin_eval_config in evaluation_configs:
        metric_name = gin_eval_config.split("/")[-1].replace(".gin", "")
        if metric_name not in expected_evaluation_metrics:
            # Ignore unneeded evaluation configs
            continue
        print("Evaluating Metric : {}".format(metric_name))
        result_path = os.path.join(experiment_output_path, "metrics",
                                   metric_name)
        representation_path = os.path.join(experiment_output_path,
                                           "representation")
        eval_bindings = [
            "evaluation.random_seed = {}".format(0),
            "evaluation.name = '{}'".format(metric_name)
        ]
        evaluate.evaluate_with_gin(representation_path, result_path, overwrite,
                                   [gin_eval_config], eval_bindings,
                                   eval_pytorch)
    # Gather evaluation results
    evaluation_result_template = "{}/metrics/{}/results/aggregate/evaluation.json"
    final_scores = {}
    for _metric_name in expected_evaluation_metrics:
        evaluation_json_path = evaluation_result_template.format(
            experiment_output_path, _metric_name)
        with open(evaluation_json_path, "r") as f:
            evaluation_results = json.load(f)
        if _metric_name == "factor_vae_metric":
            _score = evaluation_results["evaluation_results.eval_accuracy"]
            final_scores["factor_vae_metric"] = _score
        elif _metric_name == "dci":
            _score = evaluation_results["evaluation_results.disentanglement"]
            final_scores["dci"] = _score
        elif _metric_name == "mig":
            _score = evaluation_results["evaluation_results.discrete_mig"]
            final_scores["mig"] = _score
        elif _metric_name == "sap_score":
            _score = evaluation_results["evaluation_results.SAP_score"]
            final_scores["sap_score"] = _score
        elif _metric_name == "irs":
            _score = evaluation_results["evaluation_results.IRS"]
            final_scores["irs"] = _score
        else:
            raise Exception("Unknown metric name : {}".format(_metric_name))
    print("Final Scores : ", final_scores)
    ##############################################################################
    # (Optional) Generate Visualizations
    ##############################################################################
    # model_directory = os.path.join(experiment_output_path, "model")
    # visualize_model.visualize(model_directory, "viz_output/")
    return final_scores
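
A short entry-point sketch for running eval_main directly; it assumes os and json are already imported at module level (as the function body implies), and the environment values below are placeholders matching the defaults above:

if __name__ == "__main__":
    # Placeholder values; eval_main() itself reads these environment variables.
    os.environ.setdefault("AICROWD_OUTPUT_PATH", "./scratch/shared")
    os.environ.setdefault("AICROWD_EVALUATION_NAME", "experiment_name")
    print(json.dumps(eval_main(eval_pytorch=True), indent=2))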
Example #4
_sweep_dropout = h.product((_betas, _datasets, _all_layers))
_dropout_studies = {
    f"{s['dataset']}_dropout_{'all_' if s['all_layers'] else ''}b_{s['beta']}": sparsity_study.DropoutStudy(**s)
    for s in _sweep_dropout
}

_code_norm = h.sweep('code_norm', (True, False))
_sweep_wae = h.product((_datasets, _code_norm))
_wae_studies = {
    f"{s['dataset']}_wae{'_norm' if s['code_norm'] else ''}": sparsity_study.WAEStudy(**s)
    for s in _sweep_wae
}
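
For readers unfamiliar with the h.sweep / h.product helpers used above, the following standalone sketch approximates how the study dictionaries are expanded; it only illustrates the cross-product pattern and is not the actual disentanglement_lib implementation (the dataset values are illustrative):

import itertools

def sweep(name, values):
    # One dict per value, e.g. [{'code_norm': True}, {'code_norm': False}].
    return [{name: v} for v in values]

def product(sweeps):
    # Cartesian product of the sweeps, merging each combination into a single dict.
    return [
        {k: v for d in combo for k, v in d.items()}
        for combo in itertools.product(*sweeps)
    ]

_datasets_demo = sweep('dataset', ('dsprites_full', 'shapes3d'))  # illustrative values
_code_norm_demo = sweep('code_norm', (True, False))
for s in product((_datasets_demo, _code_norm_demo)):
    print(f"{s['dataset']}_wae{'_norm' if s['code_norm'] else ''}")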

STUDIES = {
    "unsupervised_study_v1": unsupervised_study_v1.UnsupervisedStudyV1(),
    "abstract_reasoning_study_v1":
        abstract_reasoning_study_v1.AbstractReasoningStudyV1(),
    "fairness_study_v1":
        fairness_study_v1.FairnessStudyV1(),
    "test": tests.TestStudy(),
    **_dim_wise_studies,
    **_dim_wise_mask_studies,
    **_dim_wise_mask_studies_2,
    **_dim_wise_mask_studies_3,
    **_dim_wise_mask_studies_4,
    **_dim_wise_mask_studies_5,
    **_dim_wise_mask_studies_6,
    **_dim_wise_mask_studies_7,
    **_mask_l1_studies,
    **_mask_l1_studies_2,