Example #1
    def _risk_assessment_helper(self, experiment_class, exp_path, debug=False, other=None):

        dataset_getter = DatasetGetter(None)

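        # Run model selection to find the best configuration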
        best_config = self.model_selector.model_selection(dataset_getter, experiment_class, exp_path,
                                                          self.model_configs, debug, other)

        # Retrain with the best configuration and test
        experiment = experiment_class(best_config['config'], exp_path)

        # Set up a log file for this experiment (run in a separate process)
        logger = Logger(str(os.path.join(experiment.exp_path, 'experiment.log')), mode='a')

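        # Tell the data provider not to use an inner split for the final retraining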
        dataset_getter.set_inner_k(None)

        training_scores, test_scores = [], []

        # Mitigate bad random initializations
        for i in range(3):
            training_score, test_score = experiment.run_test(dataset_getter, logger, other)
            print(f'Final training run {i + 1}: {training_score}, {test_score}')

            training_scores.append(training_score)
            test_scores.append(test_score)

        training_score = sum(training_scores) / len(training_scores)
        test_score = sum(test_scores) / len(test_scores)

        logger.log('TR score: ' + str(training_score) + ' TS score: ' + str(test_score))

        with open(os.path.join(self._HOLDOUT_FOLDER, self._ASSESSMENT_FILENAME), 'w') as fp:
            json.dump({'best_config': best_config, 'HOLDOUT_TR': training_score, 'HOLDOUT_TS': test_score}, fp)
Example #2
    def _model_selection_helper(self,
                                dataset_getter,
                                experiment_class,
                                config,
                                exp_config_name,
                                other=None):
        """
        Trains and validates a single configuration on the data provided by the dataset getter.

        :param dataset_getter: object that provides the training/validation data
        :param experiment_class: the experiment class to instantiate
        :param config: the model configuration to evaluate
        :param exp_config_name: folder where this configuration's outputs are stored
        :param other: optional extra data passed to the experiment
        :return: None, results are written to a JSON file in the experiment folder
        """

        # Create the experiment object which will be responsible for running a specific experiment
        experiment = experiment_class(config, exp_config_name)

        # Set up a log file for this experiment (run in a separate process)
        logger = Logger(str(os.path.join(experiment.exp_path,
                                         'experiment.log')),
                        mode='a')
        logger.log('Configuration: ' + str(experiment.model_config))

        config_filename = os.path.join(experiment.exp_path,
                                       self._CONFIG_FILENAME)

        # ------------- PREPARE DICTIONARY TO STORE RESULTS -------------- #

        selection_dict = {
            'config': experiment.model_config.config_dict,
            'TR_score': 0.,
            'VL_score': 0.,
        }

        dataset_getter.set_inner_k(None)  # must remain None: no inner fold is used here

        training_score, validation_score = experiment.run_valid(
            dataset_getter, logger, other)

        selection_dict['TR_score'] = float(training_score)
        selection_dict['VL_score'] = float(validation_score)

        logger.log('TR Accuracy: ' + str(training_score) + ' VL Accuracy: ' +
                   str(validation_score))

        with open(config_filename, 'w') as fp:
            json.dump(selection_dict, fp)
Example #3
class AbstractTool(object):
    """
    This is an abstract class representing a tool.
    """
    def __init__(self,
                 tool_name=None,
                 path_for_log_file='tmp/',
                 parameters=None):
        """
        Class constructor.

        @param tool_name: tool name, used for debugging purposes
        @type tool_name: string
        @param path_for_log_file: path where the logs generated by the tool are saved.
        @type path_for_log_file: string
        @param parameters: optional tool-specific parameters.
        """
        self.tool_name = tool_name
        self.log = Logger(tool_name, path_for_log_file)

    def execute_agent(self, agent):
        """
        Executes the main method of the tool on an agent.

        @param agent: the agent the method should be executed on.
        @type agent: class Agent
        """
        raise NotImplementedError(
            "This method is abstract and must be implemented in derived classes."
        )

    def execute_model(self, model):
        """
        Executes the main method of the tool on a single model.

        @param model: the model the method should be executed on.
        @type model: class derived from tools.AbstractModel
        """
        raise NotImplementedError(
            "This method is abstract and must be implemented in derived classes."
        )

    def _create_log(self, data):
        print('Saving log...')
        self.log.log(data)
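
For reference, here is a minimal sketch of how AbstractTool is intended to be subclassed; the EchoTool name and its logging-only behaviour are illustrative assumptions, not part of the original code:

class EchoTool(AbstractTool):
    """
    Hypothetical tool that simply logs whatever it is asked to execute on.
    """

    def execute_agent(self, agent):
        # Illustrative implementation: record which agent was processed
        self._create_log('Executed on agent: ' + str(agent))

    def execute_model(self, model):
        # Illustrative implementation: record which model was processed
        self._create_log('Executed on model: ' + str(model))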
Example #4
    def _model_selection_helper(self,
                                dataset_getter,
                                experiment_class,
                                config,
                                exp_config_name,
                                other=None):

        # Set up a log file for this experiment (run in a separate process)
        logger = Logger(str(os.path.join(exp_config_name, 'experiment.log')),
                        mode='a')

        logger.log('Configuration: ' + str(config))

        config_filename = os.path.join(exp_config_name, self._CONFIG_FILENAME)

        # ------------- PREPARE DICTIONARY TO STORE RESULTS -------------- #

        k_fold_dict = {
            'config': config,
            'folds': [{} for _ in range(self.folds)],
            'avg_TR_score': 0.,
            'avg_VL_score': 0.,
            'std_TR_score': 0.,
            'std_VL_score': 0.
        }

        for k in range(self.folds):

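            # Tell the data provider to take data relative to a specific INNER split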
            dataset_getter.set_inner_k(k)

            fold_exp_folder = os.path.join(exp_config_name,
                                           'FOLD_' + str(k + 1))
            # Create the experiment object which will be responsible for running a specific experiment
            experiment = experiment_class(config, fold_exp_folder)

            training_score, validation_score = experiment.run_valid(
                dataset_getter, logger, other)

            logger.log(
                str(k + 1) + ' split, TR Accuracy: ' + str(training_score) +
                ' VL Accuracy: ' + str(validation_score))

            k_fold_dict['folds'][k]['TR_score'] = training_score
            k_fold_dict['folds'][k]['VL_score'] = validation_score

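        # Aggregate the per-fold scores into mean and standard deviation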
        tr_scores = np.array(
            [k_fold_dict['folds'][k]['TR_score'] for k in range(self.folds)])
        vl_scores = np.array(
            [k_fold_dict['folds'][k]['VL_score'] for k in range(self.folds)])

        k_fold_dict['avg_TR_score'] = tr_scores.mean()
        k_fold_dict['std_TR_score'] = tr_scores.std()
        k_fold_dict['avg_VL_score'] = vl_scores.mean()
        k_fold_dict['std_VL_score'] = vl_scores.std()

        logger.log('TR avg is ' + str(k_fold_dict['avg_TR_score']) +
                   ' std is ' + str(k_fold_dict['std_TR_score']) +
                   ' VL avg is ' + str(k_fold_dict['avg_VL_score']) +
                   ' std is ' + str(k_fold_dict['std_VL_score']))

        with open(config_filename, 'w') as fp:
            json.dump(k_fold_dict, fp)
Example #5
    def run_final_model(self, outer_k, debug):
        outer_folder = osp.join(self._ASSESSMENT_FOLDER,
                                self._OUTER_FOLD_BASE + str(outer_k + 1))
        config_fname = osp.join(outer_folder, self._SELECTION_FOLDER,
                                self._WINNER_CONFIG)

        with open(config_fname, 'r') as f:
            best_config = json.load(f)

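        # Create the dataset provider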
        dataset_getter_class = s2c(self.model_configs.dataset_getter)
        dataset_getter = dataset_getter_class(
            self.model_configs.data_root, self.splits_folder,
            s2c(self.model_configs.dataset_class),
            self.model_configs.dataset_name, self.outer_folds,
            self.inner_folds, self.model_configs.num_dataloader_workers,
            self.model_configs.pin_memory)
        # Tell the data provider to take data relative
        # to a specific OUTER split
        dataset_getter.set_outer_k(outer_k)
        dataset_getter.set_inner_k(None)

        # Mitigate bad random initializations
        for i in range(self.final_training_runs):

            final_run_exp_path = osp.join(outer_folder, f"final_run{i+1}")
            final_run_torch_path = osp.join(final_run_exp_path,
                                            f'run_{i+1}_results.torch')

            # Retrain with the best configuration and test
            # Set up a log file for this experiment (run in a separate process)
            logger = Logger(osp.join(final_run_exp_path, 'experiment.log'),
                            mode='a')
            logger.log(
                json.dumps(dict(outer_k=dataset_getter.outer_k,
                                inner_k=dataset_getter.inner_k,
                                **best_config),
                           sort_keys=False,
                           indent=4))

            if not debug:

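                # Define a Ray task that runs the final re-training and test, unless results already exist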
                @ray.remote(num_cpus=1, num_gpus=self.gpus_per_task)
                def foo():
                    if not osp.exists(final_run_torch_path):

                        experiment = self.experiment_class(
                            best_config['config'], final_run_exp_path)
                        res = experiment.run_test(dataset_getter, logger)
                        torch.save(res, final_run_torch_path)
                    return outer_k, i

                # Launch the job and append to list of final runs jobs
                future = foo.remote()
                self.final_runs_job_list.append(future)
                self.progress_manager.update_state(
                    dict(type='START_FINAL_RUN', outer_fold=outer_k, run_id=i))
            else:
                if not osp.exists(final_run_torch_path):
                    experiment = self.experiment_class(best_config['config'],
                                                       final_run_exp_path)
                    training_score, test_score = experiment.run_test(
                        dataset_getter, logger)
                    torch.save((training_score, test_score),
                               final_run_torch_path)
        if debug:
            self.process_final_runs(outer_k)
Example #6
    def model_selection(self, kfold_folder, outer_k, debug):
        """
        Performs model selection by launching each configuration in parallel, unless debug is True. Each process
        trains the same configuration for each inner fold.
        :param kfold_folder: The root folder for model selection
        :param outer_k: the current outer fold to consider
        :param debug: whether to run the procedure in debug mode (no multiprocessing)
        """
        SELECTION_FOLDER = osp.join(kfold_folder, self._SELECTION_FOLDER)

        # Create the dataset provider
        dataset_getter_class = s2c(self.model_configs.dataset_getter)
        dataset_getter = dataset_getter_class(
            self.model_configs.data_root, self.splits_folder,
            s2c(self.model_configs.dataset_class),
            self.model_configs.dataset_name, self.outer_folds,
            self.inner_folds, self.model_configs.num_dataloader_workers,
            self.model_configs.pin_memory)

        # Tell the data provider to take data relative
        # to a specific OUTER split
        dataset_getter.set_outer_k(outer_k)

        if not osp.exists(SELECTION_FOLDER):
            os.makedirs(SELECTION_FOLDER)

        # if the # of configs to try is 1, simply skip model selection
        if len(self.model_configs) > 1:

            # Launch one job for each inner_fold for each configuration
            for config_id, config in enumerate(self.model_configs):
                # Make a copy of this configuration dictionary:
                # it can get shared between processes!
                cfg = deepcopy(config)

                # Create a separate folder for each configuration
                config_folder = osp.join(
                    SELECTION_FOLDER, self._CONFIG_BASE + str(config_id + 1))
                if not osp.exists(config_folder):
                    os.makedirs(config_folder)

                for k in range(self.inner_folds):
                    # Create a separate folder for each fold for each config.
                    fold_exp_folder = osp.join(
                        config_folder, self._INNER_FOLD_BASE + str(k + 1))
                    fold_results_torch_path = osp.join(
                        fold_exp_folder, f'fold_{str(k+1)}_results.torch')

                    # Tell the data provider to take data relative
                    # to a specific INNER split
                    dataset_getter.set_inner_k(k)

                    logger = Logger(osp.join(fold_exp_folder,
                                             'experiment.log'),
                                    mode='a')
                    logger.log(
                        json.dumps(dict(outer_k=dataset_getter.outer_k,
                                        inner_k=dataset_getter.inner_k,
                                        **cfg),
                                   sort_keys=False,
                                   indent=4))
                    if not debug:

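                        # Define a Ray task that runs validation for this fold, unless results already exist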
                        @ray.remote(num_cpus=1, num_gpus=self.gpus_per_task)
                        def foo():
                            if not osp.exists(fold_results_torch_path):
                                experiment = self.experiment_class(
                                    cfg, fold_exp_folder)
                                res = experiment.run_valid(
                                    dataset_getter, logger)
                                torch.save(res, fold_results_torch_path)
                            return dataset_getter.outer_k, dataset_getter.inner_k, config_id

                        # Launch the job and append to list of outer jobs
                        future = foo.remote()
                        self.outer_folds_job_list.append(future)
                        self.progress_manager.update_state(
                            dict(type='START_CONFIG',
                                 outer_fold=outer_k,
                                 inner_fold=k,
                                 config_id=config_id))
                    else:  # debug mode
                        if not osp.exists(fold_results_torch_path):
                            experiment = self.experiment_class(
                                cfg, fold_exp_folder)
                            training_score, validation_score = experiment.run_valid(
                                dataset_getter, logger)
                            torch.save((training_score, validation_score),
                                       fold_results_torch_path)

                if debug:
                    self.process_config(config_folder, deepcopy(config))
            if debug:
                self.process_inner_results(SELECTION_FOLDER, config_id)
        else:
            # With a single configuration, model selection is unnecessary: store it directly as the winner
            with open(osp.join(SELECTION_FOLDER, self._WINNER_CONFIG),
                      'w') as fp:
                json.dump(dict(best_config_id=0, config=self.model_configs[0]),
                          fp,
                          sort_keys=False,
                          indent=4)