Example #1
def prepare_optimizer(self, _arm):
    if _arm == 'fe':
        if self.update_flag[_arm] is True:
            # Build the Feature Engineering component.
            fe_evaluator = Evaluator(self.inc['hpo'], name='fe',
                                     resampling_strategy=self.evaluation_type,
                                     seed=self.seed)
            self.optimizer[_arm] = EvaluationBasedOptimizer(
                self.inc['fe'], fe_evaluator,
                self.classifier_id, self.per_run_time_limit, self.per_run_mem_limit, self.seed,
                shared_mode=self.share_fe
            )
        else:
            self.logger.info('No improvement on HPO, so use the old FE optimizer!')
    else:
        if self.update_flag[_arm] is True:
            trials_per_iter = self.optimizer['fe'].evaluation_num_last_iteration
            hpo_evaluator = Evaluator(self.config_space.get_default_configuration(),
                                      data_node=self.inc['fe'],
                                      name='hpo',
                                      resampling_strategy=self.evaluation_type,
                                      seed=self.seed)
            self.optimizer[_arm] = SMACOptimizer(
                hpo_evaluator, self.config_space, output_dir=self.output_dir,
                per_run_time_limit=self.per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed
            )
        else:
            self.logger.info('No improvement on FE, so use the old HPO optimizer!')
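A minimal driver sketch for how prepare_optimizer is meant to be used: each arm's optimizer is rebuilt only when the other arm last produced an improvement (tracked in update_flag). The bandit instance below and the iterate() interface are assumptions for illustration, not part of this excerpt.

# Hypothetical usage; `bandit` is an instance of the class that owns prepare_optimizer.
for pull in range(10):
    arm = 'fe' if pull % 2 == 0 else 'hpo'
    bandit.prepare_optimizer(arm)    # rebuilds this arm's optimizer only if update_flag[arm] is True
    bandit.optimizer[arm].iterate()  # iterate() is assumed here; SMACOptimizer exposes it in Example #4
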
Example #2
def evaluate_evaluation_based_fe(dataset, time_limit, seed=1):
    import pickle

    # Prepare the configuration for random forest.
    from ConfigSpace.hyperparameters import UnParametrizedHyperparameter
    from autosklearn.pipeline.components.classification.random_forest import RandomForest
    cs = RandomForest.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", 'random_forest')
    cs.add_hyperparameter(clf_hp)
    evaluator = Evaluator(cs.get_default_configuration(), name='fe', seed=seed)

    raw_data = load_data(dataset, datanode_returned=True)

    pipeline = FEPipeline(fe_enabled=True,
                          optimizer_type='eval_base',
                          time_budget=time_limit,
                          evaluator=evaluator,
                          seed=seed,
                          model_id='random_forest',
                          time_limit_per_trans=300)
    train_data = pipeline.fit_transform(raw_data)

    score = evaluator(None, data_node=train_data)
    print('==> Base validation score', score)

    save_path = proj_dir + 'data/fe_%s_%d.pkl' % (dataset, time_limit)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, score], f)
    return score
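A brief, assumed invocation of evaluate_evaluation_based_fe; it presumes load_data, FEPipeline, Evaluator, and proj_dir are defined at module level as the function requires, and the dataset names and budgets below are placeholders.

if __name__ == '__main__':
    for ds in ['pc4', 'yeast']:      # placeholder dataset names ('pc4' also appears in Example #4)
        for budget in (300, 600):    # FE time budgets in seconds (illustrative)
            evaluate_evaluation_based_fe(ds, budget, seed=1)
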
Example #3
def evaluate_fe_bugs(dataset, run_id, time_limit, seed):
    algorithms = [
        'lda', 'k_nearest_neighbors', 'libsvm_svc', 'sgd', 'adaboost',
        'random_forest', 'extra_trees', 'decision_tree'
    ]
    algo_id = np.random.choice(algorithms, 1)[0]
    task_id = '%s-fe-%s-%d' % (dataset, algo_id, run_id)
    print(task_id)

    # Prepare the configuration for the randomly chosen classifier.
    clf_class = _classifiers[algo_id]
    cs = clf_class.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", algo_id)
    cs.add_hyperparameter(clf_hp)
    evaluator = Evaluator(cs.get_default_configuration(),
                          name='fe',
                          seed=seed,
                          resampling_strategy='holdout')

    pipeline = FEPipeline(fe_enabled=True,
                          optimizer_type='eval_base',
                          time_budget=time_limit,
                          evaluator=evaluator,
                          seed=seed,
                          model_id=algo_id,
                          time_limit_per_trans=per_run_time_limit,
                          task_id=task_id)

    raw_data, test_raw_data = load_train_test_data(dataset)
    train_data = pipeline.fit_transform(raw_data.copy_())
    test_data = pipeline.transform(test_raw_data.copy_())
    train_data_new = pipeline.transform(raw_data.copy_())

    assert (train_data.data[0] == train_data_new.data[0]).all()
    assert (train_data.data[1] == train_data_new.data[1]).all()
    assert (train_data_new == train_data)

    score = evaluator(None, data_node=test_data)
    print('==> Test score', score)
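A sketch of how evaluate_fe_bugs might be driven repeatedly. Note that the function reads per_run_time_limit as a module-level name, so it must exist before the call; the value and loop bounds below are illustrative only.

per_run_time_limit = 150      # module-level name read inside evaluate_fe_bugs; value is illustrative

for run_id in range(5):       # repeat with different randomly chosen algorithms
    evaluate_fe_bugs('pc4', run_id=run_id, time_limit=600, seed=1)
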
Example #4
def conduct_hpo(dataset='pc4',
                classifier_id='random_forest',
                iter_num=100,
                iter_mode=True):
    from autosklearn.pipeline.components.classification import _classifiers

    clf_class = _classifiers[classifier_id]
    cs = clf_class.get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", classifier_id)
    cs.add_hyperparameter(model)

    raw_data = load_data(dataset, datanode_returned=True)
    print(set(raw_data.data[1]))
    evaluator = Evaluator(cs.get_default_configuration(),
                          name='hpo',
                          data_node=raw_data)

    if not iter_mode:
        optimizer = SMACOptimizer(evaluator,
                                  cs,
                                  evaluation_limit=600,
                                  output_dir='logs')
        inc, val = optimizer.optimize()
        print(inc, val)
    else:
        import time
        _start_time = time.time()
        optimizer = SMACOptimizer(evaluator,
                                  cs,
                                  trials_per_iter=1,
                                  output_dir='logs',
                                  per_run_time_limit=180)
        results = list()
        for _iter in range(iter_num):
            perf, _, _ = optimizer.iterate()
            print(_iter, perf)
            results.append(perf)
        print(results)
        print(time.time() - _start_time)
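Two assumed ways of calling conduct_hpo, one per mode; the dataset and classifier choices are placeholders taken from names that appear elsewhere in these examples.

if __name__ == '__main__':
    # Iterative mode: one SMAC trial per optimizer.iterate() call, repeated iter_num times.
    conduct_hpo(dataset='pc4', classifier_id='random_forest', iter_num=20, iter_mode=True)
    # One-shot mode: hand the whole evaluation budget to SMAC at once.
    conduct_hpo(dataset='pc4', classifier_id='libsvm_svc', iter_mode=False)
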
Example #5
def evaluate_metalearning_configs(first_bandit):
    score_list = []
    for config in first_bandit.meta_configs:
        try:
            config = config.get_dictionary()
            # print(config)
            arm = None
            cs = ConfigurationSpace()
            for key in config:
                key_str = key.split(":")
                if key_str[0] == 'classifier':
                    if key_str[1] == '__choice__':
                        arm = config[key]
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(
                                "estimator", config[key]))
                    else:
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(
                                key_str[2], config[key]))

            if arm in first_bandit.arms:
                transformed_node = apply_metalearning_fe(
                    first_bandit.sub_bandits[arm].optimizer['fe'], config)
                default_config = cs.sample_configuration(1)
                hpo_evaluator = Evaluator(
                    None,
                    data_node=transformed_node,
                    name='hpo',
                    resampling_strategy=first_bandit.eval_type,
                    seed=first_bandit.seed)

                start_time = time.time()
                score = 1 - hpo_evaluator(default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, default_config, transformed_node, time_cost))
                transformed_node.score = score

                # Evaluate the default config
                start_time = time.time()
                score = 1 - hpo_evaluator(
                    first_bandit.sub_bandits[arm].default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, first_bandit.sub_bandits[arm].default_config,
                     transformed_node, time_cost))
                transformed_node.score = score
        except Exception as e:
            print(e)

    # Sort the meta-configs
    score_list.sort(key=lambda x: x[1], reverse=True)
    meta_arms = list()
    for arm_score_config in score_list:
        if arm_score_config[0] in meta_arms:
            continue

        first_bandit.sub_bandits[
            arm_score_config[0]].default_config = arm_score_config[2]
        first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats(
            'fe',
            (arm_score_config[1], arm_score_config[4], arm_score_config[3]))
        # first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats('hpo',
        #                                                                  (arm_score_config[1], arm_score_config[4],
        #                                                                   arm_score_config[2]))
        first_bandit.sub_bandits[arm_score_config[0]].optimizer[
            'fe'].hp_config = arm_score_config[2]
        meta_arms.append(arm_score_config[0])
    for arm in first_bandit.arms:
        if arm not in meta_arms:
            meta_arms.append(arm)

    first_bandit.final_rewards.append(score_list[0][1])
    first_bandit.action_sequence.append(score_list[0][0])
    first_bandit.time_records.append(score_list[0][4])  # time cost of the best meta-config
    first_bandit.arms = meta_arms
    first_bandit.logger.info("Arms after evaluating meta-configs: " +
                             str(first_bandit.arms))
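The colon-separated key convention parsed above can be shown with a toy dictionary; the keys mimic the 'classifier:...' layout that the loop splits, and the values are made up.

toy_config = {
    'classifier:__choice__': 'libsvm_svc',
    'classifier:libsvm_svc:C': 1.0,
    'classifier:libsvm_svc:kernel': 'rbf',
}
for key, value in toy_config.items():
    parts = key.split(':')
    if parts[0] == 'classifier':
        if parts[1] == '__choice__':
            print('arm:', value)                             # -> libsvm_svc
        else:
            print('hyperparameter %s = %s' % (parts[2], value))
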
Example #6
def evaluate():
    perf = Evaluator(
        self.local_inc['hpo'], data_node=self.local_inc['fe'],
        name='fe', resampling_strategy=self.evaluation_type,
        seed=self.seed)(self.local_inc['hpo'])
    return perf
Example #7
    def __init__(self, classifier_id: str, data: DataNode,
                 share_fe=False, output_dir='logs',
                 per_run_time_limit=120,
                 per_run_mem_limit=5120,
                 eval_type='cv', dataset_id='default',
                 mth='rb', sw_size=3, strategy='avg',
                 n_jobs=1, seed=1):
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.classifier_id = classifier_id
        self.evaluation_type = eval_type
        self.original_data = data.copy_()
        self.share_fe = share_fe
        self.output_dir = output_dir
        self.mth = mth
        self.strategy = strategy
        self.seed = seed
        self.sliding_window_size = sw_size
        self.logger = get_logger('%s:%s-%d=>%s' % (__class__.__name__, dataset_id, seed, classifier_id))
        np.random.seed(self.seed)

        # Bandit settings.
        self.arms = ['fe', 'hpo']
        self.rewards = dict()
        self.optimizer = dict()
        self.evaluation_cost = dict()
        self.inc = dict()
        self.local_inc = dict()
        for arm in self.arms:
            self.rewards[arm] = list()
            self.evaluation_cost[arm] = list()
        self.pull_cnt = 0
        self.action_sequence = list()
        self.final_rewards = list()
        self.incumbent_perf = -1.
        self.incumbent_source = None
        self.update_flag = dict()
        self.imp_rewards = dict()
        for arm in self.arms:
            self.update_flag[arm] = True
            self.imp_rewards[arm] = list()

        from autosklearn.pipeline.components.classification import _classifiers
        clf_class = _classifiers[classifier_id]
        cs = clf_class.get_hyperparameter_search_space()
        model = UnParametrizedHyperparameter("estimator", classifier_id)
        cs.add_hyperparameter(model)
        self.config_space = cs
        self.default_config = cs.get_default_configuration()
        self.config_space.seed(self.seed)

        # Build the Feature Engineering component.
        fe_evaluator = Evaluator(self.default_config,
                                 name='fe', resampling_strategy=self.evaluation_type,
                                 seed=self.seed)
        self.optimizer['fe'] = EvaluationBasedOptimizer(
                self.original_data, fe_evaluator,
                classifier_id, per_run_time_limit, per_run_mem_limit, self.seed,
                shared_mode=self.share_fe, n_jobs=n_jobs)
        self.inc['fe'], self.local_inc['fe'] = self.original_data, self.original_data

        # Build the HPO component.
        trials_per_iter = len(self.optimizer['fe'].trans_types)
        hpo_evaluator = Evaluator(self.default_config,
                                  data_node=self.original_data, name='hpo',
                                  resampling_strategy=self.evaluation_type,
                                  seed=self.seed)
        if n_jobs == 1:
            self.optimizer['hpo'] = SMACOptimizer(
                hpo_evaluator, cs, output_dir=output_dir, per_run_time_limit=per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed)
        else:
            self.optimizer['hpo'] = PSMACOptimizer(
                hpo_evaluator, cs, output_dir=output_dir, per_run_time_limit=per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed,
                n_jobs=n_jobs
            )
        self.inc['hpo'], self.local_inc['hpo'] = self.default_config, self.default_config
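Constructing the object whose __init__ appears above needs only a classifier id and a DataNode; the class name SubBandit below is a hypothetical stand-in (the real name is not shown in this excerpt), and load_data is assumed to behave as in Example #2.

raw_data = load_data('pc4', datanode_returned=True)   # returns a DataNode, as in Example #2
bandit = SubBandit('random_forest', raw_data,          # SubBandit is a placeholder class name
                   eval_type='holdout', per_run_time_limit=150, seed=1)
bandit.prepare_optimizer('fe')                         # as in Example #1, assuming the same class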