Exemplo n.º 1
0
    def __init__(self, evaluator, config_space, time_limit=None, evaluation_limit=None,
                 per_run_time_limit=300, per_run_mem_limit=1024, output_dir='./',
                 trials_per_iter=1, seed=1, n_jobs=1):
        """Store the search budgets and build the underlying ``BO`` optimizer.

        The constructor forwards ``evaluator``, ``config_space`` and ``seed``
        to the parent class, records the limits on the instance, creates a
        ``BO`` optimizer over ``config_space``, resets the trial bookkeeping,
        and derives a cap on the number of configurations from a sample of
        the configuration space.

        NOTE(review): ``n_jobs`` is accepted but never read in this
        constructor -- confirm whether it is meant to be forwarded somewhere.
        """
        super().__init__(evaluator, config_space, seed)

        # Overall budgets and per-run resource limits.
        self.time_limit = time_limit
        self.evaluation_num_limit = evaluation_limit
        self.trials_per_iter = trials_per_iter
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.output_dir = output_dir

        # max_runs is set to a huge value; no run cap is imposed through BO itself.
        random_state = np.random.RandomState(self.seed)
        self.optimizer = BO(
            objective_function=self.evaluator,
            config_space=config_space,
            max_runs=int(1e10),
            task_id=None,
            time_limit_per_trial=self.per_run_time_limit,
            rng=random_state,
        )

        # Trial bookkeeping; the incumbent starts at the default configuration.
        self.trial_cnt = 0
        self.configs, self.perfs = [], []
        self.incumbent_perf = float("-INF")
        self.incumbent_config = self.config_space.get_default_configuration()

        # Estimate the size of the hyperparameter space: 75% of the number of
        # distinct configurations among 10000 samples (0 for an empty space).
        hyperparameters = self.config_space.get_hyperparameters()
        if len(hyperparameters) > 0:
            distinct_samples = set(self.config_space.sample_configuration(10000))
            self.config_num_threshold = int(len(distinct_samples) * 0.75)
        else:
            self.config_num_threshold = 0
        self.logger.debug('The maximum trial number in HPO is: %d' % self.config_num_threshold)

        # Never consider more than 600 configurations, even in large spaces.
        self.maximum_config_num = min(600, self.config_num_threshold)
        self.early_stopped_flag = False
        self.eval_dict = dict()
Exemplo n.º 2
0
    def __init__(self,
                 task_type,
                 input_data: DataNode,
                 evaluator: _BaseEvaluator,
                 model_id: str,
                 time_limit_per_trans: int,
                 mem_limit_per_trans: int,
                 seed: int,
                 n_jobs=1,
                 number_of_unit_resource=1,
                 time_budget=600,
                 algo='smac'):
        """Initialise the optimizer state and its ``BO`` backend.

        The parent class is initialised with this class's name, ``task_type``,
        ``input_data`` and ``seed``. The remaining arguments are stored on the
        instance, the hyperparameter space is obtained from
        ``_get_task_hyperparameter_space(optimizer=algo)``, and a ``BO``
        optimizer is created with ``self.evaluate_function`` as its objective.

        The incumbent configuration starts as the space's default
        configuration when ``algo == 'smac'`` and as ``None`` otherwise.
        """
        super().__init__(str(__class__.__name__), task_type, input_data, seed)

        # Resource settings and budgets.
        self.number_of_unit_resource = number_of_unit_resource
        self.iter_num_per_unit_resource = 10
        self.time_limit_per_trans = time_limit_per_trans
        self.mem_limit_per_trans = mem_limit_per_trans
        self.time_budget = time_budget
        self.evaluator = evaluator
        self.model_id = model_id

        # Scores start at negative infinity.
        self.incumbent_score = -np.inf
        self.fetch_incumbent = None
        self.baseline_score = -np.inf
        self.start_time = time.time()
        self.hp_config = None
        self.seed = seed
        self.n_jobs = n_jobs

        self.node_dict = {}

        # Progress flags and the iteration counter.
        self.early_stopped_flag = False
        self.is_finished = False
        self.iteration_id = 0

        # The flag is set before the hyperparameter space is requested;
        # keep this ordering.
        self.evaluator.parse_needed = True
        # Prepare the hyperparameter space.
        self.hyperparameter_space = self._get_task_hyperparameter_space(optimizer=algo)
        self.incumbent_config = (
            self.hyperparameter_space.get_default_configuration()
            if algo == 'smac' else None
        )

        # max_runs is set to a huge value; no run cap is imposed through BO itself.
        random_state = np.random.RandomState(self.seed)
        self.optimizer = BO(
            objective_function=self.evaluate_function,
            config_space=self.hyperparameter_space,
            max_runs=int(1e10),
            task_id=self.model_id,
            time_limit_per_trial=self.time_limit_per_trans,
            rng=random_state,
        )
        self.eval_dict = dict()
def run(dataset_name):
    """Run a ``BO`` search on ``dataset_name`` and log every trial to disk.

    Each evaluated configuration is appended, together with its validation
    error, to a pickle file under ``data_dir`` whose name is built from the
    dataset name and the module-level ``mode`` and ``trial_num``.
    """
    history_name = '%s-resnet-%s-%d.pkl' % (dataset_name, mode, trial_num)
    saved_file = os.path.join(data_dir, history_name)

    def objective_function(cfg):
        """Train with ``cfg``, record ``[cfg, val_error]`` and return the error."""
        # The dataset is (re)loaded on every evaluation.
        train_split, test_split, cls_num = load_dataset(dataset_name)
        x_train, y_train = train_split
        x_test, y_test = test_split

        # Only the epoch count is needed here; the second value is ignored.
        epochs_num, _unused_run_count = get_default_setting(dataset_name)
        val_error = train(cls_num, epochs_num, cfg,
                          x_train, y_train, x_test, y_test,
                          seed=32)
        print('the validation accuracy is ', 1 - val_error)

        # Read the existing history (if any), append this trial, and rewrite
        # the whole file so the log on disk is always up to date.
        if os.path.exists(saved_file):
            with open(saved_file, 'rb') as f:
                data = pickle.load(f)
        else:
            data = []
        data.append([cfg, val_error])

        with open(saved_file, 'wb') as f:
            pickle.dump(data, f)
        return val_error

    cs = create_configspace()
    random_state = np.random.RandomState(1)
    bo = BO(
        objective_function,
        cs,
        max_runs=trial_num,
        time_limit_per_trial=10000,
        sample_strategy=mode,
        rng=random_state,
    )
    bo.run()
Exemplo n.º 4
0
                min_impurity_decrease=self.min_impurity_decrease,
                random_state=self.random_state,
                n_jobs=self.n_jobs,
                class_weight=self.class_weight,
                warm_start=True)

        self.estimator.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``.

        Raises:
            NotImplementedError: if ``self.estimator`` is ``None``.
        """
        model = self.estimator
        if model is not None:
            return model.predict(X)
        raise NotImplementedError()


# Script section: run one BO search per dataset and dump each search history.
# `dataset_str` is split on commas into the list of dataset names.
dataset_list = dataset_str.split(',')
check_datasets(dataset_list)
cs = get_cs()

# Cap the run budget at 75% of the number of distinct configurations found in
# 30000 samples of the space, and never exceed the requested `run_count`.
_run_count = min(int(len(set(cs.sample_configuration(30000))) * 0.75), run_count)
print(_run_count)

for dataset in dataset_list:
    node = load_data(dataset, '../soln-ml/', True, task_type=0)
    _x, _y = node.data[0], node.data[1]
    # Bind the dataset to the objective. Do not call this `eval`: that would
    # shadow the builtin for the rest of the module.
    objective = partial(eval_func, x=_x, y=_y)
    bo = BO(objective, cs, max_runs=_run_count, time_limit_per_trial=600,
            sample_strategy=mode, rng=np.random.RandomState(1))
    bo.run()
    # NOTE(review): the file name uses the requested `run_count`, not the
    # capped `_run_count` actually passed to BO -- confirm this is intended.
    with open('logs/%s-random_forest-%s-%d.pkl' % (dataset, mode, run_count), 'wb') as f:
        pickle.dump(bo.get_history().data, f)