def __init__(self, evaluator, config_space, time_limit=None,
             evaluation_limit=None, per_run_time_limit=300,
             per_run_mem_limit=1024, output_dir='./', trials_per_iter=1,
             seed=1, n_jobs=1):
    """Initialise optimizer bookkeeping and build the underlying ``BO`` object.

    Args:
        evaluator: Forwarded to the parent constructor; ``self.evaluator``
            is then handed to ``BO`` as the objective function.
        config_space: Configuration space forwarded to the parent
            constructor and to ``BO``.
        time_limit: Stored as ``self.time_limit``.
        evaluation_limit: Stored as ``self.evaluation_num_limit``.
        per_run_time_limit: Stored, and passed to ``BO`` as
            ``time_limit_per_trial``.
        per_run_mem_limit: Stored as ``self.per_run_mem_limit``; it is not
            forwarded to ``BO`` in this constructor.
        output_dir: Stored as ``self.output_dir``.
        trials_per_iter: Stored as ``self.trials_per_iter``.
        seed: Forwarded to the parent constructor; ``self.seed`` seeds the
            ``RandomState`` given to ``BO``.
        n_jobs: Accepted for interface compatibility; not used here.
    """
    super().__init__(evaluator, config_space, seed)

    # Limits and settings copied straight from the arguments.
    self.time_limit = time_limit
    self.evaluation_num_limit = evaluation_limit
    self.per_run_time_limit = per_run_time_limit
    self.per_run_mem_limit = per_run_mem_limit
    self.trials_per_iter = trials_per_iter
    self.output_dir = output_dir

    # max_runs is set to a huge value so the BO object itself never caps
    # the number of runs.
    bo_rng = np.random.RandomState(self.seed)
    self.optimizer = BO(
        objective_function=self.evaluator,
        config_space=config_space,
        max_runs=int(1e10),
        task_id=None,
        time_limit_per_trial=self.per_run_time_limit,
        rng=bo_rng,
    )

    # Search history and the current best result.
    self.trial_cnt = 0
    self.configs = list()
    self.perfs = list()
    self.incumbent_perf = float("-INF")
    self.incumbent_config = self.config_space.get_default_configuration()

    # Estimate the size of the hyperparameter space: draw 10000 samples,
    # count the distinct ones and keep 75% of that count as the threshold.
    # A space without hyperparameters gets a threshold of zero.
    hp_count = len(self.config_space.get_hyperparameters())
    if hp_count > 0:
        samples = self.config_space.sample_configuration(10000)
        distinct_count = len(set(samples))
        self.config_num_threshold = int(distinct_count * 0.75)
    else:
        self.config_num_threshold = 0

    # NOTE(review): self.logger is not created here; presumably the parent
    # constructor provides it -- confirm.
    self.logger.debug('The maximum trial number in HPO is: %d' % self.config_num_threshold)

    # The number of configurations to try is capped at 600.
    self.maximum_config_num = min(600, self.config_num_threshold)
    self.early_stopped_flag = False
    self.eval_dict = {}
def __init__(self, task_type, input_data: DataNode,
             evaluator: _BaseEvaluator, model_id: str,
             time_limit_per_trans: int, mem_limit_per_trans: int,
             seed: int, n_jobs=1, number_of_unit_resource=1,
             time_budget=600, algo='smac'):
    """Initialise optimizer state, the search space and the ``BO`` object.

    Args:
        task_type: Forwarded to the parent constructor.
        input_data: Forwarded to the parent constructor.
        evaluator: Stored as ``self.evaluator``; its ``parse_needed``
            attribute is set to ``True``.
        model_id: Stored, and passed to ``BO`` as ``task_id``.
        time_limit_per_trans: Stored, and passed to ``BO`` as
            ``time_limit_per_trial``.
        mem_limit_per_trans: Stored as ``self.mem_limit_per_trans``.
        seed: Forwarded to the parent constructor, stored as
            ``self.seed`` and used to seed the ``RandomState`` for ``BO``.
        n_jobs: Stored as ``self.n_jobs``.
        number_of_unit_resource: Stored as ``self.number_of_unit_resource``.
        time_budget: Stored as ``self.time_budget``.
        algo: Passed to ``_get_task_hyperparameter_space`` as
            ``optimizer``. Only ``'smac'`` makes the default configuration
            the initial incumbent; any other value leaves it ``None``.
    """
    super().__init__(str(__class__.__name__), task_type, input_data, seed)

    # Resource and budget settings.
    self.number_of_unit_resource = number_of_unit_resource
    self.iter_num_per_unit_resource = 10
    self.time_limit_per_trans = time_limit_per_trans
    self.mem_limit_per_trans = mem_limit_per_trans
    self.time_budget = time_budget
    self.n_jobs = n_jobs
    self.seed = seed

    # Collaborators and identifiers.
    self.evaluator = evaluator
    self.model_id = model_id

    # Progress tracking; both scores start at negative infinity.
    self.incumbent_score = -np.inf
    self.baseline_score = -np.inf
    self.fetch_incumbent = None
    self.hp_config = None
    self.start_time = time.time()
    self.node_dict = dict()
    self.early_stopped_flag = False
    self.is_finished = False
    self.iteration_id = 0

    self.evaluator.parse_needed = True

    # Prepare the hyperparameter space.
    self.hyperparameter_space = self._get_task_hyperparameter_space(
        optimizer=algo)
    self.incumbent_config = (
        self.hyperparameter_space.get_default_configuration()
        if algo == 'smac'
        else None
    )

    # max_runs is set to a huge value so the BO object itself never caps
    # the number of runs.
    bo_rng = np.random.RandomState(self.seed)
    self.optimizer = BO(
        objective_function=self.evaluate_function,
        config_space=self.hyperparameter_space,
        max_runs=int(1e10),
        task_id=self.model_id,
        time_limit_per_trial=self.time_limit_per_trans,
        rng=bo_rng,
    )
    self.eval_dict = {}
def run(dataset_name):
    """Run a BO search for ``dataset_name`` and log every trial to disk.

    The history file lives under the module-level ``data_dir`` and is named
    from the dataset name plus the module-level ``mode`` and ``trial_num``.
    Each evaluated configuration is appended to that pickle file as a
    ``[cfg, val_error]`` pair.

    Args:
        dataset_name: Dataset identifier passed to ``load_dataset`` and
            ``get_default_setting``.
    """
    history_name = '%s-resnet-%s-%d.pkl' % (dataset_name, mode, trial_num)
    saved_file = os.path.join(data_dir, history_name)

    def _append_record(cfg, val_error):
        # Read-modify-write of the pickled history list; a missing file
        # means this is the first record.
        if os.path.exists(saved_file):
            with open(saved_file, 'rb') as fh:
                history = pickle.load(fh)
        else:
            history = list()
        history.append([cfg, val_error])
        with open(saved_file, 'wb') as fh:
            pickle.dump(history, fh)

    def objective_function(cfg):
        # The dataset is loaded again on every evaluation.
        (x_train, y_train), (x_test, y_test), cls_num = load_dataset(dataset_name)
        # Only the epoch count is used; the second value is ignored.
        epochs_num, _run_count = get_default_setting(dataset_name)
        val_error = train(cls_num, epochs_num, cfg,
                          x_train, y_train, x_test, y_test, seed=32)
        print('the validation accuracy is ', 1 - val_error)
        _append_record(cfg, val_error)
        return val_error

    config_space = create_configspace()
    optimizer = BO(
        objective_function,
        config_space,
        max_runs=trial_num,
        time_limit_per_trial=10000,
        sample_strategy=mode,
        rng=np.random.RandomState(1),
    )
    optimizer.run()
min_impurity_decrease=self.min_impurity_decrease, random_state=self.random_state, n_jobs=self.n_jobs, class_weight=self.class_weight, warm_start=True) self.estimator.fit(X, y, sample_weight=sample_weight) return self def predict(self, X): if self.estimator is None: raise NotImplementedError() return self.estimator.predict(X) dataset_list = dataset_str.split(',') check_datasets(dataset_list) cs = get_cs() _run_count = min(int(len(set(cs.sample_configuration(30000))) * 0.75), run_count) print(_run_count) for dataset in dataset_list: node = load_data(dataset, '../soln-ml/', True, task_type=0) _x, _y = node.data[0], node.data[1] eval = partial(eval_func, x=_x, y=_y) bo = BO(eval, cs, max_runs=_run_count, time_limit_per_trial=600, sample_strategy=mode, rng=np.random.RandomState(1)) bo.run() with open('logs/%s-random_forest-%s-%d.pkl' % (dataset, mode, run_count), 'wb')as f: pickle.dump(bo.get_history().data, f)