def __init__(
        self,
        space: Optional[Dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        points_to_evaluate: Optional[List[Dict]] = None,
        n_initial_points: int = 20,
        random_state_seed: Optional[int] = None,
        gamma: float = 0.25,
        max_concurrent: Optional[int] = None,
        use_early_stopped_trials: Optional[bool] = None,
):
    """Configure a HyperOpt (TPE) based searcher.

    Args:
        space: Search space. Only a non-empty ``dict`` is turned into a
            hyperopt ``Domain`` here; anything else leaves ``self.domain``
            as ``None``.
        metric: Forwarded unchanged to the parent constructor.
        mode: ``"min"``, ``"max"`` or a falsy value. When falsy,
            ``self.metric_op`` is not set by this constructor.
        points_to_evaluate: Optional list/tuple of configurations used to
            seed the hyperopt trials.
        n_initial_points: Passed to ``tpe.suggest`` as ``n_startup_jobs``
            unless it is ``None``.
        random_state_seed: Seed for ``np.random.RandomState``; unseeded
            when ``None``.
        gamma: Passed to ``tpe.suggest`` as ``gamma`` unless it is ``None``.
        max_concurrent: Forwarded to the parent and stored on the instance.
        use_early_stopped_trials: Forwarded to the parent constructor.
    """
    assert hpo is not None, (
        "HyperOpt must be installed! Run `pip install hyperopt`.")
    if mode:
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
    from hyperopt.fmin import generate_trials_to_calculate

    super(HyperOptSearch, self).__init__(
        metric=metric,
        mode=mode,
        max_concurrent=max_concurrent,
        use_early_stopped_trials=use_early_stopped_trials)
    self.max_concurrent = max_concurrent

    # hyperopt internally minimizes, so "max" => -1
    if mode == "max":
        self.metric_op = -1.
    elif mode == "min":
        self.metric_op = 1.

    # Layer the optional TPE settings on top of the plain suggest function.
    suggest = hpo.tpe.suggest
    if n_initial_points is not None:
        suggest = partial(suggest, n_startup_jobs=n_initial_points)
    if gamma is not None:
        suggest = partial(suggest, gamma=gamma)
    self.algo = suggest

    if points_to_evaluate is not None:
        assert isinstance(points_to_evaluate, (list, tuple))
        seeded_trials = generate_trials_to_calculate(points_to_evaluate)
        seeded_trials.refresh()
        self._hpopt_trials = seeded_trials
        self._points_to_evaluate = len(points_to_evaluate)
    else:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0

    self._live_trial_mapping = {}
    self.rstate = (
        np.random.RandomState()
        if random_state_seed is None
        else np.random.RandomState(random_state_seed))

    self.domain = None
    if isinstance(space, dict) and space:
        _, domain_vars, grid_vars = parse_spec_vars(space)
        if domain_vars or grid_vars:
            # Unresolved entries found: warn, then convert the space before
            # handing it to hyperopt.
            logger.warning(
                UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)))
            space = self.convert_search_space(space)
        self.domain = hpo.Domain(lambda spc: spc, space)
def __init__(self,
             space,
             max_concurrent=10,
             reward_attr="episode_reward_mean",
             points_to_evaluate=None,
             **kwargs):
    """Configure a HyperOpt (TPE) based searcher.

    Args:
        space: hyperopt search space, wrapped in an ``hpo.Domain``.
        max_concurrent: Positive ``int`` stored as ``_max_concurrent``.
        reward_attr: Name of the result attribute, stored as
            ``_reward_attr``.
        points_to_evaluate: Optional list/tuple of configurations used to
            seed the hyperopt trials.
        **kwargs: Forwarded to the parent constructor.

    Raises:
        AssertionError: If hyperopt is unavailable, ``max_concurrent`` is
            not a positive ``int``, or ``points_to_evaluate`` is neither a
            list nor a tuple.
    """
    # presumably populates the module-level `hpo` — confirm against helper
    _import_hyperopt()
    assert hpo is not None, "HyperOpt must be installed!"
    from hyperopt.fmin import generate_trials_to_calculate
    # Exact type check kept on purpose: isinstance() would also admit bool.
    assert type(max_concurrent) is int and max_concurrent > 0
    self._max_concurrent = max_concurrent
    self._reward_attr = reward_attr
    self.algo = hpo.tpe.suggest
    self.domain = hpo.Domain(lambda spc: spc, space)
    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        # isinstance() instead of an exact type comparison so list
        # subclasses and tuples are accepted as well.
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(
            points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    self._live_trial_mapping = {}
    self.rstate = np.random.RandomState()

    super(HyperOptSearch, self).__init__(**kwargs)
def get_iter(fn, space, algo, max_evals,
             trials=None, rstate=None,
             pass_expr_memo_ctrl=None,
             catch_eval_exceptions=False,
             verbose=0,
             points_to_evaluate=None,
             max_queue_len=1,
             show_progressbar=False,
             ):
    """Build an ``FMinIter`` for the given objective and search space.

    Args:
        fn: Objective function wrapped in a ``base.Domain``.
        space: hyperopt search space.
        algo: Suggestion algorithm handed to ``FMinIter``.
        max_evals: Evaluation budget handed to ``FMinIter``.
        trials: Existing trials object; when ``None`` a new one is created,
            seeded from ``points_to_evaluate`` if that is given.
        rstate: Random state. When ``None`` it is seeded from the
            ``HYPEROPT_FMIN_SEED`` environment variable if that is set and
            non-empty, otherwise left unseeded.
        pass_expr_memo_ctrl: Forwarded to ``base.Domain``.
        catch_eval_exceptions: Stored on the returned iterator.
        verbose: Forwarded to ``FMinIter``.
        points_to_evaluate: Optional list of points used to seed new trials;
            ignored when ``trials`` is supplied.
        max_queue_len: Forwarded to ``FMinIter``.
        show_progressbar: Forwarded to ``FMinIter``.

    Returns:
        The configured ``FMinIter`` instance.
    """
    if rstate is None:
        env_rseed = os.environ.get('HYPEROPT_FMIN_SEED', '')
        if env_rseed:
            rstate = np.random.RandomState(int(env_rseed))
        else:
            rstate = np.random.RandomState()

    if trials is None:
        if points_to_evaluate is None:
            trials = base.Trials()
        else:
            # isinstance() rather than an exact type comparison, so list
            # subclasses are accepted too.
            assert isinstance(points_to_evaluate, list)
            trials = generate_trials_to_calculate(points_to_evaluate)

    domain = base.Domain(fn, space, pass_expr_memo_ctrl=pass_expr_memo_ctrl)

    rval = FMinIter(algo, domain, trials,
                    max_evals=max_evals,
                    rstate=rstate,
                    verbose=verbose,
                    max_queue_len=max_queue_len,
                    show_progressbar=show_progressbar)
    rval.catch_eval_exceptions = catch_eval_exceptions
    return rval
def __init__(self,
             space,
             max_concurrent=10,
             reward_attr="episode_reward_mean",
             points_to_evaluate=None,
             **kwargs):
    """Configure a HyperOpt (TPE) based searcher.

    Args:
        space: hyperopt search space, wrapped in an ``hpo.Domain``.
        max_concurrent: Positive ``int`` stored as ``_max_concurrent``.
        reward_attr: Name of the result attribute, stored as
            ``_reward_attr``.
        points_to_evaluate: Optional list/tuple of configurations used to
            seed the hyperopt trials.
        **kwargs: Forwarded to the parent constructor.

    Raises:
        AssertionError: If hyperopt is unavailable, ``max_concurrent`` is
            not a positive ``int``, or ``points_to_evaluate`` is neither a
            list nor a tuple.
    """
    assert hpo is not None, "HyperOpt must be installed!"
    from hyperopt.fmin import generate_trials_to_calculate
    # Exact type check kept on purpose: isinstance() would also admit bool.
    assert type(max_concurrent) is int and max_concurrent > 0
    self._max_concurrent = max_concurrent
    self._reward_attr = reward_attr
    self.algo = hpo.tpe.suggest
    self.domain = hpo.Domain(lambda spc: spc, space)
    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        # isinstance() instead of an exact type comparison so list
        # subclasses and tuples are accepted as well.
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(
            points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    self._live_trial_mapping = {}
    self.rstate = np.random.RandomState()

    super(HyperOptSearch, self).__init__(**kwargs)
def generate_trial(isDef=False):
    """Create the starting trials object for a search.

    Inputs:
        isDef: Bool, if true the trials are seeded with the hard-coded
            initial guess below.

    Returns:
        An empty ``Trials`` object, or one pre-populated with the guess.
    """
    if not isDef:
        return Trials()

    # Edit this dict to start from a different initial guess.
    # NOTE (from the original author): Hyperopt reads dict keys and values
    # separately and organizes keys in alphabetical order, while the order
    # of the values remains the same. Thus, keep keys in alphabetical order.
    initial_guess = {
        'aint_dense': 1,
        'batch_size': 0,
        'nint_dense': 2,
        'optim_type': 0,
        'sint_dense': 0,
        'train_loss': 0,
    }
    return generate_trials_to_calculate([initial_guess])
def process_meta(self, fn_name, space, algo, max_evals):
    """Run ``fmin`` for the objective named ``fn_name``, seeded with one point.

    Args:
        fn_name: Name of the objective method on ``self``. ``'xgb_reg'``
            seeds the search from ``meta_param_xgb_reg()``; any other name
            seeds it from ``meta_param_xgb_clf()``.
        space: hyperopt search space.
        algo: hyperopt suggestion algorithm.
        max_evals: Evaluation budget passed to ``fmin``.

    Returns:
        The trials object on success. If ``fmin`` raises, a dict with
        ``'status': STATUS_FAIL`` and the exception text instead; callers
        have to distinguish the two shapes.
    """
    fn = getattr(self, fn_name)
    if fn_name == 'xgb_reg':
        seed_point = self.meta_param_xgb_reg()
    else:
        seed_point = self.meta_param_xgb_clf()
    trials = generate_trials_to_calculate([seed_point])
    try:
        # fmin's return value is not needed: the trials object collects the
        # results and is what gets returned.
        fmin(fn=fn, space=space, algo=algo, max_evals=max_evals,
             trials=trials)
    except Exception as e:
        # Deliberate best-effort: report the failure instead of raising.
        return {'status': STATUS_FAIL, 'exception': str(e)}
    return trials
def __init__(self,
             space,
             max_concurrent=10,
             reward_attr=None,
             metric="episode_reward_mean",
             mode="max",
             points_to_evaluate=None,
             n_initial_points=20,
             random_state_seed=None,
             gamma=0.25,
             **kwargs):
    """Configure a HyperOpt (TPE) based searcher.

    Args:
        space: hyperopt search space, wrapped in an ``hpo.Domain``.
        max_concurrent: Positive ``int`` stored as ``_max_concurrent``.
        reward_attr: Deprecated. When given it overrides ``metric`` and
            forces ``mode`` to ``"max"``, and a warning is logged.
        metric: Name of the result attribute to optimise.
        mode: ``"min"`` or ``"max"``.
        points_to_evaluate: Optional list/tuple of configurations used to
            seed the hyperopt trials.
        n_initial_points: Passed to ``tpe.suggest`` as ``n_startup_jobs``
            unless it is ``None``.
        random_state_seed: Seed for ``np.random.RandomState``; unseeded
            when ``None``.
        gamma: Passed to ``tpe.suggest`` as ``gamma`` unless it is ``None``.
        **kwargs: Forwarded to the parent constructor.

    Raises:
        AssertionError: If hyperopt is unavailable, ``max_concurrent`` is
            not a positive ``int``, ``mode`` is invalid, or
            ``points_to_evaluate`` is neither a list nor a tuple.
    """
    assert hpo is not None, "HyperOpt must be installed!"
    from hyperopt.fmin import generate_trials_to_calculate
    # Exact type check kept on purpose: isinstance() would also admit bool.
    assert type(max_concurrent) is int and max_concurrent > 0
    assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"

    if reward_attr is not None:
        mode = "max"
        metric = reward_attr
        logger.warning(
            "`reward_attr` is deprecated and will be removed in a future "
            "version of Tune. "
            "Setting `metric={}` and `mode=max`.".format(reward_attr))

    self._max_concurrent = max_concurrent
    self._metric = metric

    # hyperopt internally minimizes, so "max" => -1
    if mode == "max":
        self._metric_op = -1.
    elif mode == "min":
        self._metric_op = 1.

    if n_initial_points is None:
        self.algo = hpo.tpe.suggest
    else:
        self.algo = partial(
            hpo.tpe.suggest, n_startup_jobs=n_initial_points)
    if gamma is not None:
        self.algo = partial(self.algo, gamma=gamma)

    self.domain = hpo.Domain(lambda spc: spc, space)
    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        # isinstance() instead of an exact type comparison so list
        # subclasses and tuples are accepted as well.
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(
            points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    self._live_trial_mapping = {}
    if random_state_seed is None:
        self.rstate = np.random.RandomState()
    else:
        self.rstate = np.random.RandomState(random_state_seed)

    super(HyperOptSearch, self).__init__(
        metric=self._metric, mode=mode, **kwargs)
def test_early_stop_no_progress_loss():
    """Seeded random search with ``no_progress_loss(10)`` ends at 10 trials."""
    seed_point = {'x': -100}
    trials = generate_trials_to_calculate([seed_point])

    fmin(
        fn=lambda x: x,
        space=hp.uniform("x", -5, 5),
        algo=rand.suggest,
        max_evals=500,
        trials=trials,
        early_stop_fn=no_progress_loss(10),
    )

    # The budget is 500 evaluations; the early-stop callback must cut it short.
    assert len(trials) == 10
def __init__(
        self,
        space=None,
        metric=None,
        mode=None,
        points_to_evaluate=None,
        n_initial_points=20,
        random_state_seed=None,
        gamma=0.25,
        max_concurrent=None,
        use_early_stopped_trials=None,
):
    """Configure a HyperOpt (TPE) based searcher.

    Args:
        space: Search space; when truthy it is wrapped in an
            ``hpo.Domain``, otherwise ``self.domain`` stays ``None``.
        metric: Forwarded unchanged to the parent constructor.
        mode: ``"min"``, ``"max"`` or a falsy value. When falsy,
            ``self.metric_op`` is not set by this constructor.
        points_to_evaluate: Optional list/tuple of configurations used to
            seed the hyperopt trials.
        n_initial_points: Passed to ``tpe.suggest`` as ``n_startup_jobs``
            unless it is ``None``.
        random_state_seed: Seed for ``np.random.RandomState``; unseeded
            when ``None``.
        gamma: Passed to ``tpe.suggest`` as ``gamma`` unless it is ``None``.
        max_concurrent: Forwarded to the parent and stored on the instance.
        use_early_stopped_trials: Forwarded to the parent constructor.
    """
    assert hpo is not None, (
        "HyperOpt must be installed! Run `pip install hyperopt`.")
    if mode:
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
    from hyperopt.fmin import generate_trials_to_calculate

    super(HyperOptSearch, self).__init__(
        metric=metric,
        mode=mode,
        max_concurrent=max_concurrent,
        use_early_stopped_trials=use_early_stopped_trials)
    self.max_concurrent = max_concurrent

    # hyperopt internally minimizes, so "max" => -1
    if mode == "max":
        self.metric_op = -1.
    elif mode == "min":
        self.metric_op = 1.

    # Layer the optional TPE settings on top of the plain suggest function.
    suggest = hpo.tpe.suggest
    if n_initial_points is not None:
        suggest = partial(suggest, n_startup_jobs=n_initial_points)
    if gamma is not None:
        suggest = partial(suggest, gamma=gamma)
    self.algo = suggest

    if points_to_evaluate is not None:
        assert isinstance(points_to_evaluate, (list, tuple))
        seeded_trials = generate_trials_to_calculate(points_to_evaluate)
        seeded_trials.refresh()
        self._hpopt_trials = seeded_trials
        self._points_to_evaluate = len(points_to_evaluate)
    else:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0

    self._live_trial_mapping = {}
    self.rstate = (
        np.random.RandomState()
        if random_state_seed is None
        else np.random.RandomState(random_state_seed))

    self.domain = None
    if space:
        self.domain = hpo.Domain(lambda spc: spc, space)
def process_xgb_clf(self, max_evals):
    """Run a TPE search over ``xgb_para`` for the ``xgb_clf`` objective.

    The search is seeded with the single point returned by
    ``meta_param_xgb_clf()``.

    Args:
        max_evals: Evaluation budget passed to ``fmin``.

    Returns:
        Whatever ``fmin`` returns on success; if it raises, a dict with
        ``'status': STATUS_FAIL`` and the exception text.
    """
    seed_trials = generate_trials_to_calculate([self.meta_param_xgb_clf()])
    try:
        best = fmin(
            fn=self.xgb_clf,
            space=xgb_para,
            trials=seed_trials,
            algo=tpe.suggest,
            max_evals=max_evals,
        )
    except Exception as e:
        return {'status': STATUS_FAIL, 'exception': str(e)}
    else:
        return best
def gen_trials(self):
    """Create the starting trials object for a search.

    Override this method to supply custom guesses.

    Returns:
        trials: an empty ``Trials`` object when ``self.hps_guess`` is
            ``None``, otherwise trials pre-populated from ``self.hps_guess``.
    """
    # NOTE (from the original author): Hyperopt reads dict keys and values
    # separately and organizes keys in alphabetical order, while the order
    # of the values remains the same. Thus, keep the guess keys in
    # alphabetical order.
    if self.hps_guess is None:
        return Trials()
    return generate_trials_to_calculate(self.hps_guess)
def hyper_optimization(self, train_mask, val_mask):
    """Search GCN hyper-parameters with TPE, seeded from ``self.points``.

    Args:
        train_mask: Mask passed to ``train_predict`` for training.
        val_mask: Mask passed to ``train_predict`` and used to select the
            labels the accuracy is scored on.

    Returns:
        ``(pred, best_score, hyperparams)`` for the best trial, or
        ``(None, -1.0, None)`` when the timer reports less than 5 remaining
        (presumably seconds — confirm) or ``self.flag_end`` is already set.
    """

    def objective(hyperparams):
        # Sampled hyper-parameters override the base params; the timer is
        # always injected last.
        model_config = {**self.params, **hyperparams, 'timer': self.timer}
        model = GCN(model_config).to(self.device)
        pred, pred_val, flag = model.train_predict(
            self.data, train_mask=train_mask, val_mask=val_mask)
        if flag:
            self.flag_end = True
        y_true = self.data.y[val_mask].cpu().numpy()
        y_pred = (pred_val.max(1)[1]).cpu().numpy()
        score = accuracy_score(y_true, y_pred)
        # hyperopt minimises, so the accuracy is negated.
        return {
            'loss': -score,
            'status': STATUS_OK,
            'pred': pred.cpu().numpy(),
            'flag': self.flag_end,
        }

    trials = generate_trials_to_calculate(self.points)

    out_of_time = self.timer.remain_time() < 5
    if out_of_time or self.flag_end:
        self.flag_end = True
        return None, -1.0, None

    best = fmin(
        fn=objective,
        space=self.space,
        trials=trials,
        algo=tpe.suggest,
        max_evals=5,
        verbose=0,
        timeout=self.timer.remain_time()-5,
    )
    hyperparams = space_eval(self.space, best)

    best_result = trials.best_trial['result']
    best_score = -best_result['loss']
    pprint.pprint(hyperparams, width=1)
    print('>>>>>>> ', best_score)
    pred = trials.best_trial['result']['pred']
    return pred, best_score, hyperparams
def _setup_hyperopt(self) -> None:
    """Create the hyperopt trials and domain from the stored configuration.

    Reads ``self._metric``, ``self._mode``, ``self._points_to_evaluate`` and
    ``self._space``; writes ``self._metric`` (when defaulted),
    ``self._hpopt_trials``, ``self._points_to_evaluate`` and ``self.domain``.

    Note that ``self._points_to_evaluate`` changes meaning here: it holds
    the list of seed configurations on entry and the *number* of seed
    configurations on exit.
    """
    from hyperopt.fmin import generate_trials_to_calculate

    if self._metric is None and self._mode:
        # If only a mode was passed, use anonymous metric
        self._metric = DEFAULT_METRIC

    if self._points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        assert isinstance(self._points_to_evaluate, (list, tuple))

        # Each config is passed to the converter, which is called only for
        # its side effect (no return value is used).
        for config in self._points_to_evaluate:
            self._convert_categories_to_indices(config)
        # HyperOpt treats initial points as LIFO, reverse to get FIFO
        self._points_to_evaluate = list(reversed(self._points_to_evaluate))
        self._hpopt_trials = generate_trials_to_calculate(
            self._points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(self._points_to_evaluate)

    self.domain = hpo.Domain(lambda spc: spc, self._space)
def get_new_params(experiment: Experiment, rstate, algo=tpe.suggest, n_points=1):
    """Suggest new parameter sets after replaying an experiment's results.

    The recorded results are loaded into hyperopt trials and evaluated
    serially with an objective that looks the stored value up again. The
    algorithm is then asked for ``n_points`` fresh trials.

    Side effect: when the experiment has results, ``experiment.best_params``
    is overwritten with the parameters of the best trial.

    Args:
        experiment: Source of ``results`` and ``parameter_spaces``.
        rstate: Random state used by ``FMinIter`` and to draw the seed
            passed to ``algo``.
        algo: hyperopt suggestion algorithm.
        n_points: Number of new trial ids requested.

    Returns:
        One list of ``Parameter`` objects per suggested point.
    """

    def as_parameters(vals):
        # hyperopt stores each value in a one-element list; unwrap it.
        return [Parameter(name=k, value=v[0]) for k, v in vals.items()]

    params = [
        {p.name: p.value for p in result.params}
        for result in experiment.results
    ]

    def replay(args):
        # Return the recorded value of the first result with these params.
        return experiment.results[params.index(args)].value

    trials = generate_trials_to_calculate(params)
    trials.refresh()

    space = convert_parameter_space(experiment.parameter_spaces)
    domain = base.Domain(replay, space)
    FMinIter(algo, domain, trials, rstate=rstate).serial_evaluate()

    new_ids = trials.new_trial_ids(n_points)
    seed = rstate.randint(2**31 - 1)
    new_points = algo(new_ids, domain, trials, seed)
    new_params = [
        as_parameters(point['misc']['vals']) for point in new_points
    ]

    if experiment.results:
        experiment.best_params = as_parameters(
            trials.best_trial['misc']['vals'])

    return new_params
def _ask(prompt, default, reject=None, blank_line=True):
    """Prompt for a value and fall back to ``default``.

    The default is used when the reply is empty or when ``reject(reply)``
    is true. A blank line is printed afterwards unless ``blank_line`` is
    false. The raw reply string (or the default) is returned unconverted.
    """
    reply = input(prompt)
    if reply == '' or (reject is not None and reject(reply)):
        reply = default
    if blank_line:
        print()
    return reply


def _print_scores(classifier, train_X, train_y, test_X, test_y):
    """Print the classification report and micro-F1 for both data splits."""
    preds = classifier.predict(train_X)
    print('Results on training set:')
    print(classification_report(train_y, preds, zero_division=1))
    print('Micro averaged F1 Score: ',
          f1_score(train_y, preds, average='micro'), '\n')
    print('-'*80, '\n')
    preds = classifier.predict(test_X)
    print('Results on cross-validation set:')
    print(classification_report(test_y, preds, zero_division=1))
    print('Micro averaged F1 Score: ',
          f1_score(test_y, preds, average='micro'), '\n')


def main():
    """Interactive console front-end for training and submitting classifiers.

    Loads ``train_values.csv`` / ``train_labels.csv``, optionally plots a
    2-component PCA, builds a train/test split (optionally on a
    class-balanced subset), then loops on a menu offering an MLP, a
    scikit-learn GBM, a LightGBM classifier, a randomized search and a
    Hyperopt search. Any other menu input, once a classifier has been
    trained, offers to submit it; submitting ends the function.
    """
    # Imports the dataset and labels and turns them into numpy arrays.
    X = pd.read_csv('train_values.csv', index_col=0).to_numpy()
    y = np.array(pd.read_csv('train_labels.csv', index_col=0).to_numpy().T[0])

    # Preprocesses data (one hot or ordinal encoding)
    X = preProcess(X, 'OneHotEncoding')

    # Performs PCA on dataset for better visualization
    print("Do you want to visualize dataset with PCA?\n")
    pca_inp = input('(y/n): ').lower()
    if pca_inp == 'y':
        size = int(input("Define amount of samples to visualize with PCA: "))
        pca = PCA(n_components=2)
        pca.fit(X)
        PCX = pca.transform(X)
        plt.scatter(PCX[:size, 0], PCX[:size, 1], c=y[:size])
        plt.show()
    print()

    # Option to use entire dataset or a reduced set with a more balanced
    # amount of each label.
    print("Use reduced set?\n")
    reduced = input('(y/n): ')
    print()

    # Create training set and test set
    if reduced == 'y':
        X1 = [X[i] for i in range(len(X)) if y[i] == 1]
        X2 = [X[i] for i in range(len(X)) if y[i] == 2]
        X3 = [X[i] for i in range(len(X)) if y[i] == 3]
        y1 = [y[i] for i in range(len(y)) if y[i] == 1]
        y2 = [y[i] for i in range(len(y)) if y[i] == 2]
        y3 = [y[i] for i in range(len(y)) if y[i] == 3]
        # Truncate every class to the size of the smallest one.
        size = min(len(X1), len(X2), len(X3))
        Xp = np.concatenate((X1[:size], X2[:size], X3[:size]))
        yp = np.concatenate((y1[:size], y2[:size], y3[:size]))
        # NOTE(review): no random_state here, unlike the full-set split
        # below — confirm whether that is intentional.
        train_X, test_X, train_y, test_y = train_test_split(
            Xp, yp, test_size=0.2)
    else:
        train_X, test_X, train_y, test_y = train_test_split(
            X, y, test_size=0.2, random_state=42)

    # Most recently trained classifier; candidate for submission.
    last_classifier = None

    # 'Front-end'
    while True:
        print("Choose the training model: ")
        print(' - Network\n - GBM\n - LGBM\n - GridSearch\n - Hyperopt\n')
        inp = input('>> ').lower()
        print()

        if inp == 'network':
            print('-- SkLearn MLP Classifier (neural network) --\n')
            it = int(input('Define maximum number of iterations: '))
            print()
            layers = [
                int(x)
                for x in input("Define network architecture: ")
                .replace(' ', '').split(',')
            ]
            print()
            alpha = _ask(
                "Define regularization term alpha (default = 0.0001): ",
                0.0001)
            eps = _ask(
                "Define stability term epsilon (default = 1e-8): ",
                0.00000001)
            activation = _ask(
                "Define activation function (default = 'relu'): ", 'relu')
            l_rate = _ask(
                "Define initial learning rate (default = 0.001): ", 0.001)
            solver = 'adam'
            decay = _ask(
                "Define learning rate decay (default = 'constant'): ",
                'constant')
            # NOTE(review): scikit-learn's own default for beta_1 is 0.9;
            # the prompt advertises 0.999 — confirm intent.
            beta_1 = _ask("Define beta 1 (default = 0.999): ", 0.999)
            beta_2 = _ask("Define beta 2 (default = 0.999): ", 0.999)
            print("Want early stopping (default = False)?")
            early = input('(y/n): ').lower() == 'y'
            print()
            print("Want warm start (default = False)?")
            # Fixed: the original compared `early` (already a bool) with
            # 'y', so warm start could never be enabled.
            warm = input('(y/n): ').lower() == 'y'
            print()
            num = _ask(
                "Define number of iterations without change, to declare "
                "convergence (default = 10): ", 10)
            tol = _ask(
                "Finally, define the tolerance (default = 0.0001): ",
                0.0001, blank_line=False)
            print('\n-- Training neural network --\n')
            mlp = MLPClassifier(
                hidden_layer_sizes=layers,
                max_iter=it,
                alpha=float(alpha),
                activation=activation,
                learning_rate=decay,
                learning_rate_init=float(l_rate),
                verbose=True,
                early_stopping=early,
                epsilon=float(eps),
                validation_fraction=0.2,
                solver=solver,
                beta_1=float(beta_1),
                beta_2=float(beta_2),
                warm_start=warm,
                tol=float(tol),
                n_iter_no_change=int(num))
            mlp.fit(train_X, train_y)
            print()
            _print_scores(mlp, train_X, train_y, test_X, test_y)
            last_classifier = mlp
            winsound.Beep(frequency, duration)
            print('='*80, '\n')

        elif inp == 'gbm':
            print('-- SkLearn GBM Classifier --\n')
            learning_rate = _ask(
                "Define learning rate (default = 0.1): ", 0.1)
            n_estimators = _ask(
                "Define number of estimators (default = 100): ", 100)
            # Percentages above 100 fall back to 100.
            subsample = _ask(
                "Define subsample percentage (default = 100 %): ", 100,
                reject=lambda v: float(v) > 100)
            min_samples_split = _ask(
                "Define minimum number of samples to split node "
                "(default = 2): ", 2)
            min_samples_leaf = _ask(
                "Define minimum number of samples to make node a leaf "
                "(default = 1): ", 1)
            max_depth = _ask(
                "Define maximum depth of individual estimators "
                "(default = 3): ", 3)
            print("Do you want cross-validation for early-stopping?")
            n_iter_no_change = 15 if input("(y/n): ") == 'y' else None
            print("\n-- Training GBM --\n")
            gbm = GradientBoostingClassifier(
                learning_rate=float(learning_rate),
                n_estimators=int(n_estimators),
                subsample=float(subsample)/100,
                min_samples_split=int(min_samples_split),
                min_samples_leaf=int(min_samples_leaf),
                max_depth=int(max_depth),
                verbose=True,
                n_iter_no_change=n_iter_no_change)
            gbm.fit(train_X, train_y)
            _print_scores(gbm, train_X, train_y, test_X, test_y)
            last_classifier = gbm
            winsound.Beep(frequency, duration)
            print('='*80, '\n')

        elif inp == 'lgbm':
            print('-- SkLearn LightGBM API --\n')
            max_depth = _ask('Define max depth of tree (default = -1): ', -1)
            min_data_in_leaf = _ask(
                'Define min data in leaf (default = 20): ', 20)
            # Fractions outside (0, 1] fall back to 1.
            feature_fraction = _ask(
                'Define feature fraction for random subsampling '
                '(default = 1): ', 1,
                reject=lambda v: not (0 < float(v) <= 1))
            bagging_freq = 0
            bagging_fraction = _ask(
                'Define bagging fraction for random sampling '
                '(default = 1): ', 1,
                reject=lambda v: not (0 < float(v) <= 1))
            # The bagging frequency is only asked for when bagging is on.
            if float(bagging_fraction) < 1:
                bagging_freq = _ask(
                    'Define frequecy for bagging (default = 0): ', 0)
            alpha = _ask('Define regularization alpha (default = 0): ', 0)
            lamb = _ask('Define regularization lambda (default = 0): ', 0)
            min_gain_to_split = _ask(
                'Define min gain to split node (default = 0): ', 0)
            objective = _ask(
                'Define objective (default = softmax)\n\nOptions: '
                '\n- num_class\n- softmax\n- ovr\n\n>> ',
                'softmax').lower()
            num_boost_round = _ask(
                'Define number of iterations (default = 100): ', 100)
            l_rate = _ask('Define learning rate (default = 0.1): ', 0.1)
            num_leaves = _ask(
                'Define max number of leaves in a tree (default = 31): ', 31)
            max_bin = _ask(
                'Define max number of bins (default = 255): ', 255)
            min_sum_hessian_in_leaf = _ask(
                'Define min hessian in leaf (default = 0.001): ', 0.001)
            print('Compensate for unbalanced dataset?')
            is_unbalance = input('(y/n): ').lower() == 'y'
            print()
            print('-- Training Light GBM --\n')
            gbm = lgb.LGBMClassifier(
                max_depth=int(max_depth),
                min_data_in_leaf=int(min_data_in_leaf),
                feature_fraction=float(feature_fraction),
                bagging_fraction=float(bagging_fraction),
                bagging_freq=int(bagging_freq),
                lambda_l1=float(alpha),
                lambda_l2=float(lamb),
                min_gain_to_split=float(min_gain_to_split),
                objective=objective,
                num_boost_round=int(num_boost_round),
                learning_rate=float(l_rate),
                num_leaves=int(num_leaves),
                gpu_use_dp=True,
                num_threads=2,
                num_class=3,
                is_unbalance=is_unbalance,
                verbosity=10,
                max_bin=int(max_bin),
                min_sum_hessian_in_leaf=float(min_sum_hessian_in_leaf),
            )
            warnings.filterwarnings("ignore")
            gbm.fit(train_X, train_y)
            warnings.filterwarnings("default")
            _print_scores(gbm, train_X, train_y, test_X, test_y)
            last_classifier = gbm
            winsound.Beep(frequency, duration)
            print('='*80, '\n')

        elif inp == 'gridsearch':
            print('-- Random GridSearchCV on LightGBM classifier --\n')
            iters = _ask('Define number of iterations (default = 10): ', 10)
            cv = _ask('Define number of CV (default = 2): ', 2)
            param_distributions = {
                'max_depth': [-1, 10, 30, 60, 100, 200],
                'min_data_in_leaf': [4, 8, 16, 32, 64, 128],
                'feature_fraction': [0.1, 0.25, 0.5, 0.75, 1],
                'bagging_fraction': [0.1, 0.25, 0.5, 0.75, 1],
                'bagging_freq': [0, 2, 8, 32, 128],
                'lambda_l1': [0, 0.1, 1, 2, 4, 175, 512, 1000],
                'lambda_l2': [0, 0.1, 1, 2, 4, 175, 512, 1000],
                'min_gain_to_split': [0, 0.01, 0.03, 0.1, 0.3, 0.5, 0.9, 1],
                'num_boost_round': [100, 500, 1000, 2500, 5000, 10000,
                                    15000, 20000],
                'learning_rate': [0.5, 0.25, 0.1, 0.05, 0.01, 0.005, 0.001,
                                  0.0005, 0.0001],
                'num_leaves': [10, 31, 62, 124, 200, 500, 750, 1000],
                'objective': ['softmax'],
                'gpu_use_dp': [True],
                'num_threads': [1],
                'num_class': [3],
                'max_bin': [128, 256, 512, 1024, 2048, 3000, 4000, 5000,
                            6000],
            }
            print('-- Finding best parameters --\n')
            rSearch = RandomizedSearchCV(
                estimator=lgb.LGBMClassifier(),
                param_distributions=param_distributions,
                scoring='f1_micro',
                n_jobs=2,
                cv=int(cv),
                verbose=10,
                n_iter=int(iters))
            warnings.filterwarnings("ignore")
            rSearch.fit(train_X, train_y)
            warnings.filterwarnings("default")
            print()
            print('Best parameters:')
            print(rSearch.best_params_, '\n')
            print('Score:')
            print(rSearch.best_score_, '\n')
            winsound.Beep(frequency, duration)
            print('='*80, '\n')

        elif inp == 'hyperopt':
            print('-- Bayesian Optimization on LightGBM classifier '
                  '(with Hyperopt) --\n')
            iters = _ask('Define number of evaluations (default = 50): ', 50)

            def objective_fun(space):
                # Scores on the full X/y with 3-fold CV (not the split).
                print()
                warnings.filterwarnings('ignore')
                model = lgb.LGBMClassifier(**space)
                accuracy = cross_val_score(
                    model, X, y, cv=3, scoring='f1_micro').mean()
                warnings.filterwarnings('default')
                # Hyperopt minimises, so the score is negated.
                return {'loss': - accuracy, 'status': STATUS_OK}

            param_space = {
                'max_depth': 0,
                'min_data_in_leaf': scope.int(
                    hp.quniform('min_data_in_leaf', 1, 5000, 1)),
                'feature_fraction': hp.uniform('feature_fraction', 0, 1),
                'bagging_fraction': hp.uniform('bagging_fraction', 0, 1),
                'bagging_freq': scope.int(
                    hp.quniform('bagging_freq', 0, 100, 1)),
                'lambda_l1': hp.uniform('lambda_l1', 0, 10000),
                'lambda_l2': hp.uniform('lambda_l2', 0, 10000),
                'min_gain_to_split': hp.uniform('min_gain_to_split', 0, 11),
                'num_boost_round': scope.int(
                    hp.quniform('num_boost_round', 100, 20000, 1)),
                'learning_rate': hp.uniform('learning_rate', 0.000001, 1),
                'num_leaves': scope.int(
                    hp.quniform('num_leaves', 2, 2000, 1)),
                'objective': 'softmax',
                'gpu_use_dp': True,
                'num_threads': 2,
                'num_class': 3,
                'max_bin': scope.int(hp.quniform('max_bin', 32, 4096, 1)),
                'min_sum_hessian_in_leaf': hp.uniform(
                    'min_sum_hessian_in_leaf', 0, 5),
            }
            # Single starting point evaluated before TPE takes over.
            param_init_trials = {
                'max_depth': 0,
                'min_data_in_leaf': 40,
                'feature_fraction': 0.5,
                'bagging_fraction': 0.9,
                'bagging_freq': 1,
                'lambda_l1': 0,
                'lambda_l2': 10,
                'min_gain_to_split': 0,
                'num_boost_round': 10000,
                'learning_rate': 0.1,
                'num_leaves': 31,
                'objective': 'softmax',
                'gpu_use_dp': True,
                'num_threads': 2,
                'num_class': 3,
                'max_bin': 256,
                'min_sum_hessian_in_leaf': 0.1,
            }
            trials = generate_trials_to_calculate([param_init_trials])
            best = fmin(
                fn=objective_fun,
                space=param_space,
                algo=tpe.suggest,
                max_evals=int(iters),
                trials=trials)
            print()
            print(best, '\n')
            # Wait for Enter before showing the menu again.
            input()

        elif last_classifier is not None:
            # Decides if wants to submit
            print('Want to submit?')
            inp = input('(y/n): ').lower()
            print()
            if inp == 'y':
                # Decides if wants to fit classifier again for the entirety
                # of the dataset
                print('Want to fit for entire dataset?')
                inp = input('(y/n): ').lower()
                print()
                if inp == 'y':
                    print('-- Training the last classifier --\n')
                    warnings.filterwarnings("ignore")
                    last_classifier.fit(X, y)
                    warnings.filterwarnings("default")
                    print()
                # NOTE(review): nesting reconstructed from flattened source;
                # submission is assumed not to depend on the refit answer.
                submit(last_classifier)
                return
def _prompt_with_default(prompt, default, blank_line=True):
    """Return the raw reply to *prompt*, or *default* when the reply is empty."""
    reply = input(prompt)
    if reply == '':
        reply = default
    if blank_line:
        print()
    return reply


def _prompt_fraction(prompt):
    """Prompt for a value in (0, 1]; an empty or out-of-range reply falls back to 1."""
    reply = input(prompt)
    if reply == '' or not (0 < float(reply) <= 1):
        reply = 1
    print()
    return reply


def _run_lgbm_option(train_X, train_y, test_X, test_y):
    """Prompt for LightGBM settings, fit on the training split, print both reports, return the model."""
    print('-- SkLearn LightGBM API --\n')

    max_depth = _prompt_with_default('Define max depth of tree (default = -1): ', -1)
    min_data_in_leaf = _prompt_with_default('Define min data in leaf (default = 20): ', 20)
    feature_fraction = _prompt_fraction(
        'Define feature fraction for random subsampling (default = 1): ')

    bagging_freq = 0
    bagging_fraction = _prompt_fraction(
        'Define bagging fraction for random sampling (default = 1): ')
    # The bagging frequency is only asked for when a fraction below 1 was chosen.
    if float(bagging_fraction) < 1:
        bagging_freq = _prompt_with_default('Define frequecy for bagging (default = 0): ', 0)

    alpha = _prompt_with_default('Define regularization alpha (default = 0): ', 0)
    lamb = _prompt_with_default('Define regularization lambda (default = 0): ', 0)
    min_gain_to_split = _prompt_with_default('Define min gain to split node (default = 0): ', 0)
    objective = _prompt_with_default(
        'Define objective (default = softmax)\n\nOptions: \n- num_class\n- softmax\n- ovr\n\n>> ',
        'softmax').lower()
    num_boost_round = _prompt_with_default('Define number of iterations (default = 100): ', 100)
    l_rate = _prompt_with_default('Define learning rate (default = 0.1): ', 0.1)
    num_leaves = _prompt_with_default('Define max number of leaves in a tree (default = 31): ', 31)
    max_bin = _prompt_with_default('Define max number of bins (default = 255): ', 255)
    min_sum_hessian_in_leaf = _prompt_with_default(
        'Define min hessian in leaf (default = 0.001): ', 0.001)

    print('Compensate for unbalanced dataset?')
    is_unbalance = input('(y/n): ').lower() == 'y'
    print()

    print('-- Training Light GBM --\n')
    # Replies arrive as strings (or the numeric defaults), so every value is cast explicitly.
    gbm = lgb.LGBMClassifier(
        max_depth=int(max_depth),
        min_data_in_leaf=int(min_data_in_leaf),
        feature_fraction=float(feature_fraction),
        bagging_fraction=float(bagging_fraction),
        bagging_freq=int(bagging_freq),
        lambda_l1=float(alpha),
        lambda_l2=float(lamb),
        min_gain_to_split=float(min_gain_to_split),
        objective=objective,
        num_boost_round=int(num_boost_round),
        learning_rate=float(l_rate),
        num_leaves=int(num_leaves),
        gpu_use_dp=True,
        num_threads=2,
        num_class=3,
        is_unbalance=is_unbalance,
        verbosity=10,
        max_bin=int(max_bin),
        min_sum_hessian_in_leaf=float(min_sum_hessian_in_leaf),
    )

    warnings.filterwarnings("ignore")
    gbm.fit(train_X, train_y)
    warnings.filterwarnings("default")

    preds = gbm.predict(train_X)
    print('Results on training set:')
    print(classification_report(train_y, preds, zero_division=1))
    print('Micro averaged F1 Score: ', f1_score(train_y, preds, average='micro'), '\n')

    print('-'*80, '\n')

    preds = gbm.predict(test_X)
    print('Results on cross-validation set:')
    print(classification_report(test_y, preds, zero_division=1))
    print('Micro averaged F1 Score: ', f1_score(test_y, preds, average='micro'), '\n')

    winsound.Beep(frequency, duration)
    print('='*80, '\n')
    return gbm


def _run_random_search_option(train_X, train_y):
    """Run a RandomizedSearchCV over a fixed LightGBM grid and print the best parameters and score."""
    print('-- Random GridSearchCV on LightGBM classifier --\n')

    iters = _prompt_with_default('Define number of iterations (default = 10): ', 10)
    cv = _prompt_with_default('Define number of CV (default = 2): ', 2)

    param_distributions = {
        'max_depth': [-1, 10, 30, 60, 100, 200],
        'min_data_in_leaf': [4, 8, 16, 32, 64, 128],
        'feature_fraction': [0.1, 0.25, 0.5, 0.75, 1],
        'bagging_fraction': [0.1, 0.25, 0.5, 0.75, 1],
        'bagging_freq': [0, 2, 8, 32, 128, 256],
        'lambda_l1': [0, 0.1, 1, 2, 4, 175, 512, 1000],
        'lambda_l2': [0, 0.1, 1, 2, 4, 175, 512, 1000],
        'min_gain_to_split': [0, 0.01, 0.03, 0.1, 0.3, 0.5, 0.9, 1],
        'num_boost_round': [100, 500, 1000, 2500, 5000, 10000, 15000, 20000],
        'learning_rate': [0.5, 0.25, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001],
        'num_leaves': [10, 31, 62, 124, 200, 500, 750, 1000],
        'objective': ['softmax'],
        'gpu_use_dp': [True],
        'num_threads': [1],
        'num_class': [3],
        'max_bin': [128, 256, 512, 1024, 2048, 3000, 4000, 5000, 6000],
    }

    print('-- Finding best parameters --\n')
    rSearch = RandomizedSearchCV(estimator=lgb.LGBMClassifier(),
                                 param_distributions=param_distributions,
                                 scoring='f1_micro',
                                 n_jobs=2,
                                 cv=int(cv),
                                 verbose=10,
                                 n_iter=int(iters))

    warnings.filterwarnings("ignore")
    rSearch.fit(train_X, train_y)
    warnings.filterwarnings("default")

    print()
    print('Best parameters:')
    print(rSearch.best_params_, '\n')
    print('Score:')
    print(rSearch.best_score_, '\n')

    winsound.Beep(frequency, duration)
    print('='*80, '\n')


def _run_hyperopt_option(X, y):
    """Run TPE (Hyperopt) over LightGBM settings, scored by 3-fold micro-F1 on the full X, y."""
    print('-- Bayesian Optimization on LightGBM classifier (with Hyperopt) --\n')

    iters = _prompt_with_default('Define number of evaluations (default = 50): ', 50)

    def objective_fun(space):
        print()
        warnings.filterwarnings('ignore')
        model = lgb.LGBMClassifier(**space)
        accuracy = cross_val_score(model, X, y, cv=3, scoring='f1_micro').mean()
        warnings.filterwarnings('default')
        # Hyperopt minimises, so the score is negated.
        return {'loss': - accuracy, 'status': STATUS_OK}

    # Fixed settings are plain scalars. Wrapping them in one-element lists (the
    # RandomizedSearchCV convention) would hand sequences to LGBMClassifier(**space).
    param_space = {
        'max_depth': 0,
        'min_data_in_leaf': scope.int(hp.quniform('min_data_in_leaf', 1, 5000, 1)),
        'feature_fraction': hp.uniform('feature_fraction', 0, 1),
        'bagging_fraction': hp.uniform('bagging_fraction', 0, 1),
        'bagging_freq': scope.int(hp.quniform('bagging_freq', 0, 1000, 1)),
        'lambda_l1': hp.uniform('lambda_l1', 0, 10000),
        'lambda_l2': hp.uniform('lambda_l2', 0, 10000),
        'min_gain_to_split': hp.uniform('min_gain_to_split', 0, 1),
        'num_boost_round': scope.int(hp.quniform('num_boost_round', 100, 20000, 1)),
        'learning_rate': hp.uniform('learning_rate', 0.000001, 1),
        'num_leaves': scope.int(hp.quniform('num_leaves', 2, 2000, 1)),
        'objective': 'softmax',
        'gpu_use_dp': True,
        'num_threads': 2,
        'num_class': 3,
        'max_bin': scope.int(hp.quniform('max_bin', 32, 4096, 1)),
        'min_sum_hessian_in_leaf': hp.uniform('min_sum_hessian_in_leaf', 0, 5),
    }

    # Starting point evaluated before TPE begins suggesting new points.
    param_init_trials = {
        'max_depth': 0,
        'min_data_in_leaf': 40,
        'feature_fraction': 0.5,
        'bagging_fraction': 0.9,
        'bagging_freq': 1,
        'lambda_l1': 0,
        'lambda_l2': 10,
        'min_gain_to_split': 0,
        'num_boost_round': 10000,
        'learning_rate': 0.1,
        'num_leaves': 31,
        'objective': 'softmax',
        'gpu_use_dp': True,
        'num_threads': 2,
        'num_class': 3,
        'max_bin': 256,
        'min_sum_hessian_in_leaf': 0.1,
    }

    trials = generate_trials_to_calculate([param_init_trials])
    best = fmin(fn=objective_fun,
                space=param_space,
                algo=tpe.suggest,
                max_evals=int(iters),
                trials=trials)

    print()
    print(best, '\n')
    input()


def _run_evolve_option(X, y):
    """Collect the evolutionary-search settings and hand them to evolveSelect."""
    print('-- Evolutionary optimization on LightGBM --\n')

    iters = _prompt_with_default('Define number of generations (default = 10): ', 10)
    cv = _prompt_with_default('Define number of cross-validation sets (default = 2): ', 2)
    c_size = _prompt_with_default('Define size of change on each iteration (default = 1): ', 1)
    pop_size = _prompt_with_default('Define size of population (default = 10): ', 10,
                                    blank_line=False)

    param_space = {
        'max_depth': [-1],
        'min_data_in_leaf': np.linspace(0, 5000, 5002, dtype=int),
        'feature_fraction': np.linspace(0, 1, 1000),
        'bagging_fraction': np.linspace(0, 1, 1000),
        'bagging_freq': np.linspace(0, 100, 102, dtype=int),
        'lambda_l1': np.linspace(0, 10000, 100000),
        'lambda_l2': np.linspace(0, 10000, 100000),
        'min_gain_to_split': np.linspace(0, 0.9, 100),
        'num_boost_round': np.linspace(100, 20000, 19902, dtype=int),
        'learning_rate': np.linspace(0.0001, 1, 10000),
        'num_leaves': np.linspace(2, 2000, 2000, dtype=int),
        'objective': ['softmax'],
        'gpu_use_dp': [True],
        'num_threads': [2],
        'num_class': [3],
        'max_bin': np.linspace(32, 4096, 4066, dtype=int),
        'min_sum_hessian_in_leaf': np.linspace(0, 5, 100),
    }

    common_ancestor = {
        'max_depth': -1,
        'min_data_in_leaf': 20,
        'feature_fraction': 0.5,
        'bagging_fraction': 0.9,
        'bagging_freq': 1,
        'lambda_l1': 0.,
        'lambda_l2': 10.,
        'min_gain_to_split': 0.,
        'num_boost_round': 2000,
        'learning_rate': 0.1,
        'num_leaves': 31,
        'objective': 'softmax',
        'gpu_use_dp': True,
        'num_threads': 2,
        'num_class': 3,
        'max_bin': 256,
        'min_sum_hessian_in_leaf': 0.1,
    }

    evolveSelect(X, y, param_space, int(iters), int(pop_size), float(c_size), int(cv),
                 common_ancestor)


def _offer_submission(last_classifier, X, y):
    """Ask whether to submit *last_classifier*, optionally refitting on all data; True if submitted."""
    print('Want to submit?')
    wants_submit = input('(y/n): ').lower() == 'y'
    print()
    if not wants_submit:
        return False

    print('Want to fit for entire dataset?')
    wants_refit = input('(y/n): ').lower() == 'y'
    print()
    if wants_refit:
        print('-- Training the last classifier --\n')
        warnings.filterwarnings("ignore")
        last_classifier.fit(X, y)
        warnings.filterwarnings("default")
        print()

    submit(last_classifier)
    return True


def main():
    """Interactive console front-end for training and tuning LightGBM classifiers.

    Loads 'train_values.csv' / 'train_labels.csv', preprocesses the features,
    optionally shows a 2-component PCA scatter plot, builds a train/test split
    (full data or a class-balanced subset) and then loops over a text menu:
    LGBM, GridSearch, Hyperopt or Evolve. Any other reply, once a classifier
    has been trained through the LGBM option, offers to submit that classifier;
    the function returns after a submission.
    """
    # Imports the dataset and labels and turns them into numpy arrays.
    X = pd.read_csv('train_values.csv', index_col=0).to_numpy()
    y = np.array(pd.read_csv('train_labels.csv', index_col=0).to_numpy().T[0])

    # Preprocesses data (one hot or ordinal encoding)
    X = preProcess(X, 'OneHotEncoding')

    # Performs PCA on dataset for better visualization
    print("Do you want to visualize dataset with PCA?\n")
    pca_inp = input('(y/n): ').lower()
    if pca_inp == 'y':
        size = int(input("Define amount of samples to visualize with PCA: "))
        pca = PCA(n_components=2)
        pca.fit(X)
        PCX = pca.transform(X)
        plt.scatter(PCX[:size, 0], PCX[:size, 1], c=y[:size])
        plt.show()
    print()

    # Option to use entire dataset or a reduced set with a more balanced amount of each label.
    print("Use reduced set?\n")
    use_reduced = input('(y/n): ').lower()
    print()

    # Create training set and test set
    if use_reduced == 'y':
        labels = (1, 2, 3)
        rows_by_label = {
            label: [row for row, tag in zip(X, y) if tag == label] for label in labels
        }
        tags_by_label = {label: [tag for tag in y if tag == label] for label in labels}
        # Truncate every class to the size of the rarest one.
        size = min(len(rows_by_label[label]) for label in labels)
        Xp = np.concatenate([rows_by_label[label][:size] for label in labels])
        yp = np.concatenate([tags_by_label[label][:size] for label in labels])
        train_X, test_X, train_y, test_y = train_test_split(Xp, yp, test_size=0.2)
    else:
        train_X, test_X, train_y, test_y = train_test_split(
            X, y, test_size=0.2, random_state=42)

    # Most recent classifier trained through the LGBM option (None until then).
    last_classifier = None

    # 'Front-end'
    while True:
        print("Choose the training model: ")
        print(' - LGBM\n - GridSearch\n - Hyperopt\n - Evolve\n')
        inp = input('>> ').lower()
        print()

        if inp == 'lgbm':
            last_classifier = _run_lgbm_option(train_X, train_y, test_X, test_y)
        elif inp == 'gridsearch':
            _run_random_search_option(train_X, train_y)
        elif inp == 'hyperopt':
            _run_hyperopt_option(X, y)
        elif inp == 'evolve':
            _run_evolve_option(X, y)
        elif last_classifier is not None:
            if _offer_submission(last_classifier, X, y):
                return
def bayesian_parameter_optimisation(tree, toolbox, max_evals_without_progress=10):
    """
    Tune the hyperparameters found in ``tree`` with TPE.

    The search starts from the tree's current values and repeatedly calls
    ``fmin`` with an evaluation cap of ``len(trials) + 1`` until the latest
    loss has failed to improve ``max_evals_without_progress`` times in a row,
    or a ``TimeoutError`` interrupts the search.

    :param tree: tree whose tunable arguments are optimised
    :param toolbox: provides ``clone``; also forwarded to the objective and fill helpers
    :param max_evals_without_progress: consecutive non-improving rounds tolerated
    :return: ``toolbox.clone(tree)`` when nothing is tunable, otherwise the
        result of ``_fill_with_hyperparameters`` with the last evaluated optimum
    """
    search_space, arg_positions, start_point = _get_hyperparameters_from_tree(
        tree)

    # Nothing tunable: hand back an untouched copy.
    if not search_space:
        return toolbox.clone(tree)

    print("Original tree", tree)

    # Seed the search with the existing values rather than a random draw.
    history = generate_trials_to_calculate([start_point])

    # A fresh seed per optimisation run prevents overfitting to one particular split.
    split_seed = random.randint(0, 1000)
    objective = partial(_objective_function, tree, toolbox, arg_positions, split_seed)

    tuned_values = space_eval(search_space, start_point)
    stale_rounds = 0
    lowest_loss = inf

    while True:
        try:
            argmin = fmin(fn=objective,
                          space=search_space,
                          algo=tpe.suggest,
                          max_evals=len(history) + 1,
                          trials=history,
                          show_progressbar=False)
            tuned_values = space_eval(search_space, argmin)

            # A round counts as progress only if the newest loss beats the best so far.
            latest_loss = history.losses()[-1]
            stale_rounds = stale_rounds + 1 if latest_loss >= lowest_loss else 0
            lowest_loss = min(latest_loss, lowest_loss)
        except TimeoutError:
            # Out of time: keep whatever optimum has been found so far.
            break

        if stale_rounds >= max_evals_without_progress:
            break

    tree = _fill_with_hyperparameters(tree, toolbox, arg_positions, tuned_values)
    print("Optimised tree", tree)
    return tree
def fmin(
    fn,
    space,
    algo,
    max_evals,
    early_stop_round_mode_fun=None,
    early_stop_round=None,
    trials=None,
    rstate=None,
    allow_trials_fmin=False,
    pass_expr_memo_ctrl=None,
    catch_eval_exceptions=False,
    verbose=0,
    return_argmin=True,
    points_to_evaluate=None,
    max_queue_len=1,
    show_progressbar=True,
):
    """Minimize a function over a hyperparameter space.

    More realistically: *explore* a function over a hyperparameter space
    according to a given algorithm, allowing up to a certain number of
    function evaluations. As points are explored, they are accumulated in
    `trials`.

    Parameters
    ----------
    fn : callable (trial point -> loss)
        This function will be called with a value generated from `space`
        as the first and possibly only argument. It can return either
        a scalar-valued loss, or a dictionary. A returned dictionary must
        contain a 'status' key with a value from `STATUS_STRINGS`, and must
        contain a 'loss' key if the status is `STATUS_OK`. Particular
        optimization algorithms may look for other keys as well. An
        optional sub-dictionary associated with an 'attachments' key will
        be removed by fmin; its contents will be available via
        `trials.trial_attachments`. The rest (usually all) of the returned
        dictionary will be stored and available later as some 'result'
        sub-dictionary within `trials.trials`.

    space : hyperopt.pyll.Apply node
        The set of possible arguments to `fn` is the set of objects
        that could be created with non-zero probability by drawing randomly
        from this stochastic program involving hp_<xxx> nodes
        (see `hyperopt.hp` and `hyperopt.pyll_utils`).

    algo : search algorithm
        This object, such as `hyperopt.rand.suggest` and
        `hyperopt.tpe.suggest`, provides logic for sequential search of the
        hyperparameter space.

    max_evals : int
        Allow up to this many function evaluations before returning.

    early_stop_round_mode_fun : callable or None, default None
        Forwarded unchanged to `FMinIter`. When `early_stop_round` is given
        and this is None, a function that always returns 1 is used instead.

    early_stop_round : default None
        Forwarded unchanged to `FMinIter` (early-stopping setting; its exact
        semantics are defined there).

    trials : None or base.Trials (or subclass)
        Storage for completed, ongoing, and scheduled evaluation points. If
        None, then a temporary `base.Trials` instance will be created. If
        a trials object, then that trials object will be affected by
        side-effect of this call.

    rstate : numpy.RandomState, default None
        Each call to `algo` requires a seed value, which should be different
        on each call. This object is used to draw these seeds via `randint`.
        When None, the default is
        `numpy.random.RandomState(int(env['HYPEROPT_FMIN_SEED']))`
        if the `HYPEROPT_FMIN_SEED` environment variable is set to a
        non-empty string, otherwise a fresh `numpy.random.RandomState()`.

    allow_trials_fmin : bool, default False
        If True and the `trials` argument has an `fmin` attribute, the whole
        call is delegated to `trials.fmin(...)`. The delegated call does not
        receive `early_stop_round`, `early_stop_round_mode_fun` or
        `points_to_evaluate`.

    pass_expr_memo_ctrl : bool, default False
        If set to True, `fn` will be called in a different more low-level
        way: it will receive raw hyperparameters, a partially-populated
        `memo`, and a Ctrl object for communication with this Trials
        object.

    catch_eval_exceptions : bool, default False
        Stored on the `FMinIter` instance as `catch_eval_exceptions`.

    verbose : int
        Print out some information to stdout during search.

    return_argmin : bool, default True
        If True, return `trials.argmin`. If False, return the best point
        expressed in the structure of `space` (see Returns).

    points_to_evaluate : list, default None
        Only works if trials=None. If points_to_evaluate equals None then the
        trials are evaluated normally. If a list of dicts is passed then
        the given points are evaluated before optimisation starts, so the
        overall number of optimisation steps is
        len(points_to_evaluate) + max_evals. Elements of this list must be
        in the form of a dictionary with variable names as keys and variable
        values as dict values. Example points_to_evaluate value is
        [{'x': 0.0, 'y': 0.0}, {'x': 1.0, 'y': 2.0}]

    max_queue_len : integer, default 1
        Sets the queue length generated in the dictionary or trials.
        Increasing this value helps to slightly speed up parallel
        simulations which sometimes lag on suggesting a new trial.

    show_progressbar : bool, default True
        Show a progressbar.

    Returns
    -------
    argmin : dictionary or None
        If `return_argmin` is True, returns `trials.argmin`, which is a
        dictionary. Otherwise returns
        `hyperopt.space_eval(space, trials.argmin)` if there were any
        trials; this object shares the same structure as the space passed.
        If there were no trials, returns None.

    Raises
    ------
    Exception
        If `return_argmin` is True and no trial was recorded.
    AssertionError
        If `points_to_evaluate` is given (with `trials=None`) and is not a list.
    """
    if rstate is None:
        env_rseed = os.environ.get('HYPEROPT_FMIN_SEED', '')
        if env_rseed:
            rstate = np.random.RandomState(int(env_rseed))
        else:
            rstate = np.random.RandomState()

    if allow_trials_fmin and hasattr(trials, 'fmin'):
        return trials.fmin(
            fn, space,
            algo=algo,
            max_evals=max_evals,
            max_queue_len=max_queue_len,
            rstate=rstate,
            pass_expr_memo_ctrl=pass_expr_memo_ctrl,
            verbose=verbose,
            catch_eval_exceptions=catch_eval_exceptions,
            return_argmin=return_argmin,
            show_progressbar=show_progressbar,
        )

    if trials is None:
        if points_to_evaluate is None:
            trials = base.Trials()
        else:
            # isinstance also accepts list subclasses, unlike `type(...) == list`.
            assert isinstance(points_to_evaluate, list)
            trials = generate_trials_to_calculate(points_to_evaluate)

    domain = base.Domain(fn, space,
                         pass_expr_memo_ctrl=pass_expr_memo_ctrl)

    # Early stopping was requested without a mode function: fall back to a constant 1.
    if early_stop_round is not None and early_stop_round_mode_fun is None:
        early_stop_round_mode_fun = lambda x: 1

    rval = FMinIter(algo, domain, trials,
                    max_evals=max_evals,
                    early_stop_round_mode_fun=early_stop_round_mode_fun,
                    early_stop_round=early_stop_round,
                    rstate=rstate,
                    verbose=verbose,
                    max_queue_len=max_queue_len,
                    show_progressbar=show_progressbar)
    rval.catch_eval_exceptions = catch_eval_exceptions
    rval.exhaust()

    if return_argmin:
        if len(trials.trials) == 0:
            raise Exception(
                "There are no evaluation tasks, cannot return argmin of task losses."
            )
        return trials.argmin
    elif len(trials) > 0:
        # Only when there are trials can the best point be mapped back into the space.
        return space_eval(space, trials.argmin)
    else:
        return None
def opt_method(data, initializers, resdir, max_evals):
    """Search autoencoder unmixing hyperparameters with TPE and save the best configuration.

    A single hand-picked starting point is evaluated first; ``fmin`` then
    explores learning rate, l1/l2 weights, number of patches and batch size,
    using the SAD value returned by ``calc_SAD_2`` as the loss.

    :param data: object exposing ``dataset_name``, ``data_path`` and ``init_endmembers``
    :param initializers: not used by this function
    :param resdir: forwarded to ``save_config``
    :param max_evals: evaluation cap passed to ``fmin``
    :return: ``(improvements, pars, trials)`` where ``improvements`` is the
        fold of ``improvement_only`` over the trial losses, ``pars`` is the
        value returned by ``fmin`` and ``trials`` is the trials object
    """
    dataset = data.dataset_name
    _location = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    datapath = data.data_path
    # Unpacking requires init_endmembers.shape to have exactly two entries.
    _n_band, n_end = data.init_endmembers.shape

    def label(suffix):
        # Hyperopt labels are namespaced per dataset.
        return 'ACCESS_' + dataset + suffix

    def objective_func(data, hyperpars):
        # Train one autoencoder with the sampled settings and score it against the ground truth.
        data.load_data(normalize=True, shuffle=False)
        unmixer = Autoencoder(n_end=n_end,
                              data=my_data,
                              activation=LeakyReLU(0.2),
                              optimizer=hyperpars['optimizer'],
                              l2=hyperpars['l2'],
                              l1=hyperpars['l1'],
                              plot_every_n=0)
        unmixer.create_model(SAD)
        my_data.make_patches(1, num_patches=hyperpars['num_patches'], use_orig=True)
        history = unmixer.fit(epochs=100, batch_size=hyperpars['batch_size'])

        endmembers = unmixer.get_endmembers().transpose()
        abundances = unmixer.get_abundances()
        observed = np.transpose(data.orig_data)
        ground_truth = np.transpose(data.GT)

        sad, _idx_org, _idx_hat, _sad_k_m, _s0 = calc_SAD_2(ground_truth, endmembers)
        mse_value = mse(observed, endmembers, np.transpose(abundances))
        abundances = abundances.reshape(
            data.n_rows, data.n_cols, endmembers.shape[1]).transpose((1, 0, 2))

        attachments = {
            'endmembers': endmembers,
            'abundances': abundances,
            'loss': history.history['loss'],
            'SAD': sad,
            'MSE': mse_value,
        }

        # Drop the model and clear the backend session before the next evaluation.
        del unmixer
        K.clear_session()

        return {'loss': sad, 'status': STATUS_OK, 'attachments': attachments}

    space = {
        'optimizer': {
            'class_name': 'RMSprop',
            'config': {'lr': hp.qloguniform(label('_lr'), -16, -1, 1e-7)},
        },
        'l1': hp.qloguniform(label('_l1'), -16, 2, 1e-7),
        'l2': hp.qloguniform(label('_l2'), -16, 2, 1e-7),
        'num_patches': scope.int(hp.quniform(label('_num_patches'), 8, 8192, 1)),
        'batch_size': scope.int(hp.quniform(label('_batch_size'), 1, 50, 1)),
    }

    my_data = HSI(datapath)

    # Hand-picked starting point, evaluated before TPE suggests anything.
    start_point = {
        label('_lr'): 0.001,
        label('_l1'): 0,
        label('_l2'): 0,
        label('_num_patches'): 1028,
        label('_batch_size'): 32,
    }
    trials = generate_trials_to_calculate([start_point])

    pars = fmin(lambda x: objective_func(my_data, x),
                space=space,
                trials=trials,
                algo=tpe.suggest,
                max_evals=max_evals,
                rstate=np.random.RandomState(random_seed))

    improvements = reduce(improvement_only, trials.losses(), [])
    save_config(resdir, dataset, pars, trials.average_best_error())
    return improvements, pars, trials
def runOptim(self, budget, b, initData=None, initResult=None):
    """Run batched TPE optimisation and return the per-iteration best results.

    :param budget: iteration budget; when ``b > 1`` it is reduced to
        ``int(budget / b) + 1`` and the last recorded iteration is dropped
    :param b: batch size (number of points suggested per iteration)
    :param initData: optional initial points, one row per point with values
        ordered like ``self.bounds`` keys
    :param initResult: optional scores (max/auc values) of ``initData``
    :return: ``pandas.DataFrame`` with columns
        ``iter``, ``arm_list``, ``best_input`` and ``best_value``
    """
    def score_from_loss(loss):
        # TPE minimises a loss; reported values are scores (max/auc).
        if self.f_type == "func":
            return -1.0 * loss
        if self.f_type == "class":
            return 1.0 - loss
        return loss

    def loss_from_score(score):
        # Exact inverse of score_from_loss.
        if self.f_type == "func":
            return -1.0 * score
        if self.f_type == "class":
            return 1.0 - score
        return score

    # get variable names
    bounds_keys = list(self.bounds.keys())
    # the first parameter is the categorical variable (the selected arm)
    first_para_name = bounds_keys[0]

    if initData is not None and initResult is not None:
        Xinit = initData[:]
        Yinit = initResult[:]
        # Bug fix: Pinit/Finit used to be left unbound on this path, so the method
        # failed with NameError. They are derived with the same conversions the
        # batch update below applies to suggested points.
        # NOTE(review): assumes each initData row lists values in bounds-key
        # order, like the x_batch rows built below - confirm against callers.
        Pinit = []
        for row in Xinit:
            point = {bounds_keys[idx]: val for idx, val in enumerate(row)}
            point[first_para_name] = int(point[first_para_name])
            Pinit.append(point)
        Finit = [loss_from_score(score) for score in Yinit]
    else:
        # set a random number
        np.random.seed(self.trial_num)
        # Xinit: points in BO format and Pinit: points in TPE format
        # Yinit: max/auc values and Finit: min/loss values
        Xinit, Pinit, Yinit, Finit = self.initialize_all_methods()

    # best value and best point among the initial points
    besty = np.max(Yinit)
    besty_idx = np.argmax(Yinit)
    bestp = Pinit[besty_idx]
    bestarm = bestp[first_para_name]
    print("n_init: {}, bestarm: {}, bestx: {}, besty: {}".format(len(Finit), bestarm, bestp, round(besty, 4)))

    # store the result for this trial (becoming one col of matrix best_vals)
    result_list = []
    # store the selected arms of initial points
    arm_list = []
    for b_ele_idx in range(b):
        # store selected arms in all iterations for all trials
        self.arm_recommendations.append(bestarm)
        arm_list.append(bestarm)
    # store the bestx and besty of initial points
    result_list.append([0, arm_list, bestp, besty])

    # use initial points for TPE
    tpe_algorithm = partial(tpe.suggest, n_startup_jobs=len(Finit))
    # create trials with initial points
    trials = generate_trials_to_calculate(Pinit, Finit)

    # store best point and best function value so far
    bestx_sofar = []
    besty_sofar = []
    if b > 1:
        budget = int(budget / b) + 1
    for t in range(1, budget):
        print("iteration: {}".format(t))
        # store <batch_size> arms selected in this iteration
        arm_list = []
        # store suggested data points in batch
        x_batch = np.zeros((b, self.n_dim))  # batch_size x dim of a data point
        y_batch = np.zeros(b)  # store max function values
        f_batch = np.zeros(b)  # store min function values
        # in an iteration, suggest a batch of points
        # only after selecting all points in the batch, we can compute their function values
        for b_ele_idx in range(b):
            # run TPE to suggest the next point which is stored in trials
            best_params = fmin(self.f, self.bounds, tpe_algorithm, len(trials) + 1, trials)
            # get best_x and best_y so far
            bestx_sofar.append(best_params)
            # max/auc of objective function
            besty_sofar.append(score_from_loss(trials.best_trial["result"]["loss"]))

            # get selected arm for categorical variable (the first parameter)
            arm = int(trials.vals[first_para_name][-1])
            # store selected arms in all iterations for all trials
            self.arm_recommendations.append(arm)
            arm_list.append(arm)
            # get other variables
            x_next = [trials.vals[bounds_keys[d]][-1] for d in range(1, self.n_dim)]
            x_batch[b_ele_idx, :] = [arm] + x_next

            # get function value of the next point (indeed, we don't know this function value)
            f_next = trials.results[-1]["loss"]
            f_batch[b_ele_idx] = f_next  # store min function value
            y_next = score_from_loss(f_next)
            y_batch[b_ele_idx] = y_next  # store max function value
            print("arm_next: {}, x_next: {}, y_next: {}".format(arm, np.around(x_next, 4), round(y_next, 4)))

            if b > 1:
                # reset trials to suggest the next batch element
                trials = generate_trials_to_calculate(Pinit, Finit)
        # end batch

        if b > 1:
            # update the data with suggested points in batch
            for ele_idx, x in enumerate(x_batch):
                point = {bounds_keys[idx]: val for idx, val in enumerate(x)}
                point[first_para_name] = int(point[first_para_name])
                Pinit.append(point)
                Finit.append(f_batch[ele_idx])
            # create trials with new batch elements
            trials = generate_trials_to_calculate(Pinit, Finit)

        # instead of computing function values of batch elements,
        # we already have them in y_batch
        for b_ele_idx in range(b):
            # get the best function value till now
            end = (t - 1) * b + (b_ele_idx + 1)
            besty = max(besty_sofar[:end])
            bestx = bestx_sofar[np.argmax(besty_sofar[:end])]
            # get selected arm for categorical variable (the first parameter)
            bestarm = bestx[first_para_name]
            print("bestarm: {}, bestx: {}, besty: {}".format(bestarm, bestx, round(besty, 4)))

        # store the results of this iteration
        result_list.append([t, arm_list, bestx, besty])

    if b > 1:
        result_list = result_list[:-1]

    print("Finished ", self.method, " for trial: ", self.trial_num)

    # store the result for all iterations in this trial
    df = pd.DataFrame(result_list, columns=["iter", "arm_list", "best_input", "best_value"])

    return df