def model(X_train, Y_train, X_test, Y_test):
    """Build, train and score one candidate dense network.

    The double-brace expressions are search-space placeholders, presumably
    substituted by a hyperas-style template pass before this function is
    executed (confirm against the caller). Their order and surrounding
    calls are therefore kept exactly as written.

    The network takes 784 input features and ends in a 10-way softmax. It
    is trained for a single epoch and evaluated on the test split, which
    is also used as validation data during fitting.

    Returns:
        A result dict whose 'loss' is the negated test accuracy (so that a
        minimiser maximises accuracy), together with the status flag and
        the trained network under 'model'.
    """
    net = layers.Sequential()

    # First hidden layer: fixed width, tunable dropout.
    net.add(layers.Dense(512, input_shape=(784, )))
    net.add(layers.Activation('relu'))
    net.add(layers.Dropout({{hyperopt.uniform(0, 1)}}))

    # Second hidden layer: tunable width, activation and dropout.
    net.add(layers.Dense({{hyperopt.choice([256, 512, 1024])}}))
    net.add(layers.Activation({{hyperopt.choice(['relu', 'sigmoid'])}}))
    net.add(layers.Dropout({{hyperopt.uniform(0, 1)}}))

    # If we choose 'four', add an additional fourth layer
    if {{hyperopt.choice(['three', 'four'])}} == 'four':
        net.add(layers.Dense(100))
        # The search may also pick between whole layer objects.
        net.add({{ hyperopt.choice([layers.Dropout(0.5), layers.Activation('linear')]) }})
        net.add(layers.Activation('relu'))

    # Output layer.
    net.add(layers.Dense(10))
    net.add(layers.Activation('softmax'))

    net.compile(
        loss='categorical_crossentropy',
        optimizer={{hyperopt.choice(['rmsprop', 'adam', 'sgd'])}},
        metrics=['accuracy'],
    )
    net.fit(
        X_train,
        Y_train,
        batch_size={{hyperopt.choice([64, 128])}},
        nb_epoch=1,
        verbose=2,
        validation_data=(X_test, Y_test),
    )

    # evaluate() yields (loss, accuracy) because 'accuracy' is the only
    # metric compiled in; only the accuracy is used below.
    test_loss, test_acc = net.evaluate(X_test, Y_test, verbose=0)
    print('Test accuracy:', test_acc)
    return {'loss': -test_acc, 'status': hyperopt.STATUS_OK, 'model': net}
# Fragment: the statements up to and including the `return` are the tail of
# an objective function whose `def` line is outside this view. They read FN,
# TP and FP from `CM`, print them, compute F1 = 2*TP / (2*TP + FP + FN) and
# return `1 - f1` as the loss, so that minimising the loss maximises F1.
# The remaining statements define the search space and run 100 TPE
# evaluations of `objective`, recording them in `trials`.
# NOTE(review): all integer parameters are drawn with hp.choice, for which
# fmin reports the index of the selected option rather than its value --
# presumably `best` must be mapped back (e.g. with hyperopt.space_eval)
# before it is used; confirm against the code that consumes `best`.
FN = CM[1][0] TP = CM[1][1] FP = CM[0][1] print("TP = {}".format(TP)) print("FP = {}".format(FP)) print("FN = {}".format(FN)) f1 = 2. * TP / (2. * TP + FP + FN) print("F1 : ", f1) return {'loss': 1 - f1, 'status': STATUS_OK} space = { 'n_estimators': hp.choice('n_estimators', np.arange(200, 501, 25, dtype=int)), 'max_depth': hp.choice('max_depth', np.arange(15, 20, dtype=int)), 'max_features': hp.choice('max_features', np.arange(15, 30, dtype=int)), 'mss': hp.choice('mss', np.arange(2, 40, 1, dtype=int)), 'cw': hp.uniform('cw', 1, 5), 'msl': hp.choice('msl', np.arange(1, 11, dtype=int)) } trials = Trials() best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100, trials=trials)
def objective(x):
    """Return the result record for the toy quadratic loss ``x ** 2``.

    Besides 'loss' and 'status', the record carries the evaluation
    timestamp, a small nested payload under 'other_stuff', and an
    'attachments' entry holding the pickled ``time.time`` function.
    """
    result = {'loss': x**2, 'status': STATUS_OK}
    result['eval_time'] = time.time()
    result['other_stuff'] = {'type': None, 'value': [0, 1, 2]}
    result['attachments'] = {'time_module': pickle.dumps(time.time)}
    return result


# Minimise the objective over x in [-10, 10] with 100 TPE evaluations,
# keeping the full history in `trials`.
trials = Trials()
best = fmin(
    objective,
    space=hp.uniform('x', -10, 10),
    algo=tpe.suggest,
    max_evals=100,
    trials=trials,
)
print(best)

# A conditional search space: each branch of the choice owns its own
# nested parameter ('c1' or 'c2').
space = hp.choice(
    'a',
    [
        ('case 1', 1 + hp.lognormal('c1', 0, 1)),
        ('case 2', hp.uniform('c2', -10, 10)),
    ],
)

# Draw and print one random sample from the space above.
import hyperopt.pyll.stochastic
print(hyperopt.pyll.stochastic.sample(space))
plt.show() return -best_AUC # =========use library "hyperopt" to finetune the hyerparameters============== from hyperopt import fmin, tpe, hp, partial batch_list = [32, 64, 128] for dist in [5.]: space = { "lr_rate": hp.uniform("lr_rate", 0.0005, 0.01), "dp_out": hp.uniform("dp_out", 0.5, 1), "bt_size": hp.choice("bt_size", batch_list), "distance": hp.choice("distance", [dist]) } # algo = partial(tpe.suggest, n_startup_jobs=10) try: best = fmin(main, space, algo=tpe.suggest, max_evals=50) best["bt_size"] = batch_list[best["bt_size"]] best["distance"] = dist best_AUC = -main(best) with open('finetune.txt', 'a') as f: f.write( "At distance {}, the best AUC is {}, its lr_rate is {}, drop_out is {}, batch_size is {}\n\n" .format(dist, best_AUC, best["lr_rate"], best["dp_out"], best["bt_size"])) except Exception as err: with open("error_info.txt", "a") as f:
if booster == "gbtree": pred_test = model.predict(X_test) elif booster == "dart": pred_test = model.predict(X_test, ntree_limit = num_round) error= MSE(y_test,pred_test) r2=-r2_score(y_train,model.predict(X_train)) return float(error) # DEFINING SEARCH SPACE search_space = {'booster': hp.choice('booster', ['gbtree',"dart"]), 'n_estimators': hp.quniform('n_estimators', 50, 3000, 1), 'eta': hp.uniform('eta', 0, 1), 'gamma': hp.uniform('gamma', 1, 500), 'max_depth': hp.quniform('max_depth', 3, 100, 1), 'min_child_weight': hp.uniform('min_child_weight', 0, 100), 'random_state': sample(scope.int(hp.quniform('random_state', 4, 8, 1))), 'subsample': hp.uniform('subsample', 0, 1), 'alpha': hp.uniform('alpha', 1, 8), 'colsample_bytree': hp.uniform('colsample_bytree', 0, 1), 'sample_type': hp.choice('sample_type', ['uniform', 'weighted']), 'normalize_type': hp.choice('normalize_type', ['tree', 'forest']), 'grow_policy': hp.choice('grow_policy', ['depthwise', 'lossguide']), 'rate_drop': hp.uniform('rate_drop', 0, 1), 'skip_drop': hp.uniform('skip_drop', 0, 1), 'colsample_bylevel': hp.uniform('colsample_bylevel', 0, 1),
hp.quniform("max_depth", 1, 10, 1), "subsample": hp.quniform("subsample", 0.1, 1, 0.05), "colsample_bytree": 1, "colsample_bylevel": hp.quniform("colsample_bylevel", 0.1, 1, 0.05), "nthread": xgb_nthread, "seed": xgb_random_seed, } param_space_clf_skl_random_logistic = { "alpha": hp.loguniform("alpha", np.log(0.01), np.log(20)), "normalize": hp.choice("normalize", [True, False]), "poly": hp.choice("poly", [False]), "n_estimators": hp.quniform("n_estimators", 2, 50, 2), "max_features": hp.quniform("max_features", 0.1, 1, 0.05), "bootstrap": hp.choice("bootstrap", [True, False]), "subsample": hp.quniform("subsample", 0.5, 1, 0.05), "random_state": skl_random_seed } ## linear support vector classifier param_space_clf_skl_lsvc = { "normalize": hp.choice("normalize", [True, False]), "C": hp.loguniform("C", np.log(1), np.log(100)), "epsilon":
def lgb_tuning(lgb_cv, N_FOLDS=5, MAX_EVALS=100, output_file='bayes_test.csv',
               metric='auc', objection='binary', groups=None):
    """Search LightGBM hyperparameters with hyperopt's TPE algorithm.

    Each evaluation cross-validates one sampled configuration with
    ``lgb.cv`` (at most 4000 boosting rounds, early stopping after 300
    rounds without improvement) and appends one row to ``output_file``.

    Args:
        lgb_cv: Training data handed to ``lgb.cv``. It must also provide
            ``get_group()`` and ``get_label()`` when ``groups`` is truthy
            (presumably a ``lightgbm.Dataset`` -- confirm with the caller).
        N_FOLDS: Number of cross-validation folds.
        MAX_EVALS: Number of configurations hyperopt evaluates.
        output_file: Path of the CSV log; it is overwritten at the start
            of every call.
        metric: Metric name passed to ``lgb.cv``. ``'binary_error'`` is
            minimised directly; every other metric is treated as a score
            to maximise and the loss is ``1 - score``.
        objection: Value stored under the LightGBM ``'objective'`` key.
        groups: When truthy, folds are built with ``GroupKFold`` from
            ``lgb_cv.get_group()``; otherwise ``lgb.cv`` makes its own.

    Returns:
        The ``best`` dictionary returned by ``hyperopt.fmin``. Parameters
        sampled with ``hp.choice`` are reported by hyperopt as the index
        of the selected option.
    """
    # Parameters sampled as floats (hp.quniform) that LightGBM needs as ints.
    integer_params = ('num_leaves', 'subsample_for_bin',
                      'min_child_samples', 'max_depth')

    # Number of objective evaluations so far. It lives in this scope and is
    # updated through `nonlocal`; re-initialising it inside `objective`
    # would log iteration 0 for every row.
    ITERATION = 0

    def objective(hyperparameters, groups=groups):
        """Cross-validate one sampled configuration and report its loss."""
        nonlocal ITERATION
        ITERATION += 1  # 1-based evaluation counter

        # Work on a copy so the dict owned by hyperopt is left untouched.
        params = dict(hyperparameters)

        # The number of trees is found by early stopping, never sampled.
        params.pop('n_estimators', None)

        # 'boosting_type' is sampled as a nested dict so that each booster
        # carries its own subsample setting; flatten it to top-level keys.
        boosting = params['boosting_type']
        params['boosting_type'] = boosting['boosting_type']
        params['subsample'] = boosting.get('subsample', 1.0)

        # Convert only the keys that were actually sampled:
        # 'subsample_for_bin' is currently disabled in the search space and
        # indexing it unconditionally raised KeyError.
        for name in integer_params:
            if name in params:
                params[name] = int(params[name])

        params['objective'] = objection

        start = timer()

        # Perform n_folds cross validation
        if groups:
            # NOTE(review): GroupKFold expects one group id per sample --
            # confirm that get_group() returns that rather than group sizes.
            groups = lgb_cv.get_group()
            folds = GroupKFold(n_splits=N_FOLDS).split(
                lgb_cv.get_label(), groups=groups)
        else:
            folds = None

        if metric.lower() == 'map':
            params['eval_at'] = 1

        cv_results = lgb.cv(params, lgb_cv, num_boost_round=4000,
                            nfold=N_FOLDS, folds=folds,
                            early_stopping_rounds=300, metrics=metric)

        run_time = timer() - start

        # First key in sorted order; presumably the '<metric>-mean' series,
        # which sorts before '<metric>-stdv' -- confirm for the installed
        # LightGBM version.
        score_key = sorted(cv_results.keys())[0]

        # Last recorded value of that series is taken as the score.
        best_score = cv_results[score_key][-1]

        # Loss must be minimized
        if metric == 'binary_error':
            loss = best_score
        else:
            # NOTE(review): only correct for metrics where higher is better.
            loss = 1 - best_score

        # Number of boosting rounds actually recorded by the CV run.
        params['n_estimators'] = len(cv_results[score_key])

        # Append this evaluation to the CSV log.
        with open(output_file, 'a', newline='') as log_file:
            csv.writer(log_file).writerow(
                [loss, params, ITERATION, run_time, best_score])

        # Dictionary with information for evaluation
        return {'loss': loss, 'hyperparameters': params,
                'iteration': ITERATION, 'train_time': run_time,
                'status': STATUS_OK}

    # Define the search space
    space = {
        'boosting_type': hp.choice('boosting_type', [
            {'boosting_type': 'gbdt',
             'subsample': hp.uniform('gdbt_subsample', 0.5, 1)},
            {'boosting_type': 'dart',
             'subsample': hp.uniform('dart_subsample', 0.5, 1)},
            {'boosting_type': 'goss', 'subsample': 1.0}]),
        'num_leaves': hp.quniform('num_leaves', 20, 200, 4),
        'learning_rate': hp.loguniform('learning_rate',
                                       np.log(0.005), np.log(0.5)),
        #'subsample_for_bin': hp.quniform('subsample_for_bin', 20000, 300000, 20000),
        'min_child_samples': hp.quniform('min_child_samples', 20, 300, 5),
        'reg_alpha': hp.uniform('reg_alpha', 0.0, 0.2),
        'reg_lambda': hp.uniform('reg_lambda', 0.0, 0.2),
        #'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0),
        'is_unbalance': hp.choice('is_unbalance', [True, False]),
        'max_depth': hp.quniform('max_depth', 4, 8, 1),
    }

    # Record results
    trials = Trials()

    # Start a fresh log containing only the column names.
    with open(output_file, 'w', newline='') as log_file:
        csv.writer(log_file).writerow(
            ['loss', 'hyperparameters', 'iteration', 'runtime', 'score'])

    # Run optimization
    best = fmin(fn=objective, space=space, algo=tpe.suggest,
                trials=trials, max_evals=MAX_EVALS)
    return best