def xgb_evaluate(min_child_weight, colsample_bytree, max_depth, subsample,
                 gamma, reg_alpha, reg_lambda):
    """Train an XGBoost regressor on a hold-out split and return its best score.

    Intended as an objective function for hyper-parameter search: every
    argument is one tunable XGBoost hyper-parameter. Integer-valued
    parameters (``max_depth``, ``min_child_weight``) arrive as floats from
    the optimizer and are rounded before use.

    Returns:
        The ``best_score`` reported by the trained ``XgbWrapper``.
    """
    target = 'visitors'
    features = features_set_f0()
    split = 0.33
    seed = 177

    # Build the train/validation split, then drop the full dataset right
    # away to keep peak memory down during repeated evaluations.
    full_data, ntrain, ntest = data_preparation()
    trn = full_data[:ntrain]
    x_train, x_valid, y_train, y_valid = train_test_split(
        trn[features].values, trn[target].values,
        test_size=split, random_state=seed)
    del full_data, trn
    gc.collect()

    xgb_params = {
        'objective': 'reg:linear',
        'eval_metric': 'rmse',
        'eta': 0.1,
        'seed': seed,
        'silent': True,  # does help
        'verbose_eval': False,
        'nrounds': 500,
        # Optimizer hands over floats; these two must be integers.
        'max_depth': int(np.round(max_depth)),
        'min_child_weight': int(np.round(min_child_weight)),
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'gamma': gamma,
        'alpha': reg_alpha,
        'lambda': reg_lambda,
    }

    booster = XgbWrapper(seed=seed, params=xgb_params)
    booster.train(x_train, y_train, x_valid, y_valid)
    return booster.best_score
def lgb_evaluate(min_child_sample, num_leaves, max_bin, min_child_weight,
                 subsample, subsample_freq, colsample_bytree, reg_alpha,
                 reg_lambda, feature_fraction, bagging_fraction):
    """Train a LightGBM regressor on a hold-out split and return its best score.

    Intended as an objective function for hyper-parameter search: every
    argument is one tunable LightGBM hyper-parameter. Integer-valued
    parameters arrive as floats from the optimizer and are rounded before
    use.

    Returns:
        The ``best_score`` reported by the trained ``LgbWrapper``.
    """
    target = 'visitors'
    features = features_set_f0()
    split = 0.33
    seed = 177

    # Build the train/validation split, then drop the full dataset right
    # away to keep peak memory down during repeated evaluations.
    full_data, ntrain, ntest = data_preparation()
    trn = full_data[:ntrain]
    x_train, x_valid, y_train, y_valid = train_test_split(
        trn[features].values, trn[target].values,
        test_size=split, random_state=seed)
    del full_data, trn
    gc.collect()

    lgb_params = dict()
    lgb_params['objective'] = 'regression_l2'
    lgb_params['metric'] = 'l2_root'
    lgb_params['learning_rate'] = 0.1
    lgb_params['random_state'] = seed
    lgb_params['silent'] = True  # does help
    lgb_params['verbose_eval'] = False
    lgb_params['n_estimators'] = 500
    # Optimizer hands over floats; frequency/count parameters must be ints.
    lgb_params['min_child_samples'] = int(np.round(min_child_sample))
    lgb_params['num_leaves'] = int(np.round(num_leaves))
    lgb_params['max_bin'] = int(np.round(max_bin))
    lgb_params['subsample_freq'] = int(np.round(subsample_freq))
    lgb_params['colsample_bytree'] = colsample_bytree
    lgb_params['reg_alpha'] = reg_alpha
    lgb_params['reg_lambda'] = reg_lambda
    lgb_params['min_child_weight'] = min_child_weight
    lgb_params['subsample'] = subsample
    lgb_params['feature_fraction'] = feature_fraction
    # BUG FIX: the original did
    #     lgb_params['bagging_freq'] = int(np.round(bagging_fraction))
    # which rounds a (0, 1] sampling ratio to 0 or 1 and stores it under
    # 'bagging_freq' — a LightGBM alias of 'subsample_freq', already set
    # above. Pass the ratio through under its own key instead.
    # NOTE(review): 'subsample' is itself an alias of 'bagging_fraction'
    # in LightGBM, so the searcher tunes two names for the same knob —
    # consider dropping one of them; verify against the search space.
    lgb_params['bagging_fraction'] = bagging_fraction

    lgb_clf = LgbWrapper(params=lgb_params)
    lgb_clf.train(x_train, y_train, x_valid, y_valid)
    return lgb_clf.best_score
import sys
import os

# Make sibling project packages importable before the local imports below.
sys.path.append("../")

from general.preprocess import data_preparation
from general.ClfWrappers import XgbWrapper
from general.utilities import sub_to_csv
from features.f0 import features_set_f0
from cv.cv_02 import cross_validate

TARGET = 'visitors'
FEATURES = features_set_f0()
SEED = 177

# BUG FIX: corrected "Overfiting" typo in the status message.
print("Overfitting process initiating...")

# Tuned XGBoost hyper-parameters for the final (long, slow-eta) training run.
xgb_params = {
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'eta': 0.02,
    'seed': SEED,
    'silent': True,  # does help
    'verbose_eval': False,
    'nrounds': 5000,
    'early_stopping_rounds': 100,
    'max_depth': 6,
    'min_child_weight': 1,
    'colsample_bytree': 0.724,
    'subsample': 0.925,
    'gamma': 0.512,
    'alpha': 8.6,
    'lambda': 1,
}