# Example 1
# 0
def xgb_evaluate(
    min_child_weight,
    colsample_bytree,
    max_depth,
    subsample,
    gamma,
    reg_alpha,
    reg_lambda,
):
    """Train an XGBoost model on a fixed train/valid split and return the
    best validation score.

    Intended as a black-box objective for a hyper-parameter optimizer:
    every argument is one tunable XGBoost parameter.
    """
    target = 'visitors'
    features = features_set_f0()
    split = 0.33
    seed = 177

    full_data, ntrain, ntest = data_preparation()
    trn = full_data[:ntrain]
    x_train, x_valid, y_train, y_valid = train_test_split(
        trn[features].values,
        trn[target].values,
        test_size=split,
        random_state=seed,
    )

    # Release the large frames before training to keep memory down.
    del full_data, trn
    gc.collect()

    xgb_params = {
        'objective': 'reg:linear',
        'eval_metric': 'rmse',
        'eta': 0.1,
        'seed': seed,
        'silent': True,  # does help
        'verbose_eval': False,
        'nrounds': 500,
        # Optimizers propose floats; these must be integral for XGBoost.
        'max_depth': int(np.round(max_depth)),
        'min_child_weight': int(np.round(min_child_weight)),
        'colsample_bytree': colsample_bytree,
        'subsample': subsample,
        'gamma': gamma,
        'alpha': reg_alpha,
        'lambda': reg_lambda,
    }

    xgb_clf = XgbWrapper(seed=seed, params=xgb_params)
    xgb_clf.train(x_train, y_train, x_valid, y_valid)

    return xgb_clf.best_score
# Example 2
# 0
def lgb_evaluate(min_child_sample, num_leaves, max_bin, min_child_weight,
                 subsample, subsample_freq, colsample_bytree, reg_alpha,
                 reg_lambda, feature_fraction, bagging_fraction):
    """Train a LightGBM model on a fixed train/valid split and return the
    best validation score.

    Intended as a black-box objective for a hyper-parameter optimizer:
    every argument is one tunable LightGBM parameter.
    """
    target = 'visitors'
    features = features_set_f0()
    split = 0.33
    seed = 177

    full_data, ntrain, ntest = data_preparation()
    trn = full_data[:ntrain]
    x_train, x_valid, y_train, y_valid = train_test_split(trn[features].values,
                                                          trn[target].values,
                                                          test_size=split,
                                                          random_state=seed)

    # Release the large frames before training to keep memory down.
    del full_data, trn
    gc.collect()

    lgb_params = dict()
    lgb_params['objective'] = 'regression_l2'
    lgb_params['metric'] = 'l2_root'
    lgb_params['learning_rate'] = 0.1
    lgb_params['random_state'] = seed
    lgb_params['silent'] = True  # does help
    lgb_params['verbose_eval'] = False

    lgb_params['n_estimators'] = 500

    # Optimizers propose floats; these must be integral for LightGBM.
    lgb_params['min_child_samples'] = int(np.round(min_child_sample))
    lgb_params['num_leaves'] = int(np.round(num_leaves))
    lgb_params['max_bin'] = int(np.round(max_bin))
    lgb_params['subsample_freq'] = int(np.round(subsample_freq))
    lgb_params['colsample_bytree'] = colsample_bytree
    lgb_params['reg_alpha'] = reg_alpha
    lgb_params['reg_lambda'] = reg_lambda
    lgb_params['min_child_weight'] = min_child_weight
    lgb_params['subsample'] = subsample
    lgb_params['feature_fraction'] = feature_fraction
    # BUG FIX: the original rounded `bagging_fraction` to an int and stored
    # it under 'bagging_freq', discarding this tuning dimension entirely
    # (a fraction in (0, 1] rounds to 0 or 1 as a frequency).
    # `bagging_fraction` is a float row-sampling fraction in LightGBM.
    # NOTE(review): 'subsample' is a LightGBM alias of 'bagging_fraction',
    # so these two settings may conflict — consider dropping one upstream.
    lgb_params['bagging_fraction'] = bagging_fraction

    lgb_clf = LgbWrapper(params=lgb_params)
    lgb_clf.train(x_train, y_train, x_valid, y_valid)

    return lgb_clf.best_score
# Example 3
# 0
import sys, os

sys.path.append("../")
from general.preprocess import data_preparation
from general.ClfWrappers import XgbWrapper
from general.utilities import sub_to_csv
from features.f0 import features_set_f0
from cv.cv_02 import cross_validate

# Experiment constants: prediction target, feature set, and RNG seed.
TARGET = 'visitors'
FEATURES = features_set_f0()
SEED = 177

print("Overfiting process initiating...")

# Training configuration: fixed run settings plus tuned hyper-parameters.
xgb_params = {
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'eta': 0.02,
    'seed': SEED,
    'silent': True,  # does help
    'verbose_eval': False,
    'nrounds': 5000,
    'early_stopping_rounds': 100,
    'max_depth': 6,
    'min_child_weight': 1,
    'colsample_bytree': 0.724,
    'subsample': 0.925,
    'gamma': 0.512,
    'alpha': 8.6,
    'lambda': 1,
}