Example #1
# Skopt functions
from skopt import BayesSearchCV
from skopt import gp_minimize  # Bayesian optimization using Gaussian processes
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args  # decorator to convert a list of parameters to named arguments
from skopt.callbacks import DeadlineStopper  # stop the optimization before running out of a fixed time budget
from skopt.callbacks import VerboseCallback  # callback to control verbosity
# Stop the optimization if the distance between the last two points at which
# the objective was evaluated is less than delta
from skopt.callbacks import DeltaXStopper

import lightgbm as lgb  # needed for the LGBMClassifier below
from gen_feas import load_data

# Loading the Boston dataset (kept for reference; superseded by load_data below)
# X, y = load_boston(return_X_y=True)

train, test, no_features, features = load_data()
X = train[features].values
y = train['target'].astype('int32')
# Transforming the problem into a classification (unbalanced)
# y_bin = (y > np.percentile(y, 90)).astype(int)
clf = lgb.LGBMClassifier(boosting_type='gbdt',
                         class_weight='balanced',
                         objective='binary',
                         n_jobs=-1,
                         verbose=0)
search_spaces = {
    'learning_rate': Real(0.01, 1.0, 'log-uniform'),
    'num_leaves': Integer(2, 500),
    'max_depth': Integer(0, 500),
    'min_child_samples': Integer(0, 200),
    'max_bin': Integer(100, 100000),
    # ... (remaining dimensions truncated in the source)
}
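
The snippet cuts off before the search is actually run. Below is a minimal sketch of how the imported pieces would typically be wired together; the scoring metric, iteration count, CV splitter, and time budget are illustrative assumptions, not taken from the original.

from sklearn.model_selection import StratifiedKFold

# Wrap the classifier and search space in BayesSearchCV; 'roc_auc', n_iter
# and the CV scheme are illustrative choices, not from the source.
opt = BayesSearchCV(
    estimator=clf,
    search_spaces=search_spaces,
    scoring='roc_auc',
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=0),
    n_iter=60,
    n_jobs=-1,
    random_state=0,
)

# The imported callbacks plug into fit(): report progress, stop after a
# 10-minute time budget, or stop when consecutive evaluation points are
# closer together than delta.
opt.fit(X, y, callback=[
    VerboseCallback(n_total=60),
    DeadlineStopper(total_time=600),  # seconds
    DeltaXStopper(delta=1e-8),
])
print('best score:', opt.best_score_, 'best params:', opt.best_params_)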
Example #2
from sklearn.model_selection import StratifiedKFold  # needed for the CV split below

xgb_params = {
    'objective': 'binary:logistic',
    'max_depth': 5,
    'n_estimators': 100000,
    'learning_rate': 0.1,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 3,
    'n_jobs': -1  # 'nthread' is just an alias for 'n_jobs'; keeping both is redundant
}
n_fold = 5
num_classes = 2
print("Number of classes num_classes: {}".format(num_classes))
# random_state only takes effect (and recent scikit-learn only allows it) with shuffle=True
folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=1314)
train, y, test, features = load_data()

X = train[features]
print(y.value_counts())
X_test = test[features]

result_dict_lgb = train_model_classification(X=X,
                                             X_test=X_test,
                                             y=y,
                                             params=xgb_params,
                                             num_classes=num_classes,
                                             folds=folds,
                                             model_type='xgb',
                                             eval_metric='logloss',
                                             plot_feature_importance=False,
                                             verbose=10,
                                             # ... (remaining arguments truncated in the source)
                                             )
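
train_model_classification is a project-local helper that is not shown in the source. As a rough guide to what such a wrapper usually does, here is a minimal sketch of an out-of-fold CV loop for the 'xgb' branch; the function body, the early-stopping setting, and the returned dictionary keys are all assumptions, not the author's actual code.

import numpy as np
import xgboost as xgb
from sklearn.metrics import log_loss

def train_model_classification(X, X_test, y, params, num_classes, folds,
                               model_type='xgb', eval_metric='logloss',
                               plot_feature_importance=False, verbose=10):
    # Sketch only: binary case (num_classes == 2) assumed;
    # plot_feature_importance is accepted but ignored here.
    oof = np.zeros(len(X))
    test_pred = np.zeros(len(X_test))
    scores = []
    for train_idx, valid_idx in folds.split(X, y):
        X_tr, X_val = X.iloc[train_idx], X.iloc[valid_idx]
        y_tr, y_val = y.iloc[train_idx], y.iloc[valid_idx]
        # eval_metric/early_stopping_rounds as constructor arguments
        # require xgboost >= 1.6
        model = xgb.XGBClassifier(**params, eval_metric=eval_metric,
                                  early_stopping_rounds=100)
        model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=verbose)
        oof[valid_idx] = model.predict_proba(X_val)[:, 1]
        test_pred += model.predict_proba(X_test)[:, 1] / folds.get_n_splits()
        scores.append(log_loss(y_val, oof[valid_idx]))
    return {'oof': oof, 'prediction': test_pred, 'scores': scores}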
Example #3
xgb_params = {
    # ... (earlier entries truncated in the source)
    'min_child_weight': 3
}
cat_paras = {  # CatBoost parameters; defined but not used in the visible snippet
    'iterations': 50,
    'learning_rate': 0.1,
    'od_type': 'Iter',
    'l2_leaf_reg': 3,
    'depth': 10
}
n_fold = 5
nums = [3, 4, 5, 11]
for num_classes in nums:
    print("Number of classes num_classes: {}".format(num_classes))
    # random_state only takes effect (and recent scikit-learn only allows it) with shuffle=True
    folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=1314)
    train, _, test, features = load_data(num_classes)

    X = train[features].copy()  # copy to avoid SettingWithCopyWarning on the astype below
    X['Year-Of-Publication'] = X['Year-Of-Publication'].astype(int)
    X_test = test[features].copy()
    X_test['Year-Of-Publication'] = X_test['Year-Of-Publication'].astype(int)
    y = train['Book-Rating'].astype(int)
    print(y.value_counts())

    result_dict_lgb = train_model_classification(X=X,
                                                 X_test=X_test,
                                                 y=y,
                                                 params=xgb_params,
                                                 num_classes=num_classes,
                                                 # ... (remaining arguments truncated in the source)
                                                 )
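
cat_paras is defined above but never used in the visible part of the snippet; presumably a CatBoost run of the same helper follows the truncation. As a hedged illustration, the parameters map directly onto CatBoost's own API; the train/validation split below is made up for the example, and any string-typed feature columns would additionally need to be listed in cat_features.

from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split

X_tr, X_val, y_tr, y_val = train_test_split(X, y, stratify=y, random_state=1314)
# 'iterations', 'learning_rate', 'od_type', 'l2_leaf_reg' and 'depth' are
# genuine CatBoost parameter names, taken directly from cat_paras above;
# od_type='Iter' enables iteration-based overfitting detection on eval_set.
cat_model = CatBoostClassifier(**cat_paras, verbose=10)
cat_model.fit(X_tr, y_tr, eval_set=(X_val, y_val))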