Example #1
def _train():
    # Configure and train the model. Hyperparameters come from the active W&B
    # sweep via wandb.config; dtrain, watchlist, val, feature_list, evaluate and
    # seed are expected to be defined in the enclosing scope.
    wandb.init(name="LightGBM_sweep")
    lgbm_config = {
        "num_leaves": wandb.config.num_leaves,
        "max_depth": wandb.config.max_depth,
        "learning_rate": wandb.config.learning_rate,
        "bagging_freq": wandb.config.bagging_freq,
        "bagging_fraction": wandb.config.bagging_fraction,
        "feature_fraction": wandb.config.feature_fraction,
        "metric": 'mse',
        "random_state": seed
    }
    # Note: verbose_eval / early_stopping_rounds are keyword arguments in
    # LightGBM < 4.0; newer releases expect the lgb.log_evaluation and
    # lgb.early_stopping callbacks instead.
    lgbm_model = lgb.train(lgbm_config,
                           train_set=dtrain,
                           num_boost_round=750,
                           valid_sets=watchlist,
                           callbacks=[wandb_callback()],
                           verbose_eval=100,
                           early_stopping_rounds=50)

    # Create predictions for evaluation
    val_preds = lgbm_model.predict(val[feature_list],
                                   num_iteration=lgbm_model.best_iteration)
    val.loc[:, "prediction_kazutsugi"] = val_preds
    # W&B log metrics
    spearman, payout, numerai_sharpe, mae = evaluate(val)
    wandb.log({
        "Spearman": spearman,
        "Payout": payout,
        "Numerai Sharpe Ratio": numerai_sharpe,
        "Mean Absolute Error": mae
    })
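
The _train function above is written to be driven by a W&B hyperparameter sweep: every tuned value is read from wandb.config. As a rough sketch of how such a sweep could be launched (the search method, ranges, and project name below are illustrative assumptions, not taken from the original repository):

import wandb

# Hypothetical sweep configuration: the parameter names match the keys _train
# reads from wandb.config; the search ranges and metric goal are assumptions.
sweep_config = {
    "method": "bayes",
    "metric": {"name": "Spearman", "goal": "maximize"},
    "parameters": {
        "num_leaves": {"values": [31, 63, 127]},
        "max_depth": {"values": [5, 7, 10]},
        "learning_rate": {"min": 0.005, "max": 0.1},
        "bagging_freq": {"values": [1, 5, 10]},
        "bagging_fraction": {"min": 0.5, "max": 1.0},
        "feature_fraction": {"min": 0.5, "max": 1.0},
    },
}

sweep_id = wandb.sweep(sweep_config, project="numerai-lightgbm")  # project name is a placeholder
wandb.agent(sweep_id, function=_train, count=20)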
Example #2
def run_model(dataset):
    train_idx, valid_idx = list(range(ntrain)), list(range(ntrain, ndata))
    train, valid = dataset.iloc[train_idx], dataset.iloc[valid_idx]
    train_x, train_y = train[['x']], train['y']
    valid_x, valid_y = valid[['x']], valid['y']
    n_train = lgb.Dataset(train_x, label=train_y)
    n_valid = lgb.Dataset(valid_x, label=valid_y)
    params = {
        'learning_rate': 0.2,
        'boosting_type': 'gbdt',
        # 'gbdt': traditional Gradient Boosting Decision Tree.
        # 'dart': Dropouts meet Multiple Additive Regression Trees.
        # 'goss': Gradient-based One-Side Sampling.
        # 'rf':   Random Forest.
        'objective': 'regression',
        'max_depth': 10,
        'num_leaves': 200,
        'metric': 'mse',
        'feature_fraction': 0.8,
        'bagging_fraction': 0.6,
        'bagging_freq': 6,
        'bagging_seed': 1,
        'seed': 8,
        'feature_fraction_seed': 7,
        'min_data_in_leaf': 50,
        'verbose': 1,
        #         'device_type': 'gpu',
        'nthread': 6,
        'lambda_l2': 0.01,
    }

    clf = lgb.train(
        params=params,
        train_set=n_train,
        num_boost_round=35000,
        valid_sets=[n_valid, n_train],
        early_stopping_rounds=500,
        verbose_eval=200,
        callbacks=[wandb_callback()],
    )
    return clf  # return the trained booster so callers can use it
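
run_model depends on names defined outside the function: ntrain, ndata, and wandb_callback. A minimal sketch of that surrounding setup, assuming a synthetic single-feature regression frame (the column names 'x' and 'y' match what the function selects; the sizes, project name, and data-generating process are assumptions):

import lightgbm as lgb  # used inside run_model above
import numpy as np
import pandas as pd
import wandb
from wandb.integration.lightgbm import wandb_callback  # older wandb versions: from wandb.lightgbm import wandb_callback

wandb.init(project="lightgbm-demo")  # hypothetical project name

# Synthetic data: run_model expects a frame with columns 'x' and 'y'
ndata, ntrain = 10000, 8000
rng = np.random.default_rng(0)
x = rng.uniform(-3, 3, ndata)
df = pd.DataFrame({"x": x, "y": np.sin(x) + rng.normal(0, 0.1, ndata)})

run_model(df)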
Example #3
def test_feature_importance_attribute_exists_for_lightgbm(wandb_init_run):
    X, y = make_classification(n_samples=100,
                               n_features=10,
                               n_informative=2,
                               n_redundant=5,
                               random_state=42)
    ten_features = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
    params = {
        'learning_rate': 0.01,
        'max_depth': -1,
        'num_leaves': 4,
        'objective': 'fair',
        'boosting': 'gbdt',
        'boost_from_average': True,
        'feature_fraction': 0.9,
        'bagging_freq': 1,
        'bagging_fraction': 0.5,
        'early_stopping_rounds': 200,
        'metric': 'rmse',
        'max_bin': 255,
        'n_jobs': -1,
        'verbosity': -1,
        'bagging_seed': 1234
    }
    dataset = lgb.Dataset(X, y)
    model = lgb.train(params,
                      dataset,
                      valid_sets=[dataset],
                      valid_names=['train'],
                      callbacks=[wandb_callback()])

    result = plot_feature_importances(model, feature_names=ten_features)

    assert isinstance(
        result, wandb.viz.Visualize
    ), "Should have returned a result, as feature_importances() attribute does exist"
Example #4
def test_basic_lightgbm(dummy_data, wandb_init_run):
    param = {'max_depth': 2, 'eta': 1}  # 'eta' is a LightGBM alias for learning_rate
    num_round = 2
    lgb.train(param, dummy_data, num_round, callbacks=[wandb_callback()])
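
The dummy_data fixture is not shown in this snippet. A plausible stand-in (purely an assumption) is a pytest fixture wrapping a tiny random regression set in an lgb.Dataset:

import lightgbm as lgb
import numpy as np
import pytest


@pytest.fixture
def dummy_data():
    # Hypothetical fixture: a small random dataset so lgb.train has something to fit
    rng = np.random.default_rng(42)
    X = rng.normal(size=(50, 5))
    y = rng.normal(size=50)
    return lgb.Dataset(X, label=y)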
Example #5
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': ['rmse', 'l2', 'l1', 'huber'],
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbosity': -1
}
wandb.config.update(params)

# train
# add lightgbm callback
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=20,
    valid_sets=lgb_eval,
    valid_names=['validation'],
    callbacks=[wandb_callback(),
               lgb.early_stopping(stopping_rounds=5)])

# predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
# eval
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred)**0.5)

# log feature importance and model checkpoint
log_summary(gbm, save_model_checkpoint=True)
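
This last snippet begins partway through its setup, so the imports and data loading are not shown. Under the usual pattern for the wandb LightGBM integration, the missing preamble would look roughly like the following; the file names, separator, and choice of target column are assumptions:

import lightgbm as lgb
import pandas as pd
import wandb
from sklearn.metrics import mean_squared_error
from wandb.integration.lightgbm import wandb_callback, log_summary

wandb.init(project="lightgbm-regression")  # hypothetical project name

# Hypothetical data loading: tab-separated files with the target in column 0
df_train = pd.read_csv("regression.train", header=None, sep="\t")
df_test = pd.read_csv("regression.test", header=None, sep="\t")
y_train, X_train = df_train[0], df_train.drop(columns=[0])
y_test, X_test = df_test[0], df_test.drop(columns=[0])

lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)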