示例#1
0
def test_regressor_evals_result(loop):  # noqa
    """Compare the evaluation history of the dask and plain regressors.

    A ``dxgb.XGBRegressor`` is fitted on dask-array versions of the
    module-level ``X`` / ``y`` (built with ``da.from_array(..., 5)``) while a
    test cluster and client are active, and an ``xgb.XGBRegressor`` is fitted
    on ``X`` / ``y`` directly. Both fits use ``eval_metric="rmse"`` and the
    in-memory ``(X, y)`` pair as the only evaluation set; the two
    ``evals_result()`` values must compare equal under ``assert_eq``.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]):
        with Client(scheduler["address"], loop=loop):
            dask_features = da.from_array(X, 5)
            dask_target = da.from_array(y, 5)
            dask_model = dxgb.XGBRegressor()
            dask_model.fit(
                dask_features,
                dask_target,
                eval_metric="rmse",
                eval_set=[(X, y)],
            )
            # Captured inside the client context; compared after it exits.
            evals_result = dask_model.evals_result()

    local_model = xgb.XGBRegressor()
    local_model.fit(X, y, eval_metric="rmse", eval_set=[(X, y)])
    expected_result = local_model.evals_result()
    assert_eq(evals_result, expected_result)
示例#2
0
def test_regressor(loop):  # noqa
    """Check that the dask and plain regressors produce equal predictions.

    A ``dxgb.XGBRegressor`` is fitted and used to predict on dask-array
    versions of the module-level ``X`` / ``y`` (built with
    ``da.from_array(..., 5)``) while a test cluster and client are active.
    An ``xgb.XGBRegressor`` is then fitted on ``X`` / ``y`` directly, and the
    two sets of predictions are compared with ``assert_eq``.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]):
        with Client(scheduler['address'], loop=loop):
            dask_features = da.from_array(X, 5)
            dask_target = da.from_array(y, 5)
            dask_model = dxgb.XGBRegressor()
            dask_model.fit(dask_features, dask_target)
            dask_predictions = dask_model.predict(dask_features)

    local_model = xgb.XGBRegressor()
    local_model.fit(X, y)
    local_predictions = local_model.predict(X)
    assert_eq(dask_predictions, local_predictions)
示例#3
0
def test_regressor(xgboost_loop):  # noqa
    """Check weighted fits of the dask and plain regressors against each other.

    A ``dxgb.XGBRegressor`` is fitted on dask-array versions of the
    module-level ``X`` / ``y`` / ``weight`` (built with
    ``da.from_array(..., 5)``) while a test cluster and client are active, and
    an ``xgb.XGBRegressor`` is fitted on the same in-memory data. The test
    then requires that the feature importances agree (almost-equal) and that
    the predictions agree under ``assert_eq``.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]):
        with Client(scheduler['address'], loop=xgboost_loop):
            dask_features = da.from_array(X, 5)
            dask_target = da.from_array(y, 5)
            dask_weights = da.from_array(weight, 5)
            dask_model = dxgb.XGBRegressor()
            dask_model.fit(dask_features, dask_target, sample_weight=dask_weights)
            dask_predictions = dask_model.predict(dask_features)

    local_model = xgb.XGBRegressor()
    local_model.fit(X, y, sample_weight=weight)

    # The dask model's importances are read after the cluster context exits.
    np.testing.assert_array_almost_equal(
        dask_model.feature_importances_,
        local_model.feature_importances_,
    )
    assert_eq(dask_predictions, local_model.predict(X))
示例#4
0
def main():
    """Grid-search a dask-xgboost regressor and report how long the fit took.

    Obtains the train/test split from ``ps.preprocess().cleaning()``, runs a
    ``GridSearchCV`` over a single-point parameter grid under the joblib
    ``"dask"`` backend, predicts on the test split (the predictions are
    discarded), and prints the elapsed fit time in whole seconds.

    The dask client is shut down even when fitting or predicting raises.
    """
    # Named ``preprocessor`` rather than ``object`` so the builtin is not
    # shadowed.
    preprocessor = ps.preprocess()
    X_train, X_test, y_train, y_test = preprocessor.cleaning()

    # NOTE(review): 'binary:logistic' is a classification objective and
    # 'nround' does not look like a scikit-learn-API parameter name
    # (the usual one is 'n_estimators') — confirm both are intended.
    param_grid = {
        'objective': ['binary:logistic'],
        'nround': [1000],
        'max_depth': [8],
    }
    estimator = dxgb.XGBRegressor()
    grid_search = GridSearchCV(estimator, param_grid, verbose=2, cv=2, n_jobs=-1)

    client = Client(processes=False)
    try:
        start_time = time.time()
        with joblib.parallel_backend("dask"):
            grid_search.fit(X_train, y_train)
        end_time = time.time()

        grid_search.predict(X_test)

        # Report the duration of the fit, not the absolute end timestamp.
        elapsed = end_time - start_time
        print("time difference in GridSearchCV second XGBRegressor is %d seconds " % elapsed)
    finally:
        # Always release the scheduler/workers, including on failure.
        client.shutdown()
示例#5
0
def test_regressor_with_early_stopping(loop):  # noqa
    """Check early-stopped fits of the dask and plain regressors agree.

    Both a ``dxgb.XGBRegressor`` (fitted on dask-array versions of the
    module-level ``X`` / ``y`` while a test cluster and client are active) and
    an ``xgb.XGBRegressor`` (fitted on ``X`` / ``y`` directly) are trained with
    ``early_stopping_rounds=4``, ``eval_metric="rmse"`` and the in-memory
    ``(X, y)`` pair as the evaluation set. The test requires equal predictions
    and equal ``best_score`` values under ``assert_eq``.
    """
    with cluster() as (scheduler, [_worker_a, _worker_b]):
        with Client(scheduler["address"], loop=loop):
            dask_features = da.from_array(X, 5)
            dask_target = da.from_array(y, 5)
            dask_model = dxgb.XGBRegressor()
            dask_model.fit(
                dask_features,
                dask_target,
                early_stopping_rounds=4,
                eval_metric="rmse",
                eval_set=[(X, y)],
            )
            dask_predictions = dask_model.predict(dask_features)

    local_model = xgb.XGBRegressor()
    local_model.fit(
        X,
        y,
        early_stopping_rounds=4,
        eval_metric="rmse",
        eval_set=[(X, y)],
    )
    assert_eq(dask_predictions, local_model.predict(X))
    assert_eq(dask_model.best_score, local_model.best_score)
示例#6
0
def test_validation_weights_xgbregressor(loop):  # noqa
    """Check that fitting with sample weights changes the test error.

    A synthetic regression problem of 2000 samples is split into the first
    1600 rows for training and the remaining rows for testing. One
    ``dxgb.XGBRegressor`` instance is fitted twice on the dask training
    arrays: first without weights, then with ``sample_weight`` and
    ``sample_weight_eval_set``. The mean squared errors of the two prediction
    sets against ``y_test`` must differ.
    """
    from sklearn.datasets import make_regression
    from sklearn.metrics import mean_squared_error

    # Build the data set that is split into training and test parts below.
    X, y = make_regression(n_samples=2000, random_state=42)

    with cluster() as (scheduler, [_worker_a, _worker_b]):
        with Client(scheduler["address"], loop=loop):
            X_train, y_train = X[:1600], y[:1600]
            X_test, y_test = X[1600:], y[1600:]

            dX_train = da.from_array(X_train)
            dy_train = da.from_array(y_train)
            dX_test = da.from_array(X_test)

            reg = dxgb.XGBRegressor()

            # First fit: no sample weights.
            reg.fit(dX_train, dy_train)
            preds = reg.predict(dX_test)

            # Draw the training weights before the test weights so the random
            # stream is consumed in a fixed order.
            rng = np.random.RandomState(0)
            weights_train = da.from_array(100.0 + rng.rand(len(X_train)))
            weights_test = 100.0 + rng.rand(len(X_test))

            # Second fit: same estimator, now weighted.
            reg.fit(
                dX_train,
                dy_train,
                sample_weight=weights_train,
                sample_weight_eval_set=[weights_test],
            )
            preds2 = reg.predict(dX_test)

    err = mean_squared_error(preds, y_test)
    err2 = mean_squared_error(preds2, y_test)
    assert err != err2
# Script fragment: convert the training target to a dask array, fit a
# dask-xgboost regressor on the training data, and persist the test
# predictions on the cluster.
# NOTE(review): X_train, X_test, y_train and client are defined outside this
# excerpt; y_train is presumably a dask DataFrame/Series since it exposes
# to_dask_array — confirm.
# lengths=True presumably makes dask compute the chunk lengths (see the dask
# to_dask_array docs) — confirm.
y_train = y_train.to_dask_array(lengths=True)

print("scaling")
# Feature scaling is currently disabled: the message above is still printed,
# but the StandardScaler steps below are commented out.
#scaler = StandardScaler()
#scaler.fit(X_train)
#scaled_data = scaler.transform(X_train)
#X_test = scaler.transform(X_test)

print("training")

# In[ ]:
# Regressor configuration. NOTE(review): max_depth=0 together with
# grow_policy='lossguide' and max_leaves=4 presumably removes the depth limit
# so tree size is bounded by the leaf count — confirm against the XGBoost
# parameter documentation.
base_model = dxgb.XGBRegressor(objective='reg:squarederror',
                               tree_method='hist',
                               verbosity=3,
                               n_jobs=-1,
                               n_estimators=1000,
                               learning_rate=0.010,
                               max_depth=0,
                               max_leaves=4,
                               grow_policy='lossguide')

# Fit inside the joblib "dask" backend context, passing the flattened target.
with joblib.parallel_backend('dask'):
    base_model.fit(X_train, y_train.flatten())
# Saving the fitted model is currently disabled.
#base_model.save_model('base_line_no_max_deph_lr_%f_%i.model'%(lr,leaves))
#
# Predict on the test features, then hand the result to client.persist.
predictions = base_model.predict(X_test)
predictions = client.persist(predictions)
#
# Metric reporting (R^2 / MAE) is currently disabled.
#print ("########")
#print ("R^2:",r2_score(y_test.compute(), predictions.compute()))
#print ("MAE:",mean_absolute_error(y_test.compute(), predictions.compute()))