Example #1
0
def test_regressor_quantile(output, client, listen_port, alpha):
    """Distributed quantile regression should match local training.

    Fits a ``DaskLGBMRegressor`` and a plain ``LGBMRegressor`` with the same
    quantile objective, then checks that each model's empirical quantile
    (fraction of targets strictly below the prediction) is close to ``alpha``.
    """
    X, y, w, dX, dy, dw = _create_data(objective='regression', output=output)

    dask_regressor = dlgbm.DaskLGBMRegressor(local_listen_port=listen_port,
                                             seed=42,
                                             objective='quantile',
                                             alpha=alpha,
                                             n_estimators=10,
                                             num_leaves=10,
                                             tree_learner_type='data_parallel')
    dask_regressor = dask_regressor.fit(dX,
                                        dy,
                                        client=client,
                                        sample_weight=dw)
    p1 = dask_regressor.predict(dX).compute()
    q1 = np.count_nonzero(y < p1) / y.shape[0]

    # BUG FIX: was 'n_estimatores' (typo). The unknown kwarg was silently
    # passed through, so the local model trained with the default number of
    # estimators (100) instead of the intended 10, unlike the dask model.
    local_regressor = lightgbm.LGBMRegressor(seed=42,
                                             objective='quantile',
                                             alpha=alpha,
                                             n_estimators=10,
                                             num_leaves=10)
    local_regressor.fit(X, y, sample_weight=w)
    p2 = local_regressor.predict(X)
    q2 = np.count_nonzero(y < p2) / y.shape[0]

    # Both empirical quantiles should land near the requested alpha.
    np.testing.assert_allclose(q1, alpha, atol=0.2)
    np.testing.assert_allclose(q2, alpha, atol=0.2)

    client.close()
Example #2
0
def test_regressor_pred_contrib(output, client, listen_port):
    """``pred_contrib`` output of distributed training should have the same
    shape as local training's, with the extra base-value column appended."""
    X, y, w, dX, dy, dw = _create_data(objective='regression', output=output)

    shared_params = {"n_estimators": 10, "num_leaves": 10}

    dask_regressor = dlgbm.DaskLGBMRegressor(time_out=5,
                                             local_listen_port=listen_port,
                                             tree_learner='data',
                                             **shared_params)
    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)
    preds_with_contrib = dask_regressor.predict(dX, pred_contrib=True).compute()

    local_regressor = lightgbm.LGBMRegressor(**shared_params)
    local_regressor.fit(X, y, sample_weight=w)
    local_preds_with_contrib = local_regressor.predict(X, pred_contrib=True)

    if output == "scipy_csr_matrix":
        preds_with_contrib = np.array(preds_with_contrib.todense())

    # contrib outputs for distributed training are different than from local training, so we can just test
    # that the output has the right shape and base values are in the right position
    num_features = dX.shape[1]
    assert preds_with_contrib.shape[1] == num_features + 1
    assert preds_with_contrib.shape == local_preds_with_contrib.shape
Example #3
0
def test_regressor(output, client, listen_port):
    """A distributed regressor should score and predict like a local one."""
    X, y, w, dX, dy, dw = _create_data(objective='regression', output=output)

    # NOTE: 'tree' is a LightGBM alias for 'tree_learner'.
    dask_regressor = dlgbm.DaskLGBMRegressor(
        time_out=5,
        local_listen_port=listen_port,
        seed=42,
        num_leaves=10,
        tree='data',
    )
    dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)

    p1 = dask_regressor.predict(dX)
    if output != 'dataframe':
        # r2 is computed on the lazy collection before materialization.
        s1 = r2_score(dy, p1)
    p1 = p1.compute()

    local_regressor = lightgbm.LGBMRegressor(seed=42, num_leaves=10)
    local_regressor.fit(X, y, sample_weight=w)
    s2 = local_regressor.score(X, y)
    p2 = local_regressor.predict(X)

    # Scores should agree closely between distributed and local training.
    if output != 'dataframe':
        assert_eq(s1, s2, atol=.01)

    # Predictions only need to be in the same ballpark as the targets.
    assert_eq(y, p1, rtol=1., atol=100.)
    assert_eq(y, p2, rtol=1., atol=50.)

    client.close()
Example #4
0
def test_regressor_quantile(output, client, listen_port, alpha):
    """Empirical quantiles of dask and local quantile regressors ≈ ``alpha``."""
    X, y, w, dX, dy, dw = _create_data(objective='regression', output=output)

    # Hyperparameters shared by the distributed and the local model.
    shared_params = dict(
        objective="quantile",
        alpha=alpha,
        random_state=42,
        n_estimators=10,
        num_leaves=10,
    )

    dask_regressor = dlgbm.DaskLGBMRegressor(local_listen_port=listen_port,
                                             tree_learner_type='data_parallel',
                                             **shared_params)
    dask_regressor = dask_regressor.fit(dX, dy, client=client, sample_weight=dw)
    p1 = dask_regressor.predict(dX).compute()
    q1 = np.count_nonzero(y < p1) / y.shape[0]

    local_regressor = lightgbm.LGBMRegressor(**shared_params)
    local_regressor.fit(X, y, sample_weight=w)
    p2 = local_regressor.predict(X)
    q2 = np.count_nonzero(y < p2) / y.shape[0]

    # Both empirical quantiles should land near the requested alpha.
    np.testing.assert_allclose(q1, alpha, atol=0.2)
    np.testing.assert_allclose(q2, alpha, atol=0.2)

    client.close()
Example #5
0
def test_regressor_local_predict(client, listen_port):
    """Predictions and score of ``to_local()`` should match the dask model's."""
    X, y, w, dX, dy, dw = _create_data('regression', output='array')

    dask_regressor = dlgbm.DaskLGBMRegressor(local_listen_port=listen_port, seed=42)
    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)

    p1 = dask_regressor.predict(dX)
    # r2 is computed on the lazy collection before materialization.
    s1 = r2_score(dy, p1)
    p2 = dask_regressor.to_local().predict(X)
    p1 = p1.compute()
    s2 = dask_regressor.to_local().score(X, y)

    # Local conversion must not change predictions or score.
    assert_eq(p1, p2)
    assert_eq(s1, s2)
Example #6
0
def test_regressor_local_predict(client, listen_port):
    """Predictions and score of ``to_local()`` should match the dask model's."""
    X, y, _, dX, dy, dw = _create_data('regression', output='array')

    # NOTE: 'tree_type' is a LightGBM alias for 'tree_learner'.
    dask_regressor = dlgbm.DaskLGBMRegressor(
        local_listen_port=listen_port,
        random_state=42,
        n_estimators=10,
        num_leaves=10,
        tree_type='data',
    )
    dask_regressor = dask_regressor.fit(dX, dy, sample_weight=dw, client=client)

    p1 = dask_regressor.predict(dX)
    # r2 is computed on the lazy collection before materialization.
    s1 = r2_score(dy, p1)
    p2 = dask_regressor.to_local().predict(X)
    p1 = p1.compute()
    s2 = dask_regressor.to_local().score(X, y)

    # Local conversion must not change predictions or score.
    assert_eq(p1, p2)
    assert_eq(s1, s2)

    client.close()