Пример #1
0
def run_with_dask_array(DMatrixT, client):
    """Train on CuPy-backed dask arrays and cross-check three prediction paths.

    A two-round ``gpu_hist`` model is trained, then predictions obtained
    through ``dxgb.predict``, ``dxgb.inplace_predict`` and the returned
    booster's own ``predict`` are compared against each other.

    Parameters
    ----------
    DMatrixT :
        Callable invoked as ``DMatrixT(client, X, y)`` to build the training
        matrix.
    client :
        Forwarded to every ``dxgb`` call; presumably a dask distributed
        client (confirm against the caller).
    """
    import cupy as cp

    cp.cuda.runtime.setDevice(0)
    features, labels = generate_array()

    # Convert every block to a CuPy array before building the training matrix.
    features = features.map_blocks(cp.asarray)
    labels = labels.map_blocks(cp.asarray)
    train_matrix = DMatrixT(client, features, labels)

    params = {'tree_method': 'gpu_hist', 'debug_synchronize': True}
    result = dxgb.train(
        client,
        params,
        dtrain=train_matrix,
        evals=[(train_matrix, 'X')],
        num_boost_round=2,
    )

    dmatrix_preds = dxgb.predict(client, result, train_matrix).compute()
    inplace_preds = dxgb.inplace_predict(client, result, features).compute()

    # Reference prediction made by the booster on the fully computed input.
    booster = result['booster']
    booster_preds = booster.predict(xgboost.DMatrix(features.compute()))
    np.testing.assert_allclose(booster_preds, dmatrix_preds)

    # The in-place result must live on the currently selected device.
    active_device = cp.cuda.runtime.getDevice()
    assert active_device == inplace_preds.device.id

    # Copy the reference into a CuPy array so both operands of the final
    # comparison are CuPy arrays on the same device.
    booster_preds_gpu = cp.array(booster_preds)
    assert active_device == booster_preds_gpu.device.id
    cp.testing.assert_allclose(booster_preds_gpu, inplace_preds)
Пример #2
0
def test_categorical(local_cuda_cluster: LocalCUDACluster) -> None:
    """Compare ``gpu_hist`` training on two encodings of the same data.

    One model is trained on ``X_onehot`` (presumably a one-hot encoded frame,
    going by the variable name) and another on ``X``, both with
    ``enable_categorical=True``. Their training rmse histories must agree
    within ``rtol=1e-3``. The saved model JSON of the second booster and of a
    ``DaskXGBRegressor`` is then inspected, and finally a regressor built
    without ``tree_method`` is expected to raise ``ValueError`` on ``fit``.
    """
    with Client(local_cuda_cluster) as client:
        import dask_cudf

        rounds = 10
        X, y = make_categorical(client, 10000, 30, 13)
        X = dask_cudf.from_dask_dataframe(X)

        X_onehot, _ = make_categorical(client, 10000, 30, 13, True)
        X_onehot = dask_cudf.from_dask_dataframe(X_onehot)

        parameters = {"tree_method": "gpu_hist"}

        # First run: train on the ``X_onehot`` frame and keep its history.
        dmatrix = dxgb.DaskDMatrix(
            client, X_onehot, y, enable_categorical=True
        )
        by_etl_results = dxgb.train(
            client,
            parameters,
            dtrain=dmatrix,
            num_boost_round=rounds,
            evals=[(dmatrix, "Train")],
        )["history"]

        # Second run: train on ``X`` and keep the whole output, because the
        # booster is inspected further down.
        dmatrix = dxgb.DaskDMatrix(client, X, y, enable_categorical=True)
        output = dxgb.train(
            client,
            parameters,
            dtrain=dmatrix,
            num_boost_round=rounds,
            evals=[(dmatrix, "Train")],
        )
        by_builtin_results = output["history"]

        etl_rmse = by_etl_results["Train"]["rmse"]
        builtin_rmse = by_builtin_results["Train"]["rmse"]
        np.testing.assert_allclose(
            np.array(etl_rmse), np.array(builtin_rmse), rtol=1e-3
        )
        assert tm.non_increasing(by_builtin_results["Train"]["rmse"])

        def check_model_output(model: dxgb.Booster) -> None:
            # Save the model as JSON, read it back, and verify that the last
            # tree's "categories_sizes" is non-empty and all equal to 1.
            with tempfile.TemporaryDirectory() as workdir:
                model_path = os.path.join(workdir, "model.json")
                model.save_model(model_path)
                with open(model_path, "r") as handle:
                    model_json = json.load(handle)

                booster_json = model_json["learner"]["gradient_booster"]
                last_tree = booster_json["model"]["trees"][-1]
                sizes = np.array(last_tree["categories_sizes"])
                assert sizes.shape[0] != 0
                np.testing.assert_allclose(sizes, 1)

        check_model_output(output["booster"])

        # The scikit-learn style regressor must produce the same model shape.
        reg = dxgb.DaskXGBRegressor(
            enable_categorical=True,
            n_estimators=10,
            tree_method="gpu_hist",
        )
        reg.fit(X, y)
        check_model_output(reg.get_booster())

        # With no tree_method specified, fitting is expected to be rejected.
        reg = dxgb.DaskXGBRegressor(enable_categorical=True, n_estimators=10)
        with pytest.raises(ValueError):
            reg.fit(X, y)