def run_with_dask_array(DMatrixT, client):
    """Train on cupy-backed dask arrays and cross-check three prediction paths.

    The arrays returned by ``generate_array()`` have every block converted
    with ``cp.asarray`` before a ``DMatrixT`` is built from them.  A model is
    then trained for two boosting rounds with ``tree_method='gpu_hist'`` and
    the following results are compared:

    * ``dxgb.predict`` on the distributed matrix,
    * ``dxgb.inplace_predict`` on the feature array,
    * the trained booster's own ``predict`` on a locally materialised
      ``xgboost.DMatrix``.

    Parameters
    ----------
    DMatrixT :
        Callable invoked as ``DMatrixT(client, X, y)`` to build the training
        matrix.
    client :
        Client object forwarded to every ``dxgb`` call.

    Raises
    ------
    AssertionError
        If the predictions disagree, or if the in-place predictions (or the
        cupy copy of the booster predictions) are not on the current device.
    """
    import cupy as cp

    # Pin the calling process to device 0 before any cupy arrays are created.
    cp.cuda.runtime.setDevice(0)

    features, labels = generate_array()
    features = features.map_blocks(cp.asarray)
    labels = labels.map_blocks(cp.asarray)

    train_matrix = DMatrixT(client, features, labels)
    params = {'tree_method': 'gpu_hist', 'debug_synchronize': True}
    result = dxgb.train(
        client,
        params,
        dtrain=train_matrix,
        evals=[(train_matrix, 'X')],
        num_boost_round=2,
    )

    via_dmatrix = dxgb.predict(client, result, train_matrix).compute()
    via_inplace = dxgb.inplace_predict(client, result, features).compute()

    # Reference prediction: run the booster directly on the gathered data.
    booster = result['booster']
    local_matrix = xgboost.DMatrix(features.compute())
    via_booster = booster.predict(local_matrix)
    np.testing.assert_allclose(via_booster, via_dmatrix)

    # The in-place result is compared on the device, so both operands must
    # live on the currently active device.
    current_device = cp.cuda.runtime.getDevice()
    assert current_device == via_inplace.device.id
    via_booster_gpu = cp.array(via_booster)
    assert current_device == via_booster_gpu.device.id
    cp.testing.assert_allclose(via_booster_gpu, via_inplace)
def test_categorical(local_cuda_cluster: LocalCUDACluster) -> None:
    """Check categorical-data training through the dask interface on GPU.

    Two data sets are produced by ``make_categorical``: one with the default
    arguments and one with an extra trailing ``True`` flag (presumably the
    one-hot encoded variant, judging by how the result is used -- confirm
    against ``make_categorical``).  Both are trained with ``gpu_hist`` for
    the same number of rounds, and the test asserts that:

    * the two ``rmse`` training histories agree within ``rtol=1e-3``;
    * the history of the non-flagged data set is non-increasing;
    * the last tree of the saved model has a non-empty ``categories_sizes``
      whose entries are all 1, both for the functional interface and for
      ``DaskXGBRegressor`` with ``tree_method="gpu_hist"``;
    * fitting a ``DaskXGBRegressor`` without ``tree_method="gpu_hist"``
      raises ``ValueError``.
    """
    with Client(local_cuda_cluster) as client:
        import dask_cudf

        n_rounds = 10
        # Positional arguments shared by both make_categorical calls.
        gen_args = (10000, 30, 13)

        cat_frame, target = make_categorical(client, *gen_args)
        cat_frame = dask_cudf.from_dask_dataframe(cat_frame)

        onehot_frame, _ = make_categorical(client, *gen_args, True)
        onehot_frame = dask_cudf.from_dask_dataframe(onehot_frame)

        train_params = {"tree_method": "gpu_hist"}

        # Baseline: train on the data generated with the extra flag.
        dmat = dxgb.DaskDMatrix(
            client, onehot_frame, target, enable_categorical=True
        )
        etl_history = dxgb.train(
            client,
            train_params,
            dmat,
            num_boost_round=n_rounds,
            evals=[(dmat, "Train")],
        )["history"]

        # Same training on the data generated without the flag.
        dmat = dxgb.DaskDMatrix(
            client, cat_frame, target, enable_categorical=True
        )
        builtin_output = dxgb.train(
            client,
            parameters := train_params,
            dmat,
            num_boost_round=n_rounds,
            evals=[(dmat, "Train")],
        ) if False else dxgb.train(
            client,
            train_params,
            dmat,
            num_boost_round=n_rounds,
            evals=[(dmat, "Train")],
        )
        builtin_history = builtin_output["history"]

        etl_rmse = etl_history["Train"]["rmse"]
        builtin_rmse = builtin_history["Train"]["rmse"]
        np.testing.assert_allclose(
            np.array(etl_rmse),
            np.array(builtin_rmse),
            rtol=1e-3,
        )
        assert tm.non_increasing(builtin_rmse)

        def check_model_output(model: dxgb.Booster) -> None:
            """Save ``model`` as JSON and inspect the last tree's categories.

            Asserts that ``categories_sizes`` of the last tree is non-empty
            and that every entry is (close to) 1.
            """
            with tempfile.TemporaryDirectory() as tempdir:
                path = os.path.join(tempdir, "model.json")
                model.save_model(path)
                with open(path, "r") as fd:
                    dumped = json.load(fd)

                # Walk down to the tree list of the serialized booster.
                booster_model = dumped["learner"]["gradient_booster"]["model"]
                last_tree = booster_model["trees"][-1]
                categories_sizes = np.array(last_tree["categories_sizes"])
                assert categories_sizes.shape[0] != 0
                np.testing.assert_allclose(categories_sizes, 1)

        check_model_output(builtin_output["booster"])

        # The scikit-learn style wrapper must produce the same model shape.
        regressor = dxgb.DaskXGBRegressor(
            enable_categorical=True,
            n_estimators=10,
            tree_method="gpu_hist",
        )
        regressor.fit(cat_frame, target)
        check_model_output(regressor.get_booster())

        # Without tree_method="gpu_hist" the fit is expected to be rejected.
        regressor = dxgb.DaskXGBRegressor(
            enable_categorical=True, n_estimators=10
        )
        with pytest.raises(ValueError):
            regressor.fit(cat_frame, target)