def test_parameters_access(self):
    """Kwargs-supplied parameters must survive get_params/set_params and
    propagate into the booster's serialized config, even across predict."""
    self._init_ray()

    from sklearn import datasets

    extra_kwargs = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1}
    clf = RayXGBClassifier(n_estimators=1000, **extra_kwargs)
    # Every kwarg (plus the explicit n_estimators) must be visible via get_params.
    for key, expected in (
        ("updater", "grow_gpu_hist"),
        ("subsample", 0.5),
        ("n_estimators", 1000),
    ):
        assert clf.get_params()[key] == expected

    clf = RayXGBClassifier(n_estimators=1, nthread=4)
    X, y = datasets.load_iris(return_X_y=True)
    clf.fit(X, y)

    def saved_nthread(model):
        # The booster exposes its runtime configuration as a JSON document.
        cfg = json.loads(model.get_booster().save_config())
        return int(cfg["learner"]["generic_param"]["nthread"])

    assert saved_nthread(clf) == 4

    clf.set_params(nthread=16)
    assert saved_nthread(clf) == 16

    # Running prediction must not reset the updated thread count.
    clf.predict(X)
    assert saved_nthread(clf) == 16
def test_sklearn_get_default_params(self):
    """base_score is unresolved (None) before fit and concrete afterwards."""
    self._init_ray()

    from sklearn.datasets import load_digits

    two_class = load_digits(n_class=2)
    features = two_class["data"]
    labels = two_class["target"]

    model = RayXGBClassifier()
    # Prior to training, the default base_score has not been determined yet.
    assert model.get_params()["base_score"] is None
    # A tiny fit is enough to make xgboost resolve a concrete default.
    model.fit(features[:4, ...], labels[:4, ...])
    assert model.get_params()["base_score"] is not None
def test_kwargs_grid_search(self):
    """GridSearchCV must be able to tune a parameter that only reaches the
    estimator through **kwargs forwarding."""
    self._init_ray()

    from sklearn import datasets
    from sklearn.model_selection import GridSearchCV

    extra = {"tree_method": "hist"}
    estimator = RayXGBClassifier(n_estimators=1, learning_rate=1.0, **extra)
    assert estimator.get_params()["tree_method"] == "hist"

    # 'max_leaves' is not a default argument of XGBClassifier, so searching
    # over it exercises the kwargs-forwarding path of set_params.
    search = GridSearchCV(estimator, {"max_leaves": range(2, 5)}, cv=5)
    iris = datasets.load_iris()
    search.fit(iris.data, iris.target)

    # Distinct mean scores for every candidate confirm that sklearn really
    # changed the parameter value between fits.
    scores = search.cv_results_["mean_test_score"]
    assert len(scores) == len(set(scores))