def test_ranking(self):
    # generate random data: 1000 training rows across 20 query groups
    # of 50 rows each, and 200 validation rows across 4 groups
    x_train = np.random.rand(1000, 10)
    y_train = np.random.randint(5, size=1000)
    train_qid = np.repeat(np.array([list(range(20))]), 50)
    x_valid = np.random.rand(200, 10)
    y_valid = np.random.randint(5, size=200)
    valid_qid = np.repeat(np.array([list(range(4))]), 50)
    x_test = np.random.rand(100, 10)

    params = {
        "objective": "rank:pairwise",
        "learning_rate": 0.1,
        "gamma": 1.0,
        "min_child_weight": 0.1,
        "max_depth": 6,
        "n_estimators": 4,
        "random_state": 1,
        "n_jobs": 2,
    }
    model = RayXGBRanker(**params)
    model.fit(
        x_train,
        y_train,
        qid=train_qid,
        eval_set=[(x_valid, y_valid)],
        eval_qid=[valid_qid])
    assert model.evals_result()
    pred = model.predict(x_test)

    # train the same model via the functional API and check that the
    # predictions match the sklearn-API model
    train_data = RayDMatrix(x_train, y_train, qid=train_qid)
    valid_data = RayDMatrix(x_valid, y_valid, qid=valid_qid)
    test_data = RayDMatrix(x_test)
    params_orig = {
        "objective": "rank:pairwise",
        "eta": 0.1,
        "gamma": 1.0,
        "min_child_weight": 0.1,
        "max_depth": 6,
        "random_state": 1,
    }
    xgb_model_orig = train(
        params_orig,
        train_data,
        num_boost_round=4,
        evals=[(valid_data, "validation")],
        ray_params=RayParams(num_actors=2, max_actor_restarts=0))
    pred_orig = predict(
        xgb_model_orig,
        test_data,
        ray_params=RayParams(num_actors=2, max_actor_restarts=0))
    np.testing.assert_almost_equal(pred, pred_orig)
def test_best_ntree_limit(self):
    self._init_ray()

    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)

    def train(booster, forest):
        rounds = 4
        cls = RayXGBClassifier(
            n_estimators=rounds,
            num_parallel_tree=forest,
            booster=booster).fit(
                X, y, eval_set=[(X, y)], early_stopping_rounds=3)

        if forest:
            assert cls.best_ntree_limit == rounds * forest
        else:
            assert cls.best_ntree_limit == 0

        # best_ntree_limit is used by default,
        # assert that under gblinear it's
        # automatically ignored due to being 0.
        cls.predict(X)

    num_parallel_tree = 4
    train("gbtree", num_parallel_tree)
    train("dart", num_parallel_tree)
    train("gblinear", None)
def fit(
        self,
        X,
        y,
        *,
        group=None,
        qid=None,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_group=None,
        eval_qid=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=False,
        xgb_model: Optional[Union[Booster, str, XGBModel]] = None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
):
    if not (group is None and eval_group is None):
        raise ValueError("Use `qid` instead of `group` for RayXGBRanker.")
    if qid is None:
        raise ValueError("`qid` is required for ranking.")
    if eval_set is not None and eval_qid is None:
        raise ValueError("`eval_qid` is required if"
                         " `eval_set` is not None")

    evals_result = {}
    ray_dmatrix_params = ray_dmatrix_params or {}

    params = self.get_xgb_params()

    train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
        X, sample_weight, base_margin, eval_set,
        sample_weight_eval_set, base_margin_eval_set, eval_qid)

    if train_dmatrix is None:
        train_dmatrix, evals = _wrap_evaluation_matrices(
            missing=self.missing,
            X=X,
            y=y,
            group=group,
            qid=qid,
            sample_weight=sample_weight,
            base_margin=base_margin,
            feature_weights=feature_weights,
            eval_set=eval_set,
            sample_weight_eval_set=sample_weight_eval_set,
            base_margin_eval_set=base_margin_eval_set,
            eval_group=eval_group,
            eval_qid=eval_qid,
            # changed in xgboost-ray:
            create_dmatrix=lambda **kwargs: RayDMatrix(**{
                **kwargs,
                **ray_dmatrix_params
            }),
            **self._ray_get_wrap_evaluation_matrices_compat_kwargs())

    try:
        model, feval, params = self._configure_fit(
            xgb_model, eval_metric, params)
    except TypeError:
        # XGBoost >= 1.6.0
        (model, feval, params, early_stopping_rounds,
         callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                          early_stopping_rounds, callbacks)

    if callable(feval):
        raise ValueError(
            "Custom evaluation metric is not yet supported for XGBRanker.")

    # remove those as they will be set in RayXGBoostActor
    params.pop("n_jobs", None)
    params.pop("nthread", None)

    ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

    additional_results = {}

    self._Booster = train(
        params,
        train_dmatrix,
        self.n_estimators,
        early_stopping_rounds=early_stopping_rounds,
        evals=evals,
        evals_result=evals_result,
        feval=feval,
        verbose_eval=verbose,
        xgb_model=model,
        callbacks=callbacks,
        # changed in xgboost-ray:
        additional_results=additional_results,
        ray_params=ray_params,
        _remote=_remote,
    )

    self.objective = params["objective"]
    self.additional_results_ = additional_results
    self._set_evaluation_result(evals_result)
    return self
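# Usage sketch (illustrative, not part of the library source): how a caller
# might invoke RayXGBRanker.fit() with query IDs. Assumes a running Ray
# cluster and that RayXGBRanker/RayParams are importable from xgboost_ray;
# the data shapes and parameter values below are made up for illustration.
#
#     import numpy as np
#     from xgboost_ray import RayXGBRanker, RayParams
#
#     X = np.random.rand(100, 4)
#     y = np.random.randint(3, size=100)
#     qid = np.repeat(np.arange(4), 25)  # four query groups, 25 rows each
#
#     ranker = RayXGBRanker(objective="rank:pairwise", n_estimators=8)
#     ranker.fit(X, y, qid=qid, ray_params=RayParams(num_actors=2))
#     scores = ranker.predict(X)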
def fit(
        self,
        X,
        y,
        *,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model=None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
):
    evals_result = {}
    ray_dmatrix_params = ray_dmatrix_params or {}

    params = self.get_xgb_params()

    train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
        X, sample_weight, base_margin, eval_set,
        sample_weight_eval_set, base_margin_eval_set)

    if train_dmatrix is not None:
        if not hasattr(self, "use_label_encoder"):
            warnings.warn("If X is a RayDMatrix, no label encoding"
                          " will be performed. Ensure the labels are"
                          " encoded.")
        elif self.use_label_encoder:
            raise ValueError(
                "X cannot be a RayDMatrix if `use_label_encoder` "
                "is set to True")
        if "num_class" not in params:
            raise ValueError(
                "`num_class` must be set during initialization if X"
                " is a RayDMatrix")
        self.classes_ = list(range(0, params["num_class"]))
        self.n_classes_ = params["num_class"]
        if self.n_classes_ <= 2:
            params.pop("num_class")
        label_transform = lambda x: x  # noqa: E731
    else:
        if len(X.shape) != 2:
            # Simply raise an error here since there might be many
            # different ways of reshaping
            raise ValueError(
                "Please reshape the input data X into a 2-dimensional "
                "matrix.")

        label_transform = self._ray_fit_preprocess(y)

    if callable(self.objective):
        obj = _objective_decorator(self.objective)
        # Use default value. Is it really not used?
        params["objective"] = "binary:logistic"
    else:
        obj = None

    if self.n_classes_ > 2:
        # Switch to using a multiclass objective in the underlying
        # XGB instance
        params["objective"] = "multi:softprob"
        params["num_class"] = self.n_classes_

    try:
        model, feval, params = self._configure_fit(
            xgb_model, eval_metric, params)
    except TypeError:
        # XGBoost >= 1.6.0
        (model, feval, params, early_stopping_rounds,
         callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                          early_stopping_rounds, callbacks)

    if train_dmatrix is None:
        train_dmatrix, evals = _wrap_evaluation_matrices(
            missing=self.missing,
            X=X,
            y=y,
            group=None,
            qid=None,
            sample_weight=sample_weight,
            base_margin=base_margin,
            feature_weights=feature_weights,
            eval_set=eval_set,
            sample_weight_eval_set=sample_weight_eval_set,
            base_margin_eval_set=base_margin_eval_set,
            eval_group=None,
            eval_qid=None,
            # changed in xgboost-ray:
            create_dmatrix=lambda **kwargs: RayDMatrix(**{
                **kwargs,
                **ray_dmatrix_params
            }),
            **self._ray_get_wrap_evaluation_matrices_compat_kwargs(
                label_transform=label_transform))

    # remove those as they will be set in RayXGBoostActor
    params.pop("n_jobs", None)
    params.pop("nthread", None)

    ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

    additional_results = {}

    self._Booster = train(
        params,
        train_dmatrix,
        self.get_num_boosting_rounds(),
        evals=evals,
        early_stopping_rounds=early_stopping_rounds,
        evals_result=evals_result,
        obj=obj,
        feval=feval,
        verbose_eval=verbose,
        xgb_model=model,
        callbacks=callbacks,
        # changed in xgboost-ray:
        additional_results=additional_results,
        ray_params=ray_params,
        _remote=_remote,
    )

    if not callable(self.objective):
        self.objective = params["objective"]

    self.additional_results_ = additional_results
    self._set_evaluation_result(evals_result)
    return self
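# Usage sketch (illustrative, not part of the library source): a minimal
# multi-class fit through RayXGBClassifier. Assumes a running Ray cluster;
# the random data and parameter values are made up for illustration. With a
# plain numpy X (not a RayDMatrix), labels are preprocessed automatically and
# the number of classes is inferred, matching the second branch above.
#
#     import numpy as np
#     from xgboost_ray import RayXGBClassifier, RayParams
#
#     X = np.random.rand(200, 6)
#     y = np.random.randint(3, size=200)  # three classes
#
#     clf = RayXGBClassifier(n_estimators=10)
#     clf.fit(X, y, ray_params=RayParams(num_actors=2))
#     proba = clf.predict_proba(X)  # shape (200, 3)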
def fit(
        self,
        X,
        y,
        *,
        sample_weight=None,
        base_margin=None,
        eval_set=None,
        eval_metric=None,
        early_stopping_rounds=None,
        verbose=True,
        xgb_model: Optional[Union[Booster, str, "XGBModel"]] = None,
        sample_weight_eval_set=None,
        base_margin_eval_set=None,
        feature_weights=None,
        callbacks=None,
        ray_params: Union[None, RayParams, Dict] = None,
        _remote: Optional[bool] = None,
        ray_dmatrix_params: Optional[Dict] = None,
):
    evals_result = {}
    ray_dmatrix_params = ray_dmatrix_params or {}

    train_dmatrix, evals = _check_if_params_are_ray_dmatrix(
        X, sample_weight, base_margin, eval_set,
        sample_weight_eval_set, base_margin_eval_set)

    if train_dmatrix is None:
        train_dmatrix, evals = _wrap_evaluation_matrices(
            missing=self.missing,
            X=X,
            y=y,
            group=None,
            qid=None,
            sample_weight=sample_weight,
            base_margin=base_margin,
            feature_weights=feature_weights,
            eval_set=eval_set,
            sample_weight_eval_set=sample_weight_eval_set,
            base_margin_eval_set=base_margin_eval_set,
            eval_group=None,
            eval_qid=None,
            # changed in xgboost-ray:
            create_dmatrix=lambda **kwargs: RayDMatrix(**{
                **kwargs,
                **ray_dmatrix_params
            }),
            **self._ray_get_wrap_evaluation_matrices_compat_kwargs())

    params = self.get_xgb_params()

    if callable(self.objective):
        obj = _objective_decorator(self.objective)
        params["objective"] = "reg:squarederror"
    else:
        obj = None

    try:
        model, feval, params = self._configure_fit(
            xgb_model, eval_metric, params)
    except TypeError:
        # XGBoost >= 1.6.0
        (model, feval, params, early_stopping_rounds,
         callbacks) = self._configure_fit(xgb_model, eval_metric, params,
                                          early_stopping_rounds, callbacks)

    # remove those as they will be set in RayXGBoostActor
    params.pop("n_jobs", None)
    params.pop("nthread", None)

    ray_params = self._ray_set_ray_params_n_jobs(ray_params, self.n_jobs)

    additional_results = {}

    self._Booster = train(
        params,
        train_dmatrix,
        self.get_num_boosting_rounds(),
        evals=evals,
        early_stopping_rounds=early_stopping_rounds,
        evals_result=evals_result,
        obj=obj,
        feval=feval,
        verbose_eval=verbose,
        xgb_model=model,
        callbacks=callbacks,
        # changed in xgboost-ray:
        additional_results=additional_results,
        ray_params=ray_params,
        _remote=_remote,
    )

    self.additional_results_ = additional_results
    self._set_evaluation_result(evals_result)
    return self
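# Usage sketch (illustrative, not part of the library source): a minimal
# regression fit with an eval set, assuming a running Ray cluster; the data
# and parameter values are made up for illustration.
#
#     import numpy as np
#     from xgboost_ray import RayXGBRegressor, RayParams
#
#     X = np.random.rand(200, 6)
#     y = np.random.rand(200)
#
#     reg = RayXGBRegressor(n_estimators=10)
#     reg.fit(X, y, eval_set=[(X, y)],
#             ray_params=RayParams(num_actors=2))
#     print(reg.evals_result())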