def test_multiclass_classification(self):
    """Fit RayXGBClassifier on the iris data and sanity-check predictions.

    Runs a shuffled 2-fold split, checks the error rate of several
    ``predict`` variants on the held-out fold, and then verifies the shape
    of ``predict_proba`` for both the default and a custom objective.
    """
    self._init_ray()

    from sklearn.datasets import load_iris
    from sklearn.model_selection import KFold

    def check_pred(preds, labels, output_margin):
        # For margin output the predicted class is the argmax of each row;
        # otherwise each prediction is compared to the label directly.
        if output_margin:
            misses = sum(1 for i, row in enumerate(preds)
                         if row.argmax() != labels[i])
        else:
            misses = sum(1 for i, pred in enumerate(preds)
                         if pred != labels[i])
        err = misses / float(len(preds))
        assert err < 0.4

    iris = load_iris()
    X = iris["data"]
    y = iris["target"]

    def check_proba_shape(classifier):
        # One row per sample, one column per class.
        proba = classifier.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == classifier.n_classes_

    splitter = KFold(n_splits=2, shuffle=True, random_state=self.rng)
    for train_idx, test_idx in splitter.split(X, y):
        model = RayXGBClassifier().fit(X[train_idx], y[train_idx])

        booster = model.get_booster()
        if hasattr(booster, "num_boosted_rounds"):
            assert booster.num_boosted_rounds() == model.n_estimators

        X_test = X[test_idx]
        # Run every predict variant first, then validate them all, so the
        # sequence of predict calls does not depend on any check outcome.
        results = [
            (model.predict(X_test), False),
            # exercise the optional predict parameters as well
            (model.predict(X_test, output_margin=True, ntree_limit=3),
             True),
            (model.predict(X_test, output_margin=True, ntree_limit=0),
             True),
            (model.predict(X_test, output_margin=False, ntree_limit=3),
             False),
        ]
        labels = y[test_idx]
        for preds, is_margin in results:
            check_pred(preds, labels, output_margin=is_margin)

    default_clf = RayXGBClassifier(n_estimators=4).fit(X, y)
    assert default_clf.n_classes_ == 3
    check_proba_shape(default_clf)

    # custom objective, the default is multi:softprob
    # so no transformation is required.
    custom_clf = RayXGBClassifier(
        n_estimators=4, objective=softprob_obj(3)).fit(X, y)
    check_proba_shape(custom_clf)
def testClassifierLabelEncoder(self, n_class=2):
    """Check RayXGBClassifier argument validation when given RayDMatrix.

    Each ``assertRaisesRegex`` block pins the error message expected for an
    unsupported combination of arguments; the remaining calls must fit and
    predict without raising.

    Args:
        n_class: Number of digit classes loaded and passed as ``num_class``.
    """
    self._init_ray()

    from sklearn.datasets import load_digits

    digits = load_digits(n_class=n_class)
    X, y = digits["data"], digits["target"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5)

    train_matrix = RayDMatrix(X_train, y_train)
    test_matrix = RayDMatrix(X_test, y_test)

    array_eval_set = [(X_test, y_test)]
    matrix_eval_set = [(test_matrix, "eval")]

    # Label encoder enabled together with a RayDMatrix input.
    with self.assertRaisesRegex(Exception, "use_label_encoder"):
        clf = RayXGBClassifier(use_label_encoder=True, **self.params)
        clf.fit(train_matrix, None)

    # RayDMatrix input without an explicit num_class.
    with self.assertRaisesRegex(Exception, "num_class"):
        clf = RayXGBClassifier(use_label_encoder=False, **self.params)
        clf.fit(train_matrix, None)

    # RayDMatrix training data combined with an array-based eval_set.
    with self.assertRaisesRegex(Exception,
                                r"must be \(RayDMatrix, str\)"):
        clf = RayXGBClassifier(use_label_encoder=False, **self.params)
        clf.fit(train_matrix, None, eval_set=array_eval_set)

    # Array training data combined with a RayDMatrix-based eval_set.
    with self.assertRaisesRegex(Exception,
                                r"must be \(array_like, array_like\)"):
        clf = RayXGBClassifier(use_label_encoder=False, **self.params)
        clf.fit(X_train, y_train, eval_set=matrix_eval_set)

    # Supported combinations: these must not raise.
    RayXGBClassifier(
        use_label_encoder=False, num_class=n_class,
        **self.params).fit(train_matrix, None)

    clf = RayXGBClassifier(
        use_label_encoder=False, num_class=n_class, **self.params).fit(
            train_matrix, None, eval_set=matrix_eval_set)

    clf.predict(test_matrix)
    clf.predict_proba(test_matrix)
def test_save_load_model(self):
    """Verify saved models can be reloaded and give matching predictions.

    First delegates to ``self.save_load_model`` for two file names, each in
    its own temporary directory. Then trains a booster with ``xgb.train``,
    saves it as JSON, and checks that both ``RayXGBClassifier`` and
    ``xgb.XGBModel`` reproduce the booster's predictions after loading it.
    """
    self._init_ray()

    for filename in ("digits.model", "digits.model.json"):
        with TemporaryDirectory() as tempdir:
            self.save_load_model(os.path.join(tempdir, filename))

    from sklearn.datasets import load_digits

    with TemporaryDirectory() as tempdir:
        model_path = os.path.join(tempdir, "digits.model.json")

        digits = load_digits(n_class=2)
        X, y = digits["data"], digits["target"]

        train_params = {
            "tree_method": "hist",
            "objective": "binary:logistic",
        }
        booster = xgb.train(
            train_params,
            dtrain=xgb.DMatrix(X, y),
            num_boost_round=4,
        )
        expected = booster.predict(xgb.DMatrix(X))
        booster.save_model(model_path)

        # Reload into the Ray classifier wrapper.
        ray_clf = RayXGBClassifier()
        ray_clf.load_model(model_path)
        proba = ray_clf.predict_proba(X)
        assert proba.shape[0] == X.shape[0]
        assert proba.shape[1] == 2  # binary

        # Column 1 of the probabilities is compared with the booster output.
        positive_proba = ray_clf.predict_proba(X)[:, 1]
        assert np.allclose(expected, positive_proba)

        # Reload into the plain xgboost sklearn model.
        plain_model = xgb.XGBModel()
        plain_model.load_model(model_path)
        reloaded = plain_model.predict(X)
        assert np.allclose(expected, reloaded)