def learn_model(draw, X, y, task, learner, n_estimators, n_targets):
    """Fit one tree-ensemble model and return it with predictions on ``X``.

    Parameters
    ----------
    draw
        Callable used to draw values from strategies; it is invoked as
        ``draw(st.booleans())`` and ``draw(st.sampled_from(...))``.
        Presumably the Hypothesis composite ``draw`` -- confirm at the
        decoration site.
    X, y
        Training features and targets, handed unchanged to ``fit`` and to the
        prediction call. ``y.dtype`` is read for ``'rf'`` regression.
    task
        ``'regression'`` or ``'classification'``.
    learner
        ``'xgb'``, ``'rf'`` (cuML random forest), ``'skl_rf'`` (scikit-learn
        random forest) or ``'lgbm'``.
    n_estimators
        Forwarded as ``n_estimators`` to every estimator constructor.
    n_targets
        Only read for XGBoost classification: a value above 2 adds the
        ranking and multi-class objectives to the pool that is sampled.

    Returns
    -------
    tuple
        ``(model, pred)``. For ``'xgb'`` and ``'lgbm'`` the model is either
        the scikit-learn style wrapper or its underlying booster, depending
        on a drawn boolean; ``pred`` is always computed from the wrapper
        (``output_margin=True`` / ``raw_score=True``). For the random forests
        ``pred`` comes from ``predict`` (regression) or ``predict_proba``
        (classification).

    Raises
    ------
    ValueError
        If ``learner`` or ``task`` is not one of the supported values.
    """
    # For lgbm or xgb: return the booster or the sklearn object?
    # Drawn before branching, exactly once, for every learner; only the
    # 'xgb' and 'lgbm' branches read it.
    use_sklearn_estimator = draw(st.booleans())

    if learner == 'xgb':
        assume(has_xgboost())
        if task == 'regression':
            estimator_cls = xgb.XGBRegressor
            valid_objectives = ['reg:squarederror', 'reg:pseudohubererror']
        elif task == 'classification':
            estimator_cls = xgb.XGBClassifier
            valid_objectives = [
                'binary:logistic', 'binary:hinge', 'binary:logitraw',
                'count:poisson',
            ]
            if n_targets > 2:
                valid_objectives += [
                    'rank:pairwise', 'rank:ndcg', 'rank:map', 'multi:softmax',
                    'multi:softprob'
                ]
        else:
            raise ValueError(
                "unsupported task {!r} for learner {!r}".format(
                    task, learner))
        objective = draw(st.sampled_from(valid_objectives))
        model = estimator_cls(n_estimators=n_estimators,
                              tree_method='gpu_hist',
                              objective=objective,
                              enable_categorical=True,
                              verbosity=0).fit(X, y)
        # Predict through the wrapper before it is (optionally) swapped for
        # the raw booster.
        pred = model.predict(X, output_margin=True)
        if not use_sklearn_estimator:
            model = model.get_booster()
        return model, pred

    if learner == 'rf':
        if task == 'regression':
            model = cuml.ensemble.RandomForestRegressor(
                n_estimators=n_estimators)
            model.fit(X, y)
            # float32 targets are predicted on the GPU path, anything else
            # on the CPU path.
            predict_model = 'GPU' if y.dtype == np.float32 else 'CPU'
            pred = model.predict(X, predict_model=predict_model)
        elif task == 'classification':
            model = cuml.ensemble.RandomForestClassifier(
                n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        else:
            raise ValueError(
                "unsupported task {!r} for learner {!r}".format(
                    task, learner))
        return model, pred

    if learner == 'skl_rf':
        assume(has_sklearn())
        if task == 'regression':
            model = sklrfr(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict(X)
        elif task == 'classification':
            model = sklrfc(n_estimators=n_estimators)
            model.fit(X, y)
            pred = model.predict_proba(X)
        else:
            raise ValueError(
                "unsupported task {!r} for learner {!r}".format(
                    task, learner))
        return model, pred

    if learner == 'lgbm':
        assume(has_lightgbm())
        if task == 'regression':
            model = lgb.LGBMRegressor(n_estimators=n_estimators).fit(X, y)
        elif task == 'classification':
            model = lgb.LGBMClassifier(n_estimators=n_estimators).fit(X, y)
        else:
            raise ValueError(
                "unsupported task {!r} for learner {!r}".format(
                    task, learner))
        pred = model.predict(X, raw_score=True)
        if not use_sklearn_estimator:
            model = model.booster_
        return model, pred

    raise ValueError("unsupported learner {!r}".format(learner))
@pytest.mark.skipif(has_xgboost() is False, reason="need to install xgboost") def test_output_args(small_classifier_and_preds): model_path, X, xgb_preds = small_classifier_and_preds fm = ForestInference.load(model_path, algo='TREE_REORG', output_class=False, threshold=0.50) X = np.asarray(X) fil_preds = fm.predict(X) fil_preds = np.reshape(fil_preds, np.shape(xgb_preds)) assert array_equal(fil_preds, xgb_preds, 1e-3) @pytest.mark.parametrize('num_classes', [2, 5]) @pytest.mark.skipif(has_lightgbm() is False, reason="need to install lightgbm") def test_lightgbm(tmp_path, num_classes): import lightgbm as lgb X, y = simulate_data(500, 10 if num_classes == 2 else 50, num_classes, random_state=43210, classification=True) train_data = lgb.Dataset(X, label=y) num_round = 5 model_path = str(os.path.join(tmp_path, 'lgb.model')) if num_classes == 2: param = { 'objective': 'binary', 'metric': 'binary_logloss',
from cuml.common.exceptions import NotFittedError
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc
import cuml
from cuml.testing.utils import as_type

# See issue #4729
# Xgboost disabled due to CI failures
xgb = None


def has_xgboost():
    """Report XGBoost as unavailable.

    Always returns ``False``: XGBoost is disabled in this module (see the
    note next to ``xgb = None``), so this local definition answers
    "not installed" unconditionally.
    """
    return False


# Optional dependencies are imported only when their availability check
# passes.
if has_lightgbm():
    import lightgbm as lgb
if has_shap():
    import shap
if has_sklearn():
    from sklearn.datasets import make_regression, make_classification
    from sklearn.ensemble import RandomForestRegressor as sklrfr
    from sklearn.ensemble import RandomForestClassifier as sklrfc


def make_classification_with_categorical(*, n_samples, n_features,
                                         n_categorical, n_informative,
                                         n_redundant,