def test_classification(breast_cancer_data):
    """Fit a Bernoulli ``NGBClassifier`` and check its held-out metrics.

    ``breast_cancer_data`` is unpacked as
    ``(x_train, x_test, y_train, y_test)``.
    """
    from sklearn.metrics import log_loss, roc_auc_score

    train_x, test_x, train_y, test_y = breast_cancer_data

    model = NGBClassifier(Dist=Bernoulli, verbose=False)
    model.fit(train_x, train_y)

    # ROC AUC computed from the output of ``predict``.
    # loose score requirement so it isn't failing all the time
    label_auc = roc_auc_score(test_y, model.predict(test_x))
    assert label_auc >= 0.85

    # Log loss computed from the output of ``predict_proba``.
    probabilities = model.predict_proba(test_x)
    assert log_loss(test_y, probabilities) <= 0.30

    # The model's own ``score`` is held to the same upper bound.
    assert model.score(test_x, test_y) <= 0.30

    assert isinstance(model.pred_dist(test_x), Bernoulli)

    # ROC AUC again, this time ranking by column 1 of the probabilities.
    assert roc_auc_score(test_y, probabilities[:, 1]) >= 0.85
def test_classification():
    """Fit a Bernoulli ``NGBClassifier`` on breast-cancer data and check metrics.

    The dataset is split 80/20 with a fixed ``random_state`` so the split is
    reproducible from run to run.
    """
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score, log_loss

    # ``return_X_y`` must be passed by keyword: recent scikit-learn releases
    # reject the positional form ``load_breast_cancer(True)``.
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42
    )

    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)

    # ROC AUC computed from the output of ``predict``.
    preds = ngb.predict(x_test)
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95

    # Log loss computed from the output of ``predict_proba``.
    preds = ngb.predict_proba(x_test)
    score = log_loss(y_test, preds)
    assert score <= 0.20

    # The model's own ``score`` is held to the same upper bound.
    score = ngb.score(x_test, y_test)
    assert score <= 0.20

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Bernoulli)

    # ROC AUC again, this time ranking by column 1 of the probabilities.
    score = roc_auc_score(y_test, preds[:, 1])
    assert score >= 0.95
def test_bernoulli(learner, breast_cancer_data: Tuple4Array):
    """Smoke-test a Bernoulli ``NGBClassifier`` built on ``learner``.

    Nothing is asserted: the test passes as long as fitting and each of the
    three prediction calls complete without raising.
    """
    train_features, test_features, train_labels, _ = breast_cancer_data

    # test early stopping features
    # test other args, n_trees, LR, minibatching- args as fixture
    model = NGBClassifier(
        Dist=Bernoulli,
        Score=LogScore,
        Base=learner,
        verbose=False,
    )
    model.fit(train_features, train_labels)

    # Return values are intentionally discarded.
    model.predict(test_features)
    model.predict_proba(test_features)
    model.pred_dist(test_features)
def test_categorical(k: int, learner, breast_cancer_data: Tuple4Array):
    """Smoke-test a ``k``-class categorical ``NGBClassifier`` built on ``learner``.

    The fixture's training labels are replaced by random integers in
    ``[0, k)``; only the features (and the label count) come from
    ``breast_cancer_data``. Nothing is asserted beyond "does not raise".
    """
    train_features, test_features, original_labels, _ = breast_cancer_data

    categorical = k_categorical(k)
    random_labels = np.random.randint(0, k, len(original_labels))

    # test early stopping features
    model = NGBClassifier(
        Dist=categorical,
        Score=LogScore,
        Base=learner,
        verbose=False,
    )
    model.fit(train_features, random_labels)

    # Return values are intentionally discarded.
    model.predict(test_features)
    model.predict_proba(test_features)
    model.pred_dist(test_features)
def test_bernoulli(self, learners, cls_data):
    """Smoke-test a Bernoulli ``NGBClassifier`` with every base learner.

    ``cls_data`` is unpacked as ``(X_train, X_test, Y_train, Y_test)``.
    Nothing is asserted: each learner only has to fit and predict without
    raising.
    """
    train_features, test_features, train_labels, _ = cls_data

    for base_learner in learners:
        # test early stopping features
        # test other args, n_trees, LR, minibatching- args as fixture
        model = NGBClassifier(
            Dist=Bernoulli,
            Score=LogScore,
            Base=base_learner,
            verbose=False,
        )
        model.fit(train_features, train_labels)

        # Return values are intentionally discarded.
        model.predict(test_features)
        model.predict_proba(test_features)
        model.pred_dist(test_features)
def test_categorical(self, learners, cls_data):
    """Smoke-test categorical ``NGBClassifier`` fits for 2, 4 and 7 classes.

    For each class count a fresh vector of random integer labels is drawn
    (same length as the training labels in ``cls_data``) and every base
    learner is fitted on it. Nothing is asserted beyond "does not raise".
    """
    train_features, test_features, original_labels, _ = cls_data
    n_samples = len(original_labels)

    for n_classes in (2, 4, 7):
        categorical = k_categorical(n_classes)
        random_labels = np.random.randint(0, n_classes, n_samples)

        for base_learner in learners:
            # test early stopping features
            model = NGBClassifier(
                Dist=categorical,
                Score=LogScore,
                Base=base_learner,
                verbose=False,
            )
            model.fit(train_features, random_labels)

            # Return values are intentionally discarded.
            model.predict(test_features)
            model.predict_proba(test_features)
            model.pred_dist(test_features)
import numpy as np
from ngboost import NGBClassifier
from ngboost.distns import Bernoulli
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


# Example script: fit a Bernoulli NGBClassifier on the breast-cancer dataset
# and print the ROC AUC measured on a 20% hold-out split.
if __name__ == "__main__":

    # Seed NumPy's global RNG before splitting and fitting.
    np.random.seed(12345)

    # ``return_X_y`` must be passed by keyword: recent scikit-learn releases
    # reject the positional form ``load_breast_cancer(True)``.
    X, Y = load_breast_cancer(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBClassifier(
        Base=default_tree_learner,
        Dist=Bernoulli,
        Score=MLE,
        verbose=True,
        natural_gradient=True,
    )
    ngb.fit(X_train, Y_train)

    # Score with the ``prob`` attribute of the predicted distribution.
    preds = ngb.pred_dist(X_test)
    print("ROC:", roc_auc_score(Y_test, preds.prob))