Пример #1
0
def test_classification():
    """End-to-end NGBClassifier smoke test on breast-cancer data:
    AUC of hard predictions, log loss of probabilities, and the dist API."""
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score, log_loss

    # Bug fix: passing `True` positionally was deprecated in scikit-learn 0.23
    # and removed in 1.1 — `return_X_y` must be given by keyword.
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)

    # AUC from hard 0/1 predictions (coarse, but a valid sanity bound).
    preds = ngb.predict(x_test)
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95

    preds = ngb.predict_proba(x_test)
    score = log_loss(y_test, preds)
    assert score <= 0.20

    # NOTE(review): the upper bound implies ngb.score is a loss here
    # (lower is better) — consistent with the log_loss check above.
    score = ngb.score(x_test, y_test)
    assert score <= 0.20

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Bernoulli)

    # AUC from the positive-class probability column.
    score = roc_auc_score(y_test, preds[:, 1])
    assert score >= 0.95
Пример #2
0
class myNGBoostClassifier:
    """Thin wrapper around NGBClassifier with pandas-friendly fit/predict."""

    def make(self, params):
        """Instantiate the underlying NGBClassifier from a params dict; fluent API."""
        self.model = NGBClassifier(**params)
        return self

    def fit(self, xtrain, ytrain, xtest=None, ytest=None, fit_params=None):
        """Fit the wrapped model.

        DataFrame inputs are converted to numpy arrays first. When both
        xtest and ytest are given they are passed through as the NGBoost
        validation set (X_val / Y_val).
        """
        # Bug fix: `fit_params={}` was a shared mutable default argument.
        fit_params = {} if fit_params is None else fit_params
        # Bug fix: use isinstance / `is not None` instead of type() comparisons.
        if isinstance(xtrain, pd.DataFrame):
            xtrain = xtrain.values
            ytrain = ytrain.values
            if xtest is not None and ytest is not None:
                xtest = xtest.values
                ytest = ytest.values
        if xtest is None or ytest is None:
            self.model.fit(xtrain, ytrain, **fit_params)
        else:
            self.model.fit(xtrain, ytrain, X_val=xtest, Y_val=ytest, **fit_params)

    def predict(self, xs):
        """Hard class predictions from the wrapped model."""
        return self.model.predict(xs)

    def predict_proba(self, xs):
        """Class probabilities; a single 1-D sample is reshaped to one row."""
        if len(xs.shape) == 1:
            return self.model.predict_proba(xs.reshape(1, -1))
        return self.model.predict_proba(xs)
Пример #3
0
def test_classification(breast_cancer_data):
    """NGBClassifier end-to-end check on the breast-cancer fixture."""
    from sklearn.metrics import roc_auc_score, log_loss

    x_train, x_test, y_train, y_test = breast_cancer_data

    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)

    # loose score requirement so it isn't failing all the time
    assert roc_auc_score(y_test, ngb.predict(x_test)) >= 0.85

    preds = ngb.predict_proba(x_test)
    assert log_loss(y_test, preds) <= 0.30

    assert ngb.score(x_test, y_test) <= 0.30

    assert isinstance(ngb.pred_dist(x_test), Bernoulli)

    # AUC from the positive-class probability column
    assert roc_auc_score(y_test, preds[:, 1]) >= 0.85
Пример #4
0
class ModelNgbClassifier(Model):
    """NGBClassifier wrapper implementing the project's Model
    train/predict/save_model/load_model interface."""

    def train(self, tr_x, tr_y, va_x=None, va_y=None, te_x=None):
        """Fit an NGBClassifier; the validation set (va_x, va_y) is optional.

        Labels are cast to int before fitting. early_stopping_rounds is
        popped from the params dict so it is not passed to the constructor.
        """
        # Hyper-parameter setup.
        params = dict(self.params)
        early_stopping_rounds = params.pop('early_stopping_rounds')

        self.model = NGBClassifier(**params)
        if va_x is None or va_y is None:
            # Bug fix: the original dereferenced va_x.values unconditionally,
            # so the documented None defaults raised AttributeError.
            self.model.fit(tr_x.values, tr_y.astype(int).values)
        else:
            self.model.fit(tr_x.values,
                           tr_y.astype(int).values,
                           va_x.values,
                           va_y.astype(int).values,
                           early_stopping_rounds=early_stopping_rounds)

    def predict(self, te_x):
        """Positive-class probability for each row of te_x."""
        return self.model.predict_proba(te_x.values)[:, 1]

    def save_model(self):
        """Persist the fitted model to ../output/model/<run_fold_name>.model."""
        model_path = os.path.join('../output/model',
                                  f'{self.run_fold_name}.model')
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        Data.dump(self.model, model_path)

    def load_model(self):
        """Load the previously saved model for this run/fold."""
        model_path = os.path.join('../output/model',
                                  f'{self.run_fold_name}.model')
        self.model = Data.load(model_path)
Пример #5
0
def test_bernoulli(learner, breast_cancer_data: Tuple4Array):
    """Fit/predict smoke test for NGBClassifier with a Bernoulli distribution."""
    x_tr, x_te, y_tr, y_te = breast_cancer_data
    # test early stopping features
    # test other args, n_trees, LR, minibatching- args as fixture
    model = NGBClassifier(Dist=Bernoulli, Score=LogScore, Base=learner, verbose=False)
    model.fit(x_tr, y_tr)
    model.predict(x_te)
    model.predict_proba(x_te)
    model.pred_dist(x_te)
Пример #6
0
def test_categorical(k: int, learner, breast_cancer_data: Tuple4Array):
    """Fit/predict smoke test for a k-class categorical distribution."""
    x_tr, x_te, labels, _ = breast_cancer_data
    # replace the binary fixture labels with random k-class labels
    labels = np.random.randint(0, k, (len(labels)))
    # test early stopping features
    model = NGBClassifier(Dist=k_categorical(k), Score=LogScore, Base=learner, verbose=False)
    model.fit(x_tr, labels)
    model.predict(x_te)
    model.predict_proba(x_te)
    model.pred_dist(x_te)
Пример #7
0
	def test_bernoulli(self, learners, cls_data):
		"""Bernoulli smoke test across every candidate base learner."""
		x_tr, x_te, y_tr, y_te = cls_data
		for base in learners:
			# test early stopping features
			# test other args, n_trees, LR, minibatching- args as fixture
			model = NGBClassifier(Dist=Bernoulli, Score=LogScore, Base=base, verbose=False)
			model.fit(x_tr, y_tr)
			model.predict(x_te)
			model.predict_proba(x_te)
			model.pred_dist(x_te)
Пример #8
0
class NGBoost(BaseEstimator, ClassifierMixin):
    """Sklearn-compatible binary-classification wrapper around NGBClassifier."""

    def __init__(self, **params):
        logger.info('Initializing NGBoost...')
        # Constructor params are forwarded verbatim to NGBClassifier in fit().
        self.params_ = params
        self.classes_ = np.array([0, 1])

    def get_params(self, deep=True):
        return self.params_

    def _to_numpy(self, X):
        """Coerce a DataFrame/Series/ndarray to an ndarray; raise on anything else."""
        if isinstance(X, (pd.DataFrame, pd.Series)):
            try:
                return X.to_numpy()
            # Bug fix: was a bare `except:` that built the ValueError but
            # never raised it, silently returning None.
            except Exception as err:
                raise ValueError('There is error when converting to numpy') from err
        if isinstance(X, np.ndarray):
            return X
        # Bug fix: this error was also constructed but never raised.
        raise ValueError('X must be pandas DataFrame, Series or numpy ndarray')

    def fit(self, X, y, *args, **kwargs):
        """Fit an NGBClassifier on X/y (labels cast to int); returns self."""
        logger.info(f'NGBoost, fit')
        logger.info(f'NGBoost, training data shape {X.shape}')
        logger.info(f'NGBoost, training label shape {y.shape}')

        X_np = self._to_numpy(X)
        y_np = self._to_numpy(y)
        y_np = y_np.astype(int)
        # Bug fix: the f-string had no braces and printed the literal text
        # 'np.unique(y_np)' instead of the unique label values.
        print(f'{np.unique(y_np)}')
        self.estimator_ = NGBClassifier(**self.params_)
        self.estimator_.fit(X_np, y_np)
        logger.info(f'NGBoost, done fit')
        return self

    def transform(self, X, *args, **kwargs):
        """Return the positive-class probability for each row of X as a 1-D array."""
        logger.info(f'NGBoost, transform')
        logger.info(f'NGBoost, transform, testing shape: {X.shape}')
        X_np = self._to_numpy(X)
        pred = self.estimator_.predict_proba(X_np)[:, 1].reshape(-1)
        logger.info(f'NGBoost, transform, predictions shape: {pred.shape}')
        logger.info(f'NGBoost, done transform')
        return pred

    def score(self, X, y, *args, **kwargs):
        """ROC AUC of the positive-class probabilities against y."""
        return roc_auc_score(y, self.transform(X))

    def predict_proba(self, X, *args, **kwargs):
        """Full (n_samples, n_classes) probability matrix from the fitted estimator."""
        logger.info(f'NGBoost, predict_proba')
        logger.info(f'NGBoost, predict_proba, testing shape: {X.shape}')
        X_np = self._to_numpy(X)
        pred = self.estimator_.predict_proba(X_np)
        logger.info(f'NGBoost, predict_proba, done')
        return pred
Пример #9
0
	def test_categorical(self, learners, cls_data):
		"""Categorical-distribution smoke test for K in {2, 4, 7} across learners."""
		x_tr, x_te, y_tr, y_te = cls_data
		for num_classes in [2,4,7]:
			dist = k_categorical(num_classes)
			# random labels in [0, K) so every class can appear
			y_tr = np.random.randint(0,num_classes,(len(y_tr)))

			for base in learners:
				# test early stopping features
				model = NGBClassifier(Dist=dist, Score=LogScore, Base=base, verbose=False)
				model.fit(x_tr, y_tr)
				model.predict(x_te)
				model.predict_proba(x_te)
				model.pred_dist(x_te)
Пример #10
0
def fixture_learners_data(breast_cancer_data, boston_data,
                          boston_survival_data):
    """
    Returns:
        A list of iterables,
        each iterable containing a fitted model and
        X data and the predictions for the X_data
    """
    models_data = []

    # classifier fitted on the breast-cancer features
    X_class_train, _, Y_class_train, _ = breast_cancer_data
    clf = NGBClassifier(verbose=False, n_estimators=10)
    clf.fit(X_class_train, Y_class_train)
    models_data.append((clf, X_class_train, clf.predict(X_class_train)))

    # regressor fitted on the boston features
    X_reg_train, _, Y_reg_train, _ = boston_data
    reg = NGBRegressor(verbose=False, n_estimators=10)
    reg.fit(X_reg_train, Y_reg_train)
    models_data.append((reg, X_reg_train, reg.predict(X_reg_train)))

    # survival model fitted on (time, event) targets
    X_surv_train, _, T_surv_train, E_surv_train, _ = boston_survival_data
    surv = NGBSurvival(verbose=False, n_estimators=10)
    surv.fit(X_surv_train, T_surv_train, E_surv_train)
    models_data.append((surv, X_surv_train, surv.predict(X_surv_train)))

    # bivariate-normal regressor over the stacked (time, event) columns
    mvn = NGBRegressor(Dist=MultivariateNormal(2), n_estimators=10)
    mvn.fit(X_surv_train, np.vstack([T_surv_train, E_surv_train]).T)
    models_data.append((mvn, X_surv_train, mvn.predict(X_surv_train)))

    return models_data
Пример #11
0
    def NGBoost(self, args):  ## Natural gradient Boosting
        """Fit an NGBoost classifier or regressor (chosen by args.predictor),
        score self.X_data, and store the result in self.data['boosting_score'].

        Raises:
            ValueError: if args.predictor is neither 'classifier' nor 'regressor'.
        """
        logger.info("Running Natural Gradient Boosting ... ")
        ## https://stanfordmlgroup.github.io/ngboost/1-useage.html
        from ngboost.learners import default_tree_learner
        from ngboost.distns import k_categorical, Bernoulli  ##Classifier
        from ngboost.distns import Exponential, Normal, LogNormal  ## Regressor
        from ngboost.scores import MLE, LogScore, CRPScore

        ## Base Learner
        from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

        # NGBoost
        ## Comment: If error with singular matrix, increase size of input data from 0.05 to 0.15
        predictor = args.predictor.lower()
        if predictor == 'classifier':
            # Bug fix: the original imported the class as `ngb` and then rebound
            # that same name to the instance; keep them distinct.
            from ngboost import NGBClassifier
            learner = DecisionTreeRegressor(criterion='friedman_mse',
                                            max_depth=6,
                                            random_state=SEED)
            model = NGBClassifier(Base=learner,
                                  n_estimators=2000,
                                  Score=MLE,
                                  Dist=Bernoulli,
                                  random_state=SEED)

        elif predictor == 'regressor':
            from ngboost import NGBRegressor
            # NOTE(review): the original also built an unused max_depth=3 tree
            # here; the regressor uses default_tree_learner as its base —
            # confirm that was intentional.
            model = NGBRegressor(Base=default_tree_learner,
                                 Dist=Exponential,
                                 Score=LogScore,
                                 learning_rate=0.01,
                                 minibatch_frac=0.6,
                                 col_sample=0.6)
        else:
            # Bug fix: an unknown predictor previously fell through and `fit`
            # was called on the un-instantiated class (or raised NameError).
            raise ValueError(f"Unsupported predictor: {args.predictor!r}")

        ## Fit model
        model.fit(self.X_train, np.asarray(self.y_train).astype(int))

        ## Predict the labels
        self.y_pred = model.predict(self.X_data)

        if predictor == 'regressor':
            # map raw regression output to (0, 1) via the logistic CDF
            self.y_pred = logistic.cdf(self.y_pred)

        self.data['boosting_score'] = self.y_pred
        self.model = model
        return self
Пример #12
0
import numpy as np
from ngboost import NGBClassifier
from ngboost.distns import Bernoulli
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

if __name__ == "__main__":

    np.random.seed(12345)

    # Bug fix: passing `True` positionally was deprecated in scikit-learn 0.23
    # and removed in 1.1 — `return_X_y` must be given by keyword.
    X, Y = load_breast_cancer(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBClassifier(Base=default_tree_learner,
                        Dist=Bernoulli,
                        Score=MLE,
                        verbose=True,
                        natural_gradient=True)
    ngb.fit(X_train, Y_train)

    # pred_dist returns a fitted Bernoulli distribution; .prob is P(y=1) per row
    preds = ngb.pred_dist(X_test)
    print("ROC:", roc_auc_score(Y_test, preds.prob))
Пример #13
0
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from ngboost import NGBClassifier
from ngboost.distns import k_categorical

if __name__ == "__main__":

    # Bug fix: passing `True` positionally was deprecated in scikit-learn 0.23
    # and removed in 1.1 — `return_X_y` must be given by keyword.
    X, y = load_breast_cancer(return_X_y=True)
    # artificially make this a 3-class problem instead of a 2-class problem
    y[0:15] = 2
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)

    # tell ngboost that there are 3 possible outcomes
    ngb = NGBClassifier(Dist=k_categorical(3))
    ngb.fit(X_train, Y_train)  # Y should have only 3 values: {0,1,2}

    # predicted probabilities of class 0, 1, and 2 (columns) for each observation (row)
    preds = ngb.predict_proba(X_test)
Пример #14
0
 def ng_model(self):
     """Fit a 2-class categorical NGBoost model on self.X_t / self.y_t,
     print its feature importances, and return the fitted classifier."""
     classifier = NGBClassifier(Dist=k_categorical(2), verbose=True)
     fitted = classifier.fit(self.X_t, self.y_t)
     print(fitted.feature_importances_)
     return fitted