Пример #1
0
class myNGBoostClassifier:
    """Thin wrapper around ``NGBClassifier`` accepting pandas or NumPy inputs.

    The wrapped estimator is created by :meth:`make` and stored on
    ``self.model``; every other method delegates to it.
    """

    def make(self, params):
        """Create the underlying ``NGBClassifier`` from ``params``.

        Args:
            params: Mapping of keyword arguments forwarded to ``NGBClassifier``.

        Returns:
            This wrapper, so calls can be chained.
        """
        self.model = NGBClassifier(**params)
        return self

    @staticmethod
    def _as_array(data):
        """Return ``data.values`` for pandas objects, ``data`` unchanged otherwise."""
        if isinstance(data, (pd.DataFrame, pd.Series)):
            return data.values
        return data

    def fit(self, xtrain, ytrain, xtest=None, ytest=None, fit_params=None):
        """Fit the wrapped model, optionally with a validation set.

        Each input is converted from pandas to NumPy independently, so mixed
        inputs (e.g. a DataFrame of features with an ndarray of labels) work.

        Args:
            xtrain: Training features (DataFrame or array).
            ytrain: Training labels (Series/DataFrame or array).
            xtest: Optional validation features, passed as ``X_val``.
            ytest: Optional validation labels, passed as ``Y_val``.
            fit_params: Optional extra keyword arguments for ``model.fit``.
        """
        # Avoid a shared mutable default argument.
        extra = {} if fit_params is None else fit_params
        xtrain = self._as_array(xtrain)
        ytrain = self._as_array(ytrain)

        # Validation data is only used when both halves are supplied.
        if xtest is None or ytest is None:
            self.model.fit(xtrain, ytrain, **extra)
            return

        self.model.fit(
            xtrain,
            ytrain,
            X_val=self._as_array(xtest),
            Y_val=self._as_array(ytest),
            **extra
        )

    def predict(self, xs):
        """Return the wrapped model's predictions for ``xs``."""
        return self.model.predict(xs)

    def predict_proba(self, xs):
        """Return class probabilities for ``xs``.

        A 1-D input is treated as a single sample and reshaped to ``(1, -1)``
        before being passed to the wrapped model.
        """
        if len(xs.shape) == 1:
            xs = xs.reshape(1, -1)
        return self.model.predict_proba(xs)
Пример #2
0
class NGBoost(BaseEstimator, ClassifierMixin):
    """scikit-learn style wrapper around ``NGBClassifier`` for labels 0/1.

    Constructor keyword arguments are stored in ``params_`` and forwarded to
    ``NGBClassifier`` when :meth:`fit` is called. ``classes_`` is fixed to
    ``[0, 1]`` and :meth:`transform` returns the probability in column 1.
    """

    def __init__(self, **params):
        logger.info('Initializing NGBoost...')
        self.params_ = params
        self.classes_ = np.array([0, 1])

    def get_params(self, deep=True):
        """Return the keyword arguments given to the constructor.

        ``deep`` is accepted for scikit-learn compatibility and is unused.
        """
        return self.params_

    def _to_numpy(self, X):
        """Convert ``X`` to a NumPy array.

        Args:
            X: pandas DataFrame, pandas Series, or NumPy ndarray.

        Returns:
            ``X`` itself for ndarrays, otherwise ``X.to_numpy()``.

        Raises:
            ValueError: If ``X`` has an unsupported type or conversion fails.
        """
        if isinstance(X, np.ndarray):
            return X
        if isinstance(X, (pd.DataFrame, pd.Series)):
            try:
                return X.to_numpy()
            except Exception as err:
                # Previously the error object was built but never raised,
                # so the method silently returned None.
                raise ValueError(
                    'There is error when converting to numpy') from err
        raise ValueError('X must be pandas DataFrame, Series or numpy ndarray')

    def fit(self, X, y, *args, **kwargs):
        """Fit a fresh ``NGBClassifier`` on ``X`` and integer-cast ``y``.

        Extra positional and keyword arguments are accepted and ignored.

        Returns:
            This estimator.
        """
        logger.info('NGBoost, fit')
        logger.info(f'NGBoost, training data shape {X.shape}')
        logger.info(f'NGBoost, training label shape {y.shape}')

        X_np = self._to_numpy(X)
        y_np = self._to_numpy(y).astype(int)
        # The original debug print emitted the literal text
        # "np.unique(y_np)"; log the actual label values instead.
        logger.info(f'NGBoost, unique labels {np.unique(y_np)}')
        self.estimator_ = NGBClassifier(**self.params_)
        self.estimator_.fit(X_np, y_np)
        logger.info('NGBoost, done fit')
        return self

    def transform(self, X, *args, **kwargs):
        """Return the column-1 class probability for each row as a 1-D array."""
        logger.info('NGBoost, transform')
        logger.info(f'NGBoost, transform, testing shape: {X.shape}')
        X_np = self._to_numpy(X)
        pred = self.estimator_.predict_proba(X_np)[:, 1].reshape(-1)
        logger.info(f'NGBoost, transform, predictions shape: {pred.shape}')
        logger.info('NGBoost, done transform')
        return pred

    def score(self, X, y, *args, **kwargs):
        """Return the ROC AUC of :meth:`transform` output against ``y``."""
        return roc_auc_score(y, self.transform(X))

    def predict_proba(self, X, *args, **kwargs):
        """Return the fitted estimator's full ``predict_proba`` output."""
        logger.info('NGBoost, predict_proba')
        logger.info(f'NGBoost, predict_proba, testing shape: {X.shape}')
        X_np = self._to_numpy(X)
        pred = self.estimator_.predict_proba(X_np)
        logger.info('NGBoost, predict_proba, done')
        return pred
Пример #3
0
def test_classification():
    """Check NGBClassifier with a Bernoulli distribution on breast-cancer data.

    Fits on an 80/20 split (fixed ``random_state``) and asserts bounds on
    ROC AUC, log loss, and ``ngb.score``, plus the type returned by
    ``pred_dist``.
    """
    from sklearn.datasets import load_breast_cancer
    from sklearn.metrics import roc_auc_score, log_loss

    # ``return_X_y`` is keyword-only in current scikit-learn releases;
    # passing it positionally raises a TypeError there.
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBClassifier(Dist=Bernoulli, verbose=False)
    ngb.fit(x_train, y_train)

    # ROC AUC computed from the hard class predictions.
    preds = ngb.predict(x_test)
    score = roc_auc_score(y_test, preds)
    assert score >= 0.95

    preds = ngb.predict_proba(x_test)
    score = log_loss(y_test, preds)
    assert score <= 0.20

    # NOTE(review): ngb.score is held to the same bound as the log loss;
    # presumably it returns a negative log-likelihood - confirm.
    score = ngb.score(x_test, y_test)
    assert score <= 0.20

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Bernoulli)

    # ROC AUC computed from the column-1 probabilities.
    score = roc_auc_score(y_test, preds[:, 1])
    assert score >= 0.95
Пример #4
0
def test_classification(breast_cancer_data):
    """Check NGBClassifier (Bernoulli) metrics on the breast-cancer fixture.

    ``breast_cancer_data`` is unpacked as
    ``(x_train, x_test, y_train, y_test)``.
    """
    from sklearn.metrics import log_loss, roc_auc_score

    train_x, test_x, train_y, test_y = breast_cancer_data
    model = NGBClassifier(Dist=Bernoulli, verbose=False)
    model.fit(train_x, train_y)

    # Thresholds are deliberately loose so the test is not flaky.
    hard_labels = model.predict(test_x)
    assert roc_auc_score(test_y, hard_labels) >= 0.85

    probabilities = model.predict_proba(test_x)
    assert log_loss(test_y, probabilities) <= 0.30

    assert model.score(test_x, test_y) <= 0.30

    assert isinstance(model.pred_dist(test_x), Bernoulli)

    # AUC again, this time from the column-1 probabilities.
    assert roc_auc_score(test_y, probabilities[:, 1]) >= 0.85
Пример #5
0
class ModelNgbClassifier(Model):
    """``Model`` implementation backed by ``NGBClassifier``."""

    def train(self, tr_x, tr_y, va_x=None, va_y=None, te_x=None):
        """Fit an ``NGBClassifier`` on the training data.

        Args:
            tr_x: Training features; ``.values`` is passed to the model.
            tr_y: Training labels; cast to ``int`` before fitting.
            va_x: Optional validation features.
            va_y: Optional validation labels; cast to ``int`` when used.
            te_x: Unused by this method.
        """
        # Set up hyperparameters. Work on a copy so that popping the
        # fit-only option does not mutate ``self.params``.
        params = dict(self.params)
        early_stopping_rounds = params.pop('early_stopping_rounds', None)

        # The signature allows training without validation data, so only
        # dereference va_x / va_y when both were actually supplied.
        has_validation = va_x is not None and va_y is not None
        x_val = va_x.values if has_validation else None
        y_val = va_y.astype(int).values if has_validation else None

        self.model = NGBClassifier(**params)
        self.model.fit(tr_x.values,
                       tr_y.astype(int).values,
                       x_val,
                       y_val,
                       early_stopping_rounds=early_stopping_rounds)

    def predict(self, te_x):
        """Return the column-1 class probability for each row of ``te_x``."""
        return self.model.predict_proba(te_x.values)[:, 1]

    def _model_path(self):
        """Return the file path used to persist this run/fold's model."""
        return os.path.join('../output/model',
                            f'{self.run_fold_name}.model')

    def save_model(self):
        """Dump ``self.model`` to disk, creating the directory if needed."""
        model_path = self._model_path()
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        Data.dump(self.model, model_path)

    def load_model(self):
        """Load ``self.model`` from the path written by ``save_model``."""
        self.model = Data.load(self._model_path())
Пример #6
0
def test_bernoulli(learner, breast_cancer_data: Tuple4Array):
    """Smoke-test a Bernoulli NGBClassifier with the given base learner.

    Fits on the training split and calls ``predict``, ``predict_proba`` and
    ``pred_dist`` on the test split; no assertions are made on the outputs.
    """
    train_x, test_x, train_y, _test_y = breast_cancer_data
    # Still to cover: early stopping, and other args (n_trees, LR,
    # minibatching), with the args supplied as a fixture.
    model = NGBClassifier(
        Dist=Bernoulli,
        Score=LogScore,
        Base=learner,
        verbose=False,
    )
    model.fit(train_x, train_y)
    model.predict(test_x)
    model.predict_proba(test_x)
    model.pred_dist(test_x)
Пример #7
0
def test_categorical(k: int, learner, breast_cancer_data: Tuple4Array):
    """Smoke-test a ``k``-class categorical NGBClassifier.

    The fixture's training labels are replaced by random integers in
    ``[0, k)``. The model is fitted and its three prediction methods are
    called on the test features; no assertions are made on the outputs.
    """
    train_x, test_x, original_labels, _ = breast_cancer_data
    categorical = k_categorical(k)
    random_labels = np.random.randint(0, k, len(original_labels))
    # Still to cover: early stopping.
    model = NGBClassifier(
        Dist=categorical,
        Score=LogScore,
        Base=learner,
        verbose=False,
    )
    model.fit(train_x, random_labels)
    model.predict(test_x)
    model.predict_proba(test_x)
    model.pred_dist(test_x)
Пример #8
0
	def test_bernoulli(self, learners, cls_data):
		"""Smoke-test a Bernoulli NGBClassifier with every base learner.

		For each learner the model is fitted on the training split and its
		prediction methods are called on the test split; no assertions are
		made on the outputs.
		"""
		train_x, test_x, train_y, _test_y = cls_data
		for base in learners:
			# Still to cover: early stopping, and other args (n_trees, LR,
			# minibatching), with the args supplied as a fixture.
			model = NGBClassifier(
				Dist=Bernoulli,
				Score=LogScore,
				Base=base,
				verbose=False,
			)
			model.fit(train_x, train_y)
			model.predict(test_x)
			model.predict_proba(test_x)
			model.pred_dist(test_x)
Пример #9
0
	def test_categorical(self, learners, cls_data):
		"""Smoke-test categorical NGBClassifiers with 2, 4 and 7 classes.

		For each class count, random integer labels of the training-set
		length are drawn and every base learner is fitted on them; the
		prediction methods are then called with no assertions on the outputs.
		"""
		train_x, test_x, train_y, _test_y = cls_data
		for n_classes in [2, 4, 7]:
			categorical = k_categorical(n_classes)
			# Only the length of the previous labels is used.
			train_y = np.random.randint(0, n_classes, len(train_y))

			for base in learners:
				# Still to cover: early stopping.
				model = NGBClassifier(
					Dist=categorical,
					Score=LogScore,
					Base=base,
					verbose=False,
				)
				model.fit(train_x, train_y)
				model.predict(test_x)
				model.predict_proba(test_x)
				model.pred_dist(test_x)
Пример #10
0
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from ngboost import NGBClassifier
from ngboost.distns import k_categorical

if __name__ == "__main__":
    # Example: multi-class classification with NGBoost on a 3-class variant
    # of the breast-cancer dataset.

    # ``return_X_y`` is keyword-only in current scikit-learn releases;
    # passing it positionally raises a TypeError there.
    X, y = load_breast_cancer(return_X_y=True)

    # artificially make this a 3-class problem instead of a 2-class problem
    y[0:15] = 2
    X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2)

    # tell ngboost that there are 3 possible outcomes
    ngb = NGBClassifier(Dist=k_categorical(3))
    ngb.fit(X_train, Y_train)  # Y should have only 3 values: {0,1,2}

    # predicted probabilities of class 0, 1, and 2 (columns) for each observation (row)
    preds = ngb.predict_proba(X_test)