Пример #1
0
 def __init__(self, Dist=Normal, Score=MLE(),
              Base=default_tree_learner, natural_gradient=False,
              n_estimators=100, learning_rate=0.1, minibatch_frac=1.0,
              verbose=True, tol=1e-4):
     """Store the configuration and build the loss/gradient callables.

     Every argument is kept on the instance under its own name. On top of
     that the constructor creates:

     * ``loss_fn`` / ``grad_fn`` / ``hessian_fn`` - the summed score of
       ``Dist(P.T)`` against ``Y`` and transforms of it;
     * ``marginal_score`` - ``MLE_SURV()`` when ``Score`` is a
       ``CRPS_SURV``, ``MLE()`` when it is a ``CRPS``, otherwise ``Score``
       itself;
     * ``marginal_loss`` / ``marginal_grad`` - ``marginal_score`` applied
       to ``Dist(P)``, wrapped in ``jit(vmap(...))``;
     * ``matmul_inv_fn`` - ``np.linalg.solve`` wrapped in ``jit(vmap(...))``.

     NOTE(review): the default ``Score`` is one ``MLE()`` object created
     when this method is defined, and ``setup_distn`` is called on it
     below. If ``setup_distn`` stores state, instances that rely on the
     default would share it - confirm.
     """
     # Plain configuration.
     self.Dist, self.Score, self.Base = Dist, Score, Base
     self.natural_gradient, self.n_estimators = natural_gradient, n_estimators
     self.learning_rate, self.minibatch_frac = learning_rate, minibatch_frac
     self.verbose = verbose

     # State that starts out empty.
     self.init_params = None
     self.base_models, self.scalings = [], []
     self.tol = tol

     def summed_score(P, Y):
         # self.Score / self.Dist are looked up at call time, not bound here.
         return self.Score(self.Dist(P.T), Y).sum()

     self.loss_fn = summed_score
     self.grad_fn = grad(summed_score)
     self.hessian_fn = jit(vmap(jacrev(grad(summed_score))))

     self.Score.setup_distn(self.Dist)

     # Pick the score used for the marginal fit.
     configured = self.Score
     self.marginal_score = (
         MLE_SURV() if isinstance(configured, CRPS_SURV)
         else MLE() if isinstance(configured, CRPS)
         else configured
     )

     def marginal(P, Y):
         return self.marginal_score(self.Dist(P), Y)

     # The gradient is taken of the unwrapped function; the loss attribute
     # itself only ever holds the jit(vmap(...)) version once we are done.
     batched_marginal_grad = jit(vmap(grad(marginal)))
     self.marginal_loss = jit(vmap(marginal))
     self.marginal_grad = batched_marginal_grad

     def solve_system(A, b):
         return np.linalg.solve(A, b)

     self.matmul_inv_fn = jit(vmap(solve_system))
Пример #2
0
def mvnorm_mle(Y, max_iter=1e4, lr=0.5, eps=1e-4):
    """Run natural-gradient descent on the MLE score of a MultivariateNormal.

    Starting from the column ``[0, 0, 1, 0, 1]`` repeated once per
    observation, each iteration averages the natural gradient over all
    observations and takes a step of size ``lr``.

    Args:
        Y: observations. Only ``Y.shape[0]`` is read here; the object itself
            is handed to ``MLE().grad``.
        max_iter: upper bound on the number of iterations. May be a float
            (the default is ``1e4``); it is truncated with ``int``.
        lr: step size.
        eps: the loop stops once the norm of the averaged gradient is
            below this value.

    NOTE(review): the code visible here returns nothing, so the fitted
    ``params`` are discarded - the tail of the function may be missing
    from this excerpt.
    """
    N = Y.shape[0]
    # One identical parameter column per observation -> shape (5, N).
    params = np.array([[0, 0, 1, 0, 1]] * N).T
    # range() rejects floats, and the default max_iter is the float 1e4.
    for _ in range(int(max_iter)):
        D = MultivariateNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - lr * grad
        if np.linalg.norm(grad) < eps:
            break
Пример #3
0
def lognormal_mle(Y, max_iter=1e4, lr=0.05, eps=1e-4, verbose=False):
    """Fit LogNormal parameters to ``Y`` by natural-gradient descent.

    Args:
        Y: observations. ``Y.shape[0]`` sets how many parameter columns are
            created; the object itself is passed to ``MLE().grad``.
        max_iter: maximum number of steps (truncated with ``int``).
        lr: step size.
        eps: stop once the norm of the averaged gradient is below this.
        verbose: when true, print the parameters and the previous step's
            gradient at steps 1, 501, 1001, ...

    Returns:
        A ``(mu, sigma)`` tuple taken from entries ``[0, 0]`` and ``[1, 0]``
        of the final parameter array.
    """
    n_obs = Y.shape[0]
    # One identical (0, 0) column per observation -> shape (2, n_obs).
    params = np.array([[0, 0]] * n_obs).T

    for step in range(int(max_iter)):
        if verbose and step % 500 == 1:
            # mean_grad still holds the value computed in the previous step.
            print('Param: ', params[:, :2])
            print('Grad: ', mean_grad)

        dist = LogNormal(params)
        scorer = MLE()
        per_obs_grad = scorer.grad(dist, Y, natural=True)
        mean_grad = np.mean(per_obs_grad.T, axis=1, keepdims=True)
        params = params - lr * mean_grad

        if np.linalg.norm(mean_grad) < eps:
            break

    return params[0, 0], params[1, 0]
Пример #4
0
    def objective(trial):
        """Score one hyper-parameter draw as ``1 - R^2`` of cross-validated predictions.

        ``trial`` must provide ``suggest_loguniform``, ``suggest_int`` and
        ``suggest_discrete_uniform`` (Optuna-style trial object - confirm).
        ``best_base``, ``train_x``, ``train_y`` and ``fold_number`` come from
        the enclosing scope.
        """
        # Draw the three tuned values, in the same order on every call.
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-3, 1e0)
        n_estimators = trial.suggest_int('n_estimators', 100, 800)
        minibatch_frac = trial.suggest_discrete_uniform('minibatch_frac', 0.1, 0.9, 0.1)

        regression_model = NGBRegressor(
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            minibatch_frac=minibatch_frac,
            Base=best_base,
            Dist=Normal,
            Score=MLE(),
            natural_gradient=True,
            verbose=False,
        )
        cv_predictions = model_selection.cross_val_predict(
            regression_model, train_x, train_y, cv=fold_number)

        # Returned as 1 - R^2, so a smaller value means a better fit.
        return 1.0 - metrics.r2_score(train_y, cv_predictions)
Пример #5
0
def estimate_infcens(Y):
    """Estimate the first distribution parameter under two models of ``Y``.

    Two natural-gradient descents on the MLE score are run, one with a
    ``MultivariateNormal`` and one with a ``LogNormal``. Each stops after
    100000 steps or once the averaged gradient norm drops below 1e-4.

    Args:
        Y: observations. ``Y.shape[0]`` sets the number of parameter
            columns; the object itself is passed to ``MLE().grad``.

    Returns:
        dict with ``'joint'`` (entry ``[0, 0]`` of the MultivariateNormal
        parameters) and ``'lognorm'`` (entry ``[0, 0]`` of the LogNormal
        parameters). Both values are also printed.
    """
    res = {}
    # Take the sample count from Y itself instead of relying on a
    # module-level N that may be missing or out of sync with the argument.
    N = Y.shape[0]

    # Joint model: step size 1.
    params = np.array([[0, 0, 1, 0, 1]] * N).T
    for _ in range(100000):
        D = MultivariateNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - 1 * grad
        if np.linalg.norm(grad) < 1e-4:
            break

    print('Jointly Estimated E:', params[0, 0])
    res['joint'] = params[0, 0]

    # LogNormal model: much smaller step size (0.005).
    params = np.array([[0, 0]] * N).T
    for _ in range(100000):
        D = LogNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - 0.005 * grad
        if np.linalg.norm(grad) < 1e-4:
            break

    print('Estimate E (assume non-inf):', params[0, 0])
    res['lognorm'] = params[0, 0]
    return res
Пример #6
0
 def __init__(self, Dist=Normal, Score=MLE(),
              Base=default_tree_learner, natural_gradient=False,
              n_estimators=500, learning_rate=0.01, minibatch_frac=1.0,
              verbose=True, verbose_eval=100, tol=1e-4):
     """Record the constructor arguments and reset the fitted state.

     Each argument is stored on the instance under its own name.
     ``init_params`` starts as ``None``; ``base_models`` and ``scalings``
     start as fresh empty lists.
     """
     # Model components.
     self.Dist, self.Score, self.Base = Dist, Score, Base

     # Boosting settings.
     self.natural_gradient, self.n_estimators = natural_gradient, n_estimators
     self.learning_rate, self.minibatch_frac = learning_rate, minibatch_frac

     # Logging settings.
     self.verbose, self.verbose_eval = verbose, verbose_eval

     # Nothing has been fitted yet.
     self.init_params = None
     self.base_models, self.scalings = [], []

     self.tol = tol
Пример #7
0
from ngboost.ngboost import NGBoost
from ngboost.distns import Bernoulli
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

if __name__ == "__main__":

    # return_X_y is passed by keyword: recent scikit-learn releases accept
    # it only as a keyword argument, so the positional form raises TypeError.
    X, Y = load_breast_cancer(return_X_y=True)
    # Hold out 20% of the rows for evaluation (split is not seeded).
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBoost(Base=default_tree_learner,
                  Dist=Bernoulli,
                  Score=MLE(),
                  verbose=True)
    ngb.fit(X_train, Y_train)

    # Score the held-out rows using the predicted distribution's `prob`.
    preds = ngb.pred_dist(X_test)
    print("ROC:", roc_auc_score(Y_test, preds.prob))
Пример #8
0
    # Benchmark two models on the same split: each one is fitted, used to
    # predict the test set, timed, evaluated and saved.
    print("Models")

    # --- MLPQuantile: the timer covers fit + predict ---
    start = datetime.now().timestamp()
    qreg = MLPQuantile()
    qreg.fit(X_train_std,y_train)
    preds = qreg.predict(X_test_std)
    end = datetime.now().timestamp()
    # exp(x) - 1 is applied to predictions and targets before evaluation
    # (presumably undoing a log1p transform of the target - confirm).
    results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values)
    results["duration"]=end-start
    save_result([horizon,
                    "MLP",
                    results,
                    1],f"unit_{horizon}",folder)

    # --- NGBoost: the timer covers fit, pred_dist and the ppf table below ---
    start = datetime.now().timestamp()
    ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE(), natural_gradient=True,
              verbose=True,n_estimators=1500)
    ngb.fit(X_train_std, y_train.values)
    Y_dists = ngb.pred_dist(X_test_std)
    # One column per level 0.01, 0.02, ..., 0.99 of the predicted distribution.
    a=pd.DataFrame()
    for i in np.arange(1,100):
        a[i]=Y_dists.ppf(i/100)
    preds = a.values
    end = datetime.now().timestamp()
    # Same exp(x) - 1 treatment as for the MLP results above.
    results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values)
    results["duration"]=end-start
    save_result([horizon,
                    "NGBOOST",
                    results,
                    1],f"unit_{horizon}",folder)
Пример #9
0
    def train_half(self,
                   params,
                   train_data,
                   num_boost_round,
                   early_stopping_rounds,
                   verbose,
                   importance_df,
                   use_feature_num=None):
        """Train two models, each on one half of the rows and validated on the other.

        The rows are split at the midpoint in their existing order. Which
        library is used depends on ``self.model_type``: ``'lgb'``
        (LightGBM), ``'cat'`` (CatBoost) or ``'ng'`` (NGBoost). Any other
        value trains nothing and an empty list is returned.

        Args:
            params: model parameters for the ``'lgb'`` and ``'cat'``
                branches. The dict is not modified.
            train_data: object exposing ``X``, ``y`` and ``phase``; ``phase``
                must equal ``'train'``.
            num_boost_round: number of boosting rounds / iterations /
                estimators.
            early_stopping_rounds: passed through to LightGBM and CatBoost.
            verbose: passed to the library's verbosity argument.
            importance_df: table with a ``'feature'`` column; only read when
                ``use_feature_num`` is given.
            use_feature_num: if given, keep only the first
                ``use_feature_num`` entries of ``importance_df['feature']``;
                otherwise all columns of ``train_data.X`` are used.

        Returns:
            ``self.models``: ``[first-half model, second-half model]``.

        Side effects: sets ``self.features`` and ``self.models``;
        ``self.data``, ``self.X_train`` and ``self.y_train`` are created and
        deleted again before returning.
        """
        print('Model Creating...')
        self.data = train_data

        if use_feature_num is not None:
            self.features = importance_df['feature'][:use_feature_num].tolist()
        else:
            self.features = self.data.X.columns

        self.models = []
        assert self.data.phase == 'train', 'Use Train Dataset!'

        # The column named 'M' is never used as a feature.
        self.features = [c for c in self.features if c != 'M']

        self.X_train = self.data.X[self.features]
        self.y_train = self.data.y

        del self.data
        gc.collect()

        # Row ranges of the two halves, computed once.
        half = self.X_train.shape[0] // 2
        row_parts = (slice(None, half), slice(half, None))
        # (banner, index of the part to fit on, index of the part to validate on)
        folds = (
            ("Building model with first half and validating on second half:", 0, 1),
            ("Building model with second half and validating on first half:", 1, 0),
        )

        def announce(fold_no, banner):
            # A blank line separates the second fold's output from the first's.
            if fold_no:
                print('')
            print(banner)

        if self.model_type == 'lgb':
            print('LightGBM Model Creating...')
            datasets = [
                lgb.Dataset(self.X_train[rows], label=self.y_train[rows])
                for rows in row_parts
            ]

            for fold_no, (banner, fit_i, val_i) in enumerate(folds):
                announce(fold_no, banner)
                model = lgb.train(params,
                                  train_set=datasets[fit_i],
                                  num_boost_round=num_boost_round,
                                  valid_sets=[datasets[fit_i], datasets[val_i]],
                                  verbose_eval=verbose,
                                  early_stopping_rounds=early_stopping_rounds)
                self.models.append(model)

        elif self.model_type == 'cat':
            print('CatBoost Model Creating...')
            cat_features_index = np.where(self.X_train.dtypes == 'category')[0]
            pools = [
                Pool(self.X_train[rows],
                     label=self.y_train[rows],
                     cat_features=cat_features_index)
                for rows in row_parts
            ]

            # Work on a copy so the caller's dict does not gain an
            # 'iterations' key as a side effect of this call.
            cat_params = dict(params)
            cat_params['iterations'] = num_boost_round

            for fold_no, (banner, fit_i, val_i) in enumerate(folds):
                announce(fold_no, banner)
                model = CatBoostRegressor(**cat_params)
                model.fit(pools[fit_i],
                          eval_set=pools[val_i],
                          use_best_model=True,
                          early_stopping_rounds=early_stopping_rounds,
                          verbose=verbose)
                self.models.append(model)

        elif self.model_type == 'ng':
            print('NGBoost Model Creating...')

            # Missing values are replaced by the sentinel -9999 before fitting.
            self.X_train[np.isnan(self.X_train.astype(np.float32))] = -9999

            for fold_no, (banner, fit_i, val_i) in enumerate(folds):
                announce(fold_no, banner)
                fit_rows, val_rows = row_parts[fit_i], row_parts[val_i]
                model = NGBRegressor(
                    Base=default_tree_learner,
                    Dist=Normal,  # Normal, LogNormal
                    Score=MLE(),
                    natural_gradient=True,
                    verbose=True,
                    n_estimators=num_boost_round,
                    verbose_eval=verbose,
                    learning_rate=0.01,
                    minibatch_frac=1.0)
                model.fit(self.X_train[fit_rows],
                          self.y_train[fit_rows],
                          X_val=self.X_train[val_rows],
                          Y_val=self.y_train[val_rows])
                self.models.append(model)

        del self.X_train, self.y_train
        gc.collect()

        return self.models
Пример #10
0
def Y_join(T, E):
    """Pack event indicators and exponentiated times into one structured array.

    Args:
        T: array whose first dimension gives the number of records; its
            values are passed through ``np.exp`` before being stored.
        E: event indicators, stored as booleans.

    Returns:
        A structured array of length ``T.shape[0]`` with a boolean field
        ``'Event'`` (from ``E``) and a float64 field ``'Time'``
        (``np.exp(T)``).
    """
    col_event = 'Event'
    col_time = 'Time'
    # np.bool_ is used instead of the np.bool alias, which NumPy deprecated
    # in 1.20 and removed in 1.24; np.bool_ works on every release.
    y = np.empty(dtype=[(col_event, np.bool_), (col_time, np.float64)],
                 shape=T.shape[0])
    y[col_event] = E
    y[col_time] = np.exp(T)
    return y


# Build the structured (Event, Time) array from the module-level T and E.
Y = Y_join(T, E)

# --- Joint model: natural-gradient descent on the MLE score of a
# MultivariateNormal, starting from [0, 0, 1, 0, 1] repeated N times
# (N is expected to be defined earlier in the script). ---
params = np.array([[0, 0, 1, 0, 1]] * N).T
for _ in range(100000):
    D = MultivariateNormal(params)
    S = MLE()
    # Average the per-observation natural gradients into one column.
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 1 * grad  # step size 1
    # Stop early once the averaged gradient is close to zero.
    if np.linalg.norm(grad) < 1e-4:
        break

print('Jointly Estimated E:', params[0, 0])

# --- LogNormal model: same procedure with a 2-parameter start and
# step size 0.1. ---
params = np.array([[0, 0]] * N).T
for _ in range(100000):
    D = LogNormal(params)
    S = MLE()
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 0.1 * grad
    if np.linalg.norm(grad) < 1e-4:
        break