def __init__(self, Dist=Normal, Score=MLE(), Base=default_tree_learner,
             natural_gradient=False, n_estimators=100, learning_rate=0.1,
             minibatch_frac=1.0, verbose=True, tol=1e-4):
    self.Dist = Dist
    self.Score = Score
    self.Base = Base
    self.natural_gradient = natural_gradient
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    self.minibatch_frac = minibatch_frac
    self.verbose = verbose
    self.init_params = None
    self.base_models = []
    self.scalings = []
    self.tol = tol

    # JAX-transformed loss/gradient/Hessian functions
    self.loss_fn = lambda P, Y: self.Score(self.Dist(P.T), Y).sum()
    self.grad_fn = grad(self.loss_fn)
    # self.grad_fn = jit(vmap(grad(self.loss_fn)))
    self.hessian_fn = jit(vmap(jacrev(grad(self.loss_fn))))
    # self.loss_fn = jit(vmap(self.loss_fn))

    self.Score.setup_distn(self.Dist)
    if isinstance(self.Score, CRPS_SURV):
        self.marginal_score = MLE_SURV()
    elif isinstance(self.Score, CRPS):
        self.marginal_score = MLE()
    else:
        self.marginal_score = self.Score

    # note: marginal_grad must capture the raw lambda before the
    # marginal loss itself is wrapped in jit(vmap(...)) below
    self.marginal_loss = lambda P, Y: self.marginal_score(self.Dist(P), Y)
    self.marginal_grad = jit(vmap(grad(self.marginal_loss)))
    self.marginal_loss = jit(vmap(self.marginal_loss))
    self.matmul_inv_fn = jit(vmap(lambda A, b: np.linalg.solve(A, b)))
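# A minimal, self-contained sketch of the grad/vmap/jit/jacrev wiring above,
# written against plain JAX rather than this class's Score/Dist objects. The
# Normal negative log-likelihood below is illustrative, not the library's
# implementation; params = (mu, log_sigma) per example.
import jax.numpy as jnp
from jax import grad, jit, vmap, jacrev

def nll(params, y):
    mu, log_sigma = params
    return (0.5 * jnp.log(2 * jnp.pi) + log_sigma
            + 0.5 * ((y - mu) / jnp.exp(log_sigma)) ** 2)

grad_fn = jit(vmap(grad(nll)))             # per-example gradients, (n, 2)
hessian_fn = jit(vmap(jacrev(grad(nll))))  # per-example Hessians, (n, 2, 2)

P = jnp.zeros((5, 2))
Y = jnp.array([0.1, -0.3, 0.7, 1.2, -0.5])
print(grad_fn(P, Y).shape, hessian_fn(P, Y).shape)  # (5, 2) (5, 2, 2)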
def mvnorm_mle(Y, max_iter=1e4, lr=0.5, eps=1e-4):
    N = Y.shape[0]
    params = np.array([[0, 0, 1, 0, 1]] * N).T
    for _ in range(int(max_iter)):  # cast: max_iter defaults to a float
        D = MultivariateNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - lr * grad
        if np.linalg.norm(grad) < eps:
            break
    return params
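# For intuition, a self-contained natural-gradient MLE for a plain univariate
# Normal in (mu, log_sigma) coordinates; it mirrors the update rule above
# without depending on the MultivariateNormal/MLE classes. In this
# parameterization the Fisher information is diag(1/sigma^2, 2), so the
# natural gradient simply rescales each coordinate of the ordinary gradient.
import numpy as np

def normal_natural_mle(y, max_iter=10000, lr=0.5, eps=1e-8):
    mu, log_sigma = 0.0, 0.0
    for _ in range(max_iter):
        sigma2 = np.exp(2 * log_sigma)
        g_mu = np.mean(-(y - mu) / sigma2)          # d(mean NLL)/d mu
        g_ls = np.mean(1 - (y - mu) ** 2 / sigma2)  # d(mean NLL)/d log_sigma
        n_mu, n_ls = sigma2 * g_mu, 0.5 * g_ls      # premultiply by F^{-1}
        mu, log_sigma = mu - lr * n_mu, log_sigma - lr * n_ls
        if n_mu ** 2 + n_ls ** 2 < eps ** 2:
            break
    return mu, np.exp(log_sigma)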
def lognormal_mle(Y, max_iter=1e4, lr=0.05, eps=1e-4, verbose=False):
    N = Y.shape[0]
    params = np.array([[0, 0]] * N).T
    for i in range(int(max_iter)):
        if i % 500 == 1 and verbose:
            print('Param: ', params[:, :2])
            print('Grad: ', grad)
        D = LogNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - lr * grad
        if np.linalg.norm(grad) < eps:
            break
    mu = params[0, 0]
    sigma = params[1, 0]
    return mu, sigma
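# Sanity check (self-contained): with no censoring, the lognormal MLE has the
# closed form mu = mean(log y), sigma = std(log y), which the iterative
# routine above should approach on fully observed samples.
import numpy as np

rng = np.random.default_rng(0)
y = rng.lognormal(mean=1.0, sigma=0.5, size=2000)
print(np.log(y).mean(), np.log(y).std())  # ~1.0, ~0.5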
def objective(trial):
    param = {
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 1e0),
        'n_estimators': trial.suggest_int('n_estimators', 100, 800),
        'minibatch_frac': trial.suggest_discrete_uniform('minibatch_frac', 0.1, 0.9, 0.1),
    }
    regression_model = NGBRegressor(**param, Base=best_base, Dist=Normal,
                                    Score=MLE(), natural_gradient=True,
                                    verbose=False)
    estimated_y_in_cv = model_selection.cross_val_predict(
        regression_model, train_x, train_y, cv=fold_number)
    r2 = metrics.r2_score(train_y, estimated_y_in_cv)
    return 1.0 - r2
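# How this objective would typically be driven (illustrative; the trial count
# and default sampler are assumptions, not from the original script):
import optuna

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
print(study.best_params)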
def estimate_infcens(Y):
    res = {}
    N = Y.shape[0]

    # joint estimate: model event and censoring times together
    params = np.array([[0, 0, 1, 0, 1]] * N).T
    for _ in range(100000):
        D = MultivariateNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - 1 * grad
        if np.linalg.norm(grad) < 1e-4:
            break
    print('Jointly Estimated E:', params[0, 0])
    res['joint'] = params[0, 0]

    # marginal estimate: assumes non-informative censoring
    params = np.array([[0, 0]] * N).T
    for _ in range(100000):
        D = LogNormal(params)
        S = MLE()
        grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
        params = params - 0.005 * grad
        if np.linalg.norm(grad) < 1e-4:
            break
    print('Estimate E (assume non-inf):', params[0, 0])
    res['lognorm'] = params[0, 0]
    return res
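# Hypothetical driver for estimate_infcens: simulate right-censored data and
# pack it into the ('Event', 'Time') structured-array layout that Y_join
# below produces. The lognormal parameters here are arbitrary.
import numpy as np

rng = np.random.default_rng(0)
n = 1000
t = rng.lognormal(mean=0.5, sigma=1.0, size=n)  # true event times
c = rng.lognormal(mean=0.5, sigma=1.0, size=n)  # censoring times
Y_sim = np.empty(n, dtype=[('Event', bool), ('Time', np.float64)])
Y_sim['Event'] = t <= c
Y_sim['Time'] = np.minimum(t, c)
res = estimate_infcens(Y_sim)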
def __init__(self, Dist=Normal, Score=MLE(), Base=default_tree_learner,
             natural_gradient=False, n_estimators=500, learning_rate=0.01,
             minibatch_frac=1.0, verbose=True, verbose_eval=100, tol=1e-4):
    self.Dist = Dist
    self.Score = Score
    self.Base = Base
    self.natural_gradient = natural_gradient
    self.n_estimators = n_estimators
    self.learning_rate = learning_rate
    self.minibatch_frac = minibatch_frac
    self.verbose = verbose
    self.verbose_eval = verbose_eval
    self.init_params = None
    self.base_models = []
    self.scalings = []
    self.tol = tol
from ngboost.ngboost import NGBoost
from ngboost.distns import Bernoulli
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

if __name__ == "__main__":
    X, Y = load_breast_cancer(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBoost(Base=default_tree_learner, Dist=Bernoulli, Score=MLE(),
                  verbose=True)
    ngb.fit(X_train, Y_train)
    preds = ngb.pred_dist(X_test)
    print("ROC:", roc_auc_score(Y_test, preds.prob))
print("Models") start = datetime.now().timestamp() qreg = MLPQuantile() qreg.fit(X_train_std,y_train) preds = qreg.predict(X_test_std) end = datetime.now().timestamp() results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values) results["duration"]=end-start save_result([horizon, "MLP", results, 1],f"unit_{horizon}",folder) start = datetime.now().timestamp() ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE(), natural_gradient=True, verbose=True,n_estimators=1500) ngb.fit(X_train_std, y_train.values) Y_dists = ngb.pred_dist(X_test_std) a=pd.DataFrame() for i in np.arange(1,100): a[i]=Y_dists.ppf(i/100) preds = a.values end = datetime.now().timestamp() results=evaluate((np.exp(preds)-1),(np.exp(y_test)-1).values) results["duration"]=end-start save_result([horizon, "NGBOOST", results, 1],f"unit_{horizon}",folder)
def train_half(self, params, train_data, num_boost_round,
               early_stopping_rounds, verbose, importance_df,
               use_feature_num=None):
    print('Model Creating...')
    self.data = train_data
    if use_feature_num is not None:
        self.features = importance_df['feature'][:use_feature_num].tolist()
    else:
        self.features = self.data.X.columns
    self.models = []
    assert self.data.phase == 'train', 'Use Train Dataset!'
    self.features = [c for c in self.features if c not in ['M']]
    self.X_train = self.data.X[self.features]
    self.y_train = self.data.y
    del self.data
    gc.collect()

    half = int(self.X_train.shape[0] / 2)

    if self.model_type == 'lgb':
        print('LightGBM Model Creating...')
        d_half_1 = lgb.Dataset(self.X_train[:half], label=self.y_train[:half])
        d_half_2 = lgb.Dataset(self.X_train[half:], label=self.y_train[half:])
        print("Building model with first half and validating on second half:")
        model_1 = lgb.train(params,
                            train_set=d_half_1,
                            num_boost_round=num_boost_round,
                            valid_sets=[d_half_1, d_half_2],
                            verbose_eval=verbose,
                            early_stopping_rounds=early_stopping_rounds)
        self.models.append(model_1)
        print('')
        print("Building model with second half and validating on first half:")
        model_2 = lgb.train(params,
                            train_set=d_half_2,
                            num_boost_round=num_boost_round,
                            valid_sets=[d_half_2, d_half_1],
                            verbose_eval=verbose,
                            early_stopping_rounds=early_stopping_rounds)
        self.models.append(model_2)

    elif self.model_type == 'cat':
        print('CatBoost Model Creating...')
        cat_features_index = np.where(self.X_train.dtypes == 'category')[0]
        d_half_1 = Pool(self.X_train[:half], label=self.y_train[:half],
                        cat_features=cat_features_index)
        d_half_2 = Pool(self.X_train[half:], label=self.y_train[half:],
                        cat_features=cat_features_index)
        params['iterations'] = num_boost_round
        print("Building model with first half and validating on second half:")
        model_1 = CatBoostRegressor(**params)
        model_1.fit(d_half_1, eval_set=d_half_2, use_best_model=True,
                    early_stopping_rounds=early_stopping_rounds,
                    verbose=verbose)
        self.models.append(model_1)
        print('')
        print("Building model with second half and validating on first half:")
        model_2 = CatBoostRegressor(**params)
        model_2.fit(d_half_2, eval_set=d_half_1, use_best_model=True,
                    early_stopping_rounds=early_stopping_rounds,
                    verbose=verbose)
        self.models.append(model_2)

    elif self.model_type == 'ng':
        print('NGBoost Model Creating...')
        print("Building model with first half and validating on second half:")
        # NGBoost cannot handle NaNs; impute them with a sentinel value
        self.X_train[np.isnan(self.X_train.astype(np.float32))] = -9999
        model_1 = NGBRegressor(Base=default_tree_learner,
                               Dist=Normal,  # Normal, LogNormal
                               Score=MLE(),
                               natural_gradient=True,
                               verbose=True,
                               n_estimators=num_boost_round,
                               verbose_eval=verbose,
                               learning_rate=0.01,
                               minibatch_frac=1.0)
        model_1.fit(self.X_train[:half], self.y_train[:half],
                    X_val=self.X_train[half:], Y_val=self.y_train[half:])
        self.models.append(model_1)
        print('')
        print("Building model with second half and validating on first half:")
        model_2 = NGBRegressor(Base=default_tree_learner,
                               Dist=Normal,  # Normal, LogNormal
                               Score=MLE(),
                               natural_gradient=True,
                               verbose=True,
                               n_estimators=num_boost_round,
                               verbose_eval=verbose,
                               learning_rate=0.01,
                               minibatch_frac=1.0)
        model_2.fit(self.X_train[half:], self.y_train[half:],
                    X_val=self.X_train[:half], Y_val=self.y_train[:half])
        self.models.append(model_2)

    del self.X_train, self.y_train
    gc.collect()
    return self.models
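# Downstream use (illustrative): the usual pattern with this half/half scheme
# is to average the two half-models' predictions. `models` is train_half's
# return value and `X_test` is a hypothetical held-out frame.
preds = np.mean([m.predict(X_test) for m in models], axis=0)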
def Y_join(T, E):
    col_event = 'Event'
    col_time = 'Time'
    y = np.empty(dtype=[(col_event, bool), (col_time, np.float64)],
                 shape=T.shape[0])
    y[col_event] = E
    y[col_time] = np.exp(T)  # T is on the log scale
    return y

Y = Y_join(T, E)
N = Y.shape[0]

params = np.array([[0, 0, 1, 0, 1]] * N).T
for _ in range(100000):
    D = MultivariateNormal(params)
    S = MLE()
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 1 * grad
    if np.linalg.norm(grad) < 1e-4:
        break
print('Jointly Estimated E:', params[0, 0])

params = np.array([[0, 0]] * N).T
for _ in range(100000):
    D = LogNormal(params)
    S = MLE()
    grad = np.mean(S.grad(D, Y, natural=True).T, axis=1, keepdims=True)
    params = params - 0.1 * grad
    if np.linalg.norm(grad) < 1e-4:
        break