def test_classification(self):
    # Binary classification sanity check: NGBoost with a Bernoulli
    # distribution should reach AUC >= 0.95 on the breast cancer data.
    data, target = load_breast_cancer(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42)
    ngb = NGBoost(Base=default_tree_learner, Dist=Bernoulli, Score=MLE,
                  verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.pred_dist(x_test)
    score = roc_auc_score(y_test, preds.prob)
    assert score >= 0.95
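# A regression counterpart to the test above could look like the sketch
# below. It is not part of the original suite: the dataset choice, the
# Normal distribution, and the R^2 threshold are assumptions.
def test_regression(self):
    from sklearn.datasets import load_diabetes
    from sklearn.metrics import r2_score
    from ngboost.distns import Normal

    data, target = load_diabetes(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42)
    ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE,
                  verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.pred_dist(x_test)
    # Use the predictive mean (loc) as the point forecast.
    assert r2_score(y_test, preds.loc) > 0.5  # assumed threshold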
ngb = NGBoost(
    Base=default_tree_learner,
    Dist=Normal,
    Score=MLE,
    n_estimators=args.n_estimators,
    learning_rate=args.lr,
    natural_gradient=args.natural,
    minibatch_frac=args.minibatch_frac,
    verbose=True,
)
ngb.fit(x_tr, y_tr)

# Generate a test set that extends slightly beyond the training range,
# then apply the same polynomial feature transform used for training.
x_te, y_te, _ = gen_data(n=1000, bound=1.3)
x_te = poly_transform.transform(x_te)

preds = ngb.pred_dist(x_te)
pctles, obs, _, _ = calibration_regression(preds, y_te)

# staged_pred_dist returns one predictive distribution per boosting stage;
# the last entry corresponds to the fully fitted model.
all_preds = ngb.staged_pred_dist(x_te)
preds = all_preds[-1]

plt.figure(figsize=(6, 3))
plt.scatter(x_tr[:, 1], y_tr, color="black", marker=".", alpha=0.5)
plt.plot(
    x_te[:, 1],
    preds.loc,
    color="black",
    linestyle="-",
    linewidth=1,
    label="Predicted mean",
)
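# The plot above shows only the predictive mean. A natural next step,
# sketched here as an assumption rather than taken from the original
# script, is to shade a 95% interval using the Normal's loc and scale:
plt.fill_between(
    x_te[:, 1],
    preds.loc - 1.96 * preds.scale,
    preds.loc + 1.96 * preds.scale,
    color="gray",
    alpha=0.3,
    label="95% interval",
)
plt.legend()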
# Select the stopping iteration on the validation set. NLL is used for
# model selection; the RMSE-based choice is kept for reference.
# best_itr = np.argmin(val_rmse) + 1
best_itr = np.argmin(val_nll) + 1

full_retrain = True
if full_retrain:
    ngb = NGBoost(Base=base_name_to_learner[args.base],
                  Dist=eval(args.distn),
                  Score=score_name_to_score[args.score](64),
                  n_estimators=args.n_est,
                  learning_rate=args.lr,
                  natural_gradient=args.natural,
                  minibatch_frac=args.minibatch_frac,
                  verbose=args.verbose)
    ngb.fit(X_trainall, y_trainall)

forecast = ngb.pred_dist(X_test, max_iter=best_itr)
y_ngb += list(forecast.loc)
ngb_rmse += [np.sqrt(mean_squared_error(forecast.loc, y_test))]
ngb_nll += [-forecast.logpdf(y_test.flatten()).mean()]
# print(np.sqrt(mean_squared_error(forecast.loc, y_test)))
# for idx, y_p, y_t in zip(test_index, list(forecast.loc), y_test):
#     print(idx, y_t, y_p, np.abs(y_p - y_t))

print("[%d/%d] BestIter=%d RMSE: Val=%.4f Test=%.4f NLL: Test=%.4f"
      % (itr + 1, args.n_splits, best_itr,
         np.sqrt(val_rmse[best_itr - 1]),
         np.sqrt(mean_squared_error(forecast.loc, y_test)),
         ngb_nll[-1]))
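# For reference: the per-stage validation curves used above can be
# obtained from staged_pred_dist on the validation fit. A minimal sketch,
# assuming X_val / y_val hold the validation split (names not from the
# original script). Note val_rmse stores squared errors; the square root
# is only taken when printing.
val_rmse, val_nll = [], []
for dist in ngb.staged_pred_dist(X_val):
    val_rmse.append(mean_squared_error(dist.loc, y_val))
    val_nll.append(-dist.logpdf(y_val.flatten()).mean())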
from ngboost.ngboost import NGBoost
from ngboost.distns import Bernoulli
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

if __name__ == "__main__":
    X, Y = load_breast_cancer(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBoost(Base=default_tree_learner, Dist=Bernoulli, Score=MLE(),
                  verbose=True)
    ngb.fit(X_train, Y_train)

    preds = ngb.pred_dist(X_test)
    print("ROC:", roc_auc_score(Y_test, preds.prob))
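    # The Bernoulli pred_dist exposes P(Y=1) as .prob; a hard-label
    # accuracy check could be added as below (a sketch, not part of the
    # original example; the 0.5 threshold is an assumption):
    from sklearn.metrics import accuracy_score
    y_hat = (preds.prob > 0.5).astype(int)
    print("ACC:", accuracy_score(Y_test, y_hat))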
argparser.add_argument("--distn", type=str, default="Normal")
argparser.add_argument("--natural", action="store_true")
argparser.add_argument("--score", type=str, default="CRPS")
args = argparser.parse_args()

np.random.seed(123)

m, n = 1200, 50
noise = np.random.randn(m, 1)
beta1 = np.random.randn(n, 1)
X = np.random.randn(m, n) / np.sqrt(n)
Y = X @ beta1 + args.noise_lvl * noise
print(X.shape, Y.shape)

X_train, X_test = X[:1000, :], X[1000:, :]
Y_train, Y_test = Y[:1000], Y[1000:]

ngb = NGBoost(n_estimators=400, learning_rate=args.lr,
              Dist=Normal,
              Base=default_linear_learner,
              natural_gradient=args.natural,
              minibatch_frac=1.0,
              Score=eval(args.score)(),
              verbose=True,
              verbose_eval=10)
losses = ngb.fit(X_train, Y_train)
forecast = ngb.pred_dist(X_test)
print("R2:", r2_score(Y_test, forecast.loc))
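# The assignment above suggests fit() returns the per-iteration training
# losses; plotting them is a quick way to compare --natural against the
# ordinary gradient. A sketch under that assumption:
import matplotlib.pyplot as plt

plt.plot(losses, color="black")
plt.xlabel("Boosting iteration")
plt.ylabel("Train score (%s)" % args.score)
plt.show()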
# Y = X @ beta + 0.5 * noise
Y = X @ beta1 + 0.5 * np.sqrt(np.exp(X @ beta2)) * noise
print(X.shape, Y.shape)

axis = np.linspace(0.0, 2, 200)
plt.figure(figsize=(8, 3))

ngb = NGBoost(n_estimators=100, learning_rate=1.0,
              Dist=Normal,
              Base=default_linear_learner,
              natural_gradient=True,
              minibatch_frac=1.0,
              Score=CRPS())
ngb.fit(X, Y)
preds = ngb.pred_dist(X)
print(preds.scale.mean())
print(preds.scale.std())

pctles, observed, slope, intercept = calibration_regression(preds, Y)
plt.subplot(1, 2, 1)
plot_pit_histogram(pctles, observed, label="CRPS", linestyle="--")
plt.subplot(1, 2, 2)
plt.plot(axis, gaussian_kde(preds.scale)(axis), linestyle="--",
         color="black", label="CRPS")

ngb = NGBoost(n_estimators=100, learning_rate=0.5,
                                                  test_size=0.2)
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train,
                                                  test_size=0.2)

ngb = NGBoost(Dist=eval(args.distn),
              n_estimators=args.n_est,
              learning_rate=args.lr,
              natural_gradient=args.natural,
              verbose=args.verbose,
              minibatch_frac=1.0,
              Base=base_name_to_learner[args.base],
              Score=eval(args.score)())
train_losses = ngb.fit(X_train, Y_train)  # , X_val, Y_val)

forecast = ngb.pred_dist(X_test)
train_forecast = ngb.pred_dist(X_train)

# Concordance index on censored data; higher is better. Predictive means
# are negated because concordance_index_censored expects risk scores.
print('NGB score: %.4f (test), %.4f (train)' %
      (concordance_index_censored(Y_test['Event'], Y_test['Time'],
                                  -forecast.mean())[0],
       concordance_index_censored(Y_train['Event'], Y_train['Time'],
                                  -train_forecast.mean())[0]))
# logger.tick(forecast, Y_test)

##
## sksurv
##

gbsa = GBSA(n_estimators=args.n_est,
            learning_rate=args.lr,
            subsample=args.minibatch_frac,
            verbose=args.verbose)
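# A matching evaluation for the sksurv baseline, sketched by analogy with
# the NGBoost block above (not verbatim from the original script). GBSA's
# predict() already returns risk scores, so no negation is needed:
gbsa.fit(X_train, Y_train)
print('GBSA score: %.4f (test), %.4f (train)' %
      (concordance_index_censored(Y_test['Event'], Y_test['Time'],
                                  gbsa.predict(X_test))[0],
       concordance_index_censored(Y_train['Event'], Y_train['Time'],
                                  gbsa.predict(X_train))[0]))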
start = datetime.now().timestamp()
qreg = MLPQuantile()
qreg.fit(X_train_std, y_train)
preds = qreg.predict(X_test_std)
end = datetime.now().timestamp()

# Targets were modeled on a log1p scale; invert the transform before scoring.
results = evaluate(np.exp(preds) - 1, (np.exp(y_test) - 1).values)
results["duration"] = end - start
save_result([horizon, "MLP", results, 1], f"unit_{horizon}", folder)

start = datetime.now().timestamp()
ngb = NGBoost(Base=default_tree_learner, Dist=Normal, Score=MLE(),
              natural_gradient=True, verbose=True, n_estimators=1500)
ngb.fit(X_train_std, y_train.values)
Y_dists = ngb.pred_dist(X_test_std)

# Extract the 1st through 99th percentiles of the predictive distribution,
# one column per percentile.
a = pd.DataFrame()
for i in np.arange(1, 100):
    a[i] = Y_dists.ppf(i / 100)
preds = a.values
end = datetime.now().timestamp()

results = evaluate(np.exp(preds) - 1, (np.exp(y_test) - 1).values)
results["duration"] = end - start
save_result([horizon, "NGBOOST", results, 1], f"unit_{horizon}", folder)
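# `evaluate` is defined elsewhere; for the percentile matrix built above
# it would plausibly include a mean pinball (quantile) loss. A minimal
# sketch of that metric, with the function name hypothetical:
import numpy as np

def mean_pinball(y_true, quantile_preds, quantiles=np.arange(1, 100) / 100):
    # quantile_preds: (n_samples, 99) array, one column per percentile.
    losses = []
    for j, q in enumerate(quantiles):
        diff = y_true - quantile_preds[:, j]
        # Pinball loss: q * diff if diff >= 0, else (q - 1) * diff.
        losses.append(np.mean(np.maximum(q * diff, (q - 1) * diff)))
    return np.mean(losses)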