Example #1
def model_test_for_esn_base(Base,
                            esn_param,
                            X_train,
                            X_test,
                            Y_train,
                            Y_test,
                            n_estimators=500,
                            learning_rate=0.01,
                            Score=MLE,
                            Dist=Normal,
                            verbose=True,
                            verbose_eval=100,
                            plot_predict=True,
                            return_y_pred=False,
                            return_y_dists=False,
                            return_mse=False):

    ESN = SimpleESN(n_readout=esn_param['n_readout'],
                    n_components=esn_param['n_components'],
                    damping=esn_param['damping'],
                    weight_scaling=esn_param['weight_scaling'],
                    discard_steps=0,
                    random_state=None)
    X_train = ESN.fit_transform(X_train)
    X_test = ESN.transform(X_test)  # reuse the reservoir fitted on the training data

    ngb = NGBRegressor(Base=Base,
                       n_estimators=n_estimators,
                       verbose=verbose,
                       verbose_eval=verbose_eval,
                       learning_rate=learning_rate,
                       Dist=Dist,
                       Score=Score)
    print(ESN, '\n')
    print(ngb, '\n')
    ngb.fit(X_train, Y_train)
    Y_preds = ngb.predict(X_test)
    Y_dists = ngb.pred_dist(X_test)  # predictive distribution (Normal: mean and std)
    # test Mean Squared Error
    test_MSE = mean_squared_error(Y_preds, Y_test)
    print('\nTest MSE', test_MSE)
    # test Negative Log Likelihood
    test_NLL = -Y_dists.logpdf(Y_test).mean()
    print('Test NLL', test_NLL)

    if plot_predict:
        df = pd.concat([Y_test, pd.Series(Y_preds, index=Y_test.index)],
                       axis=1)
        df.columns = ['test', 'pred']
        df.plot(figsize=(10, 4),
                title='MSE:{}  NLL:{}'.format(round(test_MSE, 4),
                                              round(test_NLL, 4)))
    if return_y_pred and not return_y_dists:
        return pd.Series(Y_preds, index=Y_test.index)
    if return_y_dists and not return_y_pred:
        return Y_dists
    if return_y_pred and return_y_dists:
        return pd.Series(Y_preds, index=Y_test.index), Y_dists
    if return_mse:
        return test_MSE
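
For context, a minimal call sketch for the helper above; the esn_param values and the base learner are illustrative, not taken from the source:

from sklearn.tree import DecisionTreeRegressor

esn_param = {'n_readout': 100,        # hypothetical reservoir settings
             'n_components': 100,
             'damping': 0.5,
             'weight_scaling': 0.9}
base = DecisionTreeRegressor(criterion='friedman_mse', max_depth=3)
test_mse = model_test_for_esn_base(base, esn_param,
                                   X_train, X_test, Y_train, Y_test,
                                   plot_predict=False, return_mse=True)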
Example #2
def test_dists_runs_on_examples_crpscore(dist: Distn, learner,
                                         boston_data: Tuple4Array):
    X_train, X_test, y_train, y_test = boston_data
    # TODO: test early stopping features
    ngb = NGBRegressor(Dist=dist, Score=CRPScore, Base=learner, verbose=False)
    ngb.fit(X_train, y_train)
    y_pred = ngb.predict(X_test)
    y_dist = ngb.pred_dist(X_test)
Example #3
	def test_dists(self, learners, reg_dists, reg_data):
		X_reg_train, X_reg_test, Y_reg_train, Y_reg_test = reg_data
		for Dist, Scores in reg_dists.items():
			for Score in Scores:
				for Learner in learners:
					# TODO: test early stopping features
					ngb = NGBRegressor(Dist=Dist, Score=Score, Base=Learner, verbose=False)
					ngb.fit(X_reg_train, Y_reg_train)
					y_pred = ngb.predict(X_reg_test)
					y_dist = ngb.pred_dist(X_reg_test)
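
The nested loops above can be written more idiomatically with pytest parametrization. A sketch, assuming the same learners/reg_data fixtures and flattening the Dist/Score pairs into tuples:

import pytest
from ngboost import NGBRegressor
from ngboost.distns import Normal
from ngboost.scores import LogScore, CRPScore

@pytest.mark.parametrize("Dist,Score", [(Normal, LogScore), (Normal, CRPScore)])
def test_dists_param(Dist, Score, learners, reg_data):
    X_train, X_test, Y_train, Y_test = reg_data
    for Learner in learners:
        # smoke test: fit, predict, and pred_dist should run without error
        ngb = NGBRegressor(Dist=Dist, Score=Score, Base=Learner, verbose=False)
        ngb.fit(X_train, Y_train)
        ngb.predict(X_test)
        ngb.pred_dist(X_test)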
Example #4
def ngb_Normal():
    ngb_Normal = NGBRegressor(Dist=Normal).fit(X_train, Y_train)
    globals()['ngb_Normal'] = ngb_Normal  # stash the fitted model at module scope for reuse
    Y_preds = ngb_Normal.predict(X_test)
    Y_dists = ngb_Normal.pred_dist(X_test)
    # test Mean Squared Error
    test_MSE = mean_squared_error(Y_preds, Y_test)
    print('Test MSE_Normal', test_MSE)
    # test Negative Log Likelihood
    test_NLL = -Y_dists.logpdf(Y_test).mean()
    print('Test NLL_Normal', test_NLL)
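
Beyond MSE and NLL, the returned distribution can yield prediction intervals. A sketch, assuming ngboost's Normal wraps a frozen scipy distribution in its .dist attribute and that Y_test is array-like:

# 90% central prediction intervals from the fitted Normal (sketch)
Y_dists = ngb_Normal.pred_dist(X_test)
lower, upper = Y_dists.dist.interval(0.90)
coverage = ((Y_test >= lower) & (Y_test <= upper)).mean()
print('Empirical coverage of 90% intervals:', coverage)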
Example #5
def test_regression(boston_data):
    from sklearn.metrics import mean_squared_error

    x_train, x_test, y_train, y_test = boston_data
    ngb = NGBRegressor(verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)
    score = mean_squared_error(y_test, preds)
    assert score <= 15

    score = ngb.score(x_test, y_test)
    assert score <= 15

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Normal)

    score = mean_squared_error(y_test, preds)
    assert score <= 15
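
The boston_data fixture is not shown in this example; a plausible definition, consistent with Example #7 below (note that load_boston was removed in scikit-learn 1.2):

import pytest
from sklearn.datasets import load_boston  # scikit-learn < 1.2 only
from sklearn.model_selection import train_test_split

@pytest.fixture(scope="module")
def boston_data():
    X, y = load_boston(return_X_y=True)
    return train_test_split(X, y, test_size=0.2, random_state=42)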
Example #6
def ngb_cv():
    print("====================================")
    b1 = DecisionTreeRegressor(criterion='friedman_mse', max_depth=2)
    b2 = DecisionTreeRegressor(criterion='friedman_mse', max_depth=4)
    param_grid = {'minibatch_frac': [1.0, 0.5], 'Base': [b1, b2]}
    ngb = NGBRegressor(Dist=Normal, verbose=True)
    grid_search = GridSearchCV(ngb, param_grid=param_grid, cv=3)
    grid_search.fit(X_train, Y_train)
    best_params = grid_search.best_params_
    print(best_params)
    ngb_cv = NGBRegressor(Dist=Normal, verbose=True,
                          **best_params).fit(X_train, Y_train)
    globals()['ngb_cv'] = ngb_cv  # stash the fitted model at module scope for reuse
    Y_preds = ngb_cv.predict(X_test)
    Y_dists = ngb_cv.pred_dist(X_test)
    # test Mean Squared Error
    test_MSE_CV = mean_squared_error(Y_preds, Y_test)
    print('Test MSE_CV', test_MSE_CV)
    # test Negative Log Likelihood
    test_NLL_CV = -Y_dists.logpdf(Y_test).mean()
    print('Test NLL_CV', test_NLL_CV)
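
Because NGBRegressor follows the scikit-learn estimator API, the grid can include any constructor argument. A sketch extending the search (values illustrative):

param_grid = {
    'learning_rate': [0.01, 0.1],
    'n_estimators': [200, 500],
    'Base': [b1, b2],
}
grid_search = GridSearchCV(NGBRegressor(Dist=Normal, verbose=False),
                           param_grid=param_grid, cv=3)
grid_search.fit(X_train, Y_train)
print(grid_search.best_params_)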
Example #7
def test_regression():
    from sklearn.datasets import load_boston  # deprecated; removed in scikit-learn 1.2
    from sklearn.metrics import mean_squared_error
    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        target,
                                                        test_size=0.2,
                                                        random_state=42)
    ngb = NGBRegressor(verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)
    score = mean_squared_error(y_test, preds)
    assert score <= 8.0

    score = ngb.score(x_test, y_test)
    assert score <= 8.0

    dist = ngb.pred_dist(x_test)
    assert isinstance(dist, Normal)

    score = mean_squared_error(y_test, preds)
    assert score <= 8.0
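
load_boston was deprecated in scikit-learn 1.0 and removed in 1.2. On newer versions the data-loading step can be swapped as below; note the MSE thresholds above are specific to the Boston dataset and would not carry over:

from sklearn.datasets import fetch_california_housing
data, target = fetch_california_housing(return_X_y=True)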
Example #8
def test_multivariatenormal(k: int, learner):
    dist = MultivariateNormal(k)

    # Generate some sample data
    N = 500
    X_train = np.random.randn(N, k)
    y_fns = [np.sin, np.cos, np.exp]
    y_cols = [
        fn(X_train[:, num_col]).reshape(-1, 1) + np.random.randn(N, 1)
        for num_col, fn in enumerate(y_fns[:k])
    ]
    y_train = np.hstack(y_cols)
    X_test = np.random.randn(N, k)

    ngb = NGBRegressor(Dist=dist, Score=LogScore, Base=learner, verbose=False)
    ngb.fit(X_train, y_train)
    y_pred = ngb.predict(X_test)
    y_dist = ngb.pred_dist(X_test)

    mean = y_dist.mean
    sample = y_dist.rv()
    scipy_list = y_dist.scipy_distribution()
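
A few shape assertions one might append here; a sketch that assumes the MultivariateNormal API behaves as used above:

assert y_pred.shape == (N, k)    # point predictions: one k-vector per sample
assert len(scipy_list) == N      # one frozen scipy distribution per test point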
Example #9
from ngboost import NGBRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the dataset
X, Y = load_boston(return_X_y=True)
# Split into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
# Fit an NGBRegressor
ngb = NGBRegressor().fit(X_train, Y_train)
Y_preds = ngb.predict(X_test)
# Compute MSE
test_MSE = mean_squared_error(Y_preds, Y_test)
print('MSE', test_MSE)
# Compute NLL (negative log likelihood)
Y_dists = ngb.pred_dist(X_test)
test_NLL = -Y_dists.logpdf(Y_test.flatten()).mean()
print('NLL', test_NLL)
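
The fitted model also exposes feature importances, one set per distribution parameter (loc and scale for Normal). A sketch, assuming a recent ngboost release:

importances = ngb.feature_importances_
print('loc importances:', importances[0])
print('scale importances:', importances[1])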
Example #10
# Assumes SEED and the train/test splits (X_train, y_train, X_test, y_test)
# are defined earlier; imports needed by this snippet (MLE lives in
# ngboost.scores in the older ngboost releases this code targets):
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
from ngboost import NGBRegressor
from ngboost.distns import Normal
from ngboost.learners import default_tree_learner
from ngboost.scores import MLE

# Fit and predict
rf = RandomForestRegressor(n_estimators=400,
                           random_state=SEED).fit(X_train, y_train)
y_pred = rf.predict(X_test)
print('Random Forest: R2 score on testing data: {:.2f}%'.format(
    100 * r2_score(y_test, y_pred)))

# Fit and predict
lgb = LGBMRegressor(n_estimators=400, random_state=SEED).fit(X_train, y_train)
y_pred = lgb.predict(X_test)
print('LightGBM: R2 score on testing data: {:.2f}%'.format(
    100 * r2_score(y_test, y_pred)))

# Fit and predict
np.random.seed(SEED)
ngb = NGBRegressor(n_estimators=400,
                   Base=default_tree_learner,
                   Dist=Normal,
                   Score=MLE).fit(X_train, y_train)
y_pred = ngb.predict(X_test)
print('NGBoost: R2 score on testing data: {:.2f}%'.format(
    100 * r2_score(y_test, y_pred)))

# Probability distribution
obs_idx = [0, 1]
dist = ngb.pred_dist(X_test[obs_idx, :])
print('P(y_0|x_0) is normally distributed with loc={:.2f} and scale={:.2f}'.
      format(dist.loc[0], dist.scale[0]))
print('P(y_1|x_1) is normally distributed with loc={:.2f} and scale={:.2f}'.
      format(dist.loc[1], dist.scale[1]))
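
Per-observation prediction intervals follow directly from loc and scale; for example, a two-sided 95% interval:

import scipy.stats as st

z = st.norm.ppf(0.975)  # about 1.96
lower = dist.loc - z * dist.scale
upper = dist.loc + z * dist.scale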
Example #11
def model_test(Base,
               X_train,
               X_test,
               Y_train,
               Y_test,
               n_estimators=500,
               learning_rate=0.01,
               Score=MLE,
               Dist=Normal,
               verbose=True,
               verbose_eval=100,
               plot_predict=True,
               return_y_pred=False,
               return_y_dists=False,
               return_mse=False,
               Y_scaler=None):
    ngb = NGBRegressor(Base=Base,
                       n_estimators=n_estimators,
                       verbose=verbose,
                       verbose_eval=verbose_eval,
                       learning_rate=learning_rate,
                       Dist=Dist,
                       Score=Score)
    print(ngb, '\n')
    ngb.fit(X_train, Y_train)
    Y_preds = ngb.predict(X_test)
    Y_dists = ngb.pred_dist(X_test)  # predictive distribution (Normal: mean and std)
    # test Mean Squared Error
    test_MSE = mean_squared_error(Y_preds, Y_test)
    print('\nTest MSE', test_MSE)
    # test Negative Log Likelihood
    test_NLL = -Y_dists.logpdf(Y_test).mean()
    print('Test NLL', test_NLL)

    if plot_predict:
        # Plot test targets against predictions, undoing any target scaling
        if Y_scaler is not None:
            df = pd.concat([
                pd.Series(Y_scaler.inverse_transform(
                    Y_test.copy().values.reshape(-1, 1)).reshape(-1),
                          index=Y_test.index),
                pd.Series(Y_scaler.inverse_transform(
                    np.array(Y_preds).reshape(-1, 1)).reshape(-1),
                          index=Y_test.index)
            ], axis=1)
        else:
            df = pd.concat(
                [Y_test, pd.Series(Y_preds, index=Y_test.index)], axis=1)
        df.columns = ['test', 'pred']
        df.plot(figsize=(10, 4),
                title='MSE:{}  NLL:{}'.format(round(test_MSE, 4),
                                              round(test_NLL, 4)))
    if return_y_pred and not return_y_dists:
        return pd.Series(Y_preds, index=Y_test.index)
    if return_y_dists and not return_y_pred:
        return Y_dists
    if return_y_pred and return_y_dists:
        return pd.Series(Y_preds, index=Y_test.index), Y_dists
    if return_mse:
        return test_MSE
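
A minimal call sketch for this helper (base learner and flags are illustrative):

from sklearn.tree import DecisionTreeRegressor

y_pred, y_dists = model_test(DecisionTreeRegressor(max_depth=3),
                             X_train, X_test, Y_train, Y_test,
                             verbose=False, plot_predict=False,
                             return_y_pred=True, return_y_dists=True)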
Example #12
        regression_model.fit(train_x_tmp, train_y_tmp,
                  eval_set=[(train_x_validation, train_y_validation)],
                  early_stopping_rounds=100)
        best_n_estimators = regression_model.best_iteration_
    regression_model = cat.CatBoostRegressor(**study.best_params,
                                             n_estimators=best_n_estimators,
                                             logging_level='Silent')

elif method_flag == 15:
    train_x_tmp, train_x_validation, train_y_tmp, train_y_validation = train_test_split(train_x,
                                                                                        train_y,
                                                                                        test_size=fraction_of_validation_samples,
                                                                                        random_state=0)

    # Decision tree base learner
    ngb_tree = NGBRegressor(Base=default_tree_learner).fit(train_x_tmp, train_y_tmp)
    Y_preds_tree = ngb_tree.predict(train_x_validation)
    Y_dists_tree = ngb_tree.pred_dist(train_x_validation)

    # test Mean Squared Error
    test_MSE_tree = mean_squared_error(Y_preds_tree, train_y_validation)
    print('Test MSE_tree', test_MSE_tree)

    # test Negative Log Likelihood
    test_NLL_tree = -Y_dists_tree.logpdf(train_y_validation.flatten()).mean()
    print('Test NLL_tree', test_NLL_tree)

    # Ridge
    ngb_ridge = NGBRegressor(Base=default_linear_learner).fit(train_x_tmp, train_y_tmp)
    Y_preds_ridge = ngb_ridge.predict(train_x_validation)
    Y_dists_ridge = ngb_ridge.pred_dist(train_x_validation)

    # test Mean Squared Error
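
Since this branch already carves out a validation split, NGBoost's built-in early stopping could be applied directly. A sketch using the fit() arguments seen in Example #15:

ngb_es = NGBRegressor(Base=default_tree_learner, verbose=False)
ngb_es.fit(train_x_tmp, train_y_tmp,
           X_val=train_x_validation, Y_val=train_y_validation,
           early_stopping_rounds=100)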
Example #13
        # re-train using all the data after tuning number of iterations
        ngb = NGBRegressor(
            Base=base_name_to_learner[args.base],
            Dist=eval(args.distn),
            Score=eval(args.score),
            n_estimators=args.n_est,
            learning_rate=args.lr,
            natural_gradient=args.natural,
            minibatch_frac=args.minibatch_frac,
            verbose=args.verbose,
        )
        ngb.fit(X_trainall, y_trainall)

        # the final prediction for this fold
        forecast = ngb.pred_dist(X_test, max_iter=best_itr)
        forecast_val = ngb.pred_dist(X_val, max_iter=best_itr)

        # set the appropriate scale if using a homoskedastic Normal
        if args.distn == "NormalFixedVar":
            scale = (forecast.var *
                     ((forecast_val.loc - y_val.flatten())**2).mean()**0.5)
            forecast = norm_dist(loc=forecast.loc, scale=scale)

        ngb_rmse += [np.sqrt(mean_squared_error(forecast.mean(), y_test))]
        ngb_nll += [-forecast.logpdf(y_test.flatten()).mean()]

        print("[%d/%d] BestIter=%d RMSE: Val=%.4f Test=%.4f NLL: Test=%.4f" % (
            itr + 1,
            args.n_splits,
            best_itr,
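
Worth noting in this fragment: pred_dist accepts max_iter, which evaluates the ensemble truncated at the given boosting iteration, so best_itr (chosen on the validation split) controls the effective model size without refitting. Example #15 uses the same mechanism via ngb.best_val_loss_itr.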
Example #14
    argparser.add_argument("--score", type=str, default="CRPScore")
    args = argparser.parse_args()

    np.random.seed(123)

    m, n = 1200, 50
    noise = np.random.randn(m, 1)
    beta1 = np.random.randn(n, 1)
    X = np.random.randn(m, n) / np.sqrt(n)
    Y = (X @ beta1 + args.noise_lvl * noise).squeeze()
    print(X.shape, Y.shape)

    X_train, X_test = X[:1000, :], X[1000:, :]
    Y_train, Y_test = Y[:1000], Y[1000:]

    ngb = NGBRegressor(
        n_estimators=400,
        learning_rate=args.lr,
        Dist=Normal,
        Base=default_linear_learner,
        natural_gradient=args.natural,
        minibatch_frac=1.0,
        Score=eval(args.score),
        verbose=True,
        verbose_eval=100,
    )

    ngb.fit(X_train, Y_train)  # fit returns the fitted estimator, not losses
    forecast = ngb.pred_dist(X_test)
    print("R2:", r2_score(Y_test, forecast.loc))
Example #15
    data_axs.plot(X, Y[:, 0], label="Dim 1")
    data_axs.plot(X, Y[:, 1], label="Dim 2")
    data_axs.set_xlabel("X")
    data_axs.set_ylabel("Y")
    data_axs.set_title("Input Data")
    data_axs.legend()
    data_figure.show()

    X_val, Y_val, _ = simulate_data(500)
    X_val = X_val.reshape(-1, 1)
    ngb = NGBRegressor(Dist=dist,
                       verbose=True,
                       n_estimators=2000,
                       natural_gradient=True)
    ngb.fit(X, Y, X_val=X_val, Y_val=Y_val, early_stopping_rounds=100)
    y_dist = ngb.pred_dist(X, max_iter=ngb.best_val_loss_itr)

    # Extract parameters for plotting
    mean = y_dist.mean()
    sigma, corrs = cov_to_sigma(y_dist.cov)
    true_cov_mat = np.array([dist.cov for dist in true_dist])
    true_mean = np.array([dist.mean for dist in true_dist])
    true_sigma, true_corrs = cov_to_sigma(true_cov_mat)

    # Plot the parameters in the sigma, correlation representation
    fig, axs = plt.subplots(5, 1, sharex=True)
    colors = ["blue", "red"]
    axs[4].set_xlabel("X")
    for i in range(2):
        axs[i].set_title("Mean Dimension:" + str(i))
        axs[i].plot(X, mean[:, i], label="fitted")
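
cov_to_sigma is defined elsewhere in the source script; a plausible, purely hypothetical implementation consistent with its bivariate use above:

import numpy as np

def cov_to_sigma(cov):
    # Split an (N, 2, 2) covariance stack into std devs and correlations
    sigma = np.sqrt(np.diagonal(cov, axis1=-2, axis2=-1))     # (N, 2)
    corrs = cov[..., 0, 1] / (sigma[..., 0] * sigma[..., 1])  # (N,)
    return sigma, corrs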