def PoissonReg(X_train, X_test, y_train, y_test):
    # Fit one Poisson GLM per target column (y_train holds two count targets).
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = PoissonRegressor()
    reg1.fit(X_train, y_train1)
    reg2 = PoissonRegressor()
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    logSave(nameOfModel="PoissonReg",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)

def PoissonRegGS(X_train, X_test, y_train, y_test):
    # Same pipeline as PoissonReg, but tune alpha per target with a grid search.
    y_train1 = y_train[:, 0]
    y_train2 = y_train[:, 1]
    reg1 = PoissonRegressor()
    reg2 = PoissonRegressor()
    grid_values = {'alpha': list(range(1, 3))}

    grid_reg1 = GridSearchCV(
        reg1,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg1.fit(X_train, y_train1)
    reg1 = grid_reg1.best_estimator_
    reg1.fit(X_train, y_train1)
    grid_reg2 = GridSearchCV(
        reg2,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg2.fit(X_train, y_train2)
    reg2 = grid_reg2.best_estimator_
    reg2.fit(X_train, y_train2)
    y_pred1 = reg1.predict(X=X_test)
    y_pred2 = reg2.predict(X=X_test)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    printMetrics(y_true=y_test, y_pred=y_pred)

    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred1 = reg1.predict(X=X_train)
    y_pred2 = reg2.predict(X=X_train)
    y_pred = np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)

    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    best_params = {}
    for key in best_params1.keys():
        best_params[key] = [best_params1[key], best_params2[key]]
    saveBestParams(nameOfModel="PoissonRegGS", best_params=best_params)
    logSave(nameOfModel="PoissonRegGS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)
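# Both functions above depend on project-specific helpers (printMetrics,
# getMetrics, logSave, saveBestParams) and expect a two-column count target.
# A minimal calling sketch with synthetic data (the shapes and random targets
# below are assumptions, not part of the original) might look like:
import numpy as np
from sklearn.linear_model import PoissonRegressor
from sklearn.model_selection import GridSearchCV, train_test_split

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 5))                            # 5 hypothetical features
rates = np.exp(X @ rng.normal(scale=0.3, size=(5, 2)))
y = rng.poisson(rates)                                   # 2 Poisson count targets

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
PoissonReg(X_train, X_test, y_train, y_test)
PoissonRegGS(X_train, X_test, y_train, y_test)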
Example No. 3
def test_sklearn_poisson_regression(nps_app_inst: ArrayApplication):
    def dsqr(dev_func, y, _y_pred):
        dev = dev_func(y, _y_pred)
        y_mean = nps_app_inst.mean(y)
        dev_null = dev_func(y, y_mean)
        return 1 - dev / dev_null

    from sklearn.linear_model import PoissonRegressor as SKPoissonRegressor

    coef = np.array([0.2, -0.1])
    real_X = np.array([[0, 1, 2, 3, 4]]).T
    real_y = np.exp(np.dot(real_X, coef[0]) + coef[1]).reshape(-1)
    X = nps_app_inst.array(real_X, block_shape=real_X.shape)
    y = nps_app_inst.array(real_y, block_shape=real_y.shape)
    param_set = [
        {"tol": 1e-4, "max_iter": 100},
    ]
    for kwargs in param_set:
        lr_model: PoissonRegression = PoissonRegression(**kwargs)
        lr_model.fit(X, y)
        y_pred = lr_model.predict(X).get()
        print("D^2", dsqr(lr_model.deviance, y, y_pred).get())

        sk_lr_model = SKPoissonRegressor(**kwargs)
        sk_lr_model.fit(real_X, real_y)
        sk_y_pred = sk_lr_model.predict(real_X)
        print("D^2", dsqr(lr_model.deviance, y, sk_y_pred).get())
Example No. 4
def main(lr, train_path, eval_path, save_path, save_img):
    """Problem: Poisson regression with gradient ascent.

    Args:
        lr: Learning rate for gradient ascent (unused by the sklearn solver below).
        train_path: Path to CSV file containing dataset for training.
        eval_path: Path to CSV file containing dataset for evaluation.
        save_path: Path to save predictions.
        save_img: Path to save the scatter plot of labels vs. predictions.
    """
    # Load training set
    train = pd.read_csv(train_path)
    x_train, y_train = train[['x_1', 'x_2', 'x_3',
                              'x_4']], train[['y']].values.ravel()
    glm = PoissonRegressor(tol=1e-5, max_iter=10000000)
    glm.fit(x_train, y_train)

    valid = pd.read_csv(eval_path)
    x_eval, y_eval = valid[['x_1', 'x_2', 'x_3',
                            'x_4']], valid[['y']].values.ravel()
    predictions = glm.predict(x_eval)

    np.savetxt(save_path, predictions)
    util.scatter(y_eval, predictions, save_img)
    print(glm.coef_)
    print(glm.score(x_eval, y_eval))
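# util.scatter is a course-specific helper that is not shown in this snippet.
# A hypothetical stand-in (an assumption, since the original implementation is
# not included here) that simply saves a labels-vs-predictions plot:
import matplotlib.pyplot as plt

def scatter(y_true, y_pred, save_path):
    # Hypothetical replacement for util.scatter: observed vs. predicted counts.
    plt.figure()
    plt.scatter(y_true, y_pred, alpha=0.5)
    plt.xlabel('observed count')
    plt.ylabel('predicted count')
    plt.savefig(save_path)
    plt.close()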
def sk_poisson_regression(X_train, X_test, y_train, y_test):
    glm = PoissonRegressor(alpha=0, fit_intercept=False, max_iter=300)
    glm.fit(X_train, y_train)
    print('score: ', glm.score(X_test, y_test))

    y_hat = glm.predict(X_test)

    fig = plt.figure(figsize=(6.0, 6.0))
    plt.plot(X_test, y_test, 'o')
    plt.plot(X_test, y_hat, '*', color='r')
    plt.xlabel('x (total_bill)')
    plt.ylabel('y (tips)')
    plt.xlim(0, 60)
    plt.ylim(0, 12)
    plt.show()
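# The axis labels suggest a tips-vs-total_bill regression. A hedged usage
# sketch, assuming seaborn's "tips" dataset (the original does not name its
# data source), might be:
import seaborn as sns
from sklearn.model_selection import train_test_split

tips = sns.load_dataset('tips')
X = tips[['total_bill']].to_numpy()
y = tips['tip'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
sk_poisson_regression(X_train, X_test, y_train, y_test)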
Example No. 6
def regression(transformed, train_data_index_list, test_data_index_list,
               combined_data, dataset_name, data_path, regression_type):
    X_train1 = transformed[transformed.index.isin(train_data_index_list)]
    X_train1 = np.array(X_train1)

    X_test1 = transformed[transformed.index.isin(test_data_index_list)]
    X_test1 = np.array(X_test1)

    Y_train1 = combined_data[transformed.index.isin(train_data_index_list)]
    Y_train1 = Y_train1['bug']

    Y_test1 = combined_data[transformed.index.isin(test_data_index_list)]
    Y_test1 = Y_test1['bug']

    if (regression_type == 'poisson'):
        reg = PoissonRegressor().fit(X_train1, Y_train1)
    elif (regression_type == 'linear'):
        reg = LinearRegression().fit(X_train1, Y_train1)
    else:
        reg = Lasso().fit(X_train1, Y_train1)

    predictions = reg.predict(X_test1)

    FPA_result = str(FPA(predictions))
    CLC_result = str(CLC(predictions))

    if (regression_type == 'poisson'):
        path_to_save = '../../BTP_results/ml_results/poisson' + '_' + dataset_name
        write_to_file('poisson_' + data_path, FPA_result, CLC_result,
                      path_to_save)
    elif (regression_type == 'linear'):
        path_to_save = '../../BTP_results/ml_results/linear' + '_' + dataset_name
        write_to_file('linear_' + data_path, FPA_result, CLC_result,
                      path_to_save)
    else:
        path_to_save = '../../BTP_results/ml_results/lasso' + '_' + dataset_name
        write_to_file('lasso_' + data_path, FPA_result, CLC_result,
                      path_to_save)

    print("FPA metric value obtained is: " + FPA_result)
    print("CLC metric value obtained is: " + CLC_result)
    print("MSE is: " + str(mean_squared_error(Y_test1, predictions)))

    print("success!!")
print(scores)

# %%
# We can visually compare observed and predicted values, aggregated by the
# drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance
# bonus/malus (``BonusMalus``).

fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(16, 8))
fig.subplots_adjust(hspace=0.3, wspace=0.2)

plot_obs_pred(
    df=df_train,
    feature="DrivAge",
    weight="Exposure",
    observed="Frequency",
    predicted=glm_freq.predict(X_train),
    y_label="Claim Frequency",
    title="train data",
    ax=ax[0, 0],
)

plot_obs_pred(
    df=df_test,
    feature="DrivAge",
    weight="Exposure",
    observed="Frequency",
    predicted=glm_freq.predict(X_test),
    y_label="Claim Frequency",
    title="test data",
    ax=ax[0, 1],
    fill_legend=True,
)
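# The snippet is cut off after the DrivAge panels. Following the comment and
# the pattern above, the remaining VehAge and BonusMalus panels would
# presumably be filled in the same way (a sketch mirroring the calls shown):
plot_obs_pred(
    df=df_test,
    feature="VehAge",
    weight="Exposure",
    observed="Frequency",
    predicted=glm_freq.predict(X_test),
    y_label="Claim Frequency",
    title="test data",
    ax=ax[1, 0],
    fill_legend=True,
)

plot_obs_pred(
    df=df_test,
    feature="BonusMalus",
    weight="Exposure",
    observed="Frequency",
    predicted=glm_freq.predict(X_test),
    y_label="Claim Frequency",
    title="test data",
    ax=ax[1, 1],
    fill_legend=True,
)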
Example No. 8
def poissonregressor(self, X_train, X_test, y_train, y_test):
    # Fit a Poisson GLM on the given training split and return test predictions.
    regressor = PoissonRegressor()
    regressor.fit(X_train, y_train)
    return regressor.predict(X_test)
Example No. 10
regr_l2_100.fit(X_train_std, y_train)
#print(scores_length_l2_100_reg)
#The mean score and the standard deviation are hence given by:
print("%0.2f (with L2 alpha = 100) accuracy with a standard deviation of %0.2f" % (scores_length_l2_100_reg.mean(), scores_length_l2_100_reg.std()))
#print(patient)

# Commented out IPython magic to ensure Python compatibility.
# Modeling with Poisson Regressor

import sklearn
from sklearn.linear_model import PoissonRegressor
regr = PoissonRegressor(alpha=1.0, fit_intercept=True, max_iter=100, tol=0.0001, warm_start=False, verbose=0)
regr.fit(X_train_std, y_train)

# Make predictions using the testing set
y_pred = regr.predict(X_test_std)

from sklearn.metrics import r2_score, mean_squared_error
print(r2_score(y_test, y_pred))

# The coefficients
# print('Coefficients: \n', regr.coef_)
# The mean squared error
print('Mean squared error: %.2f'
      % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
print('Coefficient of determination: %.2f'
      % r2_score(y_test, y_pred))

scores_length_no_reg = cross_val_score(regr, X_train_std, y_train, cv=5, scoring='r2') 
regr.fit(X_train_std, y_train)
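# For symmetry with the L2 (alpha = 100) summary printed earlier, the Poisson
# cross-validation scores could be reported the same way (a sketch reusing the
# scores_length_no_reg array defined just above):
print("%0.2f (Poisson, alpha = 1.0) R^2 with a standard deviation of %0.2f"
      % (scores_length_no_reg.mean(), scores_length_no_reg.std()))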