示例#1
0
def test_OLS_train_MSE_with_sklearn(X, z):
    """Check that our OLS training-set MSE agrees with sklearn's LinearRegression.

    Trains via SamplingMethod, then refits sklearn's LinearRegression on the
    same (already scaled) training split and compares the two MSE values
    against the module-level ``error_tolerance``.
    """
    print("Testing OLS compared to sklearn")

    sampling = SamplingMethod().train_and_test(X, z, model_type = RegressionType.OLS)
    # MSE of our model evaluated on its own training data.
    sampling_score = sampling.test_model(sampling.model, sampling.X_train, sampling.y_train).mse

    # Use the same scaled data, to test just the OLS method
    lin_model = LinearRegression(fit_intercept=False).fit(sampling.X_train, sampling.y_train)
    sklearn_y_train_predict = lin_model.predict(sampling.X_train)
    sklearn_score = mean_squared_error(sampling.y_train, sklearn_y_train_predict)

    diff = np.abs(np.abs(sklearn_score) - np.abs(sampling_score))
    # BUG FIX: the message claimed "r2 scores" although MSE values are being
    # compared; also added the missing space after the period.
    assert_msg = ("\nDifference between MSE scores " + str(diff)
                  + " should be less than " + str(error_tolerance)
                  + ". Sampling: " + str(sampling_score)
                  + " model: " + str(sklearn_score))
    assert diff < error_tolerance, assert_msg
示例#2
0
def test_mse_with_sklearn(X, z):
    """Verify that SamplingMethod's MSE matches sklearn's mean_squared_error.

    Uses the default regression type, then recomputes the test-split MSE with
    sklearn and compares against ``sampling.mse`` within ``error_tolerance``.
    """
    print("Testing MSE compared to sklearn MSE method")

    sampling = SamplingMethod().train_and_test(X, z)

    # Recompute the MSE on the held-out split with sklearn's implementation.
    test_predictions = sampling.model.get_y_pred(sampling.X_test)
    sklearn_mse = mean_squared_error(sampling.y_test, test_predictions)

    diff = np.abs(np.abs(sklearn_mse) - np.abs(sampling.mse))
    assert_msg = "\nDifference between MSE scores methods" + str(diff) + " should be less than " + str(error_tolerance) + ".Sampling: " + str(sampling.mse) + " model: " + str(sklearn_mse)
    assert diff < error_tolerance, assert_msg
示例#3
0
def test_r2_with_sklearn(X, z):
    """Verify that SamplingMethod's r2 matches sklearn's r2_score.

    Recomputes the test-split r2 with sklearn on the fitted model's
    predictions and compares against ``sampling.r2`` within
    ``error_tolerance``.
    """
    print("Testing r2 compared to sklearn r2 method")

    sampling = SamplingMethod().train_and_test(X, z)

    # sklearn's r2 on the same fitted model's test-set predictions.
    predictions = sampling.model.get_y_pred(sampling.X_test)
    sklearn_score_on_red_model = r2_score(sampling.y_test, predictions)

    diff = np.abs(np.abs(sklearn_score_on_red_model) - np.abs(sampling.r2))
    assert_msg = "\nDifference between r2 scores methods" + str(diff) + " should be less than " + str(error_tolerance)
    assert diff < error_tolerance, assert_msg
示例#4
0
def test_mean_and_std_of_scaled_data(X, z):
    """Check that scale_standard produces zero-mean, unit-std data.

    Splits the data, standard-scales both parts via
    SamplingMethod.scale_standard, and asserts the resulting means/stds.
    """
    print("Testing mean and std of scaled data")

    X_train, X_test, Y_train, Y_test = train_test_split(X, z, test_size = 0.2)
    train_data_scaled, test_data_scaled = SamplingMethod.scale_standard(X_train, X_test)

    # BUG FIX: the original used ``assert(condition, value)``, which asserts a
    # 2-tuple — always truthy — so none of these checks could ever fail.
    # Rewritten as ``assert condition, message``.
    train_mean = np.mean(train_data_scaled)
    train_std = np.std(train_data_scaled)
    test_mean = np.mean(test_data_scaled)
    test_std = np.std(test_data_scaled)

    assert np.isclose(train_mean, 0, atol = 1e-15, equal_nan=True), train_mean
    assert np.isclose(train_std, 1, atol = 1e-15, equal_nan=True), train_std
    # NOTE(review): if scale_standard scales the test split with the
    # *training* statistics (the usual convention), its mean/std are not
    # guaranteed to be exactly 0/1 and these two assertions (with such a
    # tight atol) may fail now that they are actually enforced — confirm
    # the intended tolerance.
    assert np.isclose(test_mean, 0, atol = 1e-15, equal_nan=True), test_mean
    assert np.isclose(test_std, 1, atol = 1e-15, equal_nan=True), test_std
示例#5
0
def test_ridge_with_sklearn(X, z):
    """Compare our Ridge regression r2 against sklearn's Ridge.

    Fits both implementations with the same regularization strength on the
    identical (already scaled) training split and compares the test-set r2
    scores within ``error_tolerance``.
    """
    print("Testing Ridge compared to sklearn")

    ridge_lambda = 1.0
    sampling = SamplingMethod().train_and_test(X, z, model_type = RegressionType.Ridge, alpha = ridge_lambda)

    # Reference fit: sklearn's Ridge on the same scaled training data.
    reference = Ridge(alpha = ridge_lambda, fit_intercept=False)
    reference.fit(sampling.X_train, sampling.y_train)
    sklearn_score = r2_score(sampling.y_test, reference.predict(sampling.X_test))

    diff = np.abs(np.abs(sklearn_score) - np.abs(sampling.r2))
    assert_msg = "\nDifference between r2 scores " + str(diff) + " should be less than " + str(error_tolerance) + ", sklearn: " + str(sklearn_score) + ", our model: " + str(sampling.r2)
    assert diff < error_tolerance, assert_msg
示例#6
0
    def run_k_fold_validation(self, X, y, model_type, alpha=0.0):
        """Run k-fold cross-validation and store per-fold predictions and scores.

        Splits the rows of ``X`` into ``self.kfolds`` contiguous, equal-sized
        folds, fits a RegressionMethod on the remaining folds each round
        (with train/test scaled via ``SamplingMethod.scale_standard``), and
        collects the per-fold predictions column-wise.

        NOTE(review): the reshape below requires ``X.shape[0]`` to be evenly
        divisible by ``self.kfolds``; otherwise it raises. TODO confirm
        callers guarantee this.

        Parameters:
            X: design matrix, shape (n_samples, n_features).
            y: targets, shape (n_samples,) or (n_samples, 1).
            model_type: regression type forwarded to RegressionMethod.fit.
            alpha: regularization strength forwarded to the model (default 0.0).

        Returns:
            self, with ``y_pred``, ``y_pred_train``, ``r2``, ``mse``, ``bias``,
            ``var`` (and their ``_train`` counterparts) set.
        """
        assert X.shape[0] == y.shape[0], (
            "X.shape[0] and y.shape[0] needs to be the same length, but: " +
            str(X.shape[0]) + " != " + str(y.shape[0]))

        # Row indices partitioned into kfolds equal contiguous folds:
        # row f of X_fold_indices holds the test indices for fold f.
        X_fold_indices = [x for x in range(X.shape[0])]
        X_fold_indices = np.reshape(X_fold_indices, (self.kfolds, -1))
        k_indices = [x for x in range(self.kfolds)]

        # One column of predictions per fold.
        y_pred = np.empty((len(X_fold_indices[0]), self.kfolds))
        y_pred_train = np.empty(
            (len(X_fold_indices[0]) * (self.kfolds - 1), self.kfolds))

        for fold in range(self.kfolds):
            # Training rows = all folds except the current one.
            X_indices = X_fold_indices[np.delete(k_indices, fold)].reshape(-1)
            X_train, X_test = SamplingMethod.scale_standard(
                X[X_indices], X[X_fold_indices[fold]])

            y_train = y[X_indices]
            y_test = y[X_fold_indices[fold]]
            # Force column-vector shape (in-place) for the regression code.
            y_test.shape = (y_test.shape[0], 1)
            y_train.shape = (y_train.shape[0], 1)

            model = RegressionMethod().fit(X_train, y_train, model_type, alpha)

            y_pred[:, fold] = model.get_y_pred(X_test).ravel()
            y_pred_train[:, fold] = model.get_y_pred(X_train).ravel()

        self.y_pred = y_pred
        self.y_pred_train = y_pred_train

        # NOTE(review): y_test / y_train here are the LAST fold's targets
        # (loop-carried), yet they are scored against the prediction matrix
        # of ALL folds. If the aggregate scores are meant to average over
        # folds, each column should be compared to its own fold's targets —
        # confirm this is the intended bias-variance bookkeeping.
        self.r2 = self.R2(y_test, y_pred)
        self.mse = self.MSE(y_test, y_pred)
        self.bias = self.get_bias(y_test, y_pred)
        self.var = self.get_variance(y_pred)

        self.r2_train = self.R2(y_train, y_pred_train)
        self.mse_train = self.MSE(y_train, y_pred_train)
        self.bias_train = self.get_bias(y_train, y_pred_train)
        self.var_train = self.get_variance(y_pred_train)

        return self
示例#7
0
import numpy as np
from random import random, seed

from RegLib.SamplingMethod import SamplingMethod
from RegLib.RegressionMethod import RegressionType
from RegLib.HelperFunctions import confidence_interval, create_frankie_data, create_X
from PROJECT_SETUP import SEED, SAVE_FIG

# Reproducibility: fix the global NumPy RNG with the project-wide seed.
np.random.seed(SEED)

# Experiment parameters: sample count, noise strength, max polynomial degree.
N = 100
noise = 0.3
p = 5

# Generate Franke-function data and a degree-p polynomial design matrix.
x, y, z = create_frankie_data(SEED, N = N, noise_strength=noise)
X = create_X(x, y, n = p)
# Shared shuffle so every model sees the same train/test partition.
perm_index = np.random.permutation(len(z))
sampling = SamplingMethod().train_and_test(X, z, perm_index = perm_index, model_type = RegressionType.OLS)

# Metadata rendered alongside the confidence-interval figure.
info_to_add = {
    "N: ": N,
    "Noise: ": noise,
    "Polynomial degree: " : p,
    "MSE: " : sampling.mse,
    "R2: ": sampling.r2
}

confidence_interval(X, z, sampling.model.beta, noise, N, info_to_add = info_to_add, save_fig = SAVE_FIG)
# Fresh permutation reused (shuffle=False below) for the complexity sweep.
perm_index = np.random.permutation(len(z))

# Per-degree score arrays for the train/test error-vs-complexity curves.
polydegree = np.zeros(p)
mse_train = np.zeros(p)
mse_test = np.zeros(p)
r2_train = np.zeros(p)
r2_test = np.zeros(p)

for degree in range(p):
    # NOTE(review): progressBar is not among the visible imports above —
    # confirm where it comes from (likely RegLib.HelperFunctions).
    progressBar(degree + 1, p)
    polydegree[degree] = degree + 1

    # NOTE(review): create_X is called with `degree` (0..p-1) while the
    # plotted degree is degree + 1 — possible off-by-one vs. the n=p call
    # earlier; confirm create_X's degree convention.
    X = create_X(x, y, degree, debug=False)
    sampling = SamplingMethod().train_and_test(X,
                                               z,
                                               perm_index,
                                               RegressionType.OLS,
                                               shuffle=False,
                                               test_size=0.3)
    mse_test[degree] = sampling.mse
    r2_test[degree] = sampling.r2

    # Re-score the fitted model on its own training split.
    train_sample = sampling.test_model(sampling.model, sampling.X_train,
                                       sampling.y_train)
    mse_train[degree] = train_sample.mse
    r2_train[degree] = train_sample.r2

# Series for the train/test error plot.
values_to_plot = {
    "Train error": mse_train,
    "Test error": mse_test,
}