示例#1
0
def CV_fit(X, z, k, f=None, alpha=0, method='OLS'):
    """k-fold cross-validated linear fit.

    Fits `z` on design matrix `X` once per fold with the chosen model and
    evaluates each fold's prediction error against `f` (the exact/noiseless
    target; defaults to `z` itself when not supplied).

    Parameters
    ----------
    X : design matrix, shape (n_samples, n_coefs)
    z : targets used for fitting
    k : number of folds
    f : exact function values for error evaluation (defaults to z)
    alpha : regularization strength for Ridge / Lasso
    method : 'OLS', 'Ridge' or 'Lasso'

    Returns
    -------
    beta : (k, n_coefs) fitted coefficients, one row per fold
    errors : (k,) mean squared validation error per fold
    betasSigma : (k, n_coefs) estimated std of each coefficient
        (stays zero for Lasso — no analytic estimate is computed)
    """
    if f is None:
        f = z  # no exact function given: score against the noisy targets

    use_ols = method == 'OLS'
    use_ridge = method == 'Ridge'
    use_lasso = method == 'Lasso'

    kf = oh.k_fold(k)
    kf.get_n_splits(X)

    n_coefs = X.shape[1]
    beta = np.zeros((k, n_coefs))
    errors = np.zeros(k)
    betasSigma = np.zeros(beta.shape)

    for fold, (train_idx, val_idx) in enumerate(kf.split()):
        X_tr, X_val = X[train_idx], X[val_idx]
        z_tr, z_val = z[train_idx], z[val_idx]
        f_tr, f_val = f[train_idx], f[val_idx]

        # Fit this fold's coefficients with the requested model.
        if use_ols:
            beta[fold, :] = oh.linFit(X_tr, z_tr, model='OLS', _lambda=0)
        elif use_ridge:
            beta[fold, :] = oh.linFit(X_tr, z_tr, model='Ridge', _lambda=alpha)
        elif use_lasso:
            lasso = skl.Lasso(alpha=alpha,
                              fit_intercept=False,
                              max_iter=10**8,
                              precompute=True).fit(X_tr, z_tr)
            beta[fold, :] = lasso.coef_
        else:
            raise Exception(
                'method has to be Lasso, OLS or Ridge, not {}'.format(method))

        # Validation-set prediction and its error against the exact target.
        z_pred = X_val @ beta[fold, :]
        errors[fold] = np.mean((f_val - z_pred)**2)

        # Coefficient-variance estimates (OLS / Ridge only).
        # NOTE(review): the `0 *` term disables the usual degrees-of-freedom
        # correction — presumably intentional; confirm before "fixing".
        if use_ols:
            sigma_sq = 1 / (X_val.shape[0] -
                            0 * X_val.shape[1]) * np.sum(
                                (z_val - z_pred)**2)
            var_beta = sigma_sq * np.diag(
                np.linalg.pinv(X_val.T @ X_val))
            betasSigma[fold, :] = np.sqrt(var_beta)
        elif use_ridge:
            ridge_inv = np.linalg.pinv(X_val.T @ X_val +
                                       alpha * np.eye(n_coefs))
            sigma_sq = 1 / (X_val.shape[0] -
                            0 * X_val.shape[1]) * np.sum(
                                (z_val - z_pred)**2)
            var_beta = sigma_sq * np.diag(
                ridge_inv @ X_val.T @ X_val @ ridge_inv.T)
            betasSigma[fold, :] = np.sqrt(var_beta)
        # Lasso: betasSigma row intentionally left at zero.

    return beta, errors, betasSigma
示例#2
0
            # Per-fold scratch arrays for the Lasso results: k folds × numBetas coefs.
            betasLassoTemp = np.empty((k, numBetas))
            betasSigmaLassoTemp = np.empty((k, numBetas))

            # One fold's worth of test targets. Assumes k divides z.shape[0]
            # evenly — TODO confirm against how the folds were built.
            zTests = np.empty((int(z.shape[0] / k)))
            i = 0

            for train_index, test_index in kf.split():
                # Slice every aligned array into this fold's train/validation parts.
                X_train, X_validation = X_rest[train_index], X_rest[test_index]
                x_train, x_validation = x_rest[train_index], x_rest[test_index]
                y_train, y_validation = y_rest[train_index], y_rest[test_index]
                z_train, z_validation = z_rest[train_index], z_rest[test_index]
                f_train, f_validation = f_rest[train_index], f_rest[test_index]

                # OLS, Finding the best lambda
                betaOLS = oh.linFit(X_train,
                                    z_train,
                                    model='OLS',
                                    _lambda=_lambda)
                betasOLSTemp[i] = betaOLS.reshape(-1)
                zPredictsOLS = (X_validation @ betaOLS)
                # Validation error is measured against f (the noiseless target),
                # not the noisy z.
                errorsOLS[i] = np.mean((f_validation - zPredictsOLS)**2)
                # Residual-variance estimate; the `0 *` term disables the usual
                # degrees-of-freedom correction — presumably intentional, verify.
                sigmaOLSSq = 1 / (X_validation.shape[0] -
                                  0 * X_validation.shape[1]) * np.sum(
                                      (z_validation - zPredictsOLS)**2)
                sigmaBetaOLSSq = sigmaOLSSq * np.diag(
                    np.linalg.pinv(X_validation.T @ X_validation))
                betasSigmaOLSTemp[i] = np.sqrt(sigmaBetaOLSSq)

                # Ridge, Finding the best lambda
                betaRidge = oh.linFit(X_train,
                                      z_train,
                                      model='Ridge',
示例#3
0
# Sample n random points in the unit square and evaluate the Franke
# function on them with additive Gaussian noise (std 0.1).
n = 1000
x_ = np.random.rand(n)
y_ = np.random.rand(n)

z = oh.frankeFunction(x_, y_) + 0.1 * np.random.randn(n)

# Set up the design matrix

MSE = []
R2_score = []

# Sweep the polynomial degree 1..5, fitting OLS and recording fit quality.
for grad in range(1, 6):
    X = oh.create_X(x_, y_, grad)
    invXTX = np.linalg.inv(X.T @ X)  # Need this anyway
    #beta = invXTX @ X.T @ z
    beta = oh.linFit(X, z)
    ztilde = X @ beta

    MSE.append(oh.mse(z, ztilde))
    R2_score.append(oh.R2_score(z, ztilde))
    #sigma = np.sqrt(np.var(ztilde))
    #print("Sigma numpy: ", sigma)
    # Residual std estimate with n - p - 1 degrees of freedom.
    sigma = np.sqrt(1 / (X.shape[0] - X.shape[1] - 1) * np.sum(
        (z - ztilde)**2))
    #print("Sigma self: ", sigma)

    # Per-coefficient std and confidence intervals.
    betaSigma = np.zeros(len(beta))
    # NOTE(review): `relative` is an alias of betaSigma (same array), not a copy.
    relative = betaSigma
    betaConf = np.zeros((len(beta), 2))
    # (Inner loop body is truncated in this view.)
    for i in range(len(beta)):
        #betaSigma[i] = sigma * np.sqrt(np.sqrt(invXTX[i][i]))
# --- Sanity checks for the helper module `oh` ---
# BUGFIX: dropped the unused `z2 = z.reshape(2, 2)` that stood here — it was
# never read, and reshape(2, 2) crashes whenever the in-scope z is not
# exactly 4 elements long.

# Bias-variance decomposition: bias^2 + variance must equal the MSE.
assert oh.bias(z, z) + oh.var(z, z) == oh.mse(z, z)
assert oh.bias(z, z + 1) + oh.var(z, z + 1) == oh.mse(z, z + 1)
print("The function bias(z,ztilde) works as advertised")

# Test R2 score
print("Testing the R2 score")
z = np.arange(4) + 1
assert oh.R2_score(z, z) == 1
print("Testing a matrix")
z2 = z.reshape(2, 2)
# BUGFIX: the matrix check asserted on the vector z, leaving z2 untested.
assert oh.R2_score(z2, z2) == 1
print("The function R2_score(z,ztilde) works as advertised")

print("Testing linear Regression functions")
x = np.random.randn(11)
y = np.random.randn(11)
X = oh.create_X(x, y, 2)
np.set_printoptions(precision=2)
print(X)
# z = x + y lies exactly in the column space of X (degree >= 1), so the
# unregularized fit should reproduce it to machine precision.
z = x + y
zTildeOLS = X @ oh.linFit(X, z, model='OLS')
assert oh.mse(z, zTildeOLS) < 10**(-28)
print("OLS works as advertised")
# BUGFIX: the Ridge and Lasso asserts both re-checked zTildeOLS, so those
# models were never actually tested. Regularization biases the solution away
# from the exact interpolant, so the fixed asserts use looser tolerances.
zTildeRidge = X @ oh.linFit(X, z, model='Ridge', _lambda=0.1)
assert oh.mse(z, zTildeRidge) < 10**(-2)  # mild shrinkage at lambda=0.1
print("Ridge works as advertised")
zTildeLasso = X @ oh.linFit(X, z, model='Lasso', _lambda=0.1)
# NOTE(review): tolerance is a judgment call — tighten once oh.linFit's
# Lasso objective scaling is confirmed.
assert oh.mse(z, zTildeLasso) < 0.5
print("Lasso works as advertised")  # BUGFIX: message previously said "Ridge"