示例#1
0
def Ridge_unit_test(min_deg=2, max_deg=5, tol=1e-6):
    """
    Tests our implementation of Ridge against sci-kit learn up to a given tolerance.

    Noisy samples of the Franke function are fitted with a 2D polynomial design
    matrix for every degree from min_deg to max_deg (inclusive) and for ten
    penalties evenly spaced on [0, 1]. Each coefficient returned by
    linear_regression.Ridge_2D is compared with the corresponding coefficient
    of sklearn's Ridge (fitted without intercept).

    Args:
        min_deg (int): lowest polynomial degree to test.
        max_deg (int): highest polynomial degree to test (inclusive).
        tol (float): absolute tolerance allowed for each coefficient.

    Returns:
        None. A warning is printed for every coefficient whose absolute
        difference from sklearn is not below tol.
    """
    n = 100  # Number of data points
    # Prepare data set
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y) + np.random.normal(0, 1, n) * 0.2
    degrees = np.arange(min_deg, max_deg + 1)
    for deg in degrees:
        # Build the design matrix for the degree under test. This used to be
        # hard-coded to 5, so every iteration tested the same model while the
        # warnings reported the loop degree.
        X = linear_regression.design_matrix_2D(x, y, int(deg))
        for lamb in np.linspace(0, 1, 10):
            # Compute optimal parameters using our homegrown Ridge regression
            beta = linear_regression.Ridge_2D(X=X, z=z, lamb=lamb)
            # Compute optimal parameters using sklearn
            skl_reg = Ridge(alpha=lamb, fit_intercept=False).fit(X, z)
            beta_skl = skl_reg.coef_

            for i in range(len(beta)):
                if abs(beta[i] - beta_skl[i]) < tol:
                    continue
                print(
                    "Warning! mismatch with SKL in Ridge_2D_unit_test with tol = %.0e"
                    % tol)
                print("Parameter no. %i for deg = %i" % (i, deg))
                print("-> (OUR) beta = %8.12f" % beta[i])
                print("-> (SKL) beta = %8.12f" % beta_skl[i])
    return
示例#2
0
def bootstrap_ridge_lasso(X_train, X_test, z_train, z_test, n_bootstraps,
                          lamb_lasso, lamb_ridge):
    """Bootstrapped bias-variance analysis restricted to Ridge and Lasso.

    For each of the n_bootstraps iterations the training set is resampled with
    replacement, Ridge (penalty lamb_ridge) and Lasso (penalty lamb_lasso, no
    intercept) are fitted on the resample, and both models predict on X_test.
    Intended for studying how bias and variance depend on the lambda-values.

    Returns MSE, mean squared bias and mean variance for Ridge, followed by the
    same three quantities for Lasso.
    """
    n_train = len(z_train)
    n_test = len(z_test)

    # One column of test-set predictions per bootstrap resample
    ridge_predictions = np.zeros((n_test, n_bootstraps))
    lasso_predictions = np.zeros((n_test, n_bootstraps))

    for boot in range(n_bootstraps):
        # Draw row indices with replacement
        resample = np.random.randint(0, n_train, n_train)
        X_resampled = X_train[resample]
        z_resampled = z_train[resample]

        ridge_beta = linear_regression.Ridge_2D(X_resampled, z_resampled,
                                                lamb_ridge)
        lasso_model = skl.Lasso(alpha=lamb_lasso, fit_intercept=False)
        lasso_model = lasso_model.fit(X_resampled, z_resampled)

        lasso_predictions[:, boot] = lasso_model.predict(X_test)
        ridge_predictions[:, boot] = X_test @ ridge_beta

    ridge_mse, ridge_bias, ridge_variance = compute_mse_bias_variance(
        z_test, ridge_predictions)
    lasso_mse, lasso_bias, lasso_variance = compute_mse_bias_variance(
        z_test, lasso_predictions)

    return (ridge_mse, ridge_bias, ridge_variance,
            lasso_mse, lasso_bias, lasso_variance)
def bootstrap_all(X_train, X_test, z_train, z_test, n_bootstraps, lamb_lasso, lamb_ridge):
    """Bootstrapped bias-variance analysis for OLS, Ridge and Lasso.

    Every bootstrap iteration resamples the training data with replacement,
    fits OLS, Ridge (penalty lamb_ridge) and Lasso (penalty lamb_lasso, no
    intercept) on the resample and stores each model's prediction on X_test.

    Returns MSE, mean squared bias and mean variance for Ridge, Lasso and OLS,
    in that order (nine values in total).
    """
    n_train = len(z_train)
    n_test = len(z_test)

    # One column of test-set predictions per bootstrap resample
    ols_predictions = np.zeros((n_test, n_bootstraps))
    ridge_predictions = np.zeros((n_test, n_bootstraps))
    lasso_predictions = np.zeros((n_test, n_bootstraps))

    for boot in range(n_bootstraps):
        # Draw row indices with replacement
        resample = np.random.randint(0, n_train, n_train)
        X_resampled = X_train[resample]
        z_resampled = z_train[resample]

        ols_beta = linear_regression.OLS_SVD_2D(X_resampled, z_resampled)
        ridge_beta = linear_regression.Ridge_2D(X_resampled, z_resampled, lamb_ridge)
        lasso_model = skl.Lasso(alpha=lamb_lasso, fit_intercept=False)
        lasso_model = lasso_model.fit(X_resampled, z_resampled)

        lasso_predictions[:, boot] = lasso_model.predict(X_test)
        ridge_predictions[:, boot] = X_test @ ridge_beta
        ols_predictions[:, boot] = X_test @ ols_beta

    ridge_mse, ridge_bias, ridge_variance = stat_tools.compute_mse_bias_variance(
        z_test, ridge_predictions)
    lasso_mse, lasso_bias, lasso_variance = stat_tools.compute_mse_bias_variance(
        z_test, lasso_predictions)
    ols_mse, ols_bias, ols_variance = stat_tools.compute_mse_bias_variance(
        z_test, ols_predictions)

    return (ridge_mse, ridge_bias, ridge_variance,
            lasso_mse, lasso_bias, lasso_variance,
            ols_mse, ols_bias, ols_variance)
示例#4
0
def crossvalidation_Ridge(X, z, k_folds, lambd):
    """Wrapper around crossvalidation that uses Ridge regression with penalty lambd."""

    def ridge_regression(X, z):
        # Binds the penalty so the callable only takes (X, z)
        return linear_regression.Ridge_2D(X, z, lambd)

    return crossvalidation(X, z, k_folds, regression=ridge_regression)
示例#5
0
def k_fold_cv_all(X, z, n_lambdas, lambdas, k_folds):
    """
    Performs k-fold cross validation for Ridge, Lasso and OLS. The Lasso and Ridge
    MSE-values are computed for a number of n_lambdas, with the lambda values given
    by the lambdas array. OLS is done only once for each of the k_folds folds.

    Args:
        X (array): Design matrix
        z (array): Data-values/response-values
        n_lambdas (int): number of lambda values to use for Lasso and Ridge
            (the first n_lambdas entries of lambdas are used).
        lambdas (array): The actual lambda-values to try.
        k_folds (int): The number of folds.

    Return:
        lasso_cv_mse (array): array containing the computed MSE for each lambda in Lasso
        ridge_cv_mse (array): array containing the computed MSE for each lambda in Ridge
        ols_cv_mse (float): computed MSE for OLS.
    """

    ridge_fold_score = np.zeros((n_lambdas, k_folds))
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    ols_fold_score = np.zeros(k_folds)
    test_list, train_list = k_fold_selection(z, k_folds)

    for j in range(k_folds):
        # The split depends only on the fold, so slice the data once per fold
        # instead of once per (lambda, fold) pair.
        test_ind_cv = test_list[j]
        train_ind_cv = train_list[j]
        X_train_cv = X[train_ind_cv]
        z_train_cv = z[train_ind_cv]
        X_test_cv = X[test_ind_cv]
        z_test_cv = z[test_ind_cv]

        # OLS has no penalty: fit it once on this fold. The previous separate
        # OLS loop indexed the folds with a stale loop variable and therefore
        # scored the last fold k_folds times.
        ols_cv_betas = linear_regression.OLS_SVD_2D(X_train_cv, z_train_cv)
        z_ols_test = X_test_cv @ ols_cv_betas
        ols_fold_score[j] = stat_tools.MSE(z_test_cv, z_ols_test)

        for i in range(n_lambdas):
            lamb = lambdas[i]
            clf_Lasso = skl.Lasso(alpha=lamb, fit_intercept=False).fit(
                X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)
            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv,
                                                     lamb)
            z_ridge_test = X_test_cv @ ridge_betas
            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    # Average over the folds
    lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
    ols_cv_mse = np.mean(ols_fold_score)

    return lasso_cv_mse, ridge_cv_mse, ols_cv_mse
示例#6
0
def franke_predictions(n=1000, noise_scale=0.2, degree=20, ridge_lambda=1e-2, lasso_lambda=1e-5, plot_grid_size=2000):
    """ Fits OLS, Ridge and Lasso to n noisy samples of the Franke function and
        evaluates the three fits, together with the noise-free Franke function,
        on a plot_grid_size x plot_grid_size meshgrid over the unit square.

        The response is centred by its mean and the design matrix (polynomial
        degree given by degree) is standardised with a scaler fitted on the
        sample; the mean is added back to every prediction.

        output:
            x_plot_mesh: meshgrid of x-coordinates
            y_plot_mesh: meshgrid of y-coordinates
            z_predict_ols: ols prediction of z on the meshgrid
            z_predict_ridge: ridge prediction of z on the meshgrid
            z_predict_lasso: lasso prediction of z on the meshgrid
            z_plot_franke: Actual Franke values on the meshgrid.

    """

    def as_grid(flat_values):
        # Back from the flattened meshgrid to a 2D surface
        return flat_values.reshape(plot_grid_size, -1)

    # Random sample of the noisy Franke function
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    z = z + noise_scale*np.random.normal(0, 1, len(z))

    # Centre the response; the mean is restored after prediction
    z_intercept = np.mean(z)
    z = z - z_intercept

    # Standardise the design matrix
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    # Plotting grid and the matching (identically scaled) design matrix
    grid_axis_x = np.linspace(0, 1, plot_grid_size)
    grid_axis_y = np.linspace(0, 1, plot_grid_size)
    x_plot_mesh, y_plot_mesh = np.meshgrid(grid_axis_x, grid_axis_y)
    X_plot_design = linear_regression.design_matrix_2D(
        x_plot_mesh.flatten(), y_plot_mesh.flatten(), degree)
    X_plot_design_scaled = scaler.transform(X_plot_design)

    # Ground truth on the grid
    z_plot_franke = FrankeFunction(x_plot_mesh, y_plot_mesh)

    # OLS
    ols_beta = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_ols = as_grid((X_plot_design_scaled @ ols_beta) + z_intercept)

    # Ridge
    ridge_beta = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_ridge = as_grid((X_plot_design_scaled @ ridge_beta) + z_intercept)

    # Lasso
    lasso_model = skl.Lasso(alpha=lasso_lambda, fit_intercept=False, max_iter=10000)
    lasso_model = lasso_model.fit(X_scaled, z)
    z_predict_lasso = as_grid(lasso_model.predict(X_plot_design_scaled) + z_intercept)

    return x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge, z_predict_lasso, z_plot_franke
示例#7
0
def bootstrap_ridge(X_train, X_test, z_train, z_test, n_bootstraps, lambd):
    """
    Wrapper around bootstrap for Ridge regression, so callers do not have to
    define a lambda function for the regression parameter each time.
    """

    def ridge_regression(X, z):
        # Binds the penalty so the callable only takes (X, z)
        return linear_regression.Ridge_2D(X, z, lambd)

    return bootstrap(X_train, X_test, z_train, z_test, n_bootstraps,
                     regression=ridge_regression)
示例#8
0
def terrain_predictions(spacing=40,
                        degree=20,
                        ridge_lambda=1e-2,
                        lasso_lambda=1e-5):
    """Fits OLS, Ridge and Lasso to a downsampled part of the terrain image and
    evaluates the fits on the full 1801 x 1801 plotting meshgrid.

    Every spacing-th pixel of the first 1801 rows and columns is used as
    training data, with coordinates mapped onto the unit square. The response
    is centred by its mean and the design matrix is standardised with a scaler
    fitted on the training points; the mean is added back to every prediction.

    output:
        x_plot_mesh: meshgrid of x-coordinates
        y_plot_mesh: meshgrid of y-coordinates
        z_predict_ols: ols prediction of z on the meshgrid
        z_predict_ridge: ridge prediction of z on the meshgrid
        z_predict_lasso: lasso prediction of z on the meshgrid
        z_true: Actual terrain values on the meshgrid.

    """

    def as_surface(flat_values):
        # Back from the flattened meshgrid to a 2D surface with 1801 columns
        return flat_values.reshape(-1, 1801)

    # Note structure! x runs along the second axis of terrain_data, and both
    # the flattened selection and the flattened meshgrids move most rapidly
    # over x, so reshaping to (length_y, length_x) lines up with terrain_data.
    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")

    # Make quadratic and downsample
    point_selection = terrain_data[:1801:spacing, :1801:spacing]
    n_rows, n_cols = point_selection.shape[0], point_selection.shape[1]
    X_coord_selection, Y_coord_selection = np.meshgrid(
        np.linspace(0, 1, n_cols), np.linspace(0, 1, n_rows))

    z = point_selection.flatten()  # the response values
    x = X_coord_selection.flatten()  # first degree feature variable
    y = Y_coord_selection.flatten()  # first degree feature variable

    # Centre the response; the mean is restored after prediction
    z_intercept = np.mean(z)
    z = z - z_intercept

    # Standardise the design matrix
    X = linear_regression.design_matrix_2D(x, y, degree)
    scaler = StandardScaler()
    scaler.fit(X)
    X_scaled = scaler.transform(X)

    # Plotting grid and the matching (identically scaled) design matrix
    x_plot_mesh, y_plot_mesh = np.meshgrid(np.linspace(0, 1, 1801),
                                           np.linspace(0, 1, 1801))
    X_plot_design = linear_regression.design_matrix_2D(
        x_plot_mesh.flatten(), y_plot_mesh.flatten(), degree)
    X_plot_design_scaled = scaler.transform(X_plot_design)

    # Ground truth
    z_true = terrain_data[:1801, :1801]

    # OLS
    ols_beta = linear_regression.OLS_SVD_2D(X_scaled, z)
    z_predict_ols = as_surface((X_plot_design_scaled @ ols_beta) + z_intercept)

    # Ridge
    ridge_beta = linear_regression.Ridge_2D(X_scaled, z, ridge_lambda)
    z_predict_ridge = as_surface(
        (X_plot_design_scaled @ ridge_beta) + z_intercept)

    # Lasso
    lasso_model = skl.Lasso(alpha=lasso_lambda, fit_intercept=False)
    lasso_model = lasso_model.fit(X_scaled, z)
    z_predict_lasso = as_surface(
        lasso_model.predict(X_plot_design_scaled) + z_intercept)

    return x_plot_mesh, y_plot_mesh, z_predict_ols, z_predict_ridge, z_predict_lasso, z_true
示例#9
0
def part_1a():
    """Exploratory OLS/Ridge/Lasso analysis of the Franke function with plots.

    Steps:
        1. Sample the Franke function at n random points, with and without
           added normal noise, and fit both with OLS (polynomial degree deg).
        2. Run k-fold cross validation over a logarithmic grid of penalties
           for Ridge and Lasso and pick the penalty with the lowest CV MSE.
        3. Run an OLS bootstrap on a train/test split and compute MSE, bias
           and variance.
        4. Print the MSE of both OLS fits and show three 3D surface plots
           (analytic function, OLS fit, OLS fit on noisy data).

    Returns:
        None. The cross-validation and bootstrap results are only kept in
        local variables.
    """
    # Sample the franke function n times at randomly chosen points
    n = 100
    deg = 5
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z_noisy = z + noise_scale * np.random.normal(0, 1, len(z))
    # Making the design matrix
    X = linear_regression.design_matrix_2D(x, y, deg)
    # Find the least-squares solution
    beta = linear_regression.OLS_2D(X, z)
    beta_noisy = linear_regression.OLS_2D(X, z_noisy)

    # Split into training and test data with ratio 0.2
    X_train, X_test, z_train, z_test = train_test_split(X, z, test_size=0.2)
    # Scale data according to sklearn, beware possible problems with intercept and std.
    # NOTE: the scaled matrices are not used further down yet.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Cross validation for ridge and lasso, lasso directly from sklearn.
    n_lambdas = 100
    lambdas = np.logspace(-3, 0, n_lambdas)
    k_folds = 5
    # np.zeros takes the shape as ONE tuple; a second positional argument
    # would be interpreted as the dtype and raise a TypeError.
    ridge_fold_score = np.zeros((n_lambdas, k_folds))
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    test_list, train_list = k_fold_selection(z, k_folds)
    for i, lamb in enumerate(lambdas):
        for j in range(k_folds):
            test_ind_cv = test_list[j]
            train_ind_cv = train_list[j]
            X_train_cv = X[train_ind_cv]
            z_train_cv = z[train_ind_cv]
            X_test_cv = X[test_ind_cv]
            z_test_cv = z[test_ind_cv]
            clf_Lasso = skl.Lasso(alpha=lamb).fit(X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)
            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv,
                                                     lamb)
            z_ridge_test = X_test_cv @ ridge_betas
            # Score against the held-out fold, not the full response vector
            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    lasso_cv_mse = np.mean(lasso_fold_score, axis=1, keepdims=True)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1, keepdims=True)
    best_lambda_lasso = lambdas[np.argmin(lasso_cv_mse)]
    best_lambda_ridge = lambdas[np.argmin(ridge_cv_mse)]

    # Bootstrap skeleton
    # For given polynomial degree, input X_train, z_train, X_test and z_test.
    # X_train and X_test should be scaled?
    n_bootstraps = 100
    z_boot_model = np.zeros((len(z_test), n_bootstraps))
    for bootstrap_number in range(n_bootstraps):
        # For the number of data value points (len_z) in the training set, pick a random
        # data value (z_train[random]) and its corresponding row in the design matrix
        shuffle = np.random.randint(0, len(z_train), len(z_train))
        X_boot, z_boot = X_train[shuffle], z_train[shuffle]
        betas_boot = linear_regression.OLS_SVD_2D(X_boot, z_boot)
        # Each resample fills its own column
        z_boot_model[:, bootstrap_number] = X_test @ betas_boot
    mse, bias, variance = stat_tools.compute_mse_bias_variance(
        z_test, z_boot_model)

    # Check MSE
    print("MSE = %.3f" % stat_tools.MSE(
        z, linear_regression.evaluate_poly_2D(x, y, beta, deg)))
    # And with noise
    print("Including standard normal noise scaled by {}, MSE = {:.3f}".format(
        noise_scale,
        stat_tools.MSE(
            z_noisy,
            linear_regression.evaluate_poly_2D(x, y, beta_noisy, deg))))

    # Evaluate the Franke function & least-squares on a regular grid
    x = np.linspace(0, 1, 30)
    y = np.linspace(0, 1, 30)
    X, Y = np.meshgrid(x, y)

    z_analytic = FrankeFunction(X, Y)
    z_fit = linear_regression.evaluate_poly_2D(X, Y, beta, deg)
    z_fit_noisy = linear_regression.evaluate_poly_2D(X, Y, beta_noisy, deg)

    fig = plt.figure()

    # One panel each for the analytic surface, the OLS fit and the OLS fit
    # obtained from the noisy data
    panels = [
        ("Franke Function", z_analytic),
        ("OLS", z_fit),
        ("OLS with noise", z_fit_noisy),
    ]
    for position, (title, surface) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 3, position, projection="3d")
        ax.set_title(title)
        ax.view_init(azim=45)
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.plot_surface(X, Y, surface, cmap=cm.coolwarm)

    plt.show()

    return
def deprecated_franke_analysis_full():
    """Full OLS/Ridge/Lasso analysis of noisy Franke-function samples.

    For every polynomial degree in range(max_degree) (i.e. starting at 0) the
    function computes:
      * OLS train and test MSE on a scaled train/test split,
      * k-fold cross-validated MSE for Ridge and Lasso over the lambdas grid,
        together with the best lambda for each method,
      * bootstrapped MSE, bias and variance for OLS and for Ridge/Lasso at
        their best lambda,
      * bootstrapped MSE, bias and variance for Ridge/Lasso on every fifth
        lambda of the grid.

    All results are written into local arrays; the function contains no
    return statement, so nothing is handed back to the caller.
    """
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale*np.random.normal(0,1,len(z))
    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3,0,n_lambdas)
    # Every fifth lambda, used for the per-lambda bootstrap further down
    subset_lambdas = lambdas[::5]

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    # Cross-validated MSE, one row per degree and one column per lambda
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    # Bootstrap results, one row per degree and one column per subset lambda
    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros((max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x,y,degree)
        # The split is taken from the unscaled design matrix; it is scaled
        # separately below with a scaler fitted on the training part only.
        X_train, X_test, z_train, z_test = train_test_split(X, z, test_size = 0.2)
        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        # Reset the first column to ones after scaling
        # (presumably the intercept column of the design matrix; verify against design_matrix_2D)
        X[:,0] = 1

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:,0] = 1
        X_test_scaled[:,0] = 1

        # OLS, get MSE for test and train set.

        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)


        # CV, find best lambdas and get mse vs lambda for given degree.

        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for i in range(n_lambdas):
            lamb = lambdas[i]
            for j in range(k_folds):
                test_ind_cv = test_list[j]
                train_ind_cv = train_list[j]
                X_train_cv = X[train_ind_cv]
                z_train_cv = z[train_ind_cv]
                X_test_cv = X[test_ind_cv]
                z_test_cv = z[test_ind_cv]
                clf_Lasso = skl.Lasso(alpha=lamb,fit_intercept=False).fit(X_train_cv,z_train_cv)
                z_lasso_test = clf_Lasso.predict(X_test_cv)
                ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
                z_ridge_test = X_test_cv @ ridge_betas
                ridge_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_ridge_test)
                lasso_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_lasso_test)

        # Average over the folds and keep the lambda with the lowest mean MSE
        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # All regressions bootstraps at once
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        z_boot_ols = np.zeros((len(z_test),n_bootstraps))
        z_boot_ridge = np.zeros((len(z_test),n_bootstraps))
        z_boot_lasso= np.zeros((len(z_test),n_bootstraps))
        for i in range(n_bootstraps):
            # Resample the training rows with replacement
            shuffle = np.random.randint(0,len(z_train),len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle] , z_train[shuffle]
            betas_boot_ols = linear_regression.OLS_SVD_2D(X_boot, z_boot)
            betas_boot_ridge = linear_regression.Ridge_2D(X_boot, z_boot, lamb_ridge) #Ridge, given lambda
            clf_Lasso = skl.Lasso(alpha=lamb_lasso,fit_intercept=False).fit(X_boot,z_boot)
            z_boot_lasso[:,i] = clf_Lasso.predict(X_test_scaled) #Lasso, given lambda
            z_boot_ridge[:,i] = X_test_scaled @ betas_boot_ridge
            z_boot_ols[:,i] = X_test_scaled @ betas_boot_ols

        ridge_best_lambda_boot_mse[degree], ridge_best_lambda_boot_bias[degree], \
        ridge_best_lambda_boot_variance[degree] = stat_tools.compute_mse_bias_variance(z_test, z_boot_ridge)

        lasso_best_lambda_boot_mse[degree], lasso_best_lambda_boot_bias[degree], \
        lasso_best_lambda_boot_variance[degree] = stat_tools.compute_mse_bias_variance(z_test, z_boot_lasso)

        ols_boot_mse[degree], ols_boot_bias[degree], \
        ols_boot_variance[degree] = stat_tools.compute_mse_bias_variance(z_test, z_boot_ols)

        # Bootstrapping for a selection of lambdas for ridge and lasso
        subset_lambda_index = 0
        for lamb in subset_lambdas:
            z_boot_ridge = np.zeros((len(z_test),n_bootstraps))
            z_boot_lasso= np.zeros((len(z_test),n_bootstraps))
            for i in range(n_bootstraps):
                shuffle = np.random.randint(0,len(z_train),len(z_train))
                X_boot, z_boot = X_train_scaled[shuffle] , z_train[shuffle]
                betas_boot_ridge = linear_regression.Ridge_2D(X_boot, z_boot, lamb) #Ridge, given lambda
                clf_Lasso = skl.Lasso(alpha=lamb,fit_intercept=False).fit(X_boot,z_boot)
                z_boot_lasso[:,i] = clf_Lasso.predict(X_test_scaled) #Lasso, given lambda
                z_boot_ridge[:,i] = X_test_scaled @ betas_boot_ridge

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index ], ridge_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index ] = stat_tools.compute_mse_bias_variance(z_test, z_boot_ridge)

            lasso_subset_lambda_boot_mse[degree, subset_lambda_index ], lasso_subset_lambda_boot_bias[degree, subset_lambda_index ], \
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index ] = stat_tools.compute_mse_bias_variance(z_test, z_boot_lasso)

            # Manual column counter for the subset-lambda result arrays
            subset_lambda_index  += 1
def terrain_analysis():
    """OLS/Ridge/Lasso analysis of the full terrain image for several degrees.

    For every polynomial degree in range(max_degree) the function computes:
      * OLS train and test MSE on a scaled train/test split,
      * k-fold cross-validated MSE for Ridge and Lasso over the lambdas grid,
        together with the best lambda for each method,
      * bootstrapped MSE, bias and variance for OLS and for Ridge/Lasso at
        their best lambda.

    Returns:
        None. All results are kept in local arrays only; no plotting code
        follows the computations.
    """

    def bootstrap_statistics(fit_predict, X_train_scaled, X_test_scaled,
                             z_train, z_test):
        # Resamples the training rows with replacement n_bootstraps times, lets
        # fit_predict(X_boot, z_boot, X_test_scaled) produce test predictions
        # for each resample and returns (mse, bias, variance).
        z_boot_model = np.zeros((len(z_test), n_bootstraps))
        for i in range(n_bootstraps):
            shuffle = np.random.randint(0, len(z_train), len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle], z_train[shuffle]
            z_boot_model[:, i] = fit_predict(X_boot, z_boot, X_test_scaled)
        return stat_tools.compute_mse_bias_variance(z_test, z_boot_model)

    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    x_terrain = np.arange(terrain_data.shape[1]) #apparently, from the problem description.
    y_terrain = np.arange(terrain_data.shape[0])
    X_coord, Y_coord = np.meshgrid(x_terrain,y_terrain)
    z_terrain = terrain_data.flatten() # the response values
    x_terrain_flat = X_coord.flatten() # the first degree feature variables
    y_terrain_flat = Y_coord.flatten() # the first degree feature variables
    max_degree = 10
    n_lambdas = 15
    n_bootstraps = 20
    k_folds = 5
    lambdas = np.logspace(-3,0,n_lambdas)

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)

    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    # Cross-validated MSE, one row per degree and one column per lambda
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    # Actual computations
    for degree in range(max_degree):
        X_terrain_design = linear_regression.design_matrix_2D(x_terrain_flat,y_terrain_flat,degree)
        X_train, X_test, z_train, z_test = train_test_split(X_terrain_design, z_terrain, test_size = 0.2)
        # Scaling and feeding to CV.
        z = z_terrain
        X = X_terrain_design
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        # Reset the first column to ones after scaling
        X[:,0] = 1

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:,0] = 1
        X_test_scaled[:,0] = 1

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for i in range(n_lambdas):
            lamb = lambdas[i]
            for j in range(k_folds):
                test_ind_cv = test_list[j]
                train_ind_cv = train_list[j]
                X_train_cv = X[train_ind_cv]
                z_train_cv = z[train_ind_cv]
                X_test_cv = X[test_ind_cv]
                z_test_cv = z[test_ind_cv]
                clf_Lasso = skl.Lasso(alpha=lamb,fit_intercept=False).fit(X_train_cv,z_train_cv)
                z_lasso_test = clf_Lasso.predict(X_test_cv)
                ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
                z_ridge_test = X_test_cv @ ridge_betas
                ridge_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_ridge_test)
                lasso_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_lasso_test)

        # Average over the folds and keep the lambda with the lowest mean MSE
        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # OLS bootstrap, get bootstrapped mse, bias and variance for given degree.
        ols_boot_mse[degree], ols_boot_bias[degree], \
        ols_boot_variance[degree] = bootstrap_statistics(
            lambda X_boot, z_boot, X_eval:
                X_eval @ linear_regression.OLS_SVD_2D(X_boot, z_boot),
            X_train_scaled, X_test_scaled, z_train, z_test)

        # Ridge bootstrap, get bootstrapped mse, bias and variance for given degree and lambda
        lamb_ridge = best_ridge_lambda[degree]
        ridge_best_lambda_boot_mse[degree], ridge_best_lambda_boot_bias[degree], \
        ridge_best_lambda_boot_variance[degree] = bootstrap_statistics(
            lambda X_boot, z_boot, X_eval:
                X_eval @ linear_regression.Ridge_2D(X_boot, z_boot, lamb_ridge),
            X_train_scaled, X_test_scaled, z_train, z_test)

        # Lasso bootstrap, get bootstrapped mse, bias and variance for given degree and lambda.
        lamb_lasso = best_lasso_lambda[degree]
        lasso_best_lambda_boot_mse[degree], lasso_best_lambda_boot_bias[degree], \
        lasso_best_lambda_boot_variance[degree] = bootstrap_statistics(
            lambda X_boot, z_boot, X_eval:
                skl.Lasso(alpha=lamb_lasso, fit_intercept=False).fit(X_boot, z_boot).predict(X_eval),
            X_train_scaled, X_test_scaled, z_train, z_test)

    # All necessary computations have been done above. The return used to sit
    # inside the degree loop, which ended the analysis after the first degree.
    return