def cross_validation(X, z, k_folds, regression=linear_regression.OLS_SVD_2D):
    """Run k-fold cross-validation and tabulate per-fold MSE and R2 scores.

    Args:
        X (array): Design matrix, one row per observation.
        z (array): Response values, indexed consistently with the rows of X.
        k_folds (int): Number of folds.
        regression (callable): Called as ``regression(X_train, z_train)`` and
            expected to return a coefficient vector ``beta`` such that
            predictions are ``X @ beta``.

    Returns:
        pandas.DataFrame: One row per fold (index ``0 .. k_folds - 1``) with
        the columns "MSE train", "MSE test", "R2 train" and "R2 test".
    """
    # Variance/bias columns were planned at some point but are not computed.
    columns = ["MSE train", "MSE test", "R2 train", "R2 test"]
    dat = pd.DataFrame(columns=columns, index=np.arange(k_folds))

    # The fold count is passed positionally, like the other call sites of
    # k_fold_selection in this file, so the call does not depend on the
    # helper's keyword name.
    test_indices, train_indices = stat_tools.k_fold_selection(z, k_folds)

    for k in range(k_folds):
        # Training data
        X_train = X[train_indices[k], :]
        z_train = z[train_indices[k]]

        # Testing data
        X_test = X[test_indices[k], :]
        z_test = z[test_indices[k]]

        beta = regression(X_train, z_train)

        # Predict once per split instead of once per statistic.
        z_train_model = X_train @ beta
        z_test_model = X_test @ beta

        # Use .loc rather than chained indexing (dat[col][k] = ...): chained
        # assignment is not guaranteed to write into the frame and silently
        # does nothing under pandas copy-on-write.
        dat.loc[k, "MSE train"] = stat_tools.MSE(z_train, z_train_model)
        dat.loc[k, "R2 train"] = stat_tools.R2(z_train, z_train_model)
        dat.loc[k, "MSE test"] = stat_tools.MSE(z_test, z_test_model)
        dat.loc[k, "R2 test"] = stat_tools.R2(z_test, z_test_model)

    return dat
def k_fold_cv_all(X, z, n_lambdas, lambdas, k_folds):
    """Perform k-fold cross-validation for Ridge, Lasso and OLS.

    Ridge and Lasso are evaluated for the first ``n_lambdas`` entries of
    ``lambdas``; OLS is evaluated once per fold. All three methods share the
    same fold split.

    Args:
        X (array): Design matrix.
        z (array): Response values.
        n_lambdas (int): Number of lambda values to use for Lasso and Ridge.
            Must not exceed ``len(lambdas)``.
        lambdas (array): The lambda values to try.
        k_folds (int): The number of folds.

    Returns:
        lasso_cv_mse (array): Fold-averaged test MSE for each lambda, Lasso.
        ridge_cv_mse (array): Fold-averaged test MSE for each lambda, Ridge.
        ols_cv_mse (float): Fold-averaged test MSE for OLS.
    """
    ridge_fold_score = np.zeros((n_lambdas, k_folds))
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    ols_fold_score = np.zeros(k_folds)

    # Qualified like the other stat_tools calls in this function.
    test_list, train_list = stat_tools.k_fold_selection(z, k_folds)

    # Folds are the outer loop so that each split is sliced only once and the
    # OLS score is computed from the *same* fold index as Ridge/Lasso. (The
    # earlier version indexed the OLS folds with a stale loop variable and
    # therefore scored the last fold k_folds times.)
    for j in range(k_folds):
        test_ind_cv = test_list[j]
        train_ind_cv = train_list[j]

        X_train_cv = X[train_ind_cv]
        z_train_cv = z[train_ind_cv]
        X_test_cv = X[test_ind_cv]
        z_test_cv = z[test_ind_cv]

        # OLS has no hyper-parameter: one fit per fold.
        ols_cv_betas = linear_regression.OLS_SVD_2D(X_train_cv, z_train_cv)
        z_ols_test = X_test_cv @ ols_cv_betas
        ols_fold_score[j] = stat_tools.MSE(z_test_cv, z_ols_test)

        for i in range(n_lambdas):
            lamb = lambdas[i]

            clf_Lasso = skl.Lasso(alpha=lamb, fit_intercept=False).fit(
                X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)

            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
            z_ridge_test = X_test_cv @ ridge_betas

            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    # Average over the folds (axis 1) to get one score per lambda.
    lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
    ols_cv_mse = np.mean(ols_fold_score)

    return lasso_cv_mse, ridge_cv_mse, ols_cv_mse
def bootstrap(X_train, X_test, z_train, z_test, bootstraps=100, regression=linear_regression.OLS_SVD_2D):
    """Bootstrap a regression and report the mean train/test MSE.

    For every resample, ``len(z_train)`` rows are drawn with replacement from
    the training set, the model is fitted on them, and the MSE is recorded
    both on the resample itself and on the fixed test set.

    Args:
        X_train, X_test (array): Design matrices for training and testing.
        z_train, z_test (array): Matching response values.
        bootstraps (int): Number of resamples.
        regression (callable): ``regression(X, z)`` returning coefficients
            ``beta`` used as ``X @ beta``.

    Returns:
        dict: Single-element lists keyed by statistic name. Only "MSE train"
        and "MSE test" are computed; the R2, bias and variance entries are
        placeholder zeros.
    """
    n_samples = len(z_train)
    train_scores = np.zeros(bootstraps)
    test_scores = np.zeros(bootstraps)

    for draw in range(bootstraps):
        # Sample row indices with replacement.
        picks = np.random.randint(0, n_samples, n_samples)
        X_resampled, z_resampled = X_train[picks, :], z_train[picks]

        coeffs = regression(X_resampled, z_resampled)

        train_scores[draw] = stat_tools.MSE(z_resampled, X_resampled @ coeffs)
        test_scores[draw] = stat_tools.MSE(z_test, X_test @ coeffs)

    summary = {
        "MSE train": [np.mean(train_scores)],
        "MSE test": [np.mean(test_scores)],
    }
    # Statistics that are not (yet) computed are reported as zero.
    for placeholder in ("R2 train", "R2 test",
                        "Bias train", "Bias test",
                        "Variance train", "Variance test"):
        summary[placeholder] = [0]
    return summary
def cross_validation(X, z, k_folds, regression):
    """Estimate the test MSE of a linear model by k-fold cross-validation.

    Args:
        X (array): Design matrix, one row per observation.
        z (array): Response values.
        k_folds (int): Number of folds.
        regression (callable): ``regression(X_train, z_train)`` returning a
            coefficient vector ``beta``; predictions are ``X_test @ beta``.

    Returns:
        float: Mean of the per-fold test MSE values.
    """
    MSE_test = np.zeros(k_folds)

    test_indices, train_indices = stat_tools.k_fold_selection(z, k_folds=k_folds)

    for k in range(k_folds):
        # Training data
        X_train = X[train_indices[k], :]
        z_train = z[train_indices[k]]

        # Testing data
        X_test = X[test_indices[k], :]
        z_test = z[test_indices[k]]

        # Solve model and score it on the held-out fold
        beta = regression(X_train, z_train)
        MSE_test[k] = stat_tools.MSE(z_test, X_test @ beta)

    return np.mean(MSE_test)
def cross_validation_lasso(X, z, k_folds, lambd):
    """Estimate the test MSE of Lasso regression by k-fold cross-validation.

    Lasso gets its own routine because the scikit-learn estimator is used via
    ``fit``/``predict`` rather than through a ``regression(X, z)`` callable.

    Args:
        X (array): Design matrix, one row per observation.
        z (array): Response values.
        k_folds (int): Number of folds.
        lambd (float): Regularisation strength, passed to Lasso as ``alpha``.

    Returns:
        float: Mean of the per-fold test MSE values.
    """
    MSE_test = np.zeros(k_folds)

    test_indices, train_indices = stat_tools.k_fold_selection(z, k_folds=k_folds)

    for k in range(k_folds):
        # Training data
        X_train = X[train_indices[k], :]
        z_train = z[train_indices[k]]

        # Testing data
        X_test = X[test_indices[k], :]
        z_test = z[test_indices[k]]

        # Solve model; no intercept is fitted by the estimator itself.
        clf_Lasso = skl.Lasso(alpha=lambd, fit_intercept=False).fit(X_train, z_train)
        z_model_test = clf_Lasso.predict(X_test)

        # Compute statistics
        MSE_test[k] = stat_tools.MSE(z_test, z_model_test)

    return np.mean(MSE_test)
def bootstrap(X_train, X_test, z_train, z_test, bootstraps, regression):
    """Bootstrap a regression method and score its test-set predictions.

    ``regression`` is called as ``regression(X, z)``. Extra parameters can be
    bound with a lambda, e.g.
    ``bootstrap(..., regression=lambda X, y: regression_method(X, y, lamb))``.

    Returns:
        tuple: ``(MSE_test, bias2_test, variance_test)`` as computed by
        ``stat_tools.MSE``, ``stat_tools.mean_squared_bias`` and
        ``stat_tools.mean_variance`` from the stacked test predictions
        (one row per resample).
    """
    n_train = z_train.size

    # One row of test-set predictions per bootstrap resample.
    predictions = np.empty((bootstraps, z_test.size))

    for b in range(bootstraps):
        # Draw n_train row indices with replacement.
        resample = np.random.randint(0, n_train, n_train)
        coeffs = regression(X_train[resample, :], z_train[resample])
        predictions[b, :] = X_test @ coeffs

    return (
        stat_tools.MSE(z_test, predictions),
        stat_tools.mean_squared_bias(z_test, predictions),
        stat_tools.mean_variance(z_test, predictions),
    )
def bootstrap_lasso(X_train, X_test, z_train, z_test, bootstraps, lambd):
    """Bootstrap Lasso regression and score its test-set predictions.

    Counterpart of ``bootstrap`` for the scikit-learn Lasso estimator, which
    is driven through ``fit``/``predict``. ``lambd`` is passed as ``alpha``.

    Returns:
        tuple: ``(MSE_test, bias2_test, variance_test)`` as computed by
        ``stat_tools.MSE``, ``stat_tools.mean_squared_bias`` and
        ``stat_tools.mean_variance`` from the stacked test predictions
        (one row per resample).
    """
    n_train = z_train.size

    # One row of test-set predictions per bootstrap resample.
    predictions = np.empty((bootstraps, z_test.size))

    for b in range(bootstraps):
        # Draw n_train row indices with replacement.
        resample = np.random.randint(0, n_train, n_train)
        model = skl.Lasso(alpha=lambd, fit_intercept=False)
        model = model.fit(X_train[resample, :], z_train[resample])
        predictions[b, :] = model.predict(X_test)

    return (
        stat_tools.MSE(z_test, predictions),
        stat_tools.mean_squared_bias(z_test, predictions),
        stat_tools.mean_variance(z_test, predictions),
    )
def crossvalidation_LASSO(X, z, k_folds, lambd):
    """Compute the k-fold cross-validation MSE for Lasso regression.

    Lasso requires special treatment because the scikit-learn estimator has a
    different interface (``fit``/``predict``) than the ``regression(X, z)``
    callables; functionally this mirrors ``crossvalidation``.

    Args:
        X (array): Design matrix.
        z (array): Response values.
        k_folds (int): Number of folds to split the data into.
        lambd (float): Regularisation strength, passed to Lasso as ``alpha``.

    Returns:
        float: Mean test MSE over the folds.
    """
    cv_mse = np.zeros(k_folds)  # Storage for the individual MSE scores

    # The k-fold split indices (qualified like the stat_tools.MSE call below).
    test_list, train_list = stat_tools.k_fold_selection(z, k_folds)

    for i in range(k_folds):
        test_ind_cv = test_list[i]
        train_ind_cv = train_list[i]

        # Separate out the training and test data for the current split
        X_train_cv = X[train_ind_cv]
        z_train_cv = z[train_ind_cv]
        X_test_cv = X[test_ind_cv]
        z_test_cv = z[test_ind_cv]

        # Fit on the training part, score on the held-out part
        clf_Lasso = skl.Lasso(alpha=lambd, fit_intercept=False).fit(
            X_train_cv, z_train_cv)
        z_lasso_test = clf_Lasso.predict(X_test_cv)
        cv_mse[i] = stat_tools.MSE(z_test_cv, z_lasso_test)

    return np.mean(cv_mse)
def k_folds_cv_OLS_only(X, z, k_folds):
    """Compute the k-fold cross-validation MSE for OLS.

    Args:
        X (array): Design matrix.
        z (array): Response values.
        k_folds (int): Number of folds.

    Returns:
        float: Mean test MSE over the folds.
    """
    ols_fold_score = np.zeros(k_folds)
    test_list, train_list = stat_tools.k_fold_selection(z, k_folds)

    for i in range(k_folds):
        # Index the folds with the loop variable itself; the previous code
        # used an undefined name here.
        test_ind_cv = test_list[i]
        train_ind_cv = train_list[i]

        X_train_cv = X[train_ind_cv]
        z_train_cv = z[train_ind_cv]
        X_test_cv = X[test_ind_cv]
        z_test_cv = z[test_ind_cv]

        ols_cv_betas = linear_regression.OLS_SVD_2D(X_train_cv, z_train_cv)
        z_ols_test = X_test_cv @ ols_cv_betas
        ols_fold_score[i] = stat_tools.MSE(z_test_cv, z_ols_test)

    ols_cv_mse = np.mean(ols_fold_score)
    return ols_cv_mse
def crossvalidation(X, z, k_folds, regression):
    """Perform k-fold cross-validation for a linear regression model.

    Note: Hyper-parameters (like the lambdas) can be inserted into
    ``regression`` via lambda functions,
    e.g. ``lambda X, z: ridge(X, z, lambd=1e-3)``.

    Note 2: LASSO regression is performed by a separate, specialised function
    because scikit-learn adheres to a different interface than our codes.

    Args:
        X (array): Design matrix.
        z (array): Response values.
        k_folds (int): Number of folds/splits to split the data into.
        regression (callable): Regression method to use. Assumes the form
            ``regression(X, z)`` and a returned coefficient vector that is
            applied as ``X @ beta``.

    Returns:
        float: Mean test MSE over the folds.
    """
    cv_mse = np.zeros(k_folds)  # Storage for the individual MSE scores

    # The k-fold split indices (qualified like the stat_tools.MSE call below).
    test_list, train_list = stat_tools.k_fold_selection(z, k_folds)

    for i in range(k_folds):
        test_ind_cv = test_list[i]
        train_ind_cv = train_list[i]

        # Separate out the training and test data for the current split
        X_train_cv = X[train_ind_cv]
        z_train_cv = z[train_ind_cv]
        X_test_cv = X[test_ind_cv]
        z_test_cv = z[test_ind_cv]

        # Perform the regression in the usual way & compute resulting MSE
        cv_betas = regression(X_train_cv, z_train_cv)
        z_model = X_test_cv @ cv_betas
        cv_mse[i] = stat_tools.MSE(z_test_cv, z_model)

    return np.mean(cv_mse)
# Scaling and feeding to bootstrap and OLS scaler_boot = StandardScaler() scaler_boot.fit(X_train) X_train_scaled = scaler_boot.transform(X_train) X_test_scaled = scaler_boot.transform(X_test) # X_train_scaled[:,0] = 1 #maybe not for ridge+lasso # X_test_scaled[:,0] = 1 #maybe not for ridge+lasso # OLS, get MSE for test and train set. betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train) z_test_model = X_test_scaled @ betas z_train_model = X_train_scaled @ betas mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model) mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model) # CV, find best lambdas and get mse vs lambda for given degree. Also, gets # ols_CV_MSE lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = stat_tools.k_fold_cv_all(X_scaled,z,n_lambdas,lambdas,k_folds) best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)] best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)] best_lasso_mse[degree] = np.min(lasso_cv_mse) best_ridge_mse[degree] = np.min(ridge_cv_mse) lasso_lamb_deg_mse[degree] = lasso_cv_mse ridge_lamb_deg_mse[degree] = ridge_cv_mse ols_cv_mse[degree] = ols_cv_mse_deg
def terrain_analysis():
    """Run OLS, Ridge and Lasso experiments on the SRTM terrain image.

    For each polynomial degree in ``range(max_degree)`` the function
      1. builds the design matrix from the pixel coordinates,
      2. fits OLS on a train/test split and records both MSE values,
      3. cross-validates Ridge and Lasso over ``lambdas`` and records the
         best lambda and its MSE,
      4. bootstraps OLS, Ridge (best lambda) and Lasso (best lambda) and
         records MSE, bias and variance.

    All results are stored in local arrays only; the function returns None
    (the plotting part that would consume them is not present).
    """
    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    # Axis 1 of the image is used as x and axis 0 as y.
    x_terrain = np.arange(terrain_data.shape[1])
    y_terrain = np.arange(terrain_data.shape[0])
    X_coord, Y_coord = np.meshgrid(x_terrain,y_terrain)
    z_terrain = terrain_data.flatten()  # the response values
    x_terrain_flat = X_coord.flatten()  # the first degree feature variables
    y_terrain_flat = Y_coord.flatten()  # the first degree feature variables

    # Experiment sizes.
    max_degree = 10
    n_lambdas = 15
    n_bootstraps = 20
    k_folds = 5
    lambdas = np.logspace(-3,0,n_lambdas)

    # Quantities of interest, one entry per polynomial degree:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)
    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)
    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)
    # Full CV MSE-vs-lambda curves, one row per degree:
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    # Actual computations
    for degree in range(max_degree):
        X_terrain_design = linear_regression.design_matrix_2D(x_terrain_flat,y_terrain_flat,degree)
        X_train, X_test, z_train, z_test = train_test_split(X_terrain_design, z_terrain, test_size = 0.2)

        # Scaling and feeding to CV: the scaler is fitted on the full data
        # set, then column 0 is reset to 1 (presumably the intercept column
        # of the design matrix -- TODO confirm against design_matrix_2D).
        z = z_terrain
        X = X_terrain_design
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        X[:,0] = 1

        # Scaling and feeding to bootstrap and OLS: here the scaler is fitted
        # on the training part only and applied to both parts.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:,0] = 1
        X_test_scaled[:,0] = 1

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # Scores are stored as [lambda index, fold index].
        ridge_fold_score = np.zeros((n_lambdas, k_folds))
        lasso_fold_score = np.zeros((n_lambdas, k_folds))
        test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
        for i in range(n_lambdas):
            lamb = lambdas[i]
            for j in range(k_folds):
                test_ind_cv = test_list[j]
                train_ind_cv = train_list[j]
                X_train_cv = X[train_ind_cv]
                z_train_cv = z[train_ind_cv]
                X_test_cv = X[test_ind_cv]
                z_test_cv = z[test_ind_cv]
                clf_Lasso = skl.Lasso(alpha=lamb,fit_intercept=False).fit(X_train_cv,z_train_cv)
                z_lasso_test = clf_Lasso.predict(X_test_cv)
                ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
                z_ridge_test = X_test_cv @ ridge_betas
                ridge_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_ridge_test)
                lasso_fold_score[i,j] = stat_tools.MSE(z_test_cv, z_lasso_test)
        # Average over folds -> one CV score per lambda.
        lasso_cv_mse = np.mean(lasso_fold_score, axis=1)
        ridge_cv_mse = np.mean(ridge_fold_score, axis=1)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse

        # OLS bootstrap, get bootstrapped mse, bias and variance for given
        # degree. Predictions are stored one column per resample.
        z_boot_model = np.zeros((len(z_test),n_bootstraps))
        for i in range(n_bootstraps):
            # Row indices drawn with replacement from the training set.
            shuffle = np.random.randint(0,len(z_train),len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle] , z_train[shuffle]
            betas_boot = linear_regression.OLS_SVD_2D(X_boot, z_boot)
            z_boot_model[:,i] = X_test_scaled @ betas_boot
        mse, bias, variance = stat_tools.compute_mse_bias_variance(z_test, z_boot_model)
        ols_boot_mse[degree] = mse
        ols_boot_bias[degree] = bias
        ols_boot_variance[degree] = variance

        # Ridge bootstrap, get bootstrapped mse, bias and variance for given
        # degree, using the lambda that won the cross-validation above.
        lamb = best_ridge_lambda[degree]
        z_boot_model = np.zeros((len(z_test),n_bootstraps))
        for i in range(n_bootstraps):
            shuffle = np.random.randint(0,len(z_train),len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle] , z_train[shuffle]
            betas_boot = linear_regression.Ridge_2D(X_boot, z_boot, lamb)
            z_boot_model[:,i] = X_test_scaled @ betas_boot
        mse, bias, variance = stat_tools.compute_mse_bias_variance(z_test, z_boot_model)
        ridge_best_lambda_boot_mse[degree] = mse
        ridge_best_lambda_boot_bias[degree] = bias
        ridge_best_lambda_boot_variance[degree] = variance

        # Lasso bootstrap, get bootstrapped mse, bias and variance for given
        # degree, using the lambda that won the cross-validation above.
        lamb = best_lasso_lambda[degree]
        z_boot_model = np.zeros((len(z_test),n_bootstraps))
        for i in range(n_bootstraps):
            shuffle = np.random.randint(0,len(z_train),len(z_train))
            X_boot, z_boot = X_train_scaled[shuffle] , z_train[shuffle]
            clf_Lasso = skl.Lasso(alpha=lamb,fit_intercept=False).fit(X_boot,z_boot)
            z_boot_model[:,i] = clf_Lasso.predict(X_test_scaled)
        mse, bias, variance = stat_tools.compute_mse_bias_variance(z_test, z_boot_model)
        lasso_best_lambda_boot_mse[degree] = mse
        lasso_best_lambda_boot_bias[degree] = bias
        lasso_best_lambda_boot_variance[degree] = variance

    # All necessary computations should have been done above. The plotting
    # part was meant to follow here; nothing is returned.
    return
def part_1a():
    """Fit the Franke function with OLS and plot the result.

    Steps:
      1. Sample the Franke function at ``n`` random points, with and without
         added normal noise, and fit a degree-``deg`` polynomial by OLS.
      2. Run a k-fold cross-validation over a lambda grid for Ridge and
         Lasso and pick the lambda with the lowest mean fold MSE.
      3. Run an OLS bootstrap on a train/test split and compute MSE, bias
         and variance of the test predictions.
      4. Print the training MSE of the two OLS fits and show three surface
         plots (analytic function, OLS fit, OLS fit on noisy data).

    The function returns None; steps 2 and 3 are skeletons whose results are
    computed but not used further.
    """
    # Sample the franke function n times at randomly chosen points
    n = 100
    deg = 5
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)

    # Adding standard normal noise:
    z_noisy = z + noise_scale * np.random.normal(0, 1, len(z))

    # Making the design matrix
    X = linear_regression.design_matrix_2D(x, y, deg)

    # Find the least-squares solution
    beta = linear_regression.OLS_2D(X, z)
    beta_noisy = linear_regression.OLS_2D(X, z_noisy)

    # Split into training and test data with ratio 0.2
    X_train, X_test, z_train, z_test = train_test_split(X, z, test_size=0.2)

    # Scale data according to sklearn, beware possible problems with
    # intercept and std.
    # NOTE(review): the scaled matrices are not used below yet; the CV and
    # bootstrap skeletons still run on the unscaled data.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Cross-validation for ridge and lasso (lasso directly from sklearn).
    n_lambdas = 100
    lambdas = np.logspace(-3, 0, n_lambdas)
    k_folds = 5
    # The shape must be a single tuple: np.zeros(a, b) would treat b as dtype.
    ridge_fold_score = np.zeros((n_lambdas, k_folds))
    lasso_fold_score = np.zeros((n_lambdas, k_folds))
    test_list, train_list = stat_tools.k_fold_selection(z, k_folds)
    for i in range(n_lambdas):
        lamb = lambdas[i]
        for j in range(k_folds):
            test_ind_cv = test_list[j]
            train_ind_cv = train_list[j]
            X_train_cv = X[train_ind_cv]
            z_train_cv = z[train_ind_cv]
            X_test_cv = X[test_ind_cv]
            z_test_cv = z[test_ind_cv]

            clf_Lasso = skl.Lasso(alpha=lamb).fit(X_train_cv, z_train_cv)
            z_lasso_test = clf_Lasso.predict(X_test_cv)
            ridge_betas = linear_regression.Ridge_2D(X_train_cv, z_train_cv, lamb)
            z_ridge_test = X_test_cv @ ridge_betas

            # Score against the held-out responses of this fold, not all z.
            ridge_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_ridge_test)
            lasso_fold_score[i, j] = stat_tools.MSE(z_test_cv, z_lasso_test)

    lasso_cv_mse = np.mean(lasso_fold_score, axis=1, keepdims=True)
    ridge_cv_mse = np.mean(ridge_fold_score, axis=1, keepdims=True)
    best_lambda_lasso = lambdas[np.argmin(lasso_cv_mse)]
    best_lambda_ridge = lambdas[np.argmin(ridge_cv_mse)]

    # Bootstrap skeleton (OLS). One column of test predictions per resample.
    n_bootstraps = 100
    z_boot_model = np.zeros((len(z_test), n_bootstraps))
    for bootstrap_number in range(n_bootstraps):
        # For the number of data points in the training set, pick a random
        # data value (z_train[random]) and its corresponding row in the
        # design matrix.
        shuffle = np.random.randint(0, len(z_train), len(z_train))
        X_boot, z_boot = X_train[shuffle], z_train[shuffle]
        betas_boot = linear_regression.OLS_SVD_2D(X_boot, z_boot)
        z_boot_model[:, bootstrap_number] = X_test @ betas_boot
    mse, bias, variance = stat_tools.compute_mse_bias_variance(
        z_test, z_boot_model)

    # Check MSE
    print("MSE = %.3f" %
          stat_tools.MSE(z, linear_regression.evaluate_poly_2D(x, y, beta, deg)))
    # And with noise
    print("Including standard normal noise scaled by {}, MSE = {:.3f}".format(
        noise_scale,
        stat_tools.MSE(z_noisy,
                       linear_regression.evaluate_poly_2D(x, y, beta_noisy, deg))))

    # Evaluate the Franke function & least-squares fits on a regular grid
    x_grid = np.linspace(0, 1, 30)
    y_grid = np.linspace(0, 1, 30)
    X_grid, Y_grid = np.meshgrid(x_grid, y_grid)
    z_analytic = FrankeFunction(X_grid, Y_grid)
    z_fit = linear_regression.evaluate_poly_2D(X_grid, Y_grid, beta, deg)
    z_fit_noisy = linear_regression.evaluate_poly_2D(X_grid, Y_grid, beta_noisy, deg)

    # One 3D panel each for the analytic surface, the OLS fit and the OLS fit
    # obtained from the noisy samples.
    fig = plt.figure()
    panels = [
        ("Franke Function", z_analytic),
        ("OLS", z_fit),
        ("OLS with noise", z_fit_noisy),
    ]
    for position, (title, surface) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 3, position, projection="3d")
        ax.set_title(title)
        ax.view_init(azim=45)
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.plot_surface(X_grid, Y_grid, surface, cmap=cm.coolwarm)
    plt.show()
    return
def terrain_analysis():
    """Run OLS, Ridge and Lasso experiments on a subsample of the terrain.

    Every 20th pixel of the SRTM image is used. For each polynomial degree in
    ``range(max_degree)`` the function records
      * the OLS train/test MSE and the OLS cross-validation MSE,
      * the cross-validated MSE-vs-lambda curves for Ridge and Lasso together
        with the best lambda and its MSE,
      * bootstrap MSE/bias/variance for OLS and for Ridge/Lasso at their best
        lambda,
      * bootstrap MSE/bias/variance for Ridge/Lasso at every lambda in
        ``subset_lambdas``.

    All results are stored in local arrays only; the function returns None
    (the plotting part that would consume them is not present).
    """
    # Setting up the terrain data:
    terrain_data = imread('../datafiles/SRTM_data_Norway_1.tif')
    x_terrain = np.arange(terrain_data.shape[1])
    y_terrain = np.arange(terrain_data.shape[0])
    X_coord, Y_coord = np.meshgrid(x_terrain, y_terrain)
    z_terrain = terrain_data.flatten()  # the response values
    x_terrain_flat = X_coord.flatten()  # the first degree feature variables
    y_terrain_flat = Y_coord.flatten()  # the first degree feature variables

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]

    # Subsample the terrain points.
    # TODO: select the subset in a more principled manner, and probably
    # rescale the feature variables to floats in [0, 1].
    x = x_terrain_flat[::20]
    y = y_terrain_flat[::20]
    z = z_terrain[::20]
    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest, one entry per polynomial degree:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)
    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)
    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)
    # Full CV MSE-vs-lambda curves, one row per degree:
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    # Bootstrap results for the lambda subset, indexed [degree, lambda]:
    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        # Column 0 is reset to 1 after scaling.
        # NOTE(review): the author doubted this is right for ridge/lasso.
        X_scaled[:, 0] = 1

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # The OLS score is unpacked into its own scalar so that the
        # per-degree array `ols_cv_mse` is filled, not replaced.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        # All regression bootstraps at once, at the best lambdas.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        ridge_mse, ridge_bias, ridge_variance, \
            lasso_mse, lasso_bias, lasso_variance, \
            ols_mse, ols_bias, ols_variance = stat_tools.bootstrap_all(
                X_train_scaled, X_test_scaled, z_train, z_test,
                n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree] = ridge_mse
        ridge_best_lambda_boot_bias[degree] = ridge_bias
        ridge_best_lambda_boot_variance[degree] = ridge_variance
        lasso_best_lambda_boot_mse[degree] = lasso_mse
        lasso_best_lambda_boot_bias[degree] = lasso_bias
        lasso_best_lambda_boot_variance[degree] = lasso_variance
        ols_boot_mse[degree] = ols_mse
        ols_boot_bias[degree] = ols_bias
        ols_boot_variance[degree] = ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso. Each
        # column uses the loop's own lambda for both methods; re-passing the
        # best lambdas here would fill every column with the same experiment.
        for subset_lambda_index, lamb in enumerate(subset_lambdas):
            ridge_mse, ridge_bias, ridge_variance, \
                lasso_mse, lasso_bias, lasso_variance = \
                stat_tools.bootstrap_ridge_lasso(
                    X_train_scaled, X_test_scaled, z_train, z_test,
                    n_bootstraps, lamb, lamb)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
            ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance
            lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
            lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance

    # All necessary computations should have been done above. The plotting
    # part was meant to follow here; nothing is returned.
    return
def franke_analysis():
    """Run OLS, Ridge and Lasso experiments on noisy Franke-function samples.

    The Franke function is sampled at ``n`` uniformly random points and
    normal noise scaled by ``noise_scale`` is added. For each polynomial
    degree in ``range(max_degree)`` the function records
      * the OLS train/test MSE and the OLS cross-validation MSE,
      * the cross-validated MSE-vs-lambda curves for Ridge and Lasso together
        with the best lambda and its MSE,
      * bootstrap MSE/bias/variance for OLS and for Ridge/Lasso at their best
        lambda,
      * bootstrap MSE/bias/variance for Ridge/Lasso at every lambda in
        ``subset_lambdas``.

    All results are stored in local arrays only; the function returns None.
    """
    n = 1000
    noise_scale = 0.2
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)

    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Quantities of interest, one entry per polynomial degree:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)
    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)
    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)
    # Full CV MSE-vs-lambda curves, one row per degree:
    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    # Bootstrap results for the lambda subset, indexed [degree, lambda]:
    ridge_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    ridge_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_mse = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_bias = np.zeros((max_degree, len(subset_lambdas)))
    lasso_subset_lambda_boot_variance = np.zeros(
        (max_degree, len(subset_lambdas)))

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)
        # Column 0 is reset to 1 after scaling.
        # NOTE(review): maybe not appropriate for ridge+lasso, which should
        # not penalise the constant term.
        X_scaled[:, 0] = 1

        # Scaling and feeding to bootstrap and OLS
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)
        X_train_scaled[:, 0] = 1
        X_test_scaled[:, 0] = 1

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # The OLS score is unpacked into its own scalar so that the
        # per-degree array `ols_cv_mse` is filled, not replaced.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = stat_tools.k_fold_cv_all(
            X_scaled, z, n_lambdas, lambdas, k_folds)
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        # All regressions bootstraps at once, at the best lambdas.
        lamb_ridge = best_ridge_lambda[degree]
        lamb_lasso = best_lasso_lambda[degree]
        ridge_mse, ridge_bias, ridge_variance, \
            lasso_mse, lasso_bias, lasso_variance, \
            ols_mse, ols_bias, ols_variance = stat_tools.bootstrap_all(
                X_train_scaled, X_test_scaled, z_train, z_test,
                n_bootstraps, lamb_lasso, lamb_ridge)

        ridge_best_lambda_boot_mse[degree] = ridge_mse
        ridge_best_lambda_boot_bias[degree] = ridge_bias
        ridge_best_lambda_boot_variance[degree] = ridge_variance
        lasso_best_lambda_boot_mse[degree] = lasso_mse
        lasso_best_lambda_boot_bias[degree] = lasso_bias
        lasso_best_lambda_boot_variance[degree] = lasso_variance
        ols_boot_mse[degree] = ols_mse
        ols_boot_bias[degree] = ols_bias
        ols_boot_variance[degree] = ols_variance

        # Bootstrapping for a selection of lambdas for ridge and lasso. Each
        # column uses the loop's own lambda for both methods; re-passing the
        # best lambdas here would fill every column with the same experiment.
        for subset_lambda_index, lamb in enumerate(subset_lambdas):
            ridge_mse, ridge_bias, ridge_variance, \
                lasso_mse, lasso_bias, lasso_variance = \
                stat_tools.bootstrap_ridge_lasso(
                    X_train_scaled, X_test_scaled, z_train, z_test,
                    n_bootstraps, lamb, lamb)

            ridge_subset_lambda_boot_mse[degree, subset_lambda_index] = ridge_mse
            ridge_subset_lambda_boot_bias[degree, subset_lambda_index] = ridge_bias
            ridge_subset_lambda_boot_variance[degree, subset_lambda_index] = ridge_variance
            lasso_subset_lambda_boot_mse[degree, subset_lambda_index] = lasso_mse
            lasso_subset_lambda_boot_bias[degree, subset_lambda_index] = lasso_bias
            lasso_subset_lambda_boot_variance[degree, subset_lambda_index] = lasso_variance
def deprecated_franke_analysis_full():
    """Run the old, fully inlined regression study on noisy Franke data.

    Samples the Franke function at uniformly drawn points, adds Gaussian
    noise, and then for every degree handed to ``design_matrix_2D``:

    * fits OLS on a standardised train/test split and stores both MSEs,
    * runs k-fold cross-validation for Ridge and Lasso over a grid of
      penalties and records the best penalty and its MSE,
    * bootstraps OLS, Ridge and Lasso (at the best penalties) and stores
      MSE, bias and variance,
    * repeats the Ridge/Lasso bootstrap for a coarse subset of penalties.

    Every result is kept in local arrays only; the function returns
    nothing and produces no plots.
    """
    # Problem set-up: sample points, noisy response and hyper-parameters.
    n_samples = 1000
    sigma = 0.2
    x = np.random.uniform(0, 1, n_samples)
    y = np.random.uniform(0, 1, n_samples)
    z = FrankeFunction(x, y)
    z = z + sigma * np.random.normal(0, 1, len(z))

    max_degree = 20
    n_lambdas = 30
    n_bootstraps = 50
    k_folds = 5
    lambdas = np.logspace(-3, 0, n_lambdas)
    subset_lambdas = lambdas[::5]
    n_subset = len(subset_lambdas)

    # Result containers, one entry (or row) per degree.
    ols_test_mse, ols_train_mse, ols_cv_mse = (
        np.zeros(max_degree) for _ in range(3))
    ols_boot_mse, ols_boot_bias, ols_boot_var = (
        np.zeros(max_degree) for _ in range(3))

    ridge_best_lambda, ridge_best_cv_mse = (
        np.zeros(max_degree) for _ in range(2))
    ridge_boot_mse, ridge_boot_bias, ridge_boot_var = (
        np.zeros(max_degree) for _ in range(3))

    lasso_best_lambda, lasso_best_cv_mse = (
        np.zeros(max_degree) for _ in range(2))
    lasso_boot_mse, lasso_boot_bias, lasso_boot_var = (
        np.zeros(max_degree) for _ in range(3))

    ridge_cv_grid, lasso_cv_grid = (
        np.zeros((max_degree, n_lambdas)) for _ in range(2))

    ridge_sub_mse, ridge_sub_bias, ridge_sub_var = (
        np.zeros((max_degree, n_subset)) for _ in range(3))
    lasso_sub_mse, lasso_sub_bias, lasso_sub_var = (
        np.zeros((max_degree, n_subset)) for _ in range(3))

    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train, X_test, z_train, z_test = train_test_split(
            X, z, test_size=0.2)

        # Standardise the full design matrix for cross-validation and
        # put the first column back to 1 afterwards.
        X = StandardScaler().fit(X).transform(X)
        X[:, 0] = 1

        # Standardise the split with statistics from the training part
        # only; used by the plain OLS fit and by the bootstrap.
        split_scaler = StandardScaler().fit(X_train)
        X_train_std = split_scaler.transform(X_train)
        X_test_std = split_scaler.transform(X_test)
        X_train_std[:, 0] = 1
        X_test_std[:, 0] = 1

        # Plain OLS: train and test MSE.
        ols_beta = linear_regression.OLS_SVD_2D(X_train_std, z_train)
        ols_test_pred = X_test_std @ ols_beta
        ols_train_pred = X_train_std @ ols_beta
        ols_train_mse[degree] = stat_tools.MSE(z_train, ols_train_pred)
        ols_test_mse[degree] = stat_tools.MSE(z_test, ols_test_pred)

        # k-fold CV: test MSE for every (penalty, fold) pair.
        ridge_scores = np.zeros((n_lambdas, k_folds))
        lasso_scores = np.zeros((n_lambdas, k_folds))
        test_folds, train_folds = stat_tools.k_fold_selection(z, k_folds)
        for lam_idx, penalty in enumerate(lambdas):
            for fold in range(k_folds):
                held_out = test_folds[fold]
                kept = train_folds[fold]
                X_fit, z_fit = X[kept], z[kept]
                X_val, z_val = X[held_out], z[held_out]

                lasso_model = skl.Lasso(
                    alpha=penalty, fit_intercept=False).fit(X_fit, z_fit)
                lasso_pred = lasso_model.predict(X_val)

                ridge_beta = linear_regression.Ridge_2D(X_fit, z_fit, penalty)
                ridge_pred = X_val @ ridge_beta

                ridge_scores[lam_idx, fold] = stat_tools.MSE(z_val, ridge_pred)
                lasso_scores[lam_idx, fold] = stat_tools.MSE(z_val, lasso_pred)

        # Average over folds and keep the best penalty per method.
        lasso_cv_mse = lasso_scores.mean(axis=1)
        ridge_cv_mse = ridge_scores.mean(axis=1)
        lasso_best_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        ridge_best_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        lasso_best_cv_mse[degree] = lasso_cv_mse.min()
        ridge_best_cv_mse[degree] = ridge_cv_mse.min()
        lasso_cv_grid[degree] = lasso_cv_mse
        ridge_cv_grid[degree] = ridge_cv_mse

        # Bootstrap of all three regressions at the best penalties.
        lamb_ridge = ridge_best_lambda[degree]
        lamb_lasso = lasso_best_lambda[degree]
        boot_shape = (len(z_test), n_bootstraps)
        n_train = len(z_train)

        ols_preds = np.zeros(boot_shape)
        ridge_preds = np.zeros(boot_shape)
        lasso_preds = np.zeros(boot_shape)
        for b in range(n_bootstraps):
            # Resample the training rows with replacement.
            draw = np.random.randint(0, n_train, n_train)
            X_boot, z_boot = X_train_std[draw], z_train[draw]

            ols_boot_beta = linear_regression.OLS_SVD_2D(X_boot, z_boot)
            ridge_boot_beta = linear_regression.Ridge_2D(
                X_boot, z_boot, lamb_ridge)
            lasso_model = skl.Lasso(
                alpha=lamb_lasso, fit_intercept=False).fit(X_boot, z_boot)

            lasso_preds[:, b] = lasso_model.predict(X_test_std)
            ridge_preds[:, b] = X_test_std @ ridge_boot_beta
            ols_preds[:, b] = X_test_std @ ols_boot_beta

        mse, bias, var = stat_tools.compute_mse_bias_variance(
            z_test, ridge_preds)
        ridge_boot_mse[degree] = mse
        ridge_boot_bias[degree] = bias
        ridge_boot_var[degree] = var

        mse, bias, var = stat_tools.compute_mse_bias_variance(
            z_test, lasso_preds)
        lasso_boot_mse[degree] = mse
        lasso_boot_bias[degree] = bias
        lasso_boot_var[degree] = var

        mse, bias, var = stat_tools.compute_mse_bias_variance(
            z_test, ols_preds)
        ols_boot_mse[degree] = mse
        ols_boot_bias[degree] = bias
        ols_boot_var[degree] = var

        # Ridge/Lasso bootstrap for the coarse penalty subset.
        for col, penalty in enumerate(subset_lambdas):
            ridge_preds = np.zeros(boot_shape)
            lasso_preds = np.zeros(boot_shape)
            for b in range(n_bootstraps):
                draw = np.random.randint(0, n_train, n_train)
                X_boot, z_boot = X_train_std[draw], z_train[draw]

                ridge_boot_beta = linear_regression.Ridge_2D(
                    X_boot, z_boot, penalty)
                lasso_model = skl.Lasso(
                    alpha=penalty, fit_intercept=False).fit(X_boot, z_boot)

                lasso_preds[:, b] = lasso_model.predict(X_test_std)
                ridge_preds[:, b] = X_test_std @ ridge_boot_beta

            mse, bias, var = stat_tools.compute_mse_bias_variance(
                z_test, ridge_preds)
            ridge_sub_mse[degree, col] = mse
            ridge_sub_bias[degree, col] = bias
            ridge_sub_var[degree, col] = var

            mse, bias, var = stat_tools.compute_mse_bias_variance(
                z_test, lasso_preds)
            lasso_sub_mse[degree, col] = mse
            lasso_sub_bias[degree, col] = bias
            lasso_sub_var[degree, col] = var
def terrain_analysis_plots(
        spacing=100,
        max_degree=20,
        n_lambdas=30,
        k_folds=5,
        n_bootstraps=50,
        do_boot=False,
        do_subset=False,
):
    """Fit OLS, Ridge and Lasso to the SRTM terrain data and plot CV results.

    Loads ``../../datafiles/SRTM_data_Norway_1.tif``, downsamples it, and
    for every degree in ``range(max_degree)`` computes the OLS train/test
    MSE, the k-fold cross-validated MSE for OLS, Ridge and Lasso over a
    logarithmic grid of penalties and, optionally, bootstrap estimates of
    MSE, bias and variance. Finally the CV results are plotted and the best
    penalties per degree are printed.

    Args:
        spacing (int): Stride used along both axes when downsampling the
            first 1801 x 1801 terrain points.
        max_degree (int): Number of degrees to evaluate (0 .. max_degree-1).
        n_lambdas (int): Number of penalties in ``np.logspace(-6, 0, ...)``.
        k_folds (int): Number of folds passed to the cross-validation.
        n_bootstraps (int): Number of bootstrap resamples.
        do_boot (bool): If true, bootstrap OLS, Ridge and Lasso at the
            best cross-validated penalties.
        do_subset (bool): If true, bootstrap Ridge and Lasso for every
            penalty in ``lambdas[::12]``, independently of ``do_boot``.
            These results are stored but not plotted here.

    Returns:
        None
    """
    # Setting up the terrain data:
    # Note structure! X-coordinates are on the rows of terrain_data.
    # point_selection.flatten() moves most rapidly over the x-coordinates,
    # and so do the flattened meshgrids. Thus z(x, y).reshape(length_y,
    # length_x) should be consistent with terrain_data.
    terrain_data = imread("../../datafiles/SRTM_data_Norway_1.tif")
    # Make square and downsample.
    point_selection = terrain_data[:1801:spacing, :1801:spacing]
    x_axis = np.linspace(0, 1, point_selection.shape[1])
    y_axis = np.linspace(0, 1, point_selection.shape[0])
    x_grid, y_grid = np.meshgrid(x_axis, y_axis)

    z = point_selection.flatten()  # the response values
    x = x_grid.flatten()  # the first degree feature variables
    y = y_grid.flatten()  # the first degree feature variables

    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::12]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering: the full response with its own mean (used by CV), the
    # split with the mean of the training part only.
    z_intercept = np.mean(z)
    z = z - z_intercept
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    subset_shape = (max_degree, len(subset_lambdas))
    ridge_subset_lambda_boot_mse = np.zeros(subset_shape)
    ridge_subset_lambda_boot_bias = np.zeros(subset_shape)
    ridge_subset_lambda_boot_variance = np.zeros(subset_shape)
    lasso_subset_lambda_boot_mse = np.zeros(subset_shape)
    lasso_subset_lambda_boot_bias = np.zeros(subset_shape)
    lasso_subset_lambda_boot_variance = np.zeros(subset_shape)

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV. The first column is deliberately not
        # reset to 1 after scaling; the response is centred instead.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)

        # Scaling and feeding to bootstrap and OLS, using statistics from
        # the training part only.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = (
            crossvalidation.k_fold_cv_all(
                X_scaled, z, n_lambdas, lambdas, k_folds))
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once, at the best CV lambdas.
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]
            (
                ridge_mse, ridge_bias, ridge_variance,
                lasso_mse, lasso_bias, lasso_variance,
                ols_mse, ols_bias, ols_variance,
            ) = bootstrap.bootstrap_all(
                X_train_scaled, X_test_scaled, z_train, z_test,
                n_bootstraps, lamb_lasso, lamb_ridge)

            ridge_best_lambda_boot_mse[degree] = ridge_mse
            ridge_best_lambda_boot_bias[degree] = ridge_bias
            ridge_best_lambda_boot_variance[degree] = ridge_variance
            lasso_best_lambda_boot_mse[degree] = lasso_mse
            lasso_best_lambda_boot_bias[degree] = lasso_bias
            lasso_best_lambda_boot_variance[degree] = lasso_variance
            ols_boot_mse[degree] = ols_mse
            ols_boot_bias[degree] = ols_bias
            ols_boot_variance[degree] = ols_variance

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso.
            # Each subset lambda is used as the penalty for BOTH methods;
            # passing the best-CV lambdas here would make every column
            # identical and would fail when do_boot is False.
            for subset_index, lamb in enumerate(subset_lambdas):
                (
                    ridge_mse, ridge_bias, ridge_variance,
                    lasso_mse, lasso_bias, lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled, X_test_scaled, z_train, z_test,
                    n_bootstraps, lamb, lamb)

                ridge_subset_lambda_boot_mse[degree, subset_index] = ridge_mse
                ridge_subset_lambda_boot_bias[degree, subset_index] = ridge_bias
                ridge_subset_lambda_boot_variance[
                    degree, subset_index] = ridge_variance
                lasso_subset_lambda_boot_mse[degree, subset_index] = lasso_mse
                lasso_subset_lambda_boot_bias[degree, subset_index] = lasso_bias
                lasso_subset_lambda_boot_variance[
                    degree, subset_index] = lasso_variance

    # Plots go here.
    plt.figure()
    plt.semilogy(ols_cv_mse, label="ols")
    plt.semilogy(best_ridge_mse, label="ridge")
    plt.semilogy(best_lasso_mse, label="lasso")
    plt.title(
        "CV MSE for OLS, Ridge and Lasso, with the best lambdas for each degree"
    )
    plt.legend()
    plt.show()

    # Degrees shown in the "CV MSE vs lambda" plots. Offsets that would
    # fall below degree 0 are dropped, so a small max_degree neither wraps
    # around to the wrong row nor raises an IndexError.
    degrees_to_plot = [
        max_degree - offset
        for offset in (1, 2, 3, 5, 7)
        if max_degree - offset >= 0
    ]

    def _plot_cv_mse_vs_lambda(mse_by_degree):
        # One curve of CV MSE against log10(lambda) per selected degree.
        plt.figure()
        for deg in degrees_to_plot:
            plt.plot(
                np.log10(lambdas),
                mse_by_degree[deg],
                label="degree = {}".format(deg),
            )
        plt.legend()
        plt.show()

    # For a couple of degrees, plot cv mse vs lambda for ridge, then lasso.
    _plot_cv_mse_vs_lambda(ridge_lamb_deg_mse)
    _plot_cv_mse_vs_lambda(lasso_lamb_deg_mse)

    print("best ridge lambdas:")
    print(best_ridge_lambda)
    print("best lasso lambda")
    print(best_lasso_lambda)
def franke_analysis_plots(
        n=1000,
        noise_scale=0.2,
        max_degree=20,
        n_bootstraps=100,
        k_folds=5,
        n_lambdas=30,
        do_boot=True,
        do_subset=True,
):
    """Fit OLS, Ridge and Lasso to noisy Franke data and plot the results.

    Draws ``n`` uniform points in the unit square, evaluates the Franke
    function with additive Gaussian noise, and for every degree in
    ``range(max_degree)`` computes the OLS train/test MSE, the k-fold
    cross-validated MSE for OLS, Ridge and Lasso over a logarithmic grid of
    penalties and, optionally, bootstrap estimates of MSE, bias and
    variance. Finally all results are plotted and the best penalties per
    degree are printed.

    Args:
        n (int): Number of sample points.
        noise_scale (float): Standard deviation of the added noise.
        max_degree (int): Number of degrees to evaluate, i.e. including
            degree 0 (degrees 0 .. max_degree-1).
        n_bootstraps (int): Number of bootstrap resamples.
        k_folds (int): Number of folds passed to the cross-validation.
        n_lambdas (int): Number of penalties in ``np.logspace(-6, 0, ...)``.
        do_boot (bool): If true, bootstrap OLS, Ridge and Lasso at the
            best cross-validated penalties.
        do_subset (bool): If true, bootstrap Ridge and Lasso for every
            penalty in ``lambdas[::12]``, independently of ``do_boot``.

    Returns:
        None
    """
    x = np.random.uniform(0, 1, n)
    y = np.random.uniform(0, 1, n)
    z = FrankeFunction(x, y)
    # Adding standard normal noise:
    z = z + noise_scale * np.random.normal(0, 1, len(z))

    # Every lambda_stride-th penalty is singled out for the subset
    # bootstrap and for the "low / middle / high" curves in the plots.
    lambda_stride = 12
    lambdas = np.logspace(-6, 0, n_lambdas)
    subset_lambdas = lambdas[::lambda_stride]

    x_train, x_test, y_train, y_test, z_train, z_test = train_test_split(
        x, y, z, test_size=0.2)

    # Centering the response: the full response with its own mean (used by
    # CV), the split with the mean of the training part only.
    z_intercept = np.mean(z)
    z = z - z_intercept
    z_train_intercept = np.mean(z_train)
    z_train = z_train - z_train_intercept
    z_test = z_test - z_train_intercept

    # Quantities of interest:
    mse_ols_test = np.zeros(max_degree)
    mse_ols_train = np.zeros(max_degree)
    ols_cv_mse = np.zeros(max_degree)
    ols_boot_mse = np.zeros(max_degree)
    ols_boot_bias = np.zeros(max_degree)
    ols_boot_variance = np.zeros(max_degree)

    best_ridge_lambda = np.zeros(max_degree)
    best_ridge_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_mse = np.zeros(max_degree)
    ridge_best_lambda_boot_bias = np.zeros(max_degree)
    ridge_best_lambda_boot_variance = np.zeros(max_degree)

    best_lasso_lambda = np.zeros(max_degree)
    best_lasso_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_mse = np.zeros(max_degree)
    lasso_best_lambda_boot_bias = np.zeros(max_degree)
    lasso_best_lambda_boot_variance = np.zeros(max_degree)

    ridge_lamb_deg_mse = np.zeros((max_degree, n_lambdas))
    lasso_lamb_deg_mse = np.zeros((max_degree, n_lambdas))

    subset_shape = (max_degree, len(subset_lambdas))
    ridge_subset_lambda_boot_mse = np.zeros(subset_shape)
    ridge_subset_lambda_boot_bias = np.zeros(subset_shape)
    ridge_subset_lambda_boot_variance = np.zeros(subset_shape)
    lasso_subset_lambda_boot_mse = np.zeros(subset_shape)
    lasso_subset_lambda_boot_bias = np.zeros(subset_shape)
    lasso_subset_lambda_boot_variance = np.zeros(subset_shape)

    # Actual computations
    for degree in range(max_degree):
        X = linear_regression.design_matrix_2D(x, y, degree)
        X_train = linear_regression.design_matrix_2D(x_train, y_train, degree)
        X_test = linear_regression.design_matrix_2D(x_test, y_test, degree)

        # Scaling and feeding to CV. The first column is deliberately not
        # reset to 1 after scaling; the response is centred instead.
        scaler = StandardScaler()
        scaler.fit(X)
        X_scaled = scaler.transform(X)

        # Scaling and feeding to bootstrap and OLS, using statistics from
        # the training part only.
        scaler_boot = StandardScaler()
        scaler_boot.fit(X_train)
        X_train_scaled = scaler_boot.transform(X_train)
        X_test_scaled = scaler_boot.transform(X_test)

        # OLS, get MSE for test and train set.
        betas = linear_regression.OLS_SVD_2D(X_train_scaled, z_train)
        z_test_model = X_test_scaled @ betas
        z_train_model = X_train_scaled @ betas
        mse_ols_train[degree] = stat_tools.MSE(z_train, z_train_model)
        mse_ols_test[degree] = stat_tools.MSE(z_test, z_test_model)

        # CV, find best lambdas and get mse vs lambda for given degree.
        # Also gets the OLS CV MSE.
        lasso_cv_mse, ridge_cv_mse, ols_cv_mse_deg = (
            crossvalidation.k_fold_cv_all(
                X_scaled, z, n_lambdas, lambdas, k_folds))
        best_lasso_lambda[degree] = lambdas[np.argmin(lasso_cv_mse)]
        best_ridge_lambda[degree] = lambdas[np.argmin(ridge_cv_mse)]
        best_lasso_mse[degree] = np.min(lasso_cv_mse)
        best_ridge_mse[degree] = np.min(ridge_cv_mse)
        lasso_lamb_deg_mse[degree] = lasso_cv_mse
        ridge_lamb_deg_mse[degree] = ridge_cv_mse
        ols_cv_mse[degree] = ols_cv_mse_deg

        if do_boot:
            # All regression bootstraps at once, at the best CV lambdas.
            lamb_ridge = best_ridge_lambda[degree]
            lamb_lasso = best_lasso_lambda[degree]
            (
                ridge_mse, ridge_bias, ridge_variance,
                lasso_mse, lasso_bias, lasso_variance,
                ols_mse, ols_bias, ols_variance,
            ) = bootstrap.bootstrap_all(
                X_train_scaled, X_test_scaled, z_train, z_test,
                n_bootstraps, lamb_lasso, lamb_ridge)

            ridge_best_lambda_boot_mse[degree] = ridge_mse
            ridge_best_lambda_boot_bias[degree] = ridge_bias
            ridge_best_lambda_boot_variance[degree] = ridge_variance
            lasso_best_lambda_boot_mse[degree] = lasso_mse
            lasso_best_lambda_boot_bias[degree] = lasso_bias
            lasso_best_lambda_boot_variance[degree] = lasso_variance
            ols_boot_mse[degree] = ols_mse
            ols_boot_bias[degree] = ols_bias
            ols_boot_variance[degree] = ols_variance

        if do_subset:
            # Bootstrapping for a selection of lambdas for ridge and lasso.
            # Each subset lambda is used as the penalty for BOTH methods;
            # passing the best-CV lambdas here would make every column
            # identical and would fail when do_boot is False.
            for subset_index, lamb in enumerate(subset_lambdas):
                (
                    ridge_mse, ridge_bias, ridge_variance,
                    lasso_mse, lasso_bias, lasso_variance,
                ) = bootstrap.bootstrap_ridge_lasso(
                    X_train_scaled, X_test_scaled, z_train, z_test,
                    n_bootstraps, lamb, lamb)

                ridge_subset_lambda_boot_mse[degree, subset_index] = ridge_mse
                ridge_subset_lambda_boot_bias[degree, subset_index] = ridge_bias
                ridge_subset_lambda_boot_variance[
                    degree, subset_index] = ridge_variance
                lasso_subset_lambda_boot_mse[degree, subset_index] = lasso_mse
                lasso_subset_lambda_boot_bias[degree, subset_index] = lasso_bias
                lasso_subset_lambda_boot_variance[
                    degree, subset_index] = lasso_variance

    # Plots go here.

    # Columns of the full lambda grid / entries of subset_lambdas that are
    # drawn as "low, middle, high". At most three, and only those that
    # exist, so a small n_lambdas does not raise an IndexError.
    shown_lambda_columns = range(0, n_lambdas, lambda_stride)[:3]
    shown_subset_columns = range(len(subset_lambdas))[:3]

    # Degrees shown in the "CV MSE vs lambda" plots. Offsets that would
    # fall below degree 0 are dropped, so a small max_degree neither wraps
    # around to the wrong row nor raises an IndexError.
    degrees_to_plot = [
        max_degree - offset
        for offset in (1, 2, 3, 5, 7)
        if max_degree - offset >= 0
    ]

    def _plot_mse_bias_variance(mse, bias, variance, title):
        # Bootstrap MSE, bias and variance against degree.
        plt.figure()
        plt.semilogy(mse, label="mse")
        plt.semilogy(bias, label="bias")
        plt.semilogy(variance, label="variance")
        plt.title(title)
        plt.legend()
        plt.show()

    def _plot_cv_by_degree(best_mse, lamb_deg_mse, best_label, title):
        # CV MSE against degree: best lambda plus a few fixed lambdas.
        plt.figure()
        plt.semilogy(best_mse, label=best_label)
        for col in shown_lambda_columns:
            plt.semilogy(lamb_deg_mse[:, col],
                         label="lambda={}".format(lambdas[col]))
        plt.title(title)
        plt.legend()
        plt.show()

    def _plot_subset_bias_variance(bias, variance, title):
        # Bootstrap bias and variance against degree for the subset lambdas.
        plt.figure()
        for col in shown_subset_columns:
            plt.semilogy(
                bias[:, col],
                label="bias, lambda = {}".format(subset_lambdas[col]))
            plt.semilogy(
                variance[:, col],
                label="variance, lambda = {}".format(subset_lambdas[col]))
        plt.title(title)
        plt.legend()
        plt.show()

    def _plot_cv_mse_vs_lambda(mse_by_degree):
        # One curve of CV MSE against log10(lambda) per selected degree.
        plt.figure()
        for deg in degrees_to_plot:
            plt.plot(
                np.log10(lambdas),
                mse_by_degree[deg],
                label="degree = {}".format(deg),
            )
        plt.legend()
        plt.show()

    # CV MSE for OLS:
    plt.figure()
    plt.semilogy(ols_cv_mse)
    plt.title("OLS CV MSE")
    plt.show()

    # Bootstrap for OLS:
    _plot_mse_bias_variance(
        ols_boot_mse, ols_boot_bias, ols_boot_variance,
        "OLS bias-variance-MSE by bootstrap")

    # CV for Ridge, best+low+middle+high lambdas
    _plot_cv_by_degree(
        best_ridge_mse, ridge_lamb_deg_mse, "best for each degree",
        "Ridge CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )

    # Bootstrap for the best ridge lambdas:
    _plot_mse_bias_variance(
        ridge_best_lambda_boot_mse, ridge_best_lambda_boot_bias,
        ridge_best_lambda_boot_variance,
        "Best ridge lambdas for each degree bootstrap")

    # Bootstrap only bias and variance for low+middle+high ridge lambdas
    _plot_subset_bias_variance(
        ridge_subset_lambda_boot_bias, ridge_subset_lambda_boot_variance,
        "Bias+variance for low, middle, high ridge lambdas")

    # CV for lasso, best+low+middle+high lambdas
    _plot_cv_by_degree(
        best_lasso_mse, lasso_lamb_deg_mse, "best lambda for each degree",
        "Lasso CV MSE for best lambda at each degree, plus for given lambdas across all degrees"
    )

    # Bootstrap for the best lasso lambdas:
    _plot_mse_bias_variance(
        lasso_best_lambda_boot_mse, lasso_best_lambda_boot_bias,
        lasso_best_lambda_boot_variance,
        "Best lasso lambdas for each degree bootstrap")

    # Bootstrap only bias and variance for low+middle+high lasso lambdas
    _plot_subset_bias_variance(
        lasso_subset_lambda_boot_bias, lasso_subset_lambda_boot_variance,
        "Bias+variance for low, middle, high lasso lambdas")

    # For a couple of degrees, plot cv mse vs lambda for ridge, then lasso.
    _plot_cv_mse_vs_lambda(ridge_lamb_deg_mse)
    _plot_cv_mse_vs_lambda(lasso_lamb_deg_mse)

    print("best ridge lambda:")
    print(best_ridge_lambda)
    print("best lasso lambda:")
    print(best_lasso_lambda)