# --- Ridge-regression model selection over a (threshold, lambda) grid ---
# NOTE(review): this chunk was collapsed onto a single line; the original
# statement structure is restored below without changing any code tokens.

# Validation / training 0-1 loss for each (threshold, lambda) pair.
err_val = np.zeros((num, num))
err_train = np.zeros((num, num))

# Split training data into subtraining set, validation set.
X_tr, y_tr, X_val, y_val = val.split_data(X_train, y_train, frac=frac,
                                          seed=seed)

# Filter y values to 0, 1 labels.
# (Helper name suggests a "2 vs rest" MNIST binarization — TODO confirm.)
y_tr_true = mnist_two_filter(y_tr)
y_val_true = mnist_two_filter(y_val)

# Loop over thresholds; val.linear_reg_path internally loops over the
# lambdas of the regularization path, filling one row per threshold.
for i in range(num):
    err_val[i, :], err_train[i, :], lams = val.linear_reg_path(
        X_tr, y_tr_true, X_val, y_val_true, ri.fit_ridge,
        lammax=lammax, scale=scale, num=num, error_func=val.loss_01,
        thresh=thresh_arr[i], **kwargs)

# Find minimum threshold, lambda from minimum validation error:
# argmin over the flattened grid, unraveled back to (row, col) indices.
ind_t, ind_l = np.unravel_index(err_val.argmin(), err_val.shape)
best_lambda = lams[ind_l]
best_thresh = thresh_arr[ind_t]

print("Best lambda:", best_lambda)
print("Best threshold:", best_thresh)

# Validation error as a function of lambda at the best threshold.
plt.plot(lams, err_val[ind_t, :])
plt.show()

# Fit training set for model parameters using best fit lambda.
# NOTE(review): y_train_true is not defined in this chunk — presumably
# mnist_two_filter(y_train) computed elsewhere in the file; verify.
w0, w = ri.fit_ridge(X_train, y_train_true, lam=best_lambda)
# --- LASSO regularization path, cache-miss branch (fragment) ---
# NOTE(review): this chunk was collapsed onto one line and arrived without
# its `if` header, ending in an orphan `else:`. The header below is
# reconstructed verbatim from the fuller duplicate of this code later in
# the file — confirm against the surrounding context.
if not os.path.exists(cache):
    # Set maximum lambda, minimum lambda.
    lammax = lu.compute_max_lambda(X_train, y_train)
    print("Maximum lambda: %.3lf." % lammax)

    # Init error arrays (rebound by the path call below).
    err_val = np.zeros(num)
    err_train = np.zeros(num)

    # Run a regularization path over `num` lambda bins.
    print("Running regularization path for %d lambda bins." % num)
    err_val, err_train, lams, nonzeros = val.linear_reg_path(
        X_train, y_train, X_val, y_val, lu.fit_lasso_fast,
        lammax=lammax, scale=2, num=num, error_func=val.RMSE,
        save_nonzeros=True, **kwargs)

    # Cache results so later runs can skip the fit entirely.
    print("Caching results...")
    np.savez(cache, err_val=err_val, err_train=err_train, lams=lams,
             nonzeros=nonzeros)
else:
    print("Reading from cache...")
# --- Duplicate of the ridge (threshold, lambda) search (fragment) ---
# NOTE(review): this chunk was collapsed onto one line and begins mid-call
# (`frac=frac, seed=seed)`). The opening of the val.split_data call is
# reconstructed verbatim from the full copy of this code earlier in the
# file — confirm against the surrounding context.
X_tr, y_tr, X_val, y_val = val.split_data(X_train, y_train,
                                          frac=frac, seed=seed)

# Filter y values to 0, 1 labels.
y_tr_true = mnist_two_filter(y_tr)
y_val_true = mnist_two_filter(y_val)

# Loop over thresholds; the inner loop over the lambdas of the
# regularization path happens inside val.linear_reg_path.
for i in range(num):
    err_val[i, :], err_train[i, :], lams = val.linear_reg_path(
        X_tr, y_tr_true, X_val, y_val_true, ri.fit_ridge,
        lammax=lammax, scale=scale, num=num, error_func=val.loss_01,
        thresh=thresh_arr[i], **kwargs)

# Find minimum threshold, lambda from minimum validation error.
ind_t, ind_l = np.unravel_index(err_val.argmin(), err_val.shape)
best_lambda = lams[ind_l]
best_thresh = thresh_arr[ind_t]

print("Best lambda:", best_lambda)
print("Best threshold:", best_thresh)

# Validation error vs lambda at the best threshold.
plt.plot(lams, err_val[ind_t, :])
plt.show()
# --- LASSO regularization path with on-disk caching ---
# NOTE(review): this chunk was collapsed onto a single line; statement
# structure restored. A stray "1.15" in one comment (extraction garbage)
# was dropped, and keyword-argument spacing normalized.

# Run analysis if answer cache doesn't exist; otherwise load cached arrays.
if not os.path.exists(cache):
    print("Cache does not exist, running analysis...")

    # Set maximum lambda, minimum lambda.
    lammax = lu.compute_max_lambda(X_train, y_train)
    print("Maximum lambda: %.3lf." % lammax)

    # Init error arrays (rebound by the path call below).
    err_val = np.zeros(num)
    err_train = np.zeros(num)

    # Run a regularization path over `num` lambda bins.
    print("Running regularization path for %d lambda bins." % num)
    err_val, err_train, lams, nonzeros = val.linear_reg_path(
        X_train, y_train, X_val, y_val, lu.fit_lasso_fast,
        lammax=lammax, scale=2, num=num, error_func=val.RMSE,
        save_nonzeros=True, **kwargs)

    # Cache results so subsequent runs skip the fit.
    print("Caching results...")
    np.savez(cache, err_val=err_val, err_train=err_train, lams=lams,
             nonzeros=nonzeros)
else:
    print("Reading from cache...")
    res = np.load(cache)
    err_val = res["err_val"]
    err_train = res["err_train"]
    lams = res["lams"]
    nonzeros = res["nonzeros"]

# Find best lambda according to validation error
# and use that to refit training data, test on test data