def parameter_search_rbf(inputs, targets, test_fraction):
    """
    Grid-search the RBF scale and the regularisation strength on a single
    train/test split, print the best pair and plot two slices of the error
    grid (error vs scale, error vs lambda).

    parameters
    ----------
    inputs -- 2d array of inputs, one data point per row
    targets -- targets, partitioned alongside the design matrix
    test_fraction -- forwarded to train_and_test_split
    """
    N = inputs.shape[0]
    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(
        N, test_fraction=test_fraction)
    # each data point is kept as a basis function centre with probability
    # sample_fraction (15%)
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=N, p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # choices of regularisation strength
    # create empty 2d arrays to store the train and test errors,
    # indexed [scale index, reg_param index]
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    # iterate over the scales
    for i, scale in enumerate(scales):
        # we must recreate the feature mapping each time for different scales
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # partition the design matrix and targets into train and test
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(
                designmtx, targets, train_part, test_part)
        # iterate over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # train and test the data
            train_error, test_error = train_and_test(
                train_designmtx, train_targets, test_designmtx, test_targets,
                reg_param=reg_param)
            # store the train and test errors in our 2d arrays
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # argmin without an axis works on the flattened grid, so the flat index
    # has to be converted back into the (i, j) position of the minimum
    best_i, best_j = np.unravel_index(
        np.argmin(test_errors), test_errors.shape)
    print("Best joint choice of parameters:")
    print(
        "\tscale %.2g and lambda = %.2g" % (
            scales[best_i], reg_params[best_j]))
    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors[:, best_j], test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors[best_i, :],
        test_errors[best_i, :])
    ax.set_xscale('log')
def plot_with_regularisation(inputs,targets,folds):
    """
    Plot cross-validated train and test error of plain linear regression
    against the regularisation strength.

    No feature mapping is applied to the inputs. For such a simple model
    regularisation is expected to have only a weak effect: it slightly
    penalises functions that are far from constant (large gradients), giving
    slightly smaller weights than least squares, so the curves should look
    much the same with and without it.

    parameters
    ----------
    inputs -- input data, used directly as the design matrix
    targets -- the targets
    folds -- cross-validation folds forwarded to cv_evaluation_linear_model
    """
    lambdas = np.logspace(-10,1)
    mean_train_errors = []
    mean_test_errors = []
    for lam in lambdas:
        print("Evaluating reg_params: " + str(lam))
        # single-split evaluation; its result is not used below, but the
        # call is kept in case it has side effects
        _single_train, _single_test = simple_evaluation_linear_model(
            inputs, targets, test_fraction=0.2, reg_param=lam)
        cv_train, cv_test = cv_evaluation_linear_model(
            inputs, targets, folds, reg_param=lam)
        # keep only the averages of the cross-validation errors
        mean_train_errors.append(np.mean(cv_train))
        mean_test_errors.append(np.mean(cv_test))
    # draw both curves on a logarithmic lambda axis
    fig, ax = plot_train_test_errors(
        "$\lambda$", lambdas, mean_train_errors, mean_test_errors)
    plt.title('Linear Regression Model')
    ax.set_xscale('log')
def evaluate_reg_param(inputs, targets, folds, centres, scale, reg_params=None):
    """
    Cross-validate an RBF model over a range of regularisation parameters
    and plot mean train/test error with standard-error bands.

    parameters
    ----------
    inputs -- data passed through the RBF feature mapping
    targets -- the targets
    folds -- cross-validation folds; len(folds) is used as the sample count
        for the standard error
    centres, scale -- forwarded to construct_rbf_feature_mapping
    reg_params -- array of regularisation strengths; defaults to
        np.logspace(-2, 0)
    """
    # one design matrix serves every regularisation parameter
    to_features = construct_rbf_feature_mapping(centres, scale)
    designmtx = to_features(inputs)
    print("The design matrix shape is:", designmtx.shape)
    if reg_params is None:
        reg_params = np.logspace(-2, 0)
    n_settings = reg_params.size
    n_folds = len(folds)
    # summary statistics, one slot per regularisation parameter
    mean_train = np.zeros(n_settings)
    mean_test = np.zeros(n_settings)
    spread_train = np.zeros(n_settings)
    spread_test = np.zeros(n_settings)
    for idx, lam in enumerate(reg_params):
        # errors returned by the cross-validation at this strength
        cv_train, cv_test = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=lam)
        mean_train[idx] = np.mean(cv_train)
        mean_test[idx] = np.mean(cv_test)
        spread_train[idx] = np.std(cv_train)
        spread_test[idx] = np.std(cv_test)
    fig, ax = plot_train_test_errors(
        "$\lambda$", reg_params, mean_train, mean_test)
    # shade mean +/- one standard error (standard deviation / sqrt(n), with
    # n the number of folds); train band in blue, test band in red
    bands = ((mean_train, spread_train, 'b'), (mean_test, spread_test, 'r'))
    for centre_line, spread, colour in bands:
        lower = centre_line - spread / np.sqrt(n_folds)
        upper = centre_line + spread / np.sqrt(n_folds)
        ax.fill_between(reg_params, lower, upper, alpha=0.2, color=colour)
    ax.set_xscale('log')
def evaluate_scale(inputs, targets, folds, centres, reg_param, scales=None):
    """
    Evaluate then plot the performance of different basis function scales.

    For every scale the RBF design matrix is rebuilt and cross-validated
    with the fixed regularisation parameter. The mean errors are plotted
    with standard-error bands and the figure is saved to
    ../plots/rbf_searching_scales_cross_validation.pdf.

    parameters
    ----------
    inputs -- data passed through the RBF feature mapping
    targets -- the targets
    folds -- cross-validation folds; len(folds) is used as the sample count
        for the standard error
    centres -- basis function centres
    reg_param -- regularisation strength used for every scale
    scales -- array of scales to try; defaults to np.logspace(0, 6, 20)
    """
    # choose a range of scales
    if scales is None:
        scales = np.logspace(0, 6, 20)  # of the basis functions
    num_values = scales.size
    num_folds = len(folds)
    # create some arrays to store results, one entry per scale
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    for s, scale in enumerate(scales):
        # s is the index of scale; the feature mapping depends on the scale
        # so the design matrix must be recomputed each time
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # cross validate with this scale
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        # and in how much they vary
        train_mean_errors[s] = np.mean(train_errors)
        test_mean_errors[s] = np.mean(test_errors)
        train_stdev_errors[s] = np.std(train_errors)
        test_stdev_errors[s] = np.std(test_errors)
    # Now plot the results
    fig, ax = plot_train_test_errors(
        "scale", scales, train_mean_errors, test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(scales, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(scales, lower, upper, alpha=0.2, color='r')
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Scales With Cross-Validation')
    # savefig selects the file type through `format`; `fmt` is not one of
    # its parameters
    fig.savefig(
        "../plots/rbf_searching_scales_cross_validation.pdf", format="pdf")
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction,
                                        test_error_linear):
    """
    Evaluate an RBF model for a range of regularisation parameters, plot the
    train/test errors together with the linear model's test error and save
    the figure to ../plots/rbf_vs_linear.pdf.

    parameters
    ----------
    inputs -- 2d array of inputs, one data point per row
    targets -- the targets
    test_fraction -- forwarded to train_and_test_split and to
        simple_evaluation_linear_model
    test_error_linear -- test error of the linear model, drawn as a
        horizontal dotted green line for comparison
    """
    # for rbf feature mappings
    # each data point is kept as a basis function centre with probability 0.1
    n = inputs.shape[0]
    centres = inputs[
        np.random.choice([False, True], size=n, p=[0.90, 0.10]), :]
    print("centres shape = %r" % (centres.shape, ))
    # the width (analogous to standard deviation) of the basis functions
    scale = 8.5
    print("centres = %r" % (centres, ))
    print("scale = %r" % (scale, ))
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    design_matrix = feature_mapping(inputs)
    # NOTE: this partition is only used for the debugging output below; the
    # evaluation loop hands the full design matrix to
    # simple_evaluation_linear_model together with test_fraction
    train_part, test_part = train_and_test_split(
        n, test_fraction=test_fraction)
    train_design_matrix, train_targets, test_design_matrix, test_targets = \
        train_and_test_partition(
            design_matrix, targets, train_part, test_part)
    # outputting the shapes of the train and test parts for debugging
    print("training design matrix shape = %r" % (train_design_matrix.shape, ))
    print("testing design matrix shape = %r" % (test_design_matrix.shape, ))
    print("training targets shape = %r" % (train_targets.shape, ))
    print("testing targets shape = %r" % (test_targets.shape, ) + "\n")
    # the rbf feature mapping performance
    reg_params = np.logspace(-15, 5, 20)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        print("Evaluating reg. parameter " + str(reg_param))
        train_error, test_error = simple_evaluation_linear_model(
            design_matrix, targets, test_fraction=test_fraction,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    # plotting a straight line showing the linear performance
    x_lim = ax.get_xlim()
    ax.plot(x_lim, test_error_linear * np.ones(2), 'g:')
    ax.set_xscale('log')
    ax.set_title('Evaluating RBF Performance')
    # savefig selects the file type through `format`; `fmt` is not one of
    # its parameters
    fig.savefig("../plots/rbf_vs_linear.pdf", format="pdf")
def regression_with_regularization(inputs,targets,folds):
    """
    Compare single-split and cross-validated errors of regularised linear
    regression over a range of regularisation strengths, printing all four
    numbers per strength and plotting the cross-validated means.

    parameters
    ----------
    inputs -- input data, used directly as the design matrix
    targets -- the targets
    folds -- cross-validation folds forwarded to cv_evaluation_linear_model
    """
    lambdas = np.logspace(-10,1)
    cv_train_means = []
    cv_test_means = []
    for lam in lambdas:
        # errors from one 80/20 split...
        old_train, old_test = simple_evaluation_linear_model(
            inputs, targets, test_fraction=0.2, reg_param=lam)
        # ...and from cross-validation (the returned weights are not used)
        cv_train, cv_test, _weights = cv_evaluation_linear_model(
            inputs, targets, folds, reg_param=lam)
        cv_train_mean = np.mean(cv_train)
        cv_test_mean = np.mean(cv_test)
        summary = (old_train, old_test, cv_train_mean, cv_test_mean)
        print(" (train_error_without_cross,test_error_without_cross,train_error_with_cross,test_error_with_cross)= %r" % (summary,) )
        # only the cross-validated means are plotted
        cv_train_means.append(cv_train_mean)
        cv_test_means.append(cv_test_mean)
    fig, ax = plot_train_test_errors(
        "$\lambda$", lambdas, cv_train_means, cv_test_means)
    ax.set_xscale('log')
def main(
        ifname, delimiter=None, columns=None, has_header=True,
        test_fraction=0.25):
    """
    To be called when the script is run.

    Imports the data file, draws the exploratory plots, evaluates a linear
    approximation on a train/test split and plots its train and test error.

    parameters
    ----------
    ifname -- filename/path of data file.
    delimiter -- delimiter of data values
    columns -- a list of integers specifying which columns of the file to
        import (counting from 0)
    has_header -- does the data-file have a header line
    test_fraction -- forwarded to evaluate_linear_approx
    """
    data, field_names = import_data(
        ifname, delimiter=delimiter, has_header=has_header, columns=columns)
    exploratory_plots(data, field_names)
    # the first eleven imported columns are the inputs, the twelfth is the
    # target
    inputs = data[:, list(range(11))]
    targets = data[:, 11]
    train_error_linear, test_error_linear = evaluate_linear_approx(
        inputs, targets, test_fraction)
    plot_train_test_errors(
        'degree', [0], train_error_linear, test_error_linear)
    plt.show()
def evaluate_reg_param(inputs, targets, folds, reg_params=None):
    """
    Cross-validate a linear model over a range of regularisation parameters
    and plot mean train/test error with standard-error bands.

    parameters
    ----------
    inputs -- design matrix handed straight to cv_evaluation_linear_model
    targets -- the targets
    folds -- cross-validation folds; len(folds) is used as the sample count
        for the standard error
    reg_params -- array of regularisation strengths; defaults to
        np.logspace(-2, 0)
    """
    if reg_params is None:
        reg_params = np.logspace(-2,0)
    n_settings = reg_params.size
    n_folds = len(folds)
    # summary statistics, one slot per regularisation parameter
    mean_train = np.zeros(n_settings)
    mean_test = np.zeros(n_settings)
    spread_train = np.zeros(n_settings)
    spread_test = np.zeros(n_settings)
    for idx, lam in enumerate(reg_params):
        # errors returned by the cross-validation at this strength
        cv_train, cv_test = cv_evaluation_linear_model(
            inputs, targets, folds, reg_param=lam)
        mean_train[idx] = np.mean(cv_train)
        mean_test[idx] = np.mean(cv_test)
        spread_train[idx] = np.std(cv_train)
        spread_test[idx] = np.std(cv_test)
    fig, ax = plot_train_test_errors(
        "$\lambda$", reg_params, mean_train, mean_test)
    # shade mean +/- one standard error (standard deviation / sqrt(n), with
    # n the number of folds); train band in blue, test band in red
    bands = ((mean_train, spread_train, 'b'), (mean_test, spread_test, 'r'))
    for centre_line, spread, colour in bands:
        lower = centre_line - spread/np.sqrt(n_folds)
        upper = centre_line + spread/np.sqrt(n_folds)
        ax.fill_between(reg_params, lower, upper, alpha=0.2, color=colour)
    ax.set_xscale('log')
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction, test_error_linear):
    """
    Evaluate an RBF model for a range of regularisation parameters and plot
    the train/test errors next to the linear model's test error.

    parameters
    ----------
    inputs -- 2d array of inputs, one data point per row
    targets -- the targets
    test_fraction -- forwarded to train_and_test_split and to
        simple_evaluation_linear_model
    test_error_linear -- test error of the linear model, drawn as a
        horizontal dotted green line
    """
    num_points = inputs.shape[0]
    # keep each data point as a centre with probability 0.1
    centre_mask = np.random.choice(
        [False, True], size=num_points, p=[0.9, 0.1])
    centres = inputs[centre_mask, :]
    print("centres.shape = %r" % (centres.shape, ))
    scale = 10.  # of the basis functions
    to_features = construct_rbf_feature_mapping(centres, scale)
    designmtx = to_features(inputs)
    # this partition only feeds the shape printout below; the evaluation
    # loop passes the whole design matrix on together with test_fraction
    train_part, test_part = train_and_test_split(
        num_points, test_fraction=test_fraction)
    partition = train_and_test_partition(
        designmtx, targets, train_part, test_part)
    train_designmtx, train_targets, test_designmtx, test_targets = partition
    print("train_designmtx.shape = %r" % (train_designmtx.shape, ))
    print("test_designmtx.shape = %r" % (test_designmtx.shape, ))
    print("train_targets.shape = %r" % (train_targets.shape, ))
    print("test_targets.shape = %r" % (test_targets.shape, ))
    # sweep the regularisation strength
    lambdas = np.logspace(-15, -4, 11)
    rbf_train_errors = []
    rbf_test_errors = []
    for lam in lambdas:
        print("Evaluating reg_para " + str(lam))
        err_train, err_test = simple_evaluation_linear_model(
            designmtx, targets, test_fraction=test_fraction, reg_param=lam)
        rbf_train_errors.append(err_train)
        rbf_test_errors.append(err_test)
    fig, ax = plot_train_test_errors(
        "$\lambda$", lambdas, rbf_train_errors, rbf_test_errors)
    # horizontal reference line at the linear model's test error
    x_span = ax.get_xlim()
    ax.plot(x_span, test_error_linear * np.ones(2), 'g:')
    ax.set_xscale('log')
def evaluate_num_centres(inputs, targets, folds, scale, reg_param,
                         test_error_linear, num_centres_sequence=None):
    """
    Evaluate, then plot the performance of different numbers of basis
    function centres.

    The number of centres is controlled through the probability with which
    each data point is kept as a centre. The figure is saved to
    ../plots/rbf/rbf_searching_number_centres_cross_validation.png and then
    shown.

    parameters
    ----------
    inputs -- 2d array of inputs, one data point per row
    targets -- the targets
    folds -- cross-validation folds; len(folds) is used as the sample count
        for the standard error
    scale -- scale of the basis functions
    reg_param -- regularisation strength
    test_error_linear -- forwarded to plot_train_test_errors
    num_centres_sequence -- array of per-point centre probabilities;
        defaults to 20 values evenly spaced between 0.01 and 1
    """
    # choosing a range of numbers of centres
    if num_centres_sequence is None:
        # tested with 50, using 20 to speed things up
        num_centres_sequence = np.linspace(start=0.01, stop=1, num=20)
    num_values = num_centres_sequence.size
    num_folds = len(folds)
    # creating some arrays to store results, one entry per centre fraction
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_st_dev_errors = np.zeros(num_values)
    test_st_dev_errors = np.zeros(num_values)
    n = inputs.shape[0]
    # running the experiments
    for c, centre_percentage in enumerate(num_centres_sequence):
        sample_fraction = centre_percentage
        p = (1 - sample_fraction, sample_fraction)
        # constructing the feature mapping anew for each number of centres
        centres = inputs[np.random.choice([False, True], size=n, p=p), :]
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # c is the index of the centre fraction; cross validate with the
        # design matrix built from this draw of centres
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        # and in how much they vary
        train_mean_errors[c] = np.mean(train_errors)
        test_mean_errors[c] = np.mean(test_errors)
        train_st_dev_errors[c] = np.std(train_errors)
        test_st_dev_errors[c] = np.std(test_errors)
    # now plotting the results
    fig, ax = plot_train_test_errors(
        "% of inputs as centres * 100", num_centres_sequence,
        train_mean_errors, test_mean_errors, test_error_linear)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_st_dev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_st_dev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='r')
    ax.set_ylim([0, 1])
    ax.set_title(
        'Train vs Test Error across Centre Proportion with Cross-validation')
    # savefig selects the file type through `format`; `fmt` is not one of
    # its parameters
    fig.savefig(
        "../plots/rbf/rbf_searching_number_centres_cross_validation.png",
        format="png")
    plt.show()
def evaluate_num_centres(inputs, targets, folds, scale, reg_param,
                         num_centres_sequence=None):
    """
    Evaluate then plot the performance of different numbers of basis
    function centres.

    For each count the centres are placed evenly on [0, 1] with np.linspace.
    The figure is saved to
    ../plots/rbf_searching_number_centres_cross_validation.pdf.

    parameters
    ----------
    inputs -- data passed through the RBF feature mapping
    targets -- the targets
    folds -- cross-validation folds; len(folds) is used as the sample count
        for the standard error
    scale -- scale of the basis functions
    reg_param -- regularisation strength
    num_centres_sequence -- array of centre counts; defaults to
        np.arange(1, 20)
    """
    # choose a range of numbers of centres
    if num_centres_sequence is None:
        num_centres_sequence = np.arange(1, 20)
    num_values = num_centres_sequence.size
    num_folds = len(folds)
    # create some arrays to store results, one entry per centre count
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    # run the experiments
    for c, num_centres in enumerate(num_centres_sequence):
        # c is the index of num_centres
        centres = np.linspace(0, 1, num_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # cross validate with this number of centres
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        # and in how much they vary
        train_mean_errors[c] = np.mean(train_errors)
        test_mean_errors[c] = np.mean(test_errors)
        train_stdev_errors[c] = np.std(train_errors)
        test_stdev_errors[c] = np.std(test_errors)
    # Now plot the results
    fig, ax = plot_train_test_errors(
        "no. centres", num_centres_sequence, train_mean_errors,
        test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='r')
    ax.set_title(
        'Train vs Test Error Across Centre Number With Cross-Validation')
    # savefig selects the file type through `format`; `fmt` is not one of
    # its parameters
    fig.savefig(
        "../plots/rbf_searching_number_centres_cross_validation.pdf",
        format="pdf")
def evaluate_reg_param(inputs, targets, folds, centres, scale,
                       test_error_linear, reg_params=None):
    """
    Evaluate, then plot the performance of different regularisation
    parameters.

    The RBF design matrix is built once and cross-validated for every
    regularisation strength. The figure is saved to
    ../plots/rbf/rbf_searching_reg_params_cross_validation.png and then
    shown.

    parameters
    ----------
    inputs -- data passed through the RBF feature mapping
    targets -- the targets
    folds -- cross-validation folds; len(folds) is used as the sample count
        for the standard error
    centres, scale -- forwarded to construct_rbf_feature_mapping
    test_error_linear -- forwarded to plot_train_test_errors
    reg_params -- array of regularisation strengths; defaults to
        np.logspace(-15, 5, 30)
    """
    # creating the feature mapping and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    design_matrix = feature_mapping(inputs)
    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-15, 5, 30)
    num_values = reg_params.size
    num_folds = len(folds)
    # create some arrays to store results, one entry per reg_param
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_st_dev_errors = np.zeros(num_values)
    test_st_dev_errors = np.zeros(num_values)
    print(
        'Calculating means and standard deviations of train and test errors...'
    )
    for r, reg_param in enumerate(reg_params):
        # r is the index of reg_param, reg_param is the regularisation
        # parameter; cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            design_matrix, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        # and in how much they vary
        train_mean_errors[r] = np.mean(train_errors)
        test_mean_errors[r] = np.mean(test_errors)
        train_st_dev_errors[r] = np.std(train_errors)
        test_st_dev_errors[r] = np.std(test_errors)
    # plotting the results
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_mean_errors, test_mean_errors,
        test_error_linear)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_st_dev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_st_dev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')
    ax.set_xscale('log')
    ax.set_ylim([0, 1])
    ax.set_title(
        'Train vs Test Error across Reg. Param. with Cross-validation')
    # savefig selects the file type through `format`; `fmt` is not one of
    # its parameters
    fig.savefig(
        "../plots/rbf/rbf_searching_reg_params_cross_validation.png",
        format="png")
    plt.show()
def parameter_search_rbf(inputs, targets, test_fraction, folds):
    """
    Grid-search the RBF scale and the regularisation strength with
    cross-validation, print the best pair, plot and save two slices of the
    error grid, and return the best pair.

    parameters
    ----------
    inputs -- 2d array of inputs, one data point per row
    targets -- the targets
    test_fraction -- not used by this function; kept so existing callers
        keep working
    folds -- cross-validation folds forwarded to cv_evaluation_linear_model

    returns
    -------
    best_scale -- the scale with the lowest mean test error
    best_reg_param -- the regularisation parameter with the lowest mean
        test error
    """
    # each data point is kept as a basis function centre with probability
    # sample_fraction (5%)
    sample_fraction = 0.05
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[
        np.random.choice([False, True], size=inputs.shape[0], p=p), :]
    print("\ncentres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 4, 20)  # of the basis functions
    reg_params = np.logspace(-16, -1, 20)  # choices of regularisation strength
    # create empty 2d arrays to store the mean train and test errors,
    # indexed [scale index, reg_param index]
    train_mean_errors = np.empty((scales.size, reg_params.size))
    test_mean_errors = np.empty((scales.size, reg_params.size))
    # iterate over the scales
    for i, scale in enumerate(scales):
        # we must recreate the feature mapping each time for different scales
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # iterating over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # cross validate with this scale and regularisation parameter
            train_error, test_error = cv_evaluation_linear_model(
                designmtx, targets, folds, reg_param=reg_param)
            # store the mean train and test errors in our 2d arrays
            train_mean_errors[i, j] = np.mean(train_error)
            test_mean_errors[i, j] = np.mean(test_error)
    # argmin without an axis works on the flattened grid, so the flat index
    # has to be converted back into the (i, j) position of the minimum
    best_i, best_j = np.unravel_index(
        np.argmin(test_mean_errors), test_mean_errors.shape)
    print("\nBest joint choice of parameters:")
    print(
        "\tscale %.2g and lambda = %.2g" % (
            scales[best_i], reg_params[best_j]))
    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors(
        "scale", scales, train_mean_errors[:, best_j],
        test_mean_errors[:, best_j])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Scales')
    # savefig selects the file type through `format`; `fmt` is not one of
    # its parameters
    fig.savefig("../plots/rbf_searching_scales.pdf", format="pdf")
    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_mean_errors[best_i, :],
        test_mean_errors[best_i, :])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Reg Params')
    fig.savefig("../plots/rbf_searching_reg_params.pdf", format="pdf")
    return scales[best_i], reg_params[best_j]
def parameter_search_rbf_without_cross(inputs, targets, test_fraction,test_error_linear,normalize=True):
    """
    Search over centre fraction, RBF scale and regularisation strength on a
    single train/test split, report the best triple and plot three slices
    of the error grid, each with the linear model's test error overlaid.

    parameters
    ----------
    inputs -- 2d array of inputs, one data point per row; when normalize is
        true its columns are standardised IN PLACE, so the caller's array is
        modified
    targets -- the targets
    test_fraction -- forwarded to train_and_test_split
    test_error_linear -- test error of the linear model, drawn as a
        horizontal dotted green line on every plot
    normalize -- standardise each input column to zero mean and unit
        standard deviation before building the features
    """
    if normalize:
        # standardising the inputs makes radial basis functions more helpful
        for col in range(inputs.shape[1]):
            column = inputs[:, col]
            inputs[:, col] = (column - np.mean(column)) / np.std(column)
    num_points = inputs.shape[0]
    # the three axes of the search grid
    sample_fractions = np.array([0.05,0.1,0.15,0.2,0.25])
    scales = np.logspace(0,4,20 )
    reg_params = np.logspace(-16,-1, 20)
    # 3d error grids indexed [fraction index, scale index, reg_param index]
    grid_shape = (sample_fractions.size,scales.size,reg_params.size)
    train_mean_errors = np.empty(grid_shape)
    test_mean_errors = np.empty(grid_shape)
    # one split of the data is shared by every experiment
    train_part, test_part = train_and_test_split(
        num_points, test_fraction=test_fraction)
    # running best: indices of the lowest test error seen so far
    best_k = 0
    best_i = 0
    best_j = 0
    lowest_test_error = 10**100
    for k, fraction in enumerate(sample_fractions):
        # keep each data point as a centre with probability `fraction`
        keep_probs = (1-fraction,fraction)
        centres = inputs[
            np.random.choice([False,True], size=num_points, p=keep_probs),:]
        for i, scale in enumerate(scales):
            # the feature mapping depends on the scale, so rebuild it
            to_features = construct_rbf_feature_mapping(centres,scale)
            designmtx = to_features(inputs)
            # split design matrix and targets with the shared partition
            train_designmtx, train_targets, test_designmtx, test_targets = \
                train_and_test_partition(
                    designmtx, targets, train_part, test_part)
            for j, lam in enumerate(reg_params):
                # fit on the train part and score both parts (the returned
                # weights are not used)
                train_error, test_error, _weights = train_and_test(
                    train_designmtx, train_targets, test_designmtx,
                    test_targets, reg_param=lam)
                train_mean_errors[k,i,j] = train_error
                test_mean_errors[k,i,j] = test_error
                # strictly lower error: remember where it was found
                if np.mean(test_error) < lowest_test_error:
                    lowest_test_error = test_error
                    best_k = k
                    best_i = i
                    best_j = j
    print ("The value with the lowest error is:",test_mean_errors[best_k][best_i][best_j])
    print("Best joint choice of parameters: sample fractions %.2g scale %.2g and lambda = %.2g" % (sample_fractions[best_k],scales[best_i],reg_params[best_j]))

    def draw_linear_baseline(axes):
        # horizontal reference line across the current x-range of the axes
        x_span = axes.get_xlim()
        axes.plot(x_span, test_error_linear*np.ones(2), 'g:')

    # error against scale, at the best regularisation and centre fraction
    fig, ax = plot_train_test_errors(
        "scale", scales, train_mean_errors[best_k,:,best_j],
        test_mean_errors[best_k,:,best_j])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best reg. parameter & centres', fontsize=10)
    draw_linear_baseline(ax)
    # error against regularisation, at the best scale and centre fraction
    fig, ax = plot_train_test_errors(
        "$\lambda$", reg_params, train_mean_errors[best_k,best_i,:],
        test_mean_errors[best_k,best_i,:])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best scale parameter & centres', fontsize=10)
    draw_linear_baseline(ax)
    # error against centre fraction, at the best scale and regularisation
    fig, ax = plot_train_test_errors(
        "sample fractions", sample_fractions,
        train_mean_errors[:,best_i,best_j],
        test_mean_errors[:,best_i,best_j])
    fig.suptitle('RBF regression for the best scale parameter & reg. parameter', fontsize=10)
    ax.set_xlim([0.05, 0.25])
    draw_linear_baseline(ax)
def parameter_search_rbf_cross(inputs, targets, folds, test_error_linear,
                               test_inputs, test_targets, normalize=True):
    """
    Grid-search the RBF regression hyper-parameters with cross-validation.

    Three quantities are searched jointly: the fraction of training points
    sampled as basis-function centres, the basis-function scale and the
    regularisation strength.  For every combination the mean cross-validated
    train and test errors are recorded; the combination with the lowest mean
    test error is reported, three slices through the error grid are plotted
    and the selected model is finally scored on the held-out test data.

    parameters
    ----------
    inputs - 2d array of training inputs (one row per data-point)
    targets - training targets, passed on to cv_evaluation_linear_model
    folds - cross-validation folds, passed on to cv_evaluation_linear_model
    test_error_linear - error of the linear baseline; drawn on every plot as
        a dotted green horizontal line
    test_inputs - 2d array of held-out inputs used for the final error
    test_targets - held-out targets used for the final error
    normalize - if True, standardise every input column first

    NOTE: when normalize is True both `inputs` and `test_inputs` are
    overwritten in place, so the caller's arrays are modified.
    """
    if normalize:
        # Standardise each feature.  The held-out inputs are transformed
        # with the *training* mean and standard deviation so that both sets
        # live in the same coordinate system as the RBF centres (which are
        # drawn from the training inputs).
        for col in range(inputs.shape[1]):
            col_mean = np.mean(inputs[:, col])
            col_std = np.std(inputs[:, col])
            inputs[:, col] = (inputs[:, col] - col_mean) / col_std
            test_inputs[:, col] = (test_inputs[:, col] - col_mean) / col_std

    N = inputs.shape[0]
    # candidate fractions of the training data to use as centres
    sample_fractions = np.array([0.05, 0.1, 0.15, 0.2, 0.25])
    # candidate scales of the basis functions
    scales = np.logspace(0, 4, 20)
    # candidate regularisation strengths
    reg_params = np.logspace(-16, -1, 20)

    # 3d arrays of mean errors indexed by (sample fraction, scale, lambda)
    grid_shape = (sample_fractions.size, scales.size, reg_params.size)
    train_mean_errors = np.empty(grid_shape)
    test_mean_errors = np.empty(grid_shape)

    best_k = 0
    best_i = 0
    best_j = 0
    lowest_test_error = np.inf

    for k, sample_fraction in enumerate(sample_fractions):
        # every training point becomes a centre with probability
        # sample_fraction
        p = (1 - sample_fraction, sample_fraction)
        centres = inputs[np.random.choice([False, True], size=N, p=p), :]
        for i, scale in enumerate(scales):
            # the feature mapping depends on the scale, so rebuild it here
            feature_mapping = construct_rbf_feature_mapping(centres, scale)
            designmtx = feature_mapping(inputs)
            for j, reg_param in enumerate(reg_params):
                train_error, test_error, weights = cv_evaluation_linear_model(
                    designmtx, targets, folds, reg_param=reg_param)
                mean_test_error = np.mean(test_error)
                # remember the best combination seen so far together with
                # the weights and feature mapping that produced it
                if mean_test_error < lowest_test_error:
                    lowest_test_error = mean_test_error
                    best_k = k
                    best_i = i
                    best_j = j
                    optimal_weights = weights
                    optimal_feature_mapping = feature_mapping
                train_mean_errors[k, i, j] = np.mean(train_error)
                test_mean_errors[k, i, j] = mean_test_error

    print("The value with the lowest test error at the training stage is:",
          test_mean_errors[best_k][best_i][best_j])
    print("Best joint choice of parameters: sample fractions %.2g scale %.2g and lambda = %.2g"
          % (sample_fractions[best_k], scales[best_i], reg_params[best_j]))

    def _draw_linear_baseline(ax):
        # dotted green line at the linear-regression test error
        xlim = ax.get_xlim()
        ax.plot(xlim, test_error_linear * np.ones(2), 'g:')

    # error against scale, for the best regularisation and centres fraction
    fig, ax = plot_train_test_errors(
        "scale", scales,
        train_mean_errors[best_k, :, best_j],
        test_mean_errors[best_k, :, best_j])
    ax.set_xscale('log')
    fig.suptitle(
        'RBF regression for the best reg. parameter & centres using cross-validation',
        fontsize=10)
    _draw_linear_baseline(ax)

    # error against regularisation, for the best scale and centres fraction
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params,
        train_mean_errors[best_k, best_i, :],
        test_mean_errors[best_k, best_i, :])
    ax.set_xscale('log')
    fig.suptitle(
        'RBF regression for the best scale parameter & centres using cross-validation',
        fontsize=10)
    _draw_linear_baseline(ax)

    # error against centres fraction, for the best scale and regularisation
    fig, ax = plot_train_test_errors(
        "sample fractions", sample_fractions,
        train_mean_errors[:, best_i, best_j],
        test_mean_errors[:, best_i, best_j])
    fig.suptitle(
        'RBF regression for the best scale parameter & reg. parameter using cross-validation',
        fontsize=10)
    # show exactly the searched range of sample fractions
    ax.set_xlim([sample_fractions.min(), sample_fractions.max()])
    _draw_linear_baseline(ax)

    # score the selected model on the held-out data
    predictive_func = construct_feature_mapping_approx(
        optimal_feature_mapping, optimal_weights)
    final_error = root_mean_squared_error(
        test_targets, predictive_func(test_inputs))
    print("final test error for RBF model:", final_error)
def main(ifname, delimiter=None, columns=None, has_header=True, test_fraction=0.25):
    """
    Run the wine-quality experiments end to end.

    The data are loaded from `ifname`, explored (correlations, a histogram of
    the quality column, chi-square independence tests), and then five models
    are fitted on one shared train/test split: linear regression,
    regularised linear regression, kNN regression, RBF regression and a
    polynomial basis model.  For each model the train/test errors, an
    r-square score and the first 20 predictions are printed, and the
    parameter sweeps are plotted.

    parameters
    ----------
    ifname - path of the data file handed to import_data
    delimiter, columns, has_header - forwarded to import_data
    test_fraction - fraction of the data held out for testing
    """
    data, field_names = import_data(
        ifname, delimiter=delimiter, has_header=has_header, columns=columns)

    # ---- Exploratory Data Analysis (EDA) ----
    # NOTE(review): the EDA reads the hard-coded file 'datafile.csv' rather
    # than `ifname` -- confirm that this is intended.
    raw_data = pd.read_csv('datafile.csv', sep=";")
    # correlation coefficients: |r|=1 is the strongest relation, |r|=0 the
    # weakest
    print(raw_data.corr())
    # histogram of the target to inspect how it is distributed
    plt.hist(raw_data["quality"], range=(1, 10), edgecolor='black', linewidth=1)
    plt.xlabel('quality')
    plt.ylabel('amount of samples')
    plt.title("distribution of red wine quality")

    # ---- feature selection via chi-square tests of independence ----
    import scipy.stats as stats

    class ChiSquare:
        """Chi-square independence test between two dataframe columns."""

        def __init__(self, dataframe):
            self.df = dataframe
            self.p = None  # p-value of the most recent test
            self.chi2 = None  # chi-square statistic of the most recent test
            self.dof = None  # degrees of freedom of the most recent test
            self.dfObserved = None  # observed contingency table
            self.dfExpected = None  # expected frequencies under independence

        def _print_chisquare_result(self, colX, alpha):
            # a p-value below alpha rejects independence from the target
            if self.p < alpha:
                result = "{0} is IMPORTANT for Prediction".format(colX)
            else:
                result = "{0} is NOT an important predictor. (Discard {0} from model)".format(
                    colX)
            print(result)

        def TestIndependence(self, colX, colY, alpha=0.05):
            # values are compared as strings, i.e. treated as categories
            X = self.df[colX].astype(str)
            Y = self.df[colY].astype(str)
            self.dfObserved = pd.crosstab(Y, X)
            chi2, p, dof, expected = stats.chi2_contingency(
                self.dfObserved.values)
            self.p = p
            self.chi2 = chi2
            self.dof = dof
            self.dfExpected = pd.DataFrame(expected,
                                           columns=self.dfObserved.columns,
                                           index=self.dfObserved.index)
            self._print_chisquare_result(colX, alpha)
            print('self:%s' % (self), self.chi2, self.p)

    cT = ChiSquare(raw_data)
    testColumns = [
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol"
    ]
    for var in testColumns:
        cT.TestIndependence(colX=var, colY="quality")

    # ---- split data into inputs and targets ----
    inputs = data[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
    targets = data[:, 11]

    # plot the data set before any column is overwritten below
    plot_data = np.column_stack((inputs, targets))
    exploratory_plots(plot_data, field_names)

    # column 0 is replaced by a constant column of ones (x0)
    inputs[:, 0] = np.ones(len(targets))
    # standardise the features that are used by the models below: volatile
    # acidity, citric acid, density, sulphates and alcohol
    for col in (1, 2, 7, 9, 10):
        column = inputs[:, col]
        inputs[:, col] = (column - np.mean(column)) / np.std(column)

    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(
        inputs.shape[0], test_fraction=test_fraction)

    def rsquare(test_targets, test_predicts):
        # coefficient of determination: 1 - SS_res / SS_tot
        y_mean = np.mean(test_targets)
        ss_tot = np.sum((test_targets - y_mean)**2)
        ss_res = np.sum((test_targets - test_predicts)**2)
        return 1 - (ss_res / ss_tot)

    print(
        '---------------------------Linear Regression-----------------------------------'
    )
    # keep the constant column and the five standardised features
    inputs = inputs[:, [0, 1, 2, 7, 9, 10]]
    train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
        inputs, targets, train_part, test_part)
    weights = ml_weights(train_inputs, train_targets)
    train_predicts = linear_model_predict(train_inputs, weights)
    test_predicts = linear_model_predict(test_inputs, weights)
    train_error = root_mean_squared_error(train_targets, train_predicts)
    test_error = root_mean_squared_error(test_targets, test_predicts)
    print("LR-train_weights", weights)
    print("LR-train_error", train_error)
    print("LR-test_error", test_error)
    print("LR-rsquare score", rsquare(test_targets, test_predicts))
    print("LR-prediction:", test_predicts[:20], "LR-original",
          test_targets[:20])

    print(
        '----------------Regularised Linear Regression-----------------------------'
    )
    # the train/test partition from the linear model above is reused
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                             reg_param)
        train_predicts = linear_model_predict(train_inputs, reg_weights)
        test_predicts = linear_model_predict(test_inputs, reg_weights)
        train_errors.append(
            root_mean_squared_error(train_targets, train_predicts))
        test_errors.append(
            root_mean_squared_error(test_targets, test_predicts))
    # best lambda = lowest test error
    test_errors = np.array(test_errors)
    best_l = np.argmin(test_errors)
    print("RLR-Best joint choice of parameters:")
    print("RLR-lambda = %.2g" % (reg_params[best_l]))
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params, train_errors,
                                     test_errors)
    ax.set_xscale('log')
    # refit with the selected regularisation strength (the value, not its
    # index) and predict with those weights
    reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                         reg_params[best_l])
    test_predicts = linear_model_predict(test_inputs, reg_weights)
    print("RLR-train_weights", reg_weights)
    print("RLR-train_error", train_errors[best_l])
    print("RLR-test_error", test_errors[best_l])
    print("RLR-rsquare score", rsquare(test_targets, test_predicts))
    print("RLR-prediction:", test_predicts[:20], "RLR-original",
          test_targets[:20])

    print(
        '-----------------------------kNN Regression------------------------------------'
    )
    # drop the constant x0 column
    inputs = inputs[:, [1, 2, 3, 4, 5]]
    train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
        inputs, targets, train_part, test_part)
    train_errors = []
    test_errors = []
    K = range(2, 9)
    for k in K:
        knn_approx = construct_knn_approx(train_inputs, train_targets, k)
        train_knn_predicts = knn_approx(train_inputs)
        train_errors.append(
            root_mean_squared_error(train_knn_predicts, train_targets))
        test_knn_predicts = knn_approx(test_inputs)
        test_errors.append(
            root_mean_squared_error(test_knn_predicts, test_targets))
    # best k = lowest test error
    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    best_k = np.argmin(test_errors)
    print("Best joint choice of parameters:")
    print("k = %.2g" % (K[best_k]))
    fig, ax = plot_train_test_errors("K", K, train_errors, test_errors)
    ax.set_xticks(np.arange(min(K), max(K) + 1, 1.0))
    # report and refit for the selected k
    print("kNN-train_error", train_errors[best_k])
    print("kNN-test_error", test_errors[best_k])
    knn_approx = construct_knn_approx(train_inputs, train_targets,
                                      k=K[best_k])
    test_predicts = knn_approx(test_inputs)
    print("kNN-rsquare score", rsquare(test_targets, test_predicts))
    print("kNN-y_predicts", test_predicts[:20], 'y_original',
          test_targets[:20])

    print(
        '----------------------------RBF Function-------------------------------------'
    )
    # each data-point becomes a basis-function centre with probability 0.15
    # NOTE(review): centres are drawn from all inputs, so held-out points
    # can become centres -- confirm this is acceptable.
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=inputs.shape[0],
                                      p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # regularisation strengths
    # 2d arrays of errors indexed by (scale, lambda)
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    for i, scale in enumerate(scales):
        # the feature mapping depends on the scale, so rebuild it here
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(designmtx, targets, train_part, test_part)
        for j, reg_param in enumerate(reg_params):
            train_error, test_error = train_and_test(train_designmtx,
                                                     train_targets,
                                                     test_designmtx,
                                                     test_targets,
                                                     reg_param=reg_param)
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # (i, j) index of the single lowest test error over the whole grid
    best_i, best_j = np.unravel_index(np.argmin(test_errors),
                                      test_errors.shape)
    print("Best joint choice of parameters:")
    print("\tscale= %.2g and lambda = %.2g" %
          (scales[best_i], reg_params[best_j]))
    # error for different scales using the best regularisation choice
    fig, ax = plot_train_test_errors("scale", scales, train_errors[:, best_j],
                                     test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and for different regularisation choices given the best scale
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_errors[best_i, :],
                                     test_errors[best_i, :])
    ax.set_xscale('log')
    # refit with the selected scale: the design matrices must be rebuilt,
    # the ones left over from the loop belong to the last scale tried
    feature_mapping = construct_rbf_feature_mapping(centres, scales[best_i])
    designmtx = feature_mapping(inputs)
    train_designmtx, train_targets, test_designmtx, test_targets = \
        train_and_test_partition(designmtx, targets, train_part, test_part)
    reg_weights = regularised_ml_weights(train_designmtx, train_targets,
                                         reg_params[best_j])
    test_predicts = np.dot(np.asarray(test_designmtx),
                           np.asarray(reg_weights).reshape(-1))
    print("RBF-train_error", train_errors[best_i, best_j])
    print("RBF-test_error", test_errors[best_i, best_j])
    print("RBF-rsquare score", rsquare(test_targets, test_predicts))
    print('RBF_y_predicts: ', test_predicts[:20], 'rbf_y_originals: ',
          test_targets[:20])

    print(
        '-----------------------------Polynomial---------------------------------------'
    )

    def summed_monomials(degree):
        # element-wise sum, over all input columns, of each column's
        # monomial expansion up to `degree`
        total = 0
        for col in range(inputs.shape[1]):
            total += expand_to_monomials(inputs[:, col], degree)
        return np.array(total)

    degrees = range(1, 10)
    train_errors = []
    test_errors = []
    for degree in degrees:
        processed_inputs = summed_monomials(degree)
        processed_train_inputs, train_targets, processed_test_inputs, test_targets = \
            train_and_test_partition(processed_inputs, targets, train_part,
                                     test_part)
        train_error, test_error = train_and_test(processed_train_inputs,
                                                 train_targets,
                                                 processed_test_inputs,
                                                 test_targets,
                                                 reg_param=None)
        train_errors.append(train_error)
        test_errors.append(test_error)
    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    # best degree = lowest test error; report the errors for that degree
    best_d = np.argmin(test_errors)
    print("Polynomial-train error: ", train_errors[best_d])
    print("Polynomial-test error: ", test_errors[best_d])
    print("Best joint choice of degree:")
    final_degree = degrees[best_d]
    print("degree = %.2g" % (final_degree))
    fig, ax = plot_train_test_errors("Degree", degrees, train_errors,
                                     test_errors)
    ax.set_xticks(np.arange(min(degrees), max(degrees) + 1, 1.0))
    # refit with the selected degree
    processed_inputs = summed_monomials(final_degree)
    processed_train_inputs, train_targets, processed_test_inputs, test_targets = \
        train_and_test_partition(processed_inputs, targets, train_part,
                                 test_part)
    # NOTE(review): the degree sweep above scores models with
    # reg_param=None, while these final weights use the largest RBF
    # regularisation strength -- confirm which value is intended.
    poly_reg_param = reg_params[-1]
    weights = regularised_least_squares_weights(processed_train_inputs,
                                                train_targets, poly_reg_param)
    test_predicts = prediction_function(processed_test_inputs, weights,
                                        final_degree)
    print("Polynomial-rsquare score", rsquare(test_targets, test_predicts))
    print('Polynomial-y_predicts: ', test_predicts[:20],
          'Polynomial-y_original: ', test_targets[:20])
    plt.show()
def evaluate_reg_param(inputs, targets, folds, centres, scale, reg_params=None):
    """
    Evaluate then plot the performance of different regularisation
    parameters for an RBF model with fixed centres and scale.

    Each regularisation parameter is cross-validated; the mean train and
    test errors are plotted against it on a log x-axis together with a band
    of plus/minus one standard error.  The mean test error of the fit with
    reg_param=None is drawn as a dotted green horizontal line.

    parameters
    ----------
    inputs - the raw inputs, passed through the RBF feature mapping
    targets - the targets, passed on to cv_evaluation_linear_model
    folds - cross-validation folds, passed on to cv_evaluation_linear_model
    centres - centres of the RBF basis functions
    scale - scale of the RBF basis functions
    reg_params - sequence of regularisation parameters to evaluate
        (default: np.logspace(-15, 0))
    """
    # create the feature mapping and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    # choose a range of regularisation parameters; plain sequences are
    # accepted as well as arrays
    if reg_params is None:
        reg_params = np.logspace(-15, 0)
    else:
        reg_params = np.asarray(reg_params)
    num_values = reg_params.size
    num_folds = len(folds)
    # arrays to store the per-parameter summary statistics
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    for r, reg_param in enumerate(reg_params):
        # cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # mean and spread of the per-fold errors
        train_mean_errors[r] = np.mean(train_errors)
        test_mean_errors[r] = np.mean(test_errors)
        train_stdev_errors[r] = np.std(train_errors)
        test_stdev_errors[r] = np.std(test_errors)

    # reference: test error without a regularisation parameter
    _, test_errors_without_reg = cv_evaluation_linear_model(
        designmtx, targets, folds, reg_param=None)
    test_mean_error_without_reg_param = np.mean(test_errors_without_reg)

    # Now plot the results
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors, test_mean_errors)
    # switch to the log axis first so that the x-limits read further down
    # are the limits of the axis that is actually displayed
    ax.set_xscale('log')
    # Error ranges: mean plus/minus 1 standard error, i.e. the standard
    # deviation divided by sqrt(n) where n is the number of folds.
    # (There are other choices for error bars.)
    std_err_scale = np.sqrt(num_folds)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / std_err_scale
    upper = train_mean_errors + train_stdev_errors / std_err_scale
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / std_err_scale
    upper = test_mean_errors + test_stdev_errors / std_err_scale
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')
    # dotted green line to represent the fit without regularisation
    xlim = ax.get_xlim()
    ax.plot(xlim, test_mean_error_without_reg_param * np.ones(2), 'g:')
def main():
    """
    Example code demonstrating regression with radial basis functions (RBF)
    on sampled 1d data.

    The basis functions are plotted, an unregularised and a regularised RBF
    fit are plotted and saved as pdf, and the train/test error is then
    plotted while varying, in turn, the regularisation strength, the scale
    of the basis functions and the number of centres.
    """
    # specify the centres of the rbf basis functions
    centres = np.linspace(0, 1, 7)
    # the width (analogous to standard deviation) of the basis functions
    scale = 0.15
    print("centres = %r" % (centres,))
    print("scale = %r" % (scale,))
    feature_mapping = construct_rbf_feature_mapping(centres, scale)

    # plot the basis functions themselves: one curve per design-matrix
    # column, evaluated on a regular grid over [0, 1]
    datamtx = np.linspace(0, 1, 51)
    designmtx = feature_mapping(datamtx)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for colid in range(designmtx.shape[1]):
        ax.plot(datamtx, designmtx[:, colid])
    ax.set_xlim([0, 1])
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])

    # choose number of data-points and sample a pair of vectors: the input
    # values and the corresponding target values
    N = 20
    inputs, targets = sample_data(N, arbitrary_function_1, seed=37)
    # construct the design matrix for the sampled inputs
    designmtx = feature_mapping(inputs)
    # find the weights that fit the data in a least squares way
    weights = ml_weights(designmtx, targets)
    # use the weights to create a function that takes inputs and returns
    # predictions (functions can be passed around like any other object)
    rbf_approx = construct_feature_mapping_approx(feature_mapping, weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_approx, inputs, targets, arbitrary_function_1)
    ax.legend(lines, ['true function', 'data', 'linear approx'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    # the savefig keyword selecting the file format is `format`
    fig.savefig("regression_rbf.pdf", format="pdf")

    # for a single choice of regularisation strength we can plot the
    # approximating function
    reg_param = 10**-3
    reg_weights = regularised_ml_weights(designmtx, targets, reg_param)
    rbf_reg_approx = construct_feature_mapping_approx(
        feature_mapping, reg_weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_reg_approx, inputs, targets, arbitrary_function_1)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf_basis_functions_reg.pdf", format="pdf")

    # to find a good regularisation parameter, we can perform a parameter
    # search (a naive way to do this is to simply try a sequence of
    # reasonable values within a reasonable range).
    # sample some training and testing inputs
    train_inputs, train_targets = sample_data(
        N, arbitrary_function_1, seed=37)
    # a different seed is used for the test data, otherwise some of the
    # sampled points would be the same
    test_inputs, test_targets = sample_data(
        100, arbitrary_function_1, seed=82)
    # convert the raw inputs into feature vectors (design matrices)
    train_designmtx = feature_mapping(train_inputs)
    test_designmtx = feature_mapping(test_inputs)

    # train and test error for a sequence of regularisation strengths
    reg_params = np.logspace(-5, 1)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    ax.set_xscale('log')

    # we may also be interested in choosing the right number of centres, or
    # the right width/scale of the rbf functions.
    # Here we vary the width and evaluate the performance
    reg_param = 10**-3
    scales = np.logspace(-2, 0)
    train_errors = []
    test_errors = []
    for scale in scales:
        # the feature mapping must be constructed anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors, test_errors)
    ax.set_xscale('log')

    # Here we vary the number of centres and evaluate the performance
    reg_param = 10**-3
    scale = 0.15
    n_centres_seq = np.arange(3, 20)
    train_errors = []
    test_errors = []
    for n_centres in n_centres_seq:
        # the feature mapping must be constructed anew for each number of
        # centres
        centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)
    fig, ax = plot_train_test_errors(
        "Num. Centres", n_centres_seq, train_errors, test_errors)
    plt.show()
def bayesian_regression_entry_point(data):
    """
    Fit a Bayesian linear regression on RBF features and explore its
    hyper-parameters.

    The last column of `data` is the target and the first 11 columns are
    the inputs.  The inputs are standardised, a model with fixed
    hyper-parameters is fitted on rows 0..959 and evaluated on rows
    1300..1598, and then the prior variance (alpha), the noise precision
    (beta), the basis-function scale and the fraction of training points
    used as centres are each swept in turn, with rows 960..1299 used as the
    held-out set.  Every sweep is plotted and saved as a pdf.

    NOTE: the input columns are standardised in place through a slice of
    `data`, so the caller's array is modified.

    NOTE(review): train_and_test receives the posterior mean `mN` as its
    fifth positional argument; other call sites pass a regularisation
    parameter in that position -- confirm the signature used here.
    """
    data_targets = data[:, -1]
    data = data[:, 0:11]
    print(data)
    print(data_targets)
    # standardise every input column
    for i in range(data.shape[1]):
        data[:, i] = (data[:, i] - np.mean(data[:, i])) / np.std(data[:, i])
    print("standard deviation is %s" % str(np.std(data, axis=0)))

    # hyper-parameter values used whenever a quantity is not being swept
    default_alpha = 50  # prior covariance is alpha * identity
    default_beta = (1 / 0.01)**2  # noise precision of the data

    inputs = data[0:960, :]
    targets = data_targets[0:960]
    test_inputs = data[1300:1599, :]
    test_targets = data_targets[1300:1599]

    # each training point becomes an rbf centre with probability 0.1
    N = inputs.shape[0]
    centres1 = inputs[np.random.choice([False, True], size=N, p=[0.9, 0.1]), :]
    print(centres1)
    # the width (analogous to standard deviation) of the basis functions
    scale = 47
    print("centres = %r" % (centres1, ))
    print("scale = %r" % (scale, ))
    # create the feature mapping and the design matrices
    feature_mapping = construct_rbf_feature_mapping(centres1, scale)
    designmtx = feature_mapping(inputs)
    test_designmtx = feature_mapping(test_inputs)
    print(designmtx.shape)
    # the number of features is the width of this matrix
    M = designmtx.shape[1]
    # define a prior mean and covariance matrix
    m0 = np.zeros(M)
    print("m0 equals %r" % (m0, ))
    S0 = default_alpha * np.identity(M)
    # find the posterior over weights
    mN, SN = calculate_weights_posterior(designmtx, targets, default_beta,
                                         m0, S0)
    train_error, test_error = train_and_test(designmtx, targets,
                                             test_designmtx, test_targets, mN)
    print(train_error, test_error)
    # the posterior mean (also the MAP) gives the central prediction
    mean_approx = construct_feature_mapping_approx(feature_mapping, mN)
    fig, ax, lines = plot_function_data_and_approximation(
        mean_approx, test_inputs, test_targets)
    ax.legend(lines, ['Prediction', 'True value'])
    ax.set_xticks([])
    ax.set_ylabel("Quality")
    fig.suptitle('Prediction vlaue against True value', fontsize=10)
    # the savefig keyword selecting the file format is `format`
    fig.savefig("regression_bayesian_rbf.pdf", format="pdf")

    # ---- hyper-parameter sweeps, all on the same split ----
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]

    def _posterior_errors(train_designmtx, test_designmtx, alpha, beta):
        # fit the weight posterior for a zero-mean prior with covariance
        # alpha * identity and return (train_error, test_error)
        num_features = train_designmtx.shape[1]
        prior_mean = np.zeros(num_features)
        prior_cov = alpha * np.identity(num_features)
        post_mean, _ = calculate_weights_posterior(
            train_designmtx, train_targets, beta, prior_mean, prior_cov)
        return train_and_test(train_designmtx, train_targets, test_designmtx,
                              test_targets, post_mean)

    def _plot_sweep(name, values, train_errs, test_errs, title, offset,
                    filename):
        # plot the errors of one sweep on a log x-axis, mark and annotate
        # the lowest test error, and save the figure as pdf
        lowest = np.min(test_errs)
        best = np.argmin(test_errs)
        fig, ax = plot_train_test_errors(name, values, train_errs, test_errs)
        fig.suptitle(title, fontsize=10)
        ax.plot(values[best], lowest, "ro")
        ax.annotate((str(values[best]), str(lowest)),
                    xy=(values[best], lowest),
                    xytext=(values[best] + offset, lowest + offset),
                    arrowprops=dict(facecolor='green', shrink=0.1))
        ax.set_xscale('log')
        fig.savefig(filename, format="pdf")

    # the alpha and beta sweeps share one feature mapping, so the design
    # matrices are built once outside the loops
    feature_mapping = construct_rbf_feature_mapping(centres1, scale)
    train_designmtx = feature_mapping(train_inputs)
    test_designmtx = feature_mapping(test_inputs)

    # search the optimum alpha
    alphas = np.logspace(1, 3)
    train_errors = []
    test_errors = []
    for alpha in alphas:
        train_error, test_error = _posterior_errors(
            train_designmtx, test_designmtx, alpha, default_beta)
        train_errors.append(train_error)
        test_errors.append(test_error)
    _plot_sweep("alpha", alphas, train_errors, test_errors,
                'Alpha vs Error in Bayesian', 0.01, "alpha.pdf")

    # search the optimum beta
    betas = (1. / np.logspace(-3, 1))**2
    train_errors = []
    test_errors = []
    for beta in betas:
        train_error, test_error = _posterior_errors(
            train_designmtx, test_designmtx, default_alpha, beta)
        train_errors.append(train_error)
        test_errors.append(test_error)
    _plot_sweep("beta", betas, train_errors, test_errors,
                'Beta vs Error in Bayesian', 0.05, "beta.pdf")

    # search the optimum scale
    scales = np.logspace(0.5, 3)
    train_errors = []
    test_errors = []
    for scale in scales:
        # the feature mapping must be constructed anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_error, test_error = _posterior_errors(
            feature_mapping(train_inputs), feature_mapping(test_inputs),
            default_alpha, default_beta)
        train_errors.append(train_error)
        test_errors.append(test_error)
    _plot_sweep("scale", scales, train_errors, test_errors,
                'Scale vs Error in Bayesian', 0.2, "scale.pdf")

    # Here we vary the fraction of training points used as centres
    scale = 60
    cent_parts = np.linspace(0.05, 0.8, 16)
    train_errors = []
    test_errors = []
    N = train_inputs.shape[0]
    for cent_part in cent_parts:
        # a fresh random set of centres is drawn for each fraction
        centres1 = train_inputs[np.random.choice(
            [False, True], size=N, p=[1 - cent_part, cent_part]), :]
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_error, test_error = _posterior_errors(
            feature_mapping(train_inputs), feature_mapping(test_inputs),
            default_alpha, default_beta)
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("Num. Centres", cent_parts, train_errors,
                                     test_errors)
    fig.suptitle('Num. Centres vs Error in Bayesian', fontsize=10)
    ax.plot(cent_parts[min_error_index], min_error, "ro")
    ax.text(cent_parts[min_error_index], min_error,
            (str(cent_parts[min_error_index]), str(min_error)))
    fig.savefig("Num. centres.pdf", format="pdf")
    plt.show()