# Standard imports needed by the code below. The modelling and plotting
# helpers (import_data, ml_weights, regularised_ml_weights,
# linear_model_predict, root_mean_squared_error, construct_rbf_feature_mapping,
# plot_train_test_errors, etc.) are assumed to be provided by the accompanying
# coursework modules.
import math
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def train_and_test(train_inputs, train_targets, test_inputs, test_targets,
                   reg_param=None):
    """
    Fit a linear model with either least squares or regularised least squares
    to the training data, then evaluate it on both the training and test data.

    parameters
    ----------
    train_inputs - the input design matrix for training
    train_targets - the training targets as a vector
    test_inputs - the input design matrix for testing
    test_targets - the test targets as a vector
    reg_param (optional) - the regularisation strength. If provided,
        regularised maximum likelihood fitting is used with this
        regularisation strength. Otherwise, (non-regularised) least squares
        is used.

    returns
    -------
    train_error - the training error for the approximation
    test_error - the test error for the approximation
    variance_training_error - the variance of the training residuals
    """
    # find the optimal weights (depends on whether we regularise)
    if reg_param is None:
        # use the simple least squares approach
        weights = ml_weights(train_inputs, train_targets)
    else:
        # use the regularised least squares approach
        weights = regularised_ml_weights(train_inputs, train_targets, reg_param)
    # predictions are linear functions of the inputs; evaluate them here
    train_predicts = linear_model_predict(train_inputs, weights)
    test_predicts = linear_model_predict(test_inputs, weights)
    # variance of the training residuals
    residuals = (np.array(train_targets).flatten()
                 - np.array(train_predicts).flatten())
    variance_training_error = np.var(residuals)
    # negative joint log probability of the (positive) test predictions
    sum_joint_log_probabilities = 0
    for n in range(len(test_predicts)):
        if test_predicts[n] <= 0:
            continue
        sum_joint_log_probabilities += math.log(test_predicts[n])
    sum_joint_log_probabilities *= -1
    # print("Error as negative joint log probability: %r"
    #       % sum_joint_log_probabilities)
    # evaluate the error between the predictions and true targets on both sets
    train_error = root_mean_squared_error(train_targets, train_predicts)
    test_error = root_mean_squared_error(test_targets, test_predicts)
    if np.isnan(test_error):
        print("test_predicts = %r" % (test_predicts, ))
    return train_error, test_error, variance_training_error
# Variant of train_and_test used by the experiments further below, which
# expect only the two error values to be returned.
def train_and_test(train_inputs, train_targets, test_inputs, test_targets,
                   reg_param=None):
    """
    Fit a linear model with either least squares or regularised least squares
    to the training data, then evaluate it on both the training and test data.

    parameters
    ----------
    train_inputs - the input design matrix for training
    train_targets - the training targets as a vector
    test_inputs - the input design matrix for testing
    test_targets - the test targets as a vector
    reg_param (optional) - the regularisation strength. If provided,
        regularised maximum likelihood fitting is used with this
        regularisation strength. Otherwise, (non-regularised) least squares
        is used.

    returns
    -------
    train_error - the training error for the approximation
    test_error - the test error for the approximation
    """
    # find the optimal weights (depends on whether we regularise)
    if reg_param is None:
        # use the simple least squares approach
        weights = ml_weights(train_inputs, train_targets)
    else:
        # use the regularised least squares approach
        weights = regularised_ml_weights(train_inputs, train_targets, reg_param)
    # predictions are linear functions of the inputs; evaluate them here
    train_predicts = linear_model_predict(train_inputs, weights)
    test_predicts = linear_model_predict(test_inputs, weights)
    # evaluate the error between the predictions and true targets on both sets
    train_error = root_mean_squared_error(train_targets, train_predicts)
    test_error = root_mean_squared_error(test_targets, test_predicts)
    if np.isnan(test_error):
        print("test_predicts = %r" % (test_predicts, ))
    return train_error, test_error
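# --- Illustrative sketch (not part of the original script) -------------------
# ml_weights and regularised_ml_weights are imported from the coursework
# helpers, so their exact implementation is an assumption here. A minimal
# version consistent with the docstrings above would solve the (regularised)
# normal equations; the "_sketch" names are hypothetical and for illustration
# only.
def ml_weights_sketch(designmtx, targets):
    """Least squares weights w = (Phi^T Phi)^{-1} Phi^T y (illustrative)."""
    Phi = np.asarray(designmtx, dtype=float)
    y = np.asarray(targets, dtype=float).reshape(-1)
    return np.linalg.pinv(Phi.T @ Phi) @ Phi.T @ y


def regularised_ml_weights_sketch(designmtx, targets, reg_param):
    """Ridge weights w = (lambda I + Phi^T Phi)^{-1} Phi^T y (illustrative)."""
    Phi = np.asarray(designmtx, dtype=float)
    y = np.asarray(targets, dtype=float).reshape(-1)
    I = np.identity(Phi.shape[1])
    return np.linalg.inv(reg_param * I + Phi.T @ Phi) @ Phi.T @ y
# -----------------------------------------------------------------------------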
def main(ifname=None, delimiter=None, columns=None, normalise=None,
         features=None):
    """
    To be called when the script is run. This function imports the data (if a
    filename is provided), optionally normalises it, constructs an RBF design
    matrix for a chosen feature combination, and evaluates a regularised
    linear model with cross-validation.

    parameters
    ----------
    ifname -- filename/path of data file.
    delimiter -- delimiter of data values
    columns -- a list of integers specifying which columns of the file to
        import (counting from 0)
    normalise -- "Y"/"N"; whether to normalise the input data (the user is
        prompted if this is not provided)
    features -- a list of column indices to use as input features (the user
        is prompted if this is not provided)
    """
    # if no file name is provided then use synthetic data
    if ifname is None:
        print("You need to ingest the CSV file")
    else:
        data, field_names = import_data(ifname, delimiter=delimiter,
                                        has_header=True, columns=columns)

        # DATA PREPARATION -----------------------------------------------
        N = data.shape[0]
        target = data[:, 11:]

        # ask the user to confirm whether to normalise or not
        if normalise is None:
            normalise_response = input(
                "Do you want to normalise the data? (Y/N)")
            normalise = normalise_response.upper()

        normalise_label = ""
        if normalise == "Y":
            normalise_label = "_normalised"
            # standardise each input column (mean 0, standard deviation 1):
            # 0 fixed acidity, 1 volatile acidity, 2 citric acid,
            # 3 residual sugar, 4 chlorides, 5 free sulfur dioxide,
            # 6 total sulfur dioxide, 7 density, 8 pH, 9 sulphates, 10 alcohol
            for col in range(11):
                data[:, col] = ((data[:, col] - np.mean(data[:, col]))
                                / np.std(data[:, col]))
        elif normalise != "N":
            sys.exit("Please enter a valid response of Y or N")

        if features is None:
            feature_response = input(
                "Please specify which feature combination you want "
                "(e.g. 1,2,5,7)")
            feature_response = feature_response.split(",")
            # convert the list of strings into a list of integers
            feature_combin = []
            for i in range(len(feature_response)):
                print(feature_response[i])
                feature_combin.append(int(feature_response[i]))
        else:
            feature_combin = features

        # gather the selected feature columns into an (N, num_features) matrix
        inputs = np.array([])
        for j in range(len(feature_combin)):
            inputs = np.append(inputs, data[:, feature_combin[j]])
        inputs = inputs.reshape(len(feature_combin), data.shape[0])
        inputs = (np.rot90(inputs, 3))[:, ::-1]
        # print("INPUT: ", inputs)

        # RBF MODEL -------------------------------------------------------
        # specify the centres of the rbf basis functions
        centres = np.asarray([
            0.35, 0.4, 0.45, 0.459090909, 0.468181818, 0.477272727,
            0.486363636, 0.495454545, 0.504545455, 0.513636364, 0.522727273,
            0.531818182, 0.540909091, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6,
            0.61, 0.62, 0.63, 0.64, 0.65, 0.7, 0.75, 0.8
        ])
        # the width (analogous to standard deviation) of the basis functions
        scale = 450
        reg_param = 7.906043210907701e-11
        print("centres = %r" % (centres, ))
        print("scale = %r" % (scale, ))
        print("reg param = %r" % (reg_param, ))

        # create the feature mapping
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        # plot the basis functions themselves for reference
        # display_basis_functions(feature_mapping)

        # now construct the design matrix for the inputs
        designmtx = feature_mapping(inputs)
        # the number of features is the width of this matrix
        print("DESIGN MATRIX: ", designmtx)

        if reg_param is None:
            # use the simple least squares approach
            weights = ml_weights(designmtx, target)
        else:
            # use the regularised least squares approach
            weights = regularised_ml_weights(designmtx, target, reg_param)

        # get the cross-validation folds
        num_folds = 4
        folds = create_cv_folds(N, num_folds)
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, target, folds, reg_param=reg_param)

        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        print("TRAIN MEAN ERROR: ", train_mean_error)
        print("TEST MEAN ERROR: ", test_mean_error)
        print("TRAIN STDEV ERROR: ", train_stdev_error)
        print("TEST STDEV ERROR: ", test_stdev_error)
        print("ML WEIGHTS: ", weights)

        apply_validation_set(feature_combin, feature_mapping, weights)
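# --- Illustrative sketch (not part of the original script) -------------------
# create_cv_folds is imported from the coursework helpers; it is assumed here
# to return one (train_part, test_part) pair of boolean masks per fold, which
# is how the folds are consumed by cv_evaluation_linear_model above. The
# "_sketch" name is hypothetical and for illustration only.
def create_cv_folds_sketch(N, num_folds):
    """Randomly assign each of N points to one of num_folds folds and return
    a list of boolean (train, test) mask pairs (illustrative only)."""
    fold_ids = np.random.randint(0, num_folds, size=N)
    folds = []
    for f in range(num_folds):
        test_part = (fold_ids == f)
        train_part = ~test_part
        folds.append((train_part, test_part))
    return folds
# -----------------------------------------------------------------------------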
def main(ifname, delimiter=None, columns=None, has_header=True,
         test_fraction=0.25):
    data, field_names = import_data(ifname, delimiter=delimiter,
                                    has_header=has_header, columns=columns)

    # Exploratory Data Analysis (EDA)
    raw_data = pd.read_csv('datafile.csv', sep=";")
    # view the correlation coefficients, where |r|=1 indicates the strongest
    # relation and |r|=0 the weakest
    df = pd.DataFrame(data=raw_data)
    print(df.corr())
    # check whether the quality values are normally distributed
    plt.hist(raw_data["quality"], range=(1, 10), edgecolor='black',
             linewidth=1)
    plt.xlabel('quality')
    plt.ylabel('amount of samples')
    plt.title("distribution of red wine quality")

    # feature selection
    import scipy.stats as stats
    from scipy.stats import chi2_contingency

    class ChiSquare:
        def __init__(self, dataframe):
            self.df = dataframe
            self.p = None  # p-value
            self.chi2 = None  # chi-squared test statistic
            self.dof = None
            self.dfObserved = None
            self.dfExpected = None

        def _print_chisquare_result(self, colX, alpha):
            if self.p < alpha:
                result = "{0} is IMPORTANT for Prediction".format(colX)
            else:
                result = ("{0} is NOT an important predictor. "
                          "(Discard {0} from model)".format(colX))
            print(result)

        def TestIndependence(self, colX, colY, alpha=0.05):
            X = self.df[colX].astype(str)
            Y = self.df[colY].astype(str)
            self.dfObserved = pd.crosstab(Y, X)
            chi2, p, dof, expected = stats.chi2_contingency(
                self.dfObserved.values)
            self.p = p
            self.chi2 = chi2
            self.dof = dof
            self.dfExpected = pd.DataFrame(expected,
                                           columns=self.dfObserved.columns,
                                           index=self.dfObserved.index)
            self._print_chisquare_result(colX, alpha)
            print('self:%s' % (self), self.chi2, self.p)

    # initialise the ChiSquare class
    cT = ChiSquare(raw_data)

    # feature selection
    testColumns = [
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol"
    ]
    for var in testColumns:
        cT.TestIndependence(colX=var, colY="quality")

    # split the data into inputs and targets
    inputs = data[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
    targets = data[:, 11]

    # mean normalisation
    fixed_acidity = inputs[:, 0]
    volatile_acidity = inputs[:, 1]
    citric_acid = inputs[:, 2]
    residual_sugar = inputs[:, 3]
    chlorides = inputs[:, 4]
    free_sulfur_dioxide = inputs[:, 5]
    total_sulfur_dioxide = inputs[:, 6]
    density = inputs[:, 7]
    ph = inputs[:, 8]
    sulphates = inputs[:, 9]
    alcohol = inputs[:, 10]

    # draw plots of the data set
    normalised_data = np.column_stack((inputs, targets))
    exploratory_plots(normalised_data, field_names)

    # add a column of ones (the x0 bias feature)
    inputs[:, 0] = np.ones(len(targets))

    # normalise the selected columns
    inputs[:, 1] = (volatile_acidity - np.mean(volatile_acidity)) / np.std(volatile_acidity)
    inputs[:, 2] = (citric_acid - np.mean(citric_acid)) / np.std(citric_acid)
    inputs[:, 7] = (density - np.mean(density)) / np.std(density)
    inputs[:, 9] = (sulphates - np.mean(sulphates)) / np.std(sulphates)
    inputs[:, 10] = (alcohol - np.mean(alcohol)) / np.std(alcohol)

    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(
        inputs.shape[0], test_fraction=test_fraction)

    # another evaluation function
    def rsquare(test_targets, test_predicts):
        y_mean = np.mean(test_targets)
        ss_tot = sum((test_targets - y_mean)**2)
        ss_res = sum((test_targets - test_predicts)**2)
        return 1 - (ss_res / ss_tot)

    print('---------------------------Linear Regression-----------------------------------')
    # linear regression
    # keep the bias column and the selected (normalised) feature columns
    inputs = inputs[:, [0, 1, 2, 7, 9, 10]]
    # train_part, test_part = train_and_test_split(
    #     inputs.shape[0], test_fraction=test_fraction)
    train_inputs, train_targets, test_inputs, test_targets = \
        train_and_test_partition(inputs, targets, train_part, test_part)
    weights = ml_weights(train_inputs, train_targets)
    train_predicts = linear_model_predict(train_inputs, weights)
    test_predicts = linear_model_predict(test_inputs, weights)
    train_error = root_mean_squared_error(train_targets, train_predicts)
    test_error = root_mean_squared_error(test_targets, test_predicts)
    print("LR-train_weights", weights)
    print("LR-train_error", train_error)
    print("LR-test_error", test_error)
    print("LR-rsquare score", rsquare(test_targets, test_predicts))
    print("LR-prediction:", test_predicts[:20], "LR-original",
          test_targets[:20])

    print('----------------Regularised Linear Regression-----------------------------')
    # regularised linear regression
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # print("RLR-Evaluating reg_param " + str(reg_param))
        train_inputs, train_targets, test_inputs, test_targets = \
            train_and_test_partition(inputs, targets, train_part, test_part)
        reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                             reg_param)
        train_predicts = linear_model_predict(train_inputs, reg_weights)
        test_predicts = linear_model_predict(test_inputs, reg_weights)
        train_error = root_mean_squared_error(train_targets, train_predicts)
        test_error = root_mean_squared_error(test_targets, test_predicts)
        train_errors.append(train_error)
        test_errors.append(test_error)

    # best lambda
    test_errors = np.array(test_errors)
    best_l = np.argmin(test_errors)
    print("RLR-Best joint choice of parameters:")
    print("RLR-lambda = %.2g" % (reg_params[best_l]))
    # plot train and test errors against the regularisation parameter
    fig, ax = plot_train_test_errors("$\lambda$", reg_params, train_errors,
                                     test_errors)
    ax.set_xscale('log')
    # refit with the best regularisation strength
    reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                         reg_params[best_l])
    print("RLR-train_weights", reg_weights)
    print("RLR-train_error", train_errors[best_l])
    print("RLR-test_error", test_errors[best_l])
    print("RLR-rsquare score", rsquare(test_targets, test_predicts))
    print("RLR-prediction:", test_predicts[:20], "RLR-original",
          test_targets[:20])

    print('-----------------------------kNN Regression------------------------------------')
    # kNN regression
    # drop the x0=1 bias column
    inputs = inputs[:, [1, 2, 3, 4, 5]]
    train_errors = []
    test_errors = []
    K = range(2, 9)
    for k in K:
        train_inputs, train_targets, test_inputs, test_targets = \
            train_and_test_partition(inputs, targets, train_part, test_part)
        knn_approx = construct_knn_approx(train_inputs, train_targets, k)
        train_knn_predicts = knn_approx(train_inputs)
        train_error = root_mean_squared_error(train_knn_predicts,
                                              train_targets)
        test_knn_predicts = knn_approx(test_inputs)
        test_error = root_mean_squared_error(test_knn_predicts, test_targets)
        train_errors.append(train_error)
        test_errors.append(test_error)
        # print("knn_predicts: ", np.around(test_knn_predicts),
        #       "knn-original", test_targets)

    # best k
    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    best_k = np.argmin(test_errors)
    print("Best joint choice of parameters:")
    print("k = %.2g" % (K[best_k]))
    fig, ax = plot_train_test_errors("K", K, train_errors, test_errors)
    ax.set_xticks(np.arange(min(K), max(K) + 1, 1.0))
    print("kNN-train_error", train_errors[-1])
    print("kNN-test_error", test_errors[-1])
    knn_approx = construct_knn_approx(train_inputs, train_targets, k=3)
    test_predicts = knn_approx(test_inputs)
    print("kNN-rsquare score", rsquare(test_targets, test_predicts))
    print("kNN-y_predicts", test_predicts[:20], 'y_original',
          test_targets[:20])

    print('----------------------------RBF Function-------------------------------------')
    # Radial Basis Functions
    # for the centres of the basis functions sample 15% of the data
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=inputs.shape[0],
                                      p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # widths of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # choices of regularisation strength
    # create empty 2d arrays to store the train and test errors
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    # iterate over the scales
    for i, scale in enumerate(scales):
        # i is the index, scale is the corresponding scale
        # we must recreate the feature mapping each time for different scales
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # partition the design matrix and targets into train and test
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(designmtx, targets, train_part, test_part)
        # iterate over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # j is the index, reg_param is the corresponding regularisation
            # parameter; train and test the model
            train_error, test_error = train_and_test(
                train_designmtx, train_targets, test_designmtx, test_targets,
                reg_param=reg_param)
            # store the train and test errors in our 2d arrays
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error

    # we have a 2d array of train and test errors; find the (i, j) index of
    # the smallest test error
    best_i = np.argmin(np.min(test_errors, axis=1))
    best_j = np.argmin(test_errors[best_i, :])
    print("Best joint choice of parameters:")
    print("\tscale= %.2g and lambda = %.2g" % (scales[best_i],
                                               reg_params[best_j]))
    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors("scale", scales, train_errors[:, best_j],
                                     test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors("$\lambda$", reg_params,
                                     train_errors[best_i, :],
                                     test_errors[best_i, :])
    ax.set_xscale('log')
    feature_mapping = construct_rbf_feature_mapping(centres, scales[best_i])
    reg_weights = regularised_ml_weights(train_designmtx, train_targets,
                                         reg_params[best_j])
    # test predictions with the fitted weights
    test_predicts = np.matrix(test_designmtx) * np.matrix(reg_weights).reshape(
        (len(reg_weights), 1))
    test_predicts = np.array(test_predicts).flatten()
    print("RBF-train_error", train_errors[best_i, best_j])
    print("RBF-test_error", test_errors[best_i, best_j])
    print("RBF-rsquare score", rsquare(test_targets, test_predicts))
    print('RBF_y_predicts: ', test_predicts[:20], 'rbf_y_originals: ',
          test_targets[:20])

    print('-----------------------------Polynomial---------------------------------------')
    # Polynomial basis functions
    degrees = range(1, 10)
    train_errors = []
    test_errors = []
    for degree in degrees:
        # expand each input column to monomials of the given degree and sum
        # the expansions
        processed_inputs = 0
        for i in range(inputs.shape[1]):
            processed_input = expand_to_monomials(inputs[:, i], degree)
            processed_inputs += processed_input
        processed_inputs = np.array(processed_inputs)
        # split the data into train and test sets
        processed_train_inputs, train_targets, processed_test_inputs, test_targets = \
            train_and_test_partition(processed_inputs, targets, train_part,
                                     test_part)
        train_error, test_error = train_and_test(
            processed_train_inputs, train_targets, processed_test_inputs,
            test_targets, reg_param=None)
        weights = regularised_least_squares_weights(processed_train_inputs,
                                                    train_targets, reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)

    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    print("Polynomial-train error: ", train_errors[-1])
    print("Polynomial-test error: ", test_errors[-1])
    best_d = np.argmin(test_errors)
    print("Best joint choice of degree:")
    final_degree = degrees[best_d]
    print("degree = %.2g" % (final_degree))
    fig, ax = plot_train_test_errors("Degree", degrees, train_errors,
                                     test_errors)
    ax.set_xticks(np.arange(min(degrees), max(degrees) + 1, 1.0))

    # test functionality with the final degree
    processed_inputs = 0
    for i in range(inputs.shape[1]):
        processed_input = expand_to_monomials(inputs[:, i], final_degree)
        processed_inputs += processed_input
    processed_inputs = np.array(processed_inputs)
    processed_train_inputs, train_targets, processed_test_inputs, test_targets = \
        train_and_test_partition(processed_inputs, targets, train_part,
                                 test_part)
    train_error, test_error = train_and_test(
        processed_train_inputs, train_targets, processed_test_inputs,
        test_targets, reg_param=None)
    weights = regularised_least_squares_weights(processed_train_inputs,
                                                train_targets, reg_param)
    # print("processed_train_inputs.shape", processed_train_inputs.shape)
    # print('weights: ', weights, 'weights shape: ', weights.shape)
    test_predicts = prediction_function(processed_test_inputs, weights,
                                        final_degree)
    print("Polynomial-rsquare score", rsquare(test_targets, test_predicts))
    print('Polynomial-y_predicts: ', test_predicts[:20],
          'Polynomial-y_original: ', test_targets[:20])

    plt.show()
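# --- Illustrative sketch (not part of the original script) -------------------
# construct_knn_approx is imported from the coursework helpers; its exact
# implementation is an assumption here. Consistent with how it is used above
# (it returns a callable that maps an input matrix to predictions), a minimal
# kNN regressor could look like the hypothetical "_sketch" version below.
def construct_knn_approx_sketch(train_inputs, train_targets, k):
    """Return a function mapping an (M, D) input matrix to kNN-regression
    predictions: the mean target of the k nearest training points under
    Euclidean distance (illustrative only)."""
    train_inputs = np.asarray(train_inputs, dtype=float)
    train_targets = np.asarray(train_targets, dtype=float).reshape(-1)

    def predict(inputs):
        inputs = np.asarray(inputs, dtype=float)
        # pairwise squared Euclidean distances, shape (M, N)
        dists = ((inputs[:, None, :] - train_inputs[None, :, :])**2).sum(axis=2)
        # indices of the k nearest training points for each query point
        nearest = np.argsort(dists, axis=1)[:, :k]
        return train_targets[nearest].mean(axis=1)

    return predict
# -----------------------------------------------------------------------------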
def main():
    """
    This function contains example code that demonstrates how to use the
    imported helper functions to fit radial basis function (RBF) regression
    models to data.
    """
    # specify the centres of the rbf basis functions
    centres = np.linspace(0, 1, 7)
    # the width (analogous to standard deviation) of the basis functions
    scale = 0.15
    print("centres = %r" % (centres, ))
    print("scale = %r" % (scale, ))
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    datamtx = np.linspace(0, 1, 51)
    designmtx = feature_mapping(datamtx)
    # plot the basis functions themselves for reference
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for colid in range(designmtx.shape[1]):
        ax.plot(datamtx, designmtx[:, colid])
    ax.set_xlim([0, 1])
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])

    # choose the number of data-points and sample a pair of vectors: the
    # input values and the corresponding target values
    N = 20
    inputs, targets = sample_data(N, arbitrary_function_1, seed=37)
    # define the feature mapping for the data
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    # now construct the design matrix
    designmtx = feature_mapping(inputs)
    # find the weights that fit the data in a least squares way
    weights = ml_weights(designmtx, targets)
    # use the weights to create a function that takes inputs and returns
    # predictions; in python, functions can be passed around just like any
    # other object (those who know MATLAB might call this a function handle)
    rbf_approx = construct_feature_mapping_approx(feature_mapping, weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_approx, inputs, targets, arbitrary_function_1)
    ax.legend(lines, ['true function', 'data', 'linear approx'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf.pdf", format="pdf")

    # for a single choice of regularisation strength we can plot the
    # approximating function
    reg_param = 10**-3
    reg_weights = regularised_ml_weights(designmtx, targets, reg_param)
    rbf_reg_approx = construct_feature_mapping_approx(feature_mapping,
                                                      reg_weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_reg_approx, inputs, targets, arbitrary_function_1)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf_basis_functions_reg.pdf", format="pdf")

    # to find a good regularisation parameter, we can perform a parameter
    # search (a naive way to do this is to simply try a sequence of
    # reasonable values within a reasonable range)

    # sample some training and testing inputs
    train_inputs, train_targets = sample_data(N, arbitrary_function_1,
                                              seed=37)
    # we need to use a different seed for our test data, otherwise some of
    # our sampled points will be the same
    test_inputs, test_targets = sample_data(100, arbitrary_function_1,
                                            seed=82)
    # convert the raw inputs into feature vectors (construct design matrices)
    train_designmtx = feature_mapping(train_inputs)
    test_designmtx = feature_mapping(test_inputs)
    # now we're going to evaluate the train and test error for a sequence of
    # potential regularisation strengths, storing the results
    reg_params = np.logspace(-5, 1)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "$\lambda$", reg_params, train_errors, test_errors)
    ax.set_xscale('log')

    # we may also be interested in choosing the right number of centres, or
    # the right width/scale of the rbf functions.
    # here we vary the width and evaluate the performance
    reg_param = 10**-3
    scales = np.logspace(-2, 0)
    train_errors = []
    test_errors = []
    for scale in scales:
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the test and train error for this scale
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors, test_errors)
    ax.set_xscale('log')

    # here we vary the number of centres and evaluate the performance
    reg_param = 10**-3
    scale = 0.15
    n_centres_seq = np.arange(3, 20)
    train_errors = []
    test_errors = []
    for n_centres in n_centres_seq:
        # we must construct the feature mapping anew for each number of
        # centres
        centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the test and train error for this number of centres
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "Num. Centres", n_centres_seq, train_errors, test_errors)

    plt.show()
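# --- Illustrative sketch (not part of the original script) -------------------
# construct_rbf_feature_mapping is imported from the coursework helpers, so
# its exact form (in particular how 'scale' enters the exponent) is an
# assumption here. A minimal Gaussian-RBF mapping consistent with how it is
# used above could look like this hypothetical "_sketch" version:
# phi(X)[n, j] = exp(-||x_n - c_j||^2 / (2 * scale^2)).
def construct_rbf_feature_mapping_sketch(centres, scale):
    """Return a feature mapping from a data matrix (or 1d array) to an (N, M)
    design matrix with one Gaussian radial basis function per centre
    (illustrative only)."""
    centres = np.asarray(centres, dtype=float)
    if centres.ndim == 1:
        centres = centres.reshape(-1, 1)

    def feature_mapping(datamtx):
        X = np.asarray(datamtx, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        # squared distances between every data point and every centre: (N, M)
        sq_dists = ((X[:, None, :] - centres[None, :, :])**2).sum(axis=2)
        return np.exp(-sq_dists / (2 * scale**2))

    return feature_mapping
# -----------------------------------------------------------------------------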