def evaluate_errors_reg_param(inputs,
                              targets,
                              folds,
                              centres,
                              scale,
                              reg_params=None):
    """
    Cross-validate an RBF linear model over a range of regularisation
    parameters and return the mean test error for each one.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    folds -- cross-validation folds
    centres -- centres of the RBF basis functions
    scale -- width of the RBF basis functions
    reg_params -- optional array of regularisation strengths
        (defaults to np.logspace(-11, 0))

    returns
    -------
    array of mean cross-validated test errors, one per reg_param
    """
    # Build the feature mapping once; the design matrix is shared by all
    # regularisation choices.
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    # Default grid of regularisation strengths (log-spaced).
    if reg_params is None:
        reg_params = np.logspace(-11, 0)
    # One mean test error per candidate regularisation parameter.
    test_mean_errors = np.zeros(reg_params.size)
    for idx, reg_param in enumerate(reg_params):
        # Cross-validate the linear model with this regularisation strength.
        _, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # Record the average test error across the folds.
        test_mean_errors[idx] = np.mean(test_errors)

    return test_mean_errors
# --- Example 2 ---
def evaluate_reg_param(inputs,
                       targets,
                       folds,
                       centres,
                       scale,
                       reg_params=None):
    """
    Evaluate then plot the performance of different regularisation parameters.

    Cross-validates an RBF linear model for each regularisation strength and
    plots the mean train/test errors with +/- 1 standard-error bands.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    folds -- cross-validation folds
    centres -- centres of the RBF basis functions
    scale -- width of the RBF basis functions
    reg_params -- optional array of regularisation strengths
        (defaults to np.logspace(-2, 0))
    """
    # create the feature mapping and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    print("The design matrix shape is:", designmtx.shape)
    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-2, 0)
    num_values = reg_params.size
    num_folds = len(folds)  # in our case this is 5 which makes sense
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)

    # For each reg. parameter collect the per-fold train/test errors, then
    # record their mean and standard deviation.
    for r, reg_param in enumerate(reg_params):
        # r is the index of reg_param, reg_param is the regularisation parameter
        # cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_errors[r] = np.mean(train_errors)
        test_mean_errors[r] = np.mean(test_errors)
        train_stdev_errors[r] = np.std(train_errors)
        test_stdev_errors[r] = np.std(test_errors)

    # Now plot the results.
    # FIX: raw string avoids the invalid "\l" escape in "$\lambda$".
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors, test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of folds.
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')
    ax.set_xscale('log')
# --- Example 3 ---
def parameter_search_rbf(inputs, targets, test_fraction):
    """
    Joint grid-search over RBF scale and regularisation strength on a single
    train/test split; prints the best pair and plots the error curves that
    pass through it.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    test_fraction -- fraction of the data held out for testing
    """
    N = inputs.shape[0]
    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(N,
                                                 test_fraction=test_fraction)
    # sample ~15% of the data points as centres of the basis functions
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=N, p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # choices of regularisation strength
    # create empty 2d arrays to store the train and test errors
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    # iterate over the scales
    for i, scale in enumerate(scales):
        # we must recreate the feature mapping each time for different scales
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # partition the design matrix and targets into train and test
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(
                designmtx, targets, train_part, test_part)
        # iterate over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # train and test with this (scale, reg_param) pair
            train_error, test_error = train_and_test(train_designmtx,
                                                     train_targets,
                                                     test_designmtx,
                                                     test_targets,
                                                     reg_param=reg_param)
            # store the train and test errors in our 2d arrays
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # BUG FIX: the original computed argmin of an array of argmin indices and
    # then indexed with the stale loop variable i; unravel the flat argmin to
    # get the true (i, j) of the smallest test error.
    best_i, best_j = np.unravel_index(np.argmin(test_errors),
                                      test_errors.shape)
    print("Best joint choice of parameters:")
    print("\tscale %.2g and lambda = %.2g" %
          (scales[best_i], reg_params[best_j]))
    # plot the error for different scales using the best regularisation choice
    fig, ax = plot_train_test_errors("scale", scales, train_errors[:, best_j],
                                     test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and the error for different regularisation choices given the best
    # scale choice (raw string avoids the invalid "\l" escape)
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_errors[best_i, :],
                                     test_errors[best_i, :])
    ax.set_xscale('log')
def evaluate_scale(inputs, targets, folds, centres, reg_param, scales=None):
    """
    Evaluate then plot the performance of different basis function scales.

    For each candidate scale the RBF design matrix is rebuilt and a
    regularised linear model is cross-validated; mean train/test errors are
    plotted with +/- 1 standard-error bands and the figure is saved.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    folds -- cross-validation folds
    centres -- centres of the RBF basis functions
    reg_param -- regularisation strength used for every scale
    scales -- optional array of scales (defaults to np.logspace(0, 6, 20))
    """
    # choose a range of scales
    if scales is None:
        scales = np.logspace(0, 6, 20)  # of the basis functions
    num_values = scales.size
    num_folds = len(folds)
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)

    for s, scale in enumerate(scales):
        # the design matrix must be rebuilt for each scale
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # cross validate with this scale (fixed regularisation parameter)
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # store the mean and standard deviation of the per-fold errors
        train_mean_errors[s] = np.mean(train_errors)
        test_mean_errors[s] = np.mean(test_errors)
        train_stdev_errors[s] = np.std(train_errors)
        test_stdev_errors[s] = np.std(test_errors)

    # Now plot the results
    fig, ax = plot_train_test_errors("scale", scales, train_mean_errors,
                                     test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of folds.
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(scales, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(scales, lower, upper, alpha=0.2, color='r')
    ax.set_xscale('log')
    # ax.set_xlim([0, 100])

    ax.set_title('Train vs Test Error Across Scales With Cross-Validation')
    # BUG FIX: savefig's keyword is "format", not "fmt" (unknown kwargs are
    # rejected by recent matplotlib versions).
    fig.savefig("../plots/rbf_searching_scales_cross_validation.pdf",
                format="pdf")
# --- Example 5 ---
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction,
                                        test_error_linear):
    """
    Evaluate an RBF feature mapping over a range of regularisation strengths
    and plot the train/test errors against the linear-model test error
    baseline.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    test_fraction -- fraction of the data held out for testing
    test_error_linear -- test error of a plain linear model, drawn as a
        reference line on the plot
    """
    # for rbf feature mappings
    # for the centres of the basis functions choose 10% of the data
    n = inputs.shape[0]
    centres = inputs[
        np.random.choice([False, True], size=n, p=[0.90, 0.10]), :]
    print("centres shape = %r" % (centres.shape, ))

    # the width (analogous to standard deviation) of the basis functions
    scale = 8.5  # of the basis functions
    print("centres = %r" % (centres, ))
    print("scale = %r" % (scale, ))

    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    design_matrix = feature_mapping(inputs)

    # NOTE: this split is only used for the debug shape prints below;
    # simple_evaluation_linear_model performs its own split internally.
    train_part, test_part = train_and_test_split(n,
                                                 test_fraction=test_fraction)
    train_design_matrix, train_targets, test_design_matrix, test_targets = \
        train_and_test_partition(
            design_matrix, targets, train_part, test_part)

    # outputting the shapes of the train and test parts for debugging
    print("training design matrix shape = %r" % (train_design_matrix.shape, ))
    print("testing design matrix shape = %r" % (test_design_matrix.shape, ))
    print("training targets shape = %r" % (train_targets.shape, ))
    print("testing targets shape = %r" % (test_targets.shape, ) + "\n")

    # the rbf feature mapping performance
    reg_params = np.logspace(-15, 5, 20)
    train_errors = []
    test_errors = []

    for reg_param in reg_params:
        print("Evaluating reg. parameter " + str(reg_param))
        train_error, test_error = simple_evaluation_linear_model(
            design_matrix,
            targets,
            test_fraction=test_fraction,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)

    # FIX: raw string avoids the invalid "\l" escape in "$\lambda$"
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params, train_errors,
                                     test_errors)

    # plotting a straight line showing the linear performance
    x_lim = ax.get_xlim()
    ax.plot(x_lim, test_error_linear * np.ones(2), 'g:')

    ax.set_xscale('log')
    ax.set_title('Evaluating RBF Performance')
    # BUG FIX: savefig's keyword is "format", not "fmt"
    fig.savefig("../plots/rbf_vs_linear.pdf", format="pdf")
# --- Example 6 ---
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction,
                                        test_error_linear):
    """
    Evaluate an RBF feature mapping across regularisation strengths and plot
    the train/test errors with the linear-model test error as a baseline.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    test_fraction -- fraction of the data held out for testing
    test_error_linear -- test error of a plain linear model, drawn as a
        reference line on the plot
    """
    # for rbf feature mappings
    # for the centres of the basis functions choose 10% of the data
    N = inputs.shape[0]
    centres = inputs[np.random.choice([False, True], size=N, p=[0.9, 0.1]), :]
    print("centres.shape = %r" % (centres.shape, ))
    scale = 10.  # of the basis functions
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    # NOTE: this split only feeds the debug shape prints below;
    # simple_evaluation_linear_model performs its own split internally.
    train_part, test_part = train_and_test_split(N,
                                                 test_fraction=test_fraction)
    train_designmtx, train_targets, test_designmtx, test_targets = \
        train_and_test_partition(
            designmtx, targets, train_part, test_part)
    # output the shapes of the train and test parts for debugging
    print("train_designmtx.shape = %r" % (train_designmtx.shape, ))
    print("test_designmtx.shape = %r" % (test_designmtx.shape, ))
    print("train_targets.shape = %r" % (train_targets.shape, ))
    print("test_targets.shape = %r" % (test_targets.shape, ))
    # the rbf feature mapping performance
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        print("Evaluating reg_para " + str(reg_param))
        train_error, test_error = simple_evaluation_linear_model(
            designmtx,
            targets,
            test_fraction=test_fraction,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)

    # FIX: raw string avoids the invalid "\l" escape in "$\lambda$"
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params, train_errors,
                                     test_errors)
    # we also want to plot a straight line showing the linear performance
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    ax.set_xscale('log')
def evaluate_errors_num_centres(inputs,
                                targets,
                                folds,
                                scale,
                                reg_param,
                                num_centres_sequence=None):
    """
      Evaluate then plot the performance of different numbers of basis
      function centres.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    folds -- cross-validation folds
    scale -- width of the RBF basis functions
    reg_param -- regularisation strength
    num_centres_sequence -- optional array of centre counts
        (defaults to np.arange(200, 250))

    returns
    -------
    array of mean cross-validated test errors, one per centre count
    """
    # BUG FIX: the original unconditionally overwrote the reg_param and scale
    # arguments with hard-coded values (0.01 and 100), silently ignoring the
    # caller's choices; the passed-in parameters are now honoured.
    # choose a range of numbers of centres
    if num_centres_sequence is None:
        num_centres_sequence = np.arange(200, 250)
    num_values = num_centres_sequence.size

    # create array to store results
    test_mean_errors = np.zeros(num_values)

    # run the experiments
    for c, num_centres in enumerate(num_centres_sequence):
        # evenly spaced centres in [0, 1]
        centres = np.linspace(0, 1, num_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # cross validate with this number of centres
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # store the mean test error across the folds
        test_mean_errors[c] = np.mean(test_errors)

    return test_mean_errors
def evaluate_reg_param(inputs,
                       targets,
                       folds,
                       centres,
                       scale,
                       test_error_linear,
                       reg_params=None):
    """
      Evaluate, then plot the performance of different regularisation parameters.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    folds -- cross-validation folds
    centres -- centres of the RBF basis functions
    scale -- width of the RBF basis functions
    test_error_linear -- linear-model baseline passed through to the plot
    reg_params -- optional array of regularisation strengths
        (defaults to np.logspace(-15, 5, 30))
    """
    # creating the feature mapping and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    design_matrix = feature_mapping(inputs)

    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-15, 5,
                                 30)  # choices of regularisation strength

    num_values = reg_params.size
    num_folds = len(folds)
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_st_dev_errors = np.zeros(num_values)
    test_st_dev_errors = np.zeros(num_values)

    print(
        'Calculating means and standard deviations of train and test errors...'
    )
    for r, reg_param in enumerate(reg_params):
        # r is the index of reg_param, reg_param is the regularisation parameter
        # cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            design_matrix, targets, folds, reg_param=reg_param)
        # storing the mean and std deviation of the per-fold errors
        train_mean_errors[r] = np.mean(train_errors)
        test_mean_errors[r] = np.mean(test_errors)
        train_st_dev_errors[r] = np.std(train_errors)
        test_st_dev_errors[r] = np.std(test_errors)

    # plotting the results
    # FIX: raw string avoids the invalid "\l" escape in "$\lambda$"
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors, test_mean_errors,
                                     test_error_linear)

    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of folds.
    # train error bars
    lower = train_mean_errors - train_st_dev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_st_dev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')

    ax.set_xscale('log')
    ax.set_ylim([0, 1])

    ax.set_title(
        'Train vs Test Error across Reg. Param. with Cross-validation')
    # BUG FIX: savefig's keyword is "format", not "fmt"
    fig.savefig("../plots/rbf/rbf_searching_reg_params_cross_validation.png",
                format="png")

    plt.show()
# --- Example 9 ---
def main(ifname=None,
         delimiter=None,
         columns=None,
         normalise=None,
         features=None):
    """
    To be called when the script is run. This function fits and plots imported
    data (if a filename is provided). Data is 2 dimensional real valued data
    and is fit with maximum likelihood 2d gaussian.

    parameters
    ----------
    ifname -- filename/path of data file.
    delimiter -- delimiter of data values
    has_header -- does the data-file have a header line
    columns -- a list of integers specifying which columns of the file to
        import (counting from 0)
    normalise -- "Y"/"N" to control normalisation (prompted for if None)
    features -- list of column indices to use as inputs (prompted for if None)
    """
    # if no file name is provided then use synthetic data
    if ifname is None:
        print("You need to ingest the CSV file")
    else:
        data, field_names = import_data(ifname,
                                        delimiter=delimiter,
                                        has_header=True,
                                        columns=columns)

        # DATA PREPARATION-----------------------------------------------
        N = data.shape[0]
        # columns from index 11 onwards are the target values
        target = data[:, 11:]

        # Ask user to confirm whether to normalise or not
        # FIX: compare with None via "is", not "=="
        if normalise is None:
            normalise_response = input(
                "Do you want to normalise the data? (Y/N)")
            normalise = normalise_response.upper()
            normalise_label = ""

        if normalise == "Y":
            normalise_label = "_normalised"
            # Standardise the 11 input columns in one vectorised step
            # (per-column mean subtraction and division by std deviation),
            # replacing the original 22-line column-by-column version.
            input_cols = data[:, :11]
            data[:, :11] = ((input_cols - np.mean(input_cols, axis=0)) /
                            np.std(input_cols, axis=0))
        elif normalise != "N":
            sys.exit("Please enter valid reponse of Y or N")

        if features is None:
            feature_response = input(
                "Please specify which feature combination you want (e.g.1,2,5,7)"
            )
            feature_response = feature_response.split(",")
            # need to convert list of strings into list of integer
            feature_combin = []
            for i in range(len(feature_response)):
                print(feature_response[i])
                feature_combin.append(int(feature_response[i]))
        else:
            feature_combin = features

        # Select the requested feature columns as the input matrix.
        # This replaces the original append/reshape/rot90 sequence, which was
        # exactly equivalent to fancy-indexing the columns.
        inputs = data[:, feature_combin]
        #print("INPUT: ", inputs)

        # Plotting RBF Model ----------------------------------------------------------
        # specify the centres of the rbf basis functions
        centres = np.asarray([
            0.35, 0.4, 0.45, 0.459090909, 0.468181818, 0.477272727,
            0.486363636, 0.495454545, 0.504545455, 0.513636364, 0.522727273,
            0.531818182, 0.540909091, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
            0.62, 0.63, 0.64, 0.65, 0.7, 0.75, 0.8
        ])
        # the width (analogous to standard deviation) of the basis functions
        scale = 450
        reg_param = 7.906043210907701e-11

        print("centres = %r" % (centres, ))
        print("scale = %r" % (scale, ))
        print("reg param = %r" % (reg_param, ))

        # create the feature mapping
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        # plot the basis functions themselves for reference
        #display_basis_functions(feature_mapping)
        # now construct the design matrix for the inputs
        designmtx = feature_mapping(inputs)
        # the number of features is the width of this matrix
        print("DESIGN MATRIX: ", designmtx)

        if reg_param is None:
            # use simple least squares approach
            weights = ml_weights(designmtx, target)
        else:
            # use regularised least squares approach
            weights = regularised_ml_weights(designmtx, target, reg_param)

        # get the cross-validation folds
        num_folds = 4
        folds = create_cv_folds(N, num_folds)

        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, target, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        print("TRAIN MEAN ERROR: ", train_mean_error)
        print("TEST MEAN ERROR: ", test_mean_error)
        print("TRAIN STDEV ERROR: ", train_stdev_error)
        print("TEST STDEV ERROR: ", test_stdev_error)
        print("ML WEIGHTS: ", weights)
        apply_validation_set(feature_combin, feature_mapping, weights)
def main():
    """
    Demonstrates Bayesian regression with RBF basis functions: computes the
    posterior over weights, plots the mean (MAP) approximation together with
    posterior samples, then plots the predictive distribution with error
    bands.
    """
    # specify the centres of the rbf basis functions
    centres = np.linspace(0, 1, 9)
    # the width (analogous to standard deviation) of the basis functions
    scale = 0.1
    print("centres = %r" % (centres, ))
    print("scale = %r" % (scale, ))
    # create the feature mapping
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    # plot the basis functions themselves for reference
    display_basis_functions(feature_mapping)

    # sample number of data-points: inputs and targets
    N = 9
    # define the noise precision of our data
    beta = (1. / 0.1)**2
    inputs, targets = sample_data(N,
                                  arbitrary_function_1,
                                  noise=np.sqrt(1. / beta),
                                  seed=37)
    # now construct the design matrix for the inputs
    designmtx = feature_mapping(inputs)
    # the number of features is the width of this matrix
    M = designmtx.shape[1]
    # define a prior mean and covariance matrix
    m0 = np.zeros(M)
    alpha = 100
    S0 = alpha * np.identity(M)
    # find the posterior over weights
    mN, SN = calculate_weights_posterior(designmtx, targets, beta, m0, S0)
    # the posterior mean (also the MAP) gives the central prediction
    mean_approx = construct_feature_mapping_approx(feature_mapping, mN)
    fig, ax, lines = plot_function_data_and_approximation(
        mean_approx, inputs, targets, arbitrary_function_1)
    # now plot a number of samples from the posterior
    xs = np.linspace(0, 1, 101)
    print("mN = %r" % (mN, ))
    for _ in range(20):  # loop index is unused
        weights_sample = np.random.multivariate_normal(mN, SN)
        sample_approx = construct_feature_mapping_approx(
            feature_mapping, weights_sample)
        sample_ys = sample_approx(xs)
        line, = ax.plot(xs, sample_ys, 'm', linewidth=0.5)
    # only the last sample's line artist is added to the legend
    lines.append(line)
    ax.legend(lines, ['true function', 'data', 'mean approx', 'samples'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    # BUG FIX: savefig's keyword is "format", not "fmt"
    fig.savefig("regression_bayesian_rbf.pdf", format="pdf")

    # now for the predictive distribution
    new_inputs = np.linspace(0, 1, 51)
    new_designmtx = feature_mapping(new_inputs)
    ys, sigma2Ns = predictive_distribution(new_designmtx, beta, mN, SN)
    print("(sigma2Ns**0.5).shape = %r" % ((sigma2Ns**0.5).shape, ))
    print("np.sqrt(sigma2Ns).shape = %r" % (np.sqrt(sigma2Ns).shape, ))
    print("ys.shape = %r" % (ys.shape, ))
    fig, ax, lines = plot_function_and_data(inputs, targets,
                                            arbitrary_function_1)
    ax.plot(new_inputs, ys, 'r', linewidth=3)
    # predictive mean plus/minus one predictive standard deviation
    lower = ys - np.sqrt(sigma2Ns)
    upper = ys + np.sqrt(sigma2Ns)
    print("lower.shape = %r" % (lower.shape, ))
    print("upper.shape = %r" % (upper.shape, ))
    ax.fill_between(new_inputs, lower, upper, alpha=0.2, color='r')

    plt.show()
def parameter_search_rbf_without_cross(inputs, targets, test_fraction,
                                       test_error_linear, normalize=True):
    """
    Grid-search RBF regression over centre sample-fraction, scale and
    regularisation strength on a single train/test split (no
    cross-validation), then plot the error curves through the best joint
    choice against the linear-model baseline.

    NOTE: when normalize is True the input array is standardised IN PLACE,
    so the caller's array is modified.

    parameters
    ----------
    inputs -- input data matrix (one row per datapoint)
    targets -- target values
    test_fraction -- fraction of the data held out for testing
    test_error_linear -- linear-model test error, drawn as a reference line
    normalize -- standardise each input column first (default True)
    """
    if normalize:
        # normalise inputs (meaning radial basis functions are more helpful)
        for i in range(inputs.shape[1]):
            inputs[:, i] = ((inputs[:, i] - np.mean(inputs[:, i])) /
                            np.std(inputs[:, i]))
    N = inputs.shape[0]

    # candidate fractions of the data to sample as basis-function centres
    sample_fractions = np.array([0.05, 0.1, 0.15, 0.2, 0.25])
    scales = np.logspace(0, 4, 20)  # of the basis functions
    reg_params = np.logspace(-16, -1, 20)  # choices of regularisation strength
    # create empty 3d arrays to store the train and test errors
    train_mean_errors = np.empty(
        (sample_fractions.size, scales.size, reg_params.size))
    test_mean_errors = np.empty(
        (sample_fractions.size, scales.size, reg_params.size))

    # One fixed train/test split (two boolean masks) reused for every
    # parameter combination.
    train_part, test_part = train_and_test_split(
        N, test_fraction=test_fraction)
    best_k = 0
    best_i = 0
    best_j = 0
    # FIX: use infinity as the initial "best" sentinel instead of 10**100
    best_test_error = float('inf')

    # loop through the possible centre fractions (5%, 10%, 15%, 20%, 25%)
    for k, sample_fraction in enumerate(sample_fractions):
        p = (1 - sample_fraction, sample_fraction)
        centres = inputs[np.random.choice([False, True], size=N, p=p), :]
        # iterate over the scales
        for i, scale in enumerate(scales):
            # the feature mapping must be recreated for each scale
            feature_mapping = construct_rbf_feature_mapping(centres, scale)
            designmtx = feature_mapping(inputs)
            # split the design matrix and targets with the boolean masks
            train_designmtx, train_targets, test_designmtx, test_targets = \
                train_and_test_partition(
                    designmtx, targets, train_part, test_part)
            # iterate over the regularisation parameters
            for j, reg_param in enumerate(reg_params):
                # train and test the data
                train_error, test_error, weights = train_and_test(
                    train_designmtx, train_targets, test_designmtx,
                    test_targets, reg_param=reg_param)
                # store the train and test errors in our 3d arrays
                train_mean_errors[k, i, j] = train_error
                test_mean_errors[k, i, j] = test_error
                # remember the indices of the lowest test error seen so far
                # (test_error is a scalar, so the original np.mean was a no-op)
                if test_error < best_test_error:
                    best_test_error = test_error
                    best_k = k
                    best_i = i
                    best_j = j
    print("The value with the lowest error is:",
          test_mean_errors[best_k][best_i][best_j])
    print("Best joint choice of parameters: sample fractions %.2g scale %.2g and lambda = %.2g"
          % (sample_fractions[best_k], scales[best_i], reg_params[best_j]))

    # error vs scale at the best regularisation parameter & centre fraction
    fig, ax = plot_train_test_errors(
        "scale", scales, train_mean_errors[best_k, :, best_j],
        test_mean_errors[best_k, :, best_j])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best reg. parameter & centres',
                 fontsize=10)
    xlim = ax.get_xlim()  # get the xlim to graph the linear regression
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')

    # error vs regularisation at the best scale & centre fraction
    # (raw string avoids the invalid "\l" escape)
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_mean_errors[best_k, best_i, :],
        test_mean_errors[best_k, best_i, :])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best scale parameter & centres',
                 fontsize=10)
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    # #ax.set_ylim([0,20])

    # error vs centre fraction at the best scale & regularisation parameter
    fig, ax = plot_train_test_errors(
        "sample fractions", sample_fractions,
        train_mean_errors[:, best_i, best_j],
        test_mean_errors[:, best_i, best_j])
    fig.suptitle('RBF regression for the best scale parameter & reg. parameter',
                 fontsize=10)
    ax.set_xlim([0.05, 0.25])
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
def evaluate_num_centres(inputs,
                         targets,
                         folds,
                         scale,
                         reg_param,
                         num_centres_sequence=None):
    """
    Cross-validate RBF models with varying numbers of basis-function
    centres, then plot the mean train/test errors with +/- 1
    standard-error bands and save the figure to ../plots/.
    """
    # default search range: 1..19 centres
    if num_centres_sequence is None:
        num_centres_sequence = np.arange(1, 20)
    num_values = num_centres_sequence.size
    num_folds = len(folds)

    # per-centre-count summaries of the fold errors
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)

    # run the experiments: one cross-validation per number of centres
    for idx, n_centres in enumerate(num_centres_sequence):
        # centres evenly spaced in [0, 1]
        centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # cross-validate this design matrix with the fixed reg_param
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # summarise the per-fold errors
        train_mean_errors[idx] = np.mean(train_errors)
        test_mean_errors[idx] = np.mean(test_errors)
        train_stdev_errors[idx] = np.std(train_errors)
        test_stdev_errors[idx] = np.std(test_errors)

    # plot mean errors against number of centres
    fig, ax = plot_train_test_errors("no. centres", num_centres_sequence,
                                     train_mean_errors, test_mean_errors)
    # error bands: mean +/- 1 standard error, i.e. std / sqrt(n) where n
    # is the number of folds. (Other error-bar choices exist.)
    half_width = train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence,
                    train_mean_errors - half_width,
                    train_mean_errors + half_width,
                    alpha=0.2, color='b')
    half_width = test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence,
                    test_mean_errors - half_width,
                    test_mean_errors + half_width,
                    alpha=0.2, color='r')
    ax.set_title(
        'Train vs Test Error Across Centre Number With Cross-Validation')
    fig.savefig("../plots/rbf_searching_number_centres_cross_validation.pdf",
                fmt="pdf")
def parameter_search_rbf_cross(inputs, targets, folds,test_error_linear,test_inputs,test_targets,normalize=True):
    """
    Grid-search an RBF regression model over centre sample fraction,
    basis-function scale and regularisation strength using
    cross-validation, plot the error curves around the best point, and
    report the final error on the held-out test set.

    Parameters
    ----------
    inputs, targets : training data. `inputs` (and `test_inputs`) are
        standardised IN PLACE when `normalize` is True.
    folds : fold specification for cv_evaluation_linear_model.
    test_error_linear : linear-regression test error, drawn as a dotted
        reference line on every plot.
    test_inputs, test_targets : held-out data used only for the final
        error estimate.
    normalize : if True, standardise every input column.
    """
    if(normalize):
        # normalise inputs (meaning radial basis functions are more helpful)
        # NOTE(fix): the held-out inputs are scaled with the TRAINING
        # mean/std. The original normalised test_inputs with their own
        # statistics, which leaks test-set information and makes the two
        # sets inconsistent whenever their distributions differ.
        for i in range(inputs.shape[1]):
            mu = np.mean(inputs[:,i])
            sd = np.std(inputs[:,i])
            inputs[:,i]=(inputs[:,i]-mu)/sd
            test_inputs[:,i]=(test_inputs[:,i]-mu)/sd
    N = inputs.shape[0]

    # candidate centre fractions, basis widths and regularisation strengths
    sample_fractions = np.array([0.05,0.1,0.15,0.2,0.25])
    scales = np.logspace(0,4,20 ) # of the basis functions
    reg_params = np.logspace(-16,-1, 20) # choices of regularisation strength.
    # 3d arrays of mean cv errors, indexed (fraction, scale, reg_param)
    train_mean_errors = np.empty((sample_fractions.size,scales.size,reg_params.size))
    test_mean_errors = np.empty((sample_fractions.size,scales.size,reg_params.size))

    best_k=0
    best_i=0
    best_j=0
    test_error_temp=10**100  # sentinel: any real error beats this

    #loop through the possible centres as a percentage (5%, 10%,15%, 20%, 25%)
    for k,sample_fraction in enumerate(sample_fractions):
        # randomly pick ~sample_fraction of the data rows as centres
        p = (1-sample_fraction,sample_fraction)
        centres = inputs[np.random.choice([False,True], size=N, p=p),:]
        # iterate over the scales
        for i,scale in enumerate(scales):
            # the feature mapping must be rebuilt for each scale
            feature_mapping = construct_rbf_feature_mapping(centres,scale)
            designmtx = feature_mapping(inputs)
            # iterate over the regularisation parameters
            for j, reg_param in enumerate(reg_params):
                train_error, test_error,weights = cv_evaluation_linear_model(designmtx, targets, folds,reg_param=reg_param)

                # track the configuration with the lowest mean cv test error
                if (np.mean(test_error)<test_error_temp):
                    test_error_temp=np.mean(test_error)
                    best_k=k
                    best_i=i
                    best_j=j
                    optimal_weights=weights
                    optimal_feature_mapping=feature_mapping

                # store the train and test errors in our 3d matrix
                train_mean_errors[k,i,j] = np.mean(train_error)
                test_mean_errors[k,i,j] = np.mean(test_error)

    print ("The value with the lowest test error at the training stage is:",test_mean_errors[best_k][best_i][best_j])
    print("Best joint choice of parameters: sample fractions %.2g scale %.2g and lambda = %.2g" % (sample_fractions[best_k],scales[best_i],reg_params[best_j]))


    # error across scales at the best reg. parameter and centre fraction
    fig , ax = plot_train_test_errors("scale", scales, train_mean_errors[best_k,:,best_j], test_mean_errors[best_k,:,best_j])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best reg. parameter & centres using cross-validation', fontsize=10)
    xlim = ax.get_xlim()#get the xlim to graph the linear regression
    ax.plot(xlim, test_error_linear*np.ones(2), 'g:') #graph the linear regression


    # ...and across regularisation choices at the best scale and centre fraction
    fig , ax = plot_train_test_errors(r"$\lambda$", reg_params, train_mean_errors[best_k,best_i,:], test_mean_errors[best_k,best_i,:])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best scale parameter & centres using cross-validation', fontsize=10)
    xlim = ax.get_xlim()#get the xlim to graph the linear regression
    ax.plot(xlim, test_error_linear*np.ones(2), 'g:')


    # ...and across centre fractions at the best reg. parameter and scale
    fig , ax = plot_train_test_errors("sample fractions", sample_fractions, train_mean_errors[:,best_i,best_j], test_mean_errors[:,best_i,best_j])
    fig.suptitle('RBF regression for the best scale parameter & reg. parameter using cross-validation', fontsize=10)
    ax.set_xlim([0.05, 0.25])
    xlim = ax.get_xlim()#get the xlim to graph the linear regression
    ax.plot(xlim, test_error_linear*np.ones(2), 'g:')

    # evaluate the best model found during the search on the held-out set
    predictive_func=construct_feature_mapping_approx(optimal_feature_mapping, optimal_weights)

    final_error=root_mean_squared_error(test_targets,predictive_func(test_inputs))
    print("final test error for RBF model:",final_error)
def parameter_search_rbf(inputs, targets, test_fraction, folds):
    """
    Grid-search scale and regularisation strength for an RBF regression
    model using cross-validation, plot the error curves and return the
    best (scale, reg_param) pair.

    Parameters
    ----------
    inputs, targets : data to fit.
    test_fraction : unused here; kept for interface compatibility.
    folds : fold specification for cv_evaluation_linear_model.

    Returns
    -------
    (best_scale, best_reg_param)
    """
    n = inputs.shape[0]

    # for the centres of the basis functions sample ~5% of the data rows
    sample_fraction = 0.05
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=n, p=p), :]
    print("\ncentres.shape = %r" % (centres.shape, ))

    scales = np.logspace(0, 4, 20)  # widths of the basis functions
    reg_params = np.logspace(-16, -1, 20)  # choices of regularisation strength
    # 2d arrays of mean cv errors, indexed (scale, reg_param)
    train_mean_errors = np.empty((scales.size, reg_params.size))
    test_mean_errors = np.empty((scales.size, reg_params.size))

    # iterate over the scales
    for i, scale in enumerate(scales):
        # the feature mapping must be rebuilt for each scale
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # iterate over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # cross-validate this (scale, reg_param) combination
            train_error, test_error = cv_evaluation_linear_model(
                designmtx, targets, folds, reg_param=reg_param)
            # store the mean train and test errors in our 2d arrays
            train_mean_errors[i, j] = np.mean(train_error)
            test_mean_errors[i, j] = np.mean(test_error)

    # NOTE(fix): locate the joint minimum with unravel_index. The original
    # computed `argmin(argmin(test_mean_errors, axis=1))` and also indexed
    # with the stale loop variable `i` — both wrong — and only recovered
    # via a flat-argmin + div/mod fallback, which unravel_index replaces.
    best_i, best_j = np.unravel_index(np.argmin(test_mean_errors),
                                      test_mean_errors.shape)
    print("\nBest joint choice of parameters:")
    print("\tscale %.2g and lambda = %.2g" %
          (scales[best_i], reg_params[best_j]))

    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors("scale", scales,
                                     train_mean_errors[:, best_j],
                                     test_mean_errors[:, best_j])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Scales')
    fig.savefig("../plots/rbf_searching_scales.pdf", fmt="pdf")

    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors[best_i, :],
                                     test_mean_errors[best_i, :])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Reg Params')
    fig.savefig("../plots/rbf_searching_reg_params.pdf", fmt="pdf")
    '''
    # using the best parameters found above,
    # we now vary the number of centres and evaluate the performance
    reg_param = reg_params[best_j]
    scale = scales[best_i]
    n_centres_seq = np.arange(1, 20)
    train_errors = []
    test_errors = []
    for n_centres in n_centres_seq:
        # constructing the feature mapping anew for each number of centres
        centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        design_matrix = feature_mapping(inputs)

        # evaluating the test and train error for the given regularisation parameter and scale
        train_error, test_error = cv_evaluation_linear_model(
            design_matrix, targets, folds, reg_param=reg_param)

        # collecting the errors
        train_errors.append(train_error)
        test_errors.append(test_error)

    # plotting the results
    fig, ax = plot_train_test_errors(
        "no. centres", n_centres_seq, train_errors, test_errors)
    ax.set_title('Train vs Test Error Across Centre Number')
    fig.savefig("../plots/rbf_searching_number_centres.pdf", fmt="pdf")
    '''

    return scales[best_i], reg_params[best_j]
def main(ifname,
         delimiter=None,
         columns=None,
         has_header=True,
         test_fraction=0.25):
    """
    Load the wine-quality data and run the full experiment suite:
    exploratory analysis, chi-square feature selection, then linear,
    regularised-linear, kNN, RBF and polynomial regression, printing
    the errors and producing comparison plots for each model.
    """
    data, field_names = import_data(ifname,
                                    delimiter=delimiter,
                                    has_header=has_header,
                                    columns=columns)

    # Exploratory Data Analysis (EDA)
    # NOTE(review): the path is hard-coded and ignores `ifname` —
    # presumably both point at the same file; confirm against callers.
    raw_data = pd.read_csv('datafile.csv', sep=";")

    # view correlation coefficients where |r|=1 is the strongest relation
    # and |r|=0 the weakest
    df = pd.DataFrame(data=raw_data)
    print(df.corr())

    # view data if it is normally distributed
    plt.hist(raw_data["quality"],
             range=(1, 10),
             edgecolor='black',
             linewidth=1)
    plt.xlabel('quality')
    plt.ylabel('amount of samples')
    plt.title("distribution of red wine quality")

    # feature selection
    import scipy.stats as stats
    from scipy.stats import chi2_contingency

    class ChiSquare:
        """Chi-square test of independence between a feature and the target."""

        def __init__(self, dataframe):
            self.df = dataframe
            self.p = None  # P-Value
            self.chi2 = None  # Chi Test Statistic
            self.dof = None  # degrees of freedom

            self.dfObserved = None
            self.dfExpected = None

        def _print_chisquare_result(self, colX, alpha):
            # significant p-value -> the feature is informative
            result = ""
            if self.p < alpha:
                result = "{0} is IMPORTANT for Prediction".format(colX)
            else:
                result = "{0} is NOT an important predictor. (Discard {0} from model)".format(
                    colX)

            print(result)

        def TestIndependence(self, colX, colY, alpha=0.05):
            # build the observed contingency table and run the test
            X = self.df[colX].astype(str)
            Y = self.df[colY].astype(str)

            self.dfObserved = pd.crosstab(Y, X)
            chi2, p, dof, expected = stats.chi2_contingency(
                self.dfObserved.values)
            self.p = p
            self.chi2 = chi2
            self.dof = dof

            self.dfExpected = pd.DataFrame(expected,
                                           columns=self.dfObserved.columns,
                                           index=self.dfObserved.index)

            self._print_chisquare_result(colX, alpha)
            print('self:%s' % (self), self.chi2, self.p)

    # Initialize ChiSquare Class
    cT = ChiSquare(raw_data)

    # Feature Selection
    testColumns = [
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol"
    ]
    for var in testColumns:
        cT.TestIndependence(colX=var, colY="quality")
    # split data into inputs and targets
    inputs = data[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
    targets = data[:, 11]
    # keep named views of each input column for normalisation below
    fixed_acidity = inputs[:, 0]
    volatile_acidity = inputs[:, 1]
    citric_acid = inputs[:, 2]
    residual_sugar = inputs[:, 3]
    chlorides = inputs[:, 4]
    free_sulfur_dioxide = inputs[:, 5]
    total_sulfur_dioxide = inputs[:, 6]
    density = inputs[:, 7]
    ph = inputs[:, 8]
    sulphates = inputs[:, 9]
    alcohol = inputs[:, 10]

    # draw plot of data set
    normalised_data = np.column_stack((inputs, targets))
    exploratory_plots(normalised_data, field_names)

    # add a column of x0 = 1 (overwrites the fixed-acidity column)
    inputs[:, 0] = np.ones(len(targets))
    # standardise the columns kept by the model below
    inputs[:, 1] = (volatile_acidity -
                    np.mean(volatile_acidity)) / np.std(volatile_acidity)
    inputs[:, 2] = (citric_acid - np.mean(citric_acid)) / np.std(citric_acid)
    inputs[:, 7] = (density - np.mean(density)) / np.std(density)
    inputs[:, 9] = (sulphates - np.mean(sulphates)) / np.std(sulphates)
    inputs[:, 10] = (alcohol - np.mean(alcohol)) / np.std(alcohol)
    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(inputs.shape[0],
                                                 test_fraction=test_fraction)

    # another evaluation function: coefficient of determination (R^2)
    def rsquare(test_targets, test_predicts):
        y_mean = np.mean(test_targets)
        ss_tot = sum((test_targets - y_mean)**2)
        ss_res = sum((test_targets - test_predicts)**2)
        rsquare = 1 - (ss_res / ss_tot)
        return rsquare

    print(
        '---------------------------Linear Regression-----------------------------------'
    )

    # linear regression
    # keep the bias column plus the standardised features
    inputs = inputs[:, [0, 1, 2, 7, 9, 10]]
    #train_part, test_part = train_and_test_split(inputs.shape[0], test_fraction=test_fraction)
    train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
        inputs, targets, train_part, test_part)
    weights = ml_weights(train_inputs, train_targets)
    train_predicts = linear_model_predict(train_inputs, weights)
    test_predicts = linear_model_predict(test_inputs, weights)
    train_error = root_mean_squared_error(train_targets, train_predicts)
    test_error = root_mean_squared_error(test_targets, test_predicts)
    print("LR-train_weights", weights)
    print("LR-train_error", train_error)
    print("LR-test_error", test_error)
    print("LR-rsquare score", rsquare(test_targets, test_predicts))
    print("LR-prediction:", test_predicts[:20], "LR-original",
          test_targets[:20])

    print(
        '----------------Regularised Linear Regression-----------------------------'
    )

    # regularised linear regression: sweep the regularisation strength
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # print("RLR-Evaluating reg_para " + str(reg_param))
        train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
            inputs, targets, train_part, test_part)
        reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                             reg_param)
        train_predicts = linear_model_predict(train_inputs, reg_weights)
        test_predicts = linear_model_predict(test_inputs, reg_weights)
        train_error = root_mean_squared_error(train_targets, train_predicts)
        test_error = root_mean_squared_error(test_targets, test_predicts)
        train_errors.append(train_error)
        test_errors.append(test_error)

    # best lambda
    test_errors = np.array(test_errors)
    best_l = np.argmin(test_errors)
    print("RLR-Best joint choice of parameters:")
    print("RLR-lambda = %.2g" % (reg_params[best_l]))
    # plot train_test_errors in different reg_params
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params, train_errors,
                                     test_errors)
    ax.set_xscale('log')
    # NOTE(fix): the original passed the *index* best_l as the
    # regularisation strength; the actual lambda is reg_params[best_l].
    reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                         reg_params[best_l])
    print("RLR-train_weights", reg_weights)
    print("RLR-train_error", train_errors[best_l])
    print("RLR-test_error", test_errors[best_l])
    print("RLR-rsquare score", rsquare(test_targets, test_predicts))
    print("RLR-prediction:", test_predicts[:20], "RLR-original",
          test_targets[:20])

    print(
        '-----------------------------kNN Regression------------------------------------'
    )

    # KNN-regression
    # tip out the x0=1 column
    inputs = inputs[:, [1, 2, 3, 4, 5]]

    train_errors = []
    test_errors = []
    K = range(2, 9)
    for k in K:
        train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
            inputs, targets, train_part, test_part)
        knn_approx = construct_knn_approx(train_inputs, train_targets, k)
        train_knn_predicts = knn_approx(train_inputs)
        train_error = root_mean_squared_error(train_knn_predicts,
                                              train_targets)
        test_knn_predicts = knn_approx(test_inputs)
        test_error = root_mean_squared_error(test_knn_predicts, test_targets)
        train_errors.append(train_error)
        test_errors.append(test_error)
        # print("knn_predicts: ", np.around(test_knn_predicts), "knn-original", test_targets)

    # best k
    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    best_k = np.argmin(test_errors)
    print("Best joint choice of parameters:")
    print("k = %.2g" % (K[best_k]))
    fig, ax = plot_train_test_errors("K", K, train_errors, test_errors)
    ax.set_xticks(np.arange(min(K), max(K) + 1, 1.0))

    # NOTE(review): these report the errors for the LAST k tried and then
    # rebuild the model with a hard-coded k=3 rather than K[best_k] —
    # looks intentional for reporting, but confirm.
    print("kNN-train_error", train_errors[-1])
    print("kNN-test_error", test_errors[-1])
    knn_approx = construct_knn_approx(train_inputs, train_targets, k=3)
    test_predicts = knn_approx(test_inputs)
    print("kNN-rsquare score", rsquare(test_targets, test_predicts))
    print("kNN-y_predicts", test_predicts[:20], 'y_original',
          test_targets[:20])

    print(
        '----------------------------RBF Function-------------------------------------'
    )

    # Radial Basis Functions
    # for the centres of the basis functions sample 15% of the data
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=inputs.shape[0],
                                      p=p), :]  # !!!
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # choices of regularisation strength
    # create empty 2d arrays to store the train and test errors
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    # iterate over the scales
    for i, scale in enumerate(scales):
        # the feature mapping must be rebuilt for each scale
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # partition the design matrix and targets into train and test
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(designmtx, targets, train_part, test_part)
        # iterate over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # train and test this (scale, reg_param) combination
            train_error, test_error = train_and_test(train_designmtx,
                                                     train_targets,
                                                     test_designmtx,
                                                     test_targets,
                                                     reg_param=reg_param)
            # store the train and test errors in our 2d arrays
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # NOTE(fix): find the joint (i, j) minimum with unravel_index. The
    # original used argmin(argmin(...)) and the stale loop variable `i`,
    # which select the wrong scale/lambda and fed wrong values into the
    # plots and the final RBF weights below.
    best_i, best_j = np.unravel_index(np.argmin(test_errors),
                                      test_errors.shape)
    print("Best joint choice of parameters:")
    print("\tscale= %.2g and lambda = %.2g" %
          (scales[best_i], reg_params[best_j]))
    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors("scale", scales, train_errors[:, best_j],
                                     test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_errors[best_i, :],
                                     test_errors[best_i, :])
    ax.set_xscale('log')
    feature_mapping = construct_rbf_feature_mapping(centres, scales[best_i])
    reg_weights = regularised_ml_weights(train_designmtx, train_targets,
                                         reg_params[best_j])
    # test function: predictions on the last partition's design matrix
    test_predicts = np.matrix(test_designmtx) * np.matrix(reg_weights).reshape(
        (len(reg_weights), 1))
    test_predicts = np.array(test_predicts).flatten()

    print("RBF-train_error", train_errors[best_i, best_j])
    print("RBF-test_error", test_errors[best_i, best_j])
    print("RBF-rsquare score", rsquare(test_targets, test_predicts))
    print('RBF_y_predicts: ', test_predicts[:20], 'rbf_y_originals: ',
          test_targets[:20])

    print(
        '-----------------------------Polynomial---------------------------------------'
    )

    # Polynomial Basis Function
    # sweep over polynomial degree
    degrees = range(1, 10)
    train_errors = []
    test_errors = []
    for degree in degrees:
        # sum the monomial expansions of every input column
        processed_inputs = 0
        for i in range(inputs.shape[1]):
            processed_input = expand_to_monomials(inputs[:, i], degree)
            processed_inputs += processed_input
        processed_inputs = np.array(processed_inputs)
        # split data into train and test set
        processed_train_inputs, train_targets, processed_test_inputs, test_targets = train_and_test_partition\
            (processed_inputs, targets, train_part, test_part)
        train_error, test_error = train_and_test(processed_train_inputs,
                                                 train_targets,
                                                 processed_test_inputs,
                                                 test_targets,
                                                 reg_param=None)
        # NOTE(review): `reg_param` here is a stale value left over from
        # the RBF loop above, and these weights are unused — looks like
        # dead/buggy code; confirm before removing.
        weights = regularised_least_squares_weights(processed_train_inputs,
                                                    train_targets, reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)

    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    print("Polynomial-train error: ", train_errors[-1])
    print("Polynomial-test error: ", test_errors[-1])
    best_d = np.argmin(test_errors)
    print("Best joint choice of degree:")
    final_degree = degrees[best_d]
    print("degree = %.2g" % (final_degree))
    fig, ax = plot_train_test_errors("Degree", degrees, train_errors,
                                     test_errors)
    ax.set_xticks(np.arange(min(degrees), max(degrees) + 1, 1.0))

    # test functionality with the final degree
    processed_inputs = 0
    for i in range(inputs.shape[1]):
        processed_input = expand_to_monomials(inputs[:, i], final_degree)
        processed_inputs += processed_input
    processed_inputs = np.array(processed_inputs)

    processed_train_inputs, train_targets, processed_test_inputs, test_targets = train_and_test_partition \
        (processed_inputs, targets, train_part, test_part)
    train_error, test_error = train_and_test(processed_train_inputs,
                                             train_targets,
                                             processed_test_inputs,
                                             test_targets,
                                             reg_param=None)
    # NOTE(review): `reg_param` is again the stale RBF value — these
    # weights DO feed prediction_function below; verify the intended
    # regularisation strength.
    weights = regularised_least_squares_weights(processed_train_inputs,
                                                train_targets, reg_param)
    # print("processed_train_inputs.shape", processed_train_inputs.shape)
    # print('weights: ', weights, 'weights shape: ', weights.shape)
    test_predicts = prediction_function(processed_test_inputs, weights,
                                        final_degree)
    print("Polynomial-rsquare score", rsquare(test_targets, test_predicts))
    print('Polynomial-y_predicts: ', test_predicts[:20],
          'Polynomial-y_original: ', test_targets[:20])
    plt.show()
def bayesian_regression_entry_point(data):
    """
    Run a suite of Bayesian linear regression experiments with RBF features.

    The last column of ``data`` is used as the target and the first 11
    columns as inputs.  Inputs are standardised column-wise, a posterior
    over the weights is fitted for one fixed prior, and then separate
    one-dimensional searches are run over alpha (prior covariance scale),
    beta (noise precision), the RBF scale, and the proportion of training
    points used as basis-function centres.  Each search saves a train/test
    error figure as a PDF in the working directory; all figures are shown
    at the end.  Returns None.
    """

    # split the raw matrix into targets (last column) and inputs (first 11)
    data_targets = data[:, -1]
    data = data[:, 0:11]

    print(data)
    print(data_targets)
    # standardise every input column to zero mean and unit variance
    for i in range(data.shape[1]):
        data[:, i] = (data[:, i] - np.mean(data[:, i])) / np.std(data[:, i])
    print("standard deviation is %s" % str(np.std(data, axis=0)))

    # train on the first 960 rows; test on rows 1300-1598
    # NOTE(review): rows 960-1299 are unused here but are used as the test
    # set in the parameter searches below -- confirm this split is intended
    inputs = data[0:960, :]
    targets = data_targets[0:960]
    test_inputs = data[1300:1599, :]
    test_targets = data_targets[1300:1599]

    # specify the centres of the rbf basis functions
    # roughly 10% of the training points are drawn at random as centres
    N = inputs.shape[0]
    centres1 = inputs[np.random.choice([False, True], size=N, p=[0.9, 0.1]), :]
    # centres1 = data[10,:]
    # centres1 = np.linspace(4,20,10)
    print(centres1)

    # the width (analogous to standard deviation) of the basis functions
    scale = 47
    print("centres = %r" % (centres1, ))
    print("scale = %r" % (scale, ))
    # create the feature mapping
    feature_mapping = construct_rbf_feature_mapping(centres1, scale)
    # plot the basis functions themselves for reference

    # sample number of data-points: inputs and targets
    # define the noise precision of our data
    beta = (1 / 0.01)**2
    # now construct the design matrix for the inputs
    designmtx = feature_mapping(inputs)
    test_designmtx = feature_mapping(test_inputs)
    print(designmtx.shape)
    # the number of features is the width of this matrix
    M = designmtx.shape[1]
    # define a prior mean and covaraince matrix
    # m0 = np.random.randn(M)
    m0 = np.zeros(M)
    print("m0 equals %r" % (m0))
    alpha = 50
    S0 = alpha * np.identity(M)
    # find the posterior over weights
    mN, SN = calculate_weights_posterior(designmtx, targets, beta, m0, S0)
    # for i in range(500):
    #     mN, SN = calculate_weights_posterior(designmtx, targets, beta, mN, SN)

    # NOTE(review): mN is passed positionally in the slot other call sites
    # fill with reg_param= -- verify train_and_test is meant to accept a
    # weight vector here rather than a regularisation strength
    train_error, test_error = train_and_test(designmtx, targets,
                                             test_designmtx, test_targets, mN)
    print(train_error, test_error)

    # cross-validation
    # train_error, test_error = cv_evaluation_linear_model(designmtx, targets, folds, mN)
    # print(train_error, test_error, np.mean(train_error), np.mean(test_error))

    # the posterior mean (also the MAP) gives the central prediction
    mean_approx = construct_feature_mapping_approx(feature_mapping, mN)
    fig, ax, lines = plot_function_data_and_approximation(
        mean_approx, test_inputs, test_targets)
    ax.legend(lines, ['Prediction', 'True value'])
    ax.set_xticks([])
    ax.set_ylabel("Quality")
    fig.suptitle('Prediction vlaue against True value', fontsize=10)
    # NOTE(review): "fmt" is not a matplotlib savefig keyword ("format" is);
    # recent matplotlib versions reject unknown kwargs -- verify
    fig.savefig("regression_bayesian_rbf.pdf", fmt="pdf")

    # search the optimum alpha for baysian model regression
    # (alpha scales the prior covariance S0 = alpha * I)
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)
    alphas = np.logspace(1, 3)

    # convert the raw inputs into feature vectors (construct design matrices)
    # train_errors = np.empty(alphas.size)
    # test_errors = np.empty(alphas.size)
    train_errors = []
    test_errors = []
    for a, alpha in enumerate(alphas):
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)

        beta = (1 / 0.01)**2
        M = train_designmtx.shape[1]
        # define a prior mean and covaraince matrix
        m0 = np.zeros(M)

        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        train_errors.append(train_error)
        test_errors.append(test_error)
        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[a] = np.mean(train_error)
        # test_errors[a] = np.mean(test_error)
    # plot the results, marking the alpha with the lowest test error
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("alpha", alphas, train_errors,
                                     test_errors)
    fig.suptitle('Alpha vs Error in Bayesian', fontsize=10)
    ax.plot(alphas[min_error_index], min_error, "ro")
    # ax.text(scales[min_error_index],min_error,(str(scales[min_error_index]),str(min_error)))
    ax.annotate((str(alphas[min_error_index]), str(min_error)),
                xy=(alphas[min_error_index], min_error),
                xytext=(alphas[min_error_index] + 0.01, min_error + 0.01),
                arrowprops=dict(facecolor='green', shrink=0.1))
    ax.set_xscale('log')
    fig.savefig("alpha.pdf", fmt="pdf")

    # search the optimum beta for baysian model regression
    # (beta is the assumed noise precision, i.e. 1 / noise variance)
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)
    betas = (1. / np.logspace(-3, 1))**2

    # convert the raw inputs into feature vectors (construct design matrices)
    # train_errors = np.empty(betas.size)
    # test_errors = np.empty(betas.size)
    train_errors = []
    test_errors = []
    for b, beta in enumerate(betas):
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)

        M = train_designmtx.shape[1]
        # define a prior mean and covaraince matrix
        m0 = np.zeros(M)
        alpha = 50
        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        train_errors.append(train_error)
        test_errors.append(test_error)

        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[b] = np.mean(train_error)
        # test_errors[b] = np.mean(test_error)
    # plot the results, marking the beta with the lowest test error
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("beta", betas, train_errors, test_errors)
    fig.suptitle('Beta vs Error in Bayesian', fontsize=10)
    ax.plot(betas[min_error_index], min_error, "ro")
    # ax.text(scales[min_error_index],min_error,(str(scales[min_error_index]),str(min_error)))
    ax.annotate((str(betas[min_error_index]), str(min_error)),
                xy=(betas[min_error_index], min_error),
                xytext=(betas[min_error_index] + 0.05, min_error + 0.05),
                arrowprops=dict(facecolor='green', shrink=0.1))
    ax.set_xscale('log')
    fig.savefig("beta.pdf", fmt="pdf")

    # search the optimum scale for baysian model regression
    # (the loop variable below shadows the earlier scale = 47)
    scales = np.logspace(0.5, 3)
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)

    # convert the raw inputs into feature vectors (construct design matrices)
    # train_errors = np.empty(scales.size)
    # test_errors = np.empty(scales.size)
    train_errors = []
    test_errors = []
    for j, scale in enumerate(scales):
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)

        beta = (1. / 0.01)**2
        M = train_designmtx.shape[1]
        # define a prior mean and covaraince matrix
        m0 = np.zeros(M)
        alpha = 50
        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[j] = np.mean(train_error)
        # test_errors[j] = np.mean(test_error)

        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results, marking the scale with the lowest test error
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("scale", scales, train_errors,
                                     test_errors)
    fig.suptitle('Scale vs Error in Bayesian', fontsize=10)
    ax.plot(scales[min_error_index], min_error, "ro")
    # ax.text(scales[min_error_index],min_error,(str(scales[min_error_index]),str(min_error)))
    ax.annotate((str(scales[min_error_index]), str(min_error)),
                xy=(scales[min_error_index], min_error),
                xytext=(scales[min_error_index] + 0.2, min_error + 0.2),
                arrowprops=dict(facecolor='green', shrink=0.1))
    ax.set_xscale('log')
    fig.savefig("scale.pdf", fmt="pdf")

    # Here we vary the number of centres and evaluate the performance
    # (cent_part is the fraction of training rows sampled as centres)
    scale = 60
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)
    cent_parts = np.linspace(0.05, 0.8, 16)
    # train_errors = np.empty(cent_parts.size)
    # test_errors = np.empty(cent_parts.size)
    train_errors = []
    test_errors = []
    N = train_inputs.shape[0]

    for n, cent_part in enumerate(cent_parts):
        # we must construct the feature mapping anew for each number of centres
        centres1 = train_inputs[np.random.choice(
            [False, True], size=N, p=[1 - cent_part, cent_part]), :]

        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the test and train error for this regularisation parameter

        M = train_designmtx.shape[1]
        # define a prior mean and covaraince matrix
        m0 = np.zeros(M)
        beta = (1. / 0.01)**2
        alpha = 50
        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        train_errors.append(train_error)
        test_errors.append(test_error)

        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[n] = np.mean(train_error)
        # test_errors[n] = np.mean(test_error)
    # plot the results, marking the proportion with the lowest test error
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("Num. Centres", cent_parts, train_errors,
                                     test_errors)
    fig.suptitle('Num. Centres vs Error in Bayesian', fontsize=10)
    ax.plot(cent_parts[min_error_index], min_error, "ro")
    ax.text(cent_parts[min_error_index], min_error,
            (str(cent_parts[min_error_index]), str(min_error)))
    fig.savefig("Num. centres.pdf", fmt="pdf")

    plt.show()
def evaluate_num_centres(inputs,
                         targets,
                         folds,
                         scale,
                         reg_param,
                         test_error_linear,
                         num_centres_sequence=None):
    """
    Cross-validate an RBF regression model while varying the proportion of
    inputs used as basis-function centres, then plot mean train/test errors
    (with one-standard-error bands) against that proportion.

    For each proportion a random subset of the inputs is drawn as centres,
    a design matrix is built, and the model is cross-validated with the
    given regularisation parameter.  The resulting figure is saved to
    ../plots/rbf/rbf_searching_number_centres_cross_validation.png and
    shown.  Returns None.
    """

    # default sweep: centre proportions from 1% up to 100% of the inputs
    if num_centres_sequence is None:
        num_centres_sequence = np.linspace(
            start=0.01, stop=1,
            num=20)  # tested with 50, using 20 to speed things up

    num_folds = len(folds)
    num_points = inputs.shape[0]

    # collect per-proportion statistics, converting to arrays afterwards
    mean_train, mean_test = [], []
    stdev_train, stdev_test = [], []

    for proportion in num_centres_sequence:
        # draw a fresh random subset of the inputs to act as centres
        keep = np.random.choice([False, True], size=num_points,
                                p=(1 - proportion, proportion))
        centres = inputs[keep, :]
        # print("\ncentres.shape = %r" % (centres.shape,))
        mapping = construct_rbf_feature_mapping(centres, scale)
        design = mapping(inputs)
        # cross validate with the supplied regularisation parameter
        fold_train, fold_test = cv_evaluation_linear_model(
            design, targets, folds, reg_param=reg_param)
        # record the mean and spread of the per-fold errors
        mean_train.append(np.mean(fold_train))
        mean_test.append(np.mean(fold_test))
        stdev_train.append(np.std(fold_train))
        stdev_test.append(np.std(fold_test))

    mean_train = np.asarray(mean_train)
    mean_test = np.asarray(mean_test)
    stdev_train = np.asarray(stdev_train)
    stdev_test = np.asarray(stdev_test)

    # now plotting the results
    fig, ax = plot_train_test_errors("% of inputs as centres * 100",
                                     num_centres_sequence, mean_train,
                                     mean_test, test_error_linear)
    # Shade mean +/- 1 standard error, where 1 standard error is the
    # standard deviation divided by sqrt(number of folds).
    # (There are other choices for error bars.)
    half_width = stdev_train / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, mean_train - half_width,
                    mean_train + half_width, alpha=0.2, color='b')
    half_width = stdev_test / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, mean_test - half_width,
                    mean_test + half_width, alpha=0.2, color='r')
    ax.set_ylim([0, 1])

    ax.set_title(
        'Train vs Test Error across Centre Proportion with Cross-validation')
    fig.savefig(
        "../plots/rbf/rbf_searching_number_centres_cross_validation.png",
        fmt="png")

    plt.show()
def evaluate_reg_param(inputs,
                       targets,
                       folds,
                       centres,
                       scale,
                       reg_params=None):
    """
    Cross-validate regularised least squares with RBF features over a range
    of regularisation parameters and plot the results.

    Parameters
    ----------
    inputs : 2d array (n_samples, n_dims) of raw input data
    targets : 1d array of n_samples target values
    folds : sequence of cross-validation folds; one train/test partition
        per fold
    centres : 2d array of RBF centre locations
    scale : width (analogous to standard deviation) of the basis functions
    reg_params : optional 1d array of regularisation strengths to try;
        defaults to np.logspace(-15, 0)

    Side effects: draws a log-x train/test error plot with one-standard-
    error bands and a dotted green baseline for the unregularised model.
    Returns None.
    """
    # create the feature mapping and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-15, 0)
    num_values = reg_params.size
    num_folds = len(folds)
    # arrays to store per-parameter cross-validation statistics
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    for r, reg_param in enumerate(reg_params):
        # cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # store the mean and spread of the per-fold errors
        train_mean_errors[r] = np.mean(train_errors)
        test_mean_errors[r] = np.mean(test_errors)
        train_stdev_errors[r] = np.std(train_errors)
        test_stdev_errors[r] = np.std(test_errors)

    # baseline: mean test error without any regularisation
    _, test_errors_without_reg = cv_evaluation_linear_model(designmtx,
                                                            targets,
                                                            folds,
                                                            reg_param=None)
    test_mean_error_without_reg_param = np.mean(test_errors_without_reg)

    # Now plot the results (raw string: "\l" is an invalid escape otherwise)
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors, test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')
    # green dotted line marks the no-regularisation baseline
    xlim = ax.get_xlim()
    ax.plot(xlim, test_mean_error_without_reg_param * np.ones(2), 'g:')
    ax.set_xscale('log')
def main():
    """
    Demonstrate radial basis function (RBF) regression on synthetic data.

    Plots the basis functions themselves, fits unregularised and
    regularised RBF models to samples of arbitrary_function_1, then runs
    naive parameter searches over the regularisation strength, the basis
    scale, and the number of centres, plotting train/test error for each.

    Side effects: saves regression_rbf.pdf and
    regression_rbf_basis_functions_reg.pdf, and shows all figures.
    Returns None.
    """

    # specify the centres of the rbf basis functions
    centres = np.linspace(0, 1, 7)
    # the width (analogous to standard deviation) of the basis functions
    scale = 0.15
    print("centres = %r" % (centres,))
    print("scale = %r" % (scale,))
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    # plot the basis functions themselves for reference
    datamtx = np.linspace(0, 1, 51)
    designmtx = feature_mapping(datamtx)
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    for colid in range(designmtx.shape[1]):
        ax.plot(datamtx, designmtx[:, colid])
    ax.set_xlim([0, 1])
    ax.set_xticks([0, 1])
    ax.set_yticks([0, 1])

    # choose number of data-points and sample a pair of vectors: the input
    # values and the corresponding target values
    N = 20
    inputs, targets = sample_data(N, arbitrary_function_1, seed=37)
    # define the feature mapping for the data
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    # now construct the design matrix
    designmtx = feature_mapping(inputs)
    # find the weights that fit the data in a least squares way
    weights = ml_weights(designmtx, targets)
    # use weights to create a function that takes inputs and returns
    # predictions (in python, functions can be passed like any other object)
    rbf_approx = construct_feature_mapping_approx(feature_mapping, weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_approx, inputs, targets, arbitrary_function_1)
    ax.legend(lines, ['true function', 'data', 'linear approx'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    # "format" (not "fmt") is the savefig keyword for the output type
    fig.savefig("regression_rbf.pdf", format="pdf")

    # for a single choice of regularisation strength we can plot the
    # approximating function
    reg_param = 10**-3
    reg_weights = regularised_ml_weights(designmtx, targets, reg_param)
    rbf_reg_approx = construct_feature_mapping_approx(feature_mapping,
                                                     reg_weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_reg_approx, inputs, targets, arbitrary_function_1)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf_basis_functions_reg.pdf", format="pdf")

    # to find a good regularisation parameter, we can perform a parameter
    # search (a naive way to do this is to simply try a sequence of
    # reasonable values within a reasonable range)

    # sample some training and testing inputs
    train_inputs, train_targets = sample_data(N, arbitrary_function_1, seed=37)
    # we need to use a different seed for our test data, otherwise some of our
    # sampled points will be the same
    test_inputs, test_targets = sample_data(100, arbitrary_function_1, seed=82)
    # convert the raw inputs into feature vectors (construct design matrices)
    train_designmtx = feature_mapping(train_inputs)
    test_designmtx = feature_mapping(test_inputs)
    # evaluate train and test error for a sequence of potential
    # regularisation strengths, storing the results
    reg_params = np.logspace(-5, 1)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results (raw string: "\l" is an invalid escape otherwise)
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    ax.set_xscale('log')

    # we may also be interested in choosing the right number of centres, or
    # the right width/scale of the rbf functions.
    # Here we vary the width and evaluate the performance
    reg_param = 10**-3
    scales = np.logspace(-2, 0)
    train_errors = []
    test_errors = []
    for scale in scales:
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors, test_errors)
    ax.set_xscale('log')

    # Here we vary the number of centres and evaluate the performance
    reg_param = 10**-3
    scale = 0.15
    n_centres_seq = np.arange(3, 20)
    train_errors = []
    test_errors = []
    for n_centres in n_centres_seq:
        # we must construct the feature mapping anew for each number of centres
        centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "Num. Centres", n_centres_seq, train_errors, test_errors)
    plt.show()
def main(inputs, targets, scale, best_no_centres, test_fraction=0.20):
    """
    Fit a Bayesian linear regression model with RBF features and visualise
    the posterior and predictive distribution.

    Parameters
    ----------
    inputs : 2d array of raw input data (standardised internally)
    targets : 1d array of target values (quality scores; the plot assumes
        a 0-10 range)
    scale : width of the RBF basis functions
    best_no_centres : proportion (0-1) of training points sampled as
        basis-function centres
    test_fraction : fraction of the data held out for testing

    Side effects: prints diagnostics and errors, saves
    ../plots/bayesian/bayesian_rbf.png, and shows the plots.  Returns None.
    """
    # setting a seed to get the same pseudo-random results every time
    np.random.seed(30)

    print("\n")

    std_inputs = standardise(inputs)

    # random train/test split of the standardised data
    train_part, test_part = train_and_test_split(std_inputs.shape[0],
                                                 test_fraction)

    train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
        std_inputs, targets, train_part, test_part)

    # specifying the centres of the rbf basis functions
    # choosing 10% of the data for the centres of the basis functions or the optimal proportion from earlier analyses
    centres = train_inputs[
        np.random.choice([False, True],
                         size=train_inputs.shape[0],
                         p=[1 - best_no_centres, best_no_centres]), :]
    print("centres shape = %r" % (centres.shape, ))

    # the width (analogous to standard deviation) of the basis functions
    # scale of the basis functions from analysis in external_data file
    # We consider the basis function widths to be fixed for simplicity
    print("scale = %r" % scale)

    # creating the feature mapping
    feature_mapping = construct_rbf_feature_mapping(centres, scale)

    # plotting the basis functions themselves for reference
    display_basis_functions(feature_mapping, train_inputs.shape[1])

    # alpha and beta define the shape of our curve when we start

    # beta is defining the noise precision of our data, as the reciprocal of the target variance
    # it is the spread from the highest point (top) of the curve
    # it corresponds to additive Gaussian noise of variance, which is beta to the power of -1
    beta = np.reciprocal(0.40365849982557295)
    # beta = np.reciprocal(np.var(train_targets))
    # beta = 100
    # higher beta is going to give us higher precision, so less overlap
    # as a side note, could also do beta = 1 / np.var(train_targets)

    # location of the highest point of the initial curve / prior distribution
    # because targets represent quality ranging from 0 to 10
    # NOTE(review): assumes scipy.stats.mode's legacy return shape
    # (ModeResult[0][0]) -- confirm against the installed scipy version
    alpha = mode(targets)[0][0]
    # alpha = 100

    # now applying our feature mapping to the train inputs and constructing the design matrix
    design_matrix = feature_mapping(train_inputs)
    # the number of features (phis) is the width of this matrix
    # it is equal to the number of centres drawn from the train inputs
    # the shape[0] is the number of data points I use for training
    M = design_matrix.shape[1]

    # defining a prior mean and covariance matrix
    # they represent our prior belief over the distribution
    # our initial estimate of the range of probabilities
    m0 = np.zeros(M)

    # every prior-mean component is set to the mode of the targets
    for m in range(len(m0)):
        m0[m] = mode(targets)[0][0]  # setting to be the mode of targets
        # m0[m] = 0

    S0 = alpha * np.identity(M)

    # diagonal regularisation matrix A to punish over-fitting
    # A = alpha * np.identity(M)
    # E = 0.5 * m0.T * A * m0
    # Zp = regularisation constant
    # prior_m0 = np.exp(-E)/Zp

    # finding the posterior over weights
    # if we have enough data, the posteriors will be the same, no matter the initial parameters
    # because they will have been updated according to Bayes' rule
    mN, SN = calculate_weights_posterior(design_matrix, train_targets, beta,
                                         m0, S0)
    # print("mN = %r" % (mN,))

    # the posterior mean (also the MAP) gives the central prediction
    mean_approx = construct_feature_mapping_approx(feature_mapping, mN)

    # getting MAP and calculating root mean squared errors
    train_output = mean_approx(train_inputs)
    test_output = mean_approx(test_inputs)

    bayesian_mean_train_error = root_mean_squared_error(
        train_targets, train_output)
    bayesian_mean_test_error = root_mean_squared_error(test_targets,
                                                       test_output)
    print("Root mean squared errors:")
    print("Train error of posterior mean (applying Bayesian inference): %r" %
          bayesian_mean_train_error)
    print("Test error of posterior mean (applying Bayesian inference): %r" %
          bayesian_mean_test_error)

    # plotting one input variable on the x axis as an example
    # (column 10 -- labelled below as standardised alcohol content)
    fig, ax, lines = plot_function_and_data(std_inputs[:, 10], targets)

    # creating data to use for plotting
    xs = np.ndarray((101, train_inputs.shape[1]))

    # every column is an identical sweep over [-5, 5]
    for column in range(train_inputs.shape[1]):
        column_sample = np.linspace(-5, 5, 101)
        column_sample = column_sample.reshape((column_sample.shape[0], ))
        xs[:, column] = column_sample

    ys = mean_approx(xs)
    line, = ax.plot(xs[:, 10], ys, 'r-')
    lines.append(line)
    ax.set_ylim([0, 10])

    # now plotting a number of samples from the posterior
    for i in range(20):
        weights_sample = np.random.multivariate_normal(mN, SN)
        sample_approx = construct_feature_mapping_approx(
            feature_mapping, weights_sample)
        sample_ys = sample_approx(xs)
        line, = ax.plot(xs[:, 10], sample_ys, 'm', linewidth=0.5)
    lines.append(line)
    ax.legend(lines, ['data', 'mean approx', 'samples'])

    # now for the predictive distribution
    new_designmtx = feature_mapping(xs)
    ys, sigma2Ns = predictive_distribution(new_designmtx, beta, mN, SN)
    print("(sigma2Ns**0.5).shape = %r" % ((sigma2Ns**0.5).shape, ))
    print("np.sqrt(sigma2Ns).shape = %r" % (np.sqrt(sigma2Ns).shape, ))
    print("ys.shape = %r" % (ys.shape, ))

    # shade mean +/- 1 predictive standard deviation
    ax.plot(xs[:, 10], ys, 'r', linewidth=3)
    lower = ys - np.sqrt(sigma2Ns)
    upper = ys + np.sqrt(sigma2Ns)
    print("lower.shape = %r" % (lower.shape, ))
    print("upper.shape = %r" % (upper.shape, ))
    ax.fill_between(xs[:, 10], lower, upper, alpha=0.2, color='r')
    ax.set_title('Posterior Mean, Samples, and Predictive Distribution')
    ax.set_xlabel('standardised alcohol content')
    ax.set_ylabel('p(t|x)')
    fig.tight_layout()
    fig.savefig("../plots/bayesian/bayesian_rbf.png", fmt="png")

    plt.show()

    # the predictive distribution
    test_design_matrix = feature_mapping(test_inputs)
    predictions, prediction_sigma2 = predictive_distribution(
        test_design_matrix, beta, mN, SN)
    # NOTE(review): this sums log of the predictive MEANS, not of
    # probabilities/densities -- verify this is the intended error measure
    # (it also raises ValueError if any prediction is <= 0)
    sum_joint_log_probabilities = 0
    for n in range(len(predictions)):
        sum_joint_log_probabilities += math.log(predictions[n])

    sum_joint_log_probabilities *= -1
    # joint_log_probabilities = (np.array(test_targets).flatten() - np.array(predictions).flatten())
    # print(np.mean(joint_log_probabilities))
    print("Error as negative joint log probability: %r" %
          sum_joint_log_probabilities)
def main(ifname=None, delimiter=None, columns=None):
    """
    Entry point when the script is run.  Imports the data file (if a filename
    is provided), then cross-validates RBF regularised-regression
    hyper-parameters (regularisation parameter, basis-function scale) and
    plots a family of RBF basis functions.

    parameters
    ----------
    ifname -- filename/path of data file; if None, nothing is done beyond a
        prompt to supply one.
    delimiter -- delimiter of data values
    columns -- a list of integers specifying which columns of the file to
        import (counting from 0)
    """
    # without a file name there is no data to process — just tell the user
    if ifname is None:
        print("You need to ingest the CSV file")
    else:
        # a header line is always assumed present in the data file
        data, field_names = import_data(ifname,
                                        delimiter=delimiter,
                                        has_header=True,
                                        columns=columns)

        #DATA PREPARATION-----------------------------------------------
        counter = 0
        N = data.shape[0]
        # every column except the last is an input feature; the final column
        # is the regression target.
        # NOTE(review): `input` shadows the builtin of the same name — rename
        # (e.g. to `inputs`) when a behaviour-changing edit is acceptable.
        input = data[:, 0:data.shape[1] - 1]
        target = data[:, data.shape[1] - 1:]
        #print("FEATURES : ",columns)
        #print("INPUT :", input)

        #declare number of centre to explore and create matrix for storing testing mean errors
        num_centres_sequence = np.arange(5, 100)
        # log-spaced search grids for the scale and regularisation parameter
        scales = np.logspace(-10, 10)
        reg_params = np.logspace(-15, 10)

        # specify the centres and scale of some rbf basis functions
        # (hand-picked grid, densest around ~0.45-0.65)
        default_centres = np.asarray([
            0.35, 0.4, 0.45, 0.459090909, 0.468181818, 0.477272727,
            0.486363636, 0.495454545, 0.504545455, 0.513636364, 0.522727273,
            0.531818182, 0.540909091, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
            0.62, 0.63, 0.64, 0.65, 0.7, 0.75, 0.8
        ])
        # presumably tuned from earlier cross-validation runs — TODO confirm
        default_scale = 26.8
        default_reg_param = 7.906043210907701e-11

        # get the cross-validation folds
        num_folds = 4
        folds = create_cv_folds(N, num_folds)

        # evaluate then plot the performance of different reg params
        evaluate_reg_param(input, target, folds, default_centres,
                           default_scale, reg_params)
        # evaluate then plot the performance of different scales
        evaluate_scale(input, target, folds, default_centres,
                       default_reg_param)
        # evaluate then plot the performance of different numbers of basis
        # function centres.
        #test_mean_errors_for_centre = evaluate_num_centres(input, target, folds, default_scale, default_reg_param,num_centres_sequence)
        #steep_centre,optimum_centre = point_of_steepest_gradient(test_mean_errors_for_centre,num_centres_sequence)
        #print("Centre with steepest drop of test mean errors: ",steep_centre)
        #print("Optimum number of centres which within tolerance: ",optimum_centre)

        # the width (analogous to standard deviation) of the basis functions
        scale = 0.1
        feature_mapping = construct_rbf_feature_mapping(default_centres, scale)
        # evaluate each basis function on a uniform grid over [0, 1] and plot
        # one curve per basis-function column of the design matrix
        datamtx = np.linspace(0, 1, 51)
        designmtx = feature_mapping(datamtx)
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        for colid in range(designmtx.shape[1]):
            ax.plot(datamtx, designmtx[:, colid])
            ax.set_xlim([0, 1])
            ax.set_xticks([0, 1])
            ax.set_yticks([0, 1])