Example #1
def parameter_search_rbf(inputs, targets, test_fraction):
    """
    """
    N = inputs.shape[0]
    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(N,
                                                 test_fraction=test_fraction)
    # for the centres of the basis functions sample 15% of the data
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=N, p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # choices of regularisation strength
    # create empty 2d arrays to store the train and test errors
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    # iterate over the scales
    for i, scale in enumerate(scales):
        # i is the index, scale is the corresponding scale
        # we must recreate the feature mapping each time for different scales
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # partition the design matrix and targets into train and test
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(
                designmtx, targets, train_part, test_part)
        # iterate over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # j is the index, reg_param is the corresponding regularisation
            # parameter
            # train and test the data
            train_error, test_error = train_and_test(train_designmtx,
                                                     train_targets,
                                                     test_designmtx,
                                                     test_targets,
                                                     reg_param=reg_param)
            # store the train and test errors in our 2d arrays
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # we have a 2d array of train and test errors, we want to know the (i,j)
    # index of the best value
    best_i, best_j = np.unravel_index(np.argmin(test_errors),
                                      test_errors.shape)
    print("Best joint choice of parameters:")
    print("\tscale %.2g and lambda = %.2g" %
          (scales[best_i], reg_params[best_j]))
    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors("scale", scales, train_errors[:, best_j],
                                     test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_errors[best_i, :],
                                     test_errors[best_i, :])
    ax.set_xscale('log')
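
A note on the index arithmetic fixed above: the argmin-of-argmin construction does not recover the row and column of the smallest entry of a 2d array, but numpy's np.unravel_index does. A minimal self-contained sketch (the error grid below is made up for illustration):

import numpy as np

# hypothetical 3x4 grid of test errors, for illustration only
errors = np.array([[0.9, 0.8, 0.7, 0.75],
                   [0.6, 0.4, 0.5, 0.65],
                   [0.7, 0.55, 0.45, 0.6]])
# convert the flat index of the smallest entry back to (row, column)
best_i, best_j = np.unravel_index(np.argmin(errors), errors.shape)
assert (best_i, best_j) == (1, 1)  # 0.4 is the smallest error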
def plot_with_regularisation(inputs, targets, folds):
    """
    Linear regression does not use a feature mapping, typically with such a 
    simple model regularisation does not have much effect. The plot is the
    same with and without regularisation. Regularisation has only a weak affect
    on simple linear regression. Using regularisation on simple linear
    regression may not be that effective.  In simple linear regression, 
    regularisation will slightly penalise functions that are further from 
    constant (i.e. with larger gradients). So it will have an effect, but 
    only a small one and that will be to give slightly lower weights than if 
    you had used least squares.
    """
    reg_params = np.logspace(-10, 1)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        print("Evaluating reg_param: " + str(reg_param))
        # single-split evaluation (computed here but not used below)
        old_train, old_test = simple_evaluation_linear_model(
            inputs, targets, test_fraction=0.2, reg_param=reg_param)
        train_error, test_error = cv_evaluation_linear_model(
            inputs, targets, folds, reg_param=reg_param)
        # collect the errors
        train_errors.append(np.mean(train_error))
        test_errors.append(np.mean(test_error))
    # plot the results
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    plt.title('Linear Regression Model')
    ax.set_xscale('log')
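
The regularised weights discussed in the docstring above have a closed form. The sketch below assumes the standard ridge-regression formulation; it is illustrative, not necessarily identical to the codebase's regularised_ml_weights:

import numpy as np

def ridge_weights(design_matrix, targets, reg_param):
    """Regularised least squares: w = (X^T X + lambda I)^{-1} X^T y."""
    d = design_matrix.shape[1]
    gram = design_matrix.T @ design_matrix + reg_param * np.identity(d)
    return np.linalg.solve(gram, design_matrix.T @ targets)

As reg_param grows the identity term dominates and the weights shrink towards zero, which is why the effect on an already-simple linear model is small.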
Example #3
def evaluate_reg_param(inputs,
                       targets,
                       folds,
                       centres,
                       scale,
                       reg_params=None):
    """
      Evaluate then plot the performance of different regularisation parameters
    """
    # create the feature mapping and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    print("The design matrix shape is:", designmtx.shape)
    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-2, 0)
    num_values = reg_params.size
    num_folds = len(folds)  # the number of cross-validation folds (5 here)
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)  # one entry per reg. choice
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)

    # for each regularisation parameter, collect the per-fold train and test
    # errors, then record their mean and standard deviation
    for r, reg_param in enumerate(reg_params):  #iterate over each reg. param
        # r is the index of reg_param, reg_param is the regularisation parameter
        # cross validate with this regularisation parameter
        #cv_evaluation_linear_model
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        # store the results
        train_mean_errors[r] = train_mean_error
        test_mean_errors[r] = test_mean_error
        train_stdev_errors[r] = train_stdev_error
        test_stdev_errors[r] = test_stdev_error

    # Now plot the results
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors, test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')
    ax.set_xscale('log')
def evaluate_scale(inputs, targets, folds, centres, reg_param, scales=None):
    """
    evaluate then plot the performance of different basis function scales
    """
    # choose a range of scales
    if scales is None:
        scales = np.logspace(0, 6, 20)  # of the basis functions
    #
    num_values = scales.size
    num_folds = len(folds)
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    #
    for s, scale in enumerate(scales):
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # s is the index, scale is the corresponding scale
        # cross validate with this scale (fixed regularisation parameter)
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        # store the results
        train_mean_errors[s] = train_mean_error
        test_mean_errors[s] = test_mean_error
        train_stdev_errors[s] = train_stdev_error
        test_stdev_errors[s] = test_stdev_error

    # Now plot the results
    fig, ax = plot_train_test_errors("scale", scales, train_mean_errors,
                                     test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(scales, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(scales, lower, upper, alpha=0.2, color='r')
    ax.set_xscale('log')
    # ax.set_xlim([0, 100])

    ax.set_title('Train vs Test Error Across Scales With Cross-Validation')
    fig.savefig("../plots/rbf_searching_scales_cross_validation.pdf",
                fmt="pdf")
Example #5
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction,
                                        test_error_linear):
    # for rbf feature mappings
    # for the centres of the basis functions choose 10% of the data
    n = inputs.shape[0]
    centres = inputs[
        np.random.choice([False, True], size=n, p=[0.90, 0.10]), :]
    print("centres shape = %r" % (centres.shape, ))

    # the width (analogous to standard deviation) of the basis functions
    scale = 8.5  # of the basis functions
    print("centres = %r" % (centres, ))
    print("scale = %r" % (scale, ))

    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    design_matrix = feature_mapping(inputs)

    train_part, test_part = train_and_test_split(n,
                                                 test_fraction=test_fraction)
    train_design_matrix, train_targets, test_design_matrix, test_targets = \
        train_and_test_partition(
            design_matrix, targets, train_part, test_part)

    # outputting the shapes of the train and test parts for debugging
    print("training design matrix shape = %r" % (train_design_matrix.shape, ))
    print("testing design matrix shape = %r" % (test_design_matrix.shape, ))
    print("training targets shape = %r" % (train_targets.shape, ))
    print("testing targets shape = %r" % (test_targets.shape, ) + "\n")

    # the rbf feature mapping performance
    reg_params = np.logspace(-15, 5, 20)
    train_errors = []
    test_errors = []

    for reg_param in reg_params:
        print("Evaluating reg. parameter " + str(reg_param))
        train_error, test_error = simple_evaluation_linear_model(
            design_matrix,
            targets,
            test_fraction=test_fraction,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)

    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params, train_errors,
                                     test_errors)

    # plotting a straight line showing the linear performance
    x_lim = ax.get_xlim()
    ax.plot(x_lim, test_error_linear * np.ones(2), 'g:')

    ax.set_xscale('log')
    ax.set_title('Evaluating RBF Performance')
    fig.savefig("../plots/rbf_vs_linear.pdf", fmt="pdf")
Example #6
def regression_with_regularization(inputs, targets, folds):
    reg_params = np.logspace(-10, 1)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # evaluate the test and train error for this regularisation parameter
        old_train, old_test = simple_evaluation_linear_model(
            inputs, targets, test_fraction=0.2, reg_param=reg_param)
        train_error, test_error, weights = cv_evaluation_linear_model(
            inputs, targets, folds, reg_param=reg_param)
        print("(train_error_without_cross, test_error_without_cross, "
              "train_error_with_cross, test_error_with_cross) = %r" %
              ((old_train, old_test, np.mean(train_error),
                np.mean(test_error)), ))
        # collect the errors
        train_errors.append(np.mean(train_error))
        test_errors.append(np.mean(test_error))
    # plot the results
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_errors, test_errors)
    ax.set_xscale('log')
def main(
        ifname, delimiter=None, columns=None, has_header=True,
        test_fraction=0.25):
    """
    To be called when the script is run. This function imports the data,
    produces exploratory plots, then fits a linear model and plots its
    train and test errors.

    parameters
    ----------
    ifname -- filename/path of data file. 
    delimiter -- delimiter of data values
    has_header -- does the data-file have a header line
    columns -- a list of integers specifying which columns of the file to import
        (counting from 0)    
    """
    # import the data and produce exploratory plots
    data, field_names = import_data(
            ifname, delimiter=delimiter, has_header=has_header, columns=columns)
    exploratory_plots(data, field_names)
    N = data.shape[0]
    inputs = data[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
    targets = data[:, 11]

    train_error_linear, test_error_linear = evaluate_linear_approx(
        inputs, targets, test_fraction)

    plot_train_test_errors(
        'degree', [0], train_error_linear, test_error_linear)

    plt.show()
def evaluate_reg_param(inputs, targets, folds, reg_params=None):
    """
      Evaluate then plot the performance of different regularisation parameters
    """
    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-2, 0)
    num_values = reg_params.size
    num_folds = len(folds)
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    #    
    for r, reg_param in enumerate(reg_params):
        # r is the index of reg_param, reg_param is the regularisation parameter
        # cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            inputs, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        # store the results
        train_mean_errors[r] = train_mean_error
        test_mean_errors[r] = test_mean_error
        train_stdev_errors[r] = train_stdev_error
        test_stdev_errors[r] = test_stdev_error

    # Now plot the results
    fig, ax = plot_train_test_errors(
        r"$\lambda$", reg_params, train_mean_errors, test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples. 
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors/np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors/np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors/np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors/np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')
    ax.set_xscale('log')
Example #9
def evaluate_rbf_for_various_reg_params(inputs, targets, test_fraction,
                                        test_error_linear):
    """
    """

    # for rbf feature mappings
    # for the centres of the basis functions choose 10% of the data
    N = inputs.shape[0]
    centres = inputs[np.random.choice([False, True], size=N, p=[0.9, 0.1]), :]
    print("centres.shape = %r" % (centres.shape, ))
    scale = 10.  # of the basis functions
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    train_part, test_part = train_and_test_split(N,
                                                 test_fraction=test_fraction)
    train_designmtx, train_targets, test_designmtx, test_targets = \
        train_and_test_partition(
            designmtx, targets, train_part, test_part)
    # output the shapes of the train and test parts for debugging
    print("train_designmtx.shape = %r" % (train_designmtx.shape, ))
    print("test_designmtx.shape = %r" % (test_designmtx.shape, ))
    print("train_targets.shape = %r" % (train_targets.shape, ))
    print("test_targets.shape = %r" % (test_targets.shape, ))
    # the rbf feature mapping performance
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        print("Evaluating reg_para " + str(reg_param))
        train_error, test_error = simple_evaluation_linear_model(
            designmtx,
            targets,
            test_fraction=test_fraction,
            reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)

    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params, train_errors,
                                     test_errors)
    # we also want to plot a straight line showing the linear performance
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    ax.set_xscale('log')
def evaluate_num_centres(inputs,
                         targets,
                         folds,
                         scale,
                         reg_param,
                         test_error_linear,
                         num_centres_sequence=None):
    """
      Evaluate, then plot the performance of different numbers of basis
      function centres.
    """

    # choosing a range of numbers of centres
    if num_centres_sequence is None:
        num_centres_sequence = np.linspace(
            start=0.01, stop=1,
            num=20)  # tested with 50, using 20 to speed things up

    num_values = num_centres_sequence.size
    num_folds = len(folds)

    # creating some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_st_dev_errors = np.zeros(num_values)
    test_st_dev_errors = np.zeros(num_values)

    n = inputs.shape[0]

    # running the experiments
    for c, centre_percentage in enumerate(num_centres_sequence):
        sample_fraction = centre_percentage
        p = (1 - sample_fraction, sample_fraction)
        # constructing the feature mapping anew for each number of centres
        centres = inputs[np.random.choice([False, True], size=n, p=p), :]
        # print("\ncentres.shape = %r" % (centres.shape,))
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # cross validate with this choice of centres
        # (the regularisation parameter is fixed)
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        # store the results
        train_mean_errors[c] = train_mean_error
        test_mean_errors[c] = test_mean_error
        train_st_dev_errors[c] = train_stdev_error
        test_st_dev_errors[c] = test_stdev_error

    # now plotting the results
    fig, ax = plot_train_test_errors("% of inputs as centres * 100",
                                     num_centres_sequence, train_mean_errors,
                                     test_mean_errors, test_error_linear)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_st_dev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_st_dev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='r')
    ax.set_ylim([0, 1])

    ax.set_title(
        'Train vs Test Error across Centre Proportion with Cross-validation')
    fig.savefig(
        "../plots/rbf/rbf_searching_number_centres_cross_validation.png",
        fmt="png")

    plt.show()
def evaluate_num_centres(inputs,
                         targets,
                         folds,
                         scale,
                         reg_param,
                         num_centres_sequence=None):
    """
      Evaluate then plot the performance of different numbers of basis
      function centres.
    """

    # choose a range of numbers of centres
    if num_centres_sequence is None:
        num_centres_sequence = np.arange(1, 20)
    num_values = num_centres_sequence.size
    num_folds = len(folds)
    #
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    #
    # run the experiments
    for c, num_centres in enumerate(num_centres_sequence):
        centres = np.linspace(0, 1, num_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # cross validate with this number of centres
        # (the regularisation parameter is fixed)
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        # store the results
        train_mean_errors[c] = train_mean_error
        test_mean_errors[c] = test_mean_error
        train_stdev_errors[c] = train_stdev_error
        test_stdev_errors[c] = test_stdev_error
    #
    # Now plot the results
    fig, ax = plot_train_test_errors("no. centres", num_centres_sequence,
                                     train_mean_errors, test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(num_centres_sequence, lower, upper, alpha=0.2, color='r')
    ax.set_title(
        'Train vs Test Error Across Centre Number With Cross-Validation')
    fig.savefig("../plots/rbf_searching_number_centres_cross_validation.pdf",
                fmt="pdf")
def evaluate_reg_param(inputs,
                       targets,
                       folds,
                       centres,
                       scale,
                       test_error_linear,
                       reg_params=None):
    """
      Evaluate, then plot the performance of different regularisation parameters.
    """

    # creating the feature mapping and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    design_matrix = feature_mapping(inputs)

    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-15, 5,
                                 30)  # choices of regularisation strength

    num_values = reg_params.size
    num_folds = len(folds)
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_st_dev_errors = np.zeros(num_values)
    test_st_dev_errors = np.zeros(num_values)

    print(
        'Calculating means and standard deviations of train and test errors...'
    )
    for r, reg_param in enumerate(reg_params):
        # r is the index of reg_param, reg_param is the regularisation parameter
        # cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            design_matrix, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_st_dev_error = np.std(train_errors)
        test_st_dev_error = np.std(test_errors)
        # storing the results
        train_mean_errors[r] = train_mean_error
        test_mean_errors[r] = test_mean_error
        train_st_dev_errors[r] = train_st_dev_error
        test_st_dev_errors[r] = test_st_dev_error

    # plotting the results
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors, test_mean_errors,
                                     test_error_linear)

    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_st_dev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_st_dev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_st_dev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')

    ax.set_xscale('log')
    ax.set_ylim([0, 1])

    ax.set_title(
        'Train vs Test Error across Reg. Param. with Cross-validation')
    fig.savefig("../plots/rbf/rbf_searching_reg_params_cross_validation.png",
                fmt="png")

    plt.show()
def parameter_search_rbf(inputs, targets, test_fraction, folds):
    """
    Grid-search RBF scale and regularisation strength with cross-validation,
    plot the errors, and return the best joint choice of parameters.
    """

    n = inputs.shape[0]

    # for the centres of the basis functions sample 5% of the data
    sample_fraction = 0.05
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=n, p=p), :]
    print("\ncentres.shape = %r" % (centres.shape, ))

    scales = np.logspace(0, 4, 20)  # of the basis functions
    reg_params = np.logspace(-16, -1, 20)  # choices of regularisation strength
    # create empty 2d arrays to store the train and test errors
    train_mean_errors = np.empty((scales.size, reg_params.size))
    test_mean_errors = np.empty((scales.size, reg_params.size))

    # iterate over the scales
    for i, scale in enumerate(scales):
        # i is the index, scale is the corresponding scale
        # we must recreate the feature mapping each time for different scales
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)

        # iterating over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # j is the index, reg_param is the corresponding regularisation
            # parameter
            # train and test the data
            train_error, test_error = cv_evaluation_linear_model(
                designmtx, targets, folds, reg_param=reg_param)
            # store the train and test errors in our 2d arrays
            train_mean_errors[i, j] = np.mean(train_error)
            test_mean_errors[i, j] = np.mean(test_error)

    # we have a 2d array of train and test errors, we want to know the (i,j)
    # index of the best value
    best_i_correct, best_j_correct = np.unravel_index(
        np.argmin(test_mean_errors), test_mean_errors.shape)
    print("\nBest joint choice of parameters:")
    print("\tscale %.2g and lambda = %.2g" %
          (scales[best_i_correct], reg_params[best_j_correct]))

    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors("scale", scales,
                                     train_mean_errors[:, best_j_correct],
                                     test_mean_errors[:, best_j_correct])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Scales')
    fig.savefig("../plots/rbf_searching_scales.pdf", fmt="pdf")

    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors[best_i_correct, :],
                                     test_mean_errors[best_i_correct, :])
    ax.set_xscale('log')
    ax.set_title('Train vs Test Error Across Reg Params')
    fig.savefig("../plots/rbf_searching_reg_params.pdf", fmt="pdf")
    '''
    # using the best parameters found above,
    # we now vary the number of centres and evaluate the performance
    reg_param = reg_params[best_j_correct]
    scale = scales[best_i_correct]
    n_centres_seq = np.arange(1, 20)
    train_errors = []
    test_errors = []
    for n_centres in n_centres_seq:
        # constructing the feature mapping anew for each number of centres
        centres = np.linspace(0, 1, n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        design_matrix = feature_mapping(inputs)

        # evaluating the test and train error for the given regularisation parameter and scale
        train_error, test_error = cv_evaluation_linear_model(
            design_matrix, targets, folds, reg_param=reg_param)

        # collecting the errors
        train_errors.append(train_error)
        test_errors.append(test_error)

    # plotting the results
    fig, ax = plot_train_test_errors(
        "no. centres", n_centres_seq, train_errors, test_errors)
    ax.set_title('Train vs Test Error Across Centre Number')
    fig.savefig("../plots/rbf_searching_number_centres.pdf", fmt="pdf")
    '''

    return scales[best_i_correct], reg_params[best_j_correct]
Example #14
def parameter_search_rbf_without_cross(inputs, targets, test_fraction,
                                       test_error_linear, normalize=True):
    """
    Grid-search centre fraction, RBF scale and regularisation strength on a
    single train/test split, then plot the train and test errors.
    """
    if normalize:
        # normalise inputs (meaning radial basis functions are more helpful)
        for i in range(inputs.shape[1]):
            inputs[:, i] = ((inputs[:, i] - np.mean(inputs[:, i]))
                            / np.std(inputs[:, i]))
    N = inputs.shape[0]

    # candidate fractions of the data to sample as centres
    sample_fractions = np.array([0.05, 0.1, 0.15, 0.2, 0.25])
    scales = np.logspace(0, 4, 20)  # of the basis functions
    reg_params = np.logspace(-16, -1, 20)  # choices of regularisation strength
    # create empty 3d arrays to store the train and test errors
    train_mean_errors = np.empty(
        (sample_fractions.size, scales.size, reg_params.size))
    test_mean_errors = np.empty(
        (sample_fractions.size, scales.size, reg_params.size))

    # randomly generate a train/test split for data of size N
    # (returns two boolean arrays)
    train_part, test_part = train_and_test_split(N,
                                                 test_fraction=test_fraction)
    best_k = 0
    best_i = 0
    best_j = 0
    test_error_temp = np.inf

    # loop over the candidate centre fractions (5%, 10%, 15%, 20%, 25%)
    for k, sample_fraction in enumerate(sample_fractions):
        p = (1 - sample_fraction, sample_fraction)
        centres = inputs[np.random.choice([False, True], size=N, p=p), :]
        # iterate over the scales
        for i, scale in enumerate(scales):
            # we must recreate the feature mapping each time for different
            # scales
            feature_mapping = construct_rbf_feature_mapping(centres, scale)
            designmtx = feature_mapping(inputs)
            # partition the design matrix and targets into train and test:
            # the boolean arrays train_part and test_part select the rows
            # of the design matrix (the targets are split in the same way)
            train_designmtx, train_targets, test_designmtx, test_targets = \
                train_and_test_partition(designmtx, targets, train_part,
                                         test_part)
            # iterate over the regularisation parameters
            for j, reg_param in enumerate(reg_params):
                # train and test the model
                train_error, test_error, weights = train_and_test(
                    train_designmtx, train_targets, test_designmtx,
                    test_targets, reg_param=reg_param)
                # store the train and test errors in our 3d arrays
                train_mean_errors[k, i, j] = train_error
                test_mean_errors[k, i, j] = test_error
                # when we find a new lowest test error, store its indices
                if test_error < test_error_temp:
                    test_error_temp = test_error
                    best_k = k
                    best_i = i
                    best_j = j
    print("The value with the lowest test error is:",
          test_mean_errors[best_k][best_i][best_j])
    print("Best joint choice of parameters: sample fraction %.2g, "
          "scale %.2g and lambda = %.2g" %
          (sample_fractions[best_k], scales[best_i], reg_params[best_j]))
    
    
    # now we can plot the error for different scales using the best
    # regularisation choice and centre fraction
    fig, ax = plot_train_test_errors("scale", scales,
                                     train_mean_errors[best_k, :, best_j],
                                     test_mean_errors[best_k, :, best_j])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best reg. parameter & centres',
                 fontsize=10)
    # plot the linear-regression test error as a reference line
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    
    
    # ...and the error for different regularisation choices given the best
    # scale choice and centre fraction
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors[best_k, best_i, :],
                                     test_mean_errors[best_k, best_i, :])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best scale parameter & centres',
                 fontsize=10)
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    # ax.set_ylim([0, 20])
    
    
    # ...and the error for different centre fractions given the best
    # reg. parameter and scale choice
    fig, ax = plot_train_test_errors("sample fractions", sample_fractions,
                                     train_mean_errors[:, best_i, best_j],
                                     test_mean_errors[:, best_i, best_j])
    fig.suptitle('RBF regression for the best scale parameter & reg. parameter',
                 fontsize=10)
    ax.set_xlim([0.05, 0.25])
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
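
The per-column normalisation loop at the top of this example can be written as one vectorised expression; a minimal sketch in plain numpy:

import numpy as np

def zscore_normalise(inputs):
    """Normalise each column to zero mean and unit standard deviation."""
    return (inputs - inputs.mean(axis=0)) / inputs.std(axis=0)

# example with three 2-dimensional data points
X = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
print(zscore_normalise(X).mean(axis=0))  # approximately [0, 0]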
Example #15
def parameter_search_rbf_cross(inputs, targets, folds, test_error_linear,
                               test_inputs, test_targets, normalize=True):
    """
    Take the raw data and targets, the folds for cross-validation and the
    linear-model test error (for plotting); grid-search the RBF parameters,
    then evaluate the best model on the held-out test set.
    """
    if normalize:
        # normalise inputs (meaning radial basis functions are more helpful);
        # note the test inputs are scaled with their own statistics here
        # (see the sketch after this example for the usual alternative)
        for i in range(inputs.shape[1]):
            inputs[:, i] = ((inputs[:, i] - np.mean(inputs[:, i]))
                            / np.std(inputs[:, i]))
            test_inputs[:, i] = ((test_inputs[:, i]
                                  - np.mean(test_inputs[:, i]))
                                 / np.std(test_inputs[:, i]))
    N = inputs.shape[0]

    # candidate fractions of the data to sample as centres
    sample_fractions = np.array([0.05, 0.1, 0.15, 0.2, 0.25])
    scales = np.logspace(0, 4, 20)  # of the basis functions
    reg_params = np.logspace(-16, -1, 20)  # choices of regularisation strength
    # create empty 3d arrays to store the train and test errors
    train_mean_errors = np.empty(
        (sample_fractions.size, scales.size, reg_params.size))
    test_mean_errors = np.empty(
        (sample_fractions.size, scales.size, reg_params.size))

    best_k = 0
    best_i = 0
    best_j = 0
    test_error_temp = np.inf
    
    # loop over the candidate centre fractions (5%, 10%, 15%, 20%, 25%)
    for k, sample_fraction in enumerate(sample_fractions):
        p = (1 - sample_fraction, sample_fraction)
        centres = inputs[np.random.choice([False, True], size=N, p=p), :]
        # iterate over the scales
        for i, scale in enumerate(scales):
            # we must recreate the feature mapping each time for different
            # scales
            feature_mapping = construct_rbf_feature_mapping(centres, scale)
            designmtx = feature_mapping(inputs)
            # iterate over the regularisation parameters
            for j, reg_param in enumerate(reg_params):
                # cross validate with this regularisation parameter
                train_error, test_error, weights = cv_evaluation_linear_model(
                    designmtx, targets, folds, reg_param=reg_param)
                # when we find a new lowest mean test error, store its
                # indices and remember the corresponding model
                if np.mean(test_error) < test_error_temp:
                    test_error_temp = np.mean(test_error)
                    best_k = k
                    best_i = i
                    best_j = j
                    optimal_weights = weights
                    optimal_feature_mapping = feature_mapping
                # store the mean train and test errors in our 3d arrays
                train_mean_errors[k, i, j] = np.mean(train_error)
                test_mean_errors[k, i, j] = np.mean(test_error)

    print("The lowest mean test error at the training stage is:",
          test_mean_errors[best_k][best_i][best_j])
    print("Best joint choice of parameters: sample fraction %.2g, "
          "scale %.2g and lambda = %.2g" %
          (sample_fractions[best_k], scales[best_i], reg_params[best_j]))
    
    
    # now we can plot the error for different scales using the best
    # regularisation choice and centre fraction
    fig, ax = plot_train_test_errors("scale", scales,
                                     train_mean_errors[best_k, :, best_j],
                                     test_mean_errors[best_k, :, best_j])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best reg. parameter & centres '
                 'using cross-validation', fontsize=10)
    # plot the linear-regression test error as a reference line
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    
    
    # ...and the error for different regularisation choices given the best
    # scale choice and centre fraction
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors[best_k, best_i, :],
                                     test_mean_errors[best_k, best_i, :])
    ax.set_xscale('log')
    fig.suptitle('RBF regression for the best scale parameter & centres '
                 'using cross-validation', fontsize=10)
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    # ax.set_ylim([0, 20])
    
    
    # ...and the error for different centre fractions given the best
    # reg. parameter and scale choice
    fig, ax = plot_train_test_errors("sample fractions", sample_fractions,
                                     train_mean_errors[:, best_i, best_j],
                                     test_mean_errors[:, best_i, best_j])
    fig.suptitle('RBF regression for the best scale parameter & reg. '
                 'parameter using cross-validation', fontsize=10)
    ax.set_xlim([0.05, 0.25])
    xlim = ax.get_xlim()
    ax.plot(xlim, test_error_linear * np.ones(2), 'g:')
    
    predictive_func = construct_feature_mapping_approx(
        optimal_feature_mapping, optimal_weights)
    final_error = root_mean_squared_error(test_targets,
                                          predictive_func(test_inputs))
    print("final test error for RBF model:", final_error)
def main(ifname,
         delimiter=None,
         columns=None,
         has_header=True,
         test_fraction=0.25):
    data, field_names = import_data(ifname,
                                    delimiter=delimiter,
                                    has_header=has_header,
                                    columns=columns)

    # Exploratory Data Analysis (EDA)
    # (note: this reads a fixed file rather than the ifname argument)
    raw_data = pd.read_csv('datafile.csv', sep=";")

    # view the correlation coefficients, where |r|=1 is the strongest
    # relation and |r|=0 the weakest
    df = pd.DataFrame(data=raw_data)
    print(df.corr())

    # view data if it is normally distributed
    plt.hist(raw_data["quality"],
             range=(1, 10),
             edgecolor='black',
             linewidth=1)
    plt.xlabel('quality')
    plt.ylabel('amount of samples')
    plt.title("distribution of red wine quality")

    # feature selection
    import scipy.stats as stats
    from scipy.stats import chi2_contingency

    class ChiSquare:
        def __init__(self, dataframe):
            self.df = dataframe
            self.p = None  # P-Value
            self.chi2 = None  # Chi Test Statistic
            self.dof = None

            self.dfObserved = None
            self.dfExpected = None

        def _print_chisquare_result(self, colX, alpha):
            result = ""
            if self.p < alpha:
                result = "{0} is IMPORTANT for Prediction".format(colX)
            else:
                result = "{0} is NOT an important predictor. (Discard {0} from model)".format(
                    colX)

            print(result)

        def TestIndependence(self, colX, colY, alpha=0.05):
            X = self.df[colX].astype(str)
            Y = self.df[colY].astype(str)

            self.dfObserved = pd.crosstab(Y, X)
            chi2, p, dof, expected = stats.chi2_contingency(
                self.dfObserved.values)
            self.p = p
            self.chi2 = chi2
            self.dof = dof

            self.dfExpected = pd.DataFrame(expected,
                                           columns=self.dfObserved.columns,
                                           index=self.dfObserved.index)

            self._print_chisquare_result(colX, alpha)
            print('self:%s' % (self), self.chi2, self.p)

    # Initialize ChiSquare Class
    cT = ChiSquare(raw_data)

    # Feature Selection
    testColumns = [
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol"
    ]
    for var in testColumns:
        cT.TestIndependence(colX=var, colY="quality")
    # split data into inputs and targets
    inputs = data[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]
    targets = data[:, 11]
    # mean normalisation
    fixed_acidity = inputs[:, 0]
    volatile_acidity = inputs[:, 1]
    citric_acid = inputs[:, 2]
    residual_sugar = inputs[:, 3]
    chlorides = inputs[:, 4]
    free_sulfur_dioxide = inputs[:, 5]
    total_sulfur_dioxide = inputs[:, 6]
    density = inputs[:, 7]
    ph = inputs[:, 8]
    sulphates = inputs[:, 9]
    alcohol = inputs[:, 10]

    # draw plot of data set
    normalised_data = np.column_stack((inputs, targets))
    exploratory_plots(normalised_data, field_names)

    # add a column of ones (x0)
    inputs[:, 0] = np.ones(len(targets))
    # normalize data
    inputs[:, 1] = (volatile_acidity -
                    np.mean(volatile_acidity)) / np.std(volatile_acidity)
    inputs[:, 2] = (citric_acid - np.mean(citric_acid)) / np.std(citric_acid)
    inputs[:, 7] = (density - np.mean(density)) / np.std(density)
    inputs[:, 9] = (sulphates - np.mean(sulphates)) / np.std(sulphates)
    inputs[:, 10] = (alcohol - np.mean(alcohol)) / np.std(alcohol)
    # run all experiments on the same train-test split of the data
    train_part, test_part = train_and_test_split(inputs.shape[0],
                                                 test_fraction=test_fraction)

    # another evaluation function
    def rsquare(test_targets, test_predicts):
        y_mean = np.mean(test_targets)
        ss_tot = sum((test_targets - y_mean)**2)
        ss_res = sum((test_targets - test_predicts)**2)
        rsquare = 1 - (ss_res / ss_tot)
        return rsquare
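
    # For reference, rsquare computes the coefficient of determination
    # R^2 = 1 - SS_res / SS_tot: 1 for a perfect fit, 0 for a model no
    # better than predicting the mean, and negative when worse than that.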

    print(
        '---------------------------Linear Regression-----------------------------------'
    )

    # linear regression on the selected columns
    # (column 0 is the column of ones added above)
    inputs = inputs[:, [0, 1, 2, 7, 9, 10]]
    #train_part, test_part = train_and_test_split(inputs.shape[0], test_fraction=test_fraction)
    train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
        inputs, targets, train_part, test_part)
    weights = ml_weights(train_inputs, train_targets)
    train_predicts = linear_model_predict(train_inputs, weights)
    test_predicts = linear_model_predict(test_inputs, weights)
    train_error = root_mean_squared_error(train_targets, train_predicts)
    test_error = root_mean_squared_error(test_targets, test_predicts)
    print("LR-train_weights", weights)
    print("LR-train_error", train_error)
    print("LR-test_error", test_error)
    print("LR-rsquare score", rsquare(test_targets, test_predicts))
    print("LR-prediction:", test_predicts[:20], "LR-original",
          test_targets[:20])

    print(
        '----------------Regularised Linear Regression-----------------------------'
    )

    #regularised linear regression
    reg_params = np.logspace(-15, -4, 11)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # print("RLR-Evaluating reg_para " + str(reg_param))
        train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
            inputs, targets, train_part, test_part)
        reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                             reg_param)
        train_predicts = linear_model_predict(train_inputs, reg_weights)
        test_predicts = linear_model_predict(test_inputs, reg_weights)
        train_error = root_mean_squared_error(train_targets, train_predicts)
        test_error = root_mean_squared_error(test_targets, test_predicts)
        train_errors.append(train_error)
        test_errors.append(test_error)

    #best lambda
    test_errors = np.array(test_errors)
    best_l = np.argmin(test_errors)
    print("RLR-Best joint choice of parameters:")
    print("RLR-lambda = %.2g" % (reg_params[best_l]))
    # plot train_test_errors in different reg_params
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params, train_errors,
                                     test_errors)
    ax.set_xscale('log')
    reg_weights = regularised_ml_weights(train_inputs, train_targets,
                                         reg_params[best_l])
    print("RLR-train_weights", reg_weights)
    print("RLR-train_error", train_errors[best_l])
    print("RLR-test_error", test_errors[best_l])
    print("RLR-rsquare score", rsquare(test_targets, test_predicts))
    print("RLR-prediction:", test_predicts[:20], "RLR-original",
          test_targets[:20])

    print(
        '-----------------------------kNN Regression------------------------------------'
    )

    # KNN-regression
    # drop the x0=1 column
    inputs = inputs[:, [1, 2, 3, 4, 5]]

    train_errors = []
    test_errors = []
    K = range(2, 9)
    for k in K:
        train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
            inputs, targets, train_part, test_part)
        knn_approx = construct_knn_approx(train_inputs, train_targets, k)
        train_knn_predicts = knn_approx(train_inputs)
        train_error = root_mean_squared_error(train_knn_predicts,
                                              train_targets)
        test_knn_predicts = knn_approx(test_inputs)
        test_error = root_mean_squared_error(test_knn_predicts, test_targets)
        train_errors.append(train_error)
        test_errors.append(test_error)
        # print("knn_predicts: ", np.around(test_knn_predicts), "knn-original", test_targets)

    #best k
    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    best_k = np.argmin(test_errors)
    print("Best joint choice of parameters:")
    print("k = %.2g" % (K[best_k]))
    fig, ax = plot_train_test_errors("K", K, train_errors, test_errors)
    ax.set_xticks(np.arange(min(K), max(K) + 1, 1.0))

    print("kNN-train_error", train_errors[-1])
    print("kNN-test_error", test_errors[-1])
    knn_approx = construct_knn_approx(train_inputs, train_targets, k=3)
    test_predicts = knn_approx(test_inputs)
    print("kNN-rsquare score", rsquare(test_targets, test_predicts))
    print("kNN-y_predicts", test_predicts[:20], 'y_original',
          test_targets[:20])

    print(
        '----------------------------RBF Function-------------------------------------'
    )

    # Radial Basis Functions
    # for the centres of the basis functions sample 15% of the data
    sample_fraction = 0.15
    p = (1 - sample_fraction, sample_fraction)
    centres = inputs[np.random.choice([False, True], size=inputs.shape[0],
                                      p=p), :]
    print("centres.shape = %r" % (centres.shape, ))
    scales = np.logspace(0, 2, 17)  # of the basis functions
    reg_params = np.logspace(-15, -4, 11)  # choices of regularisation strength
    # create empty 2d arrays to store the train and test errors
    train_errors = np.empty((scales.size, reg_params.size))
    test_errors = np.empty((scales.size, reg_params.size))
    # iterate over the scales
    for i, scale in enumerate(scales):
        # i is the index, scale is the corresponding scale
        # we must recreate the feature mapping each time for different scales
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        # partition the design matrix and targets into train and test
        train_designmtx, train_targets, test_designmtx, test_targets = \
            train_and_test_partition(designmtx, targets, train_part, test_part)
        # iterate over the regularisation parameters
        for j, reg_param in enumerate(reg_params):
            # j is the index, reg_param is the corresponding regularisation
            # parameter
            # train and test the data
            train_error, test_error = train_and_test(train_designmtx,
                                                     train_targets,
                                                     test_designmtx,
                                                     test_targets,
                                                     reg_param=reg_param)
            # store the train and test errors in our 2d arrays
            train_errors[i, j] = train_error
            test_errors[i, j] = test_error
    # we have a 2d array of train and test errors, we want to know the (i,j)
    # index of the best value
    best_i, best_j = np.unravel_index(np.argmin(test_errors),
                                      test_errors.shape)
    print("Best joint choice of parameters:")
    print("\tscale= %.2g and lambda = %.2g" %
          (scales[best_i], reg_params[best_j]))
    # now we can plot the error for different scales using the best
    # regularisation choice
    fig, ax = plot_train_test_errors("scale", scales, train_errors[:, best_j],
                                     test_errors[:, best_j])
    ax.set_xscale('log')
    # ...and the error for different regularisation choices given the best
    # scale choice
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_errors[best_i, :],
                                     test_errors[best_i, :])
    ax.set_xscale('log')
    feature_mapping = construct_rbf_feature_mapping(centres, scales[best_i])
    reg_weights = regularised_ml_weights(train_designmtx, train_targets,
                                         reg_params[best_j])
    # predictions on the test set (np.matrix is deprecated; use @ instead)
    test_predicts = np.array(test_designmtx) @ np.array(reg_weights).flatten()

    print("RBF-train_error", train_errors[best_i, best_j])
    print("RBF-test_error", test_errors[best_i, best_j])
    print("RBF-rsquare score", rsquare(test_targets, test_predicts))
    print('RBF_y_predicts: ', test_predicts[:20], 'rbf_y_originals: ',
          test_targets[:20])

    print(
        '-----------------------------Polynomial---------------------------------------'
    )

    # Polynomial Basis Function
    # expand each input column to monomials and sum the expansions
    degrees = range(1, 10)
    train_errors = []
    test_errors = []
    for degree in degrees:
        processed_inputs = 0
        for i in range(inputs.shape[1]):
            processed_input = expand_to_monomials(inputs[:, i], degree)
            processed_inputs += processed_input
        processed_inputs = np.array(processed_inputs)
        # split data into train and test set
        processed_train_inputs, train_targets, processed_test_inputs, test_targets = train_and_test_partition\
            (processed_inputs, targets, train_part, test_part)
        train_error, test_error = train_and_test(processed_train_inputs,
                                                 train_targets,
                                                 processed_test_inputs,
                                                 test_targets,
                                                 reg_param=None)
        # note: reg_param here is the leftover value from the previous loop
        weights = regularised_least_squares_weights(processed_train_inputs,
                                                    train_targets, reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)

    train_errors = np.array(train_errors)
    test_errors = np.array(test_errors)
    print("Polynomial-train error: ", train_errors[-1])
    print("Polynomial-test error: ", test_errors[-1])
    best_d = np.argmin(test_errors)
    print("Best joint choice of degree:")
    final_degree = degrees[best_d]
    print("degree = %.2g" % (final_degree))
    fig, ax = plot_train_test_errors("Degree", degrees, train_errors,
                                     test_errors)
    ax.set_xticks(np.arange(min(degrees), max(degrees) + 1, 1.0))

    # test functionality with the final degree
    processed_inputs = 0
    for i in range(inputs.shape[1]):
        processed_input = expand_to_monomials(inputs[:, i], final_degree)
        processed_inputs += processed_input
    processed_inputs = np.array(processed_inputs)

    processed_train_inputs, train_targets, processed_test_inputs, test_targets = train_and_test_partition \
        (processed_inputs, targets, train_part, test_part)
    train_error, test_error = train_and_test(processed_train_inputs,
                                             train_targets,
                                             processed_test_inputs,
                                             test_targets,
                                             reg_param=None)
    # fit the final weights without regularisation (reg_param=0) to match the
    # reg_param=None evaluation above
    weights = regularised_least_squares_weights(processed_train_inputs,
                                                train_targets, 0)
    # print("processed_train_inputs.shape", processed_train_inputs.shape)
    # print('weights: ', weights, 'weights shape: ', weights.shape)
    test_predicts = prediction_function(processed_test_inputs, weights,
                                        final_degree)
    print("Polynomial-rsquare score", rsquare(test_targets, test_predicts))
    print('Polynomial-y_predicts: ', test_predicts[:20],
          'Polynomial-y_original: ', test_targets[:20])
    plt.show()
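# The polynomial section above relies on helpers defined elsewhere in this
# codebase (`expand_to_monomials`, `rsquare`, `prediction_function`). The
# sketches below show only the behaviour the calls above appear to assume
# (using the module-level numpy import); they are illustrative stand-ins,
# not the original implementations.
def expand_to_monomials_sketch(xs, degree):
    """Expand a 1d input vector into columns [x^0, x^1, ..., x^degree]."""
    return np.array([[x**p for p in range(degree + 1)] for x in xs])

def rsquare_sketch(targets, predicts):
    """Coefficient of determination: 1 - SS_res / SS_tot."""
    targets = np.array(targets).flatten()
    predicts = np.array(predicts).flatten()
    ss_res = np.sum((targets - predicts)**2)
    ss_tot = np.sum((targets - np.mean(targets))**2)
    return 1 - ss_res / ss_tot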
def evaluate_reg_param(inputs,
                       targets,
                       folds,
                       centres,
                       scale,
                       reg_params=None):
    """
      Evaluate then plot the performance of different regularisation parameters
    """
    # create the feature mappoing and then the design matrix
    feature_mapping = construct_rbf_feature_mapping(centres, scale)
    designmtx = feature_mapping(inputs)
    # choose a range of regularisation parameters
    if reg_params is None:
        reg_params = np.logspace(-15, 0)
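        # (np.logspace returns 50 sample points by default)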
    num_values = reg_params.size
    num_folds = len(folds)
    # create some arrays to store results
    train_mean_errors = np.zeros(num_values)
    test_mean_errors = np.zeros(num_values)
    train_stdev_errors = np.zeros(num_values)
    test_stdev_errors = np.zeros(num_values)
    #
    for r, reg_param in enumerate(reg_params):
        # r is the index of reg_param, reg_param is the regularisation parameter
        # cross validate with this regularisation parameter
        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, targets, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        # store the results
        train_mean_errors[r] = train_mean_error
        test_mean_errors[r] = test_mean_error
        train_stdev_errors[r] = train_stdev_error
        test_stdev_errors[r] = test_stdev_error

    # get the test error when no regularisation parameter is used
    blank, test_errors_without_reg = cv_evaluation_linear_model(designmtx,
                                                                targets,
                                                                folds,
                                                                reg_param=None)
    test_mean_error_without_reg_param = np.mean(test_errors_without_reg)

    # Now plot the results
    fig, ax = plot_train_test_errors(r"$\lambda$", reg_params,
                                     train_mean_errors, test_mean_errors)
    # Here we plot the error ranges too: mean plus/minus 1 standard error.
    # 1 standard error is the standard deviation divided by sqrt(n) where
    # n is the number of samples.
    # (There are other choices for error bars.)
    # train error bars
    lower = train_mean_errors - train_stdev_errors / np.sqrt(num_folds)
    upper = train_mean_errors + train_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='b')
    # test error bars
    lower = test_mean_errors - test_stdev_errors / np.sqrt(num_folds)
    upper = test_mean_errors + test_stdev_errors / np.sqrt(num_folds)
    ax.fill_between(reg_params, lower, upper, alpha=0.2, color='r')
    # plot a green line to represent the test error without regularisation
    xlim = ax.get_xlim()
    ax.plot(xlim, test_mean_error_without_reg_param * np.ones(2), 'g:')
    ax.set_xscale('log')
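# `cv_evaluation_linear_model` is not shown in this excerpt. A minimal sketch
# of what it is assumed to do, written in terms of the helpers used above and
# assuming each fold is a (train_part, test_part) pair of partition masks:
def cv_evaluation_linear_model_sketch(designmtx, targets, folds,
                                      reg_param=None):
    train_errors, test_errors = [], []
    for train_part, test_part in folds:
        # partition the design matrix and targets for this fold
        train_dm, train_t, test_dm, test_t = train_and_test_partition(
            designmtx, targets, train_part, test_part)
        # train and evaluate a (possibly regularised) linear model
        train_error, test_error = train_and_test(
            train_dm, train_t, test_dm, test_t, reg_param=reg_param)
        train_errors.append(train_error)
        test_errors.append(test_error)
    return np.array(train_errors), np.array(test_errors)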
def main():
    """
    This function contains example code that demonstrates how to use the 
    functions defined in poly_fit_base for fitting polynomial curves to data.
    """

    # specify the centres of the rbf basis functions
    centres = np.linspace(0,1,7)
    # the width (analogous to standard deviation) of the basis functions
    scale = 0.15
    print("centres = %r" % (centres,))
    print("scale = %r" % (scale,))
    feature_mapping = construct_rbf_feature_mapping(centres,scale)  
    datamtx = np.linspace(0,1, 51)
    designmtx = feature_mapping(datamtx)
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    for colid in range(designmtx.shape[1]):
        ax.plot(datamtx, designmtx[:, colid])
    ax.set_xlim([0,1])
    ax.set_xticks([0,1])
    ax.set_yticks([0,1])

    # choose number of data-points and sample a pair of vectors: the input
    # values and the corresponding target values
    N = 20
    inputs, targets = sample_data(N, arbitrary_function_1, seed=37)
    # define the feature mapping for the data
    feature_mapping = construct_rbf_feature_mapping(centres,scale)  
    # now construct the design matrix
    designmtx = feature_mapping(inputs)
    #
    # find the weights that fit the data in a least squares way
    weights = ml_weights(designmtx, targets)
    # use weights to create a function that takes inputs and returns predictions
    # in python, functions can be passed just like any other object
    # those who know MATLAB might call this a function handle
    rbf_approx = construct_feature_mapping_approx(feature_mapping, weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_approx, inputs, targets, arbitrary_function_1)
    ax.legend(lines, ['true function', 'data', 'linear approx'])
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf.pdf", fmt="pdf")

    # for a single choice of regularisation strength we can plot the
    # approximating function
    reg_param = 10**-3
    reg_weights = regularised_ml_weights(
        designmtx, targets, reg_param)
    rbf_reg_approx = construct_feature_mapping_approx(feature_mapping, reg_weights)
    fig, ax, lines = plot_function_data_and_approximation(
        rbf_reg_approx, inputs, targets, arbitrary_function_1)
    ax.set_xticks([])
    ax.set_yticks([])
    fig.tight_layout()
    fig.savefig("regression_rbf_basis_functions_reg.pdf", fmt="pdf")

    # to find a good regularisation parameter, we can perform a parameter
    # search (a naive way to do this is to simply try a sequence of
    # reasonable values within a reasonable range)
    
    # sample some training and testing inputs
    train_inputs, train_targets = sample_data(N, arbitrary_function_1, seed=37)
    # we need to use a different seed for our test data, otherwise some of our
    # sampled points will be the same
    test_inputs, test_targets = sample_data(100, arbitrary_function_1, seed=82)
    # convert the raw inputs into feature vectors (construct design matrices)
    train_designmtx = feature_mapping(train_inputs)
    test_designmtx = feature_mapping(test_inputs)
    # now we're going to evaluate train and test error for a sequence of
    # potential regularisation strengths storing the results
    reg_params = np.logspace(-5,1)
    train_errors = []
    test_errors = []
    for reg_param in reg_params:
        # evaluate the test and train error for this regularisation parameter
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "$\lambda$", reg_params, train_errors, test_errors)        
    ax.set_xscale('log')


    # we may also be interested in choosing the right number of centres, or
    # the right width/scale of the rbf functions.
    # Here we vary the width and evaluate the performance
    reg_param = 10**-3
    scales = np.logspace(-2,0)
    train_errors = []
    test_errors = []
    for scale in scales:
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres,scale)  
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the test and train error for this scale
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "scale", scales, train_errors, test_errors)        
    ax.set_xscale('log')

    # Here we vary the number of centres and evaluate the performance
    reg_param = 10**-3
    scale = 0.15
    n_centres_seq = np.arange(3,20)
    train_errors = []
    test_errors = []
    for n_centres in n_centres_seq:
        # we must construct the feature mapping anew for each number of centres
        centres = np.linspace(0,1,n_centres)
        feature_mapping = construct_rbf_feature_mapping(centres,scale)  
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # evaluate the test and train error for this number of centres
        train_error, test_error = train_and_test(
            train_designmtx, train_targets, test_designmtx, test_targets,
            reg_param=reg_param)
        # collect the errors
        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    fig, ax = plot_train_test_errors(
        "Num. Centres", n_centres_seq, train_errors, test_errors)        
    plt.show()
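# `ml_weights` and `regularised_ml_weights` are assumed to solve the usual
# least-squares normal equations,
#   w = (Phi^T Phi)^{-1} Phi^T t   and   w = (lambda*I + Phi^T Phi)^{-1} Phi^T t.
# Minimal sketches under that assumption (illustrative only, not the
# original implementations):
def ml_weights_sketch(designmtx, targets):
    Phi = np.array(designmtx)
    t = np.array(targets).flatten()
    return np.linalg.solve(Phi.T @ Phi, Phi.T @ t)

def regularised_ml_weights_sketch(designmtx, targets, reg_param):
    Phi = np.array(designmtx)
    t = np.array(targets).flatten()
    I = np.identity(Phi.shape[1])
    return np.linalg.solve(reg_param * I + Phi.T @ Phi, Phi.T @ t)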
def bayesian_regression_entry_point(data):
    """
    This function contains example code that demonstrates how to use the 
    functions defined in poly_fit_base for fitting polynomial curves to data.
    """

    data_targets = data[:, -1]
    data = data[:, 0:11]

    print(data)
    print(data_targets)
    # standardise each input column to zero mean and unit variance
    for i in range(data.shape[1]):
        data[:, i] = (data[:, i] - np.mean(data[:, i])) / np.std(data[:, i])
    print("standard deviation is %s" % str(np.std(data, axis=0)))

    inputs = data[0:960, :]
    targets = data_targets[0:960]
    test_inputs = data[1300:1599, :]
    test_targets = data_targets[1300:1599]

    # specify the centres of the rbf basis functions
    N = inputs.shape[0]
    centres1 = inputs[np.random.choice([False, True], size=N, p=[0.9, 0.1]), :]
    # centres1 = data[10,:]
    # centres1 = np.linspace(4,20,10)
    print(centres1)

    # the width (analogous to standard deviation) of the basis functions
    scale = 47
    print("centres = %r" % (centres1, ))
    print("scale = %r" % (scale, ))
    # create the feature mapping
    feature_mapping = construct_rbf_feature_mapping(centres1, scale)
    # (plotting of the basis functions is omitted in this example)

    # sample number of data-points: inputs and targets
    # define the noise precision of our data
    beta = (1 / 0.01)**2
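    # i.e. beta = 1/sigma^2 for an assumed noise standard deviation of 0.01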
    # now construct the design matrix for the inputs
    designmtx = feature_mapping(inputs)
    test_designmtx = feature_mapping(test_inputs)
    print(designmtx.shape)
    # the number of features is the width of this matrix
    M = designmtx.shape[1]
    # define a prior mean and covariance matrix
    # m0 = np.random.randn(M)
    m0 = np.zeros(M)
    print("m0 equals %r" % (m0,))
    alpha = 50
    S0 = alpha * np.identity(M)
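    # note: S0 = alpha * I is used directly as the prior covariance here, so
    # a larger alpha corresponds to a broader (less confident) prior over the
    # weights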
    # find the posterior over weights
    mN, SN = calculate_weights_posterior(designmtx, targets, beta, m0, S0)
    # for i in range(500):
    #     mN, SN = calculate_weights_posterior(designmtx, targets, beta, mN, SN)

    train_error, test_error = train_and_test(designmtx, targets,
                                             test_designmtx, test_targets, mN)
    print(train_error, test_error)

    # cross-validation
    # train_error, test_error = cv_evaluation_linear_model(designmtx, targets, folds, mN)
    # print(train_error, test_error, np.mean(train_error), np.mean(test_error))

    # the posterior mean (also the MAP) gives the central prediction
    mean_approx = construct_feature_mapping_approx(feature_mapping, mN)
    fig, ax, lines = plot_function_data_and_approximation(
        mean_approx, test_inputs, test_targets)
    ax.legend(lines, ['Prediction', 'True value'])
    ax.set_xticks([])
    ax.set_ylabel("Quality")
    fig.suptitle('Prediction value against true value', fontsize=10)
    fig.savefig("regression_bayesian_rbf.pdf")

    # search for the optimum alpha for Bayesian linear regression
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)
    alphas = np.logspace(1, 3)

    # convert the raw inputs into feature vectors (construct design matrices)
    # train_errors = np.empty(alphas.size)
    # test_errors = np.empty(alphas.size)
    train_errors = []
    test_errors = []
    for a, alpha in enumerate(alphas):
        # the feature mapping is fixed here; only the prior changes with alpha
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)

        beta = (1 / 0.01)**2
        M = train_designmtx.shape[1]
        # define a prior mean and covariance matrix
        m0 = np.zeros(M)

        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        # evaluate the test and train error for this alpha
        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        train_errors.append(train_error)
        test_errors.append(test_error)
        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[a] = np.mean(train_error)
        # test_errors[a] = np.mean(test_error)
    # plot the results
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("alpha", alphas, train_errors,
                                     test_errors)
    fig.suptitle('Alpha vs Error in Bayesian', fontsize=10)
    ax.plot(alphas[min_error_index], min_error, "ro")
    # ax.text(scales[min_error_index],min_error,(str(scales[min_error_index]),str(min_error)))
    ax.annotate((str(alphas[min_error_index]), str(min_error)),
                xy=(alphas[min_error_index], min_error),
                xytext=(alphas[min_error_index] + 0.01, min_error + 0.01),
                arrowprops=dict(facecolor='green', shrink=0.1))
    ax.set_xscale('log')
    fig.savefig("alpha.pdf", fmt="pdf")

    # search for the optimum beta for Bayesian linear regression
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)
    betas = (1. / np.logspace(-3, 1))**2

    # convert the raw inputs into feature vectors (construct design matrices)
    # train_errors = np.empty(betas.size)
    # test_errors = np.empty(betas.size)
    train_errors = []
    test_errors = []
    for b, beta in enumerate(betas):
        # the feature mapping is fixed here; only the noise precision changes
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)

        M = train_designmtx.shape[1]
        # define a prior mean and covariance matrix
        m0 = np.zeros(M)
        alpha = 50
        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        # evaluate the test and train error for this beta
        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        train_errors.append(train_error)
        test_errors.append(test_error)

        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[b] = np.mean(train_error)
        # test_errors[b] = np.mean(test_error)
    # plot the results
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("beta", betas, train_errors, test_errors)
    fig.suptitle('Beta vs Error in Bayesian', fontsize=10)
    ax.plot(betas[min_error_index], min_error, "ro")
    # ax.text(scales[min_error_index],min_error,(str(scales[min_error_index]),str(min_error)))
    ax.annotate((str(betas[min_error_index]), str(min_error)),
                xy=(betas[min_error_index], min_error),
                xytext=(betas[min_error_index] + 0.05, min_error + 0.05),
                arrowprops=dict(facecolor='green', shrink=0.1))
    ax.set_xscale('log')
    fig.savefig("beta.pdf", fmt="pdf")

    # search for the optimum scale for Bayesian linear regression
    scales = np.logspace(0.5, 3)
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)

    # convert the raw inputs into feature vectors (construct design matrices)
    # train_errors = np.empty(scales.size)
    # test_errors = np.empty(scales.size)
    train_errors = []
    test_errors = []
    for j, scale in enumerate(scales):
        # we must construct the feature mapping anew for each scale
        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)

        beta = (1. / 0.01)**2
        M = train_designmtx.shape[1]
        # define a prior mean and covariance matrix
        m0 = np.zeros(M)
        alpha = 50
        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        # evaluate the test and train error for this scale
        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[j] = np.mean(train_error)
        # test_errors[j] = np.mean(test_error)

        train_errors.append(train_error)
        test_errors.append(test_error)
    # plot the results
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("scale", scales, train_errors,
                                     test_errors)
    fig.suptitle('Scale vs Error in Bayesian', fontsize=10)
    ax.plot(scales[min_error_index], min_error, "ro")
    # ax.text(scales[min_error_index],min_error,(str(scales[min_error_index]),str(min_error)))
    ax.annotate((str(scales[min_error_index]), str(min_error)),
                xy=(scales[min_error_index], min_error),
                xytext=(scales[min_error_index] + 0.2, min_error + 0.2),
                arrowprops=dict(facecolor='green', shrink=0.1))
    ax.set_xscale('log')
    fig.savefig("scale.pdf", fmt="pdf")

    # Here we vary the fraction of training points used as centres and
    # evaluate the performance
    scale = 60
    train_inputs = data[0:960, :]
    train_targets = data_targets[0:960]
    test_inputs = data[960:1300, :]
    test_targets = data_targets[960:1300]
    # folds = create_cv_folds(train_inputs.shape[0], num_folds)
    cent_parts = np.linspace(0.05, 0.8, 16)
    # train_errors = np.empty(cent_parts.size)
    # test_errors = np.empty(cent_parts.size)
    train_errors = []
    test_errors = []
    N = train_inputs.shape[0]

    for n, cent_part in enumerate(cent_parts):
        # we must construct the feature mapping anew for each number of centres
        centres1 = train_inputs[np.random.choice(
            [False, True], size=N, p=[1 - cent_part, cent_part]), :]

        feature_mapping = construct_rbf_feature_mapping(centres1, scale)
        train_designmtx = feature_mapping(train_inputs)
        test_designmtx = feature_mapping(test_inputs)
        # fit the posterior and evaluate the errors for this fraction of
        # centres

        M = train_designmtx.shape[1]
        # define a prior mean and covariance matrix
        m0 = np.zeros(M)
        beta = (1. / 0.01)**2
        alpha = 50
        S0 = alpha * np.identity(M)
        # find the posterior over weights
        mN, SN = calculate_weights_posterior(train_designmtx, train_targets,
                                             beta, m0, S0)

        train_error, test_error = train_and_test(train_designmtx,
                                                 train_targets, test_designmtx,
                                                 test_targets, mN)
        train_errors.append(train_error)
        test_errors.append(test_error)

        # train_error, test_error = cv_evaluation_linear_model(train_designmtx, train_targets, folds, mN)
        # train_errors[n] = np.mean(train_error)
        # test_errors[n] = np.mean(test_error)
    # plot the results
    min_error = np.min(test_errors)
    min_error_index = np.argmin(test_errors)
    fig, ax = plot_train_test_errors("Num. Centres", cent_parts, train_errors,
                                     test_errors)
    fig.suptitle('Num. Centres vs Error in Bayesian', fontsize=10)
    ax.plot(cent_parts[min_error_index], min_error, "ro")
    ax.text(cent_parts[min_error_index], min_error,
            (str(cent_parts[min_error_index]), str(min_error)))
    fig.savefig("Num. centres.pdf", fmt="pdf")

    plt.show()
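# `calculate_weights_posterior` is assumed to implement the standard Bayesian
# linear regression posterior update (Bishop, PRML, eqs. 3.50-3.51):
#   SN = (S0^{-1} + beta * Phi^T Phi)^{-1}
#   mN = SN (S0^{-1} m0 + beta * Phi^T t)
# A minimal sketch under that assumption (not the original implementation):
def calculate_weights_posterior_sketch(designmtx, targets, beta, m0, S0):
    Phi = np.array(designmtx)
    t = np.array(targets).flatten()
    S0_inv = np.linalg.inv(S0)
    SN = np.linalg.inv(S0_inv + beta * Phi.T @ Phi)
    mN = SN @ (S0_inv @ m0 + beta * Phi.T @ t)
    return mN, SN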