def main():
    """
    This function contains example code that demonstrates how to use the 
    functions defined in poly_fit_base for fitting polynomial curves to data.
    """

    # choose number of data-points and sample a pair of vectors: the input
    # values and the corresponding target values
    N = 500
    inputs, targets = sample_data(N, arbitrary_function_2, seed=1)

    # specify the centres and scale of some rbf basis functions
    default_centres = np.linspace(0, 1, 21)
    default_scale = 0.03
    default_reg_param = 0.08

    # get the cross-validation folds
    num_folds = 4
    folds = create_cv_folds(N, num_folds)

    # evaluate then plot the performance of different reg params
    evaluate_reg_param(inputs, targets, folds, default_centres, default_scale)
    # evaluate then plot the performance of different scales
    evaluate_scale(inputs, targets, folds, default_centres, default_reg_param)
    # evaluate then plot the performance of different numbers of basis
    # function centres.
    evaluate_num_centres(inputs, targets, folds, default_scale,
                         default_reg_param)

    plt.show()
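
# A minimal sketch (hypothetical, not the project's actual implementation) of what
# create_cv_folds(N, num_folds) could return: a list of (train_mask, test_mask)
# boolean-array pairs, one per fold, with each data point used as test data in
# exactly one fold, matching how the folds are consumed in the functions below.
import numpy as np

def create_cv_folds_sketch(N, num_folds):
    # randomly assign each of the N points to one of num_folds folds
    fold_ids = np.random.randint(0, num_folds, N)
    folds = []
    for f in range(num_folds):
        test_mask = (fold_ids == f)
        folds.append((~test_mask, test_mask))
    return folds
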
def main(inputs,
         targets,
         test_error_linear,
         best_scale=None,
         best_reg_param=None,
         best_no_centres=None):
    """
    This function contains example code that demonstrates how to use the 
    functions defined in poly_fit_base for fitting polynomial curves to data.
    """
    # setting a seed to get the same pseudo-random results every time
    np.random.seed(30)

    # defining default values in case they are not provided
    if best_scale is None:
        best_scale = 6.7
    if best_reg_param is None:
        best_reg_param = 9.2e-08
    if best_no_centres is None:
        # assumed fallback (not in the original code): proportion of data points
        # used as RBF centres, matching the 10% used elsewhere in this file
        best_no_centres = 0.1

    print("\nPerforming cross-validation...")

    # getting the cross-validation folds
    num_folds = 5
    folds = create_cv_folds(inputs.shape[0], num_folds)

    # standardising for rbf - to make distances equivalent
    std_inputs = standardise(inputs)

    # specifying the centres and scale of some rbf basis functions
    centres = std_inputs[
        np.random.choice([False, True],
                         size=std_inputs.shape[0],
                         p=[1 - best_no_centres, best_no_centres]), :]

    # using the estimated optimal values I found in the external_data file as starting values
    # scale = the width (analogous to standard deviation) of the basis functions
    # evaluating then plotting the performance of different reg params
    print("Evaluating reg. parameters...")
    evaluate_reg_param(std_inputs, targets, folds, centres, best_scale,
                       test_error_linear)

    # evaluating and plotting the performance of different scales
    print("\nEvaluating scales...")
    evaluate_scale(std_inputs, targets, folds, centres, best_reg_param,
                   test_error_linear)

    # evaluating then plotting the performance of different numbers of basis function centres
    print("\nEvaluating proportion of centres...")
    evaluate_num_centres(std_inputs, targets, folds, best_scale,
                         best_reg_param, test_error_linear)
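
# A minimal sketch (hypothetical, assuming the usual z-scoring definition) of the
# standardise helper used above: each column is shifted to zero mean and scaled to
# unit standard deviation so that RBF distances are comparable across features.
import numpy as np

def standardise_sketch(inputs):
    # column-wise z-scoring: (x - mean) / std
    return (inputs - np.mean(inputs, axis=0)) / np.std(inputs, axis=0)
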
def main(name, delimiter, columns, has_header=True, test_fraction=0.25):
    """
    This function contains example code that demonstrates how to use the 
    functions defined in poly_fit_base for fitting polynomial curves to data.
    """

    # importing using csv reader and storing as numpy array
    header, data = import_csv(name, delimiter)

    print("\n")

    n = data.shape[1]
    # deleting the last column (quality) from inputs
    inputs = np.delete(data, n - 1, 1)
    # assigning it as targets instead
    targets = data[:, n - 1]

    inputs = normalise(inputs)

    # specifying the centres and scale of some rbf basis functions
    centres = inputs[np.random.choice(
        [False, True], size=inputs.shape[0], p=[0.90, 0.10]), :]

    # the width (analogous to standard deviation) of the basis functions
    scale = 8.5

    # getting the cross-validation folds
    num_folds = 5
    folds = create_cv_folds(data.shape[0], num_folds)

    scale, reg_param = parameter_search_rbf(inputs, targets, test_fraction,
                                            folds)

    # evaluating then plotting the performance of different reg params
    evaluate_reg_param(inputs, targets, folds, centres, scale)

    # we found that reg params around 0.01 are optimal
    # evaluating and plotting the performance of different scales
    evaluate_scale(inputs, targets, folds, centres, reg_param)
    # evaluating then plotting the performance of different numbers of basis
    # function centres.
    evaluate_num_centres(inputs, targets, folds, scale, reg_param)

    plt.show()
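
# A minimal sketch (hypothetical, under the common Gaussian RBF convention) of what a
# construct_rbf_feature_mapping(centres, scale) helper (called further down in this
# file) could return: a function mapping an (N, D) data matrix (or an N-vector for 1d
# data) to an (N, M) design matrix with one Gaussian bump per centre and a shared
# width given by scale.
import numpy as np

def construct_rbf_feature_mapping_sketch(centres, scale):
    C = np.asarray(centres, dtype=float)
    if C.ndim == 1:
        C = C.reshape(-1, 1)

    def feature_mapping(datamtx):
        X = np.asarray(datamtx, dtype=float)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        # squared distances from every data point to every centre
        sq_dists = np.sum((X[:, np.newaxis, :] - C[np.newaxis, :, :]) ** 2, axis=2)
        return np.exp(-sq_dists / (2 * scale ** 2))

    return feature_mapping
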
def main(ifname=None,
         delimiter=None,
         columns=None,
         normalise=None,
         features=None):
    """
    To be called when the script is run. This function fits and plots imported data (if a filename is
    provided). Data is 2 dimensional real valued data and is fit
    with maximum likelihood 2d gaussian.

    parameters
    ----------
    ifname -- filename/path of data file.
    delimiter -- delimiter of data values
    has_header -- does the data-file have a header line
    columns -- a list of integers specifying which columns of the file to import
        (counting from 0)
    """
    # if no file name is provided then use synthetic data
    if ifname is None:
        print("You need to ingest the CSV file")
    else:
        data, field_names = import_data(ifname,
                                        delimiter=delimiter,
                                        has_header=True,
                                        columns=columns)

        # DATA PREPARATION-----------------------------------------------
        N = data.shape[0]
        target = data[:, 11:]

        # Ask user to confirm whether to normalise or not
        if normalise is None:
            normalise_response = input(
                "Do you want to normalise the data? (Y/N)")
            normalise = normalise_response.upper()
            normalise_label = ""

        if normalise == "Y":
            normalise_label = "_normalised"
            # Normalise input data
            fixed_acidity = data[:, 0]
            volatility_acidity = data[:, 1]
            citric_acid = data[:, 2]
            residual_sugar = data[:, 3]
            chlorides = data[:, 4]
            free_sulfur_dioxide = data[:, 5]
            total_sulfur_dioxide = data[:, 6]
            density = data[:, 7]
            pH = data[:, 8]
            sulphates = data[:, 9]
            alcohol = data[:, 10]

            data[:, 0] = (fixed_acidity -
                          np.mean(fixed_acidity)) / np.std(fixed_acidity)
            data[:, 1] = (volatility_acidity - np.mean(volatility_acidity)
                          ) / np.std(volatility_acidity)
            data[:, 2] = (citric_acid -
                          np.mean(citric_acid)) / np.std(citric_acid)
            data[:, 3] = (residual_sugar -
                          np.mean(residual_sugar)) / np.std(residual_sugar)
            data[:, 4] = (chlorides - np.mean(chlorides)) / np.std(chlorides)
            data[:, 5] = (free_sulfur_dioxide - np.mean(free_sulfur_dioxide)
                          ) / np.std(free_sulfur_dioxide)
            data[:, 6] = (total_sulfur_dioxide - np.mean(total_sulfur_dioxide)
                          ) / np.std(total_sulfur_dioxide)
            data[:, 7] = (density - np.mean(density)) / np.std(density)
            data[:, 8] = (pH - np.mean(pH)) / np.std(pH)
            data[:, 9] = (sulphates - np.mean(sulphates)) / np.std(sulphates)
            data[:, 10] = (alcohol - np.mean(alcohol)) / np.std(alcohol)
        elif normalise != "N":
            sys.exit("Please enter a valid response of Y or N")

        if features is None:
            feature_response = input(
                "Please specify which feature combination you want (e.g.1,2,5,7)"
            )
            feature_response = feature_response.split(",")
            # need to convert the list of strings into a list of integers
            feature_combin = []
            for i in range(len(feature_response)):
                print(feature_response[i])
                feature_combin.append(int(feature_response[i]))
        else:
            feature_combin = features

        # select the chosen feature columns as the (N, num_features) input matrix
        inputs = data[:, list(feature_combin)]
        #print("INPUT: ", inputs)

        # Plotting RBF Model ----------------------------------------------------------
        # specify the centres of the rbf basis functions
        centres = np.asarray([
            0.35, 0.4, 0.45, 0.459090909, 0.468181818, 0.477272727,
            0.486363636, 0.495454545, 0.504545455, 0.513636364, 0.522727273,
            0.531818182, 0.540909091, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
            0.62, 0.63, 0.64, 0.65, 0.7, 0.75, 0.8
        ])
        # the width (analogous to standard deviation) of the basis functions
        scale = 450
        reg_param = 7.906043210907701e-11

        print("centres = %r" % (centres, ))
        print("scale = %r" % (scale, ))
        print("reg param = %r" % (reg_param, ))

        # create the feature mapping
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        # plot the basis functions themselves for reference
        #display_basis_functions(feature_mapping)
        # now construct the design matrix for the inputs
        designmtx = feature_mapping(inputs)
        # the number of features is the width of this matrix
        print("DESIGN MATRIX: ", designmtx)

        if reg_param is None:
            # use simple least squares approach
            weights = ml_weights(designmtx, target)
        else:
            # use regularised least squares approach
            weights = regularised_ml_weights(designmtx, target, reg_param)

        # get the cross-validation folds
        num_folds = 4
        folds = create_cv_folds(N, num_folds)

        train_errors, test_errors = cv_evaluation_linear_model(
            designmtx, target, folds, reg_param=reg_param)
        # we're interested in the average (mean) training and testing errors
        train_mean_error = np.mean(train_errors)
        test_mean_error = np.mean(test_errors)
        train_stdev_error = np.std(train_errors)
        test_stdev_error = np.std(test_errors)
        print("TRAIN MEAN ERROR: ", train_mean_error)
        print("TEST MEAN ERROR: ", test_mean_error)
        print("TRAIN STDEV ERROR: ", train_stdev_error)
        print("TEST STDEV ERROR: ", test_stdev_error)
        print("ML WEIGHTS: ", weights)
        apply_validation_set(feature_combin, feature_mapping, weights)
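
# A minimal sketch (hypothetical, assuming standard (regularised) least squares) of
# the two weight estimators called above: ml_weights solves the normal equations and
# regularised_ml_weights adds a ridge penalty of size reg_param on the weights.
import numpy as np

def ml_weights_sketch(designmtx, targets):
    Phi = np.asarray(designmtx, dtype=float)
    t = np.asarray(targets, dtype=float).reshape(-1, 1)
    # w = (Phi^T Phi)^(-1) Phi^T t, computed via the pseudo-inverse for stability
    return np.linalg.pinv(Phi) @ t

def regularised_ml_weights_sketch(designmtx, targets, reg_param):
    Phi = np.asarray(designmtx, dtype=float)
    t = np.asarray(targets, dtype=float).reshape(-1, 1)
    M = Phi.shape[1]
    # w = (reg_param * I + Phi^T Phi)^(-1) Phi^T t
    return np.linalg.solve(reg_param * np.eye(M) + Phi.T @ Phi, Phi.T @ t)
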
def main(header, inputs, targets, test_fraction=0.25):
    # setting a seed to get the same pseudo-random results every time
    np.random.seed(30)

    raw_inputs = inputs
    # standardise inputs (to investigate whether it has an effect on the linear
    # regression) and define them for the gradient descent method
    grad_desc_inputs = standardise(inputs)
    ones = np.ones((inputs.shape[0], 1))
    grad_desc_inputs = np.hstack((ones, grad_desc_inputs))

    # defining target for gradient descent method
    grad_desc_target = targets

    # converting to matrices and initialising theta
    grad_desc_inputs = np.matrix(grad_desc_inputs)
    grad_desc_target = (np.matrix(targets)).T
    # theta2 = np.zeros((12,1)) - this generates a column - we don't want this here
    theta2 = np.zeros((1, inputs.shape[1] + 1))
    theta2 = np.matrix(theta2)

    # defining alpha, the learning rate and the number of iterations for
    # the gradient descent method
    alpha = 0.01
    iterations = 1000

    # performing linear regression on the data set
    g2, cost2 = gradient_descent(grad_desc_inputs, grad_desc_target, theta2,
                                 alpha, iterations)

    # get the cost (error) of the model
    compute_cost(grad_desc_inputs, grad_desc_target, g2)

    # plotting the error function against the number of iteration to check if gradient
    # descent is working or not
    # If gradient descent is working correctly the error function should decrease
    # after every iteration until it reaches the local minimum
    fig, ax = plt.subplots(figsize=(7, 5))
    ax.plot(np.arange(iterations), cost2, 'r')
    ax.set_xlabel('Iterations')
    ax.set_ylabel('Error')
    ax.set_title('Error vs Iterations')
    fig.savefig("../plots/simple_linear/cost_vs_iterations_plot.png",
                format="png")

    # Predict function for the gradient descent method
    quality = (grad_desc_inputs * g2.T)

    # calculating the test error for the gradient descent method
    errors = np.sum((np.array(quality) - np.array(grad_desc_target))**
                    2) / len(grad_desc_target)
    test_error_grad = np.sqrt(errors)
    print(
        "Test error obtained from the gradient descent method: {test_error_grad}"
        .format(**locals()))
    """
    Implementing the Linear regression method using the least squares approach.
    """
    # adding a columns of ones to the input matrix
    N, D = inputs.shape
    column = np.ones((N, 1))
    inputs = np.hstack((column, inputs))

    # performing linear regression with normalised inputs
    print("Printing linear regression with normalised inputs:")
    fig, ax, train_error, test_error = evaluate_linear_approx(
        inputs, targets, test_fraction)
    fig.suptitle(
        "Plot of Train and Test Errors with \n Normalised Inputs Against Different Reg. Parameters"
    )
    # plt.ylim(0.5, 0.75)

    # performing linear regression with raw data
    print("Printing linear regression with raw data:")
    fig2, ax2, train_error2, test_error2 = evaluate_linear_approx(
        raw_inputs, targets, test_fraction)
    fig2.suptitle(
        "Plot of Train and Test Errors with \n Raw Inputs Against Different Reg. Parameters"
    )
    # plt.ylim(0.5, 0.75)

    # cross validation
    num_folds = 5
    folds = create_cv_folds(N, num_folds)

    fig2, ax = evaluate_reg_param(raw_inputs, targets, folds, reg_params=None)
    fig2.suptitle(
        "Cross-validation of Train and Test Errors \n with Raw Inputs Against Different Reg. Parameters"
    )
    # plt.ylim(0.6, 0.68)

    plt.show()
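
# A minimal sketch (hypothetical, assuming batch gradient descent on the mean squared
# error cost) of the gradient_descent / compute_cost pair used above. X is (N, D+1)
# with a leading column of ones, y is an (N, 1) column and theta is a (1, D+1) row
# vector, matching the shapes set up in the function above.
import numpy as np

def compute_cost_sketch(X, y, theta):
    residuals = X @ theta.T - y
    return float(np.sum(np.square(residuals)) / (2 * len(y)))

def gradient_descent_sketch(X, y, theta, alpha, iterations):
    cost = np.zeros(iterations)
    for it in range(iterations):
        residuals = X @ theta.T - y             # (N, 1) prediction errors
        gradient = (residuals.T @ X) / len(y)   # (1, D+1) cost gradient
        theta = theta - alpha * gradient        # step against the gradient
        cost[it] = compute_cost_sketch(X, y, theta)
    return theta, cost
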
def main(ifname=None, delimiter=None, columns=None, normalise=None):
    """
    To be called when the script is run. This function fits and plots imported data (if a filename is
    provided). Data is 2 dimensional real valued data and is fit
    with maximum likelihood 2d gaussian.

    parameters
    ----------
    ifname -- filename/path of data file.
    delimiter -- delimiter of data values
    has_header -- does the data-file have a header line
    columns -- a list of integers specifying which columns of the file to import
        (counting from 0)
    """
    # if no file name is provided then use synthetic data
    if ifname is None:
        print("You need to ingest the CSV file")
    else:
        data, field_names = import_data(ifname,
                                        delimiter=delimiter,
                                        has_header=True,
                                        columns=columns)

        #DATA PREPARATION-----------------------------------------------
        # define the label up front so it exists even when normalise is passed in
        normalise_label = ""
        if normalise is None:
            # Ask user to confirm whether to normalise or not
            normalise_response = input(
                "Do you want to normalise the data? (Y/N)")
            normalise = normalise_response.upper()

        if normalise == "Y":
            normalise_label = "_normalised"
            #Normalise input data
            fixed_acidity = data[:, 0]
            volatility_acidity = data[:, 1]
            citric_acid = data[:, 2]
            residual_sugar = data[:, 3]
            chlorides = data[:, 4]
            free_sulfur_dioxide = data[:, 5]
            total_sulfur_dioxide = data[:, 6]
            density = data[:, 7]
            pH = data[:, 8]
            sulphates = data[:, 9]
            alcohol = data[:, 10]

            data[:, 0] = (fixed_acidity -
                          np.mean(fixed_acidity)) / np.std(fixed_acidity)
            data[:, 1] = (volatility_acidity - np.mean(volatility_acidity)
                          ) / np.std(volatility_acidity)
            data[:, 2] = (citric_acid -
                          np.mean(citric_acid)) / np.std(citric_acid)
            data[:, 3] = (residual_sugar -
                          np.mean(residual_sugar)) / np.std(residual_sugar)
            data[:, 4] = (chlorides - np.mean(chlorides)) / np.std(chlorides)
            data[:, 5] = (free_sulfur_dioxide - np.mean(free_sulfur_dioxide)
                          ) / np.std(free_sulfur_dioxide)
            data[:, 6] = (total_sulfur_dioxide - np.mean(total_sulfur_dioxide)
                          ) / np.std(total_sulfur_dioxide)
            data[:, 7] = (density - np.mean(density)) / np.std(density)
            data[:, 8] = (pH - np.mean(pH)) / np.std(pH)
            data[:, 9] = (sulphates - np.mean(sulphates)) / np.std(sulphates)
            data[:, 10] = (alcohol - np.mean(alcohol)) / np.std(alcohol)
        elif normalise != "N":
            sys.exit("Please enter a valid response of Y or N")

        counter = 0
        N = data.shape[0]
        target = data[:, 11:]

        # get the cross-validation folds
        num_folds = 4
        folds = create_cv_folds(N, num_folds)

        feature_combin = create_combination()
        #declare number of centres to explore and create matrix for storing testing mean errors
        #num_centres_sequence = np.arange(5,80)
        #num_centre = num_centres_sequence.size
        #matrix_centre_errors = np.zeros(shape=(len(feature_combin), num_centre+1))
        #declare scales to explore and create matrix for storing testing mean errors
        scales = np.logspace(-10, 10)
        num_scales = scales.size
        matrix_scale_errors = np.zeros(shape=(len(feature_combin),
                                              num_scales + 1))
        #declare reg_param to explore and create matrix for storing testing mean errors
        reg_params = np.logspace(-15, 5)
        num_reg_params = reg_params.size
        matrix_reg_param_errors = np.zeros(shape=(len(feature_combin),
                                                  num_reg_params + 1))

        for i in range(len(feature_combin)):
            # select the columns of this feature combination as the input matrix
            inputs = data[:, list(feature_combin[i])]
            counter += 1
            print("COUNTER: ", counter)
            #CROSS VALIDATION----------------------------------------------
            default_centres = np.asarray([
                0.35, 0.4, 0.45, 0.459090909, 0.468181818, 0.477272727,
                0.486363636, 0.495454545, 0.504545455, 0.513636364,
                0.522727273, 0.531818182, 0.540909091, 0.55, 0.56, 0.57, 0.58,
                0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.7, 0.75, 0.8
            ])
            default_scale = 27
            default_reg_param = 7.906043210907701e-11

            #matrix_centre_errors[i][0] = magic(feature_combin[i])
            #test_mean_errors_centres = evaluate_errors_num_centres(inputs, target, folds, default_scale, default_reg_param,num_centres_sequence)
            #matrix_centre_errors[i][1:] = test_mean_errors_centres

            matrix_scale_errors[i][0] = magic(feature_combin[i])
            test_mean_errors_scales = evaluate_errors_scale(inputs,
                                                            target,
                                                            folds,
                                                            default_centres,
                                                            default_reg_param,
                                                            scales=scales)
            matrix_scale_errors[i][1:] = test_mean_errors_scales

            matrix_reg_param_errors[i][0] = magic(feature_combin[i])
            test_mean_errors_reg_param = evaluate_errors_reg_param(
                inputs,
                target,
                folds,
                default_centres,
                default_scale,
                reg_params=reg_params)
            matrix_reg_param_errors[i][1:] = test_mean_errors_reg_param

        #np.savetxt('centre_errors'+normalise_label+'.csv', matrix_centre_errors, fmt='%.6f', delimiter=',',
        #          header="#combination, #5,#6,#7,#8,#9,#10,#11,#12,#13,#14,#15,#16,#17,#18,#19,#20,#21,#22,#23,#24,#25,#26,#27,#28,#29,#30,#31,#32,#33,#34,#35,#36,#37,#38,#39,#40,#41,#42,#43,#44,#45,#46,#47,#48,#49,#50,#51,#52,#53,#54,#55,#56,#57,#58,#59,#60,#61,#62,#63,#64,#65,#66,#67,#68,#69,#70,#71,#72,#73,#74,#75,#76,#77,#78,#79,#80")
        # the CSV headers list the feature combination column followed by the grid values
        np.savetxt('scale_errors' + normalise_label + '.csv',
                   matrix_scale_errors,
                   fmt='%.6f',
                   delimiter=',',
                   header="#combination, " + ", ".join(str(s) for s in scales))
        np.savetxt('reg_param_errors' + normalise_label + '.csv',
                   matrix_reg_param_errors,
                   fmt='%.6f',
                   delimiter=',',
                   header="#combination, " + ", ".join(str(r) for r in reg_params))
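
# A minimal sketch (hypothetical) of what evaluate_errors_scale could compute: for each
# candidate scale, build an RBF design matrix, cross-validate a regularised linear fit
# over the folds and record the mean root-mean-square validation error. It assumes the
# construct_rbf_feature_mapping and regularised_ml_weights helpers and the boolean fold
# masks behave as described elsewhere in this file.
import numpy as np

def evaluate_errors_scale_sketch(inputs, targets, folds, centres, reg_param, scales):
    targets = np.asarray(targets, dtype=float).flatten()
    mean_test_errors = np.zeros(len(scales))
    for s, scale in enumerate(scales):
        feature_mapping = construct_rbf_feature_mapping(centres, scale)
        designmtx = feature_mapping(inputs)
        fold_errors = []
        for train_mask, test_mask in folds:
            weights = regularised_ml_weights(
                designmtx[train_mask], targets[train_mask], reg_param)
            preds = designmtx[test_mask] @ np.asarray(weights).flatten()
            resid = preds - targets[test_mask]
            fold_errors.append(np.sqrt(np.mean(resid ** 2)))  # RMS validation error
        mean_test_errors[s] = np.mean(fold_errors)
    return mean_test_errors
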
def main(ifname=None, delimiter=None, columns=None):
    delimiter = ';'
    columns = np.arange(12)
    if ifname is None:
        ifname = 'datafile.csv'
    data, field_names = import_data(ifname,
                                    delimiter=delimiter,
                                    has_header=True,
                                    columns=columns)
    targets = data[:, -1]
    inputs = data[:, 0:11]

    #We decided that the test fraction will be 0.2
    test_fraction = 0.2
    #np.random.seed(5)
    #let's leave 20% out for the test set
    train_part, test_part = train_and_test_split(data.shape[0], test_fraction)
    train_inputs, train_targets, test_inputs, test_targets = train_and_test_partition(
        inputs, targets, train_part, test_part)

    # get the cross-validation folds
    num_folds = 5
    # folds is a list of (train_mask, test_mask) pairs of boolean arrays, e.g.
    # folds[0][0] = [True, False, False] and folds[0][1] = [False, True, True]
    folds = create_cv_folds(train_inputs.shape[0], num_folds)

    #first of all let's plot some exploratory plots
    exploratory_plots()

    #Now, let's try some linear regression
    linear_regression_entry_point(field_names, train_inputs, train_targets,
                                  folds, test_fraction)

    #Now, let's see the performance of the bayesian regression
    bayesian_regression_entry_point(data)

    #Let's see how the kNN model will behave
    kNN_entry_point(data, field_names)

    #Finally, let's see how the RBF model will behave
    train_error_linear, test_error_linear = simple_linear_regression(
        train_inputs, train_targets, folds, test_fraction, test_inputs,
        test_targets)
    #RBF regression with normalisation but without cross validation
    parameter_search_rbf_without_cross(train_inputs,
                                       train_targets,
                                       test_fraction,
                                       test_error_linear,
                                       normalize=True)

    #RBF regression with cross-validation and normalisation
    parameter_search_rbf_cross(train_inputs, train_targets, folds,
                               test_error_linear, test_inputs, test_targets)

    #RBF regression with cross-validation but without normalisation
    parameter_search_rbf_cross(train_inputs,
                               train_targets,
                               folds,
                               test_error_linear,
                               test_inputs,
                               test_targets,
                               normalize=False)

    plt.show()
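
# A minimal sketch (hypothetical) of the train/test helpers used above:
# train_and_test_split draws a random boolean mask holding out roughly test_fraction
# of the N points, and train_and_test_partition applies the two masks to the data.
import numpy as np

def train_and_test_split_sketch(N, test_fraction):
    test_part = np.random.rand(N) < test_fraction
    return ~test_part, test_part

def train_and_test_partition_sketch(inputs, targets, train_part, test_part):
    return (inputs[train_part], targets[train_part],
            inputs[test_part], targets[test_part])
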
def main(ifname=None, delimiter=None, columns=None):
    """
    To be called when the script is run. This function fits and plots imported data (if a filename is
    provided). Data is 2 dimensional real valued data and is fit
    with maximum likelihood 2d gaussian.

    parameters
    ----------
    ifname -- filename/path of data file.
    delimiter -- delimiter of data values
    has_header -- does the data-file have a header line
    columns -- a list of integers specifying which columns of the file to import
        (counting from 0)
    """
    # if no file name is provided then use synthetic data
    if ifname is None:
        print("You need to ingest the CSV file")
    else:
        data, field_names = import_data(ifname,
                                        delimiter=delimiter,
                                        has_header=True,
                                        columns=columns)

        #DATA PREPARATION-----------------------------------------------
        counter = 0
        N = data.shape[0]
        input = data[:, 0:data.shape[1] - 1]
        target = data[:, data.shape[1] - 1:]
        #print("FEATURES : ",columns)
        #print("INPUT :", input)

        #declare number of centre to explore and create matrix for storing testing mean errors
        num_centres_sequence = np.arange(5, 100)
        scales = np.logspace(-10, 10)
        reg_params = np.logspace(-15, 10)

        # specify the centres and scale of some rbf basis functions
        default_centres = np.asarray([
            0.35, 0.4, 0.45, 0.459090909, 0.468181818, 0.477272727,
            0.486363636, 0.495454545, 0.504545455, 0.513636364, 0.522727273,
            0.531818182, 0.540909091, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
            0.62, 0.63, 0.64, 0.65, 0.7, 0.75, 0.8
        ])
        default_scale = 26.8
        default_reg_param = 7.906043210907701e-11

        # get the cross-validation folds
        num_folds = 4
        folds = create_cv_folds(N, num_folds)

        # evaluate then plot the performance of different reg params
        evaluate_reg_param(input, target, folds, default_centres,
                           default_scale, reg_params)
        # evaluate then plot the performance of different scales
        evaluate_scale(input, target, folds, default_centres,
                       default_reg_param)
        # evaluate then plot the performance of different numbers of basis
        # function centres.
        #test_mean_errors_for_centre = evaluate_num_centres(input, target, folds, default_scale, default_reg_param,num_centres_sequence)
        #steep_centre,optimum_centre = point_of_steepest_gradient(test_mean_errors_for_centre,num_centres_sequence)
        #print("Centre with steepest drop of test mean errors: ",steep_centre)
        #print("Optimum number of centres which within tolerance: ",optimum_centre)

        # the width (analogous to standard deviation) of the basis functions
        scale = 0.1
        feature_mapping = construct_rbf_feature_mapping(default_centres, scale)
        datamtx = np.linspace(0, 1, 51)
        designmtx = feature_mapping(datamtx)
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        for colid in range(designmtx.shape[1]):
            ax.plot(datamtx, designmtx[:, colid])
        # axis limits and ticks only need to be set once, outside the loop
        ax.set_xlim([0, 1])
        ax.set_xticks([0, 1])
        ax.set_yticks([0, 1])