def crossValidation(train_x, train_y, class_num, filename, model_name, K=3):
    """Run K-fold cross validation and save an averaged ROC-style plot.

    The data is cut into K contiguous folds of ``len(train_x) // K`` samples
    each.  Samples beyond ``K * (len(train_x) // K)`` are never used for
    validation; they remain part of every training split.  For each fold the
    model is trained on the remaining samples and evaluated on the fold; the
    per-fold accuracy is averaged and printed, and the per-fold FA/PD curves
    are averaged and written to
    ``plotting/<filename>_<model_name>_lower_roc_CV.png``.

    Args:
        train_x: Feature samples; must support positional slicing and be
            accepted by ``np.concatenate``.
        train_y: Labels aligned with ``train_x``.  A slice of it must expose
            ``.values`` (e.g. a pandas Series/DataFrame).
        class_num: Number of classes, forwarded to the model's train/test.
        filename: Dataset tag, forwarded to the model's test routine and
            used in the output image name.
        model_name: Classifier to evaluate.  Only ``'NBC'`` is implemented.
        K: Number of folds (default 3).

    Raises:
        ValueError: If ``model_name`` is not a supported classifier.
    """
    # Fail fast: previously an unknown model fell through the ``if`` and
    # crashed later on an unbound ``acc`` with an unhelpful error.
    if model_name != 'NBC':
        raise ValueError(
            "Unsupported model_name {!r}; only 'NBC' is implemented".format(model_name))

    divided = len(train_x) // K
    overall_acc = 0
    total_PD = []
    total_FA = []
    for fold in range(K):
        print()
        print("Now fold is {}".format(fold))
        # Half-open index range [start, end) of the validation fold.
        start = divided * fold
        end = divided * (fold + 1)
        # Everything outside the fold is used for training.
        training_x = np.concatenate((train_x[:start], train_x[end:]))
        training_y = np.concatenate((train_y[:start], train_y[end:]))
        validation_x = train_x[start:end]
        validation_y = train_y[start:end].values

        prior, train_mean, train_var = NBC.train(training_x, training_y, class_num)
        acc, FA, PD = NBC.test(validation_x, validation_y, prior, train_mean,
                               train_var, class_num, filename, model_name, False)
        total_FA.append(np.array(FA))
        total_PD.append(np.array(PD))
        overall_acc += acc
    print("Overall accuracy: {}".format(overall_acc / K))

    # Aggregate the per-fold curves point-wise across folds.
    total_FA = np.array(total_FA)
    total_PD = np.array(total_PD)
    FA_mean = np.mean(total_FA, axis=0)
    PD_mean = np.mean(total_PD, axis=0)
    PD_var = np.var(total_PD, axis=0)

    # Plot the mean curve of the validation folds.
    fig = plt.figure()
    try:
        # NOTE(review): the error bars show the across-fold *variance* of PD,
        # not the standard deviation -- confirm this is intended.
        plt.errorbar(FA_mean, PD_mean, yerr=PD_var, uplims=True, lolims=True)
        plt.xlabel('FA')
        plt.ylabel('PD')
        fig.savefig('plotting/' + filename + '_' + model_name + '_lower_roc_CV.png')
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
Exemplo n.º 2
0
        wine_train_x, wine_train_y, wine_test_x, wine_test_y = readData()

    K = 5
    class_num = 3
    print("====================== IRIS =================")
    # Bayesian classifier
    crossValidation(iris_train_x, iris_train_y, class_num, 'iris', 'BC', K)
    prior, train_mean, train_cov = BC.train(iris_train_x.values,
                                            iris_train_y.values, class_num)
    acc = BC.test(iris_test_x.values, iris_test_y.values, prior, train_mean,
                  train_cov, class_num, 'iris', 'BC', True)
    # Naive-Bayes classifier
    crossValidation(iris_train_x, iris_train_y, class_num, 'iris', 'NBC', K)
    prior, train_mean, train_var = NBC.train(iris_train_x.values,
                                             iris_train_y.values, class_num)
    acc = NBC.test(iris_test_x.values, iris_test_y.values, prior, train_mean,
                   train_var, class_num, 'iris', 'NBC', True)

    print("====================== WINE =================")
    # Bayesian classifier
    crossValidation(wine_train_x, wine_train_y, class_num, 'wine', 'BC', K)
    prior, train_mean, train_cov = BC.train(wine_train_x.values,
                                            wine_train_y.values, class_num)
    acc = BC.test(wine_test_x.values, wine_test_y.values, prior, train_mean,
                  train_cov, class_num, 'wine', 'BC', True)
    # Naive-Bayes classifier
    crossValidation(wine_train_x, wine_train_y, class_num, 'wine', 'NBC', K)
    prior, train_mean, train_var = NBC.train(wine_train_x.values,
                                             wine_train_y.values, class_num)
    acc = NBC.test(wine_test_x.values, wine_test_y.values, prior, train_mean,
                   train_var, class_num, 'wine', 'NBC', True)
    # print("eigen vector shape = {}".format(eigen_vectors.shape))
    # print("lower_dimension_data training shape: {}".format(lower_dimension_data.shape))
    # print("lower_dimension_data testing shape: {}".format(lower_dimension_data_test.shape))

    # # Applying PCA to classifier
    # prior, train_mean, train_cov = NBC.train(lower_dimension_data, iris_train_y.values.ravel(), CLASS_NUM)
    # acc = NBC.test(lower_dimension_data_test, iris_test_y.values.ravel(), prior, train_mean, train_cov, CLASS_NUM, 'iris_task2', 'NBC', True)

    # predict, probability, model = TestBest(lower_dimension_data, iris_train_y.values.ravel(), lower_dimension_data_test, iris_test_y.values.ravel())
    # computeConfusionMatrix(probability, iris_test_y.values.ravel())
    # plotROC(probability, iris_test_y.values.ravel(), 'iris_task2_low_SVM')

    print("=================== BREAST ==============")
    # Original data to classifier
    prior, train_mean, train_cov = NBC.train(breast_train_x.values, breast_train_y.values.ravel(), CLASS_NUM)
    acc = NBC.test(breast_test_x.values, breast_test_y.values.ravel(), prior, train_mean, train_cov, CLASS_NUM, 'breast_task2_ori', 'NBC', True)

    predict, probability, model = TestBest(breast_train_x.values, breast_train_y.values.ravel(), breast_test_x.values, breast_test_y.values.ravel())
    computeConfusionMatrix(probability, breast_test_y.values.ravel())
    plotROC(probability, breast_test_y.values.ravel(), 'breast_task2_ori_SVM')

    print("===============")

    lower_dimension_data, eigen_vectors = PCA(breast_train_x.values)
    lower_dimension_data_test = np.matmul(breast_test_x.values, eigen_vectors)
    print("data shape = {}".format(breast_train_x.values.shape))
    print("eigen vector shape = {}".format(eigen_vectors.shape))
    print("lower_dimension_data training shape: {}".format(lower_dimension_data.shape))
    print("lower_dimension_data testing shape: {}".format(lower_dimension_data_test.shape))

    prior, train_mean, train_cov = NBC.train(lower_dimension_data, breast_train_y.values.ravel(), CLASS_NUM)