def main(): """ Calls every function to implement Rain Forests """ opts = util.parse_args() train_partition = util.read_arff(opts.train_filename) test_partition = util.read_arff(opts.test_filename) #constructs our ensemble of decision stumps ds_ensemble = construct_ensemble(opts.T, train_partition) #constructs a list of lists of (predicted labels for all examples) for each classifer ds_list = testing(test_partition, ds_ensemble, opts.threshold) #gets the final predicted labels for test data by majority vote finalpred_lst = finaloutput(ds_list, test_partition) #contructs confusion matrix confusion_matrix = util.construct_cm(finalpred_lst, test_partition) #computes the true positive and false positive rates for the confusion matrix (true_pos, false_pos) = util.rates(confusion_matrix) #print statements print("T:", opts.T, ", thresh: ", opts.threshold) print(" prediction ") print(" -1 1") print("-1", "| ", confusion_matrix[0, 0], " ", confusion_matrix[0, 1]) print(" 1", "| ", confusion_matrix[1, 0], " ", confusion_matrix[1, 1]) print(" ") print("false positive: ", false_pos) print("true positive: ", true_pos)
def main():
    # read in data (y in {-1, 1})
    opts = util.parse_args('Random forests')
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)
    T = opts.classifier_nums
    threshold = opts.thresh

    # train the ensemble
    ensemble = random_forest_train_data(train_partition, T)

    # test the ensemble: also yields the false positive and true positive rates
    confusion_matrix, FPR, TPR = random_forest_test_data(
        test_partition, ensemble, threshold)

    print('T: ' + str(T) + ' , thresh ' + str(threshold))
    print('\n')
    print('   prediction')
    print('   -1  1')
    print('   -----')
    print('-1| ' + str(int(confusion_matrix[0][0])) + '  '
          + str(int(confusion_matrix[0][1])))
    print(' 1| ' + str(int(confusion_matrix[1][0])) + '  '
          + str(int(confusion_matrix[1][1])))
    print('\n')
    print('false positive: ' + str(FPR))
    print('true positive: ' + str(TPR))
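# random_forest_train_data is defined elsewhere. Random forests typically fit
# each classifier on a bootstrap sample of the training data; a minimal
# sketch of that sampling step follows. The Partition(data, F) constructor
# is an assumption based on how Partition is used elsewhere in this project.
import random

def bootstrap_sample_sketch(partition):
    """Draw partition.n examples with replacement from a Partition."""
    data = [random.choice(partition.data) for _ in range(partition.n)]
    return Partition(data, partition.F)  # assumed constructor signature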
def main():
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # training: get the ensemble list and its corresponding score list
    (ds_ensemble, scorelist) = construct_ensemble(opts, train_partition)

    # testing: get a list of predicted labels from each classifier
    tested_list = testing(test_partition, ds_ensemble, opts.threshold)

    # get the final predicted labels for the test data
    finalpred_lst = finaloutput(tested_list, test_partition, scorelist,
                                opts.threshold)

    # construct the confusion matrix
    confusion_matrix = util.construct_cm(finalpred_lst, test_partition)

    # compute the true positive and false positive rates from the confusion matrix
    (true_pos, false_pos) = util.rates(confusion_matrix)

    # print statements
    print("T:", opts.T, ", thresh: ", opts.threshold)
    print("   prediction ")
    print("   -1   1")
    print("-1", "| ", confusion_matrix[0, 0], " ", confusion_matrix[0, 1])
    print(" 1", "| ", confusion_matrix[1, 0], " ", confusion_matrix[1, 1])
    print(" ")
    print("false positive: ", false_pos)
    print("true positive: ", true_pos)
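# Unlike the plain majority vote earlier, finaloutput here also receives a
# scorelist, which suggests an AdaBoost-style weighted vote. A minimal
# sketch under that assumption (the real signature and threshold handling
# may differ):
def weighted_vote_sketch(tested_list, test_partition, scorelist, threshold):
    """Weighted vote: compare sum_t score_t * h_t(x) against a threshold."""
    final_preds = []
    for i in range(test_partition.n):
        total = sum(score * preds[i]
                    for score, preds in zip(scorelist, tested_list))
        final_preds.append(1 if total >= threshold else -1)
    return final_preds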
def main():
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # train the random forest first
    print(opts.T)
    rf_ensemble = random_forest.construct_ensemble(opts.T, train_partition)

    # train AdaBoost next
    (ad_ensemble, scorelist) = ada_boost.construct_ensemble(opts, train_partition)

    # initialize the threshold that will be changed in the loop
    thresh = -0.1

    # initialize two lists of size 20 to hold the true and false positive
    # rates of both ensemble methods at each threshold value
    rm_forest = [None] * 20
    adaboost = [None] * 20

    # loop, incrementing the threshold each iteration
    for i in range(20):
        rm_forest[i] = test_random(test_partition, rf_ensemble, thresh)
        adaboost[i] = test_adaboost(test_partition, ad_ensemble, scorelist, thresh)
        thresh += 0.06
        print(rm_forest[i], adaboost[i], thresh)

    # plot the ROC curves
    plot_data(adaboost, rm_forest, opts.T)
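# plot_data is defined elsewhere. Assuming each entry of adaboost and
# rm_forest is a (false positive rate, true positive rate) pair, a minimal
# matplotlib sketch of the ROC plot could look like this:
import matplotlib.pyplot as plt

def plot_data_sketch(adaboost, rm_forest, T):
    """Plot one ROC curve per ensemble method."""
    fpr_ada, tpr_ada = zip(*adaboost)
    fpr_rf, tpr_rf = zip(*rm_forest)
    plt.plot(fpr_ada, tpr_ada, label='AdaBoost')
    plt.plot(fpr_rf, tpr_rf, label='Random Forest')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves (T = ' + str(T) + ')')
    plt.legend()
    plt.show()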
def main():
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename, True)
    test_partition = util.read_arff(opts.test_filename, False)

    # create an instance of the DecisionTree class from the train_partition
    tree = DecisionTree(train_partition, opts.depth)
    rootnode = tree.constructsubtree(train_partition, opts.depth, 0)

    # print a text representation of the DecisionTree
    tree.printtree(rootnode)
def main():
    opts = util.parse_args('')
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # reweight the training examples: examples 0 and 8 get weight 0.25 each,
    # and the remaining 0.5 of the weight mass is split evenly among the rest
    for i in range(train_partition.n):
        example = train_partition.data[i]
        if i == 0 or i == 8:
            example.set_weight(0.25)
        else:
            example.set_weight(0.5 / (train_partition.n - 2))

    # print the (weighted) information gain of each feature
    for x in train_partition.F:
        print(train_partition.gain(x))

    # fit a single decision stump and show its predictions on the test set
    d = DecisionStump(train_partition)
    print(d)
    for x in test_partition.data:
        print(x.label, d.classify(x.features))
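# Partition.gain is implemented elsewhere. For reference, weighted
# information gain is typically gain(F) = H(Y) - sum_v P(F=v) * H(Y|F=v),
# where the probabilities come from example weights rather than raw counts.
# A minimal sketch of the weighted-entropy piece (the dict input format is
# an assumption):
from math import log2

def weighted_entropy_sketch(weight_by_label):
    """H(Y) from a dict mapping each label to its total example weight."""
    total = sum(weight_by_label.values())
    return -sum((w / total) * log2(w / total)
                for w in weight_by_label.values() if w > 0)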
def main():
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # creating the Naive Bayes model
    nb_model = NaiveBayes(train_partition)

    m = len(test_partition.labels)
    confusion_matrix = np.zeros((m, m))  # initializing the confusion matrix
    accuracy = 0
    for x in test_partition.data:
        y_hat = nb_model.classify(x.features)
        y = x.label
        confusion_matrix[y][y_hat] += 1
        if y == y_hat:
            accuracy += 1

    print('Accuracy: ' + str(round(accuracy / test_partition.n, 6)) + ' ('
          + str(accuracy) + ' out of ' + str(test_partition.n) + ' correct)')
    print(confusion_matrix)
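# NaiveBayes is defined elsewhere. As a reminder of what classify typically
# computes, here is a minimal sketch using log probabilities; the attribute
# names log_priors and log_likelihoods (and features being an indexable
# sequence) are assumptions, not this project's actual API.
def nb_classify_sketch(model, features):
    """Return argmax_y [ log P(y) + sum_j log P(x_j | y) ]."""
    best_label, best_score = None, float('-inf')
    for y in model.classes:
        score = model.log_priors[y]
        for j, value in enumerate(features):
            score += model.log_likelihoods[y][j][value]
        if score > best_score:
            best_label, best_score = y, score
    return best_label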
def main():
    # process the data
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename)
    test_partition = util.read_arff(opts.test_filename)

    # sanity check
    print("num train =", train_partition.n, ", num classes =", train_partition.K)
    print("num test =", test_partition.n, ", num classes =", test_partition.K)

    nb_model = NaiveBayes(train_partition)

    y_real = []  # list of true labels
    y_h = []     # list of predicted labels
    for example in test_partition.data:  # loop through the test examples
        y_hat = nb_model.classify(example.features)  # classify each example's features
        y_real.append(int(example.label))  # record the true label
        y_h.append(y_hat)  # record the predicted label

    ln = len(nb_model.classes)
    l = len(test_partition.data)
    confusion_matrix = np.zeros((ln, ln))  # confusion matrix of zeros of the right size
    for i in range(l):
        y_r = y_real[i]
        pred_y = y_h[i]
        confusion_matrix[y_r][pred_y] += 1  # count this (true, predicted) label pair

    n = 0  # number of accurately classified data points
    for i in range(ln):
        n += confusion_matrix[i][i]  # correct predictions sit on the diagonal
    accuracy = n / l  # compute accuracy

    print("Accuracy", round(accuracy, 7), "(", int(n), " out of ", l, " correct)")
    print("Confusion Matrix:")
    print(confusion_matrix)
def main():
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename, True)
    test_partition = util.read_arff(opts.test_filename, False)

    # create a DecisionTree instance from training data
    if opts.depth:
        DecisionTree.max_depth = opts.depth
    train_dtree = DecisionTree(train_partition, 0)

    # print text representation of the decision tree
    print(train_dtree)

    # evaluate the decision tree on test data
    correct = 0
    for e in test_partition.data:
        if train_dtree.predict(e) == e.label:
            correct += 1
    print(f'{correct} out of {test_partition.n} correct')
    accuracy = Decimal(f'{correct / test_partition.n}').quantize(
        Decimal('1.0000'))
    print(f'accuracy: {accuracy}')
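# DecisionTree.predict is defined elsewhere; conceptually it walks from the
# root to a leaf, following the branch matching each feature value. A
# minimal sketch, assuming internal nodes expose .feature and a .children
# dict keyed by feature value, and leaves expose .label:
def predict_sketch(node, example):
    """Route an example down the tree and return the leaf's label."""
    while node.children:  # still at an internal node
        node = node.children[example.features[node.feature]]
    return node.label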
def main(): """ Loads data into partitions, creates a Naive Bayes model based on the train data, runs the model on the test data, and evaluates its accuracy. """ opts = util.parse_args() train_partition, test_partition = util.read_arff(opts.filename) nb_model = NaiveBayes(train_partition) examples = test_partition.data total = len(examples) total_correct = 0 K = test_partition.K confusion_matrix = np.zeros((K, K), int) for example in examples: y_hat = nb_model.classify(example.features) y = example.label confusion_matrix[y][y_hat] += 1 if y_hat == y: total_correct += 1 accuracy = round(total_correct / total, 6) accuracy_str = "Accuracy: " + str(accuracy) + " (" correct_str = str(total_correct) + " out of " + str(total) + " correct)" print(accuracy_str + correct_str) stretch = 8 prediction_labels = " " top_row = " " table = "" for y_hat in range(K): prediction_labels += " " * (stretch - len(str(y_hat + 1))) + str(y_hat + 1) top_row += "-" * stretch for y in range(K): table += " " + str(y + 1) + "|" for y_hat in range(K): entry = str(confusion_matrix[y][y_hat]) table += " " * (stretch - len(entry)) + entry table += "\n" print("\n\n prediction") print(prediction_labels) print(top_row) print(table)
""" Run ensemble methods to create ROC curves. Authors: Lamiaa Dakir Date: 10/28/2019 """ import util from random_forest import * from ada_boost import * import optparse import numpy as np import matplotlib.pyplot as plt from math import sqrt # read in data (y in {-1,1}) train_partition = util.read_arff('data/mushroom_train.arff') test_partition = util.read_arff('data/mushroom_test.arff') parser = optparse.OptionParser() parser.add_option('-T', '--classifier_nums', type='int', help='Number of classifiers') (opts, args) = parser.parse_args() T = opts.classifier_nums random_forest_FPRs = [] random_forest_TPRs = [] ada_boost_FPRs = [] ada_boost_TPRs = []
def main():
    opts = util.parse_args()
    train_partition = util.read_arff(opts.train_filename, True)
    test_partition = util.read_arff(opts.test_filename, False)