def accuracy_test(num_folds):
    """Run k-fold cross-validation with a ChallengeClassifier, printing per-fold accuracy.

    Args:
        num_folds: number of folds to split the loaded dataset into.
    """
    features, classes = loaddata()
    folds = generate_k_folds((features, classes), num_folds)
    for fold_index in range(num_folds):
        print('Testing model on fold %d' % fold_index)
        # A fresh classifier per fold so folds do not share fitted state.
        classifier = ChallengeClassifier()
        (train_x, train_y), (test_x, test_y) = folds[fold_index]
        classifier.fit(train_x, train_y)
        predictions = classifier.classify(test_x)
        fold_accuracy = accuracy(predictions, test_y)
        print('Accuracy test result - fold %d: %f' % (fold_index, fold_accuracy))
def test_random_forest_5_trees(self):
    """Fit a 5-tree random forest and print its confusion matrix and metrics.

    The forest is evaluated on the same data it was fit on, so the printed
    numbers measure training performance, not generalization.
    """
    learner = dt.RandomForest(num_trees=5, depth_limit=5,
                              example_subsample_rate=0.5,
                              attr_subsample_rate=0.5)
    learner.fit(self.train_features, self.train_classes)
    output = learner.classify(self.train_features)
    result = dt.confusion_matrix(output, self.train_classes)
    # print() calls (not Python 2 print statements) keep this method valid
    # under both Python 2 and 3, consistent with the file's other tests.
    print("\n\nconfusion_matrix={}".format(result))
    print("accuracy={}".format(dt.accuracy(output, self.train_classes)))
    print("precision={}".format(dt.precision(output, self.train_classes)))
    print("recall={}".format(dt.recall(output, self.train_classes)))
def test_accuracy_calculation(self):
    """Test accuracy calculation.

    Flips one wrong answer to correct per iteration and checks that the
    reported accuracy grows as (correct so far) / (total).

    Asserts:
        Accuracy matches for all true labels.
    """
    answer = [0, 0, 0, 0, 0]
    true_label = [1, 1, 1, 1, 1]
    total_count = len(answer)
    for index in range(total_count):
        answer[index] = 1
        accuracy = dt.accuracy(answer, true_label)
        # float() guards against Python 2 integer division, under which
        # (index + 1) / total_count truncates to 0 for every index but
        # the last and the assertion could never hold.
        assert accuracy == (index + 1) / float(total_count)
def test_forest_all_data(self):
    """Test random forest.

    Asserts:
        classification on average is >= 75%
    """
    # Values specified in the assignment
    tree = dt.RandomForest(5, 5, .5, .5)
    fold_count = 10
    ten_folds = dt.generate_k_folds(self.dataset, fold_count)
    avg_accuracy = 0.0
    for fold in ten_folds:
        training_set, test_set = fold
        # NOTE(review): the same forest instance is re-fit on every fold;
        # assumes fit() fully resets its state -- confirm in dt.RandomForest.
        tree.fit(training_set[0], training_set[1])
        output = tree.classify(test_set[0])
        avg_accuracy += dt.accuracy(output, test_set[1])
    avg_accuracy = avg_accuracy / fold_count
    # print() call (not a Python 2 print statement) keeps this method valid
    # under both Python 2 and 3; the py2 output is unchanged.
    print('\n\nRandom Forest K-folds: {}'.format(avg_accuracy))
    assert avg_accuracy >= .75
def test_random_forest_5_trees(self):
    """Load the challenge training data and print ChallengeClassifier metrics.

    Evaluates on the same data it was fit on, so the printed numbers measure
    training performance, not generalization.

    NOTE(review): despite its name, this method exercises ChallengeClassifier,
    not RandomForest, and it shares its name with another test method in this
    file -- if both live in the same TestCase class, this definition shadows
    the other. Consider renaming it (e.g. test_challenge_classifier_metrics).
    """
    path = abspath("challenge_train.csv")
    self.train_features, self.train_classes = dt.load_csv(path, 0)
    learner = dt.ChallengeClassifier()
    learner.fit(self.train_features, self.train_classes)
    output = learner.classify(self.train_features)
    print(output)
    result = dt.confusion_matrix(output, self.train_classes)
    # print() calls (not Python 2 print statements) keep this method valid
    # under both Python 2 and 3, consistent with the file's other tests.
    print("\n\nconfusion_matrix={}".format(result))
    print("accuracy={}".format(dt.accuracy(output, self.train_classes)))
    print("precision={}".format(dt.precision(output, self.train_classes)))
    print("recall={}".format(dt.recall(output, self.train_classes)))
def test_challenge_all_data(self):
    """Test challenge section.

    Asserts:
        classification on average is >= 80%
    """
    dataset = dt.load_csv('challenge_train.csv', 0)
    # Change as you see fit by adding parameters you have chosen or run
    # it with defaults
    tree = dt.ChallengeClassifier()
    fold_count = 10
    ten_folds = dt.generate_k_folds(dataset, fold_count)
    avg_accuracy = 0.0
    for fold in ten_folds:
        training_set, test_set = fold
        # NOTE(review): the same classifier instance is re-fit on every fold;
        # assumes fit() fully resets its state -- confirm in ChallengeClassifier.
        tree.fit(training_set[0], training_set[1])
        output = tree.classify(test_set[0])
        avg_accuracy += dt.accuracy(output, test_set[1])
    avg_accuracy = avg_accuracy / fold_count
    # print() call (not a Python 2 print statement) keeps this method valid
    # under both Python 2 and 3; the py2 output is unchanged.
    print('\n\nChallenger K-folds: {}'.format(avg_accuracy))
    assert avg_accuracy >= .80