def handle_data(train_set, test_set):
    '''
    Evaluate three classifiers (decision tree, knn, naive bayes) and
    write the decision tree plus the accuracies to OUTPUT_FILE.

    :param train_set: raw training input, handed to split_train_data
    :param test_set: raw test input, handed to split_test_data

    Each model receives the training data via set_data, predicts the test
    data, and is scored with get_acc; scores are kept as strings with two
    digits after the decimal point.
    '''
    # separate the raw inputs into data, labels and attribute metadata
    train_data, train_data_labels, attributes, label_key = split_train_data(
        train_set)
    test_data, test_data_labels = split_test_data(test_set)

    # instantiate the models in the same order the accuracies are reported
    decision_tree = DecisionTree.Model()
    models = (decision_tree, Knn.Model(), NaiveBayes.Model())

    def evaluate(model):
        # a fresh metadata list is built for every model
        model.set_data(train_data, train_data_labels, [label_key, attributes])
        predicted = model.predict(test_data)
        score = get_acc(predicted, test_data_labels)
        return "{0:.2f}".format(score)  # keep 2 digits after the point

    accuracies = [evaluate(model) for model in models]

    # the tree goes to the output file first, then the accuracies follow
    decision_tree.get_tree().write_tree(OUTPUT_FILE)
    write_accuracies(OUTPUT_FILE, accuracies)
print(I2F) print(L2I) print(I2L) print(V2I) print(I2V) ''' # loading the datasetes TRAIN = get_data("train.txt") TEST = get_data("test.txt") # creating the different models dt = DecisionTree.Model( TRAIN, values={i: value.keys() for i, value in I2V.items()}) knn = KNN.Model(TRAIN, k=5) nb = NaiveBayes.Model(TRAIN, I2L.keys()) # predicting on the TEST set r = open("output.txt", 'w') predict(TEST, [dt, knn, nb]) r.close() # printing the tree that DecisionTree created t = open("output_tree.txt", 'w') print_tree(dt.root) t.close() # deleting the last '\n' character with open("output_tree.txt", 'rb+') as t: t.seek(-1, os.SEEK_END) t.truncate()