def run_research():
    """Examine which feature is best to ignore for the highest accuracy.

    Loads the two fold files ('ecg_fold_1.pickle' and 'ecg_fold_2.pickle'),
    each unpacked as a (data, labels, test) triple, and runs seven rounds
    with a 1-NN factory. In every round ``evaluate_comp`` is called once per
    feature index (presumably scoring the classifier with that feature
    ignored -- confirm against ``evaluate_comp``) and the returned
    (feature, accuracy, error) triples are written to
    'my_experiments<round>.csv'. The feature with the highest accuracy is
    removed from the data of every fold before the next round starts.
    """
    factory = classifier.knn_factory(1)

    folds = []
    for fold_id in (1, 2):
        folds.append(hw3_utils.load_data('ecg_fold_' + str(fold_id) + '.pickle'))

    best_feature = None
    for run_num in range(1, 8):
        # From the second round on, drop the column chosen in the previous
        # round; the test part of each fold is left untouched.
        if best_feature is not None:
            folds = [
                (np.delete(data, best_feature, 1), labels, test)
                for data, labels, test in folds
            ]

        feature_count = len(folds[0][0][0])
        results = [
            evaluate_comp(factory, folds, candidate)
            for candidate in range(feature_count)
        ]

        # Each result is (feature, accuracy, error): rank by accuracy and
        # remember the feature index of the winner.
        top_result = max(results, key=lambda row: row[1])
        best_feature = top_result[0]

        with open('my_experiments' + str(run_num) + '.csv', 'w+') as result_file:
            result_file.writelines(
                ','.join(map(str, (feature, accuracy, error))) + '\n'
                for feature, accuracy, error in results
            )
def experiment_6():
    """Print one "k,accuracy,error" line for each k in 1, 3, 5, 7, 13.

    For every k a classifier factory is built with ``knn_factory(k)`` and
    handed to ``evaluate`` together with the module-level ``FOLDS`` value.
    """
    for k in (1, 3, 5, 7, 13):
        accuracy, error = evaluate(knn_factory(k), FOLDS)
        print(",".join(map(str, (k, accuracy, error))))
def experiment6(k_values=(1, 3, 5, 7, 13), num_folds=2):
    """Evaluate a k-NN classifier for several k values and save the results.

    For every k a factory is created with ``classifier.knn_factory(k)`` and
    scored with ``cross_validation.evaluate``. One line per k, formatted as
    "k,accuracy,error" with three decimals, is written to 'experiments6.csv'
    (the file is overwritten on every call).

    Args:
        k_values: Iterable of neighbour counts to test. It is only iterated,
            never modified.
        num_folds: Value forwarded to ``cross_validation.evaluate``.
    """
    filename = "experiments6.csv"
    # The context manager closes (and flushes) the file even when an
    # evaluation raises, so rows already written are not lost.
    with open(filename, 'w') as output_file:
        for k in k_values:
            print("Testing classifier with k={}".format(k))
            knn = classifier.knn_factory(k)
            accuracy, error = cross_validation.evaluate(knn, num_folds)
            output_file.write("{},{:.3f},{:.3f}\n".format(k, accuracy, error))
def run_knn():
    """Score k-NN for k in 1, 3, 5, 7, 13 and store the results as CSV.

    Each k is evaluated with ``utils.evaluate(factory, 2)``; a
    "k,accuracy,error" line is appended to 'experiments6.csv' and a progress
    message is printed after every k, followed by a final "FINISHED".
    """
    with open('experiments6.csv', 'w+') as result_file:
        for neighbours in (1, 3, 5, 7, 13):
            factory = classifier.knn_factory(neighbours)
            accuracy, error = utils.evaluate(factory, 2)
            fields = (neighbours, accuracy, error)
            result_file.write(','.join(map(str, fields)) + '\n')
            print("finished: ", neighbours)
    print("FINISHED")
def KNN_test():
    """Evaluate k-NN for k in 1, 3, 5, 7, 13 and dump the rows to CSV.

    All evaluations (``evaluate(knn_factory(k), 2)``) are finished before
    'experiments6.csv' is opened; the file then receives one
    "k,accuracy,error" row per k, terminated by a bare newline.
    """
    def score(k):
        # One CSV row for a single k value.
        avg_accuracy, avg_error = evaluate(knn_factory(k), 2)
        return [k, avg_accuracy, avg_error]

    rows = [score(k) for k in (1, 3, 5, 7, 13)]

    with open("experiments6.csv", "w") as csv_file:
        csv.writer(csv_file, delimiter=',', lineterminator='\n').writerows(rows)
def run_my_classify():
    """Predict the test data after removing a fixed list of features.

    The feature indices come from an earlier research run. They are removed
    one at a time, from both the training data and the test data, so every
    index refers to the array as it looks after the previous removals; the
    repeated 90 therefore removes two different original columns. A 1-NN
    classifier is then trained on the reduced data and its predictions for
    every test sample are passed to ``hw3_utils.write_prediction``.
    """
    data, labels, tests = hw3_utils.load_data()

    # Order matters: do not merge these into a single np.delete call.
    for column in (90, 23, 90, 103, 36):
        data = np.delete(data, column, axis=1)
        tests = np.delete(tests, column, axis=1)

    model = classifier.knn_factory(1).train(data, labels)
    predictions = []
    for sample in tests:
        predictions.append(model.classify(sample))
    hw3_utils.write_prediction(predictions)
from sklearn.feature_selection import SelectKBest, f_classif

# Question 3.2: load the data set and split the labelled part into 2 groups.
patients, labels, test = utils.load_data()
split_crosscheck_groups([patients, labels], 2)

# Question 5.1: evaluate k-NN for several k values and store one
# "k,accuracy,error" line per k in experiments6.csv.
k_list = [1, 3, 5, 7, 13]
accuracy_list = []
file_name = 'experiments6.csv'
with open(file_name, 'wb') as file:
    for k in k_list:
        knn_f = knn_factory(k)
        accuracy, error = evaluate(knn_f, 2)
        accuracy_list.append(accuracy)
        line = ",".join(map(str, (k, accuracy, error))) + "\n"
        # The file is opened in binary mode, so the text line is encoded first.
        file.write(line.encode())

# Question 5.2: plot the average accuracy as a function of k.
plt.plot(k_list, accuracy_list)
plt.xlabel('K value')
plt.ylabel('Average accuracy')
plt.title('Part B, question 5.2')
plt.show()

# Questions 7.1, 7.2
from sklearn.ensemble import RandomForestClassifier

# Load the data set; `data` keeps the untouched examples next to their labels.
examples, labels, test = load_data()
data = [examples, labels]

# Reduced data set: keep only the 100 best features according to f_classif.
# `k` is passed by keyword because it is keyword-only in current scikit-learn.
data_new = [
    SelectKBest(f_classif, k=100).fit_transform(examples, labels),
    labels,
]
classifier.split_crosscheck_groups(data_new, 2)
print("using CUT data\n")

# NOTE: `decision_tree` actually wraps a RandomForestClassifier.
decision_tree = classifier.sklearn_factory_wrapper(RandomForestClassifier())
perceptron = classifier.sklearn_factory_wrapper(Perceptron())
knn = classifier.knn_factory(7)

# Evaluate every ensemble combination with the same steps. The second entry
# previously reused [knn, perceptron], so the "knn and decision tree"
# experiment silently repeated the first one.
for title, members in (
    ("knn and perceptron: \n", [knn, perceptron]),
    ("knn and decision tree: \n", [knn, decision_tree]),
    ("all three: \n", [knn, perceptron, decision_tree]),
):
    print(title)
    ensemble = classifier.ensemble_factory(members)
    accuracy, error = classifier.evaluate(ensemble, 2)
    print("%.3f, %.3f\n" % (accuracy, error))
def main():
    """Run the optional experiments, then classify the competition test set.

    The three ``skip_*`` flags are hard-coded debug switches. With all of
    them set to True only the final "Triple model" prediction runs. Note
    that 'experiments12.csv' is opened for writing (and therefore emptied)
    on every call, even when both the tree and the perceptron experiments
    are skipped.
    """
    # Debug switches: set one to False to run the matching experiment.
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # Number of folds passed to evaluate().
    folds = 2
    # The one-time split of the dataset into folds is currently disabled:
    #split_crosscheck_groups(train_features, train_labels, folds)

    if not skip_knn:
        # Evaluate k-NN for several k values and record each result.
        k_list = [1, 3, 5, 7, 13]
        acc_list, err_list = [], []
        with open('experiments6.csv', mode='w', newline='') as csv_file:
            exp_writer = csv.writer(csv_file)
            for k in k_list:
                # NOTE(review): the result is unpacked as (err, acc) --
                # confirm that this matches evaluate's return order.
                err, acc = evaluate(knn_factory(k), folds)
                print("k=", k, " acc=", acc, " err=", err)
                exp_writer.writerow([k, acc, err])
                acc_list.append(acc)
                err_list.append(err)

        # Plot the k-NN results: accuracy in the upper panel, error below.
        panels = ((acc_list, 'g', "Accuracy"), (err_list, 'r', "Error"))
        for position, (series, colour, label) in enumerate(panels, start=1):
            plt.subplot(2, 1, position)
            plt.plot(k_list, series, '--', color=colour)
            plt.plot(k_list, series, 'bo')
            plt.ylabel(label)
            plt.xlabel("k")
            plt.xticks(k_list)
        plt.tight_layout()
        plt.show()

    # Evaluate the decision tree and the perceptron and write their results.
    with open('experiments12.csv', mode='w', newline='') as csv_file:
        exp_writer = csv.writer(csv_file)

        if not skip_tree:
            # Decision tree experiment (row id 1).
            tree_clf = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(tree_clf, folds)
            print("tree acc=", acc, " tree err=", err)
            exp_writer.writerow([1, acc, err])

        if not skip_perc:
            # Perceptron experiment (row id 2).
            perceptron_clf = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(perceptron_clf, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            exp_writer.writerow([2, acc, err])

    # Competition: fit on the training data and classify the scaled test set.
    print("Triple model")
    my_model = triple_model()
    my_model.fit(train_features, train_labels)
    scaled_test = preprocessing.scale(test_features)
    write_prediction(my_model.final_predict(scaled_test))