def main():
    """Train a hard-voting ensemble on the 70 best features and write predictions.

    Steps:
      1. Load the train set, train tags and test set from ``data/Data.pickle``.
      2. Clamp every value of both sets to the range [0, 1].
      3. Keep the 70 features selected by ``SelectKBest`` with ``f_classif``
         (the selector is fitted on the training data only).
      4. Fit a hard-voting ensemble of three k-NN models, a polynomial SVM
         and a random forest.
      5. Predict the test set, cast the result to ``int`` and pass it to
         ``write_prediction``.
    """
    # import data
    train_set_full, train_tags, test_set_full = load_data('data/Data.pickle')

    # ### Pre-processing ###
    # Trim data: values below 0 become 0, values above 1 become 1.
    for data_set in (train_set_full, test_set_full):
        data_set[data_set < 0] = 0
        data_set[data_set > 1] = 1

    # Select 70 best features
    feature_sel_1 = SelectKBest(f_classif, k=70)
    feature_sel_1.fit(train_set_full, train_tags)
    train_set_1 = feature_sel_1.transform(train_set_full)
    test_set_1 = feature_sel_1.transform(test_set_full)

    # ### Train classifiers ###
    # The base estimators are passed to the ensemble unfitted:
    # VotingClassifier.fit clones and fits each of them itself, so fitting
    # them here first would only train every model twice and throw the
    # first result away.
    estimators = [
        ('knn1', neighbors.KNeighborsClassifier(n_neighbors=1, weights='uniform', p=2)),
        ('knn3', neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance', p=2)),
        ('knn5', neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance', p=1)),
        ('svm', svm.SVC(kernel='poly', C=0.78, degree=11, coef0=2, gamma='auto')),
        ('rf', RandomForestClassifier(n_estimators=200, criterion='entropy', max_depth=None)),
    ]

    # create voting classifier
    final_clf = VotingClassifier(estimators=estimators, voting='hard')
    final_clf.fit(train_set_1, train_tags)

    write_prediction(final_clf.predict(test_set_1).astype(int))
def competition_test(X_train_subset, train_labels, X_test_subset):
    """Classify the contest test set and write the predictions.

    Builds a ``CompetitionClassifier`` from the given training samples and
    labels, classifies ``X_test_subset`` with it and passes the result to
    ``write_prediction``.

    :param X_train_subset: training samples handed to the classifier.
    :param train_labels: labels matching ``X_train_subset``.
    :param X_test_subset: samples to classify for the contest.
    """
    # Internal accuracy tests of the classifier. They are left commented out
    # for the submission in case the staff want to run this part.
    #
    # total_avg = 0
    # for k in [2, 4, 6, 8, 10]:
    #     test_size = 1 / k
    #     total_score_per_k_train = 0
    #     total_score_per_k_test = 0
    #     for i in range(k):
    #         X_train, X_test, y_train, y_test = train_test_split(
    #             X_train_subset, train_labels, test_size=test_size, random_state=i)
    #         clf = CompetitionClassifier(X_train, y_train)
    #         total_score_per_k_train += clf.test(X_train, y_train)
    #         total_score_per_k_test += clf.test(X_test, y_test)
    #     print('K value is: ' + str(k))
    #     print('Avg accuracy of CompetitionClassifier on test set: ' + str(total_score_per_k_test / k))
    #     total_avg += total_score_per_k_test / k
    # print('Total avg for all k is: ' + str(total_avg / 5))

    # Classify for the contest.
    contest_classifier = CompetitionClassifier(X_train_subset, train_labels)
    write_prediction(contest_classifier.classify(X_test_subset))
def experiment_contest():
    """Train the contest classifier and write its predictions for the test set.

    Obtains a factory from ``contest_classifier_factory``, trains it on the
    module-level ``train_features_ds`` / ``train_labels_ds``, classifies each
    entry of ``test_features_ds`` individually and passes the list of
    results to ``write_prediction``.
    """
    factory = contest_classifier_factory()

    # To evaluate the classifier with the existing folds, enable:
    # res_accuracy, res_error = evaluate(factory, FOLDS)
    # output = str("contest") + "," + str(res_accuracy) + "," + str(res_error)
    # print(output)

    trained = factory.train(train_features_ds, train_labels_ds)
    predictions = [trained.classify(sample) for sample in test_features_ds]
    write_prediction(predictions)
def run_my_classify():
    """Predict the test data with a 1-NN classifier after dropping some features.

    Loads data, labels and test samples through ``hw3_utils.load_data``,
    removes the listed feature columns from both the training and the test
    matrix, trains ``classifier.knn_factory(1)`` on the reduced data and
    writes one prediction per test sample via ``hw3_utils.write_prediction``.
    """
    samples, sample_labels, test_samples = hw3_utils.load_data()

    # Columns to drop, taken from earlier research. They are removed one at
    # a time, so each index addresses the matrix as it looks AFTER the
    # previous removals.
    # NOTE(review): the repeated 90 suggests the indices were recorded against
    # the progressively reduced matrix - confirm before reordering or
    # de-duplicating this list.
    drop_sequence = [90, 23, 90, 103, 36]
    for column in drop_sequence:
        samples = np.delete(samples, column, axis=1)
        test_samples = np.delete(test_samples, column, axis=1)

    model = classifier.knn_factory(1).train(samples, sample_labels)
    predictions = list(map(model.classify, test_samples))
    hw3_utils.write_prediction(predictions)
# Evaluate ID3 and Perceptron with 2 folds and record one
# "<id>,<accuracy>,<error>" line per classifier in `file_name`.
# The file is opened in binary mode, hence the explicit encode().
with open(file_name, 'wb') as file:
    # ID3 RUN
    id3_f = id3_factory()
    accuracy, error = evaluate(id3_f, 2)
    line = ",".join(["1", str(accuracy), str(error)]) + "\n"
    file.write(line.encode())

    # Perceptron RUN
    perceptron_f = perceptron_factory()
    accuracy, error = evaluate(perceptron_f, 2)
    line = ",".join(["2", str(accuracy), str(error)]) + "\n"
    file.write(line.encode())

# part C submission classifier
patients, labels, test = utils.load_data()

# create the factory
one_nn_f = part_c_classifiers.one_nn_factory()

# reduce the features: keep the 130 best according to f_classif,
# with the selector fitted on the training data only
selector = SelectKBest(score_func=f_classif, k=130)
selector.fit(patients, labels)
newData = selector.transform(patients)
# The test samples must go through the SAME fitted selector; otherwise they
# keep every original feature while the classifier was trained on 130.
newTest = selector.transform(test)

# train the algorithm with the new features
one_nn_clf = one_nn_f.train(newData, labels)

# write the prediction for every test sample to file
results = [one_nn_clf.classify(t) for t in newTest]
utils.write_prediction(results)
def main():
    """Run the optional experiments, then classify the test set for the competition.

    Three debug switches decide which experiments run (all are skipped by
    default):

    * k-NN for several ``k`` values - results go to ``experiments6.csv`` and
      are plotted.
    * Decision tree and Perceptron - results go to ``experiments12.csv``
      (the file is created even when both are skipped).

    Afterwards a ``triple_model`` is fitted on the training data and its
    predictions for the scaled test features are passed to
    ``write_prediction``.
    """
    # Variables used for debug
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # Split once the dataset to two folds.
    folds = 2
    # split_crosscheck_groups(train_features, train_labels, folds)

    if not skip_knn:
        # Evaluating KNN with different k value:
        k_list = [1, 3, 5, 7, 13]
        acc_list = []
        err_list = []
        with open('experiments6.csv', mode='w', newline='') as knn_csv:
            knn_writer = csv.writer(knn_csv)
            for k in k_list:
                # NOTE(review): the result is unpacked as (error, accuracy) -
                # confirm this matches the return order of evaluate().
                err, acc = evaluate(knn_factory(k), folds)
                print("k=", k, " acc=", acc, " err=", err)
                knn_writer.writerow([k, acc, err])
                acc_list.append(acc)
                err_list.append(err)

        # Plot KNN Results: accuracy on the top subplot, error on the bottom.
        panels = (
            (1, acc_list, 'g', "Accuracy"),
            (2, err_list, 'r', "Error"),
        )
        for position, values, line_color, y_label in panels:
            plt.subplot(2, 1, position)
            plt.plot(k_list, values, '--', color=line_color)
            plt.plot(k_list, values, 'bo')
            plt.ylabel(y_label)
            plt.xlabel("k")
            plt.xticks(k_list)
        plt.tight_layout()
        plt.show()

    # Perform classification for Perceptron and Tree and write to files.
    with open('experiments12.csv', mode='w', newline='') as results_csv:
        results_writer = csv.writer(results_csv)

        if not skip_tree:
            # Decision Tree experiment
            tree_clf = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(tree_clf, folds)
            print("tree acc=", acc, " tree err=", err)
            results_writer.writerow([1, acc, err])

        if not skip_perc:
            # Perceptron experiment
            perceptron_clf = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(perceptron_clf, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            results_writer.writerow([2, acc, err])

    # Competition: Classify test_features
    print("Triple model")
    competition_model = triple_model()
    competition_model.fit(train_features, train_labels)
    # NOTE(review): only the test features are scaled here; presumably
    # triple_model scales the training data internally - confirm.
    scaled_test = preprocessing.scale(test_features)
    write_prediction(competition_model.final_predict(scaled_test))