Пример #1
0
def main():
    """Train a hard-voting ensemble on the 70 best features and write predictions.

    Loads the data from 'data/Data.pickle', clamps all feature values into
    [0, 1], keeps the 70 features selected by SelectKBest(f_classif), fits a
    hard-voting ensemble of three k-NN models, a polynomial SVM and a random
    forest, and passes the integer predictions for the test set to
    write_prediction.
    """
    # import data
    train_set_full, train_tags, test_set_full = load_data('data/Data.pickle')

    # ### Pre-processing ###
    # Trim data: clamp every value into [0, 1] (modifies the loaded arrays in place)
    train_set_full[train_set_full < 0] = 0
    train_set_full[train_set_full > 1] = 1
    test_set_full[test_set_full < 0] = 0
    test_set_full[test_set_full > 1] = 1
    # Select 70 best features; the selector is fitted on the training data only
    # and then applied to both sets so they share the same columns.
    feature_sel_1 = SelectKBest(f_classif, k=70)
    feature_sel_1.fit(train_set_full, train_tags)
    train_set_1 = feature_sel_1.transform(train_set_full)
    test_set_1 = feature_sel_1.transform(test_set_full)

    # ### Configure classifiers ###
    # The base estimators are left unfitted on purpose: VotingClassifier.fit
    # fits its own clones of them, so fitting each one here first would only
    # train every model twice.
    clf_1 = neighbors.KNeighborsClassifier(n_neighbors=1, weights='uniform', p=2)
    clf_2 = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance', p=2)
    clf_3 = neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance', p=1)
    clf_4 = svm.SVC(kernel='poly', C=0.78, degree=11, coef0=2, gamma='auto')
    clf_5 = RandomForestClassifier(n_estimators=200, criterion='entropy', max_depth=None)

    # create voting classifier
    final_clf = VotingClassifier(estimators=[('knn1', clf_1), ('knn3', clf_2), ('knn5', clf_3),
                                             ('svm', clf_4), ('rf', clf_5)], voting='hard')
    final_clf.fit(train_set_1, train_tags)
    write_prediction(final_clf.predict(test_set_1).astype(int))
Пример #2
0
def competition_test(X_train_subset, train_labels, X_test_subset):
    """Classify the test subset with CompetitionClassifier and write the result.

    A CompetitionClassifier is built from X_train_subset and train_labels,
    used to classify X_test_subset, and the prediction is handed to
    write_prediction.
    """
    # Internal evaluation of the classifier. Left commented out for the
    # submission in case the staff wants to run this part.
    # total_avg = 0
    # for k in [2, 4, 6, 8, 10]:
    #     test_size = 1 / k
    #     total_score_per_k_train = 0
    #     total_score_per_k_test = 0
    #     for i in range(k):
    #         X_train, X_test, y_train, y_test = train_test_split(X_train_subset, train_labels, test_size=test_size,
    #                                                             random_state=i)
    #         clf = CompetitionClassifier(X_train, y_train)
    #         total_score_per_k_train += clf.test(X_train, y_train)
    #         total_score_per_k_test += clf.test(X_test, y_test)
    #     print('K value is: ' + str(k))
    #     print('Avg accuracy of CompetitionClassifier on test set: ' + str(total_score_per_k_test / k))
    #     total_avg += total_score_per_k_test / k
    # print('Total avg for all k is: ' + str(total_avg / 5))

    # Classify for the contest.
    contest_clf = CompetitionClassifier(X_train_subset, train_labels)
    write_prediction(contest_clf.classify(X_test_subset))
def experiment_contest():
    """Train the contest classifier and write its predictions for the test set.

    Uses the module-level train_features_ds / train_labels_ds for training and
    classifies every entry of test_features_ds one at a time.
    """
    contest_factory = contest_classifier_factory()

    # To evaluate the classifier with the existing folds instead:
    # res_accuracy, res_error = evaluate(contest_factory, FOLDS)
    # output = str("contest") + "," + str(res_accuracy) + "," + str(res_error)
    # print(output)

    trained = contest_factory.train(train_features_ds, train_labels_ds)
    write_prediction([trained.classify(sample) for sample in test_features_ds])
Пример #4
0
def run_my_classify():
    """Predict the test data with 1-NN after dropping selected feature columns.

    Loads the data through hw3_utils.load_data, removes the listed columns
    from both the training and the test matrices, trains the classifier
    produced by classifier.knn_factory(1) and writes one prediction per test
    row via hw3_utils.write_prediction.
    """
    data, labels, tests = hw3_utils.load_data()

    # Columns to drop, as found by earlier research.
    # The deletions are applied one after another, so each index refers to the
    # matrix as it looks after the previous deletions (the second 90 therefore
    # removes a different original column than the first).
    # NOTE(review): presumably intentional, e.g. the result of a step-by-step
    # elimination — confirm against the research that produced this list.
    features_to_ignore = [90, 23, 90, 103, 36]
    for column in features_to_ignore:
        data = np.delete(data, column, axis=1)
        tests = np.delete(tests, column, axis=1)

    nearest = classifier.knn_factory(1).train(data, labels)
    hw3_utils.write_prediction([nearest.classify(sample) for sample in tests])
Пример #5
0
with open(file_name, 'wb') as file:
    # Each experiment contributes one "<id>,<accuracy>,<error>" row, written
    # as encoded bytes because the file is opened in binary mode.

    # ID3 run -> row id "1"
    id3_f = id3_factory()
    accuracy, error = evaluate(id3_f, 2)
    line = ",".join(["1", str(accuracy), str(error)]) + "\n"
    file.write(line.encode())

    # Perceptron run -> row id "2"
    perceptron_f = perceptron_factory()
    accuracy, error = evaluate(perceptron_f, 2)
    line = ",".join(["2", str(accuracy), str(error)]) + "\n"
    file.write(line.encode())

# part C submission classifier:
# select the 130 best features, train the 1-NN classifier on them and write
# the predictions for the test samples.

patients, labels, test = utils.load_data()

# create the factory
one_nn_f = part_c_classifiers.one_nn_factory()
# reduce the features: fit the selector on the training data only
selector = SelectKBest(score_func=f_classif, k=130)
selector.fit(patients, labels)
newData = selector.transform(patients)
# The test samples must pass through the same fitted selector; otherwise the
# classifier, trained on the 130 selected columns, would be asked to classify
# vectors that still contain every original feature.
# assumes `test` is a 2-D samples-by-features array like `patients` — TODO confirm
newTest = selector.transform(test)
# train the algorithm with the new features
one_nn_clf = one_nn_f.train(newData, labels)
# classify every (reduced) test sample and write the predictions to file
results = [one_nn_clf.classify(t) for t in newTest]
utils.write_prediction(results)
Пример #6
0
def main():
    """Run the optional experiments and write the competition predictions.

    The k-NN, decision-tree and perceptron experiments are each guarded by a
    local debug flag (all skipped by default). Regardless of the flags,
    'experiments12.csv' is opened for writing, and the triple model is fitted
    on the training data and its predictions for the test features are passed
    to write_prediction.
    """
    # Variables used for debug
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # Split once the dataset to two folds.
    folds = 2
    #split_crosscheck_groups(train_features, train_labels, folds)

    if not skip_knn:
        # Evaluating KNN with different k value:
        k_list = [1, 3, 5, 7, 13]
        acc_list = []
        err_list = []
        with open('experiments6.csv', mode='w', newline='') as csv_file:
            exp_writer = csv.writer(csv_file)
            for k in k_list:
                knn_fac = knn_factory(k)
                # NOTE(review): the result is unpacked as (err, acc) — confirm
                # this matches the order evaluate() returns.
                err, acc = evaluate(knn_fac, folds)
                print("k=", k, " acc=", acc, " err=", err)
                exp_writer.writerow([k, acc, err])
                acc_list.append(acc)
                err_list.append(err)

        # Plot KNN Results: accuracy on the top subplot, error on the bottom
        plt.subplot(2, 1, 1)
        plt.plot(k_list, acc_list, '--', color='g')
        plt.plot(k_list, acc_list, 'bo')
        plt.ylabel("Accuracy")
        plt.xlabel("k")
        plt.xticks(k_list)
        plt.subplot(2, 1, 2)
        plt.plot(k_list, err_list, '--', color='r')
        plt.plot(k_list, err_list, 'bo')
        plt.ylabel("Error")
        plt.xlabel("k")
        plt.xticks(k_list)
        plt.tight_layout()
        plt.show()

    # Perform classification for Perceptron and Tree and write to files.
    # The file is opened (and therefore truncated) even when both are skipped.
    with open('experiments12.csv', mode='w', newline='') as csv_file:
        exp_writer = csv.writer(csv_file)
        if not skip_tree:
            # Decision Tree experiment -> row id 1
            myTree = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(myTree, folds)
            print("tree acc=", acc, " tree err=", err)
            exp_writer.writerow([1, acc, err])

        if not skip_perc:
            # Perceptron experiment -> row id 2
            myPerc = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(myPerc, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            exp_writer.writerow([2, acc, err])

    # Competition: Classify test_features
    print("Triple model")
    my_model = triple_model()
    my_model.fit(train_features, train_labels)
    # NOTE(review): only the test features are scaled here while fit() gets the
    # raw training features — confirm triple_model scales its training data
    # internally.
    res = my_model.final_predict(preprocessing.scale(test_features))
    write_prediction(res)