Exemplo n.º 1
0
def main(k, data_set_name=None, data_dir=None):
    """Compare the project's KNN classifier with KNeighborsClassifier.

    Loads a dataset, shuffles and normalizes it, splits it into training and
    testing portions, runs both classifiers with the same ``k``, prints both
    accuracies and writes them to a results file in the parent of the current
    working directory.

    Args:
        k: Number of neighbours used by both classifiers.
        data_set_name: File name of the dataset to load. When ``None`` the
            iris dataset returned by ``datasets.load_iris()`` is used.
        data_dir: Directory containing ``data_set_name``. When ``None`` the
            original hard-coded location is used, so existing callers behave
            exactly as before.
    """
    split_percentage = 0.7
    # Historical default location, kept byte-for-byte so callers that do not
    # pass data_dir keep loading from the same place.
    default_data_dir = (
        "C:\\Users\\Grant\\Documents\\School\\Winter 2016\\CS 450\\Prove01\\")

    # Load dataset
    if data_set_name is not None:
        if data_dir is None:
            data_path = default_data_dir + data_set_name
        else:
            data_path = os.path.join(data_dir, data_set_name)
        data_set = randomize_dataset(read_file_into_dataset(data_path))
    else:
        data_set_name = "iris"
        data_set = randomize_dataset(datasets.load_iris())

    data_set.data = normalize(data_set.data)
    data_sets = split_dataset(data_set, split_percentage)
    training_set = data_sets['train']
    testing_set = data_sets['test']

    # My Classifier
    knn_classifier = KnnClassifier()
    knn_classifier.k = k
    knn_classifier.train(training_set.data, training_set.target,
                         training_set.target_names)
    my_predictions = knn_classifier.predict(testing_set.data)
    my_accuracy = get_accuracy(my_predictions, testing_set.target)

    # Better Classifier
    better_classifier = KNeighborsClassifier(n_neighbors=k)
    better_classifier.fit(training_set.data, training_set.target)
    better_predictions = better_classifier.predict(testing_set.data)
    better_accuracy = get_accuracy(better_predictions, testing_set.target)

    # NOTE(review): a "%" sign is appended, so get_accuracy presumably returns
    # a percentage rather than a fraction - confirm against its definition.
    my_line = "My results: " + str(my_accuracy) + "%"
    better_line = "Better results: " + str(better_accuracy) + "%"
    print(my_line)
    print(better_line)

    results = "k = " + str(k) + "\n" + my_line + "\n" + better_line + "\n"

    # The results file goes one level above the current working directory.
    results_name = str(k) + "-" + data_set_name + "_results.txt"
    results_path = os.path.join(os.getcwd(), os.pardir, results_name)
    write_to_results_file(results_path, results, k)
Exemplo n.º 2
0
Arquivo: main.py Projeto: gshawm/CS450
def main(k, data_set_name=None, data_dir=None):
    """Run the project's KNN classifier and KNeighborsClassifier side by side.

    The dataset is shuffled, normalized and split into training and testing
    portions. Both classifiers are trained and evaluated with the same ``k``;
    their accuracies are printed and written to a results file located in the
    parent of the current working directory.

    Args:
        k: Number of neighbours used by both classifiers.
        data_set_name: File name of the dataset to read. If ``None``, the
            dataset returned by ``datasets.load_iris()`` is used and the name
            "iris" appears in the results file name.
        data_dir: Directory that holds ``data_set_name``. If ``None``, the
            original hard-coded directory is used, which keeps the previous
            behaviour for existing callers.
    """
    split_percentage = 0.7
    # Original machine-specific location; only used when data_dir is omitted.
    default_data_dir = "C:\\Users\\Grant\\Documents\\School\\Winter 2016\\CS 450\\Prove01\\"

    # Load dataset
    if data_set_name is None:
        data_set_name = "iris"
        raw_data_set = datasets.load_iris()
    elif data_dir is None:
        raw_data_set = read_file_into_dataset(default_data_dir + data_set_name)
    else:
        raw_data_set = read_file_into_dataset(os.path.join(data_dir, data_set_name))
    data_set = randomize_dataset(raw_data_set)

    data_set.data = normalize(data_set.data)
    data_sets    = split_dataset(data_set, split_percentage)
    training_set = data_sets['train']
    testing_set  = data_sets['test']

    # My Classifier
    knn_classifier = KnnClassifier()
    knn_classifier.k = k
    knn_classifier.train(training_set.data, training_set.target, training_set.target_names)
    my_accuracy = get_accuracy(knn_classifier.predict(testing_set.data), testing_set.target)

    # Better Classifier
    better_classifier = KNeighborsClassifier(n_neighbors=k)
    better_classifier.fit(training_set.data, training_set.target)
    better_accuracy = get_accuracy(better_classifier.predict(testing_set.data), testing_set.target)

    # NOTE(review): the "%" suffix suggests get_accuracy returns a percentage;
    # verify against its definition.
    print("My results: " + str(my_accuracy) + "%")
    print("Better results: " + str(better_accuracy) + "%")

    results = "k = " + str(k) + "\nMy results: " + str(my_accuracy) + "%\n" + "Better results: " + str(better_accuracy) + "%\n"

    # Build the output path with os.path.join instead of manual os.sep
    # concatenation; the file still lands one level above the working directory.
    results_file = os.path.join(os.getcwd(), os.pardir, str(k) + "-" + data_set_name + "_results.txt")
    write_to_results_file(results_file, results, k)
Exemplo n.º 3
0
                        df.columns[i]].quantile(0.25) != 1:
            index.append(i)

    features = df[df.columns[index]]
    labels = df[df.columns[len(df.columns) - 1]]

    # Replacing alphabets to numbers for ease of use
    labels = labels.replace('A', 1).replace('B', 2).replace('C', 3).replace(
        'D', 4).replace('E', 5)

    X = features.values
    y = labels.values

    X_train, X_test, Y_train, Y_test = train_test_split(features.values,
                                                        labels.values,
                                                        test_size=0.2)

    # Running knn classifier on the dataset
    start = timeit.default_timer()
    knnClf = KnnClassifier(X_train, Y_train)
    predictions = knnClf.predict(X_test=X_test, k=41)
    stop = timeit.default_timer()

    print("Run Time: ", stop - start)

    # transform the list into an array
    predictions = np.asarray(predictions)
    # evaluating accuracy
    accuracy = accuracy_score(Y_test[0:100], predictions)
    print('\nThe accuracy of our classifier is %d%%' % accuracy)