Example #1
File: hw01.py  Project: darc1/intro2ml
def run_for_k(k, n):
    print(f"running kNN for {n} images with k={k}")
    correct_predictions = 0
    for i in range(len(test)):
        # classify each test image using only the first n training examples
        prediction = knn(train[:n], train_labels[:n], test[i], k=k)
        if prediction == test_labels[i]:
            correct_predictions += 1

    print(f"number of correct predictions for (k={k}, n={n}): {correct_predictions}")
    correct_predictions_rate = correct_predictions / len(test)
    print(f"correct prediction rate for (k={k}, n={n}): {correct_predictions_rate}")
    return correct_predictions_rate
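
A minimal driver for run_for_k could look like the sketch below. It is not part of hw01.py: the value of n and the grid of k values are illustrative, and it assumes train, train_labels, test, and test_labels are already loaded at module level, as the function above implies.

# Hypothetical usage sketch: sweep k for a fixed number of training images n.
if __name__ == "__main__":
    n = 1000  # assumed training-set size, chosen only for illustration
    rates = {k: run_for_k(k, n) for k in [1, 3, 5, 7, 11]}
    best_k = max(rates, key=rates.get)
    print(f"best k for n={n}: {best_k} (rate={rates[best_k]})")
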
Example #2
import numpy as np
from sklearn import preprocessing

from nn import knn, cosine_distance, gaussian_distance, polyd2_distance

if __name__=="__main__":
    # Load data using specialized script
    train_dataset = load_mnist(path="../data/mnist/", dataset="training")
    test_dataset = load_mnist(path="../data/mnist/", dataset="testing")

    # Take a fraction of the data to speed computation
    train_images, train_labels = sample(train_dataset, 5000)
    test_images, test_labels = sample(test_dataset, 1000)

    # Get the bounds of the haar rectangles
    bounds = genbounds(28, 28, 100)

    # Create data, using same rectangles for training and testing
    train_data = genfeatures(train_images, bounds)
    test_data = genfeatures(test_images, bounds)

    # Normalize the data
    zmscaler = preprocessing.StandardScaler()
    train_data = zmscaler.fit_transform(train_data)
    test_data = zmscaler.transform(test_data)

    # Run knn
    for d in [cosine_distance, gaussian_distance, polyd2_distance]:
        for k in [1,3,7]:
            H = knn(train_data, test_data, train_labels, d=d, k=k)
            c = np.sum(test_labels.ravel()==H)
            print "k=%d:" % k, float(c)/float(len(test_labels))
Example #3
def pubmed():
    train = load_data("E:\\pubmed.csv")
    m, n = train.shape
    labels = load_data("E:\\pubmed_label.csv")
    train_copy = train
    list_k_fold = man_split(train_copy, labels, 5)
    acc_bayes = 0
    acc_funbayes = 0
    acc_knn = 0
    acc_funknn = 0

    #print("done")

    micro_bayes = 0
    micro_funbayes = 0
    micro_knn = 0
    micro_funknn = 0
    macro_bayes = 0
    macro_funbayes = 0
    macro_knn = 0
    macro_funknn = 0
    for k1 in range(5):
        #print(k1)
        #print("k1=",end=' ')
        #print(k1)
        test_set = []
        training_set = []
        training_label = []
        test_label = []
        prior = {}
        label1 = []
        #print(list_k_fold)
        # indices assigned to the held-out fold k1
        label1.extend(list_k_fold[k1])

        #print("done")

        for i2 in range(len(labels)):
            if i2 in label1:
                test_set.append(train_copy[i2])
                test_label.append(labels[i2])
            else:
                training_set.append(train_copy[i2])
                #print(trainset)
                training_label.append(labels[i2])

        #print("done")

        dict_info = form_dict(training_set, training_label)
        mean_dict = bay.find_mean(dict_info)
        std_dict = bay.find_std(dict_info, mean_dict)
        prior = bay.find_priors(dict_info)

        _, predictions0 = bay.fun_bayes(training_set, test_set, training_label,
                                        test_label)

        acc_funbayes += _
        micro_funbayes += f1_score(test_label, predictions0, average='micro')
        macro_funbayes += f1_score(test_label, predictions0, average='macro')

        #print(macro_funbayes)
        #print("a")

        _, predictions1 = nn.fun_knn(training_set, test_set, training_label,
                                     test_label)
        acc_funknn += _
        micro_funknn += f1_score(test_label, predictions1, average='micro')
        macro_funknn += f1_score(test_label, predictions1, average='macro')
        #print("b")

        _, predictions2 = nn.knn(training_set, training_label, test_set,
                                 test_label)
        acc_knn += _

        micro_knn += f1_score(test_label, predictions2, average='micro')
        macro_knn += f1_score(test_label, predictions2, average='macro')

        _, predictions3 = bay.bayes(mean_dict, std_dict, test_set, test_label,
                                    prior)
        acc_bayes += _
        micro_bayes += f1_score(test_label, predictions3, average='micro')
        macro_bayes += f1_score(test_label, predictions3, average='macro')
        #print("c")

    file1.write("Test Accuracy on pubmed using inbuilt bayes  ::" +
                str(acc_funbayes / 5) + "\n")
    file1.write("Test Accuracy on pubmed using inbuilt knn ::" +
                str(acc_funknn / 5) + "\n")
    file.write("Test Accuracy on pubmed using my bayes ::" +
               str(acc_bayes / 5) + "\n")
    file.write("Test Accuracy on pubmed using my knn ::" + str(acc_knn / 5) +
               "\n \n")

    file1.write("Test Macro F1 Score on pubmed using inbuilt bayes ::" +
                str(macro_funbayes / 5) + "\n")
    file1.write("Test Macro F1 Score on pubmed using inbuilt knn ::" +
                str(macro_funknn / 5) + "\n")
    file.write("Test Macro F1 Score on pubmed using my bayes ::" +
               str(macro_bayes / 5) + "\n")
    file.write("Test Macro F1 Score on pubmed using my knn ::" +
               str(macro_knn / 5) + "\n \n")

    file1.write("Test Micro F1 Score on pubmed using inbuilt bayes ::" +
                str(micro_funbayes / 5) + "\n")
    file1.write("Test Micro F1 Score on pubmed using inbuilt knn ::" +
                str(micro_funknn / 5) + "\n")
    file.write("Test Micro F1 Score on pubmed using my bayes ::" +
               str(micro_bayes / 5) + "\n")
    file.write("Test Micro F1 Score on pubmed using my knn ::" +
               str(micro_knn / 5) + "\n \n")
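
The function above runs 5-fold cross-validation by hand: it accumulates accuracy plus micro and macro F1 for each classifier across the folds and divides by 5 at the end. The same bookkeeping can be sketched more compactly with scikit-learn's KFold; classify below is a hypothetical stand-in for any one of the classifiers used above (bay.bayes, nn.knn, and so on), not part of the project.

import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, f1_score

def cross_validate(classify, data, labels, folds=5):
    # classify(train_X, train_y, test_X) -> predicted labels (hypothetical signature)
    data, labels = np.asarray(data), np.asarray(labels).ravel()
    acc = micro = macro = 0.0
    for train_idx, test_idx in KFold(n_splits=folds).split(data):
        preds = classify(data[train_idx], labels[train_idx], data[test_idx])
        acc += accuracy_score(labels[test_idx], preds)
        micro += f1_score(labels[test_idx], preds, average='micro')
        macro += f1_score(labels[test_idx], preds, average='macro')
    return acc / folds, micro / folds, macro / folds
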
Example #4
            _, predictions0 = bay.fun_bayes(training_set, test_set,
                                            training_label, test_label)
            acc_funbayes += _
            micro_funbayes += f1_score(test_label,
                                       predictions0,
                                       average='micro')
            macro_funbayes += f1_score(test_label,
                                       predictions0,
                                       average='macro')

            _, predictions1 = nn.fun_knn(training_set, test_set,
                                         training_label, test_label)
            acc_funknn += _
            micro_funknn += f1_score(test_label, predictions1, average='micro')
            macro_funknn += f1_score(test_label, predictions1, average='macro')

            _, predictions2 = nn.knn(training_set, training_label, test_set,
                                     test_label)
            acc_knn += _
            micro_knn += f1_score(test_label, predictions2, average='micro')
            macro_knn += f1_score(test_label, predictions2, average='macro')

            _, predictions3 = bay.bayes(mean_dict, std_dict, test_set,
                                        test_label, prior)
            acc_bayes += _
            micro_bayes += f1_score(test_label, predictions3, average='micro')
            macro_bayes += f1_score(test_label, predictions3, average='macro')

            file1.write(
                "Test Accuracy on dolphin using inbuilt bayes and d =" +
                str(d) + "::" + str(acc_funbayes) + "\n")
            file1.write("Test Accuracy on dolphin using inbuilt knn and d =" +
                        str(d) + "::" + str(acc_funknn) + "\n")
Example #5
if args.k == "p":
        kernel = POLY
elif args.k == "l":
        kernel = LINEAR
elif args.k == "r":
        kernel = RBF

data_file = "../data/spambase/spambase.data"
dmat = []
f = open(data_file, "r")
for line in f:
    x = line.split(',')
    x = [float(e) for e in x]
    dmat.append(x)
data = np.array(dmat)

# k-folds 
k = 10 
kfolder = KFolder(data, k, standard=True, shuffle=False)
for i in range(1):  # evaluate only the first of the k folds
    print(f"Fold: {i+1}")
    # Get data and labels at fold k
    X,Y = kfolder.training(i)
    # Get the testing data
    Xi,Yi = kfolder.testing(i)

    # Run knn
    for j in [1,2,3]:
        H = knn(X, Xi, Y, k=j)
        c = np.sum(Yi.ravel()==H)
        print "k=%d:" % j, float(c)/float(len(Yi))