def run_for_k(k, n):
    """Evaluate kNN accuracy on the test set using the first *n* training images.

    Parameters
    ----------
    k : int
        Number of neighbours consulted by the kNN classifier.
    n : int
        Number of training images to use (prefix of the module-level ``train``).

    Returns
    -------
    float
        Fraction of test images classified correctly.
    """
    print(f"running kNN for {n} images with k={k}")
    # NOTE(review): `train` is sliced to n but `train_labels` is passed whole —
    # assumes knn() only consults labels for the rows it is given; verify in nn.knn.
    correct_predictions = sum(
        1
        for image, label in zip(test, test_labels)
        if knn(train[:n], train_labels, image, k=k) == label
    )
    print(f"number of correct predictions for (k={k}, n={n}): {correct_predictions}")
    # f-strings elsewhere imply Python 3, so `/` is already true division.
    correct_predictions_rate = correct_predictions / len(test)
    # Typo fix: "predication" -> "prediction" in the report line.
    print(f"correct prediction percentage for (k={k},n={n}): {correct_predictions_rate}")
    return correct_predictions_rate
from nn import knn, cosine_distance, gaussian_distance, polyd2_distance

if __name__ == "__main__":
    # Load data using specialized script
    train_dataset = load_mnist(path="../data/mnist/", dataset="training")
    test_dataset = load_mnist(path="../data/mnist/", dataset="testing")
    # Take a fraction of the data to speed computation
    train_images, train_labels = sample(train_dataset, 5000)
    test_images, test_labels = sample(test_dataset, 1000)
    # Get the bounds of the haar rectangles
    bounds = genbounds(28, 28, 100)
    # Create data, using same rectangles for training and testing
    train_data = genfeatures(train_images, bounds)
    test_data = genfeatures(test_images, bounds)
    # Normalize the data — fit on training only, then reuse the same
    # scaler on test data to avoid test-set leakage.
    zmscaler = preprocessing.StandardScaler()
    train_data = zmscaler.fit_transform(train_data)
    test_data = zmscaler.transform(test_data)
    # Run knn for every distance metric / k combination
    for d in [cosine_distance, gaussian_distance, polyd2_distance]:
        for k in [1, 3, 7]:
            H = knn(train_data, test_data, train_labels, d=d, k=k)
            c = np.sum(test_labels.ravel() == H)
            # Fix: original used a Python 2 print *statement*, a SyntaxError
            # under the Python 3 this file otherwise targets (f-strings above).
            # print() with two args reproduces the Py2 space-separated output.
            print("k=%d:" % k, float(c) / float(len(test_labels)))
def pubmed():
    """Run 5-fold cross-validation on the pubmed dataset.

    Compares four classifiers — library ("inbuilt"/fun_*) and hand-written
    ("my") variants of naive Bayes and kNN — accumulating accuracy and
    micro/macro F1 over the folds, then writes the 5-fold means to the
    module-level file handles (`file1` for inbuilt results, `file` for the
    hand-written ones).
    """
    train = load_data("E:\\pubmed.csv")
    labels = load_data("E:\\pubmed_label.csv")
    train_copy = train
    # man_split yields, for each of the 5 folds, the row indices held out for testing.
    list_k_fold = man_split(train_copy, labels, 5)
    acc_bayes = acc_funbayes = acc_knn = acc_funknn = 0
    micro_bayes = micro_funbayes = micro_knn = micro_funknn = 0
    macro_bayes = macro_funbayes = macro_knn = macro_funknn = 0
    for k1 in range(5):
        # Fold k1 is the test fold. Use a set for O(1) membership — the
        # original tested `i2 in <list>` inside the per-row loop, making the
        # split O(n^2) in the number of rows.
        test_indices = set(list_k_fold[k1])
        test_set = []
        test_label = []
        training_set = []
        training_label = []
        for i2 in range(len(labels)):
            if i2 in test_indices:
                test_set.append(train_copy[i2])
                test_label.append(labels[i2])
            else:
                training_set.append(train_copy[i2])
                training_label.append(labels[i2])
        # Fit the hand-written Bayes model on this training fold.
        dict_info = form_dict(training_set, training_label)
        mean_dict = bay.find_mean(dict_info)
        std_dict = bay.find_std(dict_info, mean_dict)
        prior = bay.find_priors(dict_info)
        # Inbuilt naive Bayes
        acc, predictions0 = bay.fun_bayes(training_set, test_set, training_label, test_label)
        acc_funbayes += acc
        micro_funbayes += f1_score(test_label, predictions0, average='micro')
        macro_funbayes += f1_score(test_label, predictions0, average='macro')
        # Inbuilt kNN
        acc, predictions1 = nn.fun_knn(training_set, test_set, training_label, test_label)
        acc_funknn += acc
        micro_funknn += f1_score(test_label, predictions1, average='micro')
        macro_funknn += f1_score(test_label, predictions1, average='macro')
        # Hand-written kNN
        acc, predictions2 = nn.knn(training_set, training_label, test_set, test_label)
        acc_knn += acc
        micro_knn += f1_score(test_label, predictions2, average='micro')
        macro_knn += f1_score(test_label, predictions2, average='macro')
        # Hand-written naive Bayes
        acc, predictions3 = bay.bayes(mean_dict, std_dict, test_set, test_label, prior)
        acc_bayes += acc
        micro_bayes += f1_score(test_label, predictions3, average='micro')
        macro_bayes += f1_score(test_label, predictions3, average='macro')
    # Report 5-fold means (divide by the number of folds).
    file1.write("Test Accuracy on pubmed using inbuilt bayes ::" + str(acc_funbayes / 5) + "\n")
    file1.write("Test Accuracy on pubmed using inbuilt knn ::" + str(acc_funknn / 5) + "\n")
    file.write("Test Accuracy on pubmed using my bayes ::" + str(acc_bayes / 5) + "\n")
    file.write("Test Accuracy on pubmed using my knn ::" + str(acc_knn / 5) + "\n \n")
    file1.write("Test Macro F1 Score on pubmed using inbuilt bayes ::" + str(macro_funbayes / 5) + "\n")
    file1.write("Test Macro F1 Score on pubmed using inbuilt knn ::" + str(macro_funknn / 5) + "\n")
    file.write("Test Macro F1 Score on pubmed using my bayes ::" + str(macro_bayes / 5) + "\n")
    file.write("Test Macro F1 Score on pubmed using my knn ::" + str(macro_knn / 5) + "\n \n")
    file1.write("Test Micro F1 Score on pubmed using inbuilt bayes ::" + str(micro_funbayes / 5) + "\n")
    file1.write("Test Micro F1 Score on pubmed using inbuilt knn ::" + str(micro_funknn / 5) + "\n")
    file.write("Test Micro F1 Score on pubmed using my bayes ::" + str(micro_bayes / 5) + "\n")
    file.write("Test Micro F1 Score on pubmed using my knn ::" + str(micro_knn / 5) + "\n \n")
# NOTE(review): orphaned fragment — it begins mid-call ("training_label, test_label)")
# of a function whose `def` line is not present in this chunk, and it references a
# variable `d` defined nowhere visible here (the strings suggest a "dolphin" dataset
# loop over d). Left byte-identical; recover the enclosing definition before editing.
training_label, test_label) acc_funbayes += _ micro_funbayes += f1_score(test_label, predictions0, average='micro') macro_funbayes += f1_score(test_label, predictions0, average='macro') _, predictions1 = nn.fun_knn(training_set, test_set, training_label, test_label) acc_funknn += _ micro_funknn += f1_score(test_label, predictions1, average='micro') macro_funknn += f1_score(test_label, predictions1, average='macro') _, predictions2 = nn.knn(training_set, training_label, test_set, test_label) acc_knn += _ micro_knn += f1_score(test_label, predictions2, average='micro') macro_knn += f1_score(test_label, predictions2, average='macro') _, predictions3 = bay.bayes(mean_dict, std_dict, test_set, test_label, prior) acc_bayes += _ micro_bayes += f1_score(test_label, predictions3, average='micro') macro_bayes += f1_score(test_label, predictions3, average='macro') file1.write( "Test Accuracy on dolphin using inbuilt bayes and d =" + str(d) + "::" + str(acc_funbayes) + "\n") file1.write("Test Accuracy on dolphin using inbuilt knn and d =" + str(d) + "::" + str(acc_funknn) + "\n")
# NOTE(review): orphaned fragment — it begins mid-`elif` chain ("kernel = POLY elif
# args.k == ..."), so the opening `if` and the argparse setup defining `args`, POLY,
# LINEAR and RBF are outside this chunk; it also uses Python 2 print statements,
# unlike the f-string code above. Left byte-identical; recover the enclosing
# script header before editing or porting to Python 3.
kernel = POLY elif args.k == "l": kernel = LINEAR elif args.k == "r": kernel = RBF data_file = "../data/spambase/spambase.data" dmat = [] f = open(data_file, "r") for line in f: x = line.split(',') x = [float(e) for e in x] dmat.append(x) data = np.array(dmat) # k-folds k = 10 kfolder = KFolder(data, k, standard=True, shuffle=False) for i in range(1): print "Fold:", i+1 # Get data and labels at fold k X,Y = kfolder.training(i) # Get the testing data Xi,Yi = kfolder.testing(i) # Run knn for j in [1,2,3]: H = knn(X, Xi, Y, k=j) c = np.sum(Yi.ravel()==H) print "k=%d:" % j, float(c)/float(len(Yi))