# Interactive k-NN evaluation: either sweep a fixed list of k values or use a
# single user-supplied k, timing validation error over growing training sizes.
# NOTE(review): relies on names defined elsewhere in the file (x, t, n_list,
# x_eval, t_eval, knn_classify, euclidean_distance, and pylab-style zeros/plot/
# savetxt imports) — confirm against the surrounding file.
# NOTE(review): raw_input is Python 2 only; the parenthesized print calls also
# work there, but confirm the intended interpreter version.
sk = raw_input("Enter number of nearest neighbors to consider or 'S' to sweep values [S]: ") or 's'
if sk.lower() == 's':
    print("Sweeping k")
    # Odd k values avoid ties in majority voting; range spans 1..401.
    k_list = [1, 3, 5, 7, 21, 101, 401]
else:
    k_list = [int(sk)]
# Rows index k values, columns index training-set sizes.
validation_errors = zeros((len(k_list), len(n_list)))
for l, n in enumerate(n_list):
    for j, k in enumerate(k_list):
        # Truncate training data to the first n samples.
        xj = x[:n]
        tj = t[:n]
        start = time.time()
        for i, xi in enumerate(x_eval):
            ti = knn_classify(xj, tj, xi, k, euclidean_distance)
            if ti != t_eval[i]:
                validation_errors[j,l] += 1
        end = time.time()
        print("K = %d, N = %d, Validation Errors = %d, Time = %f"%(k,n,validation_errors[j,l],end-start))
    # Blank separator between training-size groups.
    # NOTE(review): placement inside the outer loop reconstructed from a
    # collapsed source line — confirm original nesting.
    print(" ")
if len(k_list) > 1:
    # Plot error rate (%) vs. k for the first training size only.
    # NOTE(review): only column 0 (n_list[0]) is plotted even when n_list has
    # several entries — confirm this is intentional.
    plot(k_list, validation_errors[:,0].flatten()/len(x_eval)*100, 'x-', label="N = %d"%(n_list[0]))
    xlabel('Neighbors Considered')
    ylabel('Validation Errors %')
    legend(loc='best')
    # Log x-axis because k values grow roughly geometrically.
    xscale('log')
    grid(True,which="both",ls="-")
    savefig("results/Task_2.eps")
    # Persist raw (k, error-count) pairs as two integer columns.
    savetxt("results/Task_2.csv", array(zip(k_list, validation_errors[:,0])), fmt='%i %i')
# Build test set, preprocess face images, then sweep k on the validation set
# to find the best k value(s) for a male/female k-NN classifier.
# NOTE(review): x_test_f/x_test_m, x_train, x_validation, t_train,
# t_validation, rgb2gray, imresize, knn_classify and euclidean_distance are
# defined earlier in the file — confirm against the full source.
x_test = x_test_f + x_test_m
# Labels: 1 = female, 0 = male (female images come first).
t_test = np.hstack((np.ones(len(x_test_f)), np.zeros(len(x_test_m))))
# Keep references to the original (color, full-size) images for later display.
xto = x_train
xteo = x_test
# Downscale to 32x32 grayscale so Euclidean distance is cheap and uniform.
# NOTE(review): imresize is presumably scipy.misc.imresize (removed in
# SciPy >= 1.3) — verify the import.
x_train = [rgb2gray(imresize(x, (32,32))) for x in x_train]
x_validation = [rgb2gray(imresize(x, (32,32))) for x in x_validation]
x_test = [rgb2gray(imresize(x, (32,32))) for x in x_test]
# k candidates: every value 1..9, then every 5th up to the full training size,
# plus the training size itself.
krange = [i for j in (range(1,10), range(11, len(x_train),5), [len(x_train)]) for i in j]
validation_errors = np.zeros(len(krange))
# Per-validation-sample list passed into knn_classify; presumably a distance
# cache mutated in place so distances are computed once across all k — verify
# against knn_classify's signature.
validation_distances = defaultdict(list)
for j, k in enumerate(krange):
    for i, xi in enumerate(x_validation):
        ti, _ = knn_classify(x_train, t_train, xi, k, euclidean_distance, validation_distances[i])
        if ti != t_validation[i]:
            validation_errors[j] += 1
    print "K = %d - Validation Errors = %d (%d%%)" %(k, validation_errors[j], validation_errors[j]/len(x_validation)*100)
# All k values tied for the minimum error count are kept as "best".
best_ki = np.where(validation_errors == validation_errors.min())[0]
best_k = np.array(krange)[best_ki]
best_perf = validation_errors[best_ki]/len(x_validation)*100
np.savetxt("results/part_5/eval_performance.csv", np.array(zip(best_k, best_perf)), fmt='%i %i')
print "Best values for k: %s\n" %best_k
# Accumulators for the test-set evaluation that follows this chunk.
test_errors = np.zeros(len(best_k))
test_distances = defaultdict(list)
trigger = 0
nl=0
from favorite_language_data import coord_language_pairs from matplotlib import pyplot from k_nearest_neighbors import knn_classify for k in [1, 3, 5, 7]: correct_predictions = 0 for city in coord_language_pairs: other_cities = [ other_city for other_city in coord_language_pairs if other_city != city ] predicted_language = knn_classify(k, other_cities, city[0]) actual_language = city[1] print('coords, actual_language, predicted_language = %s, %s, %s' % (city[0], actual_language, predicted_language)) plots = {"Java": ([], []), "Python": ([], []), "R": ([], [])} markers = {"Java": "o", "Python": "s", "R": "^"} colors = {"Java": "r", "Python": "b", "R": "g"} for (latitude, longitude), language in coord_language_pairs: plots[language][0].append(latitude) plots[language][1].append(longitude) for language, (x, y) in plots.items(): pyplot.scatter(x, y, color=colors[language],
from decision_tree_classification import dt_classify
from kernel_svm import ksvm_classify
from logistic_regression import logreg_classify
from naive_bayes import nb_classify
from k_nearest_neighbors import knn_classify
from random_forest_classification import rf_classify
from support_vector_machine import lsvm_classify

# Run every classifier against the same dataset and report each model's
# result on its own line, in a fixed order.
dataset_path = 'breast_cancer.csv'
model_runs = [
    ('Decision Tree Model: ', dt_classify),
    ('Random Forrest Model: ', rf_classify),
    ('K-NN Model: ', knn_classify),
    ('Linear SVM Model: ', lsvm_classify),
    ('Kernel SVM Model: ', ksvm_classify),
    ('Logistic Regression Model: ', logreg_classify),
    ('Naive Bayes Model: ', nb_classify),
]
for model_label, run_model in model_runs:
    print(model_label, run_model(dataset_path))
"Enter number of nearest neighbors to consider or 'S' to sweep values [S]: "
) or 's'
# NOTE: the statement above is continued from a previous chunk (presumably a
# raw_input(...) call whose opening is out of view); the prompt string and the
# `or 's'` default belong to it.
# Either sweep a fixed list of k values or use the single user-supplied k.
if sk.lower() == 's':
    print("Sweeping k")
    # Odd k values avoid ties in majority voting.
    k_list = [1, 3, 5, 7, 21, 101, 401]
else:
    k_list = [int(sk)]
# Rows index k values, columns index training-set sizes.
# NOTE(review): zeros/plot/xlabel/etc. appear to come from pylab-style star
# imports out of view — confirm.
validation_errors = zeros((len(k_list), len(n_list)))
for l, n in enumerate(n_list):
    for j, k in enumerate(k_list):
        # Truncate training data to the first n samples.
        xj = x[:n]
        tj = t[:n]
        start = time.time()
        for i, xi in enumerate(x_eval):
            ti = knn_classify(xj, tj, xi, k, euclidean_distance)
            if ti != t_eval[i]:
                validation_errors[j, l] += 1
        end = time.time()
        print("K = %d, N = %d, Validation Errors = %d, Time = %f" %
              (k, n, validation_errors[j, l], end - start))
    # Blank separator between training-size groups.
    # NOTE(review): nesting reconstructed from a collapsed source line —
    # confirm original placement.
    print(" ")
if len(k_list) > 1:
    # Plot error rate (%) vs. k for the first training size only.
    plot(k_list,
         validation_errors[:, 0].flatten() / len(x_eval) * 100,
         'x-',
         label="N = %d" % (n_list[0]))
    xlabel('Neighbors Considered')
    ylabel('Validation Errors %')
    legend(loc='best')
from decision_tree_classification import dt_classify
from kernel_svm import ksvm_classify
from logistic_regression import logreg_classify
from naive_bayes import nb_classify
from k_nearest_neighbors import knn_classify
from random_forest_classification import rf_classify
from support_vector_machine import lsvm_classify

# Run each classifier over the reviews dataset in a fixed order, printing a
# divider line between consecutive runs (but not before the first or after
# the last).
reviews_path = '../Restaurant_Reviews.tsv'
classifier_sequence = (
    logreg_classify,
    knn_classify,
    dt_classify,
    ksvm_classify,
    lsvm_classify,
    rf_classify,
)
for position, run_classifier in enumerate(classifier_sequence):
    if position:
        print('********************************************')
    run_classifier(reviews_path)
0, 20)
# NOTE: the statement above closes a call whose opening is in a previous
# chunk (presumably a data-loading helper taking 0, 20 as final arguments).
# Classify test face images as male/female with 5-NN and plot each image
# alongside its five nearest training neighbors.
genders = ['Male', 'Female']
x_train = x_train_f + x_train_m
# Labels: 1 = female, 0 = male (female images come first).
t_train = np.hstack((np.ones(len(x_train_f)), np.zeros(len(x_train_m))))
x_test = x_test_f + x_test_m
t_test = np.hstack((np.ones(len(x_test_f)), np.zeros(len(x_test_m))))
# Resize to 32x32 and convert to grayscale
# NOTE(review): imresize is presumably scipy.misc.imresize (removed in
# SciPy >= 1.3) — verify the import; rgb2gray/knn_classify come from
# elsewhere in the file.
x_train_bw = [rgb2gray(imresize(x, (32, 32))) for x in x_train]
x_test_bw = [rgb2gray(imresize(x, (32, 32))) for x in x_test]
# Classify pictures in test set
# Iterate test indices in random order so reruns show different samples.
for xi in np.random.permutation(len(x_test_bw)):
    # ti = predicted label, nn = indices of the 5 nearest training images.
    ti, nn = knn_classify(x_train_bw, t_train, x_test_bw[xi], 5)
    # Plot image with classification
    plt.subplot(121)
    # Show the original color image, not the 32x32 grayscale version.
    plt.imshow(x_test[xi])
    plt.axis('off')
    # Title green when the prediction matches the true label, red otherwise.
    plt.title(genders[int(ti)], color=('green' if ti == t_test[xi] else 'red'), weight='bold')
    # Plot nearest neighbors
    # Subplot slots on the right half of a 3x4 grid for the 5 neighbors.
    idx = [3, 4, 7, 8, 11]
    for i, nni in enumerate(nn):
        plt.subplot(3, 4, idx[i])
        plt.imshow(x_train[nni])
        plt.axis('off')
        plt.title(genders[int(t_train[nni])])