def run_k_fold_cross_validation(X_train, y_train, num_folds, k, k_accuracy):
    """Estimate kNN accuracy for one value of ``k`` via k-fold cross-validation.

    The data is partitioned by ``generate_folds``. Each fold is used once as
    the validation set while a fresh ``KNearestNeighbor`` is trained on the
    remaining folds.

    Args:
        X_train: Training inputs, forwarded to ``generate_folds``.
        y_train: Training labels, forwarded to ``generate_folds``.
        num_folds: Number of folds to iterate over.
        k: Number of neighbours passed to ``predict_labels``.
        k_accuracy: Mapping updated in place; ``k_accuracy[k]`` is set to the
            list of per-fold accuracies.

    Returns:
        The sum of the per-fold accuracies divided by ``num_folds``.
    """
    fold_inputs, fold_labels = generate_folds(X_train, y_train, num_folds)

    per_fold_accuracy = []
    accuracy_sum = 0.0

    for held_out in range(num_folds):
        # The held-out fold is the validation set for this round.
        validation_inputs = fold_inputs[held_out]
        validation_labels = fold_labels[held_out]

        # Everything except the held-out fold becomes the training set.
        # NOTE(review): `+` is assumed to join Python lists of folds here;
        # confirm against generate_folds.
        remaining_inputs = fold_inputs[:held_out] + fold_inputs[held_out + 1:]
        remaining_labels = fold_labels[:held_out] + fold_labels[held_out + 1:]
        train_inputs = np.concatenate(remaining_inputs)
        train_labels = np.concatenate(remaining_labels)

        model = KNearestNeighbor()
        model.train(train_inputs, train_labels)

        distances = model.compute_distances_no_loops(validation_inputs)
        predictions = model.predict_labels(distances, k)

        hits = np.sum(predictions == validation_labels)
        fold_accuracy = float(hits) / predictions.shape[0]

        per_fold_accuracy.append(fold_accuracy)
        accuracy_sum = accuracy_sum + fold_accuracy

    # Record the per-fold breakdown for the caller before averaging.
    k_accuracy[k] = per_fold_accuracy
    return accuracy_sum / num_folds
# Keep only the first `num_test` test examples.
num_test = 500
mask = list(range(num_test))
X_test, y_test = X_test[mask], y_test[mask]

print("reshape....")
# Flatten every image into one row per example.
X_train = X_train.reshape(X_train.shape[0], -1)  # (5000, 3072) per the original note
X_test = X_test.reshape(X_test.shape[0], -1)  # (500, 3072) per the original note
print(X_train.shape, X_test.shape)

print("training.....")
# Build the kNN classifier and hand it the training data.
# Per the exercise notes, "training" a kNN classifier is a no-op:
# the classifier only stores the data.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

print("test compute_distances_two_loops implementation......")
# Distance matrix between test and training examples.
dists = classifier.compute_distances_two_loops(X_test)
print(dists.shape)  # (num_test x num_train)

# Show the distance matrix as an image: each row is a single test example
# and its distances to the training examples.
plt.imshow(dists, interpolation='none')
plt.show()

print("set k=1 and test the data......")
# k = 1 is plain nearest-neighbour classification.
y_test_pred = classifier.predict_labels(dists, k=1)
xtr = np.array(X_train_folds[:i] + X_train_folds[(i + 1):]) ytr = np.array(y_train_folds[:i] + y_train_folds[(i + 1):]) xdev = np.array(X_train_folds[i]) ydev = np.array(y_train_folds[i]) xtr = np.reshape(xtr, ((X_train.shape[0] * (num_folds - 1)) / num_folds, -1)) # (4000, 3072) ytr = np.reshape(ytr, ((y_train.shape[0] * (num_folds - 1)) / num_folds, -1)) # (4000, 1) xdev = np.reshape(xdev, (X_train.shape[0] / num_folds, -1)) # (1000, 3072) ydev = np.reshape(ydev, (y_train.shape[0] / num_folds, -1)) # (1000, 1) nn = KNearestNeighbor() nn.train(xtr, ytr) y_predict = nn.predict(xdev, k=k, num_loops=0) # (1000, ) # print(np.shape(y_predict)) 注意维度匹配 y_predict = np.reshape(y_predict, (y_predict.shape[0], -1)) num_correct = np.sum(y_predict == ydev) accuracy = num_correct / float(xdev.shape[0]) k_to_accuracies[k][i] = accuracy ################################################################################ # END OF YOUR CODE # ################################################################################ # Print out the computed accuracies print("the computed accuracies.....") for k in sorted(k_to_accuracies):
# In[10]:

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
# print() call form: the Python 2 print statement is a SyntaxError under
# Python 3.
print(X_train.shape, X_test.shape)

# In[11]:

from classifiers import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the classifier simply remembers the data and does no further processing.
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# In[12]:

# We would now like to classify the test data with the kNN classifier.
# Recall that we can break down this process into two steps:
#
# 1. First we must compute the distances between all test examples and all
#    train examples.
# 2. Given these distances, for each test example we find the k nearest
#    examples and have them vote for the label.
#
# Let's begin with computing the distance matrix between all training and
# test examples. For example, if there are **Ntr** training examples and
# **Nte** test examples, this stage should result in an **Nte x Ntr** matrix
# where each element (i,j) is the distance between the i-th test and j-th
# train example.
#
# First, open `cs231n/classifiers/k_nearest_neighbor.py` and implement the
# function `compute_distances_two_loops` that uses a (very inefficient)
# double loop over all pairs of (test, train) examples and computes the
# distance matrix one element at a time.
# Subsample the training set to the first `num_training` examples.
mask = list(range(num_training))
x_train = x_train[mask]
y_train = y_train[mask]

# Subsample the test set to the first `num_test` examples.
num_test = 500
mask = list(range(num_test))
x_test = x_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows (one flattened row per example).
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))

from classifiers import KNearestNeighbor

# Train the classifier and compute test-to-train distances.
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)
dists = classifier.compute_distances_one_loop(x_test)

# Cross-validation setup: split the training data into `num_folds` folds.
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

# np.array_split already returns the list of folds, so no empty-list
# pre-initialisation is needed.
x_train_folds = np.array_split(x_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

# One (initially empty) list of accuracies per candidate k.
k_to_accuracies = {k: [] for k in k_choices}
# Apply the training subsample mask (built before this point).
x_train = x_train[mask]
y_train = y_train[mask]

# Subsample the test set to the first `num_test` examples.
num_test = 500
# Materialise the range as a list so it is an explicit index list.
mask = list(range(num_test))
x_test = x_test[mask]
y_test = y_test[mask]

# The image data has three channels (per the original note); the next two
# steps flatten each image into a single row, 32*32*3 -> 3072 values.
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
# print() call form: the Python 2 print statement is a SyntaxError under
# Python 3. The message strings are unchanged.
print('after subsample and re shape:')
print('x_train : ', x_train.shape, " x_test : ", x_test.shape)

# KNN classifier
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)

# Compute the distances between the test data and the training data.
dists = classifier.compute_distances_no_loops(x_test)
# Each row is a single test example and its distances to the training examples.
print('dist shape : ', dists.shape)

plt.imshow(dists, interpolation='none')
plt.show()

# Predict with k = 5 and report the fraction of correct test predictions.
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
acc = float(num_correct) / num_test
print('k=5 ,The Accurancy is : ', acc)

# Cross-Validation