Example #1
import numpy as np


def run_k_fold_cross_validation(X_train, y_train, num_folds, k, k_accuracy):
    # Split the training data into num_folds folds.
    X_train_folds, y_train_folds = generate_folds(X_train, y_train, num_folds)
    accuracy_list = []
    for i in range(num_folds):
        # Hold fold i out for validation, train on the remaining folds.
        val_fold_x = X_train_folds[i]
        val_fold_y = y_train_folds[i]
        temp_X_train = np.concatenate(X_train_folds[:i] +
                                      X_train_folds[i + 1:])
        temp_y_train = np.concatenate(y_train_folds[:i] +
                                      y_train_folds[i + 1:])
        classifier = KNearestNeighbor()
        classifier.train(temp_X_train, temp_y_train)
        dists = classifier.compute_distances_no_loops(val_fold_x)
        val_pred_y = classifier.predict_labels(dists, k)
        num_correct = np.sum(val_pred_y == val_fold_y)
        accuracy_list.append(float(num_correct) / val_pred_y.shape[0])
    # Record the per-fold accuracies for this k and return their mean.
    k_accuracy[k] = accuracy_list
    return sum(accuracy_list) / num_folds
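The helper generate_folds used above is not shown in this example. A minimal sketch, assuming it simply partitions both arrays into num_folds pieces (the name and behavior are inferred from the call site, not the original implementation):

def generate_folds(X, y, num_folds):
    # np.array_split returns a list of sub-arrays and, unlike np.split,
    # tolerates lengths that do not divide evenly by num_folds.
    return np.array_split(X, num_folds), np.array_split(y, num_folds)

With such a helper, the function can be driven over several candidate values of k, e.g. k_accuracy = {} followed by run_k_fold_cross_validation(X_train, y_train, 5, k, k_accuracy) for each k in [1, 3, 5, 8, 10].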
Example #2
# There are many ways to decide whether two matrices are similar; one of the
# simplest is the Frobenius norm. In case you haven't seen it before, the
# Frobenius norm of two matrices is the square root of the squared sum of
# differences of all elements; in other words, reshape the matrices into
# vectors and compute the Euclidean distance between them.
print("test the difference between two loop and one loop........")
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')
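
# Illustrative aside (hypothetical arrays, not part of the original check):
# the Frobenius norm of a difference equals the Euclidean norm of the
# flattened difference, which is why the comment above describes it as
# reshaping to vectors and taking the Euclidean distance.
_A = np.random.randn(3, 4)
_B = np.random.randn(3, 4)
assert np.isclose(np.linalg.norm(_A - _B, ord='fro'),
                  np.linalg.norm((_A - _B).ravel()))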

print("use compute_distances_no_loop to calculate the dists.....")
# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)
# check that the distance matrix agrees with the one we computed before:
print("test the difference between two loop and no loop........")
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')
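
# For reference, one common way to implement compute_distances_no_loops is the
# expansion ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y; this sketch is an
# assumption about the technique, not the classifier's actual code.
def l2_distances_no_loops(X_test, X_train):
    # Squared row norms broadcast into a (num_test, num_train) grid,
    # minus twice the cross inner products; clamp tiny negative values
    # from floating-point error before the square root.
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)   # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)                # (num_train,)
    cross = X_test.dot(X_train.T)                          # (num_test, num_train)
    return np.sqrt(np.maximum(test_sq + train_sq - 2 * cross, 0))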


# Let's compare how fast the implementations are
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took to execute.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
Example #3
# There are many ways to decide whether two matrices are similar; one of the
# simplest is the Frobenius norm. In case you haven't seen it before, the
# Frobenius norm of two matrices is the square root of the squared sum of
# differences of all elements; in other words, reshape the matrices into
# vectors and compute the Euclidean distance between them.
difference = np.linalg.norm(dists - dists_one, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')



# Now implement the fully vectorized version inside compute_distances_no_loops
# and run the code
dists_two = classifier.compute_distances_no_loops(X_test)

# check that the distance matrix agrees with the one we computed before:
difference = np.linalg.norm(dists - dists_two, ord='fro')
print('Difference was: %f' % (difference, ))
if difference < 0.001:
    print('Good! The distance matrices are the same')
else:
    print('Uh-oh! The distance matrices are different')



# Let's compare how fast the implementations are
def time_function(f, *args):
    """
    Call a function f with args and return the time (in seconds) that it took to execute.
    """
    import time
    tic = time.time()
    f(*args)
    toc = time.time()
    return toc - tic
Example #4
import numpy as np
import matplotlib.pyplot as plt

# subsample the test set
mask = list(range(num_test))
x_test = x_test[mask]
y_test = y_test[mask]
# the image data has three channels; the next two steps reshape each
# 32*32*3 image into a single 3072-dimensional row vector
x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print('after subsampling and reshaping:')
print('x_train:', x_train.shape, ' x_test:', x_test.shape)
# KNN classifier
classifier = KNearestNeighbor()
classifier.train(x_train, y_train)
# compute the distance between test_data and train_data
dists = classifier.compute_distances_no_loops(x_test)
# each row holds a single test example's distances to every training example
print('dists shape:', dists.shape)
plt.imshow(dists, interpolation='none')
plt.show()
y_test_pred = classifier.predict_labels(dists, k=5)
num_correct = np.sum(y_test_pred == y_test)
acc = float(num_correct) / num_test
print('k=5, the accuracy is:', acc)
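
# predict_labels belongs to KNearestNeighbor and is not shown here; a minimal
# sketch of what such a method typically does (an assumption, not the actual
# implementation): take the labels of the k nearest training points per row
# and return the majority vote.
def predict_labels_sketch(dists, y_train, k):
    num_test = dists.shape[0]
    y_pred = np.zeros(num_test, dtype=y_train.dtype)
    for i in range(num_test):
        closest_y = y_train[np.argsort(dists[i])[:k]]  # labels of k nearest
        y_pred[i] = np.argmax(np.bincount(closest_y))  # majority vote
    return y_pred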

# Cross-Validation

# 5-fold cross-validation: split the training data into 5 pieces
num_folds = 5
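# The snippet ends here; a typical continuation (an assumption, not the
# original code) splits the arrays into num_folds pieces before looping
# over held-out folds, as in Example #1:
x_train_folds = np.array_split(x_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)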