Пример #1
0
def test_KNN_train(sample_train, sample_test):
    """train() must validate input shapes on raw (non-flattened) data.

    The samples from ``sample_train`` are passed to ``train`` without being
    reshaped to ``(num_samples, num_features)``; the call is expected to
    raise ``ValueError``.
    """
    Xtrain, ytrain = sample_train(count=40)
    # Fix: the original bound the test split to unused locals; the fixture
    # call is kept (in case generating data matters) but nothing is bound.
    sample_test(count=10)

    with pytest.raises(ValueError):
        knn = KNearestNeighbor()
        knn.train(Xtrain, ytrain)
Пример #2
0
def test_KNN_train_reshape_input(sample_train, sample_test):
    """Training succeeds once every sample is flattened to a single row."""
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)

    # Collapse all trailing dimensions into one feature axis per sample.
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)
Пример #3
0
def test_KNN_dists_noloop_shape(sample_train, sample_test, in_count):
    """compute_distances_no_loops returns an (n_test, n_train) matrix."""
    Xtrain, ytrain = sample_train(count=in_count)
    Xtest, ytest = sample_test(count=in_count - 30)

    # Flatten images into row vectors before training.
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    dists = knn.compute_distances_no_loops(Xtest)
    expected_shape = (Xtest.shape[0], Xtrain.shape[0])
    assert dists.shape == expected_shape
Пример #4
0
def test_KNN_predict_loop_parameter(sample_train, sample_test, k, num_loops):
    """predict keeps the label shape for the parametrized k / num_loops."""
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)

    # Flatten each sample to one row.
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    predictions = knn.predict(Xtest, k, num_loops)
    assert predictions.shape == ytest.shape
Пример #5
0
def test_KNN_predict_labels_shape(sample_train, sample_test):
    """predict_labels preserves the label-vector shape for k = 1..4.

    Fix: the original spelled out four identical asserts by hand; they are
    replaced with a single loop over k.
    """
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    dist_no = knn.compute_distances_no_loops(Xtest)
    # Same assertion for every k value the original duplicated.
    for k in range(1, 5):
        assert knn.predict_labels(dist_no, k=k).shape == ytest.shape
Пример #6
0
def test_KNN_predict_incorrect_shape(sample_train, sample_test):
    """predict raises ValueError when given data of incorrect dimensions."""
    Xtrain, ytrain = sample_train(count=500)
    Xtest, ytest = sample_test(count=125)

    # Flatten the samples so training succeeds.
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    # ytrain has incorrect dimensions for a test-sample matrix, so
    # predict is expected to reject it.
    with pytest.raises(ValueError):
        knn.predict(ytrain)
Пример #7
0
def test_KNN_predict_num_loop_parameter(sample_train, sample_test, num_loops):
    """predict must raise ValueError for this call (k=0, given num_loops).

    NOTE(review): presumably k=0 is the invalid argument — confirm against
    the classifier implementation.

    Fix: the original ended in a dead ``.shape`` attribute access on the
    expression inside ``pytest.raises``; it is removed here.
    """
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    with pytest.raises(ValueError):
        knn.predict(Xtest, 0, num_loops)
Пример #8
0
def test_KNN_dists_one_to_none(sample_train, sample_test):
    """One-loop and no-loop distance matrices agree (Frobenius norm < 1e-3)."""
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)

    # Flatten samples into row vectors.
    Xtrain = Xtrain.reshape(Xtrain.shape[0], -1)
    Xtest = Xtest.reshape(Xtest.shape[0], -1)

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    dists_loop = knn.compute_distances_one_loop(Xtest)
    dists_vec = knn.compute_distances_no_loops(Xtest)
    difference = np.linalg.norm(dists_loop - dists_vec, ord='fro')
    assert difference < 0.001
Пример #9
0
# Keep only the first num_test examples of the test split.
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
"""
dists = classifier.compute_distances_two_loops(X_test)
pickle.dump(dists,open(r"D:\python\CS231n\assignment1\tmp.txt","wb"))
print(dists.shape)
print(dists)

plt.imshow(dists, interpolation='none')
plt.show()
"""
# Reload the previously pickled distance matrix instead of recomputing it.
# NOTE(review): hard-coded absolute Windows path — the pickled file must
# already exist (produced by the disabled code above) for this to work.
with open(r"D:\python\CS231n\assignment1\tmp.txt", "rb") as file:
    dists = pickle.load(file)
# Nearest-neighbor prediction (k=1) from the cached distances.
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
Пример #10
0
        if i == 0:
            plt.title(cls)
# Render the figure assembled by the (unseen) plotting loop above.
plt.show()

# Subsample the data for more efficient code execution in this exercise
num_training = 5000
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]

num_test = 500
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows

# CV: each row is a data vector of 3072 items.
# 5k for training and 500 for testing

X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
# NOTE(review): Python 2 print statement — this snippet targets Python 2.
print X_train.shape, X_test.shape

# CV: Just saves the data

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
Пример #11
0
# Keep only the first num_test examples of the test split.
num_test = 500
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]

# Reshape the image data into rows
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor
# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation:
dists = classifier.compute_distances_two_loops(X_test)

# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.imshow(dists, interpolation='none')
plt.show()

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
Пример #12
0
# Reshape the image data into rows: one flattened sample per row.
X_train = np.reshape(X_train, (X_train.shape[0], -1))
X_test = np.reshape(X_test, (X_test.shape[0], -1))
print(X_train.shape, X_test.shape)

#%%
#import sys

#new = cwd+'\\cs231n'
#sys.path.insert(0, new+'\\classifier') # where the classifier is stored

from cs231n.classifiers.k_nearest_neighbor import KNearestNeighbor

# Create a kNN classifier instance.
# Remember that training a kNN classifier is a noop:
# the Classifier simply remembers the data and does no further processing
classifier = KNearestNeighbor()
classifier.train(X_train, y_train)

#% calculate the distance
# Open cs231n/classifiers/k_nearest_neighbor.py and implement
# compute_distances_two_loops.

# Test your implementation (fully vectorized version):
dists = classifier.compute_distances_no_loops(X_test)
#dists = classifier.compute_distances_two_loops(X_test)

#dists = classifier.compute_distances_one_loop(X_test)
#%%
# We can visualize the distance matrix: each row is a single test example and
# its distances to training examples
plt.figure()
Пример #13
0

# Select the (full) training and test splits by index mask.
num_training=50000
mask=range(num_training)
x_train=x_train[mask]
y_train=y_train[mask]
num_test=10000
mask=range(num_test)
x_test=x_test[mask]
y_test=y_test[mask]

# Flatten each image into a single feature row.
x_train=np.reshape(x_train,(x_train.shape[0],-1))
x_test=np.reshape(x_test,(x_test.shape[0],-1))
print(x_train.shape,x_test.shape)

# Training a kNN classifier only memorizes the data.
classifier=KNearestNeighbor()
classifier.train(x_train,y_train)

# Sweep k from 1 to 9 and record predictions / accuracy for each value.
ks = range(1 , 10)
pre = []           # predictions per k
num_correct = []   # correct-prediction counts per k
accuracy = []      # fraction correct per k

for k in ks:
    pr = classifier.predict(x_test , k)
    num = np.sum(pr == y_test)
    pre.append(pr)
    num_correct.append(num)
    accuracy.append(float(num) / num_test)

# Plot accuracy as a function of k.
plt.plot(ks , accuracy)
Пример #14
0
# Split the arrays into individual folds
X_train_folds = np.split(X_train, num_folds)
y_train_folds = np.split(y_train, num_folds)

# Dictionary holding the accuracies (list) for different values of k
k_to_accuracies = {}
# k-fold cross validation using fold i as validation, and all others as training
for choice in k_choices:
    for i in range(num_folds):
        # Stack every fold except i into the training arrays.
        X_tr = np.vstack([X_train_folds[x] for x in range(num_folds) if x != i])
        y_tr = np.hstack([y_train_folds[x] for x in range(num_folds) if x != i])
        X_te = X_train_folds[i]
        y_te = y_train_folds[i]
        # Create kNN classifier instance (training just memorizes the data).
        clf = KNearestNeighbor()
        clf.train(X_tr, y_tr)
        # Predict on the held-out fold and record its accuracy.
        pred = clf.predict(X_te, k=choice)
        acc = float(np.sum(pred == y_te)) / y_te.shape[0]
        print(f"k = {choice}, accuracy = {acc}")
        # Fix: setdefault replaces the original `if i == 0: ... else: append`
        # branch — identical result, no special-casing of the first fold.
        k_to_accuracies.setdefault(choice, []).append(acc)

# Plot results
for k in k_choices:
    accs = k_to_accuracies[k]
    plt.scatter([k] * len(accs), accs)
# Plot trend line with error bars corresponding to standard deviation
Пример #15
0
        [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
    accuracies_std = np.array(
        [np.std(v) for k, v in sorted(k_to_accuracies.items())])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.show()

    best_k = 1
    classifier.train(X_train, y_train)
    y_test_pred = classifier.predict(X_test, k=best_k)
    determine_accuracy(y_test_pred, y_test, num_test)


# Load the dataset, then pause between stages so output can be inspected.
# NOTE(review): raw_input and print statements — Python 2 only.
X_train, y_train, X_test, y_test, num_test = load_data()
raw_input('Any key to continue...')

# NOTE(review): compute_distances is called as a free function while
# `classifier` is inspected afterwards — presumably it populates the
# classifier via a module-level global; confirm against the full script.
classifier = KNearestNeighbor()
compute_distances(X_train, y_train, X_test, y_test, num_test)
print '# 1'
print classifier.y_train
print classifier.y_train.shape
raw_input('Any key to continue...')

# Time the fully-vectorized distance computation.
no_loop_time = time_function(classifier.compute_distances_no_loops, X_test)
print 'No loop version took %f seconds' % no_loop_time
raw_input('Any key to continue...')

# Run k-fold cross-validation over the training split.
cross_validation(X_train, y_train)