Example #1
def test_KNN_predict_incorrect_shape(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=500)
    Xtest, ytest = sample_test(count=125)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    with pytest.raises(ValueError):
        knn.predict(ytrain)  # using ytrain, which has incorrect dimensions
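For this test to pass, predict presumably validates the input shape against the stored training data. A hypothetical sketch of such a check (the helper name and message are illustrative, not this project's actual code):

def _check_input_shape(X, X_train):
    # Reject inputs whose feature dimension does not match the training data.
    if X.ndim != 2 or X.shape[1] != X_train.shape[1]:
        raise ValueError('expected shape (n, %d), got %s' % (X_train.shape[1], X.shape))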
Example #2
def test_KNN_predict_num_loop_parameter(sample_train, sample_test, num_loops):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    with pytest.raises(ValueError):
        knn.predict(Xtest, 0, num_loops)
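The expected ValueError presumably comes from predict's num_loops dispatch. In the standard CS231n skeleton it looks roughly like this (a sketch; the project's exact code may differ):

if num_loops == 0:
    dists = self.compute_distances_no_loops(X)
elif num_loops == 1:
    dists = self.compute_distances_one_loop(X)
elif num_loops == 2:
    dists = self.compute_distances_two_loops(X)
else:
    raise ValueError('Invalid value %d for num_loops' % num_loops)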
Example #3
def test_KNN_predict_loop_parameter(sample_train, sample_test, k, num_loops):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=10)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtest = np.reshape(Xtest, (Xtest.shape[0], -1))

    knn = KNearestNeighbor()
    knn.train(Xtrain, ytrain)

    assert knn.predict(Xtest, k, num_loops).shape == ytest.shape
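The num_loops argument selects among three equivalent distance implementations. A minimal sketch of the fully vectorized variant, using the expansion ||x - y||^2 = ||x||^2 - 2*x.y + ||y||^2 (the function name is illustrative):

def compute_distances_no_loops_sketch(X_train, X_test):
    # Squared norms of every test and training row.
    test_sq = np.sum(X_test ** 2, axis=1, keepdims=True)   # (num_test, 1)
    train_sq = np.sum(X_train ** 2, axis=1)                # (num_train,)
    cross = X_test @ X_train.T                             # (num_test, num_train)
    # Clamp at zero to guard against tiny negative values from rounding.
    return np.sqrt(np.maximum(test_sq - 2 * cross + train_sq, 0))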
Example #4
################################################################################
# Perform k-fold cross-validation to find the best value of k. For each       #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all folds and all    #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    acc = []
    print(k)
    for i in range(num_folds):
        # Each image fold has shape (1000, 3072); stacking the other folds
        # vertically gives a training set of shape (4000, 3072).
        x_train_fold = np.vstack(X_train_folds[0:i] + X_train_folds[i + 1:])
        # Each label fold has shape (1000,); stacking the other folds
        # horizontally gives labels of shape (4000,).
        y_train_fold = np.hstack(y_train_folds[0:i] + y_train_folds[i + 1:])
        x_val = X_train_folds[i]
        y_val = y_train_folds[i]

        classifier = KNearestNeighbor()
        classifier.train(x_train_fold, y_train_fold)
        y_val_pred = classifier.predict(x_val, k)
        fold_acc = np.sum(y_val_pred == y_val) / y_val.shape[0]
        acc.append(fold_acc)
    k_to_accuracies[k] = acc
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
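This loop assumes X_train_folds, y_train_folds, k_choices, and k_to_accuracies already exist. A minimal sketch of the usual setup for this assignment (the fold count and k values are the conventional CS231n choices, not taken from this snippet):

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

# Split the training data into num_folds equal parts; each entry is one fold.
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
k_to_accuracies = {}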
Example #5
# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array(
    [np.mean(v) for k, v in sorted(k_to_accuracies.items())])
accuracies_std = np.array(
    [np.std(v) for k, v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 10

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' %
      (num_correct, num_test, accuracy))
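Rather than hard-coding best_k = 10, the choice could be derived from the statistics computed above. A minimal sketch (assuming k_choices is sorted, so it lines up with accuracies_mean):

# Pick the k whose mean cross-validation accuracy is highest.
best_k = k_choices[int(np.argmax(accuracies_mean))]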
Example #6
mask = range(num_training)
x_train = x_train[mask]
y_train = y_train[mask]
num_test = 10000
mask = range(num_test)
x_test = x_test[mask]
y_test = y_test[mask]

x_train = np.reshape(x_train, (x_train.shape[0], -1))
x_test = np.reshape(x_test, (x_test.shape[0], -1))
print(x_train.shape, x_test.shape)

classifier = KNearestNeighbor()
classifier.train(x_train, y_train)

ks = range(1, 10)
pre = []
num_correct = []
accuracy = []

for k in ks:
    pr = classifier.predict(x_test, k)
    num = np.sum(pr == y_test)
    pre.append(pr)
    num_correct.append(num)
    accuracy.append(float(num) / num_test)

plt.plot(ks, accuracy)
plt.show()
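Predicting all 10000 test images is dominated by the distance computation, so it can be worth timing the three num_loops variants before a sweep like this. A sketch, assuming the classifier exposes the standard compute_distances_* methods (as Example #4 suggests):

import time

def time_call(f, *args):
    # Wall-clock seconds for a single call to f.
    tic = time.time()
    f(*args)
    return time.time() - tic

for loops, fn in [(0, classifier.compute_distances_no_loops),
                  (1, classifier.compute_distances_one_loop),
                  (2, classifier.compute_distances_two_loops)]:
    print('num_loops=%d took %.3f seconds' % (loops, time_call(fn, x_test)))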

Example #7
# Dictionary holding the accuracies (list) for different values of k
k_to_accuracies = {}
# k-fold cross validation using fold i as validation, and all others as training
for choice in k_choices:
    for i in range(num_folds):
        # Partition training and test arrays
        X_tr = np.vstack([X_train_folds[x] for x in range(num_folds) if x != i])
        y_tr = np.hstack([y_train_folds[x] for x in range(num_folds) if x != i])
        X_te = X_train_folds[i]
        y_te = y_train_folds[i]
        # Create kNN classifier instance
        clf = KNearestNeighbor()
        clf.train(X_tr, y_tr)
        # Predict
        pred = clf.predict(X_te, k=choice)
        acc = float(np.sum(pred == y_te)) / y_te.shape[0]
        print(f"k = {choice}, accuracy = {acc}")
        k_to_accuracies.setdefault(choice, []).append(acc)

# Plot results
for k in k_choices:
    accs = k_to_accuracies[k]
    plt.scatter([k] * len(accs), accs)
# Plot trend line with error bars corresponding to standard deviation
accs_mean = np.array([np.mean(val) for key, val in sorted(k_to_accuracies.items())])
accs_std = np.array([np.std(val) for key, val in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accs_mean, yerr=accs_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()