Пример #1
0
def test(X_train, y_train, X_test, y_test, best_k):
    """Train a kNN classifier on the training set and report test accuracy.

    Args:
        X_train: training samples, passed unchanged to
            ``KNearestNeighbor.train``.
        y_train: labels for ``X_train``.
        X_test: samples handed to ``KNearestNeighbor.predict``.
        y_test: reference labels, compared element-wise with the predictions.
        best_k: neighbour count forwarded to ``predict`` as ``k``.

    Prints the chosen ``k`` and the number / fraction of correct predictions.
    Nothing is returned.
    """
    classifier = KNearestNeighbor()
    classifier.train(X_train, y_train)
    y_test_pred = classifier.predict(X_test, k=best_k)

    # Compute and display the accuracy
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / len(y_test)
    # print(...) with a single pre-formatted argument emits the same text on
    # Python 2 and Python 3; the bare print statement only parses on Python 2.
    print('Best k=%d' % best_k)
    print('Got %d / %d correct => accuracy: %f' % (num_correct, len(y_test),
                                                   accuracy))
Пример #2
0
def cross_validation(train_data, train_label):
    """Choose the hyper-parameter k by k-fold cross-validation.

    The training data is cut into ``num_folds`` folds.  For every candidate k
    each fold serves once as the validation set while the remaining folds are
    used for training.  The per-fold accuracies are printed and then plotted
    (raw points plus a mean / standard-deviation trend line).

    Args:
        train_data: training samples, split along the first axis.
        train_label: labels matching ``train_data``.
    """
    num_folds = 5
    k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

    # data_folds[i] holds the samples of fold i and label_folds[i] the
    # matching label vector; both lists have num_folds entries.
    data_folds = np.array_split(train_data, num_folds)
    label_folds = np.array_split(train_label, num_folds)

    # k_to_accuracies[k] ends up as an array of num_folds accuracies for k.
    k_to_accuracies = {}
    knn = KNearestNeighbor()
    for k in k_choices:
        fold_scores = np.zeros(num_folds)
        for held_out in range(num_folds):
            # Validation set: the held-out fold.
            val_x = data_folds[held_out]
            val_y = label_folds[held_out]
            # Training set: the rows of every other fold, in fold order.
            train_x = np.array([
                sample
                for idx, chunk in enumerate(data_folds) if idx != held_out
                for sample in chunk
            ])
            train_y = np.array([
                label
                for idx, chunk in enumerate(label_folds) if idx != held_out
                for label in chunk
            ])
            knn.train(train_x, train_y)
            # Score the held-out fold.
            predicted = knn.predict(val_x, k, 0)
            hits = np.sum(predicted == val_y)
            fold_scores[held_out] = float(hits) / val_y.shape[0]
        k_to_accuracies[k] = fold_scores

    # Print the accuracies.
    for k in sorted(k_to_accuracies):
        for accuracy in k_to_accuracies[k]:
            print('k = %d, accuracy = %f' % (k, accuracy))

    # Scatter plot of every individual accuracy.
    for k in k_choices:
        scores = k_to_accuracies[k]
        plt.scatter([k] * len(scores), scores)

    # Trend line: mean accuracy per k with standard-deviation error bars.
    ordered_ks = sorted(k_to_accuracies)
    accuracies_mean = np.array(
        [np.mean(k_to_accuracies[k]) for k in ordered_ks])
    accuracies_std = np.array(
        [np.std(k_to_accuracies[k]) for k in ordered_ks])
    plt.errorbar(k_choices, accuracies_mean, yerr=accuracies_std)
    plt.title('Cross-validation on k')
    plt.xlabel('k')
    plt.ylabel('Cross-validation accuracy')
    plt.show()
Пример #3
0
def main():
    """Load CIFAR-10, flatten the images and print kNN (k=5) test accuracy."""
    X_train, y_train, X_test, y_test = load_CIFAR10('../cifar-10-batches-py')

    # Keep the first num_training training samples.
    num_training = 48000
    train_idx = list(range(num_training))
    X_train, y_train = X_train[train_idx], y_train[train_idx]

    # Keep the first num_test test samples.
    num_test = 1000
    test_idx = list(range(num_test))
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Reshape the image data into rows
    print(X_train.shape)
    # Output noted by the original author: (48000, 32, 32, 3)
    X_train = X_train.reshape(X_train.shape[0], -1)
    print(X_train.shape)
    # Output noted by the original author: (48000, 3072)
    X_test = X_test.reshape(X_test.shape[0], -1)
    print(X_train.shape, X_test.shape)
    # Output noted by the original author: (48000, 3072) (1000, 3072)

    knn = KNearestNeighbor()
    knn.train(X_train, y_train)
    y_test_pred = knn.predict(X_test, k=5)
    print(y_test_pred)

    # Compute and display the accuracy
    num_correct = np.sum(y_test_pred == y_test)
    accuracy = float(num_correct) / num_test
    summary = 'Got %d / %d correct => accuracy: %f' % (
        num_correct, num_test, accuracy)
    print(summary)
    '''
Пример #4
0
# One row per candidate k, one column per fold.  The builtin ``float`` is used
# as dtype because the ``np.float`` alias was removed in NumPy 1.24.
acc_k = np.zeros((len(k_choices), num_folds), dtype=float)

################################################################################
# TODO:                                                                        #
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for ik, k in enumerate(k_choices):
    for i in range(num_folds):
        # Train on every fold except fold i, validate on fold i.
        train_set = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        label_set = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        classifier.train(train_set, label_set)
        y_pred_fold = classifier.predict(X_train_folds[i], k=k, num_loops=0)
        num_correct = np.sum(y_pred_fold == y_train_folds[i])
        # Normalise by the size of the held-out fold itself so the ratio stays
        # a true accuracy even when the folds are not all the same length.
        acc_k[ik, i] = float(num_correct) / len(y_train_folds[i])
    k_to_accuracies[k] = acc_k[ik]
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
fig = plt.figure()
for k in k_choices:
Пример #5
0
# Cross-validation: run kNN num_folds times per k, each time holding out one
# fold for validation and training on the others; the accuracies are stored
# in k_to_accuracies.
classifier = KNearestNeighbor()
for k in k_choices:
    accuracies = np.zeros(num_folds)
    # range() instead of xrange() so the loop also runs on Python 3.
    for fold in range(num_folds):
        X_validate_fold = X_train_folds[fold]
        y_validate_fold = y_train_folds[fold]

        # Stack the remaining folds in one call instead of flattening them
        # row by row in Python.
        temp_X = np.concatenate(
            X_train_folds[:fold] + X_train_folds[fold + 1:])
        temp_y = np.concatenate(
            y_train_folds[:fold] + y_train_folds[fold + 1:])
        classifier.train(temp_X, temp_y)

        y_test_pred = classifier.predict(X_validate_fold, k=k)
        num_correct = np.sum(y_test_pred == y_validate_fold)
        accuracy = float(num_correct) / len(y_test_pred)
        accuracies[fold] = accuracy
    k_to_accuracies[k] = accuracies

# Print the accuracies.  A single pre-formatted argument prints identically on
# Python 2 and Python 3.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# Plot the raw observations.
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)
# k-fold cross-validation: for every k, hold out each fold once, train on the
# remaining folds and record the accuracy on the held-out fold.
for k in k_choices:
    acc = np.zeros(num_folds)
    # range() instead of xrange() so the loop also runs on Python 3.
    for f in range(num_folds):

        X_current_fold = list(X_train_folds)
        del X_current_fold[f]
        X_current_fold = np.concatenate(X_current_fold)

        y_current_fold = list(y_train_folds)
        del y_current_fold[f]
        y_current_fold = np.concatenate(y_current_fold)

        classifier = KNearestNeighbor()
        classifier.train(X_current_fold, y_current_fold)
        y_current_predict = classifier.predict(X_train_folds[f],
                                               k,
                                               num_loops=0)

        num_correct = np.sum(y_current_predict == y_train_folds[f])
        # Accuracy is relative to the held-out fold that was just scored, not
        # to the size of the separate test set (num_test).
        acc[f] = float(num_correct) / len(y_train_folds[f])

    k_to_accuracies[k] = acc

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
Пример #7
0
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:  # for each k
    for n in range(num_folds):  # for each nth fold
        # Fold n is the validation data for this round.
        X_test_fold = X_train_folds[n]
        y_test_fold = y_train_folds[n]
        # Gather the other folds one by one: X_train_folds[...] cannot be
        # indexed with a list of positions when it is a plain Python list.
        X_train_other_folds = np.concatenate(
            [X_train_folds[m] for m in range(num_folds) if m != n], axis=0)
        y_train_other_folds = np.concatenate(
            [y_train_folds[m] for m in range(num_folds) if m != n], axis=0)
        classifier = KNearestNeighbor()
        classifier.train(X_train_other_folds, y_train_other_folds)
        y_pred_fold = classifier.predict(X=X_test_fold, k=k)
        num_correct_fold = np.sum(y_pred_fold == y_test_fold)
        acc_fold = float(num_correct_fold) / len(y_test_fold)
        k_to_accuracies.setdefault(k, []).append(acc_fold)
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
    # "%f" (not "%f1"): the stray 1 was appended to every printed mean.
    print("mean for k=%d is %f" % (k, np.mean(k_to_accuracies[k])))
Пример #8
0
        # Training split for this round: every fold except `idx_fold`.
        # NOTE(review): `idx_fold`, `k_cv`, `X_cv_instance` and
        # `y_cv_instance` are bound by code above this excerpt - presumably
        # the fold / k loops and the held-out fold; confirm against the
        # full file.
        X_train_instance_list = X_train_folds[:idx_fold] + X_train_folds[
            idx_fold + 1:]
        y_train_instance_list = y_train_folds[:idx_fold] + y_train_folds[
            idx_fold + 1:]
        X_train_instance = np.concatenate(X_train_instance_list)
        y_train_instance = np.concatenate(y_train_instance_list)

        # Train on the concatenated folds.
        classifier.train(X_train_instance, y_train_instance)

        # Cross-validation step.  Earlier two-step variant kept for reference:
        #        dists_cv = classifier.compute_distances_no_loops(X_cv_instance)
        #        y_cv_pred = classifier.predict_labels(dists_cv, k=k_cv)

        y_cv_pred = classifier.predict(X_cv_instance, k=k_cv)

        # Fraction of validation labels predicted correctly, appended to the
        # list kept for this k.
        num_correct = np.sum(y_cv_pred == y_cv_instance)
        accuracy = float(num_correct) / len(y_cv_instance)
        k_to_accuracies[k_cv].append(accuracy)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# In[ ]:
Пример #9
0
# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
# The means / stds are ordered by sorted k, so the x values use the same
# ordering; this keeps the line correct even if k_choices is not sorted.
sorted_ks = sorted(k_to_accuracies)
accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in sorted_ks])
accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in sorted_ks])
plt.errorbar(sorted_ks, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# In[ ]:

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data. You should be able to get above 28% accuracy on the test data.
best_k = 1

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
# Single-argument print(...) works on both Python 2 and Python 3.
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
Пример #10
0
# Distance matrices from the three implementations of the classifier.
dists2 = classifier.compute_distances_two_loops(X_test)
dists1 = classifier.compute_distances_one_loop(X_test)
dists0 = classifier.compute_distances_no_loops(X_test)
# Reuse the no-loop result instead of computing the same matrix twice.
dists = dists0
# Single-argument print(...) works on both Python 2 and Python 3.
print(dists.shape)

# Now implement the function predict_labels and run the code below:
# We use k = 1 (which is Nearest Neighbor).
y_test_pred = classifier.predict_labels(dists, k=1)

# Compute and print the fraction of correctly predicted examples
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))

classifier.predict(X_test, k=1, num_loops=0)
# NOTE(review): `pridict_currency` looks like a misspelling (predict /
# accuracy?) but is left as-is because it must match whatever name the
# classifier class defines - confirm against that class.
classifier.pridict_currency(X_test, y_test, k=1, num_loops=0)

# cross validation

num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)
k_to_accuracies = {}
for k in k_choices:
    validation_accuracies = []
    for i in range(num_folds):
        current_x_test = X_train_folds[i]
Пример #11
0
X_train_folds = np.array_split(X_train_temp, num_folds)
y_train_folds = np.array_split(y_train_temp, num_folds)
# Debug output of the last fold (index -1 instead of a hard-coded 4).
print(X_train_folds[-1])

k_to_accuracies = {}

num_test = X_train_folds[0].shape[0]
for k in k_choices:
    for i in range(1, num_folds + 1):
        # Held-out fold for this round.  With five folds this visits
        # 0, 4, 3, 2, 1 - the same order the previous hard-coded negative
        # indices produced - but it works for any num_folds.
        held_out = (1 - i) % num_folds
        # Remaining folds, walking backwards from the held-out one (again the
        # order used before, so the training rows are stacked identically).
        train_ids = [(held_out - step) % num_folds
                     for step in range(1, num_folds)]
        X_train_temp = np.concatenate(
            [X_train_folds[j] for j in train_ids], axis=0)
        y_train_temp = np.concatenate([y_train_folds[j] for j in train_ids])
        X_test_temp = X_train_folds[held_out]
        y_test_temp = y_train_folds[held_out]
        classifier.train(X_train_temp, y_train_temp)
        y_test_pred = classifier.predict(X_test_temp, k=k)
        num_correct = np.sum(y_test_pred == y_test_temp)
        # Normalise by the fold that was actually scored; np.array_split may
        # return folds of different lengths.
        accuracy = float(num_correct) / len(y_test_temp)
        k_to_accuracies.setdefault(k, []).append(accuracy)

for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
accuracies_mean = np.array([np.mean(v) for k,v in sorted(k_to_accuracies.items())])
accuracies_std = np.array([np.std(v) for k,v in sorted(k_to_accuracies.items())])
plt.errorbar(k_choices, accuracies_mean,yerr=accuracies_std)
# Split the data already stored on the classifier into num_folds folds.
X_train_folds = np.array_split(classifier.X_train, num_folds)
y_train_folds = np.array_split(classifier.y_train, num_folds)

k_to_accuracies = {}

for k in k_choices:
    k_to_accuracies[k] = []
    for i in range(num_folds):
        # Fold i is held out for validation and the model is trained on all
        # the other folds.  Training on a single fold while always validating
        # on the last one would score the model on its own training data in
        # the round where the two coincide.
        X_val = X_train_folds[i]
        y_val = y_train_folds[i]
        X_fit = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        y_fit = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        knn = KNearestNeighbor()
        knn.train(X_fit, y_fit)
        y_predict = knn.predict(X_val, k=k)
        acc = np.mean(y_predict == y_val)
        k_to_accuracies[k].append(acc)

print('k_to_accuracies')
print(k_to_accuracies)

# Scatter plot of every per-fold accuracy.
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

plt.show()

for k, v in sorted(k_to_accuracies.items()):
    print(k, v)
Пример #13
0
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# k-fold cross-validation: every fold is held out once while the model is
# trained on all remaining folds.  X_train_folds / y_train_folds are left
# intact so later code still sees every fold.
for k in k_choices:
    accuracies = []
    for n in range(num_folds):
        X_fit = np.concatenate(X_train_folds[:n] + X_train_folds[n + 1:])
        y_fit = np.concatenate(y_train_folds[:n] + y_train_folds[n + 1:])
        classifier.train(X_fit, y_fit)
        y_pred_folds = classifier.predict(
            X_train_folds[n], k=k, num_loops=0)
        num_correct = np.sum(y_train_folds[n] == y_pred_folds)
        # Divide by the size of the fold that was scored.
        accuracies.append(float(num_correct) / len(y_train_folds[n]))
    k_to_accuracies[k] = accuracies

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))
import numpy as np
import matplotlib.pyplot as plt
from cs231n.data_utils import load_CIFAR10
from cs231n.classifiers import KNearestNeighbor

# Load data and flatten every image into one row.

cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
X_tre, y_tre, X_te, y_te = load_CIFAR10(cifar10_dir)
X_tre_rows = X_tre.reshape(X_tre.shape[0], 32 * 32 * 3)
X_te_rows = X_te.reshape(X_te.shape[0], 32 * 32 * 3)

# The first 1000 training samples become the validation set ...
X_val_rows = X_tre_rows[:1000, :]
y_val_rows = y_tre[:1000]

# ... and the rest stay as training data.  The labels must be sliced the same
# way as the rows ([1000:]) so that samples and labels remain aligned.
X_tre_rows = X_tre_rows[1000:, :]
y_tre = y_tre[1000:]

# Training does not depend on k, so it is done once, on the flattened rows
# (the same layout the validation rows use).
knn = KNearestNeighbor()
knn.train(X_tre_rows, y_tre)

val_acc = []
for k in [1, 3, 5, 10, 20, 50, 100]:

    y_val_predict = knn.predict(X_val_rows, k=k)
    acc = np.mean(y_val_predict == y_val_rows)
    # Single-argument print(...) works on both Python 2 and Python 3.
    print('accuracy: %f' % (acc * 100))

    val_acc.append((k, acc))
Пример #15
0
# Imported once up front rather than on every inner-loop iteration.
from cs231n.classifiers import KNearestNeighbor

k_to_accuracies = {}
for k in k_choices:
    fold_accuracies = []
    for j in range(num_folds):
        # Concatenate the folds other than j.  This replaces reshapes with
        # hard-coded sizes (4000 rows, 3072 features), works for any data
        # size and keeps the labels one-dimensional.
        X_train_ = np.concatenate(
            [fold for idx, fold in enumerate(X_train_folds) if idx != j])
        y_train_ = np.concatenate(
            [fold for idx, fold in enumerate(y_train_folds) if idx != j])
        X_test_ = np.asarray(X_train_folds[j])
        y_test_ = np.asarray(y_train_folds[j])

        classifier = KNearestNeighbor()
        classifier.train(X_train_, y_train_)
        y_test_pred = classifier.predict(X_test_, k=k, num_loops=0)

        # float(...) forces true division on Python 2 as well.
        fold_accuracies.append(
            float(np.sum(y_test_pred == y_test_)) / y_test_.shape[0])
    k_to_accuracies[k] = fold_accuracies

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
    plt.scatter([k] * len(accuracies), accuracies)

# plot the trend line with error bars that correspond to standard deviation
Пример #16
0
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# One (initially empty) accuracy list per candidate k.
for k in k_choices:
    k_to_accuracies[k] = []

# The classifier is trained once per held-out fold and then queried for every
# k.  range() instead of xrange() so the loop also runs on Python 3.
for f in range(num_folds):
    X_train_val = np.concatenate(
        [j for i, j in enumerate(X_train_folds) if i != f])
    y_train_val = np.concatenate(
        [j for i, j in enumerate(y_train_folds) if i != f])
    classifier = KNearestNeighbor()
    classifier.train(X_train_val, y_train_val)

    for k in k_choices:
        y_pred = classifier.predict(X_train_folds[f], k)

        num_correct = np.sum(y_pred == y_train_folds[f])
        accuracy = float(num_correct) / float(y_train_folds[f].shape[0])
        k_to_accuracies[k].append(accuracy)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
# Single-argument print(...) works on both Python 2 and Python 3.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# In[17]:
Пример #17
0
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
#pass
# Make sure every candidate k has an accuracy list.
for k_ in k_choices:
    k_to_accuracies.setdefault(k_, [])
for i in range(num_folds):
    classifier = KNearestNeighbor()
    # Train on all folds except fold i.
    X_val_train = np.vstack(X_train_folds[:i] + X_train_folds[i + 1:])
    y_val_train = np.vstack(y_train_folds[:i] + y_train_folds[i + 1:])
    # The label folds are stacked as a column here; take that column as a
    # flat vector.
    y_val_train = y_val_train[:, 0]
    classifier.train(X_val_train, y_val_train)
    for k_ in k_choices:
        y_val_pred = classifier.predict(X_train_folds[i], k=k_, num_loops=2)
        # Compare against the flattened label column only.  A second
        # comparison against the un-flattened fold used to overwrite this
        # count; comparing a vector with a column broadcasts to a full
        # matrix and produces a meaningless total.
        num_correct = np.sum(y_val_pred == y_train_folds[i][:, 0])
        accuracy = float(num_correct) / len(y_val_pred)
        k_to_accuracies[k_].append(accuracy)
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
# Single-argument print(...) works on both Python 2 and Python 3.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# In[ ]:
Пример #18
0
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# pass
# Make sure every candidate k has an accuracy list.
for k_ in k_choices:
    k_to_accuracies.setdefault(k_, [])
for i in range(num_folds):
    classifier = KNearestNeighbor()
    # Train on all folds except fold i.
    X_val_train = np.vstack(X_train_folds[0:i] + X_train_folds[i + 1:])
    y_val_train = np.vstack(y_train_folds[0:i] + y_train_folds[i + 1:])
    # The label folds are stacked as a column here; take that column as a
    # flat vector.
    y_val_train = y_val_train[:, 0]
    classifier.train(X_val_train, y_val_train)
    for k_ in k_choices:
        y_val_pred = classifier.predict(X_train_folds[i], k=k_)
        num_correct = np.sum(y_val_pred == y_train_folds[i][:, 0])
        accuracy = float(num_correct) / len(y_val_pred)
        # append() instead of rebuilding the list on every round.
        k_to_accuracies[k_].append(accuracy)
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
# Single-argument print(...) works on both Python 2 and Python 3.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
Пример #19
0
# kValue = [3]
kAccuracies = []
# print(xTrain)
# Integer fold size.  ``//`` keeps it an int on Python 3, where ``/`` would
# produce a float that np.reshape rejects as a dimension.
foldSize = lengthTrain // cvFold
for k in kValue:
    kValueAcc = []
    # range() instead of xrange() so the loop also runs on Python 3.
    for i in range(cvFold):
        xValid = xTrain[i]
        yValid = yTrain[i]
        # Boolean mask selecting every fold except fold i.
        keep = np.arange(cvFold) != i
        xTrainCV = xTrain[keep]
        yTrainCV = yTrain[keep]
        # Merge the remaining folds into one flat training set.
        xTrainCV = np.reshape(
            xTrainCV, (lengthTrain - foldSize, xTrainCV.shape[2]))
        yTrainCV = np.reshape(yTrainCV, (lengthTrain - foldSize, ))
        clsfr.train(xTrainCV, yTrainCV)
        yPredict = clsfr.predict(xValid, k=k)
        acc = np.sum(yPredict == yValid)
        kValueAcc.append([float(acc) / foldSize])
    kAccuracies.append(kValueAcc)

print([np.mean(i) for i in kAccuracies])
plt.figure()
x = np.array(kValue)
y = np.array([np.mean(i) for i in kAccuracies])
print(x.shape)
print(y.shape)
plt.errorbar(np.array(kValue),
             np.array([np.mean(i) for i in kAccuracies]),
             yerr=np.array([np.std(i) for i in kAccuracies]))
plt.show()
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    for n in range(num_folds):
        # Concat all our folds together except for the nth fold for training.
        current_train_fold_x = np.concatenate(
            [X_train_folds[i] for i in range(num_folds) if i != n])
        current_train_fold_y = np.concatenate(
            [y_train_folds[i] for i in range(num_folds) if i != n])

        # Select the held out fold to be our validation data.
        current_valid_fold_x = X_train_folds[n]
        current_valid_fold_y = y_train_folds[n]

        classifier.train(current_train_fold_x, current_train_fold_y)

        # Predict on the held-out fold using the classifier's default
        # num_loops setting.
        y_test_pred = classifier.predict(current_valid_fold_x, k=k)

        # Evaluate and store in k_to_accuracies dict.  The denominator is the
        # size of the validation fold (the name used here before,
        # current_test_fold_x, was never defined).
        num_correct = np.sum(y_test_pred == current_valid_fold_y)
        fold_accuracy = float(num_correct) / current_valid_fold_x.shape[0]
        k_to_accuracies.setdefault(k, []).append(fold_accuracy)

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
Пример #21
0
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {}
for k in k_choices:
    k_to_accuracies[k] = []

# Loop over the held-out fold f.  The loop header had been commented out in
# favour of a fixed fold 0, which left the body mis-indented and `f`
# undefined.
for f in range(num_folds):
    X_train_val = np.concatenate(
        [j for i, j in enumerate(X_train_folds) if i != f])
    y_train_val = np.concatenate(
        [j for i, j in enumerate(y_train_folds) if i != f])

    classifier.train(X_train_val, y_train_val)

    for k in k_choices:
        y_pred = classifier.predict(X_train_folds[f], k)

        num_correct = np.sum(y_pred == y_train_folds[f])
        accuracy = float(num_correct) / float(y_train_folds[f].shape[0])
        k_to_accuracies[k].append(accuracy)


Пример #22
0
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

for k in k_choices:
    k_to_accuracies[k] = []
    # Iterate over num_folds rather than a literal 5.
    for i in range(num_folds):
        classifier_k_fold = KNearestNeighbor()
        # Concatenating the remaining folds avoids a hard-coded feature count
        # and also works when the folds are not all the same size.
        classifier_k_fold.train(
            np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:]),
            np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:]))
        y_predict_k_fold = classifier_k_fold.predict(X_train_folds[i], k=k)
        correct_count = np.sum(y_predict_k_fold == y_train_folds[i])
        # Convert before dividing so the ratio is a true fraction on
        # Python 2 as well.
        k_to_accuracies[k].append(
            float(correct_count) / y_predict_k_fold.shape[0])

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# %%
# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
Пример #23
0
    accuracy = float(num_correct) / num_test

    k_to_accuracies[k].append(accuracy)

# Print out the computed accuracies
# Single-argument print(...) works on both Python 2 and Python 3.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))

# plot the trend line with error bars that correspond to standard deviation
# The means / stds are ordered by sorted k, so the x values use the same
# ordering; this keeps the line correct even if k_choices is not sorted.
sorted_ks = sorted(k_to_accuracies)
accuracies_mean = np.array([np.mean(k_to_accuracies[k]) for k in sorted_ks])
accuracies_std = np.array([np.std(k_to_accuracies[k]) for k in sorted_ks])
plt.errorbar(sorted_ks, accuracies_mean, yerr=accuracies_std)
plt.title('Cross-validation on k')
plt.xlabel('k')
plt.ylabel('Cross-validation accuracy')
plt.show()

# Based on the cross-validation results above, choose the best value for k,
# retrain the classifier using all the training data, and test it on the test
# data.
best_k = 6

classifier = KNearestNeighbor()
classifier.train(X_train, y_train)
y_test_pred = classifier.predict(X_test, k=best_k)

# Compute and display the accuracy
num_correct = np.sum(y_test_pred == y_test)
accuracy = float(num_correct) / num_test
print('Got %d / %d correct => accuracy: %f' % (num_correct, num_test, accuracy))
Пример #24
0
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
### study np.vstack.. np.hstack..
# One (initially empty) accuracy list per candidate k.
for ck in k_choices:
    k_to_accuracies[ck] = []
for it in range(num_folds):
    # Fold `it` is held out; the other folds are stacked for training
    # (vstack for the sample rows, hstack for the label vectors).
    X_train_cv = np.vstack(X_train_folds[0:it] + X_train_folds[it + 1:])
    X_test_cv = X_train_folds[it]

    y_train_cv = np.hstack(y_train_folds[0:it] + y_train_folds[it + 1:])
    y_test_cv = y_train_folds[it]

    # The training data does not depend on k, so train once per fold instead
    # of once per (fold, k) pair.
    classifier.train(X_train_cv, y_train_cv)
    for ck in k_choices:
        y_predict = classifier.predict(X_test_cv, k=ck)
        k_to_accuracies[ck].append(np.mean(y_predict == y_test_cv))

################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
# for k in sorted(k_to_accuracies):
#     for accuracy in k_to_accuracies[k]:
#         print 'k = %d, accuracy = %f' % (k, accuracy)


# plot the raw observations
for k in k_choices:
    accuracies = k_to_accuracies[k]
Пример #25
0
num_folds = 5
k_choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]

X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

# k_to_accuracies[k][i] is the accuracy for k with fold i held out.
k_to_accuracies = {k: np.zeros(num_folds) for k in k_choices}

for k in k_to_accuracies:
    for i in range(num_folds):
        # Train on every fold except fold i.  Separate names are used so the
        # full X_train / y_train arrays are not overwritten.
        X_train_cv = np.concatenate(X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_cv = np.concatenate(y_train_folds[:i] + y_train_folds[i + 1:])
        classifier.train(X_train_cv, y_train_cv)

        # Validate on the held-out fold; the test set is not touched during
        # cross-validation.
        y_test_pred = classifier.predict(X_train_folds[i], k=k, num_loops=0)
        num_correct = np.sum(y_test_pred == y_train_folds[i])
        print("k: ", k, " num_correct: ", num_correct, " len(y_test_pred): ",
              len(y_test_pred))
        accuracy = float(num_correct) / len(y_test_pred)
        print("accuracy: ", accuracy)
        k_to_accuracies[k][i] = accuracy
# NOTE: `classifier` is left trained on the last cross-validation subset;
# retrain it on the full training set before the final test evaluation.
Пример #26
0
# Perform k-fold cross validation to find the best value of k. For each        #
# possible value of k, run the k-nearest-neighbor algorithm num_folds times,   #
# where in each case you use all but one of the folds as training data and the #
# last fold as a validation set. Store the accuracies for all fold and all     #
# values of k in the k_to_accuracies dictionary.                               #
################################################################################
for k in k_choices:
    accuracy = []
    for fold in range(num_folds):
        # Held-out validation fold.
        Xval = X_train_folds[fold]
        yval = y_train_folds[fold]
        # Training data: all remaining folds concatenated.  Indexing the fold
        # list with `range(num_folds) != fold` evaluates to a single boolean
        # and therefore picks one fixed fold instead of "all but `fold`".
        Xtrain = np.concatenate(
            [X_train_folds[i] for i in range(num_folds) if i != fold])
        ytrain = np.concatenate(
            [y_train_folds[i] for i in range(num_folds) if i != fold])
        classifier.train(Xtrain, ytrain)
        predictions = classifier.predict(Xval, k)
        acc = np.sum(predictions == yval) / float(yval.shape[0])
        accuracy.append(acc)
    k_to_accuracies[k] = accuracy
################################################################################
#                                 END OF YOUR CODE                             #
################################################################################

# Print out the computed accuracies
# Single-argument print(...) works on both Python 2 and Python 3.
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        print('k = %d, accuracy = %f' % (k, accuracy))


# In[ ]:
Пример #27
0
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Start every candidate k with an empty list of accuracies.
k_to_accuracies.update((k, []) for k in k_choices)

for i in range(num_folds):
    # Collect the rows / labels of every fold except fold i.
    X_train_ = [
        row for j in range(num_folds) if j != i for row in X_train_folds[j]
    ]
    y_train_ = [
        label for j in range(num_folds) if j != i for label in y_train_folds[j]
    ]

    # Fresh classifier per held-out fold, then score each k on that fold.
    classifier = KNearestNeighbor()
    classifier.train(np.array(X_train_), np.array(y_train_))
    X_val = np.array(X_train_folds[i])
    for k in k_choices:
        y_val_pred = classifier.predict(X_val, k=k)
        accuracy_val = np.mean(y_train_folds[i] == y_val_pred)
        k_to_accuracies[k].append(accuracy_val)

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        line = 'k = %d, accuracy = %f' % (k, accuracy)
        print(line)

Пример #28
0
# Split samples and labels into num_folds folds
# (author's note: (50000, 3072) becomes 5 folds of (10000, 3072)).
X_train_folds = np.array_split(X_train, num_folds)
y_train_folds = np.array_split(y_train, num_folds)

k_to_accuracies = {}

for k_val in k_choices:
    k_to_accuracies[k_val] = []
    for i in range(num_folds):
        # Fold i is the validation split; the other folds form the
        # training split.
        X_val_cycle = X_train_folds[i]
        y_val_cycle = y_train_folds[i]
        X_train_cycle = np.concatenate(
            X_train_folds[:i] + X_train_folds[i + 1:])
        y_train_cycle = np.concatenate(
            y_train_folds[:i] + y_train_folds[i + 1:])

        knn = KNearestNeighbor()
        knn.train(X_train_cycle, y_train_cycle)
        y_val_pred = knn.predict(X_val_cycle, k_val)

        num_correct = np.sum(y_val_cycle == y_val_pred)
        fold_accuracy = float(num_correct) / float(len(y_val_cycle))
        k_to_accuracies[k_val].append(fold_accuracy)

# Print out the computed accuracies
for k in sorted(k_to_accuracies):
    for accuracy in k_to_accuracies[k]:
        line = 'k = %d, accuracy = %f' % (k, accuracy)
        print(line)