示例#1
0
 def score_pca(k, p=0.5, seed=1):
     """Count right and wrong 1-nearest-neighbour predictions in PCA space.

     Splits the module-level ``X`` / ``y`` with ``ps6.split_dataset``, projects
     both partitions onto the vectors returned by ``ps6.pca(Xtrain, k)`` and
     gives every test row the label of its closest (Euclidean) training row.

     Args:
         k: second argument of ``ps6.pca`` (number of components requested).
         p: split value forwarded to ``ps6.split_dataset``.
         seed: fourth argument of ``ps6.split_dataset`` (presumably the RNG
             seed -- confirm against ``ps6``).

     Returns:
         Tuple ``(good, bad)``: predictions that match / do not match the
         test labels.
     """
     train_x, train_y, test_x, test_y = ps6.split_dataset(X, y, p, seed)

     # Fit on the training partition only.
     train_mean = ps6.get_mean_face(train_x)
     basis, _ = ps6.pca(train_x, k)
     train_coords = np.dot(train_x - train_mean, basis)

     # NOTE(review): the test rows are centered with the *test* mean, not the
     # training mean -- confirm this is intended before relying on the score.
     test_mean = ps6.get_mean_face(test_x)
     test_coords = np.dot(test_x - test_mean, basis)

     hits = 0
     for row, sample in enumerate(test_coords):
         gaps = [np.linalg.norm(sample - ref) for ref in train_coords]
         if train_y[np.argmin(gaps)] == test_y[row]:
             hits += 1

     # Every test row is either a hit or a miss.
     return hits, len(test_coords) - hits
示例#2
0
def part_1c():
    """Classify held-out faces with PCA + 1-nearest-neighbour and print the score.

    Loads the images under ``YALE_FACES_DIR`` at 32x32, splits them with
    ``ps6.split_dataset``, projects both partitions onto the ``k`` vectors
    returned by ``ps6.pca`` for the training partition, and labels each test
    row with the label of its closest (Euclidean) training row.

    Returns:
        None. The hit/miss counts and the accuracy are printed to stdout.
    """
    p = 0.5  # Select a split percentage value
    k = 5  # Select a value for k

    size = [32, 32]
    X, y = ps6.load_images(YALE_FACES_DIR, size)
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

    # training
    mu = ps6.get_mean_face(Xtrain)
    eig_vecs, eig_vals = ps6.pca(Xtrain, k)
    Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

    # testing
    # NOTE(review): the test rows are centered with the *test* mean, not the
    # training mean -- confirm this is intended.
    mu = ps6.get_mean_face(Xtest)
    Xtest_proj = np.dot(Xtest - mu, eig_vecs)

    good = 0
    bad = 0

    for i, obs in enumerate(Xtest_proj):
        # Distance from this test row to every training row in PCA space.
        dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
        y_pred = ytrain[np.argmin(dist)]

        if y_pred == ytest[i]:
            good += 1
        else:
            bad += 1

    # Single-argument print() calls run on both Python 2 and Python 3; the
    # doubled spaces reproduce the output of the former print statement.
    print('Good predictions =  {} Bad predictions =  {}'.format(good, bad))
    print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))
示例#3
0
def part_1c():
    """Compare PCA + 1-nearest-neighbour face classification with random guessing.

    Loads the images under ``YALE_FACES_DIR`` at 32x32, splits them with
    ``ps6.split_dataset``, projects both partitions onto the ``k`` vectors
    returned by ``ps6.pca`` for the training partition, and labels each test
    row with the label of its closest (Euclidean) training row. A second set
    of predictions is drawn uniformly from the integers 1..15 as a baseline.

    Returns:
        None. Both sets of hit/miss counts and accuracies are printed.
    """
    p = 0.5  # Select a split percentage value
    k = 5  # Select a value for k

    size = [32, 32]
    X, y = ps6.load_images(YALE_FACES_DIR, size)
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

    # training
    mu = ps6.get_mean_face(Xtrain)
    eig_vecs, eig_vals = ps6.pca(Xtrain, k)
    Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

    # testing
    # NOTE(review): the test rows are centered with the *test* mean, not the
    # training mean -- confirm this is intended.
    mu = ps6.get_mean_face(Xtest)
    Xtest_proj = np.dot(Xtest - mu, eig_vecs)

    good = 0
    bad = 0

    for i, obs in enumerate(Xtest_proj):
        # Distance from this test row to every training row in PCA space.
        dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
        y_pred = ytrain[np.argmin(dist)]
        if y_pred == ytest[i]:
            good += 1
        else:
            bad += 1

    # Random baseline: one guess per test row; `high` is exclusive, so the
    # guesses are integers in 1..15.
    random_guess = np.random.randint(low=1, high=16, size=len(ytest))
    rand_good = sum(
        1 for guess, truth in zip(random_guess, ytest) if guess == truth
    )
    rand_bad = len(random_guess) - rand_good

    # Single-argument print() calls run on both Python 2 and Python 3; the
    # doubled spaces reproduce the output of the former print statements.
    print('Results where P is {}'.format(p))
    print('-------------------------------')
    print('Random Selection Results')
    print('Good predictions =  {} Bad predictions =  {}'.format(rand_good, rand_bad))
    print('(Random) Testing accuracy: {0:.2f}%'.format(100 * float(rand_good) / (rand_good + rand_bad)))

    print('Normal Dist Results')
    print('Good predictions =  {} Bad predictions =  {}'.format(good, bad))
    print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))
    print('--------------------------------')
    print('')
def part_1c():
    """Print PCA + 1-nearest-neighbour test accuracy and a random baseline.

    Loads the images under ``YALE_FACES_DIR`` at 32x32, splits them with
    ``ps6.split_dataset``, projects both partitions onto the ``k`` vectors
    returned by ``ps6.pca`` for the training partition, and labels each test
    row with the label of its closest (Euclidean) training row. Afterwards a
    uniformly random label is guessed for every training row and scored
    against ``ytrain``.

    Returns:
        None. Counts and accuracies are printed to stdout.
    """
    p = 0.8  # Select a split percentage value
    k = 5  # Select a value for k

    size = [32, 32]
    X, y = ps6.load_images(YALE_FACES_DIR, size)
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

    # training
    mu = ps6.get_mean_face(Xtrain)
    eig_vecs, eig_vals = ps6.pca(Xtrain, k)
    Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

    # testing
    # NOTE(review): the test rows are centered with the *test* mean, not the
    # training mean -- confirm this is intended.
    mu = ps6.get_mean_face(Xtest)
    Xtest_proj = np.dot(Xtest - mu, eig_vecs)

    good = 0
    bad = 0

    for i, obs in enumerate(Xtest_proj):
        # Distance from this test row to every training row in PCA space.
        dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
        y_pred = ytrain[np.argmin(dist)]

        if y_pred == ytest[i]:
            good += 1
        else:
            bad += 1

    # Formatted as one string so the output is the same text as the former
    # multi-argument print() call (note the doubled spaces).
    print('Good predictions =  {} Bad predictions =  {}'.format(good, bad))
    print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))

    # Random baseline. Guesses are drawn from the labels that actually occur
    # in ytrain; sampling from the literal list [1, 16] would only ever
    # produce the two values 1 and 16 and understate chance accuracy.
    candidate_labels = np.unique(ytrain)
    rand_y = np.random.choice(candidate_labels, len(ytrain))
    rand_accuracy = 100 * float(np.sum(rand_y == ytrain)) / len(ytrain)
    print('(Random) Training accuracy: {0:.2f}%'.format(rand_accuracy))
示例#5
0
def part_1a_1b():
    """Write the mean-face image and plot the first ten PCA vectors.

    Loads the images under ``YALE_FACES_DIR`` at 32x32, renders their mean
    with ``visualize_mean_face`` and saves it as ``ps6-1-a-1.png`` inside
    ``OUTPUT_DIR``, then passes the transposed vectors from
    ``ps6.pca(X, 10)`` to ``plot_eigen_faces`` under the name
    ``ps6-1-b-1.png``.
    """
    full_resolution = (192, 231)
    working_resolution = (32, 32)
    faces, _ = ps6.load_images(YALE_FACES_DIR, working_resolution)

    # Part a: mean face, rendered from the working size to the full size.
    mean_face = ps6.get_mean_face(faces)
    mean_face_image = visualize_mean_face(
        mean_face, working_resolution, full_resolution)
    mean_face_path = os.path.join(OUTPUT_DIR, "ps6-1-a-1.png")
    cv2.imwrite(mean_face_path, mean_face_image)

    # Part b: PCA dimension reduction; the eigenvalues are not needed here.
    n_components = 10
    basis, _ = ps6.pca(faces, n_components)
    plot_eigen_faces(basis.T, "ps6-1-b-1.png")
示例#6
0
    def test_mean_face(self):
        """Check ``ps6.get_mean_face`` against three stored reference results.

        For cases 1-3, loads ``x_data_mean_<i>.npy`` and
        ``correct_mean_<i>.npy`` from ``INPUT_DIR`` and asserts that the
        computed mean equals the reference within an absolute tolerance of 1.
        """
        failure_note = ("Values do not match the reference solution. "
                        "This function should only compute the mean of each "
                        "column.")

        for case in range(1, 4):
            data_path = os.path.join(
                INPUT_DIR, "x_data_mean_{}.npy".format(case))
            samples = np.load(data_path)

            reference_path = os.path.join(
                INPUT_DIR, "correct_mean_{}.npy".format(case))
            expected = np.load(reference_path)

            computed = ps6.get_mean_face(samples)

            self.assertTrue(
                np.allclose(computed, expected, atol=1), failure_note)
示例#7
0
def _count_nearest_neighbor_hits(X, y, k, p):
    """Split the data, fit PCA on the training part and score 1-NN on the test part.

    Args:
        X, y: data and labels as returned by ``ps6.load_images``.
        k: second argument of ``ps6.pca`` (number of components requested).
        p: split value forwarded to ``ps6.split_dataset``.

    Returns:
        Tuple ``(good, bad)``: test predictions that match / do not match
        the test labels.
    """
    # A fresh split is drawn on every call.
    Xtrain, ytrain, Xtest, ytest = ps6.split_dataset(X, y, p)

    # training
    mu = ps6.get_mean_face(Xtrain)
    eig_vecs, eig_vals = ps6.pca(Xtrain, k)
    Xtrain_proj = np.dot(Xtrain - mu, eig_vecs)

    # testing
    # NOTE(review): the test rows are centered with the *test* mean, not the
    # training mean -- confirm this is intended.
    mu = ps6.get_mean_face(Xtest)
    Xtest_proj = np.dot(Xtest - mu, eig_vecs)

    good = 0
    bad = 0
    for i, obs in enumerate(Xtest_proj):
        # Distance from this test row to every training row in PCA space.
        dist = [np.linalg.norm(obs - x) for x in Xtrain_proj]
        y_pred = ytrain[np.argmin(dist)]

        if y_pred == ytest[i]:
            good += 1
        else:
            bad += 1
    return good, bad


def _save_accuracy_plot(xs, accuracy, xlabel, title, path):
    """Plot accuracy against ``xs`` on a new figure and save it to ``path``."""
    plt.figure()
    plt.ylabel('Accuracy')
    plt.xlabel(xlabel)
    plt.title(title)
    plt.plot(xs, accuracy)
    plt.grid()
    plt.draw()
    plt.savefig(path)


def part_1c():
    """Sweep the number of PCs and the split percentage; print and plot accuracy.

    With ``runk`` set, evaluates k = 1..29 at p = 0.5 and saves
    ``./pca_plot.png``. With ``runp`` set, evaluates the values of
    ``np.arange(0.2, 1.0, 0.1)`` as p at k = 10 and saves
    ``./split_P_plot.png``. Each evaluation prints its hit/miss counts and
    accuracy.

    Returns:
        None.
    """
    runk = 1  # set to 0 to skip the sweep over k
    runp = 1  # set to 0 to skip the sweep over p

    if runk or runp:
        # Loaded once for both sweeps; assumes ps6.load_images depends only
        # on its arguments, so reloading per iteration adds nothing.
        size = (32, 32)
        X, y = ps6.load_images(YALE_FACES_DIR, size)

    if runk:
        p = 0.5  # Select a split percentage value
        ks = []
        accuracy = []
        for k in range(1, 30):
            good, bad = _count_nearest_neighbor_hits(X, y, k, p)

            # Single-argument print() calls run on both Python 2 and 3; the
            # doubled spaces reproduce the former print statement's output.
            print('Good predictions =  {} Bad predictions =  {}'.format(good, bad))
            ks.append(k)
            accuracy.append(100 * float(good) / (good + bad))
            print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))

        _save_accuracy_plot(ks, accuracy, '# of PCs',
                            'Accuracy vs Number of PCs ', './pca_plot.png')

    if runp:
        k = 10
        accuracy = []
        ps = []
        for p in np.arange(0.2, 1.0, 0.1):
            good, bad = _count_nearest_neighbor_hits(X, y, k, p)

            print('Good predictions =  {} Bad predictions =  {}'.format(good, bad))
            ps.append(p)
            accuracy.append(100 * float(good) / (good + bad))
            print('{0:.2f}% accuracy'.format(100 * float(good) / (good + bad)))

        _save_accuracy_plot(ps, accuracy, 'Percentage of data split',
                            'Accuracy vs data split percentage ',
                            './split_P_plot.png')