示例#1
0
        # This should print a validation error of 0.19

        
    if question == '3.2':
        # Load the 'animals' dataset: a feature matrix and the label used to
        # annotate each row in the scatter plot below.
        dataset = utils.load_dataset('animals')
        X = dataset['X'].astype(float)
        animals = dataset['animals']
        n, d = X.shape

        # Standardize the columns of X before fitting PCA.
        X = utils.standardize_cols(X)

        # Fit a PCA model with k=2 and compress X with it.
        model = PCA(k=2)
        model.fit(X)
        Z = model.compress(X)

        # Scatter the first two compressed coordinates and label every point
        # with the matching entry of `animals`.
        fig, ax = plt.subplots()
        ax.scatter(Z[:, 0], Z[:, 1])
        ax.set_ylabel('z2')
        ax.set_xlabel('z1')
        ax.set_title('PCA')
        for row in range(n):
            point = (Z[row, 0], Z[row, 1])
            ax.annotate(animals[row], point)

        utils.savefig('q3_2_PCA_animals.png')

        # Optional extra (not required): one minus the ratio of the squared
        # Frobenius norm of the reconstruction residual to that of X.
        residual_sq = norm(model.expand(Z) - X, 'fro') ** 2
        total_sq = norm(X, 'fro') ** 2
        variance_explained = 1 - residual_sq / total_sq
        print('Variance explained {}'.format(variance_explained))

    if question == '4.1':
示例#2
0
        # Plot the raw matrix as an image.
        plt.imshow(X)
        utils.savefig('unsatisfying_visualization_1.png')

        # Scatter two randomly chosen features and label all points.
        # np.random.choice samples WITH replacement by default, which can pick
        # the same column twice and plot a feature against itself. Sample
        # without replacement whenever at least two features exist; fall back
        # to the old behaviour only when d < 2 so that case still runs.
        feature_pair = np.random.choice(d, 2, replace=d < 2)
        x = X[:, feature_pair]
        fig, ax = plt.subplots()
        ax.scatter(x[:, 0], x[:, 1])
        for i in range(n):
            ax.annotate(animals[i], (x[i, 0], x[i, 1]))
        utils.savefig('unsatisfying_visualization_2.png')

        # Create a PCA model with k=5, fit it and compress X.
        model = PCA(k=5)
        model.fit(X)
        Z = model.compress(X)  # presumably (n, k); confirm against PCA.compress

        # Scatter the first two compressed coordinates, labelled per row.
        fig_1, ax_1 = plt.subplots()
        ax_1.scatter(Z[:, 0], Z[:, 1])
        for i in range(Z.shape[0]):
            ax_1.annotate(animals[i], (Z[i, 0], Z[i, 1]))
        utils.savefig('satisfying_visualization.png')

        # Print the relative squared reconstruction error ||X_hat - X||^2 / ||X||^2.
        # NOTE(review): this is the ratio NOT captured by the model; the
        # "variance explained" would be 1 minus this value. Confirm which of
        # the two is meant to be reported.
        # Assumes `norm` is numpy.linalg.norm (Frobenius by default for 2-D
        # input). TODO confirm against the file's imports.
        X_hat = model.expand(Z)
        print(norm(X_hat - X)**2 / norm(X)**2)

    if question == '4':
        X = utils.load_dataset('highway')['X'].astype(float) / 255