# This should print a validation error of 0.19

if question == '3.2':
    # Question 3.2: standardize the animals data, project it onto k=2
    # principal components, and save a scatter plot in which every point is
    # labelled with its animal name.
    dataset = utils.load_dataset('animals')
    X = dataset['X'].astype(float)
    animals = dataset['animals']
    n, d = X.shape

    # standardize columns
    X = utils.standardize_cols(X)

    model = PCA(k=2)
    model.fit(X)
    Z = model.compress(X)

    fig, ax = plt.subplots()
    ax.scatter(Z[:, 0], Z[:, 1])
    plt.ylabel('z2')
    plt.xlabel('z1')
    plt.title('PCA')
    for i in range(n):
        ax.annotate(animals[i], (Z[i, 0], Z[i, 1]))
    utils.savefig('q3_2_PCA_animals.png')

    # code below isn't required.
    # One minus the ratio of squared Frobenius norms of the reconstruction
    # residual and of X itself.
    variance_explained = 1 - norm(model.expand(Z) - X, 'fro')**2 / norm(X, 'fro')**2
    print('Variance explained {}'.format(variance_explained))

if question == '4.1':
    # NOTE(review): this branch reads X, animals, n and d, but in the code
    # visible here they are only assigned inside the question == '3.2' branch.
    # Confirm they are defined before this point, otherwise this raises
    # NameError.

    # Plot the matrix
    plt.imshow(X)
    utils.savefig('unsatisfying_visualization_1.png')

    # Randomly plot two features, and label all points.
    # replace=False guarantees two *distinct* columns; sampling with
    # replacement could pick the same feature twice and plot it against
    # itself. (Raises ValueError if d < 2.)
    x = X[:, np.random.choice(d, 2, replace=False)]
    fig, ax = plt.subplots()
    ax.scatter(x[:, 0], x[:, 1])
    for i in range(n):
        ax.annotate(animals[i], (x[i, 0], x[i, 1]))
    utils.savefig('unsatisfying_visualization_2.png')

    # Create PCA model, fit and compress our X matrix
    model = PCA(k=5)
    model.fit(X)
    Z = model.compress(X)  # original author's note: Z = (50, k)

    # Only the first two of the k=5 compressed columns are plotted.
    fig_1, ax_1 = plt.subplots()
    ax_1.scatter(Z[:, 0], Z[:, 1])
    for i in range(Z.shape[0]):
        ax_1.annotate(animals[i], (Z[i, 0], Z[i, 1]))
    utils.savefig('satisfying_visualization.png')

    # Print the reconstruction-error ratio ||expand(Z) - X||^2 / ||X||^2.
    # This is the complement of the quantity the '3.2' branch calls
    # "variance explained" (which is 1 minus this ratio), not the variance
    # explained itself.
    # NOTE(review): no explicit 'fro' here; presumably `norm` is
    # numpy/scipy's linalg.norm, whose default for a 2-D array is the
    # Frobenius norm -- confirm against the file's imports.
    Z_new = model.expand(Z)
    print(norm(Z_new - X)**2 / norm(X)**2)

if question == '4':
    X = utils.load_dataset('highway')['X'].astype(float) / 255