# Part 3 (continued): run K-Means on the example dataset provided with the
# exercise.
print('Running K-Means clustering on example dataset.')

# K-Means settings.
K = 3
max_iters = 10

# The starting centroids are pinned to fixed values so runs are consistent.
# In practice they should be generated automatically, for example by picking
# random examples (as kMeansInitCentroids does).
initial_centroids = np.array([
    [3, 3],
    [6, 2],
    [8, 5],
])

# Run K-Means. Per the exercise notes, the trailing True tells runkMeans to
# plot the progress of K-Means. X is not assigned in this fragment and must
# already exist.
centroids, idx = runkMeans(X, initial_centroids, max_iters, True)
print('K-Means Done.\n')

# ============= Part 4: K-Means Clustering on Pixels ===============
# K-Means is used here to compress an image: it is first run on the colors of
# the pixels, and each pixel is then mapped onto its closest centroid.
#
# kMeansInitCentroids.py must be completed before this part.
print('Running K-Means clustering on pixels from an image.')

# Load an image of a bird.
A = imread('bird_small.png')
# If imread does not work for you, you can try instead
close('all') # Re-load the image from the previous exercise and run K-Means on it # For this to work, you need to complete the K-Means assignment first A = imread('bird_small.png').astype(float) # If imread does not work for you, you can try instead # A = loadmat('bird_small.mat')['A'].astype(float) / 255 img_size = shape(A) X = reshape(A, (-1, 3), order='F') K = 16 max_iters = 10 initial_centroids = kMeansInitCentroids(X, K) centroids, idx = runkMeans(X, initial_centroids, max_iters) # Sample 1000 random indexes (since working with all the data is # too expensive. If you have a fast computer, you may increase this. sel = (random.rand(1000) * size(X, 0)).astype(int) # Visualize the data and centroid memberships in 3D fig = figure() ax = Axes3D(fig) ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], s=100, c=idx[sel], cmap=cm.hsv, vmax=K + 1,
def ex7():
    """Run the K-Means half of Exercise 7 (PCA and K-Means Clustering).

    The parts run in order:

    1. find the closest centroids for ``ex7data2.mat``;
    2. compute the centroid means from that assignment;
    3. run K-Means on the example dataset;
    4. run K-Means on the pixel colors of ``bird_small.png``;
    5. rebuild the image from the learned centroids and save the
       original/compressed comparison to ``figure3.png``.

    Progress messages are printed to stdout; nothing is returned.  The
    helpers used here (findClosestCentroids, computeCentroids,
    kMeansInitCentroids, runkMeans, formatter) are defined elsewhere.
    """
    ## ================= Part 1: Find Closest Centroids ====================
    # K-Means is split into two functions: findClosestCentroids and
    # computeCentroids. This part exercises findClosestCentroids.
    print('Finding closest centroids.\n')

    # Load the example dataset.
    mat = scipy.io.loadmat('ex7data2.mat')
    X = mat['X']

    # Initial set of centroids.
    K = 3
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # Closest centroid for every example, using the initial centroids.
    idx = findClosestCentroids(X, initial_centroids)

    print('Closest centroids for the first 3 examples: ')
    # Shifted by one for display (presumably idx is 0-based; confirm against
    # findClosestCentroids).
    print(formatter(' %d', idx[:3] + 1))
    print('\n(the closest centroids should be 1, 3, 2 respectively)')

    print('Program paused. Press enter to continue.')

    ## ===================== Part 2: Compute Means =========================
    print('\nComputing centroids means.\n')

    # Means based on the closest centroids found in the previous part.
    centroids = computeCentroids(X, idx, K)

    print('Centroids computed after initial finding of closest centroids: ')
    print(centroids)
    print('\n(the centroids should be')
    print(' [ 2.428301 3.157924 ]')
    print(' [ 5.813503 2.633656 ]')
    print(' [ 7.119387 3.616684 ]\n')

    print('Program paused. Press enter to continue.')

    ## =================== Part 3: K-Means Clustering ======================
    print('\nRunning K-Means clustering on example dataset.\n')

    # Load the example dataset.
    mat = scipy.io.loadmat('ex7data2.mat')
    X = mat['X']

    # Settings for running K-Means.
    K = 3
    max_iters = 10

    # Fixed starting centroids keep the run consistent; in practice they
    # should be generated automatically (see kMeansInitCentroids).
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # The leading '1' and the trailing True are passed straight through to
    # runkMeans; per the exercise notes the True enables plotting of the
    # K-Means progress.
    centroids, idx = runkMeans('1', X, initial_centroids, max_iters, True)
    print('\nK-Means Done.\n')

    print('Program paused. Press enter to continue.')

    ## ============= Part 4: K-Means Clustering on Pixels ===============
    # K-Means is run on the colors of the pixels in the image, and each pixel
    # is then mapped onto its closest centroid.
    print('\nRunning K-Means clustering on pixels from an image.\n')

    # matplotlib.image.imread returns PNG data as floats already scaled to
    # [0, 1], so the pixel values are used as-is. Dividing by 255 here and
    # multiplying by 255 again for display only adds a lossy round trip.
    A = matplotlib.image.imread('bird_small.png')

    # One row per pixel, columns are the Red, Green and Blue values.
    X = A.reshape(-1, 3)

    # Try different values of K and max_iters here.
    K = 16
    max_iters = 10

    # K-Means needs randomly initialised centroids.
    initial_centroids = kMeansInitCentroids(X, K)

    # Run K-Means.
    centroids, idx = runkMeans('2', X, initial_centroids, max_iters)

    print('Program paused. Press enter to continue.')

    ## ================= Part 5: Image Compression ======================
    print('\nApplying K-Means to compress an image.\n')

    # Closest cluster for every pixel: the image is now represented by the
    # indices in idx.
    idx = findClosestCentroids(X, centroids)

    # Map each pixel to its centroid color and restore the image dimensions.
    X_recovered = centroids[idx, :].reshape(A.shape)

    fig, ax = plt.subplots(1, 2, figsize=(8, 4))

    # Original image.
    ax[0].imshow(A)
    ax[0].grid(False)
    ax[0].set_title('Original')

    # Compressed image, side by side.
    ax[1].imshow(X_recovered)
    ax[1].grid(False)
    ax[1].set_title('Compressed, with %d colors' % K)
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')
# X and initial_centroids are not assigned in this fragment (the MATLAB-style
# lines that would load ex7data2.mat and set the centroids to
# [3 3; 6 2; 8 5] are commented out in the original), so both must already
# exist at this point.

# K-Means settings.
K = 3
max_iters = 10

# runPlot is handed to runkMeans as its last argument. The exercise notes say
# a true value there plots the progress of K-Means, so plotting is switched
# off for this run.
runPlot = False
centroids, idx = runkMeans(X, initial_centroids, max_iters, runPlot)
print('\nK-Means Done.\n\n')

# ============= Part 4: K-Means Clustering on Pixels ===============
# K-Means is used here to compress an image: it is first run on the colors of
# the pixels, and each pixel is then mapped onto its closest centroid.
#
# kMeansInitCentroids must be completed before this part.
print('\nRunning K-Means clustering on pixels from an image.\n\n')

# Load an image of a bird.
# NOTE(review): msc is presumably scipy.misc, whose imread was removed in
# newer SciPy releases; confirm against the imports of this file.
A = msc.imread(ml_dir + 'bird_small.png')
# X and initial_centroids are not assigned in this fragment (the MATLAB-style
# lines that would load ex7data2.mat and set the centroids to
# [3 3; 6 2; 8 5] are commented out in the original), so both must already
# exist at this point.

# K-Means settings.
K = 3
max_iters = 10

# runPlot is handed to runkMeans as its last argument. The exercise notes say
# a true value there plots the progress of K-Means, so plotting is switched
# off for this run.
runPlot = False
centroids, idx = runkMeans(X, initial_centroids, max_iters, runPlot)
print('\nK-Means Done.\n\n')

# ============= Part 4: K-Means Clustering on Pixels ===============
# K-Means is used here to compress an image: it is first run on the colors of
# the pixels, and each pixel is then mapped onto its closest centroid.
#
# kMeansInitCentroids must be completed before this part.
print('\nRunning K-Means clustering on pixels from an image.\n\n')

# Load an image of a bird
def ex7_pca():
    """Run the PCA half of Exercise 7 (PCA and K-Means Clustering).

    The parts run in order: visualise ``ex7data1.mat``, run PCA on it,
    project/recover it with one component, load and display
    ``ex7faces.mat``, run PCA on the faces, project/recover them with 100
    components, and finally use K-Means plus PCA to visualise the pixels of
    ``bird_small.png`` in 3D and 2D.

    Progress messages are printed to stdout and the plots are saved as
    ``figure1.png`` ... ``figure9.png``; nothing is returned.  The helpers
    used here (featureNormalize, pca, projectData, recoverData, drawLine,
    displayData, kMeansInitCentroids, runkMeans, plotDataPoints, formatter)
    are defined elsewhere.
    """
    ## ================== Part 1: Load Example Dataset ===================
    # A small dataset that is easy to visualise.
    print('Visualizing example dataset for PCA.\n')

    mat = scipy.io.loadmat('ex7data1.mat')
    X = mat['X']

    # Visualize the example dataset.
    plt.plot(X[:, 0], X[:, 1], 'wo', ms=10, mec='b', mew=1)
    plt.axis([0.5, 6.5, 2, 8])
    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')

    ## =============== Part 2: Principal Component Analysis ===============
    print('\nRunning PCA on example dataset.\n')

    # Before running PCA, it is important to first normalize X.
    X_norm, mu, sigma = featureNormalize(X)

    # Run PCA.
    U, S = pca(X_norm)

    # Draw the eigenvectors centered at the mean of the data. These lines
    # show the directions of maximum variation in the dataset. They are drawn
    # onto the figure from Part 1, which is then saved again as figure2.png.
    print(S)
    print(U)
    drawLine(mu, mu + 1.5 * np.dot(S[0], U[:, 0].T))
    drawLine(mu, mu + 1.5 * np.dot(S[1], U[:, 1].T))
    plt.savefig('figure2.png')

    print('Top eigenvector: ')
    print(' U(:,1) = %f %f ' % (U[0, 0], U[1, 0]))
    print('\n(you should expect to see -0.707107 -0.707107)')

    print('Program paused. Press enter to continue.')

    ## =================== Part 3: Dimension Reduction ===================
    # Project the data onto the first k eigenvectors and plot what it looks
    # like when reconstructed from only those eigenvectors.
    print('\nDimension reduction on example dataset.\n\n')

    # Plot the normalized dataset (returned from featureNormalize).
    fig = plt.figure()
    plt.plot(X_norm[:, 0], X_norm[:, 1], 'bo')

    # Project the data onto K = 1 dimension.
    K = 1
    Z = projectData(X_norm, U, K)
    # Take the first element explicitly: formatting Z[0] directly relies on
    # an implicit size-1-array-to-scalar conversion when Z is 2-D, which
    # NumPy has deprecated. This form works for both 1-D and 2-D Z.
    print('Projection of the first example: %f' % np.asarray(Z).flat[0])
    print('\n(this value should be about 1.481274)\n')

    X_rec = recoverData(Z, U, K)
    print('Approximation of the first example: %f %f' % (X_rec[0, 0], X_rec[0, 1]))
    print('\n(this value should be about -1.047419 -1.047419)\n')

    # Draw lines connecting the projected points to the original points.
    plt.plot(X_rec[:, 0], X_rec[:, 1], 'ro')
    for original_point, recovered_point in zip(X_norm, X_rec):
        drawLine(original_point, recovered_point)
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')

    ## =============== Part 4: Loading and Visualizing Face Data =============
    print('\nLoading face dataset.\n\n')

    # Load Face dataset.
    mat = scipy.io.loadmat('ex7faces.mat')
    X = mat['X']

    # Display the first 100 faces in the dataset.
    displayData(X[:100, :])
    plt.savefig('figure4.png')

    print('Program paused. Press enter to continue.\n')

    ## =========== Part 5: PCA on Face Data: Eigenfaces ===================
    # Run PCA and visualize the eigenvectors, which in this case are
    # eigenfaces. The first 36 are displayed.
    print('\nRunning PCA on face dataset.\n(this mght take a minute or two ...)\n')

    # Normalize X before running PCA.
    X_norm, mu, sigma = featureNormalize(X)

    # Run PCA.
    U, S = pca(X_norm)

    # Visualize the top 36 eigenvectors found.
    displayData(U[:, :36].T)
    plt.savefig('figure5.png')

    print('Program paused. Press enter to continue.')

    ## ============= Part 6: Dimension Reduction for Faces =================
    # Project images to the eigen space using the top K eigenvectors.
    print('\nDimension reduction for face dataset.\n')

    K = 100
    Z = projectData(X_norm, U, K)

    print('The projected data Z has a size of: ')
    print(formatter('%d ', Z.shape))

    print('\n\nProgram paused. Press enter to continue.')

    ## ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
    # Recover the faces from the top K eigenvectors only and compare them to
    # the normalized input, which is also displayed.
    print('\nVisualizing the projected (reduced dimension) faces.\n')

    K = 100
    X_rec = recoverData(Z, U, K)

    # Display normalized data.
    displayData(X_norm[:100, :])
    plt.gcf().suptitle('Original faces')
    plt.savefig('figure6.a.png')

    # Display reconstructed data from only K eigenfaces.
    displayData(X_rec[:100, :])
    plt.gcf().suptitle('Recovered faces')
    plt.savefig('figure6.b.png')

    print('Program paused. Press enter to continue.')

    ## === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
    # K-Means is run on the 3-dimensional pixel colors of an image; the
    # result is visualised in 3D first and then in 2D through PCA.

    # matplotlib.image.imread returns PNG data as floats already scaled to
    # [0, 1], so the pixel values are used as-is (no extra division by 255).
    A = matplotlib.image.imread('bird_small.png')
    X = A.reshape(-1, 3)
    K = 16
    max_iters = 10
    initial_centroids = kMeansInitCentroids(X, K)
    centroids, idx = runkMeans('7', X, initial_centroids, max_iters)

    # Sample 1000 random indexes, since working with all the data is too
    # expensive. Increase this on a fast computer.
    sel = np.random.choice(X.shape[0], size=1000)

    # Visualize the data and centroid memberships in 3D.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], cmap='rainbow', c=idx[sel], s=8**2)
    ax.set_title('Pixel dataset plotted in 3D. Color shows centroid memberships')
    plt.savefig('figure8.png')

    print('Program paused. Press enter to continue.')

    ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
    # Use PCA to project this cloud to 2D for visualization.

    # Normalize before PCA.
    X_norm, mu, sigma = featureNormalize(X)

    # PCA and project the data to 2D.
    U, S = pca(X_norm)
    Z = projectData(X_norm, U, 2)

    # Plot in 2D.
    fig = plt.figure()
    plotDataPoints(Z[sel, :], [idx[sel]], K, 0)
    plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
    plt.savefig('figure9.png')

    print('Program paused. Press enter to continue.\n')
# Re-load the image from the previous exercise and run K-Means on it.
# The K-Means assignment has to be completed first for this to work.
# The .mat copy of the image is used here (MATLAB original:
# A = double(imread('bird_small.png'));).
mat = scipy.io.loadmat('bird_small.mat')

# Same preparation as ex7.py, part 4: scale by 255 and unroll the image into
# one row per pixel with three color columns. order='F' walks the first image
# axis fastest, and .copy() gives X its own contiguous buffer.
A = mat["A"] / 255.0
img_size = A.shape
n_pixels = img_size[0] * img_size[1]
X = A.reshape(n_pixels, 3, order='F').copy()

K = 16
max_iters = 10
initial_centroids = kmic.kMeansInitCentroids(X, K)
centroids, idx = rkm.runkMeans(X, initial_centroids, max_iters)

# Sample 1000 random indexes, since working with all the data is too
# expensive. Increase this on a fast computer.
# The index vector is drawn 1-D on purpose: with a (1000, 1) column,
# Z[sel, :] would yield an array of shape [1000, 1, 2].
sel = (np.random.rand(1000) * X.shape[0]).astype(int)

# Color palette: one entry per centroid, looked up for each sampled point.
palette = hsv.hsv(K)
colors = np.array([palette[int(label)] for label in idx[sel]])

# Visualize the data and centroid memberships in 3D.
fig1 = plt.figure(1)
ax = fig1.add_subplot(111, projection='3d')
ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], s=100, c=colors)
plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships')
# Unroll the image into one row per pixel with three color columns.
# `image` is not assigned in this fragment and must already exist.
x = image.shape[0]
y = image.shape[1]
image_formatted = np.reshape(image, (x * y, 3))

k = 16
max_iters = 10
initial_centroids = kMeansInitCentroids(image_formatted, k)
print(initial_centroids)

# Run K-Means (the trailing False is forwarded to runkMeans unchanged).
centroids, idx = runkMeans(image_formatted, initial_centroids, max_iters, False)

# Closest centroid for every pixel.
idx = findClosestCentroids(image_formatted, centroids)

feature_size = len(image_formatted[0])
# Retained only because code outside this view may still read it.
len_idx = len(idx)

# Replace every pixel by the color of its centroid. A single integer-array
# lookup does the work of the former Python double loop over pixels and
# channels. As before, 1 is subtracted from each label before the lookup
# (presumably findClosestCentroids returns 1-based labels; confirm).
X_recovered = np.asarray(centroids)[np.asarray(idx) - 1, :feature_size]
X_recovered = np.reshape(X_recovered, (x, y, 3))
X_recovered = X_recovered * 255

plt.subplot(1, 2, 1)
# Compute means based on the closest centroids found in the previous part.
# X, indicies, K, initial_centroids and max_iters are not assigned in this
# fragment and must already exist.
centroids = computeCentroids(X, indicies, K)

# Every print below passes a single parenthesised string, which produces the
# same output as the Python 2 print statement and also parses on Python 3.
print('Centroids computed after initial finding of closest centroids: \n')
# One centroid per line; the space before each newline reproduces what the
# comma-separated print statement emitted.
print('%s \n%s \n%s' % (centroids[0], centroids[1], centroids[2]))
print('\nthe centroids should be\n')
print(' [ 2.428301 3.157924 ]\n')
print(' [ 5.813503 2.633656 ]\n')
print(' [ 7.119387 3.616684 ]\n\n')

# NOTE(review): raw_input exists only on Python 2.
raw_input("Press any key to continue")

print('\nRunning K-Means clustering on example dataset.\n\n')

# Run K-Means. Per the exercise notes, the trailing True tells runkMeans to
# plot the progress.
centroids, indicies = runkMeans(X, initial_centroids, max_iters, True)
print('\nK-Means Done.\n\n')

# Step 2: K-Means on image colors for compression.
print('\nRunning K-Means clustering on pixels from an image.\n\n')

# Open the PNG image.
A = mpimg.imread('bird_small.png')
A_size = asizeof.asizeof(A)

# Unroll the 3D image (rows x columns x [R,G,B]) into 2D, one [R,G,B] row
# per pixel.
A_flat = A.reshape(-1, 3)

# Run your K-Means algorithm on this data
# Test values
# Re-load the bird image (MATLAB original:
# A = double(imread('bird_small.png'));).
# NOTE(review): msc is presumably scipy.misc, whose imread was removed in
# newer SciPy releases; confirm against the imports of this file.
A = msc.imread(ml_dir + 'bird_small.png')

# Divide by a float literal so the scaling is true division on every
# interpreter. With an integer image, `A / 255` is floor division on
# Python 2 and would collapse the pixel values to 0 or 1.
A = A / 255.0

# Unroll the image into one row per pixel with three color columns.
img_size = A.shape
X = A.reshape(img_size[0] * img_size[1], 3)

K = 16
max_iters = 10
initial_centroids = kMeansInitCentroids(X, K)
centroids, idx = runkMeans(X, initial_centroids, max_iters)

# Sample 1000 random indexes, since working with all the data is too
# expensive. Increase this on a fast computer.
sel = np.random.choice(X.shape[0], 1000)

# The centroid label of each sampled pixel doubles as its color value.
colors = idx[sel]

# Visualize the data and centroid memberships in 3D.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Load the example dataset.
mat = scipy.io.loadmat('ex7data2.mat')
X = mat["X"]

# K-Means settings.
K = 3
max_iters = 10

# The starting centroids are pinned to fixed values so runs are consistent.
# In practice they should be generated automatically, for example by picking
# random examples (as kMeansInitCentroids does).
initial_centroids = np.array([
    [3, 3],
    [6, 2],
    [8, 5],
])

# Run K-Means. Per the exercise notes, the trailing True tells runkMeans to
# plot the progress of K-Means.
centroids, idx = rkm.runkMeans(X, initial_centroids, max_iters, True)
print('\nK-Means Done.\n')

# NOTE(review): raw_input exists only on Python 2.
raw_input('Program paused. Press enter to continue.')

## ============= Part 4: K-Means Clustering on Pixels ===============
# K-Means is used here to compress an image: it is first run on the colors of
# the pixels, and each pixel is then mapped onto its closest centroid.
#
# kMeansInitCentroids must be completed before this part.
print('\nRunning K-Means clustering on pixels from an image.\n\n')

# Load an image of a bird