示例#1
0
def plotProgresskMeans(X, centroids, previous, idx, K, i, color):
    """Plot one k-means iteration and wait for the user to press Enter.

    The examples are drawn by ``plotDataPoints``, the current centroids are
    marked with black x's, and every centroid is joined to its previous
    location by a line segment.

    Args:
        X: Data points, forwarded unchanged to ``plotDataPoints``.
        centroids: Current centroid positions; columns 0 and 1 are plotted.
        previous: Centroid positions of the previous iteration, indexed the
            same way as ``centroids``.
        idx: Cluster assignments, forwarded to ``plotDataPoints``.
        K: Number of centroids. Unused; kept for interface compatibility.
        i: Iteration number shown in the title.
        color: Matplotlib colour used for the history line segments.
    """
    # Plot the examples
    plotDataPoints(X, idx)

    # Plot the centroids as black x's.  'x' is an unfilled marker, so
    # Matplotlib ignores ``edgecolor`` for it (and warns); the colour has to
    # be passed through ``c`` for the crosses to actually come out black.
    plt.scatter(centroids[:, 0],
                centroids[:, 1],
                marker='x',
                s=60,
                lw=3,
                c='k')

    # Plot the history of the centroids with lines
    for j in range(len(centroids)):
        plt.plot([centroids[j, 0], previous[j, 0]],
                 [centroids[j, 1], previous[j, 1]],
                 c=color)

    # Title
    plt.title('Iteration number %d' % i)
    plt.show()
    input("Program paused. Press Enter to continue...")
示例#2
0
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Display the progress of k-Means as it is running (2D data only).

    The examples are drawn by ``plotDataPoints``.  Starting from
    ``centroids`` and walking ``previous`` from its last entry back to its
    first, each centroid set is marked with black x's and ``drawLine`` is
    called between every centroid and the matching row of the preceding set.
    """
    # Plot the examples
    plotDataPoints(X, idx, K, i)

    # Marker styling shared by every centroid set
    cross_style = dict(linestyle='None',
                       marker='x',
                       markeredgecolor='k',
                       ms=10,
                       lw=3)

    newer = centroids
    for older in previous[::-1]:
        # Plot the centroids as black x's
        plt.plot(newer[:, 0], newer[:, 1], **cross_style)

        # Plot the history of the centroids with lines
        for row in range(newer.shape[0]):
            drawLine(newer[row, :], older[row, :])

        # Step one iteration further back in time
        newer = older

    # Title
    plt.title('Iteration number %d' % i)
示例#3
0
def plotProgresskMeans(X, centroids, idx, K, i=0):
    """Plot the data points, the centroids and the centroid trajectories.

    Args:
        X: Data points, forwarded to ``plotDataPoints``.
        centroids: Centroid positions; columns 0 and 1 are plotted.  Rows
            ``j, j + K, j + 2K, ...`` are joined into one line, so this is
            presumably the stacked history of all iterations (K rows per
            iteration) -- confirm against the caller.
        idx: Cluster assignments, forwarded to ``plotDataPoints``.
        K: Number of centroids (also the row stride between iterations).
        i: Iteration number shown in the title.
    """
    from plotDataPoints import plotDataPoints
    import matplotlib.pyplot as plt

    # Plot the examples
    plotDataPoints(X, idx, K)

    # Plot the centroids as dark x's
    plt.plot(centroids[:, 0], centroids[:, 1], 'x',
             markeredgecolor='#414042', markersize=7, markeredgewidth=2)

    # Plot the history of the centroids with lines
    for j in range(K):
        # Every K-th row starting at j belongs to the same centroid
        track = centroids[j::K, :]
        plt.plot(track[:, 0], track[:, 1], color='k', linewidth=0.5)

    # Title
    plt.title('Iteration number %d' % (i))
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Display the progress of k-Means as it is running (2D data only).

    Plots the examples through ``pdp.plotDataPoints``, overlays the current
    centroids as black x's and calls ``dl.drawLine`` (with ``c='b'``) between
    each centroid and its previous position.  The title shows ``i + 1``.
    """
    # Plot the examples
    pdp.plotDataPoints(X, idx, K)

    # Plot the centroids as black x's
    xs, ys = centroids[:, 0], centroids[:, 1]
    plt.scatter(xs, ys, marker='x', s=400, c='k', linewidth=1)

    # Plot the history of the centroids with lines
    n_centroids = centroids.shape[0]
    for row in range(n_centroids):
        dl.drawLine(centroids[row, :], previous[row, :], c='b')

    # Title
    plt.title('Iteration number {:d}'.format(i + 1))
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Display the progress of k-Means as it is running (2D data only).

    Plots the data points through ``pdp.plotDataPoints``, marks the current
    centroids with black x's and calls ``dl.drawLine`` between the previous
    and current location of every centroid.

    Args:
        X: Data points, forwarded to ``pdp.plotDataPoints``.
        centroids: Current centroid positions; columns 0 and 1 are plotted.
        previous: Centroid positions of the previous iteration.
        idx: Cluster assignments, forwarded to ``pdp.plotDataPoints``.
        K: Number of centroids, forwarded to ``pdp.plotDataPoints``.
        i: Zero-based iteration counter; the title shows ``i + 1``.
    """
    # Plot the examples
    pdp.plotDataPoints(X, idx, K)

    # Plot the centroids as black x's
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=400, c='k', linewidth=1)

    # Plot the history of the centroids with lines.  ``range`` instead of
    # ``xrange``: the latter exists on Python 2 only and raises NameError on
    # Python 3.
    for j in range(centroids.shape[0]):
        dl.drawLine(centroids[j, :], previous[j, :], c='b')

    # Title
    plt.title('Iteration number {:d}'.format(i + 1))
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Plot the examples, both centroid sets and the links between them.

    ``previous`` is drawn with the 'rx' format and ``centroids`` with 'kx';
    ``drawLine`` is then called for every pair of matching rows.  The figure
    is shown without blocking.
    """
    plotDataPoints(X, idx)

    # Older centroids first, then the current ones
    for points, fmt in ((previous, 'rx'), (centroids, 'kx')):
        plt.plot(points[:, 0], points[:, 1], fmt, lw=3)

    n_centroids = centroids.shape[0]
    for row in range(n_centroids):
        drawLine(centroids[row, :], previous[row, :])

    plt.title('Iteration number %d' % i)
    plt.show(block=False)
示例#7
0
def plotProgressKmeans(X, centroids, previous, idx, K, i):
    """Plot one k-means iteration and show the figure.

    Args:
        X: Data points, forwarded to ``plotDataPoints``.
        centroids: Current centroid positions; columns 0 and 1 are plotted.
        previous: Centroid positions of the previous iteration.
        idx: Cluster assignments, forwarded to ``plotDataPoints``.
        K: Number of centroids, forwarded to ``plotDataPoints``.
        i: Iteration number shown in the title.
    """
    plotDataPoints(X, idx, K)

    # 'x' is an unfilled marker: Matplotlib ignores ``edgecolors`` for it
    # (and warns), so the blue colour has to be passed through ``c``.
    plt.scatter(centroids[:, 0], centroids[:, 1], marker='x', c='b')

    # Link every centroid to its previous position
    for j in range(centroids.shape[0]):
        drawLine(centroids[j, :], previous[j, :])

    plt.title('Iteration number {}'.format(i))
    plt.show()
示例#8
0
def plotProgressKMeans(X, history_centroids, idx, K, i):
    """Display the progress of k-Means as it is running (2D data only).

    ``history_centroids`` is indexed as ``[iteration, centroid, coordinate]``.
    The centroid positions of iterations ``0..i`` are marked with black x's,
    and ``drawLine`` is called between every pair of consecutive positions of
    each centroid.  The title shows ``i + 1``.
    """
    plotDataPoints(X, idx, K)

    # Positions of all centroids up to and including iteration i
    seen = history_centroids[0:i + 1]
    plt.plot(seen[:, :, 0], seen[:, :, 1],
             linestyle='', marker='x', markersize=10, linewidth=3, color='k')
    plt.title('Iteration number {}'.format(i + 1))

    # Join consecutive positions, one centroid at a time
    n_centroids = history_centroids.shape[1]
    for centroid_idx in range(n_centroids):
        for step in range(i):
            start = history_centroids[step, centroid_idx, :]
            stop = history_centroids[step + 1, centroid_idx, :]
            drawLine(start, stop)
示例#9
0
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Plot the examples, the current centroids and their last move.

    The examples are drawn by ``plotDataPoints``; the centroids are marked
    with black x's, and ``drawLine`` (with ``c='b'``) is called between each
    centroid and the matching row of ``previous``.
    """
    # Plot the examples
    plotDataPoints(X, idx, K)

    # Plot the centroids as black x's
    cross_style = dict(marker='x',
                       markeredgecolor='k',
                       markersize=10,
                       linewidth=3,
                       linestyle='None')
    plt.plot(centroids[:, 0], centroids[:, 1], **cross_style)

    # Plot the history of the centroids with lines
    n_centroids = centroids.shape[0]
    for row in range(n_centroids):
        drawLine(centroids[row, :], previous[row, :], c='b')

    # Title
    plt.title('Iteration number ' + str(i))
示例#10
0
def plotProgresskMeans(X, centroids, previous, idx, K, i):
    """Display the progress of k-Means as it is running (2D data only).

    Plots the data points through ``plotDataPoints``, marks the current
    centroids with black x's and calls ``drawLine`` (with ``'b'``) between
    the previous and current location of every centroid.  The title shows
    ``i + 1``.
    """
    # Plot the examples
    plotDataPoints(X, idx, K)

    # Plot the centroids as black x's
    xs = centroids[:, 0]
    ys = centroids[:, 1]
    plot(xs, ys, 'x', mec='k', ms=10, mew=3)

    # Plot the history of the centroids with lines
    n_centroids = size(centroids, 0)
    for row in range(n_centroids):
        drawLine(centroids[row, :], previous[row, :], 'b')

    # Title
    title('Iteration number #%d' % (i + 1))
def runkMeans(data, initial_centroids, max_iters, plot_progress):
    """Run k-means for a fixed number of iterations.

    Each iteration assigns every example to a centroid with
    ``findClosestCentroids`` and then recomputes the centroids with
    ``computeCentroids``.

    Args:
        data: 2-D array of examples; its shape is unpacked as ``(m, n)``.
        initial_centroids: Starting centroids; its length is used as K.
        max_iters: Number of iterations to run.
        plot_progress: If truthy, the current assignments are plotted with
            ``plotDataPoints`` on every iteration.

    Returns:
        Tuple ``(centroids, idx)`` holding the centroids after the last
        update and the assignments computed in the last iteration.  With
        ``max_iters == 0`` these are the initial centroids and a list of
        ``m`` zeros.
    """
    # Initialize values
    (m, n) = np.shape(data)
    print(m)
    print(n)
    k = len(initial_centroids)
    centroids = initial_centroids
    previous_centroids = centroids
    idx = [0] * m
    for i in range(max_iters):
        # Report progress 1-based ("1/10" on the first pass).  The original
        # message printed a literal "#d/#d" because the format specifiers
        # had been mangled and the values were passed as extra print args.
        print('K-Means iteration %d/%d...\n' % (i + 1, max_iters))
        idx = findClosestCentroids(data, centroids)
        if plot_progress:
            # NOTE: only the assignments are plotted; the centroid-history
            # plot is disabled:
            # plotProgresskMeans(data, centroids, previous_centroids, idx, k, i)
            plotDataPoints(data, idx, k)
            previous_centroids = centroids
            print('Press enter to continue.\n')
        # Given the memberships, compute new centroids
        centroids = computeCentroids(data, idx, k)
    return centroids, idx
def plotProgresskMeans(X, centroids, previous, idx, K, i, color):
    """Plot one k-means iteration and wait for the user to press Enter.

    The examples are drawn by ``plotDataPoints``, the current centroids are
    marked with black x's, and every centroid is joined to its previous
    location by a line segment.

    Args:
        X: Data points, forwarded unchanged to ``plotDataPoints``.
        centroids: Current centroid positions; columns 0 and 1 are plotted.
        previous: Centroid positions of the previous iteration, indexed the
            same way as ``centroids``.
        idx: Cluster assignments, forwarded to ``plotDataPoints``.
        K: Number of centroids. Unused; kept for interface compatibility.
        i: Iteration number shown in the title.
        color: Matplotlib colour used for the history line segments.
    """
    # Plot the examples
    plotDataPoints(X, idx)

    # Plot the centroids as black x's.  'x' is an unfilled marker, so
    # Matplotlib ignores ``edgecolor`` for it (and warns); the colour has to
    # be passed through ``c`` for the crosses to actually come out black.
    plt.scatter(centroids[:, 0], centroids[:, 1],
                marker='x', s=60, lw=3, c='k')

    # Plot the history of the centroids with lines
    for j in range(len(centroids)):
        plt.plot([centroids[j, 0], previous[j, 0]],
                 [centroids[j, 1], previous[j, 1]], c=color)

    # Title
    plt.title('Iteration number %d' % i)
    show()

    # ``raw_input`` exists on Python 2 only; fall back to ``input`` so the
    # pause also works on Python 3 instead of raising NameError.
    try:
        pause = raw_input
    except NameError:
        pause = input
    pause("Program paused. Press Enter to continue...")
示例#13
0
           X[sel, 1],
           X[sel, 2],
           s=100,
           c=idx[sel],
           cmap=cm.hsv,
           vmax=K + 1,
           facecolors='none')
# Title and display the 3D scatter figure built by the preceding statements.
title('Pixel dataset plotted in 3D. Color shows centroid memberships')
fig.show()
# NOTE(review): the print statement and raw_input() below are Python 2 only;
# this fragment does not parse on Python 3.
print 'Program paused. Press enter to continue.'
raw_input()

## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
# Use PCA to project this cloud to 2D for visualization

# Subtract the mean to use PCA
X_norm, mu, sigma = featureNormalize(X)

# PCA and project the data to 2D
# (this pca() is unpacked into two values; its definition is not visible here)
U, s = pca(X_norm)
Z = projectData(X_norm, U, 2)

# Plot in 2D: only the sampled rows `sel` and their cluster labels are drawn
fig = figure()
plotDataPoints(Z[sel, :], idx[sel], K)
title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
fig.show()

# Wait for the user before the script continues
print 'Program paused. Press enter to continue.'
raw_input()
示例#14
0
             marker='o',
             facecolors='none',
             lw=0.4,
             s=10)

# Title and display the 3D scatter figure built by the preceding statements.
plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships')
show()
input('Program paused. Press Enter to continue...')

## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
# Use PCA to project this cloud to 2D for visualization

# Subtract the mean to use PCA
X_norm, mu, sigma = featureNormalize(X)

# PCA and project the data to 2D
# (this pca() is unpacked into three values; only U is used below)
U, S, V = pca(X_norm)
Z = projectData(X_norm, U, 2)

# Plot in 2D
plt.figure()
# Collect the projected rows and the cluster labels of the sampled indices
# in `sel`, one lookup per index.
zs = np.array([Z[s] for s in sel])
idxs = np.array([idx[s] for s in sel])

# plt.scatter(zs[:,0], zs[:,1])
plotDataPoints(zs, idxs)
plt.title(
    'Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
show()
input('Program paused. Press Enter to continue...')
# Map each sampled index to a colour of the "jet" colormap: the indices in
# `sel` are scaled by their maximum, so the scaled values do not exceed 1.
# NOTE(review): the colours are derived from `sel` itself rather than from
# the cluster labels, although the title below says the colour shows the
# centroid memberships -- confirm this is intended.
cmap = plt.get_cmap("jet")
idxn = sel.astype('float')/max(sel.astype('float'))
colors = cmap(idxn)
# ax = Axes3D(fig)
# `ax`, `xs`, `ys` and `zs` are defined before this fragment.
ax.scatter3D(xs, ys, zs=zs, edgecolors=colors, marker='o', facecolors='none', lw=0.4, s=10)

plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships')
show()
# NOTE(review): raw_input() is Python 2 only; it raises NameError on Python 3.
raw_input('Program paused. Press Enter to continue...')

## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
# Use PCA to project this cloud to 2D for visualization

# Subtract the mean to use PCA
X_norm, mu, sigma = featureNormalize(X)

# PCA and project the data to 2D
# (this pca() is unpacked into three values; only U is used below)
U, S, V = pca(X_norm)
Z = projectData(X_norm, U, 2)

# Plot in 2D
plt.figure()
# Collect the projected rows and the cluster labels of the sampled indices
# in `sel`, one lookup per index.  Note that `zs` is rebound here.
zs = np.array([Z[s] for s in sel])
idxs = np.array([idx[s] for s in sel])

# plt.scatter(zs[:,0], zs[:,1])
plotDataPoints(zs, idxs)
plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
show()
raw_input('Program paused. Press Enter to continue...')
示例#16
0
#  too expensive. If you have a fast computer, you may increase this.
# Draw 1000 random row indices into X (uniform floats scaled by the number of
# rows, then truncated to int); the same index may be drawn more than once.
sel = (random.rand(1000) * size(X, 0)).astype(int)

#  Visualize the data and centroid memberships in 3D
fig = figure()
ax = Axes3D(fig)
# The first three columns of the sampled rows are the coordinates; the colour
# comes from the cluster labels idx[sel] through the hsv colormap.
ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], s=100, c=idx[sel], cmap=cm.hsv, vmax=K+1, facecolors='none')
title('Pixel dataset plotted in 3D. Color shows centroid memberships')
fig.show()
# NOTE(review): the print statement and raw_input() below are Python 2 only;
# this fragment does not parse on Python 3.
print 'Program paused. Press enter to continue.'
raw_input()

## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
# Use PCA to project this cloud to 2D for visualization

# Subtract the mean to use PCA
X_norm, mu, sigma = featureNormalize(X)

# PCA and project the data to 2D
# (this pca() is unpacked into two values; its definition is not visible here)
U, s = pca(X_norm)
Z = projectData(X_norm, U, 2)

# Plot in 2D: only the sampled rows `sel` and their cluster labels are drawn
fig = figure()
plotDataPoints(Z[sel, :], idx[sel], K)
title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
fig.show()

# Wait for the user before the script continues
print 'Program paused. Press enter to continue.'
raw_input()
示例#17
0
def ex7_pca():
    """Run the PCA part of Exercise 7 (PCA and K-Means clustering).

    Port of the course script ``ex7_pca.m``.  It relies on the exercise
    functions ``pca``, ``projectData``, ``recoverData``, ``computeCentroids``,
    ``findClosestCentroids`` and ``kMeansInitCentroids`` being implemented.

    The parts are:

    1. load and plot the small 2D example dataset;
    2. run PCA on it and draw the principal directions;
    3. project it to one dimension and recover an approximation;
    4. load and display the face dataset;
    5. run PCA on the faces and display the top 36 eigenvectors;
    6. project the faces onto the top 100 eigenvectors;
    7. display the faces recovered from that projection;
    8. run K-Means on the pixels of an image and visualise the clusters in
       3D and, through PCA, in 2D.

    Figures are saved as ``figure1.png`` ... ``figure9.png`` instead of being
    shown, and the "Program paused" messages are printed without pausing.
    Reads ``ex7data1.mat``, ``ex7faces.mat`` and ``bird_small.png`` through
    relative paths.
    """
    ## ================== Part 1: Load Example Dataset  ===================
    #  We start with a small dataset that is easy to visualize.
    print('Visualizing example dataset for PCA.\n')

    mat = scipy.io.loadmat('ex7data1.mat')
    X = mat['X']

    #  Visualize the example dataset
    plt.plot(X[:, 0], X[:, 1], 'wo', ms=10, mec='b', mew=1)
    plt.axis([0.5, 6.5, 2, 8])

    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')

    ## =============== Part 2: Principal Component Analysis ===============
    print('\nRunning PCA on example dataset.\n')

    #  Before running PCA, it is important to first normalize X
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    U, S = pca(X_norm)

    #  Draw the eigenvectors centered at mean of data. These lines show the
    #  directions of maximum variations in the dataset.
    print(S)
    print(U)
    drawLine(mu, mu + 1.5 * np.dot(S[0], U[:, 0].T))
    drawLine(mu, mu + 1.5 * np.dot(S[1], U[:, 1].T))
    plt.savefig('figure2.png')

    print('Top eigenvector: ')
    print(' U(:,1) = %f %f ' % (U[0, 0], U[1, 0]))
    print('\n(you should expect to see -0.707107 -0.707107)')

    print('Program paused. Press enter to continue.')

    ## =================== Part 3: Dimension Reduction ===================
    #  Map the data onto the first K eigenvectors and plot what it looks like
    #  when only those eigenvectors are used to reconstruct it.
    print('\nDimension reduction on example dataset.\n\n')

    #  Plot the normalized dataset (returned from pca)
    plt.figure()
    plt.plot(X_norm[:, 0], X_norm[:, 1], 'bo')

    #  Project the data onto K = 1 dimension
    K = 1
    Z = projectData(X_norm, U, K)
    print('Projection of the first example: %f' % Z[0])
    print('\n(this value should be about 1.481274)\n')

    X_rec = recoverData(Z, U, K)
    print('Approximation of the first example: %f %f' % (X_rec[0, 0], X_rec[0, 1]))
    print('\n(this value should be about  -1.047419 -1.047419)\n')

    #  Draw lines connecting the projected points to the original points
    plt.plot(X_rec[:, 0], X_rec[:, 1], 'ro')
    for i in range(X_norm.shape[0]):
        drawLine(X_norm[i, :], X_rec[i, :])
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')

    ## =============== Part 4: Loading and Visualizing Face Data =============
    print('\nLoading face dataset.\n\n')

    #  Load Face dataset
    mat = scipy.io.loadmat('ex7faces.mat')
    X = mat['X']

    #  Display the first 100 faces in the dataset
    displayData(X[:100, :])
    plt.savefig('figure4.png')

    print('Program paused. Press enter to continue.\n')

    ## =========== Part 5: PCA on Face Data: Eigenfaces  ===================
    #  Run PCA and visualize the eigenvectors, which here are eigenfaces.
    print('\nRunning PCA on face dataset.\n(this mght take a minute or two ...)\n')

    #  Before running PCA, it is important to first normalize X by
    #  subtracting the mean value from each feature
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    U, S = pca(X_norm)

    #  Visualize the top 36 eigenvectors found
    displayData(U[:, :36].T)
    plt.savefig('figure5.png')

    print('Program paused. Press enter to continue.')

    ## ============= Part 6: Dimension Reduction for Faces =================
    #  Project images to the eigen space using the top K eigenvectors
    print('\nDimension reduction for face dataset.\n')

    K = 100
    Z = projectData(X_norm, U, K)

    print('The projected data Z has a size of: ')
    print(formatter('%d ', Z.shape))

    print('\n\nProgram paused. Press enter to continue.')

    ## ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
    #  Recover the faces from the top K eigenvectors only and compare them
    #  to the original (normalized) input, which is also displayed.
    print('\nVisualizing the projected (reduced dimension) faces.\n')

    K = 100
    X_rec = recoverData(Z, U, K)

    # Display normalized data
    displayData(X_norm[:100, :])
    plt.gcf().suptitle('Original faces')

    plt.savefig('figure6.a.png')

    # Display reconstructed data from only k eigenfaces
    displayData(X_rec[:100, :])
    plt.gcf().suptitle('Recovered faces')

    plt.savefig('figure6.b.png')

    print('Program paused. Press enter to continue.')

    ## === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
    #  Run K-Means on the 3-dimensional pixel colors of an image, visualize
    #  the result in 3D, then apply PCA to obtain a visualization in 2D.

    # Re-load the image from the previous exercise and run K-Means on it
    # For this to work, you need to complete the K-Means assignment first
    A = matplotlib.image.imread('bird_small.png')

    # imread already returns PNG data as floats scaled to [0, 1]; dividing
    # those by 255 again would squash every colour into [0, 1/255].  Only
    # integer pixel data still needs to be rescaled.
    if np.issubdtype(A.dtype, np.integer):
        A = A / 255.0
    X = A.reshape(-1, 3)
    K = 16
    max_iters = 10
    initial_centroids = kMeansInitCentroids(X, K)
    centroids, idx = runkMeans('7', X, initial_centroids, max_iters)

    #  Sample 1000 random indexes (since working with all the data is
    #  too expensive. If you have a fast computer, you may increase this.
    sel = np.random.choice(X.shape[0], size=1000)

    #  Visualize the data and centroid memberships in 3D
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], cmap='rainbow', c=idx[sel], s=8**2)
    ax.set_title('Pixel dataset plotted in 3D. Color shows centroid memberships')
    plt.savefig('figure8.png')

    print('Program paused. Press enter to continue.')

    ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
    # Use PCA to project this cloud to 2D for visualization

    # Subtract the mean to use PCA
    X_norm, mu, sigma = featureNormalize(X)

    # PCA and project the data to 2D
    U, S = pca(X_norm)
    Z = projectData(X_norm, U, 2)

    # Plot in 2D
    plt.figure()
    plotDataPoints(Z[sel, :], [idx[sel]], K, 0)
    plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
    plt.savefig('figure9.png')
    print('Program paused. Press enter to continue.\n')
示例#18
0
# NOTE(review): everything from here to the `end` below is MATLAB/Octave
# source (1-based `centroids(:,1)` indexing, `...` continuations, `for/end`),
# not Python.  Its `%` comment markers appear to have been replaced by `#`,
# and the `function` header is commented out, which leaves the final `end`
# unmatched.  Confirm whether this body is meant to be fully commented out.
#
# function plotProgresskMeans(X, centroids, previous, idx, K, i)
#PLOTPROGRESSKMEANS is a helper function that displays the progress of
#k-Means as it is running. It is intended for use only with 2D data.
#   PLOTPROGRESSKMEANS(X, centroids, previous, idx, K, i) plots the data
#   points with colors assigned to each centroid. With the previous
#   centroids, it also plots a line between the previous locations and
#   current locations of the centroids.
#

# Plot the examples
plotDataPoints(X, idx, K);

# Plot the centroids as black x's
plot(centroids(:,1), centroids(:,2), 'x', ...
     'MarkerEdgeColor','k', ...
     'MarkerSize', 10, 'LineWidth', 3);

# Plot the history of the centroids with lines
for j=1:size(centroids,1)
    drawLine(centroids(j, :), previous(j, :));
end

# Title
# NOTE(review): '#d' is not a format specifier; this was presumably '%d'
# before the % -> # substitution, so the iteration number is not
# interpolated into the title -- confirm.
title(sprintf('Iteration number #d', i))

end

from plotDataPoints import plotDataPoints
def plotProgresskMeans(data, centroids, previous, idx, k, i):
    """Plot the examples by delegating to ``plotDataPoints``.

    Only ``data``, ``idx`` and ``k`` are forwarded; ``centroids``,
    ``previous`` and ``i`` are accepted but not used, so neither the
    centroids nor their history are drawn.
    """
    plotDataPoints(data, idx, k)
示例#19
0
def plotProgresskMeans(data, centroids, previous, idx, k, i):
    """Plot the examples by delegating to ``plotDataPoints``.

    Only ``data``, ``idx`` and ``k`` are forwarded; ``centroids``,
    ``previous`` and ``i`` are accepted but not used, so neither the
    centroids nor their history are drawn.
    """
    plotDataPoints(data, idx, k)