Exemplos de runkMeans em Python, exemplos de runkMeans.runkMeans em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: ex7.py Projeto: syaning/stanford-machine-learning

# the example dataset we have provided.
print('Running K-Means clustering on example dataset.')

# Settings for running K-Means
K = 3
max_iters = 10

# For consistency, here we set centroids to specific values
# but in practice you want to generate them automatically, such as by
# setting them to be random examples (as can be seen in
# kMeansInitCentroids).
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Run K-Means algorithm. The 'true' at the end tells our function to plot
# the progress of K-Means
centroids, idx = runkMeans(X, initial_centroids, max_iters, True)
print('K-Means Done.\n')


# ============= Part 4: K-Means Clustering on Pixels ===============
# In this exercise, you will use K-Means to compress an image. To do this,
# you will first run K-Means on the colors of the pixels in the image and
# then you will map each pixel onto its closest centroid.
#
# You should now complete the code in kMeansInitCentroids.py
print('Running K-Means clustering on pixels from an image.')

# Load an image of a bird
A = imread('bird_small.png')

# If imread does not work for you, you can try instead

Exemplo n.º 2

0

Exibir arquivo

Arquivo: ex7_pca.py Projeto: DXist/py-coursera

close('all')

# Re-load the image from the previous exercise and run K-Means on it
# For this to work, you need to complete the K-Means assignment first
A = imread('bird_small.png').astype(float)

# If imread does not work for you, you can try instead
#   A = loadmat('bird_small.mat')['A'].astype(float) / 255

img_size = shape(A)
X = reshape(A, (-1, 3), order='F')
K = 16
max_iters = 10
initial_centroids = kMeansInitCentroids(X, K)
centroids, idx = runkMeans(X, initial_centroids, max_iters)

#  Sample 1000 random indexes (since working with all the data is
#  too expensive). If you have a fast computer, you may increase this.
sel = (random.rand(1000) * size(X, 0)).astype(int)

#  Visualize the data and centroid memberships in 3D
fig = figure()
ax = Axes3D(fig)
ax.scatter(X[sel, 0],
           X[sel, 1],
           X[sel, 2],
           s=100,
           c=idx[sel],
           cmap=cm.hsv,
           vmax=K + 1,

Exemplo n.º 3

0

Exibir arquivo

Arquivo: ex7.py Projeto: altermarkive/machine-learning-course

def ex7():
    """Run the K-Means walkthrough of Exercise 7 from start to finish.

    The function takes no arguments and has no return statement. It works
    purely through side effects: it prints progress and expected values,
    loads 'ex7data2.mat' and 'bird_small.png' from the working directory,
    and saves the original/compressed image comparison to 'figure3.png'.

    It relies on helpers that are defined outside this function:
    findClosestCentroids, computeCentroids, runkMeans, kMeansInitCentroids
    and formatter.
    """
    ## Machine Learning Online Class
    #  Exercise 7 | Principal Component Analysis and K-Means Clustering
    #
    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  exercise. You will need to complete the following functions
    #  (file names are those of the original Octave assignment):
    #
    #     pca.m
    #     projectData.m
    #     recoverData.m
    #     computeCentroids.m
    #     findClosestCentroids.m
    #     kMeansInitCentroids.m
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## ================= Part 1: Find Closest Centroids ====================
    #  To help you implement K-Means, we have divided the learning algorithm
    #  into two functions -- findClosestCentroids and computeCentroids. In this
    #  part, you should complete the code in the findClosestCentroids function.
    #
    print('Finding closest centroids.\n')

    # Load an example dataset that we will be using
    mat = scipy.io.loadmat('ex7data2.mat')
    X = mat['X']

    # Select an initial set of centroids
    K = 3 # 3 Centroids
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # Find the closest centroids for the examples using the
    # initial_centroids
    idx = findClosestCentroids(X, initial_centroids)

    print('Closest centroids for the first 3 examples: ')
    # NOTE(review): the "+ 1" suggests idx is 0-based and is shifted here to
    # match the 1-based expected values printed below -- confirm against
    # findClosestCentroids.
    print(formatter(' %d', idx[:3] + 1))
    print('\n(the closest centroids should be 1, 3, 2 respectively)')

    print('Program paused. Press enter to continue.')
    #pause

    ## ===================== Part 2: Compute Means =========================
    #  After implementing the closest centroids function, you should now
    #  complete the computeCentroids function.
    #
    print('\nComputing centroids means.\n')

    #  Compute means based on the closest centroids found in the previous part.
    centroids = computeCentroids(X, idx, K)

    print('Centroids computed after initial finding of closest centroids: ')
    print(centroids)
    # The expected-values message below opens a parenthesis that is never
    # closed in the printed text.
    print('\n(the centroids should be')
    print('   [ 2.428301 3.157924 ]')
    print('   [ 5.813503 2.633656 ]')
    print('   [ 7.119387 3.616684 ]\n')

    print('Program paused. Press enter to continue.')
    #pause


    ## =================== Part 3: K-Means Clustering ======================
    #  After you have completed the two functions computeCentroids and
    #  findClosestCentroids, you have all the necessary pieces to run the
    #  kMeans algorithm. In this part, you will run the K-Means algorithm on
    #  the example dataset we have provided.
    #
    print('\nRunning K-Means clustering on example dataset.\n')

    # Load an example dataset (same file as in Part 1, loaded again)
    mat = scipy.io.loadmat('ex7data2.mat')
    X = mat['X']

    # Settings for running K-Means
    K = 3
    max_iters = 10

    # For consistency, here we set centroids to specific values
    # but in practice you want to generate them automatically, such as by
    # setting them to be random examples (as can be seen in
    # kMeansInitCentroids).
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

    # Run K-Means algorithm. The True at the end tells our function to plot
    # the progress of K-Means.
    # NOTE(review): the meaning of the leading '1' argument is not visible
    # here (possibly a run/figure label) -- confirm in runkMeans.
    centroids, idx = runkMeans('1', X, initial_centroids, max_iters, True)
    print('\nK-Means Done.\n')

    print('Program paused. Press enter to continue.')
    #pause

    ## ============= Part 4: K-Means Clustering on Pixels ===============
    #  In this exercise, you will use K-Means to compress an image. To do this,
    #  you will first run K-Means on the colors of the pixels in the image and
    #  then you will map each pixel onto its closest centroid.
    #
    #  You should now complete the code in kMeansInitCentroids.m
    #

    print('\nRunning K-Means clustering on pixels from an image.\n')

    #  Load an image of a bird
    A = matplotlib.image.imread('bird_small.png')

    # If imread does not work for you, you can try instead
    #   load ('bird_small.mat')

    # NOTE(review): matplotlib documents imread as returning PNG data as
    # floats already in [0, 1]; if that holds here, this division shrinks the
    # values further and the "* 255" in the imshow calls below undoes it --
    # confirm the dtype/range of A.
    A = A / 255 # Divide by 255 so that all values are in the range 0 - 1

    # Size of the image
    #img_size = size(A)

    # Reshape the image into an Nx3 matrix where N = number of pixels.
    # Each row will contain the Red, Green and Blue pixel values
    # This gives us our dataset matrix X that we will use K-Means on.
    # (assumes A has exactly 3 channels -- TODO confirm for the input PNG)
    X = A.reshape(-1, 3)

    # Run your K-Means algorithm on this data
    # You should try different values of K and max_iters here
    K = 16
    max_iters = 10

    # When using K-Means, it is important to initialize the centroids
    # randomly.
    # You should complete the code in kMeansInitCentroids.m before proceeding
    initial_centroids = kMeansInitCentroids(X, K)

    # Run K-Means (no trailing plot flag is passed here, unlike Part 3)
    centroids, idx = runkMeans('2', X, initial_centroids, max_iters)

    print('Program paused. Press enter to continue.')
    #pause


    ## ================= Part 5: Image Compression ======================
    #  In this part of the exercise, you will use the clusters of K-Means to
    #  compress an image. To do this, we first find the closest clusters for
    #  each example. After that, we replace each pixel with the value of the
    #  centroid it was assigned to.

    print('\nApplying K-Means to compress an image.\n')

    # Find closest cluster members
    idx = findClosestCentroids(X, centroids)

    # Essentially, now we have represented the image X in terms of the
    # indices in idx.

    # We can now recover the image from the indices (idx) by mapping each pixel
    # (specified by its index in idx) to the centroid value
    X_recovered = centroids[idx,:].reshape(A.shape)

    # Reshape the recovered image into proper dimensions.
    # The line above already reshaped to A.shape, so this second reshape
    # leaves the shape unchanged.
    X_recovered = X_recovered.reshape(A.shape)

    # One row, two columns: original on the left, compressed on the right
    fig, ax = plt.subplots(1, 2, figsize=(8, 4))

    # Display the original image (scaled back up after the earlier / 255)
    ax[0].imshow(A * 255)
    ax[0].grid(False)
    ax[0].set_title('Original')

    # Display compressed image side by side
    ax[1].imshow(X_recovered * 255)
    ax[1].grid(False)
    ax[1].set_title('Compressed, with %d colors' % K)

    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')

Exemplo n.º 4

0

Exibir arquivo

#load('ex7data2.mat');

# Settings for running K-Means
K = 3
max_iters = 10

# For consistency, here we set centroids to specific values
# but in practice you want to generate them automatically, such as by
# settings them to be random examples (as can be seen in
# kMeansInitCentroids).
#initial_centroids = [3 3; 6 2; 8 5];

# Run K-Means algorithm. The 'true' at the end tells our function to plot
# the progress of K-Means
runPlot = False
centroids, idx = runkMeans(X, initial_centroids, max_iters, runPlot)
print('\nK-Means Done.\n\n')

# ============= Part 4: K-Means Clustering on Pixels ===============
#  In this exercise, you will use K-Means to compress an image. To do this,
#  you will first run K-Means on the colors of the pixels in the image and
#  then you will map each pixel onto its closest centroid.
#
#  You should now complete the code in kMeansInitCentroids.m

print('\nRunning K-Means clustering on pixels from an image.\n\n')

#  Load an image of a bird
#A = double(imread('bird_small.png'));

A = msc.imread(ml_dir + 'bird_small.png')

Exemplo n.º 5

0

Exibir arquivo

Arquivo: ex7.py Projeto: grixxy/ml_python

#load('ex7data2.mat');

# Settings for running K-Means
K = 3
max_iters = 10

# For consistency, here we set centroids to specific values
# but in practice you want to generate them automatically, such as by
# settings them to be random examples (as can be seen in
# kMeansInitCentroids).
#initial_centroids = [3 3; 6 2; 8 5];

# Run K-Means algorithm. The 'true' at the end tells our function to plot
# the progress of K-Means
runPlot = False
centroids, idx = runkMeans(X, initial_centroids, max_iters, runPlot)
print('\nK-Means Done.\n\n')



# ============= Part 4: K-Means Clustering on Pixels ===============
#  In this exercise, you will use K-Means to compress an image. To do this,
#  you will first run K-Means on the colors of the pixels in the image and
#  then you will map each pixel onto its closest centroid.
#
#  You should now complete the code in kMeansInitCentroids.m


print('\nRunning K-Means clustering on pixels from an image.\n\n')

#  Load an image of a bird

Exemplo n.º 6

0

Exibir arquivo

def ex7_pca():
    """Run the PCA walkthrough of Exercise 7 from start to finish.

    The function takes no arguments and has no return statement. It works
    purely through side effects: it prints progress and expected values,
    loads 'ex7data1.mat', 'ex7faces.mat' and 'bird_small.png' from the
    working directory, and saves plots to 'figure1.png' through
    'figure5.png', 'figure6.a.png', 'figure6.b.png', 'figure8.png' and
    'figure9.png'.

    It relies on helpers that are defined outside this function:
    featureNormalize, pca, drawLine, projectData, recoverData, displayData,
    formatter, kMeansInitCentroids, runkMeans and plotDataPoints.
    """
    ## Machine Learning Online Class
    #  Exercise 7 | Principal Component Analysis and K-Means Clustering
    #
    #  Instructions
    #  ------------
    #
    #  This file contains code that helps you get started on the
    #  exercise. You will need to complete the following functions
    #  (file names are those of the original Octave assignment):
    #
    #     pca.m
    #     projectData.m
    #     recoverData.m
    #     computeCentroids.m
    #     findClosestCentroids.m
    #     kMeansInitCentroids.m
    #
    #  For this exercise, you will not need to change any code in this file,
    #  or any other files other than those mentioned above.
    #

    ## Initialization
    #clear ; close all; clc

    ## ================== Part 1: Load Example Dataset  ===================
    #  We start this exercise by using a small dataset that is easy to
    #  visualize
    #
    print('Visualizing example dataset for PCA.\n')

    #  The following command loads the dataset and binds its 'X' entry to
    #  the local variable X
    mat = scipy.io.loadmat('ex7data1.mat')
    X = mat['X']

    #  Visualize the example dataset (first column against second column)
    plt.plot(X[:, 0], X[:, 1], 'wo', ms=10, mec='b', mew=1)
    plt.axis([0.5, 6.5, 2, 8])

    plt.savefig('figure1.png')

    print('Program paused. Press enter to continue.')
    #pause


    ## =============== Part 2: Principal Component Analysis ===============
    #  You should now implement PCA, a dimension reduction technique. You
    #  should complete the code in pca.m
    #
    print('\nRunning PCA on example dataset.\n')

    #  Before running PCA, it is important to first normalize X
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    #  (presumably U holds the principal directions as columns and S their
    #  magnitudes -- confirm in pca)
    U, S = pca(X_norm)

    #  mu, as returned by featureNormalize above, is the starting point of
    #  both lines drawn below.

    #  Draw the eigenvectors centered at mean of data. These lines show the
    #  directions of maximum variations in the dataset.
    #  NOTE(review): no new figure is created before these calls, so
    #  drawLine presumably draws onto the figure from Part 1 -- confirm in
    #  drawLine.
    #hold on
    print(S)
    print(U)
    drawLine(mu, mu + 1.5 * np.dot(S[0], U[:,0].T))
    drawLine(mu, mu + 1.5 * np.dot(S[1], U[:,1].T))
    #hold off
    plt.savefig('figure2.png')

    print('Top eigenvector: ')
    # The label uses Octave's 1-based "U(:,1)" notation for U's first column
    print(' U(:,1) = %f %f ' % (U[0,0], U[1,0]))
    print('\n(you should expect to see -0.707107 -0.707107)')

    print('Program paused. Press enter to continue.')
    #pause


    ## =================== Part 3: Dimension Reduction ===================
    #  You should now implement the projection step to map the data onto the
    #  first k eigenvectors. The code will then plot the data in this reduced
    #  dimensional space.  This will show you what the data looks like when
    #  using only the corresponding eigenvectors to reconstruct it.
    #
    #  You should complete the code in projectData.m
    #
    print('\nDimension reduction on example dataset.\n\n')

    #  Plot the normalized dataset (returned from featureNormalize) on a
    #  fresh figure; the name fig is not read again before it is rebound
    #  in Part 8(a)
    fig = plt.figure()
    plt.plot(X_norm[:, 0], X_norm[:, 1], 'bo')

    #  Project the data onto K = 1 dimension
    K = 1
    Z = projectData(X_norm, U, K)
    print('Projection of the first example: %f' % Z[0])
    print('\n(this value should be about 1.481274)\n')

    X_rec = recoverData(Z, U, K)
    print('Approximation of the first example: %f %f' % (X_rec[0, 0], X_rec[0, 1]))
    print('\n(this value should be about  -1.047419 -1.047419)\n')

    #  Draw lines connecting the projected points to the original points
    plt.plot(X_rec[:, 0], X_rec[:, 1], 'ro')
    for i in range(X_norm.shape[0]):
        drawLine(X_norm[i,:], X_rec[i,:])
    #end
    plt.savefig('figure3.png')

    print('Program paused. Press enter to continue.\n')
    #pause

    ## =============== Part 4: Loading and Visualizing Face Data =============
    #  We start the exercise by first loading and visualizing the dataset.
    #  The following code will load the dataset into the local variable X
    #
    print('\nLoading face dataset.\n\n')

    #  Load Face dataset
    mat = scipy.io.loadmat('ex7faces.mat')
    X = mat['X']

    #  Display the first 100 faces in the dataset
    displayData(X[:100, :])
    plt.savefig('figure4.png')

    print('Program paused. Press enter to continue.\n')
    #pause

    ## =========== Part 5: PCA on Face Data: Eigenfaces  ===================
    #  Run PCA and visualize the eigenvectors which are in this case eigenfaces
    #  We display the first 36 eigenfaces.
    #
    print('\nRunning PCA on face dataset.\n(this mght take a minute or two ...)\n')

    #  Before running PCA, it is important to first normalize X by subtracting
    #  the mean value from each feature
    X_norm, mu, sigma = featureNormalize(X)

    #  Run PCA
    U, S = pca(X_norm)

    #  Visualize the top 36 eigenvectors found (first 36 columns of U,
    #  transposed so that each one becomes a row)
    displayData(U[:, :36].T)
    plt.savefig('figure5.png')

    print('Program paused. Press enter to continue.')
    #pause


    ## ============= Part 6: Dimension Reduction for Faces =================
    #  Project images to the eigen space using the top k eigenvectors
    #  (original note, truncated in the source: "If you are applying a
    #  machine learning algorithm ...")
    print('\nDimension reduction for face dataset.\n')

    K = 100
    Z = projectData(X_norm, U, K)

    print('The projected data Z has a size of: ')
    print(formatter('%d ', Z.shape))

    print('\n\nProgram paused. Press enter to continue.')
    #pause

    ## ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
    #  Project images to the eigen space using the top K eigen vectors and
    #  visualize only using those K dimensions
    #  Compare to the original input, which is also displayed

    print('\nVisualizing the projected (reduced dimension) faces.\n')

    #  Same K as in Part 6; Z from Part 6 is reused here
    K = 100
    X_rec  = recoverData(Z, U, K)

    # Display normalized data
    # (the title is set on whatever figure is current after displayData)
    #subplot(1, 2, 1)
    displayData(X_norm[:100,:])
    plt.gcf().suptitle('Original faces')
    #axis square

    plt.savefig('figure6.a.png')

    # Display reconstructed data from only k eigenfaces
    #subplot(1, 2, 2)
    displayData(X_rec[:100,:])
    plt.gcf().suptitle('Recovered faces')
    #axis square

    plt.savefig('figure6.b.png')

    print('Program paused. Press enter to continue.')
    #pause


    ## === Part 8(a): Optional (ungraded) Exercise: PCA for Visualization ===
    #  One useful application of PCA is to use it to visualize high-dimensional
    #  data. In the last K-Means exercise you ran K-Means on 3-dimensional
    #  pixel colors of an image. We first visualize this output in 3D, and then
    #  apply PCA to obtain a visualization in 2D.

    #close all; close all; clc

    # Re-load the image from the previous exercise and run K-Means on it
    # For this to work, you need to complete the K-Means assignment first
    A = matplotlib.image.imread('bird_small.png')

    # If imread does not work for you, you can try instead
    #   load ('bird_small.mat')

    # NOTE(review): matplotlib documents imread as returning PNG data as
    # floats already in [0, 1]; if that holds here, this division shrinks the
    # values further -- confirm the dtype/range of A.
    A = A / 255
    # One row per pixel, one column per color channel
    # (assumes A has exactly 3 channels -- TODO confirm for the input PNG)
    X = A.reshape(-1, 3)
    K = 16
    max_iters = 10
    initial_centroids = kMeansInitCentroids(X, K)
    # NOTE(review): the meaning of the leading '7' argument is not visible
    # here (possibly a run/figure label) -- confirm in runkMeans.
    centroids, idx = runkMeans('7', X, initial_centroids, max_iters)

    #  Sample 1000 random indexes (since working with all the data is
    #  too expensive). If you have a fast computer, you may increase this.
    #  np.random.choice samples with replacement by default, so the same
    #  index may be picked more than once.
    sel = np.random.choice(X.shape[0], size=1000)

    #  Setup Color Palette
    #palette = hsv(K)
    #colors = palette(idx(sel), :)

    #  Visualize the data and centroid memberships in 3D
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    #  Points are colored by their assigned centroid index via the
    #  'rainbow' colormap
    ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], cmap='rainbow', c=idx[sel], s=8**2)
    ax.set_title('Pixel dataset plotted in 3D. Color shows centroid memberships')
    plt.savefig('figure8.png')

    print('Program paused. Press enter to continue.')
    #pause

    ## === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
    # Use PCA to project this cloud to 2D for visualization

    # Subtract the mean to use PCA
    X_norm, mu, sigma = featureNormalize(X)

    # PCA and project the data to 2D
    U, S = pca(X_norm)
    Z = projectData(X_norm, U, 2)

    # Plot in 2D on a fresh figure, reusing the sample indexes from Part 8(a)
    # NOTE(review): idx[sel] is wrapped in a one-element list and the last
    # argument is 0; why is not visible here -- confirm against
    # plotDataPoints.
    fig = plt.figure()
    plotDataPoints(Z[sel, :], [idx[sel]], K, 0)
    plt.title('Pixel dataset plotted in 2D, using PCA for dimensionality reduction')
    plt.savefig('figure9.png')
    print('Program paused. Press enter to continue.\n')

Exemplo n.º 7

0

Exibir arquivo

Arquivo: ex7_pca.py Projeto: arturomp/coursera-machine-learning-in-python

# Re-load the image from the previous exercise and run K-Means on it
# For this to work, you need to complete the K-Means assignment first

# A = double(imread('bird_small.png'));
mat = scipy.io.loadmat('bird_small.mat')
A = mat["A"]

# from ex7.py, part 4
A = A / 255.0
img_size = A.shape
X = A.reshape(img_size[0] * img_size[1], 3, order='F').copy()
K = 16 
max_iters = 10
initial_centroids = kmic.kMeansInitCentroids(X, K)
centroids, idx = rkm.runkMeans(X, initial_centroids, max_iters)

#  Sample 1000 random indexes (since working with all the data is
#  too expensive. If you have a fast computer, you may increase this.
#  use flatten(). otherwise, Z[sel, :] yields array w shape [1000,1,2]
sel = np.floor(np.random.rand(1000, 1) * X.shape[0]).astype(int).flatten()

#  Setup Color Palette
palette = hsv.hsv(K)
colors = np.array([palette[int(i)] for i in idx[sel]])

#  Visualize the data and centroid memberships in 3D
fig1 = plt.figure(1)
ax = fig1.add_subplot(111, projection='3d')
ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], s=100, c=colors)
plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships')

Exemplo n.º 8

0

Exibir arquivo

# print(image)
# print(image_size)
# print(image.shape)
x = image.shape[0]
y = image.shape[1]
image_formatted = np.reshape(image, (x * y, 3))
# print(image)

k = 16
max_iters = 10

initial_centroids = kMeansInitCentroids(image_formatted, k)
print(initial_centroids)

# Run K-Means
[centroids, idx] = runkMeans(image_formatted, initial_centroids, max_iters,
                             False)

idx = findClosestCentroids(image_formatted, centroids)

feature_size = len(image_formatted[0])
len_idx = len(idx)
X_recovered = [[0] * feature_size for _ in range(len_idx)]

for i in range(len_idx):
    for j in range(feature_size):
        X_recovered[i][j] = centroids[idx[i] - 1][j]

X_recovered = np.reshape(X_recovered, (x, y, 3))
X_recovered = X_recovered * 255

plt.subplot(1, 2, 1)

Exemplo n.º 9

0

Exibir arquivo

Arquivo: Top.py Projeto: chmartin/Kmeans_study

#Compute means based on the closest centroids found in the previous part.
centroids = computeCentroids(X, indicies, K)

print 'Centroids computed after initial finding of closest centroids: \n'
print centroids[0], '\n', centroids[1], '\n', centroids[2]
print '\nthe centroids should be\n'
print '   [ 2.428301 3.157924 ]\n'
print '   [ 5.813503 2.633656 ]\n'
print '   [ 7.119387 3.616684 ]\n\n'
raw_input("Press any key to continue")

print '\nRunning K-Means clustering on example dataset.\n\n'

# Run K-Means algorithm. The 'true' at the end tells our function to plot the progress
centroids, indicies = runkMeans(X, initial_centroids, max_iters, True)

print '\nK-Means Done.\n\n'

# Step 2 Do K-Means on image colors for compression
print '\nRunning K-Means clustering on pixels from an image.\n\n'

# Open PNG image
A = mpimg.imread('bird_small.png')
A_size = asizeof.asizeof(A)

#Unroll 3D (rows x columns x [R,G,B]) into 2D ([R,G,B] x pixels)
A_flat = A.reshape(-1, 3)

# Run your K-Means algorithm on this data
# Test values

Exemplo n.º 10

0

Exibir arquivo

Arquivo: ex7_pca.py Projeto: grixxy/ml_python

#A = double(imread('bird_small.png'));

# If imread does not work for you, you can try instead
#   load ('bird_small.mat');

A = msc.imread(ml_dir+'bird_small.png')

A = A / 255
img_size = A.shape

X = A.reshape(img_size[0]*img_size[1],3)
#X = reshape(A, img_size(1) * img_size(2), 3);
K = 16
max_iters = 10
initial_centroids = kMeansInitCentroids(X, K)
[centroids, idx] = runkMeans(X, initial_centroids, max_iters)

#  Sample 1000 random indexes (since working with all the data is
#  too expensive. If you have a fast computer, you may increase this.
#sel = math.floor(math.rand(1000, 1) * X.shape[0]) + 1
sel = np.random.choice(X.shape[0], 1000)

#  Setup Color Palette
#palette = hsv(K);
#colors = palette(idx(sel), :);

colors = idx[sel]
#  Visualize the data and centroid memberships in 3D
#figure;
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

Exemplo n.º 11

0

Exibir arquivo

Arquivo: ex7.py Projeto: wenmm/coursera-machine-learning-in-python

mat = scipy.io.loadmat('ex7data2.mat')
X = mat["X"]

# Settings for running K-Means
K = 3
max_iters = 10

# For consistency, here we set centroids to specific values
# but in practice you want to generate them automatically, such as by
# settings them to be random examples (as can be seen in
# kMeansInitCentroids).
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])

# Run K-Means algorithm. The 'true' at the end tells our function to plot
# the progress of K-Means
centroids, idx = rkm.runkMeans(X, initial_centroids, max_iters, True)
print('\nK-Means Done.\n')

raw_input('Program paused. Press enter to continue.')

## ============= Part 4: K-Means Clustering on Pixels ===============
#  In this exercise, you will use K-Means to compress an image. To do this,
#  you will first run K-Means on the colors of the pixels in the image and
#  then you will map each pixel onto its closest centroid.
#
#  You should now complete the code in kMeansInitCentroids.m
#

print('\nRunning K-Means clustering on pixels from an image.\n\n')

#  Load an image of a bird