Example No. 1
    def test_PCA_for_Visualization(self):
        plt.close()
        # Re-load the image from the previous exercise and run K-Means on it
        # For this to work, you need to complete the K-Means assignment first

        # A = double(imread('bird_small.png'));
        # If imread does not work for you, you can try instead
        mat = scipy.io.loadmat('resource/bird_small.mat')
        A = mat["A"]

        A = A / 255
        image_size = np.shape(A)
        X = A.reshape(image_size[0] * image_size[1], 3)
        K = 16
        max_iters = 10
        from ex7_K_means_Clustering_and_Principal_Component_Analysis.kMeansInitCentroids import kMeansInitCentroids
        initial_centroids = kMeansInitCentroids(X, K)
        from ex7_K_means_Clustering_and_Principal_Component_Analysis.runkMeans import runkMeans
        centroids, idx = runkMeans(X, initial_centroids, max_iters, True)

        # Sample 1000 random indexes (since working with all the data is
        # too expensive). If you have a fast computer, you may increase this.
        sel = np.floor(np.random.rand(1000, 1) *
                       X.shape[0]).astype(int).flatten()

        # Setup Color Palette
        from utils.hsv import hsv
        palette = hsv(K)
        colors = np.array([palette[int(i)] for i in idx[sel]])

        # fig = plt.figure()
        # ax = fig.add_subplot(111, projection='3d')
        # ax.scatter(X[sel, 0], X[sel, 1], X[sel, 2], s=100, c=colors)
        # plt.title('Pixel dataset plotted in 3D. Color shows centroid memberships')
        # plt.show(block=False)

        # === Part 8(b): Optional (ungraded) Exercise: PCA for Visualization ===
        # Use PCA to project this cloud to 2D for visualization

        from ex5_regularized_linear_regressionand_bias_vs_variance.featureNormalize import featureNormalize
        # Subtract the mean to use PCA
        X_norm, _, _ = featureNormalize(X)

        # PCA and project the data to 2D
        from ex7_K_means_Clustering_and_Principal_Component_Analysis.pca import pca
        U, S = pca(X_norm)

        from ex7_K_means_Clustering_and_Principal_Component_Analysis.projectData import projectData
        Z = projectData(X_norm, U, 2)

        # Plot in 2D
        plt.figure(2)
        from ex7_K_means_Clustering_and_Principal_Component_Analysis.plotDataPoints import plotDataPoints
        plotDataPoints(Z[sel, :], idx[sel], K)
        plt.title(
            'Pixel dataset plotted in 2D, using PCA for dimensionality reduction'
        )
        plt.show(block=False)
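
The helpers imported above (featureNormalize, pca, projectData) are not shown in this listing. Below is a minimal sketch of what they typically look like, with signatures assumed from how they are called here:

import numpy as np

def featureNormalize(X):
    # Assumed behaviour: zero-mean, unit-variance scaling per column,
    # returning the statistics so they can be reused on other data
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma

def pca(X_norm):
    # Assumed behaviour: principal directions via SVD of the covariance matrix
    m = X_norm.shape[0]
    Sigma = (X_norm.T @ X_norm) / m   # covariance of the normalized data
    U, S, _ = np.linalg.svd(Sigma)    # columns of U are the principal directions
    return U, S

def projectData(X_norm, U, K):
    # Assumed behaviour: project each example onto the top-K principal components
    return X_norm @ U[:, :K]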
Example No. 2
    def test_PCA_on_Face_Data_Eigenfaces(self):
        print("Running PCA on face dataset.")
        print("This might take a minute or two ...")

        #  Load Face dataset
        mat = scipy.io.loadmat('resource/ex7faces.mat')
        X = np.array(mat["X"])

        #  Before running PCA, it is important to first normalize X by subtracting
        #  the mean value from each feature
        from utils.featureNormalize import featureNormalize
        X_norm, _, _ = featureNormalize(X)

        #  Run PCA
        from ex7_K_means_Clustering_and_Principal_Component_Analysis.pca import pca
        U, S = pca(X_norm)

        #  Visualize the top 36 eigenvectors found
        from utils.displayData import displayData
        displayData(U[:, :36].T)

        #  ============= Part 6: Dimension Reduction for Faces =================
        #  Project images to the eigen space using the top k eigenvectors
        #  (if you were applying a learning algorithm, you would use the
        #  projected data instead of the original pixels)
        print("Dimension reduction for face dataset.")
        K = 100
        from ex7_K_means_Clustering_and_Principal_Component_Analysis.projectData import projectData
        Z = projectData(X_norm, U, K)

        print("The projected data Z has a size of: {z}".format(z=np.shape(Z)))

        #  ==== Part 7: Visualization of Faces after PCA Dimension Reduction ====
        #  Project images to the eigen space using the top K eigen vectors and
        #  visualize only using those K dimensions
        #  Compare to the original input, which is also displayed

        print("Visualizing the projected (reduced dimension) faces.")
        K = 100
        from ex7_K_means_Clustering_and_Principal_Component_Analysis.recoverData import recoverData
        X_rec = recoverData(Z, U, K)

        # Display normalized data
        plt.close()
        plt.subplot(1, 2, 1)
        displayData(X_norm[:100, :])
        plt.title('Original faces')
        plt.gca().set_aspect('equal', adjustable='box')

        # Display reconstructed data from only k eigenfaces
        plt.subplot(1, 2, 2)
        displayData(X_rec[:100, :])
        plt.title('Recovered faces')
        plt.gca().set_aspect('equal', adjustable='box')
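
recoverData is the inverse of projectData. A minimal sketch consistent with the call above (the signature is assumed from usage):

def recoverData(Z, U, K):
    # Approximate reconstruction: map the K-dimensional projection back into
    # the original feature space using the same top-K eigenvectors
    return Z @ U[:, :K].T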
Example No. 3
def polyFeatures(X, p):
    # Map a single-feature column vector X to polynomial features
    # [X, X**2, ..., X**p], one power per column
    X_poly = X
    for i in range(2, p + 1):
        X_poly = np.hstack((X_poly, X**i))
    return X_poly


p = 8

# Map X onto Polynomial Features and Normalize
X_poly = polyFeatures(X, p)
# print(X_poly.shape)
# print(X_poly[:,1])
# print(X)
X_poly, mu, sigma = utils.featureNormalize(X_poly)
X_poly = np.concatenate([np.ones((m, 1)), X_poly], axis=1)

# Map X_poly_test and normalize (using mu and sigma)
X_poly_test = polyFeatures(Xtest, p)
# print(X_poly_test.shape)
# print(X_poly_test[:,1])
# print(Xtest)
X_poly_test -= mu
X_poly_test /= sigma
X_poly_test = np.concatenate([np.ones((ytest.size, 1)), X_poly_test], axis=1)

# Map X_poly_val and normalize (using mu and sigma)
X_poly_val = polyFeatures(Xval, p)
X_poly_val -= mu
X_poly_val /= sigma
X_poly_val = np.concatenate([np.ones((Xval.shape[0], 1)), X_poly_val], axis=1)
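
A quick check of polyFeatures on a toy column vector (the input here is made up for illustration):

import numpy as np

X_toy = np.array([[1.0], [2.0], [3.0]])
print(polyFeatures(X_toy, 3))
# [[ 1.  1.  1.]
#  [ 2.  4.  8.]
#  [ 3.  9. 27.]]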
Example No. 4
#  Visualize the example dataset
pyplot.plot(X[:, 0], X[:, 1], 'bo', ms=10, mec='k', mew=1)
pyplot.axis([0.5, 6.5, 2, 8])
pyplot.gca().set_aspect('equal')
pyplot.grid(False)
pyplot.show()

## =============== Part 2: Principal Component Analysis ===============
#  You should now implement PCA, a dimension reduction technique. You
#  should complete the code in pca.m
#
print('\nRunning PCA on example dataset.\n\n')

#  Before running PCA, it is important to first normalize X
X_norm, mu, sigma = utils.featureNormalize(X)

#  Run PCA
U, S = pca.pca(X_norm)

#  mu, the mean of each feature, was computed by featureNormalize above

#  Draw the eigenvectors centered at mean of data. These lines show the
#  directions of maximum variations in the dataset.
fig, ax = pyplot.subplots()
ax.plot(X[:, 0], X[:, 1], 'bo', ms=10, mec='k', mew=0.25)

for i in range(2):
    # Arrow along each principal direction, scaled by its variance S[i]
    ax.arrow(mu[0], mu[1],
             1.5 * S[i] * U[0, i], 1.5 * S[i] * U[1, i],
             head_width=0.25, head_length=0.2, fc='k', ec='k')
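
A quick sanity check on the pca output (not part of the original snippet): the columns of U should be orthonormal, and S holds the variance captured along each principal direction.

import numpy as np

# U, S as returned by pca(X_norm) above
assert np.allclose(U.T @ U, np.eye(U.shape[1]))
print('Variance along each principal direction:', S)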
Example No. 5
print('Loading data...')
X = []
Y = []
with open('ex1data3.csv') as f:
    csv_reader = csv.reader(f, delimiter=',')
    for row in csv_reader:
        X.append([row[0], row[1]])
        Y.append([row[2]])
X = np.asarray(X).astype(float)
Y = np.asarray(Y).astype(float)
m = Y.size

# Task 1 - Feature normalization
print('Normalizing features...')
X, mu, sigma = ut.featureNormalize(X)
ones_column = np.ones((m, 1))
X = np.hstack((ones_column, X))

# Task 2 - Gradient descent
print('Running gradient descent...')
alpha = 0.01
num_iters = 400

theta = np.zeros((3, 1))
theta, J_history = ut.gradient_descent(X, Y, theta, alpha, num_iters)

fig = plt.figure()
ax = plt.axes()
plt.plot(np.arange(J_history.size), J_history)
ax.set_xlabel("Number of iterations")
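
ut.gradient_descent is not shown in this example. A minimal sketch consistent with how it is called above (vectorized batch gradient descent for linear regression; the signature and return values are assumed from usage):

import numpy as np

def gradient_descent(X, y, theta, alpha, num_iters):
    # Batch gradient descent for linear regression with squared-error cost
    m = y.size
    J_history = np.zeros(num_iters)
    for it in range(num_iters):
        error = X @ theta - y                               # (m, 1) residuals
        J_history[it] = (error.T @ error).item() / (2 * m)  # cost before this update
        theta = theta - (alpha / m) * (X.T @ error)         # simultaneous parameter update
    return theta, J_history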