示例#1
0
def getCxtSubspace(wl, dim, var_threshold=0.45):
    """Return the leading principal directions of a word list's embeddings.

    Words missing from the module-level ``vecDict`` are reported and skipped.
    A full PCA is fitted first; components are counted until their cumulative
    explained-variance ratio reaches ``var_threshold`` (or all components are
    used). A second PCA restricted to that count supplies the result.

    Args:
        wl: iterable of words to look up in ``vecDict``.
        dim: not used by this function; kept so existing calls still work.
        var_threshold: cumulative explained-variance ratio at which to stop
            adding components.

    Returns:
        ``components_`` of the truncated fit, converted to nested lists.
    """
    emb = []
    for word in wl:
        if word not in vecDict:
            print("non-exist: %s" % (word,))
            continue
        # Index the mapping that was just checked, not the builtin dict type.
        emb.append(vecDict[word])
    emb = np.array(emb)

    pca = PCA()
    pca.fit(emb)

    # Count components until the requested share of variance is covered.
    cand = 0
    varSum = 0
    for var in pca.explained_variance_ratio_:
        varSum += var
        cand += 1
        if varSum >= var_threshold:
            break

    pca = PCA(n_components=cand)
    pca.fit(emb)
    top_embed = pca.components_.tolist()
    print("dim: %s %s" % (len(top_embed), cand))
    return top_embed
示例#2
0
def hack_pca(filename):
    '''
    Re-draw the bright, non-transparent pixels of an image in PCA coordinates.

    Input: filename -- input image file name/path
    Output: img -- 100 x 100 x 3 array in which every selected pixel is drawn
                   as [100, 100, 100] at its projected, mirrored position
    '''
    img_r = (plt.imread(filename)).astype(np.float64)

    # Collect (row, col) of pixels with non-zero alpha whose integer-weighted
    # luma exceeds 150.
    # NOTE(review): the 150 cut-off presumes 0-255 channel values -- confirm
    # against what plt.imread returns for the input files.
    data = []
    for i in range(img_r.shape[0]):
        for j in range(img_r.shape[1]):
            pixel = img_r[i][j]
            if pixel[3] > 0:
                luma = (pixel[0] * 299 + pixel[1] * 587 + pixel[2] * 114 +
                        500) / 1000
                if luma > 150:
                    data.append([i, j])
    data = np.array(data)

    # PCA is defined elsewhere; its first return value is used as the
    # projection matrix.
    w, _ = PCA(data)
    result = np.dot(data, w)

    # Shift to the origin and scale both axes onto the pixel range 0..99.
    result -= np.min(result, axis=0)
    extent = np.max(result, axis=0)
    # A zero extent (all points share one coordinate) would divide by zero and
    # make int() fail on NaN; dividing by 1 keeps those points at 0.
    extent = np.where(extent == 0, 1, extent)
    result[:, 0] = (result[:, 0] / extent[0]) * 99
    result[:, 1] = (result[:, 1] / extent[1]) * 99

    new_img_r = np.zeros((100, 100, 3))
    for i in range(result.shape[0]):
        # 99 - v mirrors each axis without wrapping; plain -v indexing puts
        # v == 0 at index 0 while every other value counts from the far edge.
        row = 99 - int(result[i][1])
        col = 99 - int(result[i][0])
        new_img_r[row][col] = [100, 100, 100]
    return new_img_r
示例#3
0
def main():
    """Fit logistic regression on two iris classes and plot its predictions."""
    # Keep only the samples whose class label is non-zero.
    iris = datasets.load_iris()
    selected = iris.target != 0
    X = normalize(iris.data[selected])
    y = iris.target[selected]
    # Relabel the two remaining classes as 0 and 1.
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, seed=1)

    model = LogisticRegression(gradient_descent=True)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    PCA().plot_in_2d(X_test,
                     predictions,
                     title="Logistic Regression",
                     accuracy=accuracy)
示例#4
0
    def getCentVec(self, contextVecs):
        """Return the first PCA component of all stacked context vectors.

        ``contextVecs`` is unpacked as a 3-D array (sample, rank, dim); the
        first two axes are merged so that every vector becomes one row before
        a one-component PCA is fitted.
        """
        n_samples, n_rank, n_dim = contextVecs.shape
        stacked = np.reshape(contextVecs, (n_samples * n_rank, n_dim))

        model = PCA(n_components=1)
        model.fit(stacked)
        first_component = model.components_[0]
        return first_component
示例#5
0
def pic_handle(img_data, sd, ori_size):
    """
    Run PCA on the images, map them back to the original dimension, display
    the result and print the signal-to-noise ratio of every image.
    """
    Pca = PCA(sd, img_data)
    c_data, w_star = Pca.pca()  # reduce with PCA and get the projection matrix
    w_star = np.real(w_star)
    print(w_star)
    # Map back to the original dimension
    new_data = w_star * w_star.T * c_data + Pca.mean

    # Lay the reconstructed images out side by side
    tiles = [
        new_data[:, i].T.reshape(ori_size) for i in range(Pca.data_size)
    ]
    total_img = np.hstack(tiles) if tiles else []

    # Signal-to-noise ratio per image
    print('信噪比:')
    for i in range(Pca.data_size):
        # NOTE(review): `data` is neither a parameter nor a local here;
        # presumably a module-level array of the original images -- confirm
        # it is the same data as img_data.
        original = np.array(data[:, i].T)
        restored = np.array(new_data[:, i].T)
        print('图', i, '的信噪比为:', psnr(original, restored), 'dB')

    # Convert to 8-bit, then save and show the combined picture
    total_img = np.array(total_img).astype(np.uint8)
    cv2.imwrite('pca image.jpg', total_img)
    cv2.imshow('pca image', total_img)
    cv2.waitKey(0)
0
    def compress(self, X):
        """Return an (n, self.k) embedding of X fitted to graph distances.

        A neighbour graph is built from Euclidean distances, passed through
        utils.dijkstra, and the result is used as the target distances for
        findMin, starting from a PCA compression of X.
        """
        n = X.shape[0]

        # Pairwise Euclidean distances
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # Symmetric graph keeping, for each point, the self.nn + 1 entries
        # with the smallest distance in its row
        graph = np.zeros((n, n))
        for i in range(n):
            for j in np.argsort(euclid[i])[:self.nn + 1]:
                graph[i, j] = euclid[i, j]
                graph[j, i] = euclid[j, i]

        # ISOMAP distances
        geodesic = utils.dijkstra(graph)

        # Disconnected pairs (Inf) are set to the largest finite distance in
        # the graph, to encourage them to be far apart.
        unreachable = np.isinf(geodesic)
        geodesic[unreachable] = geodesic[~unreachable].max()

        # PCA provides the starting point of the optimisation
        pca = PCA(self.k)
        pca.fit(X)
        start = pca.compress(X)

        # Solve for the minimizer
        z, _ = findMin(self._fun_obj_z, start.flatten(), 500, geodesic)
        return z.reshape(n, self.k)
示例#7
0
    def compress(self, X):
        """Return an (n, self.k) embedding of X fitted to graph distances.

        The graph distances come from self.construct_dist_graph; findMin then
        refines a PCA compression of X against them.
        """
        n = X.shape[0]

        # Pairwise Euclidean distances
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        graph_dist = self.construct_dist_graph(X, euclid)

        # Disconnected pairs (Inf) are set to the largest finite distance in
        # the graph, to encourage them to be far apart.
        unreachable = np.isinf(graph_dist)
        graph_dist[unreachable] = graph_dist[~unreachable].max()

        # PCA provides the starting point of the optimisation
        pca = PCA(self.k)
        pca.fit(X)
        start = pca.compress(X)

        # Solve for the minimizer
        z, _ = findMin(self._fun_obj_z, start.flatten(), 500, graph_dist)
        return z.reshape(n, self.k)
示例#8
0
def faceLoader() -> None:
    '''
    Load the face gallery, run PCA on its first ten columns and show the
    first column of the PCA output as a 192 x 128 grayscale image.
    '''
    gallery = importGallery()
    print(gallery.shape)

    # Work on the first ten columns only
    gallery = gallery[:, :10]
    print(gallery.shape)

    plt.figure(0)
    plt.title('First face')
    pca = PCA(10)
    print(gallery[:, :1].shape)

    _mu, _U, _C, trained = pca.train(gallery)
    alpha = pca.to_pca(trained)

    first = alpha[:, :1]
    print(first.shape)
    plt.imshow(first.reshape(192, 128), cmap='gray')

    plt.show()
示例#9
0
def hack_pca(filename):
    '''
    Input: filename -- input image file name/path
    Output: img -- image without rotation
    '''
    rgba = plt.imread(filename).astype(np.float64)  # channels: R,G,B,A
    gray = rgba[:, :, 0] * 0.3 + rgba[:, :, 1] * 0.59 + rgba[:, :, 2] * 0.11

    # Coordinates of the pixels with positive gray value, one column per pixel
    coords_int = np.array(np.where(gray > 0))
    coords = coords_int.astype(np.float64)
    n_dims, _n_points = coords.shape

    eigen_vec, eigen_val = PCA(coords)
    print(eigen_vec, eigen_val)
    projected = np.matmul(coords.T, eigen_vec).T

    # Truncate to integers and shift so the smallest index on each axis is 0
    grid = projected.astype(np.int32)
    grid = grid - np.min(grid, axis=1).reshape(n_dims, 1)

    # Copy every gray value to its projected position
    canvas = np.zeros(np.max(grid, axis=1) + 1)
    for t in range(grid.shape[1]):
        canvas[tuple(grid[:, t])] = gray[tuple(coords_int[:, t])]

    # Transpose, then reverse both axes
    return canvas.T[::-1, ::-1]
示例#10
0
def plot_iris(y, y_classes, maxit=25, *args, **kwargs):
    """Scatter-plot the output of five PCA variants on a row of five axes.

    Extra positional and keyword arguments are forwarded to VBPCA only.
    """
    _fig, axes = plot_grid(5)

    # Variational Bayes
    vb_model = VBPCA(y, *args, **kwargs)
    for _ in range(maxit):
        vb_model.update()
    plot_scatter(vb_model.transform(), y_classes, axes[0])
    axes[0].set_title('VBPCA')

    # Laplace approximation
    laplace_model = LBPCA(y.T)
    laplace_model.fit(maxit)
    plot_scatter(laplace_model.transform(2).T, y_classes, axes[1])
    axes[1].set_title('LBPCA')

    # Streaming LBPCA
    batch_model = create_distributed(np.copy(y.T), 10)
    batch_model.randomized_fit(1)
    plot_scatter(batch_model.transform(y.T, 2).T, y_classes, axes[2])
    axes[2].set_title('Batch BPCA')

    # Distributed LBPCA
    parallel_model = create_distributed(np.copy(y.T), 10)
    parallel_model.averaged_fit(maxit)
    plot_scatter(parallel_model.transform(y.T, 2).T, y_classes, axes[3])
    axes[3].set_title('Parallel BPCA')

    # PCA
    plain_model = PCA(y.T)
    plot_scatter(plain_model.fit_transform().T, y_classes, axes[4])
    axes[4].set_title('PCA')

    plt.show()
示例#11
0
def toyExample():
    """Train PCA on the toy data, print variance comparisons and plot."""
    data = scipy.io.loadmat('../data/toy_data.mat')['toy_data']

    pca = PCA(-1)
    pca.train(data)
    n_columns = data.shape[1]

    # Variance of the data next to the squared S vector of the PCA
    print("Variance of the data")
    print(np.var(data, axis=1))
    print(np.power(pca.S, 2) / n_columns)

    # Same comparison for the data projected with pca.project(data, 1)
    Xout = pca.project(data, 1)
    print("Variance of the projected data")
    print(np.var(Xout, axis=1))
    print(np.power(pca.S[0], 2) / n_columns)

    plt.figure()
    plt.title('PCA plot')

    # Left: the given data
    plt.subplot(1, 2, 1)
    pca.plot_pca(data)

    # Right: the projected data; the second entry of S is zeroed first
    plt.subplot(1, 2, 2)
    pca.S[1] = 0
    pca.plot_pca(Xout)

    plt.show()
示例#12
0
    def compress(self, X):
        """Return an (n, self.k) embedding of X fitted to graph distances.

        A neighbour graph is built from Euclidean distances, passed through
        utils.dijkstra, and the result is used as the target distances for
        findMin, starting from a PCA compression of X.
        """
        n = X.shape[0]

        # Pairwise Euclidean distances
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        graph = np.zeros((n, n))
        for i in range(n):
            # The self.nn + 1 smallest entries of column i, with i removed
            closest = np.argsort(euclid[:, i])[:self.nn + 1]
            for t in np.setdiff1d(closest, i):
                graph[i, t] = euclid[i, t]
                graph[t, i] = euclid[t, i]

        geodesic = utils.dijkstra(graph)

        # Disconnected vertices (Inf) get the largest finite distance in the
        # graph, to encourage them to be far from each other.
        unreachable = np.isinf(geodesic)
        geodesic[unreachable] = geodesic[~unreachable].max()

        # PCA provides the starting point of the optimisation
        pca = PCA(self.k)
        pca.fit(X)
        start = pca.compress(X)

        # Solve for the minimizer
        z, _ = findMin(self._fun_obj_z, start.flatten(), 500, geodesic)
        return z.reshape(n, self.k)
示例#13
0
def get_update7(id):
    """Show or process the update form for one PCA_table row.

    The row is looked up with ``get_or_404(id)``. For any method other than
    POST the update template is rendered. For POST the nine feature fields
    are read from the form as floats, stored on the row, passed to ``PCA`` as
    a NumPy array to set ``cell_class``, and the session is committed.

    Returns:
        A redirect to '/PCA' after a successful commit, the text
        'Contact DBA' when the commit fails, or the rendered template.
    """
    entry_to_update = PCA_table.query.get_or_404(id)

    if request.method != 'POST':
        return render_template('PCA_update.html',
                               entry_to_update=entry_to_update)

    feature_names = (
        'clump',
        'unifsize',
        'unifshape',
        'margadh',
        'singepisize',
        'barenuc',
        'blandchrom',
        'normnucl',
        'mit',
    )
    for feature in feature_names:
        setattr(entry_to_update, feature, float(request.form[feature]))

    payload = np.array(
        [getattr(entry_to_update, feature) for feature in feature_names])
    entry_to_update.cell_class = PCA(payload)

    try:
        db.session.commit()
    except Exception:
        # Discard the failed transaction so the session stays usable for
        # later requests; KeyboardInterrupt/SystemExit are no longer caught.
        db.session.rollback()
        return 'Contact DBA'
    return redirect('/PCA')
示例#14
0
    def hullselect(self):
        """Return indices of convex-hull points found in pairwise projections.

        self.data is factorized with PCA; the hull points are selected on the
        resulting ``H`` over its first ``self._base_sel`` dimensions. The
        number of selected points is stored in ``self.nsub``.
        """
        def selectHullPoints(data, n=20):
            """ select data points for pairwise projections of the first n
            dimensions """
            selected = np.array([])

            # every pair of the first n dimensions
            for dims in combinations(range(n), 2):
                projection = data[dims, :]

                # convex hull points in this 2D projection
                hull = quickhull(projection.T)

                # indices returned by dist.vq for the hull points, merged
                # with the indices collected so far
                selected = np.unique(
                    np.append(selected, dist.vq(projection, hull.T)))

            return np.int32(selected)

        pcamodel = PCA(self.data, show_progress=self._show_progress)
        pcamodel.factorize()

        idx = selectHullPoints(pcamodel.H, n=self._base_sel)

        # remember the number of subsampled data points
        self.nsub = len(idx)

        return idx
示例#15
0
文件: manifold.py 项目: jaysc96/CS340
    def compress(self, X):
        """Return an (n, self.k) embedding of X fitted to graph distances.

        Each point is linked to the entries ranked 1..self.K in its sorted
        distance row; the graph is passed through utils.dijkstra and find_min
        refines a PCA compression of X against the resulting distances.
        """
        n_points = X.shape[0]
        dims = self.k
        n_neighbours = self.K

        # Pairwise Euclidean distances
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # Rank 0 of every sorted row is skipped
        neighbours = np.argsort(euclid, axis=1)[:, 1:n_neighbours + 1]
        graph = np.zeros((n_points, n_points))
        for i in range(n_points):
            for j in neighbours[i]:
                graph[i, j] = euclid[i, j]
                graph[j, i] = euclid[j, i]

        geodesic = utils.dijkstra(graph)
        # Flip +Inf to -Inf so np.max yields the largest finite distance,
        # then write that value over the flipped entries.
        geodesic[geodesic == np.inf] = -np.inf
        largest = np.max(geodesic)
        geodesic[geodesic == -np.inf] = largest

        # PCA provides the starting point of the optimisation
        start = PCA(dims).fit(X).compress(X)

        # Solve for the minimizer
        z = find_min(self._fun_obj_z, start.flatten(), 500, False, geodesic)
        return z.reshape(n_points, dims)
示例#16
0
def hack_pca(filename):
    '''
    Input: filename -- input image file name/path
    Output: img -- image without rotation

    Both the grayscale input and the result are shown with matplotlib.
    '''
    img_r = (plt.imread(filename)).astype(np.float64) / 255
    img_r = rgb2gray(img_r)
    plt.imshow(img_r, cmap='gray')
    plt.show()

    # (row, col) of every pixel with a positive gray value
    m, n = img_r.shape
    xy = np.array([(i, j) for i in range(m) for j in range(n)
                   if img_r[i, j] > 0])

    vector, value = PCA(xy)
    # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
    d = np.array(np.round(np.dot(xy, vector))).astype(int)
    # Shift so the smallest projected coordinate on each axis becomes 0
    d -= np.min(d, axis=0)
    max_xy = np.max(d, axis=0)

    # Copy each gray value to its projected position, with both axes reversed
    img = np.zeros((max_xy[1] + 1, max_xy[0] + 1))
    for i in range(xy.shape[0]):
        img[max_xy[1] - d[i, 1],
            max_xy[0] - d[i, 0]] = img_r[xy[i, 0], xy[i, 1]]
    plt.imshow(img, cmap='gray')
    plt.show()
    return img
示例#17
0
    def compress(self, X):
        """Return an (n, self.k) embedding of X fitted to graph distances.

        A neighbour graph is built from Euclidean distances, passed through
        utils.dijkstra, and the result is used as the target distances for
        findMin, starting from a PCA compression of X.
        """
        n = X.shape[0]

        # Pairwise Euclidean distances
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # Symmetric graph over the first self.nn + 1 entries of each sorted row
        order = np.argsort(euclid)
        graph = np.zeros((n, n))
        for i in range(euclid.shape[0]):
            for rank in range(self.nn + 1):
                neighbour = order[i, rank]
                graph[i, neighbour] = euclid[i, neighbour]
                graph[neighbour, i] = euclid[neighbour, i]

        geodesic = utils.dijkstra(graph)

        # Inf entries are replaced by the largest finite distance
        unreachable = np.isinf(geodesic)
        geodesic[unreachable] = geodesic[~unreachable].max()

        # PCA provides the starting point of the optimisation
        pca = PCA(self.k)
        pca.fit(X)
        start = pca.compress(X)

        # Solve for the minimizer
        z, _ = findMin(self._fun_obj_z, start.flatten(), 500, geodesic)
        return z.reshape(n, self.k)
示例#18
0
    def __init__(self, img_dataset: np.ndarray):
        """
        Flatten an image dataset and hand it to a PCA transformer.

        :param img_dataset: An image dataset, a matrix of shape (N x H x W):
                - N: number of images
                - H: height of images
                - W: width of images
                - each item of the matrix is a real value between 0 and 1
            Notes: All images should have the same width and height
        """
        super().__init__()

        # The dataset must be three-dimensional: (samples, height, width)
        assert img_dataset.ndim == 3
        n_samples, height, width = img_dataset.shape
        self._n_samples = n_samples
        self._height = height
        self._width = width
        self.logger.info({
            'msg': 'Image dataset shape',
            'shape': img_dataset.shape
        })

        # Every (height, width) image becomes one vector of height * width
        n_features = height * width
        self._n_features = n_features
        self._flatten_dataset = img_dataset.reshape((n_samples, n_features))

        # Build the PCA transformer from the flattened images
        self._pca_transformer = PCA(self._flatten_dataset)
示例#19
0
def hack_pca(filename, threshold=0.6):
    '''
    Input: filename -- input image file name/path
           threshold -- minimum gray value for a pixel to be used
    Output: (img, oimg, theta) -- the grayscale input, the re-drawn binary
            image, and the angle in degrees derived from the first
            eigenvector
    '''
    img_r = plt.imread(filename).astype(np.float64) / 255
    img = (img_r[:, :, 0] * 0.299 + img_r[:, :, 1] * 0.587 +
           img_r[:, :, 2] * 0.114)

    # (row, col) of every pixel at or above the threshold
    H, W = img.shape
    data = np.array([[i, j] for i in range(H) for j in range(W)
                     if img[i, j] >= threshold])

    eigvectors, eigvalues = PCA(data)
    # Take the first eigenvector and flip it if its second entry is negative
    (vx, vy) = eigvectors[:, 0]
    (vx, vy) = (vx, vy) if vy >= 0 else (-vx, -vy)
    theta = -math.asin(-vx) * 180 / math.pi

    # Rotate the coordinates, then shift them so the minimum is 0
    R = np.array([[vy, vx], [-vx, vy]])
    odata = np.matmul(data, R)
    odata -= np.min(odata, axis=0)
    odata = odata.astype(int)

    # Mark every rotated coordinate in the output image
    nH, nW = np.max(odata, axis=0)
    oimg = np.zeros((nH + 1, nW + 1))
    oimg[odata[:, 0], odata[:, 1]] = 1.
    return img, oimg, theta
示例#20
0
    def compress(self, X):
        """Return an (n, self.k) embedding of X fitted to graph distances.

        The neighbour lists come from self.knn; the graph built from them is
        passed through utils.dijkstra and find_min refines a PCA compression
        of X against the resulting distances.
        """
        n = X.shape[0]

        # Pairwise Euclidean distances
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # Symmetric graph linking every point to the neighbours from self.knn
        graph = np.zeros((n, n))
        for i, neighbours in enumerate(self.knn(X)):
            for j in neighbours:
                graph[i, j] = euclid[i, j]
                graph[j, i] = euclid[j, i]

        geodesic = utils.dijkstra(graph)

        # Disconnected pairs (Inf) are set to the largest finite distance in
        # the graph, to encourage them to be far apart.
        unreachable = np.isinf(geodesic)
        geodesic[unreachable] = geodesic[~unreachable].max()

        # PCA provides the starting point of the optimisation
        start = PCA(self.k).fit(X).compress(X)

        # Solve for the minimizer
        z = find_min(self._fun_obj_z, start.flatten(), 500, False, geodesic)
        return z.reshape(n, self.k)
示例#21
0
def main():
    """Project the digits data onto two principal components and plot it.

    Every class is drawn as its own scatter series in its own colour, with
    one legend entry per class.
    """
    data = datasets.load_digits()
    X = data.data
    y = data.target

    # Project the data onto the 2 primary principal components
    X_trans = PCA().transform(X, 2)
    x1 = X_trans[:, 0]
    x2 = X_trans[:, 1]

    labels = np.unique(y)
    cmap = plt.get_cmap('viridis')
    colors = [cmap(i) for i in np.linspace(0, 1, len(labels))]

    # Plot the different class distributions
    class_distr = [
        plt.scatter(x1[y == label], x2[y == label], color=color)
        for label, color in zip(labels, colors)
    ]

    # Label each series with its own class value; passing the whole target
    # vector would label series k with the k-th sample's target instead.
    plt.legend(class_distr, labels, loc=1)
    plt.suptitle("PCA Dimensionality Reduction")
    plt.title("Digit Dataset")
    plt.xlabel('Principal Component 1')
    plt.ylabel('Principal Component 2')
    plt.show()
示例#22
0
def train_models():
    """Build a PCA(500, 5) model, train it when images exist, and return it.

    Returns the model together with the label dictionary from
    collect_dat_set().
    """
    images, labels, labels_dic = collect_dat_set()

    recognizer = PCA(500, 5)
    if not images:
        # Nothing to train on: hand back the untrained model.
        return recognizer, labels_dic

    recognizer.train(images, labels)
    return recognizer, labels_dic
示例#23
0
 def test_pca(self):
     """Explained variance of a small 2-D sample matches the expected values."""
     samples = np.array([[-1, -1], [-2, -1], [-3, -2],
                         [1, 1], [2, 1], [3, 2]])
     expected = np.array([0.9924, 0.0075])

     pca = PCA(n_comp=2)
     pca.fit(samples)

     matches = np.allclose(pca.explained_variance, expected, atol=1e-3)
     self.assertEqual(matches, True)
示例#24
0
    def testePCA(self):
        """Run pca.testePCA on the X matrix selected for the entered model id."""
        pca = PCA()
        matriz_x = Matrizx()
        # The model id is read from the txtIdModelo widget
        id_modelo = self.txtIdModelo.get()

        pca.testePCA(matriz_x.selectMatrizXModeloMMM(id_modelo))
示例#25
0
文件: test_pca.py 项目: nielsrolf/pca
 def test_transform_inverse_transform(self):
     """A 3-dim transform has identity covariance and inverts back to X."""
     samples = self.data(500)
     model = PCA(samples)

     projected = model.transform(samples, ndims=3)
     covariance_is_identity = np.allclose(
         np.cov(projected.T), np.eye(3), atol=1e-4, rtol=1e-2)
     self.assertTrue(covariance_is_identity)

     restored = model.inverse_transform(projected)
     self.assertTrue(np.allclose(samples, restored))
示例#26
0
def toyExample() -> None:
    """Run PCA on a slice of the face gallery and print/plot its variances."""
    # The toy data set is loaded first, then replaced by the gallery below.
    data = scipy.io.loadmat('../data/toy_data.mat')['toy_data']

    data = importGallery()
    # Limit the data for testing purposes
    data = data[:, :144].T
    print(data.shape)

    nComponents = 25
    pca = PCA(nComponents)

    # 1. Train the PCA
    mu, U, C, dataCenter = pca.train(data)

    # 2. Transform the data returned by train()
    alpha = pca.to_pca(dataCenter)

    # 3. Transform alpha back
    Xout = pca.from_pca(alpha)

    print("Variance")
    print(f'Total Variance: {np.var(data)}')
    print(f'Eigenvalues: {C} \n')
    print(f'Total Variance Transform: {np.var(alpha)}')
    print(f'Mean Eigenvalues: {np.mean(C)}')

    if nComponents != 2:
        # Bar chart of the values in C
        positions = np.arange(1, len(C) + 1)
        plt.bar(positions, C)
        plt.show()
        return

    # Exactly two components: plot the data and the back-transformed data
    plt.figure()
    plt.title('PCA plot')
    plt.subplot(1, 2, 1)
    pca.plot_pca(data)
    plt.subplot(1, 2, 2)
    pca.plot_pca(Xout)
    plt.show()
示例#27
0
def generate_pca_embedding_files():
    '''
    Generate PCA embedding csv files for the experiments.

    Reads digits-raw.csv, keeps its first two columns unchanged, replaces the
    remaining columns with the output of PCA(10).fit_transform and writes the
    result to digits-pca-embedding.csv.
    '''
    raw = genfromtxt('digits-raw.csv', delimiter=',')
    leading, features = raw[:, :2], raw[:, 2:]

    embedded = PCA(10).fit_transform(features)

    savetxt('digits-pca-embedding.csv',
            hstack((leading, embedded)),
            delimiter=',')
示例#28
0
文件: test_pca.py 项目: nielsrolf/pca
 def test_invariance_to_many_transforms(self):
     """A second transform/inverse round trip reproduces the first one."""
     samples = self.data(500)
     model = PCA(samples)

     # The first projection to 2 dims is where error is introduced
     projected = model.transform(samples, ndims=2)
     covariance_is_identity = np.allclose(
         np.cov(projected.T), np.eye(2), atol=1e-4, rtol=1e-2)
     self.assertTrue(covariance_is_identity)

     first_round = model.inverse_transform(projected)
     second_round = model.inverse_transform(
         model.transform(first_round, ndims=2))
     self.assertTrue(np.allclose(first_round, second_round))
示例#29
0
def main(matrix, pcomps):
    """Run the PCA steps on ``matrix`` and return the eigenvector matrix.

    The steps run in a fixed order: covariance matrix, eigenvectors and
    eigenvalues of that matrix, then the top principal components.
    """
    reducer = PCA(pcomps, matrix)
    steps = (
        reducer.covariance_matrix,  # saved as an instance variable
        reducer.evecs_and_evals,  # of the covariance matrix
        reducer.principal_components,  # evecs for the top evals
    )
    for run_step in steps:
        run_step()
    return reducer.get_evec_matrix()
示例#30
0
def main():
    """Reduce the Upson image data with PCA and pass it to test_rbm.

    Command line: ``[--whiten] pcaDims n_hidden learningRate``. The optional
    ``--whiten`` flag is removed from ``sys.argv`` once seen, so the three
    positional values are always read from indices 1 to 3. Exits with status
    1 after printing the usage line when too few arguments are given.

    Side effects: saves 'meanAndPc.png' and 'fracStd.png' under
    ``resman.rundir``.
    """
    whiten = False
    if len(sys.argv) > 1 and sys.argv[1] == '--whiten':
        whiten = True
        del sys.argv[1]

    if len(sys.argv) <= 3:
        print 'Usage: %s pcaDims n_hidden learningRate' % sys.argv[0]
        sys.exit(1)

    # loads data like datasets = ((train_x, train_y), ([], None), (test_x, None))
    # NOTE(review): the item assignments below need mutable containers, so
    # loadUpsonData presumably returns lists rather than tuples -- confirm.
    datasets = loadUpsonData('../data/upson_rovio_1/train_15_50000.pkl.gz',
                             '../data/upson_rovio_1/test_15_50000.pkl.gz')
    img_dim = 15  # must match actual size of training data

    print 'done loading.'

    pcaDims = int(sys.argv[1])
    # The PCA is built from the training inputs only and then applied to all
    # three splits; an empty validation split is replaced by an empty array.
    pca = PCA(datasets[0][0])  # train
    datasets[0][0] = pca.toPC(datasets[0][0], pcaDims, whiten=whiten)  # train
    datasets[1][0] = pca.toPC(
        datasets[1][0], pcaDims,
        whiten=whiten) if len(datasets[1][0]) > 0 else array([])  # valid
    datasets[2][0] = pca.toPC(datasets[2][0], pcaDims, whiten=whiten)  # test
    print 'reduced by PCA to'
    print('(%d, %d, %d) %d dimensional examples in (train, valid, test)' %
          (datasets[0][0].shape[0], datasets[1][0].shape[0],
           datasets[2][0].shape[0], datasets[0][0].shape[1]))

    # plot mean and principle components
    image = Image.fromarray(
        tile_raster_images(X=pca.meanAndPc(pcaDims).T,
                           img_shape=(img_dim, img_dim),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save(os.path.join(resman.rundir, 'meanAndPc.png'))

    # plot fractional stddev in PCA dimensions
    pyplot.semilogy(pca.fracStd, 'bo-')
    # Vertical line at the number of dimensions kept
    if pcaDims is not None:
        pyplot.axvline(pcaDims)
    pyplot.savefig(os.path.join(resman.rundir, 'fracStd.png'))
    pyplot.clf()

    # n_hidden and the learning rate come from the 2nd and 3rd arguments;
    # images are mapped back through pca.fromPC before being plotted.
    test_rbm(datasets=datasets,
             training_epochs=45,
             img_dim=img_dim,
             n_input=pcaDims if pcaDims else img_dim * img_dim,
             n_hidden=int(sys.argv[2]),
             learning_rate=float(sys.argv[3]),
             output_dir=resman.rundir,
             quickHack=False,
             visibleModel='real',
             initWfactor=.01,
             imgPlotFunction=lambda xx: pca.fromPC(xx, unwhiten=whiten))