示例#1
0
    def compress(self, X):
        """Embed X in self.k dimensions using graph shortest-path distances.

        A symmetric nearest-neighbour graph weighted by Euclidean distance is
        built first; pairwise distances are then replaced by the output of
        utils.dijkstra on that graph, and self._fun_obj_z is minimized
        starting from a PCA projection of X.

        Returns an array of shape (X.shape[0], self.k).
        """
        n_points = X.shape[0]

        # Pairwise Euclidean distances (a symmetric matrix).
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # Weighted adjacency matrix of the neighbour graph.
        # NOTE(review): entries left at 0 are presumably read as "no edge"
        # by utils.dijkstra -- confirm against that helper.
        graph = np.zeros((n_points, n_points))
        for i in range(n_points):
            ranking = np.argsort(euclid[:, i])
            # The nn+1 closest indices with i itself removed.
            for t in np.setdiff1d(ranking[0:self.nn + 1], i):
                graph[i, t] = euclid[i, t]
                graph[t, i] = euclid[t, i]

        geodesic = utils.dijkstra(graph)

        # Disconnected pairs come back as inf; cap them at the largest finite
        # distance so they are still encouraged to end up far apart.
        unreachable = np.isinf(geodesic)
        geodesic[unreachable] = geodesic[~unreachable].max()

        # PCA projection serves as the starting point of the optimization.
        pca = PCA(self.k)
        pca.fit(X)
        Z_init = pca.compress(X)

        # Minimize the objective over the flattened embedding.
        z_opt, _ = findMin(self._fun_obj_z, Z_init.flatten(), 500, geodesic)
        return z_opt.reshape(n_points, self.k)
示例#2
0
    def compress(self, X):
        """Compute a self.k-dimensional embedding of X from ISOMAP distances.

        Steps: Euclidean distances -> symmetric nearest-neighbour graph ->
        shortest-path distances via utils.dijkstra -> minimization of
        self._fun_obj_z initialized with PCA.

        Returns an array of shape (X.shape[0], self.k).
        """
        n_points = X.shape[0]

        # Pairwise Euclidean distances.
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # Nearest-neighbour graph: each row's nn+1 smallest distances are
        # copied in both directions so the adjacency matrix stays symmetric.
        graph = np.zeros([n_points, n_points])
        for i in range(n_points):
            nearest = np.argsort(euclid[i])[:self.nn + 1]
            graph[i, nearest] = euclid[i, nearest]
            graph[nearest, i] = euclid[nearest, i]

        # ISOMAP distances: shortest paths through the graph.
        geodesic = utils.dijkstra(graph)

        # Disconnected pairs (inf) are capped at the largest finite distance
        # so they are still pushed far apart.
        unreachable = np.isinf(geodesic)
        geodesic[unreachable] = geodesic[~unreachable].max()

        # PCA projection is the starting point of the optimization.
        pca = PCA(self.k)
        pca.fit(X)
        Z_init = pca.compress(X)

        # Minimize the objective over the flattened embedding.
        z_opt, _ = findMin(self._fun_obj_z, Z_init.flatten(), 500, geodesic)
        return z_opt.reshape(n_points, self.k)
示例#3
0
    def compress(self, X):
        """Embed X in self.k dimensions from neighbour-graph path distances.

        Builds a symmetric graph linking every point to the first nn+1
        entries of its distance ranking, runs utils.dijkstra on it, and
        minimizes self._fun_obj_z from a PCA initialization.

        Returns an array of shape (X.shape[0], self.k).
        """
        n_points = X.shape[0]

        # Pairwise Euclidean distances.
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # Row-wise ranking of every point's distances, closest first.
        ranking = np.argsort(euclid)
        graph = np.zeros((n_points, n_points))

        for i in range(euclid.shape[0]):
            for rank in range(self.nn + 1):
                j = ranking[i, rank]
                # Store the edge in both directions to keep the graph symmetric.
                graph[i, j] = euclid[i, j]
                graph[j, i] = euclid[j, i]

        path_dist = utils.dijkstra(graph)

        # Replace inf (unreachable pairs) by the largest finite distance.
        unreachable = np.isinf(path_dist)
        path_dist[unreachable] = path_dist[~unreachable].max()

        # PCA projection is the starting point of the optimization.
        pca = PCA(self.k)
        pca.fit(X)
        Z_init = pca.compress(X)

        # Minimize the objective over the flattened embedding.
        z_opt, _ = findMin(self._fun_obj_z, Z_init.flatten(), 500, path_dist)
        return z_opt.reshape(n_points, self.k)
示例#4
0
    def compress(self, X):
        """Embed X in self.k dimensions from graph-based distances.

        The Euclidean distance matrix is handed to self.construct_dist_graph,
        whose result is used as the target distances when minimizing
        self._fun_obj_z from a PCA initialization.

        Returns an array of shape (X.shape[0], self.k).
        """
        n_points = X.shape[0]

        # Pairwise Euclidean distances, then the graph-based distances
        # derived from them by construct_dist_graph.
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))
        graph_dist = self.construct_dist_graph(X, euclid)

        # Disconnected pairs (inf) are capped at the largest finite distance
        # in the graph so they are still encouraged to be far apart.
        unreachable = np.isinf(graph_dist)
        graph_dist[unreachable] = graph_dist[~unreachable].max()

        # PCA projection is the starting point of the optimization.
        pca = PCA(self.k)
        pca.fit(X)
        Z_init = pca.compress(X)

        # Minimize the objective over the flattened embedding.
        z_opt, _ = findMin(self._fun_obj_z, Z_init.flatten(), 500, graph_dist)
        return z_opt.reshape(n_points, self.k)
示例#5
0
    def compress(self, X):
        """Embed X in self.k dimensions from plain Euclidean distances.

        The pairwise Euclidean distance matrix of X is the target for
        self._fun_obj_z, which is minimized starting from a PCA projection.

        Returns an array of shape (X.shape[0], self.k).
        """
        n_points = X.shape[0]
        n_components = self.k

        # Pairwise Euclidean distances.
        euclid = np.sqrt(utils.euclidean_dist_squared(X, X))

        # PCA projection is the starting point of the optimization.
        pca = PCA(n_components)
        pca.fit(X)
        Z_init = pca.compress(X)

        # Minimize the objective over the flattened embedding.
        z_opt, _ = findMin(self._fun_obj_z, Z_init.flatten(), 500, euclid)
        return z_opt.reshape(n_points, n_components)
示例#6
0
    def compress(self, X):
        """Embed X in self.k dimensions from neighbour-graph path distances.

        Builds a symmetric graph connecting every point to its self.nn
        nearest other points, fills the upper triangle of the distance matrix
        with utils.dijkstra(G, i, j), and minimizes self._fun_obj_z starting
        from a PCA projection.

        Returns an array of shape (X.shape[0], self.k).
        """
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        # Push every self-distance to +inf so a point always sorts last in
        # its own ranking and can never be picked as its own neighbour.
        np.fill_diagonal(D, np.inf)

        # Weighted adjacency matrix of the neighbour graph. The np.matrix
        # type is kept so utils.dijkstra receives the same kind of object
        # as before.
        G = np.matrix(np.zeros((n, n)))
        for i in range(n):
            neighbours = np.argsort(D[:, i])
            # Because the diagonal is inf, index 0 is already the closest
            # *other* point, so the nn nearest neighbours are the first nn
            # entries. (Slicing [1:nn + 1] here would skip the true nearest
            # neighbour and link ranks 2..nn+1 instead.)
            for j in neighbours[:self.nn]:
                G[i, j] = D[i, j]
                G[j, i] = D[j, i]

        # Weighted shortest path between every pair i < j (Dijkstra's).
        # NOTE(review): only the upper triangle is filled; the lower triangle
        # and diagonal stay 0 -- confirm _fun_obj_z only reads entries i < j.
        D = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                D[i, j] = utils.dijkstra(G, i, j)

        # If two points are disconnected (distance is Inf)
        # then set their distance to the maximum
        # distance in the graph, to encourage them to be far apart.
        D[np.isinf(D)] = D[~np.isinf(D)].max()

        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, self.k)
        return Z
    def compress(self, X):
        """Embed X in self.k dimensions from neighbour-graph path distances.

        Row i of the graph keeps the Euclidean distance to every point whose
        distance is among the nn+1 smallest values of that row; all other
        entries stay inf. Pairwise distances are then replaced by
        utils.dijkstra(G, i, j) and self._fun_obj_z is minimized starting
        from a PCA projection.

        Returns an array of shape (X.shape[0], self.k).
        """
        n = X.shape[0]

        # Compute Euclidean distances
        D = utils.euclidean_dist_squared(X, X)
        D = np.sqrt(D)

        # Neighbour graph; inf marks "no edge".
        # NOTE(review): the graph is not symmetrized here -- confirm that
        # utils.dijkstra treats it as undirected.
        G = np.full((n, n), np.inf)
        for i in range(n):
            # The nn+1 smallest distances of row i depend only on i, so they
            # are computed once per row rather than once per (i, j) pair.
            nearest = sorted(D[i])[:self.nn + 1]
            for j in range(n):
                if D[i][j] in nearest:
                    G[i, j] = D[i][j]

        # Replace every Euclidean distance by the shortest-path distance.
        for i in range(n):
            for j in range(n):
                D[i][j] = utils.dijkstra(G, i, j)

        # If two points are disconnected (distance is Inf)
        # then set their distance to the maximum
        # distance in the graph, to encourage them to be far apart.
        D[np.isinf(D)] = D[~np.isinf(D)].max()

        # Initialize low-dimensional representation with PCA
        pca = PCA(self.k)
        pca.fit(X)
        Z = pca.compress(X)

        # Solve for the minimizer
        z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
        Z = z.reshape(n, self.k)
        return Z
示例#8
0
        for i in range(n):
            plt.annotate(animals[i], (X[i, f1], X[i, f2]))
        utils.savefig('two_random_features.png')

    elif question == '2.2':
        dataset = load_dataset('animals.pkl')
        X = dataset['X'].astype(float)
        animals = dataset['animals']
        n, d = X.shape

        # standardize columns
        X = utils.standardize_cols(X)

        model = PCA(k=2)
        model.fit(X)
        Z = model.compress(X)
        fig, ax = plt.subplots()
        plt.ylabel('z2')
        plt.xlabel('z1')
        ax.scatter(Z[:, 0], Z[:, 1])
        for i in range(n):
            ax.annotate(animals[i], (Z[i, 0], Z[i, 1]))

        utils.savefig('q2_2_PCA_animals.png')

    elif question == '3.1':
        X = load_dataset('highway.pkl')['X'].astype(float) / 255
        n, d = X.shape
        print(n, d)
        h, w = 64, 64  # height and width of each image
示例#9
0
                        choices=['1.2', '2.1', '3', '3.1', '3.2'])

    io_args = parser.parse_args()
    question = io_args.question

    if question == '1.2':
        dataset = utils.load_dataset('animals')
        X = dataset['X'].astype(float)
        animals = dataset['animals']
        n, d = X.shape
        k = 5
        X = utils.standardize_cols(X)  # standardize columns

        model = PCA(k=2)
        model.fit(X)
        Z = model.compress(X)

        # Plot the matrix
        plt.imshow(Z)
        utils.savefig('q1_unsatisfying_visualization_1.png')

        ## Randomly plot two features, and label all points

        fig, ax = plt.subplots()
        ax.scatter(Z[:, 0], Z[:, 1])
        for i in range(n):
            ax.annotate(animals[i], (Z[i, 0], Z[i, 1]))
        utils.savefig('q1_unsatisfying_visualization_2.png')
        v = 1 - norm(np.dot(Z, model.W) - X, 'fro')**2 / norm(X, 'fro')**2
        print v  #The variance
示例#10
0
# Train: scale pixel values from [0, 255] to [-1, 1]; column 0 is the label.
# The builtin int replaces np.int, a plain alias for it that was deprecated
# in NumPy 1.20 and removed in 1.24.
print("[+] Processing data...")
X = (data[:, 1:].astype(int) - 127.5) / 127.5
y = data[:, 0].astype(int)

# Fit PCA on the training set and keep the compressed features.
print("[+] Running PCA...")
pca = PCA()
X = pca.fit_compress(X, 500)

print("[+] Fitting neural net...")
model = NeuralNetwork((500, 300, 100, 10), alpha=8e-2, reg=1e-3, batch_size=60, epochs=3, momentum = 0.8)
model.fit(X, y)

# Read the whole test CSV; the context manager closes the file handle, and
# newline="" is what the csv module asks for when it is given a file object.
print("[+] Loading test data...")
with open("mnist_test.csv", "r", newline="") as test_file:
    reader = csv.reader(test_file)
    data = np.array(list(reader))

# Same preprocessing as for the training data.
print("[+] Processing data...")
X = (data[:, 1:].astype(int) - 127.5) / 127.5
y = data[:, 0].astype(int)

# Reuse the PCA fitted on the training set.
print("[+] Compressing data...")
X = pca.compress(X)

print("[+] Making predictions...")
predictions = np.array(model.predict(X))

# Fraction of test samples whose prediction equals the label.
print("[+] Calculating accuracy...")
accuracy = sum(predictions == y) / len(y)
print(accuracy)
示例#11
0
# Scatter plot of the 2-D t-SNE features, coloured by label.
fig = pyplot.figure()
ax = fig.add_subplot(1, 1, 1)
# x and y are passed by keyword: seaborn 0.12+ no longer accepts them
# positionally. The target axes are named explicitly instead of relying on
# the "current axes" state.
sns.scatterplot(x=features_tsne[:, 0],
                y=features_tsne[:, 1],
                hue=labels,
                legend='full',
                ax=ax)
ax.set_title("T-SNE on Iris Data-Set", fontsize=16)

##################################################

print("Plotting PCA projection of data-set and classifier.")

# Analyze the features with PCA, persist the result, and compress to 2-D.
pca = PCA()
pca.analyze(features)
pca.save("iris_results/iris")
features_compressed = pca.compress(features, 2)

fig = pyplot.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_title('MLP-Classification of the Iris Data-Set', fontsize=16)

ax.set_xlim([-4.0, 4.0])
ax.set_xlabel("PCA Component 0", fontsize=12)
ax.set_ylim([-1.5, 1.5])
ax.set_ylabel("PCA Component 1", fontsize=12)

# Dense grid over the visible axes range, with a step of 0.005 per axis.
XX, YY = np.meshgrid(np.arange(*ax.get_xlim(), 0.005),
                     np.arange(*ax.get_ylim(), 0.005))
XY = np.vstack((XX.ravel(), YY.ravel())).T
# Map every grid point back through the PCA, classify it, and keep the index
# of the highest-scoring output as the predicted class.
ZZ = np.argmax(model.predict(pca.decompress(XY)), axis=1).reshape(XX.shape)
# Shade the predicted-class regions.
ax.contourf(XX, YY, ZZ + 1e-6, levels=3, colors=['g', 'b', 'r'], alpha=0.2)
示例#12
0
# Append every entry of `dimensions` except the last to the run name,
# separated by underscores, then show the resulting name.
for d in dimensions[:-1]:
    name += '_' + str(d)
print(name)

##################################################

# PCA stage: either run a fresh analysis on the training samples and save it,
# or load a previously saved one. The choice is toggled by hand via `new_pca`.
pca = PCA()
new_pca = False

if new_pca:
    # NOTE(review): `eigs` is not used in the visible part of this script.
    eigs = pca.analyze(samples_train)
    pca.save("faces_results/faces")
else:
    pca.load("faces_results/faces")

# Compress both splits with the same PCA, down to dimensions[0] components.
samples_train_compressed = pca.compress(samples_train, dimensionality=dimensions[0])
samples_test_compressed = pca.compress(samples_test, dimensionality=dimensions[0])

##################################################

mlp = MLP(dimensions)
new_mlp = False

if new_mlp:
    mlp.train(samples_train_compressed,
              targets_train,
              max_epochs=200,
              step=0.1,
              gain=0.9)
    mlp.save(name)
else: