Пример #1
0
    def test_generator(self):
        """Check that generated random indexes are balanced and well-formed.

        Verifies that a generated index has as many positive as negative
        entries (half of ``num_active`` each), that its dense vector has the
        requested dimension with values bounded by -1 and 1 and summing to
        zero, and finally prints a rough timing for repeated generation.
        """
        dim = 1000
        active = 666

        gen = ri.Generator(dim=dim, num_active=active)

        ri1 = gen.generate()

        # The positive and negative sets must be the same size.  (The previous
        # version compared len(ri1.negative) with itself, which always passed.)
        self.assertEqual(len(ri1.positive), len(ri1.negative))
        self.assertEqual(len(ri1.positive), gen.num_active // 2)

        v1 = ri1.to_vector()

        self.assertEqual(len(v1), dim)
        self.assertEqual(v1.max(), 1)
        self.assertEqual(v1.min(), -1)
        self.assertEqual(v1.sum(), 0)

        # Every further index produced by the same generator must also expand
        # to a vector of the configured dimension.
        for _ in range(18):
            self.assertEqual(len(gen.generate().to_vector()), dim)

        # Informational only: rough wall-clock cost of 999 generate() calls.
        t0 = time.perf_counter()
        for _ in range(999):
            gen.generate()
        t1 = time.perf_counter()
        print("time to generate: ", t1 - t0)
Пример #2
0
    def test_to_dist_vector(self):
        """Smoke test: print a random index and its ``to_dist_vector()`` output.

        No assertions are made; the test only fails if generation or the
        conversion raises.
        """
        generator = ri.Generator(dim=10, num_active=4)
        index = generator.generate()

        print(index)
        print(index.to_dist_vector())
Пример #3
0
    def test_rescale(self):
        """Print a sum of random index vectors, raw and divided by its maximum.

        No assertions are made; the output is for visual inspection only.
        """
        generator = ri.Generator(dim=10, num_active=4)

        first = generator.generate().to_vector()
        second = generator.generate().to_vector()

        # The first vector is deliberately added twice.
        total = first + first + second
        print(total)

        # Rescale by the largest entry of the summed vector.
        peak = np.max(total, axis=0)
        print(total / peak)
Пример #4
0
    def test_to_sparse(self):
        """Round-trip a random index through ``ri_to_sparse`` and back.

        The sparse form must expose exactly ``active`` active entries, and an
        index rebuilt from those entries must expand to the same dense vector
        as the original index.
        """
        dim, active = 100, 4
        generator = ri.Generator(dim=dim, num_active=active)

        original = generator.generate()
        expected_vector = original.to_vector()

        sparse = ri_to_sparse(original)
        self.assertEqual(len(sparse.active), active)

        rebuilt = ri.ri_from_indexes(dim, sparse.active)
        npt.assert_array_equal(expected_vector, rebuilt.to_vector())
Пример #5
0
    def test_svd(self):
        """Exercise SVD on a matrix whose rows are random index vectors.

        Builds a ``num_samples x dim`` matrix, decomposes it with a reduced
        SVD, checks that the factors reconstruct the original matrix, and then
        checks the shapes of a rank-``k`` approximation and of the
        ``k``-dimensional reduced representation.
        """
        dim = 500
        active = 5
        gen = ri.Generator(dim=dim, num_active=active)

        num_samples = 100

        # A plain 2-D ndarray is used instead of np.matrix, which NumPy no
        # longer recommends; every operation below works on regular arrays.
        c_matrix = np.array([gen.generate().to_vector() for _ in range(num_samples)])

        print("Original: ", c_matrix.shape)

        # Reduced SVD: c_matrix == u . diag(s) . vt
        u, s, vt = np.linalg.svd(c_matrix, full_matrices=False)
        print("Decomposition: ", (u.shape, np.diag(s).shape, vt.shape))

        # Reconstruct the original matrix from all three factors.
        r_matrix = np.dot(u, np.dot(np.diag(s), vt))
        print("Re-Construction: ", r_matrix.shape)
        self.assertTrue(np.allclose(c_matrix, r_matrix))

        # Low-rank approximation: keep only the k leading singular triplets.
        k = 2
        ru = u[:, :k]
        rs = np.diag(s[:k])
        rvt = vt[:k]

        print("Low-Rank Decomposition: ", (ru.shape, rs.shape, rvt.shape))
        lr_matrix = np.dot(ru, np.dot(rs, rvt))
        print("Low-Rank Approximation Shape: ", lr_matrix.shape)
        self.assertEqual(lr_matrix.shape, c_matrix.shape)

        # Dimensional reduction: keep only u and s.  vt is what maps the
        # reduced rows back to the original column space, so it is left out.
        ld_matrix = np.dot(ru, rs)
        print("Lower-Dimensional Matrix: ", ld_matrix.shape)
        self.assertEqual(ld_matrix.shape, (num_samples, k))
Пример #6
0
import os.path

import h5py
import numpy as np

from deepsign.rp import ri

# Resolve the user's home directory.  os.path.expanduser("~") honours $HOME
# when it is set and otherwise falls back to a platform-specific lookup, so
# the script no longer fails with a TypeError (None + str) when HOME is unset.
home = os.path.expanduser("~")
# The trailing empty component keeps the trailing separator on result_path.
result_path = os.path.join(home, "data", "results", "")

# Random-index configuration: vector dimension, number of active entries,
# and how many index vectors to sample.
ri_dim = 500
ri_active = 5
num_samples = 100

# Build a num_samples x ri_dim matrix with one random index vector per row.
# NOTE(review): np.matrix is kept because code outside this section may rely
# on the matrix type; NumPy recommends plain ndarrays for new code.
gen = ri.Generator(dim=ri_dim, num_active=ri_active)
c_matrix = np.matrix([gen.generate().to_vector() for i in range(num_samples)])

# Metadata to accompany the matrix in the hdf5 dataset: a placeholder
# vocabulary (the same UTF-8 encoded word for every row) and a count of 1
# per row.
sample_word = "test".encode("utf8")
vocab = np.array([sample_word] * num_samples)

counts = np.array([1] * num_samples)

filename = "random_indexing_test.hdf5"
dataset_path = os.path.join(result_path, filename)
print("writing to ", dataset_path)

# The dataset name encodes the random-index dimension and active count.
dataset_name = "ri_d{0}_a{1}".format(ri_dim, ri_active)
print("dataset: " + dataset_name)