def test_generator(self):
    """Check the structure of a generated random index and time generation.

    Builds a generator with ``dim=1000`` and ``num_active=666``, then checks
    that a generated index has as many positive as negative entries, that each
    side holds ``num_active // 2`` entries, and that its dense vector has
    length ``dim``, maximum 1, minimum -1 and sums to 0. Finally it prints
    how long 999 further ``generate()`` calls take.
    """
    dim = 1000
    active = 666
    gen = ri.Generator(dim=dim, num_active=active)

    ri1 = gen.generate()
    # The previous check compared len(ri1.negative) with itself and could
    # never fail; the balance check must compare the two sides.
    self.assertEqual(len(ri1.positive), len(ri1.negative))
    self.assertEqual(len(ri1.positive), gen.num_active // 2)

    v1 = ri1.to_vector()
    self.assertEqual(len(v1), dim)
    self.assertEqual(v1.max(), 1)
    self.assertEqual(v1.min(), -1)
    self.assertEqual(v1.sum(), 0)

    # Smoke-test repeated generation plus dense conversion; the results are
    # not inspected, so they are not kept.
    for _ in range(18):
        gen.generate().to_vector()

    # perf_counter is monotonic and intended for measuring short durations.
    t0 = time.perf_counter()
    for _ in range(999):
        gen.generate()
    t1 = time.perf_counter()
    print("time to generate: ", t1 - t0)
def test_to_dist_vector(self):
    """Print a small random index followed by its ``to_dist_vector()`` form.

    This is a visual smoke test: it makes no assertions and only fails if
    generation or the conversion raises.
    """
    generator = ri.Generator(dim=10, num_active=4)
    random_index = generator.generate()

    print(random_index)
    print(random_index.to_dist_vector())
def test_rescale(self):
    """Print a sum of random index vectors and that sum divided by its max.

    The first vector is added twice and the second once. No assertions are
    made; the output is meant for manual inspection.
    """
    generator = ri.Generator(dim=10, num_active=4)
    first = generator.generate().to_vector()
    second = generator.generate().to_vector()

    total = first + first + second
    print(total)

    peak = np.max(total, axis=0)
    print(total / peak)
def test_to_sparse(self):
    """Round-trip a random index through its sparse form and compare vectors.

    The sparse form must list exactly ``active`` entries, and the index
    rebuilt from those entries must produce the same dense vector as the
    original index.
    """
    dim, active = 100, 4
    generator = ri.Generator(dim=dim, num_active=active)

    original = generator.generate()
    expected_vector = original.to_vector()

    sparse = ri_to_sparse(original)
    self.assertEqual(len(sparse.active), active)

    rebuilt = ri.ri_from_indexes(dim, sparse.active)
    rebuilt_vector = rebuilt.to_vector()

    npt.assert_array_equal(expected_vector, rebuilt_vector)
def test_svd(self):
    """Decompose a matrix of random index vectors with SVD and check shapes.

    Stacks 100 generated vectors (dim=500, num_active=5) into a matrix,
    verifies that the reduced SVD reconstructs it, then builds a rank-2
    approximation and a rank-2 lower-dimensional projection.
    """
    generator = ri.Generator(dim=500, num_active=5)
    sample_count = 100

    rows = [generator.generate().to_vector() for _ in range(sample_count)]
    c_matrix = np.matrix(rows)
    # Per-row rescaling was tried here and left disabled:
    # c_matrix = np.matrix([ri_v / np.max(ri_v, axis=0) for ri_v in c_matrix])
    print("Original: ", c_matrix.shape)

    # Reduced SVD: c_matrix == u * diag(s) * vt
    u, s, vt = np.linalg.svd(c_matrix, full_matrices=False)
    sigma = np.diag(s)
    print("Decomposition: ", (u.shape, sigma.shape, vt.shape))

    # Multiplying the factors back together must recover the input.
    rebuilt = np.dot(u, np.dot(sigma, vt))
    print("Re-Construction: ", rebuilt.shape)
    self.assertTrue(np.allclose(c_matrix, rebuilt))

    # Low-rank approximation: keep only the first `rank` singular triplets.
    rank = 2
    u_k = u[:, :rank]
    sigma_k = np.diag(s[:rank])
    vt_k = vt[:rank]
    print("Low-Rank Decomposition: ", (u_k.shape, sigma_k.shape, vt_k.shape))

    approx = np.dot(u_k, np.dot(sigma_k, vt_k))
    print("Low-Rank Approximation Shape: ", approx.shape)
    self.assertEqual(approx.shape, c_matrix.shape)

    # Dimensional reduction: only u and s are needed, since vt is what maps
    # back to the original column space.
    reduced = np.dot(u_k, sigma_k)
    print("Lower-Dimensional Matrix: ", reduced.shape)
import os.path

import h5py
import numpy as np

from deepsign.rp import ri

# Script: build a matrix of random index vectors plus placeholder vocabulary
# and count arrays, and work out the HDF5 file and dataset names for them.

# os.getenv("HOME") returns None when HOME is unset, which would make the
# string concatenation below raise TypeError; fall back to expanduser.
home = os.getenv("HOME") or os.path.expanduser("~")
result_path = home + "/data/results/"

ri_dim = 500
ri_active = 5
num_samples = 100

gen = ri.Generator(dim=ri_dim, num_active=ri_active)
c_matrix = np.matrix([gen.generate().to_vector() for _ in range(num_samples)])

# write the matrix to hdf5 dataset
# Every row gets the same placeholder word (UTF-8 bytes) and a count of 1.
sample_word = "test".encode("utf8")
vocab = np.array([sample_word] * num_samples)
counts = np.array([1] * num_samples)

filename = "random_indexing_test.hdf5"
dataset_path = result_path + filename
print("writing to ", dataset_path)

# The dataset name encodes the index dimension and the number of active entries.
dataset_name = "ri_d{0}_a{1}".format(ri_dim, ri_active)
print("dataset: " + dataset_name)