Пример #1
0
class TestHashStorage(unittest.TestCase):
    """Round-trip tests for hash configurations through the storage objects.

    Every round-trip test stores the configuration of a freshly built hash,
    loads it back by name, applies it to an unconfigured instance of the
    same class and checks that the restored settings equal the originals.
    """

    def setUp(self):
        """Create a MemoryStorage and a RedisStorage wrapping a Redis object."""
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    # -- shared helpers ----------------------------------------------------

    def _assert_same_matrix(self, expected, actual):
        """Assert that two matrices agree in shape and in every entry."""
        # Checking the shape first catches a restored matrix with extra rows
        # or columns, which an index loop over ``expected`` alone would miss.
        self.assertEqual(expected.shape, actual.shape)
        self.assertTrue(numpy.array_equal(expected, actual),
                        'restored matrix differs from the stored one')

    def _assert_round_trip(self, storage, name, original, restored,
                           scalar_attrs, matrix_attr):
        """Store ``original``, restore it into ``restored`` and compare.

        :param storage: object offering ``store_hash_configuration`` and
            ``load_hash_configuration``.
        :param name: hash name used to load the configuration back.
        :param original: configured hash whose configuration is stored.
        :param restored: unconfigured hash receiving the loaded config.
        :param scalar_attrs: attribute names compared with ``assertEqual``,
            in the given order.
        :param matrix_attr: name of the matrix attribute compared entry
            by entry.
        """
        storage.store_hash_configuration(original)
        restored.apply_config(storage.load_hash_configuration(name))

        for attr in scalar_attrs:
            self.assertEqual(getattr(original, attr), getattr(restored, attr))

        self._assert_same_matrix(getattr(original, matrix_attr),
                                 getattr(restored, matrix_attr))

    def _check_rbp(self, storage):
        """Round-trip a RandomBinaryProjections configuration."""
        original = RandomBinaryProjections('testRBPHash', 10)
        original.reset(100)
        self._assert_round_trip(
            storage, 'testRBPHash', original,
            RandomBinaryProjections(None, None),
            ('dim', 'hash_name', 'projection_count'), 'normals')

    def _check_rdp(self, storage):
        """Round-trip a RandomDiscretizedProjections configuration."""
        original = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        original.reset(100)
        self._assert_round_trip(
            storage, 'testRDPHash', original,
            RandomDiscretizedProjections(None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'), 'normals')

    def _check_pcabp(self, storage):
        """Round-trip a PCABinaryProjections configuration."""
        train_vectors = numpy.random.randn(10, 100)
        original = PCABinaryProjections('testPCABPHash', 4, train_vectors)
        self._assert_round_trip(
            storage, 'testPCABPHash', original,
            PCABinaryProjections(None, None, None),
            ('dim', 'hash_name', 'projection_count'), 'components')

    def _check_pcadp(self, storage):
        """Round-trip a PCADiscretizedProjections configuration."""
        train_vectors = numpy.random.randn(10, 100)
        original = PCADiscretizedProjections('testPCADPHash', 4,
                                             train_vectors, 0.1)
        self._assert_round_trip(
            storage, 'testPCADPHash', original,
            PCADiscretizedProjections(None, None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'),
            'components')

    # -- memory storage ----------------------------------------------------

    def test_hash_memory_storage_none_config(self):
        """Loading an unknown hash name from memory storage yields None."""
        self.assertIsNone(
            self.memory.load_hash_configuration('nonexistentHash'))

    def test_hash_memory_storage_rbp(self):
        self._check_rbp(self.memory)

    def test_hash_memory_storage_rdp(self):
        self._check_rdp(self.memory)

    def test_hash_memory_storage_pcabp(self):
        self._check_pcabp(self.memory)

    def test_hash_memory_storage_pcadp(self):
        self._check_pcadp(self.memory)

    # -- redis storage -----------------------------------------------------

    def test_hash_redis_storage_none_config(self):
        """Loading an unknown hash name from Redis storage yields None."""
        self.assertIsNone(
            self.redis_storage.load_hash_configuration('nonexistentHash'))

    def test_hash_redis_storage_rbp(self):
        self._check_rbp(self.redis_storage)

    def test_hash_redis_storage_rdp(self):
        self._check_rdp(self.redis_storage)

    def test_hash_redis_storage_pcabp(self):
        self._check_pcabp(self.redis_storage)

    def test_hash_redis_storage_pcadp(self):
        self._check_pcadp(self.redis_storage)
Пример #2
0
class TestRandomBinaryProjectionTree(unittest.TestCase):
    """Tests for RandomBinaryProjectionTree retrieval and config storage."""

    def setUp(self):
        """Create a MemoryStorage and a RedisStorage wrapping a Redis object."""
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def _assert_same_matrix(self, expected, actual):
        """Assert that two matrices agree in shape and in every entry."""
        # Checking the shape first catches a restored matrix with extra rows
        # or columns, which an index loop over ``expected`` alone would miss.
        self.assertEqual(expected.shape, actual.shape)
        self.assertTrue(numpy.array_equal(expected, actual),
                        'restored matrix differs from the stored one')

    def _check_storage_round_trip(self, storage):
        """Store a populated tree hash in ``storage`` and verify the restore.

        :param storage: object offering ``store_hash_configuration`` and
            ``load_hash_configuration``.
        """
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for _ in range(2000):
            self.engine.store_vector(numpy.random.randn(100), 'data')

        storage.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(storage.load_hash_configuration('testHash'))

        for attr in ('dim', 'hash_name', 'projection_count'):
            self.assertEqual(getattr(rbpt, attr), getattr(rbpt2, attr))

        self._assert_same_matrix(rbpt.normals, rbpt2.normals)

        # Both hashes must produce the same keys for random query vectors
        for _ in range(10):
            query = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(query, querying=True)
            keys2 = rbpt2.hash_vector(query, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for key1, key2 in zip(keys1, keys2):
                self.assertEqual(key1, key2)

    def test_retrieval(self):
        """Every query against a populated engine returns 20 neighbours."""
        # We want 12 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 12, 20)

        # Create engine for 100 dimensional feature space, do not forget to
        # set nearest filter to 20, because default is 10
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 200000 random vectors
        for _ in range(200000):
            self.engine.store_vector(numpy.random.randn(100), 'data')

        # Now do random queries and check result set size
        for _ in range(10):
            query = numpy.random.randn(100)
            self.assertEqual(len(self.engine.neighbours(query)), 20)

    def test_storage_memory(self):
        self._check_storage_round_trip(self.memory)

    def test_storage_redis(self):
        self._check_storage_round_trip(self.redis_storage)
Пример #3
0
class TestRandomBinaryProjectionTree(unittest.TestCase):
    """Tests for RandomBinaryProjectionTree retrieval and config storage."""

    def setUp(self):
        """Create the storage objects and seed numpy's random generator."""
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)
        # Fixed seed so every test draws the same random vectors
        numpy.random.seed(16)

    def _assert_same_matrix(self, expected, actual):
        """Assert that two matrices agree in shape and in every entry."""
        # Checking the shape first catches a restored matrix with extra rows
        # or columns, which an index loop over ``expected`` alone would miss.
        self.assertEqual(expected.shape, actual.shape)
        self.assertTrue(numpy.array_equal(expected, actual),
                        'restored matrix differs from the stored one')

    def _check_storage_round_trip(self, storage):
        """Store a populated tree hash in ``storage`` and verify the restore.

        :param storage: object offering ``store_hash_configuration`` and
            ``load_hash_configuration``.
        """
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for _ in range(2000):
            self.engine.store_vector(numpy.random.randn(100), 'data')

        storage.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(storage.load_hash_configuration('testHash'))

        for attr in ('dim', 'hash_name', 'projection_count'):
            self.assertEqual(getattr(rbpt, attr), getattr(rbpt2, attr))

        self._assert_same_matrix(rbpt.normals, rbpt2.normals)

        # Both hashes must produce the same keys for random query vectors
        for _ in range(10):
            query = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(query, querying=True)
            keys2 = rbpt2.hash_vector(query, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for key1, key2 in zip(keys1, keys2):
                self.assertEqual(key1, key2)

    def test_retrieval(self):
        """Every query against a populated engine returns 20 neighbours."""
        # We want 12 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 12, 20)

        # Create engine for 100 dimensional feature space, do not forget to
        # set nearest filter to 20, because default is 10
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 200000 random vectors, each with its own payload
        for index in range(200000):
            vector = numpy.random.randn(100)
            self.engine.store_vector(vector, 'data {}'.format(index))

        # Now do random queries and check result set size
        for _ in range(10):
            query = numpy.random.randn(100)
            self.assertEqual(len(self.engine.neighbours(query)), 20)

    def test_storage_memory(self):
        self._check_storage_round_trip(self.memory)

    def test_storage_redis(self):
        self._check_storage_round_trip(self.redis_storage)
Пример #4
0
class TestHashStorage(unittest.TestCase):
    """Round-trip tests for hash configurations through the storage objects.

    Every test stores the configuration of a freshly built hash, loads it
    back by name, applies it to an unconfigured instance of the same class
    and checks that the restored settings equal the originals.
    """

    def setUp(self):
        """Create a MemoryStorage and a RedisStorage wrapping a Redis object."""
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost',
                                  port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    # -- shared helpers ----------------------------------------------------

    def _assert_same_matrix(self, expected, actual):
        """Assert that two matrices agree in shape and in every entry."""
        # Checking the shape first catches a restored matrix with extra rows
        # or columns, which an index loop over ``expected`` alone would miss.
        self.assertEqual(expected.shape, actual.shape)
        self.assertTrue(numpy.array_equal(expected, actual),
                        'restored matrix differs from the stored one')

    def _assert_round_trip(self, storage, name, original, restored,
                           scalar_attrs, matrix_attr):
        """Store ``original``, restore it into ``restored`` and compare.

        :param storage: object offering ``store_hash_configuration`` and
            ``load_hash_configuration``.
        :param name: hash name used to load the configuration back.
        :param original: configured hash whose configuration is stored.
        :param restored: unconfigured hash receiving the loaded config.
        :param scalar_attrs: attribute names compared with ``assertEqual``,
            in the given order.
        :param matrix_attr: name of the matrix attribute compared entry
            by entry.
        """
        storage.store_hash_configuration(original)
        restored.apply_config(storage.load_hash_configuration(name))

        for attr in scalar_attrs:
            self.assertEqual(getattr(original, attr), getattr(restored, attr))

        self._assert_same_matrix(getattr(original, matrix_attr),
                                 getattr(restored, matrix_attr))

    def _check_rbp(self, storage):
        """Round-trip a RandomBinaryProjections configuration."""
        original = RandomBinaryProjections('testRBPHash', 10)
        original.reset(100)
        self._assert_round_trip(
            storage, 'testRBPHash', original,
            RandomBinaryProjections(None, None),
            ('dim', 'hash_name', 'projection_count'), 'normals')

    def _check_rdp(self, storage):
        """Round-trip a RandomDiscretizedProjections configuration."""
        original = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        original.reset(100)
        self._assert_round_trip(
            storage, 'testRDPHash', original,
            RandomDiscretizedProjections(None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'), 'normals')

    def _check_pcabp(self, storage):
        """Round-trip a PCABinaryProjections configuration."""
        train_vectors = numpy.random.randn(10, 100)
        original = PCABinaryProjections('testPCABPHash', 4, train_vectors)
        self._assert_round_trip(
            storage, 'testPCABPHash', original,
            PCABinaryProjections(None, None, None),
            ('dim', 'hash_name', 'projection_count'), 'components')

    def _check_pcadp(self, storage):
        """Round-trip a PCADiscretizedProjections configuration."""
        train_vectors = numpy.random.randn(10, 100)
        original = PCADiscretizedProjections('testPCADPHash', 4,
                                             train_vectors, 0.1)
        self._assert_round_trip(
            storage, 'testPCADPHash', original,
            PCADiscretizedProjections(None, None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'),
            'components')

    # -- memory storage ----------------------------------------------------

    def test_hash_memory_storage_rbp(self):
        self._check_rbp(self.memory)

    def test_hash_memory_storage_rdp(self):
        self._check_rdp(self.memory)

    def test_hash_memory_storage_pcabp(self):
        self._check_pcabp(self.memory)

    def test_hash_memory_storage_pcadp(self):
        self._check_pcadp(self.memory)

    # -- redis storage -----------------------------------------------------

    def test_hash_redis_storage_rbp(self):
        self._check_rbp(self.redis_storage)

    def test_hash_redis_storage_rdp(self):
        self._check_rdp(self.redis_storage)

    def test_hash_redis_storage_pcabp(self):
        self._check_pcabp(self.redis_storage)

    def test_hash_redis_storage_pcadp(self):
        self._check_pcadp(self.redis_storage)