def loadHashmap(self, feature_size=129, result_n=1000):
    """Create ``self.engine`` on top of Redis, reusing a stored hash config.

    The hash configuration named ``'test'`` is looked up in Redis (db 0).
    When it exists it is applied to a blank ``RandomBinaryProjections``;
    otherwise a new hash named ``'test'`` is created.  The resulting hash
    configuration is written back to Redis before returning.

    Args:
        feature_size: Dimensionality of the feature (hash) space passed to
            the engine.
        result_n: Number of nearest neighbours kept by the ``NearestFilter``.

    Errors raised by the Redis client or by ``apply_config`` propagate to the
    caller instead of being silently replaced by a brand-new hash.
    """
    # Create redis storage adapter (requires a running Redis server).
    redis_object = Redis(host='localhost', port=6379, db=0)
    redis_storage = RedisStorage(redis_object)

    # A missing configuration is reported as None, so test for that
    # explicitly rather than catching every exception.
    config = redis_storage.load_hash_configuration('test')
    if config is None:
        # No stored config: create the hash from scratch.
        # NOTE(review): the original comment said "10 projections" but the
        # code passes 0 -- kept as-is, confirm the intended value.
        lshash = RandomBinaryProjections('test', 0)
    else:
        # Stored config found: build a blank hash and load the config.
        lshash = RandomBinaryProjections(None, None)
        lshash.apply_config(config)

    # Per the original notes, the engine sets the hash dimension only for a
    # freshly created hash, not for one restored from Redis.
    nearest = NearestFilter(result_n)
    self.engine = Engine(feature_size,
                         lshashes=[lshash],
                         vector_filters=[nearest],
                         storage=redis_storage,
                         distance=EuclideanDistance())

    # Persist the hash configuration so later runs reuse the same hash.
    redis_storage.store_hash_configuration(lshash)
def load_hashmap(self):
    """Create ``self.engine`` on top of Redis, reusing a stored hash config.

    Reads ``self.nn`` (size of the nearest-neighbour filter) and
    ``self.feature_size`` (dimension passed to the engine).  The hash
    configuration named ``'test'`` is looked up in Redis db 14; when it is
    absent a new 10-projection hash is created.  The configuration is
    written back to Redis before returning.

    Errors raised by the Redis client or by ``apply_config`` propagate to the
    caller instead of being silently replaced by a brand-new hash.
    """
    # Create redis storage adapter; the Redis service must be running.
    redis_object = Redis(host='localhost', port=6379, db=14)
    redis_storage = RedisStorage(redis_object)

    # A missing configuration is reported as None, so test for that
    # explicitly rather than catching every exception.
    config = redis_storage.load_hash_configuration('test')
    if config is None:
        # No stored config: create the hash from scratch, 10 projections.
        lshash = RandomBinaryProjections('test', 10)
    else:
        # Stored config found: build a blank hash and load the config.
        lshash = RandomBinaryProjections(None, None)
        lshash.apply_config(config)

    nearest = NearestFilter(self.nn)
    self.engine = Engine(self.feature_size,
                         lshashes=[lshash],
                         vector_filters=[nearest],
                         storage=redis_storage,
                         distance=CosineDistance())

    # Persist the hash configuration so later runs reuse the same hash.
    redis_storage.store_hash_configuration(lshash)
def __init__(self):
    """Set up ``self.lshash`` and ``self.engine`` backed by Redis storage.

    The hash configuration ``'MyHash'`` is restored from Redis (db 0) when
    present; otherwise a new hash with 5 projections is created.  The
    configuration is stored back to Redis at the end.
    """
    storage = RedisStorage(redis.Redis(host='localhost', port=6379, db=0))

    # Look up a previously persisted hash configuration.
    stored_config = storage.load_hash_configuration('MyHash')
    if stored_config is not None:
        # Restore: start from a blank hash and apply the saved settings.
        self.lshash = RandomBinaryProjections(None, None)
        self.lshash.apply_config(stored_config)
    else:
        # Nothing saved yet: build the hash from scratch (5 projections).
        self.lshash = RandomBinaryProjections('MyHash', 5)

    # Engine is constructed with 4 as its first (dimension) argument and
    # keeps its buckets in the Redis storage.
    self.engine = Engine(4, lshashes=[self.lshash], storage=storage)

    # Save the hash configuration for later runs.
    storage.store_hash_configuration(self.lshash)
class TestHashStorage(unittest.TestCase):
    """Store/load round-trip tests for hash configs in memory and Redis."""

    def setUp(self):
        """Create one in-memory storage and one Redis-backed storage."""
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)

    # ------------------------------------------------------------------
    # Shared helpers (not collected as tests: names do not start with
    # ``test``).
    # ------------------------------------------------------------------

    def _assert_same_matrix(self, expected, actual):
        """Assert equality entry by entry, rows first then columns."""
        rows, cols = expected.shape[0], expected.shape[1]
        for position in numpy.ndindex(rows, cols):
            self.assertEqual(expected[position], actual[position])

    def _round_trip(self, storage, source, name, make_blank, fields,
                    matrix_field):
        """Store ``source``, reload it into a blank hash and compare.

        ``fields`` are compared in the given order, followed by the 2-D
        array attribute named ``matrix_field``.
        """
        storage.store_hash_configuration(source)
        restored = make_blank()
        restored.apply_config(storage.load_hash_configuration(name))
        for field in fields:
            self.assertEqual(getattr(source, field), getattr(restored, field))
        self._assert_same_matrix(getattr(source, matrix_field),
                                 getattr(restored, matrix_field))

    def _check_missing_config(self, storage):
        """A hash name that was never stored must load as None."""
        self.assertIsNone(storage.load_hash_configuration('nonexistentHash'))

    def _check_rbp(self, storage):
        source = RandomBinaryProjections('testRBPHash', 10)
        source.reset(100)
        self._round_trip(
            storage, source, 'testRBPHash',
            lambda: RandomBinaryProjections(None, None),
            ('dim', 'hash_name', 'projection_count'),
            'normals')

    def _check_rdp(self, storage):
        source = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        source.reset(100)
        self._round_trip(
            storage, source, 'testRDPHash',
            lambda: RandomDiscretizedProjections(None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'),
            'normals')

    def _check_pcabp(self, storage):
        samples = numpy.random.randn(10, 100)
        source = PCABinaryProjections('testPCABPHash', 4, samples)
        self._round_trip(
            storage, source, 'testPCABPHash',
            lambda: PCABinaryProjections(None, None, None),
            ('dim', 'hash_name', 'projection_count'),
            'components')

    def _check_pcadp(self, storage):
        samples = numpy.random.randn(10, 100)
        source = PCADiscretizedProjections('testPCADPHash', 4, samples, 0.1)
        self._round_trip(
            storage, source, 'testPCADPHash',
            lambda: PCADiscretizedProjections(None, None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'),
            'components')

    # ------------------------------------------------------------------
    # Memory storage
    # ------------------------------------------------------------------

    def test_hash_memory_storage_none_config(self):
        self._check_missing_config(self.memory)

    def test_hash_memory_storage_rbp(self):
        self._check_rbp(self.memory)

    def test_hash_memory_storage_rdp(self):
        self._check_rdp(self.memory)

    def test_hash_memory_storage_pcabp(self):
        self._check_pcabp(self.memory)

    def test_hash_memory_storage_pcadp(self):
        self._check_pcadp(self.memory)

    # ------------------------------------------------------------------
    # Redis storage
    # ------------------------------------------------------------------

    def test_hash_redis_storage_none_config(self):
        self._check_missing_config(self.redis_storage)

    def test_hash_redis_storage_rbp(self):
        self._check_rbp(self.redis_storage)

    def test_hash_redis_storage_rdp(self):
        self._check_rdp(self.redis_storage)

    def test_hash_redis_storage_pcabp(self):
        self._check_pcabp(self.redis_storage)

    def test_hash_redis_storage_pcadp(self):
        self._check_pcadp(self.redis_storage)
class TestRandomBinaryProjectionTree(unittest.TestCase):
    """Retrieval and config-persistence tests for the projection tree hash."""

    def setUp(self):
        """Create the storages and seed numpy's global RNG with 16."""
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)
        numpy.random.seed(16)

    def _check_config_round_trip(self, storage):
        """Index vectors, persist the tree config and compare the reload."""
        # 10 projections, at least 20 results (per the constructor args).
        tree = RandomBinaryProjectionTree('testHash', 10, 20)

        # Engine for a 100-dimensional feature space.
        self.engine = Engine(100, lshashes=[tree],
                             vector_filters=[NearestFilter(20)])

        # Insert 2000 random vectors, all carrying the same payload.
        for _ in range(2000):
            self.engine.store_vector(numpy.random.randn(100), 'data')

        storage.store_hash_configuration(tree)

        clone = RandomBinaryProjectionTree(None, None, None)
        clone.apply_config(storage.load_hash_configuration('testHash'))

        for field in ('dim', 'hash_name', 'projection_count'):
            self.assertEqual(getattr(tree, field), getattr(clone, field))

        rows, cols = tree.normals.shape[0], tree.normals.shape[1]
        for position in numpy.ndindex(rows, cols):
            self.assertEqual(tree.normals[position], clone.normals[position])

        # Both hashes must yield the same query keys for random vectors.
        for _ in range(10):
            query = numpy.random.randn(100)
            original_keys = tree.hash_vector(query, querying=True)
            restored_keys = clone.hash_vector(query, querying=True)
            self.assertEqual(len(original_keys), len(restored_keys))
            for left, right in zip(original_keys, restored_keys):
                self.assertEqual(left, right)

    def test_retrieval(self):
        """Every query against a large index must return 20 neighbours."""
        # 12 projections, at least 20 results.
        tree = RandomBinaryProjectionTree('testHash', 12, 20)

        # The nearest filter is set to 20 explicitly (the original notes
        # say its default is 10).
        self.engine = Engine(100, lshashes=[tree],
                             vector_filters=[NearestFilter(20)])

        # Index 200000 random vectors with a per-vector payload.
        for index in range(200000):
            self.engine.store_vector(numpy.random.randn(100),
                                     'data {}'.format(index))

        # Random queries: check only the size of the result set.
        for _ in range(10):
            neighbours = self.engine.neighbours(numpy.random.randn(100))
            self.assertEqual(len(neighbours), 20)

    def test_storage_memory(self):
        self._check_config_round_trip(self.memory)

    def test_storage_redis(self):
        self._check_config_round_trip(self.redis_storage)
class TestRandomBinaryProjectionTree(unittest.TestCase):
    """Retrieval and config-persistence tests for the projection tree hash."""

    def setUp(self):
        """Create the memory storage and a Redis storage on localhost db 0."""
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost', port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    def _check_config_round_trip(self, storage):
        """Index vectors, persist the tree config and compare the reload."""
        # 10 projections, at least 20 results (per the constructor args).
        tree = RandomBinaryProjectionTree('testHash', 10, 20)

        # Engine for a 100-dimensional feature space.
        self.engine = Engine(100, lshashes=[tree],
                             vector_filters=[NearestFilter(20)])

        # Insert 2000 random vectors, all carrying the same payload.
        for _ in range(2000):
            self.engine.store_vector(numpy.random.randn(100), 'data')

        storage.store_hash_configuration(tree)

        clone = RandomBinaryProjectionTree(None, None, None)
        clone.apply_config(storage.load_hash_configuration('testHash'))

        for field in ('dim', 'hash_name', 'projection_count'):
            self.assertEqual(getattr(tree, field), getattr(clone, field))

        rows, cols = tree.normals.shape[0], tree.normals.shape[1]
        for position in numpy.ndindex(rows, cols):
            self.assertEqual(tree.normals[position], clone.normals[position])

        # Both hashes must yield the same query keys for random vectors.
        for _ in range(10):
            query = numpy.random.randn(100)
            original_keys = tree.hash_vector(query, querying=True)
            restored_keys = clone.hash_vector(query, querying=True)
            self.assertEqual(len(original_keys), len(restored_keys))
            for left, right in zip(original_keys, restored_keys):
                self.assertEqual(left, right)

    def test_retrieval(self):
        """Every query against a large index must return 20 neighbours."""
        # 12 projections, at least 20 results.
        tree = RandomBinaryProjectionTree('testHash', 12, 20)

        # The nearest filter is set to 20 explicitly (the original notes
        # say its default is 10).
        self.engine = Engine(100, lshashes=[tree],
                             vector_filters=[NearestFilter(20)])

        # Index 200000 random vectors.
        for _ in range(200000):
            self.engine.store_vector(numpy.random.randn(100), 'data')

        # Random queries: check only the size of the result set.
        for _ in range(10):
            neighbours = self.engine.neighbours(numpy.random.randn(100))
            self.assertEqual(len(neighbours), 20)

    def test_storage_memory(self):
        self._check_config_round_trip(self.memory)

    def test_storage_redis(self):
        self._check_config_round_trip(self.redis_storage)
transforms.CenterCrop(64), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ]) dimension = 512 r = redis.Redis( host='redis', port=6379, # charset='utf-8', # decode_responses=True, ) redis_storage = RedisStorage(r) # Get hash config from redis config = redis_storage.load_hash_configuration('MyHash') if config is None: # Config is not existing, create hash from scratch, with 10 projections lshash = RandomBinaryProjections('MyHash', 50) else: # Config is existing, create hash with None parameters lshash = RandomBinaryProjections(None, None) # Apply configuration loaded from redis lshash.apply_config(config) # Create engine for feature space of 100 dimensions and use our hash. # This will set the dimension of the lshash only the first time, not when # using the configuration loaded from redis. Use redis storage to store # buckets. engine = Engine(dimension, lshashes=[lshash], storage=redis_storage) redis_storage.store_hash_configuration(lshash)
class TestHashStorage(unittest.TestCase):
    """Store/load round-trip tests for hash configs in memory and Redis."""

    def setUp(self):
        """Create the memory storage and a Redis storage on localhost db 0."""
        self.memory = MemoryStorage()
        self.redis_object = Redis(host='localhost', port=6379, db=0)
        self.redis_storage = RedisStorage(self.redis_object)

    # ------------------------------------------------------------------
    # Shared helpers (not collected as tests: names do not start with
    # ``test``).
    # ------------------------------------------------------------------

    def _assert_same_matrix(self, expected, actual):
        """Assert equality entry by entry, rows first then columns."""
        rows, cols = expected.shape[0], expected.shape[1]
        for position in numpy.ndindex(rows, cols):
            self.assertEqual(expected[position], actual[position])

    def _round_trip(self, storage, source, name, make_blank, fields,
                    matrix_field):
        """Store ``source``, reload it into a blank hash and compare.

        ``fields`` are compared in the given order, followed by the 2-D
        array attribute named ``matrix_field``.
        """
        storage.store_hash_configuration(source)
        restored = make_blank()
        restored.apply_config(storage.load_hash_configuration(name))
        for field in fields:
            self.assertEqual(getattr(source, field), getattr(restored, field))
        self._assert_same_matrix(getattr(source, matrix_field),
                                 getattr(restored, matrix_field))

    def _check_rbp(self, storage):
        source = RandomBinaryProjections('testRBPHash', 10)
        source.reset(100)
        self._round_trip(
            storage, source, 'testRBPHash',
            lambda: RandomBinaryProjections(None, None),
            ('dim', 'hash_name', 'projection_count'),
            'normals')

    def _check_rdp(self, storage):
        source = RandomDiscretizedProjections('testRDPHash', 10, 0.1)
        source.reset(100)
        self._round_trip(
            storage, source, 'testRDPHash',
            lambda: RandomDiscretizedProjections(None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'),
            'normals')

    def _check_pcabp(self, storage):
        samples = numpy.random.randn(10, 100)
        source = PCABinaryProjections('testPCABPHash', 4, samples)
        self._round_trip(
            storage, source, 'testPCABPHash',
            lambda: PCABinaryProjections(None, None, None),
            ('dim', 'hash_name', 'projection_count'),
            'components')

    def _check_pcadp(self, storage):
        samples = numpy.random.randn(10, 100)
        source = PCADiscretizedProjections('testPCADPHash', 4, samples, 0.1)
        self._round_trip(
            storage, source, 'testPCADPHash',
            lambda: PCADiscretizedProjections(None, None, None, None),
            ('dim', 'hash_name', 'bin_width', 'projection_count'),
            'components')

    # ------------------------------------------------------------------
    # Memory storage
    # ------------------------------------------------------------------

    def test_hash_memory_storage_rbp(self):
        self._check_rbp(self.memory)

    def test_hash_memory_storage_rdp(self):
        self._check_rdp(self.memory)

    def test_hash_memory_storage_pcabp(self):
        self._check_pcabp(self.memory)

    def test_hash_memory_storage_pcadp(self):
        self._check_pcadp(self.memory)

    # ------------------------------------------------------------------
    # Redis storage
    # ------------------------------------------------------------------

    def test_hash_redis_storage_rbp(self):
        self._check_rbp(self.redis_storage)

    def test_hash_redis_storage_rdp(self):
        self._check_rdp(self.redis_storage)

    def test_hash_redis_storage_pcabp(self):
        self._check_pcabp(self.redis_storage)

    def test_hash_redis_storage_pcadp(self):
        self._check_pcadp(self.redis_storage)