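# Shared imports for the snippets below, reconstructed from the names used in
# the code. The module paths follow NearPy's layout but may vary between
# NearPy versions, so treat them as an assumption rather than gospel.
import logging
import os
import time
import unittest

import numpy
import scipy.sparse
from redis import Redis

from nearpy import Engine
from nearpy.distances import CosineDistance, EuclideanDistance
from nearpy.experiments import (DistanceRatioExperiment,
                                RecallPrecisionExperiment)
from nearpy.filters import NearestFilter, UniqueFilter
from nearpy.hashes import (HashPermutationMapper, HashPermutations,
                           RandomBinaryProjections, RandomBinaryProjectionTree,
                           RandomDiscretizedProjections, UniBucket)
from nearpy.storage import MemoryStorage, RedisStorage
from nearpy.utils import unitvec  # unitvec's home module may differ by version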
    def test_random_discretized_projections(self):
        dim = 4
        vector_count = 5000
        vectors = numpy.random.randn(dim, vector_count)

        # First get recall and precision for one 1-dim random hash
        rdp = RandomDiscretizedProjections('rdp', 1, 0.01)
        nearest = NearestFilter(10 + 1)
        engine = Engine(dim, lshashes=[rdp], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(10, vectors)
        result = exp.perform_experiment([engine])

        recall1 = result[0][0]
        precision1 = result[0][1]
        searchtime1 = result[0][2]

        print('\nRecall RDP: %f, Precision RDP: %f, SearchTime RDP: %f\n'
              % (recall1, precision1, searchtime1))

        # Then get recall and precision for one 2-dim random hash
        rdp = RandomDiscretizedProjections('rdp', 2, 0.2)
        engine = Engine(dim, lshashes=[rdp], vector_filters=[nearest])
        result = exp.perform_experiment([engine])

        recall2 = result[0][0]
        precision2 = result[0][1]
        searchtime2 = result[0][2]

        print('\nRecall RDP: %f, Precision RDP: %f, SearchTime RDP: %f\n'
              % (recall2, precision2, searchtime2))

        # Many things are random here, but precision should increase
        # with the number of projections per hash key
        self.assertTrue(precision2 > precision1)
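        # Why this holds (a reasoning note, not in the original): keys built
        # from two projections partition the space more finely than keys from
        # one, so a bucket's candidates lie closer to the query and fewer
        # false positives survive -- precision rises even if recall drops.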
class TestEngine(unittest.TestCase):
    def setUp(self):
        self.engine = Engine(1000)

    def test_storage_issue(self):
        engine1 = Engine(100)
        engine2 = Engine(100)

        for k in range(1000):
            x = numpy.random.randn(100)
            x_data = 'data'
            engine1.store_vector(x, x_data)

        # Each engine should have its own default storage
        self.assertEqual(len(engine2.storage.buckets), 0)

    def test_retrieval(self):
        for k in range(100):
            self.engine.clean_all_buckets()
            x = numpy.random.randn(1000)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            normalized_x = unitvec(x)
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs((normalized_x - y)).max(),
                                   0,
                                   delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)

    def test_retrieval_sparse(self):
        for k in range(100):
            self.engine.clean_all_buckets()
            x = scipy.sparse.rand(1000, 1, density=0.05)
            x_data = 'data'
            self.engine.store_vector(x, x_data)
            n = self.engine.neighbours(x)
            y, y_data, y_distance = n[0]
            normalized_x = unitvec(x)
            delta = 1e-9
            self.assertAlmostEqual(numpy.abs((normalized_x - y)).max(),
                                   0,
                                   delta=delta)
            self.assertEqual(y_data, x_data)
            self.assertAlmostEqual(y_distance, 0.0, delta=delta)
    def test_experiment_with_list_1(self):
        dim = 50
        vector_count = 100
        vectors = []
        for index in range(vector_count):
            vectors.append(numpy.random.randn(dim))
        unibucket = UniBucket('testHash')
        nearest = NearestFilter(10 + 1)
        engine = Engine(dim, lshashes=[unibucket], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(10, vectors)
        result = exp.perform_experiment([engine])

        # Both recall and precision must be one in this case
        self.assertEqual(result[0][0], 1.0)
        self.assertEqual(result[0][1], 1.0)
    def test_experiment_with_unibucket_3(self):
        dim = 50
        vector_count = 100
        vectors = numpy.random.randn(dim, vector_count)
        unibucket = UniBucket('testHash')
        nearest = NearestFilter(5 + 1)
        engine = Engine(dim, lshashes=[unibucket], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(10, vectors)
        result = exp.perform_experiment([engine])

        # In this case recall is only 0.5
        # because the engine returns 5 nearest, but
        # the experiment looks for 10 nearest.
        self.assertEqual(result[0][0], 0.5)
        self.assertEqual(result[0][1], 1.0)
    def test_experiment_with_unibucket_1(self):
        dim = 50
        vector_count = 100
        vectors = numpy.random.randn(dim, vector_count)
        unibucket = UniBucket('testHash')
        nearest = NearestFilter(10 + 1)
        engine = Engine(dim,
                        lshashes=[unibucket],
                        vector_filters=[nearest],
                        distance=EuclideanDistance())
        exp = RecallPrecisionExperiment(10, vectors)
        result = exp.perform_experiment([engine])

        # Both recall and precision must be one in this case
        self.assertEqual(result[0][0], 1.0)
        self.assertEqual(result[0][1], 1.0)
    def test_experiment_with_list_2(self):
        dim = 50
        vector_count = 100
        vectors = []
        for index in range(vector_count):
            vectors.append(numpy.random.randn(dim))
        unibucket = UniBucket('testHash')
        nearest = NearestFilter(10 + 1)
        engine = Engine(dim, lshashes=[unibucket], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(5, vectors)
        result = exp.perform_experiment([engine])

        # In this case precision is only 0.5
        # because the engine returns 10 nearest, but
        # the experiment only looks for 5 nearest.
        self.assertEqual(result[0][0], 1.0)
        self.assertEqual(result[0][1], 0.5)
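
        # How these numbers arise (a sketch, assuming the experiment scores
        # results against the exact nearest neighbours):
        #   recall    = |returned ∩ exact_N| / N
        #   precision = |returned ∩ exact_N| / |returned|
        # UniBucket puts every stored vector in one bucket, so NearestFilter
        # returns the true nearest ones. Here the engine returns 10 results
        # while the experiment wants only 5, so recall = 5/5 = 1.0 and
        # precision = 5/10 = 0.5, matching the assertions above.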
    def test_random_binary_projections(self):
        dim = 4
        vector_count = 5000
        vectors = numpy.random.randn(dim, vector_count)

        # Get recall and precision for a 32-bit random binary hash
        rbp = RandomBinaryProjections('rbp', 32)
        nearest = NearestFilter(10 + 1)
        engine = Engine(dim, lshashes=[rbp], vector_filters=[nearest])
        exp = RecallPrecisionExperiment(10, vectors)
        result = exp.perform_experiment([engine])

        recall1 = result[0][0]
        precision1 = result[0][1]
        searchtime1 = result[0][2]

        print('\nRecall RBP: %f, Precision RBP: %f, SearchTime RBP: %f\n'
              % (recall1, precision1, searchtime1))
class TestPermutation(unittest.TestCase):
    def setUp(self):
        logging.basicConfig(level=logging.WARNING)
        numpy.random.seed(11)

        # Create permutations meta-hash
        self.permutations = HashPermutations('permut')

        # Create binary hash as child hash
        rbp = RandomBinaryProjections('rbp1', 4, rand_seed=19)
        rbp_conf = {
            'num_permutation': 50,
            'beam_size': 10,
            'num_neighbour': 100
        }

        # Add rbp as child hash of permutations hash
        self.permutations.add_child_hash(rbp, rbp_conf)

        # Create engine with meta hash and cosine distance
        self.engine_perm = Engine(200,
                                  lshashes=[self.permutations],
                                  distance=CosineDistance())

        # Create engine without permutation meta-hash
        self.engine = Engine(200, lshashes=[rbp], distance=CosineDistance())

    def test_runnable(self):

        # First index some random vectors
        matrix = numpy.zeros((1000, 200))
        for i in range(1000):
            v = numpy.random.randn(200)
            matrix[i] = v
            self.engine.store_vector(v)
            self.engine_perm.store_vector(v)

        # Then update permuted index
        self.permutations.build_permuted_index()

        # Do random query on engine with permutations meta-hash
        query = numpy.random.randn(200)
        results = self.engine_perm.neighbours(query)
        permuted_dists = [x[2] for x in results]

        # Do random query on engine without permutations meta-hash (distances should be larger)
        results = self.engine.neighbours(query)
        dists = [x[2] for x in results]

        self.assertLess(permuted_dists[0], dists[0])
def createLSHFile(inputFilename, L, K):
    if os.path.isfile(inputFilename):
        siftVectorFile = open(inputFilename, 'r')
        # Dimension of our vector space
        dimension = 2
        noOfLayers = L
        noOfBits = K
        layers = []
        for i in range(noOfLayers):
            # Create a random binary hash with noOfBits bits
            layers.append(RandomBinaryProjections('layer' + str(i), noOfBits))

        # Create engine with pipeline configuration
        engine = Engine(dimension, lshashes=layers)
        outFile = open('filename_d.lsh', 'w')
        storeVectors(siftVectorFile, engine, outFile)
        outFile.close()
        siftVectorFile.close()

    else:
        print('Filename incorrect!')
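
# storeVectors is called above but never defined in this snippet. This is a
# hypothetical sketch, not the original author's helper: it assumes one
# whitespace-separated vector per input line, and the output format (line
# number followed by the bucket keys) is likewise an assumption. It also
# assumes Engine exposes its hashes as the `lshashes` attribute.
def storeVectors(vectorFile, engine, outFile):
    for lineNo, line in enumerate(vectorFile):
        parts = line.split()
        if not parts:
            continue
        v = numpy.array([float(p) for p in parts])
        # Index the vector under a synthetic key
        engine.store_vector(v, 'vec_%d' % lineNo)
        # Record the bucket keys each configured hash assigns to this vector
        for lshash in engine.lshashes:
            keys = lshash.hash_vector(v)
            outFile.write('%d %s\n' % (lineNo, ' '.join(str(key) for key in keys)))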
def example1():

    # Dimension of feature space
    DIM = 100

    # Number of data points (don't use too many because of the exact search)
    POINTS = 10000

    print('Creating engines')

    # We want 20 projections, 20 results at least
    rbpt = RandomBinaryProjectionTree('rbpt', 20, 20)

    # Create engine 1
    engine_rbpt = Engine(DIM, lshashes=[rbpt], distance=CosineDistance())

    # Create binary hash as child hash
    rbp = RandomBinaryProjections('rbp1', 20)

    # Create engine 2
    engine = Engine(DIM, lshashes=[rbp], distance=CosineDistance())

    # Create permutations meta-hash
    permutations = HashPermutations('permut')

    # Create binary hash as child hash
    rbp_perm = RandomBinaryProjections('rbp_perm', 20)
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100}

    # Add rbp as child hash of permutations hash
    permutations.add_child_hash(rbp_perm, rbp_conf)

    # Create engine 3
    engine_perm = Engine(DIM,
                         lshashes=[permutations],
                         distance=CosineDistance())

    # Create permutations meta-hash
    permutations2 = HashPermutationMapper('permut2')

    # Create binary hash as child hash
    rbp_perm2 = RandomBinaryProjections('rbp_perm2', 12)

    # Add rbp as child hash of permutations hash
    permutations2.add_child_hash(rbp_perm2)

    # Create engine 4
    engine_perm2 = Engine(DIM,
                          lshashes=[permutations2],
                          distance=CosineDistance())

    print('Indexing %d random vectors of dimension %d' % (POINTS, DIM))

    # First index some random vectors
    matrix = numpy.zeros((POINTS, DIM))
    for i in range(POINTS):
        v = numpy.random.randn(DIM)
        matrix[i] = v
        engine.store_vector(v)
        engine_rbpt.store_vector(v)
        engine_perm.store_vector(v)
        engine_perm2.store_vector(v)

    print('Buckets 1 = %d' % len(engine.storage.buckets['rbp1'].keys()))
    print('Buckets 2 = %d' % len(engine_rbpt.storage.buckets['rbpt'].keys()))

    print('Building permuted index for HashPermutations')

    # Then update permuted index
    permutations.build_permuted_index()

    print('Generating random query vector')

    # Get random query vector
    query = numpy.random.randn(DIM)

    # Do random query on engine 1
    print('\nNeighbour distances with RandomBinaryProjectionTree:')
    print('  -> Candidate count is %d' % engine_rbpt.candidate_count(query))
    results = engine_rbpt.neighbours(query)
    dists = [x[2] for x in results]
    print(dists)

    # Do random query on engine 2
    print('\nNeighbour distances with RandomBinaryProjections:')
    print('  -> Candidate count is %d' % engine.candidate_count(query))
    results = engine.neighbours(query)
    dists = [x[2] for x in results]
    print(dists)

    # Do random query on engine 3
    print('\nNeighbour distances with HashPermutations:')
    print('  -> Candidate count is %d' % engine_perm.candidate_count(query))
    results = engine_perm.neighbours(query)
    dists = [x[2] for x in results]
    print(dists)

    # Do random query on engine 4
    print('\nNeighbour distances with HashPermutations2:')
    print('  -> Candidate count is %d' % engine_perm2.candidate_count(query))
    results = engine_perm2.neighbours(query)
    dists = [x[2] for x in results]
    print(dists)

    # Real neighbours
    print('\nReal neighbour distances:')
    query = query.reshape((1, DIM))
    dists = CosineDistance().distance(matrix, query)
    dists = dists.reshape((-1, ))
    dists = sorted(dists)
    print(dists[:10])
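
    # Reading the output (a note, not in the original): each approximate
    # engine's distances should roughly track the exact top-10 printed last.
    # The tree hash guarantees a minimum candidate count, while the
    # permutation meta-hashes also probe neighbouring buckets via permuted
    # keys, which usually yields more candidates than plain
    # RandomBinaryProjections.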
def example2():

    # Dimension of feature space
    DIM = 100

    # Number of data points (don't use too many because of the exact search)
    POINTS = 20000

    ##########################################################

    print('Performing indexing with HashPermutations...')
    t0 = time.time()

    # Create permutations meta-hash
    permutations = HashPermutations('permut')

    # Create binary hash as child hash
    rbp_perm = RandomBinaryProjections('rbp_perm', 14)
    rbp_conf = {'num_permutation': 50, 'beam_size': 10, 'num_neighbour': 100}

    # Add rbp as child hash of permutations hash
    permutations.add_child_hash(rbp_perm, rbp_conf)

    # Create engine
    engine_perm = Engine(DIM,
                         lshashes=[permutations],
                         distance=CosineDistance())

    # First index some random vectors
    matrix = numpy.zeros((POINTS, DIM))
    for i in range(POINTS):
        v = numpy.random.randn(DIM)
        matrix[i] = v
        engine_perm.store_vector(v)

    # Then update permuted index
    permutations.build_permuted_index()

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    # Get random query vector
    query = numpy.random.randn(DIM)

    # Do random query on the engine
    print('\nNeighbour distances with HashPermutations:')
    print('  -> Candidate count is %d' % engine_perm.candidate_count(query))
    results = engine_perm.neighbours(query)
    dists = [x[2] for x in results]
    print(dists)

    # Real neighbours
    print('\nReal neighbour distances:')
    query = query.reshape((DIM))
    dists = CosineDistance().distance(matrix, query)
    dists = dists.reshape((-1, ))
    dists = sorted(dists)
    print(dists[:10])

    ##########################################################

    print('\nPerforming indexing with HashPermutationMapper...')
    t0 = time.time()

    # Create permutations meta-hash
    permutations2 = HashPermutationMapper('permut2')

    # Create binary hash as child hash
    rbp_perm2 = RandomBinaryProjections('rbp_perm2', 14)

    # Add rbp as child hash of permutations hash
    permutations2.add_child_hash(rbp_perm2)

    # Create engine
    engine_perm2 = Engine(DIM,
                          lshashes=[permutations2],
                          distance=CosineDistance())

    # First index some random vectors
    matrix = numpy.zeros((POINTS, DIM))
    for i in range(POINTS):
        v = numpy.random.randn(DIM)
        matrix[i] = v
        engine_perm2.store_vector(v)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    # Get random query vector
    query = numpy.random.randn(DIM)

    # Do random query on the engine
    print('\nNeighbour distances with HashPermutationMapper:')
    print('  -> Candidate count is %d' % engine_perm2.candidate_count(query))
    results = engine_perm2.neighbours(query)
    dists = [x[2] for x in results]
    print(dists)

    # Real neighbours
    print('\nReal neighbour distances:')
    query = query.reshape((DIM))
    dists = CosineDistance().distance(matrix, query)
    dists = dists.reshape((-1, ))
    dists = sorted(dists)
    print(dists[:10])

    ##########################################################

    print('\nPerforming indexing with multiple binary hashes...')
    t0 = time.time()

    hashes = []
    for k in range(20):
        hashes.append(RandomBinaryProjections('rbp_%d' % k, 10))

    # Create engine
    engine_rbps = Engine(DIM, lshashes=hashes, distance=CosineDistance())

    # First index some random vectors
    matrix = numpy.zeros((POINTS, DIM))
    for i in range(POINTS):
        v = numpy.random.randn(DIM)
        matrix[i] = v
        engine_rbps.store_vector(v)

    t1 = time.time()
    print('Indexing took %f seconds' % (t1 - t0))

    # Get random query vector
    query = numpy.random.randn(DIM)

    # Do random query on the engine
    print('\nNeighbour distances with multiple binary hashes:')
    print('  -> Candidate count is %d' % engine_rbps.candidate_count(query))
    results = engine_rbps.neighbours(query)
    dists = [x[2] for x in results]
    print(dists)

    # Real neighbours
    print('\nReal neighbour distances:')
    query = query.reshape((DIM))
    dists = CosineDistance().distance(matrix, query)
    dists = dists.reshape((-1, ))
    dists = sorted(dists)
    print(dists[:10])
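
# Assumed entry point so the examples can be run directly; not part of the
# original snippets.
if __name__ == '__main__':
    example1()
    example2()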
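# This snippet arrives without its setup. A minimal sketch of the missing
# definitions follows; the concrete values are assumptions, not from the
# original source.
dimension = 100                                # dimension of feature space
vectors = numpy.random.randn(dimension, 1000)  # data set used for exact search
N = 10                                         # number of neighbours to find
nearest = NearestFilter(N + 1)
engines = []
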
# We are going to test these bin widths
bin_widths = [0.01 * x for x in range(1, 5)]
# Create engines for all configurations
for bin_width in bin_widths:
    # Use four random 1-dim discretized projections
    rdp1 = RandomDiscretizedProjections('rdp1', 4, bin_width)
    rdp2 = RandomDiscretizedProjections('rdp2', 4, bin_width)
    rdp3 = RandomDiscretizedProjections('rdp3', 4, bin_width)
    rdp4 = RandomDiscretizedProjections('rdp4', 4, bin_width)
    #ub1 = UniBucket('uni')

    # Create engine with this configuration
    #engine = Engine(dimension, lshashes=[rdp1, rdp2, rdp3, rdp4],
    #                vector_filters=[unique, nearest])
    engine = Engine(dimension,
                    lshashes=[rdp1, rdp2, rdp3, rdp4],
                    vector_filters=[nearest])

    # Add engine to list of engines to evaluate
    engines.append(engine)

print('Creating experiment and performing exact search...')

# Create experiment (looking for ten closest neighbours).
# The constructor performs exact search for evaluation.
# So the data set should not be too large for experiments.
exp = DistanceRatioExperiment(N, vectors, coverage_ratio=0.01)

print('Performing experiment for all engines...')

# Perform experiment for all engines
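# The original snippet breaks off here; the natural completion (a sketch,
# using the perform_experiment call seen elsewhere in this file) would be:
result = exp.perform_experiment(engines)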
    def test_delete_vector_with_provided_value(self):
        engine = Engine(self.dim, lshashes=[UniBucket('testHash')])
        self.fill_engine(engine)
        engine.delete_vector(self.removed_value, self.removed_vector)
        self.check_delete(engine)

    def test_delete_vector_multiple_hash(self):
        hashes = [UniBucket('name_hash_%d' % k) for k in range(10)]
        engine = Engine(self.dim, lshashes=hashes)
        self.fill_engine(engine)
        engine.delete_vector(self.removed_value)
        self.check_delete(engine)

    def test_delete_vector_single_hash(self):
        engine = Engine(self.dim, lshashes=[UniBucket('testHash')])
        self.fill_engine(engine)
        engine.delete_vector(self.removed_value)
        self.check_delete(engine)
class TestRandomBinaryProjectionTree(unittest.TestCase):
    def setUp(self):
        self.memory = MemoryStorage()
        self.redis_object = Redis()
        self.redis_storage = RedisStorage(self.redis_object)
        numpy.random.seed(16)

    def test_retrieval(self):
        # We want 12 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 12, 20)

        # Create engine for 100 dimensional feature space, do not forget to set
        # nearest filter to 20, because default is 10
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 200000 random vectors
        for k in range(200000):
            x = numpy.random.randn(100)
            x_data = 'data {}'.format(k)
            self.engine.store_vector(x, x_data)

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            n = self.engine.neighbours(x)
            self.assertEqual(len(n), 20)

    def test_storage_memory(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.memory.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(self.memory.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for idx in range(len(keys1)):
                self.assertEqual(keys1[idx], keys2[idx])

    def test_storage_redis(self):
        # We want 10 projections, 20 results at least
        rbpt = RandomBinaryProjectionTree('testHash', 10, 20)

        # Create engine for 100 dimensional feature space
        self.engine = Engine(100,
                             lshashes=[rbpt],
                             vector_filters=[NearestFilter(20)])

        # First insert 2000 random vectors
        for k in range(2000):
            x = numpy.random.randn(100)
            x_data = 'data'
            self.engine.store_vector(x, x_data)

        self.redis_storage.store_hash_configuration(rbpt)

        rbpt2 = RandomBinaryProjectionTree(None, None, None)
        rbpt2.apply_config(
            self.redis_storage.load_hash_configuration('testHash'))

        self.assertEqual(rbpt.dim, rbpt2.dim)
        self.assertEqual(rbpt.hash_name, rbpt2.hash_name)
        self.assertEqual(rbpt.projection_count, rbpt2.projection_count)

        for i in range(rbpt.normals.shape[0]):
            for j in range(rbpt.normals.shape[1]):
                self.assertEqual(rbpt.normals[i, j], rbpt2.normals[i, j])

        # Now do random queries and check result set size
        for k in range(10):
            x = numpy.random.randn(100)
            keys1 = rbpt.hash_vector(x, querying=True)
            keys2 = rbpt2.hash_vector(x, querying=True)
            self.assertEqual(len(keys1), len(keys2))
            for idx in range(len(keys1)):
                self.assertEqual(keys1[idx], keys2[idx])
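
# The store/load round-trip above is not specific to the tree hash. A minimal
# sketch of the same pattern for RandomBinaryProjections, assuming it exposes
# the same apply_config/load_hash_configuration interface as the tree hash:
rbp = RandomBinaryProjections('myHash', 10)
engine = Engine(100, lshashes=[rbp])  # Engine resets the hash, creating normals

memory = MemoryStorage()
memory.store_hash_configuration(rbp)

rbp2 = RandomBinaryProjections(None, None)
rbp2.apply_config(memory.load_hash_configuration('myHash'))
assert (rbp.normals == rbp2.normals).all()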