def test_nn_small_leaves(self):
    """
    Build an MRPT index over random descriptors using deep trees and
    check that a ``k``-nearest-neighbor query returns exactly ``k``
    neighbors with one distance per neighbor.
    """
    np.random.seed(0)

    n_descriptors = 10**4
    n_dims = 256
    tree_depth = 10
    # L ~ n / 2**depth = 10^4 / 2^10 ~ 10
    n_neighbors = 200
    # 3k / L = 60
    n_trees = 60

    elements = [
        DescriptorMemoryElement('test', i) for i in range(n_descriptors)
    ]
    # Vectors are drawn in element order so the seeded random stream is
    # consumed deterministically.
    for elem in elements:
        elem.set_vector(np.random.rand(n_dims))

    query = DescriptorMemoryElement('q', -1)
    query.set_vector(np.zeros((n_dims, )))

    descr_set = MemoryDescriptorSet()
    index = MRPTNearestNeighborsIndex(
        descr_set,
        num_trees=n_trees,
        depth=tree_depth,
        random_seed=0,
    )
    index.build_index(elements)

    neighbors, distances = index.nn(query, n_neighbors)
    self.assertEqual(len(neighbors), len(distances))
    self.assertEqual(len(neighbors), n_neighbors)
def test_nn_pathological_example(self):
    """
    Build an MRPT index over descriptors that all lie on one line and
    check that a query returns far fewer neighbors than requested.
    """
    n_descriptors = 10**4
    n_dims = 256
    tree_depth = 10
    # L ~ n / 2**depth = 10^4 / 2^10 ~ 10
    n_neighbors = 200
    # 3k / L = 60
    n_trees = 60

    elements = [
        DescriptorMemoryElement('test', i) for i in range(n_descriptors)
    ]
    # Put all descriptors on a line so that different trees get same
    # divisions: every component of a vector equals the element's UUID.
    for elem in elements:
        # noinspection PyTypeChecker
        elem.set_vector(np.full(n_dims, elem.uuid(), dtype=np.float64))

    query = DescriptorMemoryElement('q', -1)
    query.set_vector(np.zeros((n_dims, )))

    descr_set = MemoryDescriptorSet()
    index = MRPTNearestNeighborsIndex(
        descr_set,
        num_trees=n_trees,
        depth=tree_depth,
        random_seed=0,
    )
    index.build_index(elements)

    neighbors, distances = index.nn(query, n_neighbors)
    self.assertEqual(len(neighbors), len(distances))
    # We should get about 10 descriptors back instead of the requested
    # 200.
    self.assertLess(len(neighbors), 20)
def test_many_descriptors(self):
    """
    Build an MRPT index over 10^4 random descriptors and check that a
    query for ``k`` neighbors returns exactly ``k`` neighbors with one
    distance per neighbor.
    """
    np.random.seed(0)

    n = 10 ** 4
    dim = 256
    depth = 5
    num_trees = 10
    k = 10

    d_index = [DescriptorMemoryElement('test', i) for i in range(n)]
    # Plain loop rather than a comprehension: the calls are made only for
    # their side effect, so no throwaway result list is built.
    for d in d_index:
        d.set_vector(np.random.rand(dim))
    q = DescriptorMemoryElement('q', -1)
    q.set_vector(np.zeros((dim,)))

    # Same descriptor-set type and assertion style as the sibling tests.
    di = MemoryDescriptorSet()
    mrpt = MRPTNearestNeighborsIndex(
        di, num_trees=num_trees, depth=depth, random_seed=0)
    mrpt.build_index(d_index)

    nbrs, dists = mrpt.nn(q, k)
    self.assertEqual(len(nbrs), len(dists))
    self.assertEqual(len(nbrs), k)