def test_count(self):
    # ``count`` is expected to start at zero and grow by one for each
    # descriptor added individually.
    descr_set = MemoryDescriptorIndex()
    ntools.assert_equal(descr_set.count(), 0)

    first = random_descriptor()
    descr_set.add_descriptor(first)
    ntools.assert_equal(descr_set.count(), 1)

    second = random_descriptor()
    descr_set.add_descriptor(second)
    ntools.assert_equal(descr_set.count(), 2)
def test_count(self):
    # ``count`` is expected to start at zero and grow by one for each
    # descriptor added individually.
    descr_set = MemoryDescriptorIndex()
    ntools.assert_equal(descr_set.count(), 0)

    first = random_descriptor()
    descr_set.add_descriptor(first)
    ntools.assert_equal(descr_set.count(), 1)

    second = random_descriptor()
    descr_set.add_descriptor(second)
    ntools.assert_equal(descr_set.count(), 2)
def test_update_index_no_existing_index(self):
    # With nothing indexed yet, ``update_index`` is expected to leave the
    # descriptor set and the hash key-value store in the same state that
    # test_build_index_fresh_build checks for after ``build_index``.
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # One-element vectors carrying the element's own UUID keep the dummy
    # hash value easy to predict.
    for element in elements:
        element.set_vector(np.ones(1, float) * element.uuid())

    lsh_index.update_index(elements)

    # Every element should now be held by the attached descriptor set.
    self.assertEqual(descriptor_set.count(), 5)
    for element in elements:
        self.assertIn(element, descriptor_set)

    # Expect one hash key per element, each mapping to only that
    # element's UUID.
    self.assertEqual(hash_store.count(), 5)
    for expected_hash in range(5):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})
def test_build_index_fresh_build(self):
    # Building on empty components should populate both the descriptor
    # set and the hash key-value store with the given descriptors.
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # One-element vectors valued by list position keep the dummy hash
    # value easy to predict.
    for position, element in enumerate(elements):
        element.set_vector(np.ones(1, float) * position)

    lsh_index.build_index(elements)

    # Every element should now be held by the attached descriptor set.
    self.assertEqual(descriptor_set.count(), 5)
    for element in elements:
        self.assertIn(element, descriptor_set)

    # Expect one hash key per element, each mapping to only that
    # element's UUID.
    self.assertEqual(hash_store.count(), 5)
    for expected_hash in range(5):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})
def test_count(self):
    # ``count`` is expected to reflect both single and bulk additions.
    descr_set = MemoryDescriptorIndex()
    self.assertEqual(descr_set.count(), 0)

    first = random_descriptor()
    descr_set.add_descriptor(first)
    self.assertEqual(descr_set.count(), 1)

    # Three more descriptors added in a single bulk call.
    batch = [random_descriptor() for _ in range(3)]
    descr_set.add_many_descriptors(batch)
    self.assertEqual(descr_set.count(), 4)

    last = random_descriptor()
    descr_set.add_descriptor(last)
    self.assertEqual(descr_set.count(), 5)
def test_build_index_fresh_build(self):
    # Building on empty components should populate both the descriptor
    # set and the hash key-value store with the given descriptors.
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # One-element vectors valued by list position keep the dummy hash
    # value easy to predict.
    for position, element in enumerate(elements):
        element.set_vector(np.ones(1, float) * position)

    lsh_index.build_index(elements)

    # Every element should now be held by the attached descriptor set.
    self.assertEqual(descriptor_set.count(), 5)
    for element in elements:
        self.assertIn(element, descriptor_set)

    # Expect one hash key per element, each mapping to only that
    # element's UUID.
    self.assertEqual(hash_store.count(), 5)
    for expected_hash in range(5):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})
def test_update_index_no_existing_index(self):
    # With nothing indexed yet, ``update_index`` is expected to leave the
    # descriptor set and the hash key-value store in the same state that
    # test_build_index_fresh_build checks for after ``build_index``.
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    elements = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    # One-element vectors carrying the element's own UUID keep the dummy
    # hash value easy to predict.
    for element in elements:
        element.set_vector(np.ones(1, float) * element.uuid())

    lsh_index.update_index(elements)

    # Every element should now be held by the attached descriptor set.
    self.assertEqual(descriptor_set.count(), 5)
    for element in elements:
        self.assertIn(element, descriptor_set)

    # Expect one hash key per element, each mapping to only that
    # element's UUID.
    self.assertEqual(hash_store.count(), 5)
    for expected_hash in range(5):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})
def test_count(self):
    # ``count`` is expected to reflect both single and bulk additions.
    descr_set = MemoryDescriptorIndex()
    ntools.assert_equal(descr_set.count(), 0)

    first = random_descriptor()
    descr_set.add_descriptor(first)
    ntools.assert_equal(descr_set.count(), 1)

    # Three more descriptors added in a single bulk call.
    batch = [random_descriptor() for _ in range(3)]
    descr_set.add_many_descriptors(batch)
    ntools.assert_equal(descr_set.count(), 4)

    last = random_descriptor()
    descr_set.add_descriptor(last)
    ntools.assert_equal(descr_set.count(), 5)
def test_count_empty_hash2uid(self):
    """
    Check that the LSH index count stays at 0 while the hash-to-UID store
    is empty, whatever the descriptor set holds, and afterwards follows
    the number of UIDs stored under the hash key.
    """
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    self.assertEqual(descriptor_set.count(), 0)
    self.assertEqual(hash_store.count(), 0)

    lsh = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                  hash_store)
    self.assertEqual(lsh.count(), 0)

    # Growing only the descriptor set must leave the LSH "size" at zero.
    for uid, expected_descriptors in ((0, 1), (1, 2)):
        lsh.descriptor_index.add_descriptor(DescriptorMemoryElement('t', uid))
        self.assertEqual(lsh.descriptor_index.count(), expected_descriptors)
        self.assertEqual(lsh.hash2uuids_kvstore.count(), 0)
        self.assertEqual(lsh.count(), 0)

    # Writing ever larger UID sets under hash key 0 should drive the LSH
    # count while the descriptor set stays at two elements.
    for uid_set in ({0}, {0, 1}, {0, 1, 2}):
        lsh.hash2uuids_kvstore.add(0, uid_set)
        self.assertEqual(lsh.descriptor_index.count(), 2)
        self.assertEqual(lsh.count(), len(uid_set))
def test_count_empty_hash2uid(self):
    """
    Check that the LSH index count stays at 0 while the hash-to-UID store
    is empty, whatever the descriptor set holds, and afterwards follows
    the number of UIDs stored under the hash key.
    """
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    self.assertEqual(descriptor_set.count(), 0)
    self.assertEqual(hash_store.count(), 0)

    lsh = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                  hash_store)
    self.assertEqual(lsh.count(), 0)

    # Growing only the descriptor set must leave the LSH "size" at zero.
    for uid, expected_descriptors in ((0, 1), (1, 2)):
        lsh.descriptor_index.add_descriptor(DescriptorMemoryElement('t', uid))
        self.assertEqual(lsh.descriptor_index.count(), expected_descriptors)
        self.assertEqual(lsh.hash2uuids_kvstore.count(), 0)
        self.assertEqual(lsh.count(), 0)

    # Writing ever larger UID sets under hash key 0 should drive the LSH
    # count while the descriptor set stays at two elements.
    for uid_set in ({0}, {0, 1}, {0, 1, 2}):
        lsh.hash2uuids_kvstore.add(0, uid_set)
        self.assertEqual(lsh.descriptor_index.count(), 2)
        self.assertEqual(lsh.count(), len(uid_set))
def test_count(self):
    # ``count`` is expected to reflect both single and bulk additions.
    descr_set = MemoryDescriptorIndex()
    self.assertEqual(descr_set.count(), 0)

    first = random_descriptor()
    descr_set.add_descriptor(first)
    self.assertEqual(descr_set.count(), 1)

    # Three more descriptors added in a single bulk call.
    batch = [random_descriptor() for _ in range(3)]
    descr_set.add_many_descriptors(batch)
    self.assertEqual(descr_set.count(), 4)

    last = random_descriptor()
    descr_set.add_descriptor(last)
    self.assertEqual(descr_set.count(), 5)
def test_update_index_add_new_descriptors(self):
    # ``update_index`` called after ``build_index`` should extend both the
    # descriptor set and the hash key-value store with the new elements.
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    initial = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    additional = [DescriptorMemoryElement('t', uid) for uid in (5, 6)]

    # One-element vectors carrying the element's own UUID keep the dummy
    # hash value easy to predict.
    for element in initial + additional:
        element.set_vector(np.ones(1, float) * element.uuid())

    # Stage 1: build from the initial batch only.
    lsh_index.build_index(initial)
    self.assertEqual(descriptor_set.count(), 5)
    for element in initial:
        self.assertIn(element, descriptor_set)
    for element in additional:
        self.assertNotIn(element, descriptor_set)
    # Expect one hash key per indexed element, mapping to its own UUID.
    self.assertEqual(hash_store.count(), 5)
    for expected_hash in range(5):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})

    # Stage 2: update with the additional batch and re-check both stores.
    lsh_index.update_index(additional)
    self.assertEqual(descriptor_set.count(), 7)
    for element in initial + additional:
        self.assertIn(element, descriptor_set)
    # The two new elements should have added two more hash keys.
    self.assertEqual(hash_store.count(), 7)
    for expected_hash in range(7):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})
def test_update_index_add_new_descriptors(self):
    # ``update_index`` called after ``build_index`` should extend both the
    # descriptor set and the hash key-value store with the new elements.
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    initial = [DescriptorMemoryElement('t', uid) for uid in range(5)]
    additional = [DescriptorMemoryElement('t', uid) for uid in (5, 6)]

    # One-element vectors carrying the element's own UUID keep the dummy
    # hash value easy to predict.
    for element in initial + additional:
        element.set_vector(np.ones(1, float) * element.uuid())

    # Stage 1: build from the initial batch only.
    lsh_index.build_index(initial)
    self.assertEqual(descriptor_set.count(), 5)
    for element in initial:
        self.assertIn(element, descriptor_set)
    for element in additional:
        self.assertNotIn(element, descriptor_set)
    # Expect one hash key per indexed element, mapping to its own UUID.
    self.assertEqual(hash_store.count(), 5)
    for expected_hash in range(5):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})

    # Stage 2: update with the additional batch and re-check both stores.
    lsh_index.update_index(additional)
    self.assertEqual(descriptor_set.count(), 7)
    for element in initial + additional:
        self.assertIn(element, descriptor_set)
    # The two new elements should have added two more hash keys.
    self.assertEqual(hash_store.count(), 7)
    for expected_hash in range(7):
        self.assertSetEqual(hash_store.get(expected_hash), {expected_hash})
def test_update_index_existing_descriptors_frozenset(self):
    """
    Variant of ``test_update_index_similar_descriptors`` in which the
    descriptor set and the hash key-value store are pre-populated by hand
    (the latter with ``frozenset`` values) instead of via ``build_index``,
    checking that ``update_index`` still merges in the new descriptors.
    """
    # Two batches sharing vectors position-wise but with distinct UUIDs.
    seeded = [DescriptorMemoryElement('t', uid).set_vector([uid])
              for uid in range(5)]
    incoming = [DescriptorMemoryElement('t', uid + 5).set_vector([uid])
                for uid in range(5)]

    descriptor_set = MemoryDescriptorIndex()
    descriptor_set.add_many_descriptors(seeded)

    # Seed hash keys 0..4, each with an immutable single-UID set.
    hash_store = MemoryKeyValueStore()
    for key in range(5):
        hash_store.add(key, frozenset({key}))

    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)
    lsh_index.update_index(incoming)

    assert descriptor_set.count() == 10
    # Both batches should be reported as members of the descriptor set.
    for element in seeded + incoming:
        assert element in descriptor_set

    # The key set is unchanged; every key now maps to a seeded UID plus
    # the UID of the incoming descriptor with the same vector.
    assert set(hash_store.keys()) == set(range(5))
    for key in range(5):
        assert hash_store.get(key) == {key, key + 5}
def test_update_index_existing_descriptors_frozenset(self):
    """
    Variant of ``test_update_index_similar_descriptors`` in which the
    descriptor set and the hash key-value store are pre-populated by hand
    (the latter with ``frozenset`` values) instead of via ``build_index``,
    checking that ``update_index`` still merges in the new descriptors.
    """
    # Two batches sharing vectors position-wise but with distinct UUIDs.
    seeded = [DescriptorMemoryElement('t', uid).set_vector([uid])
              for uid in range(5)]
    incoming = [DescriptorMemoryElement('t', uid + 5).set_vector([uid])
                for uid in range(5)]

    descriptor_set = MemoryDescriptorIndex()
    descriptor_set.add_many_descriptors(seeded)

    # Seed hash keys 0..4, each with an immutable single-UID set.
    hash_store = MemoryKeyValueStore()
    for key in range(5):
        hash_store.add(key, frozenset({key}))

    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)
    lsh_index.update_index(incoming)

    assert descriptor_set.count() == 10
    # Both batches should be reported as members of the descriptor set.
    for element in seeded + incoming:
        assert element in descriptor_set

    # The key set is unchanged; every key now maps to a seeded UID plus
    # the UID of the incoming descriptor with the same vector.
    assert set(hash_store.keys()) == set(range(5))
    for key in range(5):
        assert hash_store.get(key) == {key, key + 5}
def test_update_index_similar_descriptors(self):
    """
    Build an index, then update it with descriptors that repeat the same
    vectors under new UUIDs, and check the resulting state of the
    descriptor set and the hash key-value store.
    """
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    # Two batches sharing vectors position-wise but with distinct UUIDs.
    built = [DescriptorMemoryElement('t', uid).set_vector([uid])
             for uid in range(5)]
    updated = [DescriptorMemoryElement('t', uid + 5).set_vector([uid])
               for uid in range(5)]

    lsh_index.build_index(built)
    lsh_index.update_index(updated)

    assert descriptor_set.count() == 10
    # Both batches should be reported as members of the descriptor set.
    for element in built + updated:
        assert element in descriptor_set

    # Only five hash keys are expected, each mapping to the pair of UUIDs
    # whose descriptors share a vector.
    assert set(hash_store.keys()) == set(range(5))
    for key in range(5):
        assert hash_store.get(key) == {key, key + 5}
def test_update_index_similar_descriptors(self):
    """
    Build an index, then update it with descriptors that repeat the same
    vectors under new UUIDs, and check the resulting state of the
    descriptor set and the hash key-value store.
    """
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    # Two batches sharing vectors position-wise but with distinct UUIDs.
    built = [DescriptorMemoryElement('t', uid).set_vector([uid])
             for uid in range(5)]
    updated = [DescriptorMemoryElement('t', uid + 5).set_vector([uid])
               for uid in range(5)]

    lsh_index.build_index(built)
    lsh_index.update_index(updated)

    assert descriptor_set.count() == 10
    # Both batches should be reported as members of the descriptor set.
    for element in built + updated:
        assert element in descriptor_set

    # Only five hash keys are expected, each mapping to the pair of UUIDs
    # whose descriptors share a vector.
    assert set(hash_store.keys()) == set(range(5))
    for key in range(5):
        assert hash_store.get(key) == {key, key + 5}
def test_update_index_duplicate_descriptors(self):
    """
    Build an index, then update it with descriptors that have the same
    UUIDs and vectors, and check that nothing in the descriptor set or the
    hash key-value store changes (the update is idempotent).
    """
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    # Two separately constructed batches with matching UUIDs and vectors.
    built = [DescriptorMemoryElement('t', uid).set_vector([uid])
             for uid in range(5)]
    repeated = [DescriptorMemoryElement('t', uid).set_vector([uid])
                for uid in range(5)]

    lsh_index.build_index(built)
    lsh_index.update_index(repeated)

    assert descriptor_set.count() == 5
    # Instances from either batch should be reported as members.
    for element in built + repeated:
        assert element in descriptor_set

    # Each hash key should still map to exactly one UUID.
    assert set(hash_store.keys()) == set(range(5))
    for key in range(5):
        assert hash_store.get(key) == {key}
def test_update_index_duplicate_descriptors(self):
    """
    Build an index, then update it with descriptors that have the same
    UUIDs and vectors, and check that nothing in the descriptor set or the
    hash key-value store changes (the update is idempotent).
    """
    descriptor_set = MemoryDescriptorIndex()
    hash_store = MemoryKeyValueStore()
    lsh_index = LSHNearestNeighborIndex(DummyHashFunctor(), descriptor_set,
                                        hash_store)

    # Two separately constructed batches with matching UUIDs and vectors.
    built = [DescriptorMemoryElement('t', uid).set_vector([uid])
             for uid in range(5)]
    repeated = [DescriptorMemoryElement('t', uid).set_vector([uid])
                for uid in range(5)]

    lsh_index.build_index(built)
    lsh_index.update_index(repeated)

    assert descriptor_set.count() == 5
    # Instances from either batch should be reported as members.
    for element in built + repeated:
        assert element in descriptor_set

    # Each hash key should still map to exactly one UUID.
    assert set(hash_store.keys()) == set(range(5))
    for key in range(5):
        assert hash_store.get(key) == {key}