def test_remove_from_index_last_element_with_cache(self):
    """
    Removing the only indexed hash should also leave the cache element
    empty.
    """
    cache = DataMemoryElement()
    hash_index = SkLearnBallTreeHashIndex(cache_element=cache,
                                          random_seed=0)

    # A single 256-bit hash code makes up the whole index.
    hashes = np.empty((1, 256), dtype=bool)
    hashes[0] = int_to_bit_vector_large(1, 256)

    # After building, the index holds one entry and the cache is populated.
    hash_index.build_index(hashes)
    self.assertEqual(hash_index.count(), 1)
    self.assertFalse(cache.is_empty())

    # Removing that one entry must empty both the index and the cache.
    hash_index.remove_from_index(hashes)
    self.assertEqual(hash_index.count(), 0)
    self.assertTrue(cache.is_empty())
def test_remove_from_index(self):
    # Test that we actually remove from the index.
    #
    # NOTE(review): another ``test_remove_from_index`` definition appears
    # later in this file. If both live in the same class, the later one
    # replaces this one and this test never runs -- confirm, then rename
    # or remove one of them.
    bt = SkLearnBallTreeHashIndex(random_seed=0)

    # Index the 256-bit vectors for the integers 0..999, one per row.
    index = np.ndarray((1000, 256), bool)
    for i in range(1000):
        index[i] = int_to_bit_vector_large(i, 256)
    bt.build_index(index)

    # Precondition: BallTree data should now contain all 1000 entries, so
    # the shape check after removal is meaningful.
    self.assertEqual(bt.bt.data.shape, (1000, 256))

    bt.remove_from_index([
        int_to_bit_vector_large(42, 256),
        int_to_bit_vector_large(998, 256),
    ])

    # Make sure data block is of the expected shape (two rows shorter).
    new_data = np.asarray(bt.bt.data)
    self.assertEqual(new_data.shape, (998, 256))

    # Make sure expected arrays are missing from data block.
    new_data_set = set(tuple(r) for r in new_data.tolist())
    self.assertNotIn(tuple(int_to_bit_vector_large(42, 256)),
                     new_data_set)
    self.assertNotIn(tuple(int_to_bit_vector_large(998, 256)),
                     new_data_set)
def test_remove_from_index(self):
    # Removing hashes must actually drop their rows from the underlying
    # ball tree's data block.
    n_hashes = 1000
    n_bits = 256
    removed_values = (42, 998)

    hash_index = SkLearnBallTreeHashIndex(random_seed=0)

    # Row ``v`` holds the bit vector for integer ``v``.
    hashes = np.empty((n_hashes, n_bits), dtype=bool)
    for value in range(n_hashes):
        hashes[value] = int_to_bit_vector_large(value, n_bits)
    hash_index.build_index(hashes)

    # Precondition: every hash made it into the tree.
    self.assertEqual(hash_index.bt.data.shape, (n_hashes, n_bits))

    hash_index.remove_from_index(
        [int_to_bit_vector_large(value, n_bits) for value in removed_values]
    )

    # The data block should be exactly two rows shorter...
    remaining = np.asarray(hash_index.bt.data)
    self.assertEqual(remaining.shape, (998, n_bits))

    # ...and neither removed vector may still be present in it.
    remaining_rows = {tuple(row) for row in remaining.tolist()}
    for value in removed_values:
        self.assertNotIn(tuple(int_to_bit_vector_large(value, n_bits)),
                         remaining_rows)
def test_remove_from_index_last_element(self):
    """
    Removing the only element, or the final batch of elements, should
    leave the index empty with no ball tree attached.
    """
    # Case 1: a single hash goes in and the same hash comes out.
    single_index = SkLearnBallTreeHashIndex(random_seed=0)
    single_hash = np.empty((1, 256), dtype=bool)
    single_hash[0] = int_to_bit_vector_large(1, 256)
    single_index.build_index(single_hash)
    self.assertEqual(single_index.count(), 1)

    single_index.remove_from_index(single_hash)
    self.assertEqual(single_index.count(), 0)
    self.assertIsNone(single_index.bt)

    # Case 2: many hashes go in and are removed in batches of 250 until
    # nothing is left.
    batched_index = SkLearnBallTreeHashIndex(random_seed=0)
    hashes = np.empty((1000, 256), dtype=bool)
    for value in range(1000):
        hashes[value] = int_to_bit_vector_large(value, 256)
    batched_index.build_index(hashes)

    # The first three batches each shrink the index by 250 entries while
    # the tree itself stays in place.
    for start in (0, 250, 500):
        batched_index.remove_from_index(hashes[start:start + 250])
        self.assertEqual(batched_index.count(), 750 - start)
        self.assertIsNotNone(batched_index.bt)

    # The final batch empties the index, which should drop the tree.
    batched_index.remove_from_index(hashes[750:])
    self.assertEqual(batched_index.count(), 0)
    self.assertIsNone(batched_index.bt)