def verify_popcount_indices(arena):
    """Assert that an arena's popcount index table agrees with its contents.

    ``arena.popcount_indices`` is unpacked as a sequence of native ``int``
    values (the length check assumes 4 bytes per entry).  The table must
    hold ``arena.metadata.num_bits + 2`` entries and its last entry must
    equal ``len(arena)``.  Each consecutive pair ``values[i], values[i+1]``
    is treated as the half-open range of arena positions whose fingerprint
    (element 1 of ``arena[j]``) must have a popcount of exactly ``i``.

    Raises AssertionError on the first inconsistency found.
    """
    raw = arena.popcount_indices
    assert len(raw) % 4 == 0
    # A repeat count ("Ni") unpacks the same values as "i" written N times.
    layout = "%di" % (len(raw) // 4)
    values = struct.unpack(layout, raw)
    assert len(values) == arena.metadata.num_bits+2, (len(values), arena.metadata.num_bits)
    assert values[-1] == len(arena), (values[-1], len(arena), values)
    # Walk adjacent (start, end) offsets; the pair's position is the popcount
    # every fingerprint in that range is required to have.
    for i, (start, end) in enumerate(zip(values, values[1:])):
        for j in range(start, end):
            fp = arena[j][1]
            assert byte_popcount(fp) == i, (byte_popcount(fp), i, start, end)
def test_arena_is_ordered_by_popcount(self):
    """Iterating the CHEBI_TARGETS arena yields non-decreasing popcounts."""
    arena = self._open(CHEBI_TARGETS)
    previous = 0
    # Only the fingerprint half of each (id, fingerprint) pair matters here.
    for _, fingerprint in arena:
        current = bitops.byte_popcount(fingerprint)
        self.assertTrue(previous <= current, (previous, current))
        previous = current
def test_every_bit_sorted(self):
    """Arenas built from all 256 one-byte fingerprints are popcount-sorted.

    The same set of fingerprints is loaded in ascending and in descending
    input order; in both cases the popcounts read back from ``arena.arena``
    must equal the sorted popcounts of the inputs, and the popcount index
    table must pass ``verify_popcount_indices``.
    """
    # This is a bit trickier since there's no guaranteed order
    # of the contents of the arena
    all_bytes = [chr(i) for i in range(256)]
    expected = sorted(byte_popcount(fp) for fp in all_bytes)

    # The input order must not affect the resulting popcount sequence.
    # NOTE(review): the second argument to _load is passed as True in both
    # cases; presumably it requests reordering -- confirm against _load.
    for fingerprints in (all_bytes, all_bytes[::-1]):
        arena = _load(fingerprints, True)
        # Build a real list so the comparison does not depend on map()
        # returning a list.
        popcounts = [byte_popcount(fp) for fp in arena.arena]
        self.assertEqual(popcounts, expected)
        verify_popcount_indices(arena)
def test_iter_arenas_select_size(self):
    """``iter_arenas(100)`` splits the arena into ordered 100-entry pieces.

    For every subarena yielded, the test checks that its metadata passes
    ``_check_target_metadata``, that it has exactly 100 entries, that its
    ids equal the next 100 ids of the full arena, and that popcounts keep
    non-decreasing order across subarena boundaries.  Afterwards no ids of
    the full arena may be left unconsumed.
    """
    arena = self._open(CHEBI_TARGETS)
    remaining_ids = [target_id for (target_id, fp) in arena]
    prev = 0
    for subarena in arena.iter_arenas(100):
        self._check_target_metadata(subarena.metadata)
        # NOTE(review): this requires every subarena, including the last,
        # to hold exactly 100 entries -- presumably the fixture size is a
        # multiple of 100; confirm against the data file.
        self.assertEqual(len(subarena), 100)
        subids = []
        for target_id, fp in subarena:
            subids.append(target_id)
            popcount = bitops.byte_popcount(fp)
            # prev carries over between subarenas, so ordering is checked
            # across the whole arena and not just within one piece.
            self.assertTrue(prev <= popcount, (prev, popcount))
            prev = popcount

        self.assertEqual(remaining_ids[:100], subids)
        del remaining_ids[:100]

    self.assertFalse(remaining_ids)