def check_integrity(self, target_size=-1): """Verify the list has non-overlapping chunks only, and the total size matches target_size :param target_size: if not -1, the total size of the chain must be target_size :raise AssertionError: if the size doen't match""" if target_size > -1: assert self[-1].rbound() == target_size assert reduce(lambda x, y: x + y, (d.ts for d in self), 0) == target_size # END target size verification if len(self) < 2: return # check data for dc in self: assert dc.ts > 0 if dc.has_data(): assert len(dc.data) >= dc.ts # END for each dc left = islice(self, 0, len(self) - 1) right = iter(self) right.next() # this is very pythonic - we might have just use index based access here, # but this could actually be faster for lft, rgt in izip(left, right): assert lft.rbound() == rgt.to assert lft.to + lft.ts == rgt.to
def _set_cache_(self, attr): # currently this can only be _offset_map # TODO: make this a simple sorted offset array which can be bisected # to find the respective entry, from which we can take a +1 easily # This might be slower, but should also be much lighter in memory ! offsets_sorted = sorted(self._index.offsets()) last_offset = len(self._pack.data()) - self._pack.footer_size assert offsets_sorted, "Cannot handle empty indices" offset_map = None if len(offsets_sorted) == 1: offset_map = {offsets_sorted[0]: last_offset} else: iter_offsets = iter(offsets_sorted) iter_offsets_plus_one = iter(offsets_sorted) next(iter_offsets_plus_one) consecutive = izip(iter_offsets, iter_offsets_plus_one) offset_map = dict(consecutive) # the last offset is not yet set offset_map[offsets_sorted[-1]] = last_offset # END handle offset amount self._offset_map = offset_map