Exemplo n.º 1
def test_l1_layer(byte_value: int, num_bytes: int) -> None:
    """Check Poppy's cumulative L1 counts against a hand-computed reference.

    The input is ``num_bytes`` copies of ``byte_value``.  The reference holds
    one entry per 2048-bit (256-byte) L1 block: the number of 1 bits that
    precede the block, counted from the start of the enclosing 2**29-byte
    upper block.

    Args:
        byte_value: Byte value (0-255) repeated to build the bit array.
        num_bytes: Number of times ``byte_value`` is repeated.
    """
    bits = bitarray()
    bits.frombytes(bytes([byte_value]) * num_bytes)

    # Manually compute the popcount sums here.
    level_1_size = math.ceil(len(bits) / 2048)
    level_1: List[int] = [0] * level_1_size

    # Credit each 8-byte word's popcount to the *next* L1 block so that, after
    # the prefix sum below, every entry excludes its own block.
    v = memoryview(bits)
    for byte_offset in range(0, len(v), 8):
        level_1_idx = 1 + byte_offset // 256
        if level_1_idx < len(level_1):
            level_1[level_1_idx] += popcount(v[byte_offset:byte_offset + 8])

    # The first L1 entry of every upper block starts from zero.
    for byte_offset in range(0, num_bytes, 1 << 29):
        level_1_idx = byte_offset // 256
        level_1[level_1_idx] = 0

    # Prefix-sum within each upper block.  Entries that begin an upper block
    # must stay at zero; adding the previous entry to them would carry the
    # count of the preceding upper block across the boundary.
    l1_blocks_per_upper_block = (1 << 29) // 256
    for i in range(1, len(level_1)):
        if i % l1_blocks_per_upper_block != 0:
            level_1[i] += level_1[i - 1]

    poppy = Poppy(bits)
    # Compare entry by entry (cumulative counts live in the even slots of
    # poppy._level_1) so that a failure reports the offending index instead
    # of dumping two potentially huge lists.
    for i, expected in enumerate(level_1):
        assert poppy._level_1[2 * i] == expected, f"Failed at {i}"
Exemplo n.º 2
    def rank(self, i: int) -> int:
        # NOTE(review): the next line reads like this method's docstring, but
        # its quote delimiters are missing in this copy; restore them.
        Returns the number of 1 bits up to and including position i.
        # Byte that contains bit i.
        byte_offset = i // 8

        sum_rank = 0
        # Level 0: one entry per 2**29 bytes (2**32 bits) of input.
        level_0_idx = byte_offset // (1 << 29)
        sum_rank += self._level_0[level_0_idx]

        # Level 1: the even slot for the 256-byte block containing bit i.
        level_1_idx = (byte_offset // 256) * 2
        sum_rank += self._level_1[level_1_idx]

        # Level 2: the odd slot next to the L1 entry is read as a packed value;
        # one count is added for each 64-byte basic block to the left of the
        # one containing bit i (presumably unpacked by _get_relative_count).
        basic_block_idx = (byte_offset % 256) // 64
        level_2_idx = level_1_idx + 1
        packed_relative_counts = self._level_1[level_2_idx]
        left_block_idx = 0
        while left_block_idx < basic_block_idx:
            # NOTE(review): the call below is cut off in this copy (unclosed
            # parenthesis); its arguments are not visible here.
            sum_rank += self._get_relative_count(
            left_block_idx += 1

        # Now do a manual popcount within the current basic block.
        # start_bit is the first bit of the 64-byte basic block containing
        # bit i; end_bit is exclusive.
        start_bit = 8 * (byte_offset // 64) * 64
        end_bit = i + 1

        # Whole 64-bit words first.
        while start_bit + 64 <= end_bit:
            start_byte = start_bit // 8
            # NOTE(review): the statement below is cut off in this copy
            # (unclosed slice and call).
            sum_rank += popcount(self._memory_view[start_byte:(start_byte +
            start_bit += 64

        # Then whole bytes: same table lookup as the tail case below, with the
        # slack fixed at 7.
        while start_bit + 8 <= end_bit:
            start_byte = start_bit // 8
            sum_rank += RANK_IN_BYTE[256 * 7 + self._memory_view[start_byte]]
            start_bit += 8

        # Finally the partial byte, if any; slack is the number of bits
        # remaining minus one.
        if start_bit < end_bit:
            slack = end_bit - start_bit - 1
            start_byte = start_bit // 8
            # NOTE(review): the statement below is cut off in this copy
            # (unclosed subscript).
            sum_rank += RANK_IN_BYTE[256 * slack +

        return sum_rank
Exemplo n.º 3
def test_l0_layer(byte_value: int, num_bytes: int) -> None:
    """Check Poppy's cumulative L0 counts against a hand-computed reference.

    The input is ``num_bytes`` copies of ``byte_value``.  The reference holds
    one entry per 2**32-bit (2**29-byte) upper block: the number of 1 bits
    that precede the block.

    Args:
        byte_value: Byte value (0-255) repeated to build the bit array.
        num_bytes: Number of times ``byte_value`` is repeated.
    """
    bits = bitarray()
    bits.frombytes(bytes([byte_value]) * num_bytes)

    # Manually compute the popcount sums here.
    num_popcount_sums = math.ceil(len(bits) / (2**32))
    popcount_sums: List[int] = [0] * num_popcount_sums

    # Credit each 8-byte word's popcount to the *next* upper block so that,
    # after the prefix sum below, every entry excludes its own block.
    v = memoryview(bits)
    for byte_offset in range(0, len(v), 8):
        popcount_idx = 1 + byte_offset // (2**29)
        if popcount_idx < len(popcount_sums):
            popcount_sums[popcount_idx] += popcount(
                v[byte_offset:byte_offset + 8])

    for i in range(1, len(popcount_sums)):
        popcount_sums[i] += popcount_sums[i - 1]

    poppy = Poppy(bits)
    assert list(poppy._level_0) == popcount_sums
Exemplo n.º 4
    def _initialize_select_structure(self) -> "List[array]":
        # NOTE(review): the next three lines read like this method's
        # docstring, but the quote delimiters are missing in this copy;
        # restore them.
        For each upper block, we precompute the position of every 8192nd one bit
        (relative to the beginning of the upper block). These positions can be
        stored in 32 bits.
        # NOTE(review): bit_array_byte_length is not used in the code visible
        # below; see the note on the range(...) call further down.
        bit_array_byte_length = len(self._memory_view)
        select_structure: "List[array]" = []
        for level_0_idx, level_0_sum in enumerate(self._level_0):
            # Number of 1 bits before this upper block.
            rank_start = level_0_sum

            # Rank at the last bit of this upper block, clamped to the last
            # bit of the whole bit array.
            rank_end = self.rank(
                min((level_0_idx + 1) * (1 << 32) - 1,
                    len(self._bit_array) - 1))

            # One sample slot per 8192 one-bits in this upper block.
            # Type code 'L' is C unsigned long (at least 32 bits wide).
            num_one_bits = rank_end - rank_start
            num_entries = math.ceil(num_one_bits / 8192)
            select_structure.append(array('L', [0] * num_entries))

            # Now scan through the upper level.
            popcount_sum = 0
            current_select_target = 0
            # NOTE(review): the range(...) call below has unbalanced
            # parentheses in this copy; a line appears to be missing between
            # its two lines (possibly a clamp using bit_array_byte_length) --
            # TODO confirm against the original source.
            for byte_offset in range((1 << 29) * level_0_idx,
                                         (1 << 29) * (level_0_idx + 1)), 8):
                old_sum = popcount_sum
                popcount_sum += popcount(
                    self._memory_view[byte_offset:byte_offset + 8])
                # The targeted one-bit (0-based rank within this upper block)
                # lies inside the current 8-byte word.
                if popcount_sum > current_select_target:
                    # Presumably the module-level select() returns the bit
                    # offset, within the word, of the one-bit with the given
                    # in-word rank -- TODO confirm.
                    select_in_word = select(
                        self._memory_view[byte_offset:byte_offset + 8],
                        current_select_target - old_sum)
                    # Store the bit position relative to the start of this
                    # upper block.
                    select_structure[level_0_idx][(current_select_target) //
                                                  8192] = (8 * (byte_offset - (
                                                      (1 << 29) * level_0_idx))
                                                           + select_in_word)
                    current_select_target += 8192

            # Sanity check: the (8192 * i)-th one-bit cannot sit before bit
            # position 8192 * i.
            for i in range(len(select_structure[level_0_idx])):
                assert select_structure[level_0_idx][i] >= 8192 * i
        return select_structure
Exemplo n.º 5
    def _initialize_rank_structure(self) -> "Tuple[array[int], array[int]]":
        # Builds and returns the (level_0, level_1) count arrays for
        # self._bit_array.  level_1 interleaves two kinds of slots: even slots
        # hold cumulative L1 counts, odd slots hold the packed L2 counts.
        # Need a level 0 entry for every 2**32 bits in the input array.
        level_0_size = math.ceil(len(self._bit_array) / (1 << 32))
        level_0 = array('Q', [0] * level_0_size)

        # How many L1/L2 entries do we need?
        # There is 1 64-bit entry for every 2048 bits in the input.
        # (Equivalently, 2 32-bit entries.)

        level_1_size = 2 * math.ceil(len(self._bit_array) / 2048)
        level_1 = array('L', [0] * level_1_size)

        # Iterate over the input bit array in size of at most 512 bits (64 bytes)
        # NOTE(review): the loop below actually steps 8 bytes (64 bits) at a
        # time.
        bit_array_byte_length = len(self._memory_view)

        for byte_offset in range(0, bit_array_byte_length, 8):
            # NOTE(review): the statement below is cut off in this copy
            # (unclosed slice and call).
            pop_count = popcount(self._memory_view[byte_offset:byte_offset +

            # Update the Level 0 cumulative sum
            # The count is credited to the *next* upper block (note the 1 +),
            # so after the prefix sum each entry excludes its own block.
            level_0_idx = 1 + byte_offset // (1 << 29)
            if level_0_idx < level_0_size:
                level_0[level_0_idx] += pop_count

            # Update the Level 1 cumulative sum.
            # Likewise credited to the next L1 block's even slot (the + 2).
            level_1_idx = (byte_offset // 256) * 2 + 2
            if level_1_idx < level_1_size:
                level_1[level_1_idx] += pop_count

            # Update the Level 2 non-cumulative relative counts.
            # (But only for basic blocks 0, 1, and 2.  (Skip basic block 3.))
            basic_block_index = (byte_offset % 256) // 64
            if basic_block_index != 3:
                level_2_idx = (byte_offset // 256) * 2 + 1
                packed_relative_counts = level_1[level_2_idx]
                # NOTE(review): the call below is cut off in this copy
                # (unclosed parenthesis); its arguments are not visible here.
                packed_relative_counts = self._add_relative_count(
                level_1[level_2_idx] = packed_relative_counts

        # Cumulative sums for level_0
        for i in range(1, level_0_size):
            level_0[i] += level_0[i - 1]

        # Cumulative sums for level_1.  Two-step process:
        # 1. Zero out the level_1 cumulative sums for the blocks that lie at
        # the beginning of an L0 upper block. (If we don't do that, the sums
        # could overflow.)
        for byte_offset in range(0, bit_array_byte_length, 1 << 29):
            level_1_idx = 2 * (byte_offset // 256)
            level_1[level_1_idx] = 0

        # 2. calculate the cumulative sums for level_1.
        # byte_offset tracks the first byte of the L1 block behind slot i;
        # slots that begin an upper block are skipped so they stay at zero.
        byte_offset = 0
        for i in range(0, level_1_size, 2):
            if byte_offset % (1 << 29) != 0:
                level_1[i] += level_1[i - 2]
            byte_offset += 256

        return (level_0, level_1)
Exemplo n.º 6
    def select(self, rank: int) -> int:
        # NOTE(review): the next two lines read like this method's docstring,
        # but the quote delimiters are missing in this copy; restore them.
        Returns the position of the 1-bit having the provided rank.
        If no such bit exists, -1 is returned.

        # Use binary search to find the upper (L0) block that contains the
        # bit with the target rank.
        # level_0_idx = bisect.bisect_right(self._level_0, rank)
        level_0_idx = self._binary_search_level_0(rank)
        # A negative result is decoded as -(result) - 1; presumably the helper
        # encodes a "not found" position that way -- TODO confirm.
        if level_0_idx < 0:
            level_0_idx = -(level_0_idx) - 1
            assert level_0_idx >= 0

        # Maintain an (absolute) bit range where the bit with the target rank
        # could be. This range if half open: [low, high)
        # low = (1<<32) * level_0_idx
        # high = min((1 << 32) * (level_0_idx + 1), len(self._bit_array))
        relative_rank = rank - self._level_0[level_0_idx]
        assert relative_rank >= 0

        # Search the sampling answers corresponding to level_0_idx
        # Use them to find the lower block that contains the target
        # bit.
        sampling_answers = self._select_structure[level_0_idx]
        x = relative_rank // 8192
        if relative_rank % 8192 == 0:
            # Just use one of the precomputed answers.
            if x < len(sampling_answers):
                return sampling_answers[x]
            return -1

        # Otherwise we have to search.
        search_start_bit = sampling_answers[x]
        # NOTE(review): as written, the second assignment below immediately
        # overwrites the first, and search_end_bit is left unset when the
        # condition is false; an `else:` line appears to be missing in this
        # copy -- TODO confirm against the original source.
        if x + 1 < len(sampling_answers):
            search_end_bit = sampling_answers[x + 1]
            search_end_bit = min(
                len(self) - 1 - level_0_idx, (1 << 32) * (level_0_idx + 1))

        # Do a binary search for the L1 block that contains the 1-bit
        # with the desired relative rank.
        level_1_idx = self._binary_search_level_1(
            relative_rank, (search_start_bit // 2048) * 2,
            (search_end_bit // 2048) * 2 - 2)
        # A negative result is decoded as -(result) - 2 (one L1 entry spans
        # two slots of self._level_1).
        if level_1_idx < 0:
            level_1_idx = -(level_1_idx) - 2

        relative_rank -= self._level_1[level_1_idx]
        assert relative_rank >= 0
        packed_relative_counts = self._level_1[level_1_idx + 1]

        # NOTE(review): the loop below is damaged in this copy: both `if`
        # statements have lost their bodies and the _get_relative_count call
        # is cut off (unclosed parenthesis).  The missing lines are not
        # visible here.
        for basic_block_idx in range(0, 4):
            if basic_block_idx == 3:
            relative_count = self._get_relative_count(
            if relative_rank < relative_count:
            relative_rank -= relative_count
            assert relative_rank >= 0

        # Now search within the 64-byte basic block.
        byte_offset = 64 * basic_block_idx + 256 * (level_1_idx // 2) + (
            1 << 29) * level_0_idx
        start_bit = 8 * (byte_offset // 64) * 64
        # NOTE(review): 4096 bits is 512 bytes, which is wider than the
        # 64-byte basic block named above -- confirm this bound is intended.
        end_bit = min(start_bit + 4096,
                      len(self) - 1, (1 << 32) * (level_0_idx + 1) - 1)

        # Whole 64-bit words first.  Note that the `rank` parameter is
        # reused from here on as a per-chunk popcount.
        while start_bit + 64 <= end_bit:
            start_byte = start_bit // 8
            rank = popcount(self._memory_view[start_byte:(start_byte + 8)])
            if relative_rank < rank:
                # NOTE(review): the call below is cut off in this copy; its
                # second argument and closing parenthesis are missing.
                return start_bit + select(
                    self._memory_view[start_byte:(start_byte + 8)],

            relative_rank -= rank
            assert relative_rank >= 0
            start_bit += 64

        # Then whole bytes, using the per-byte lookup tables.
        while start_bit + 8 <= end_bit:
            start_byte = start_bit // 8
            rank = RANK_IN_BYTE[256 * 7 + self._memory_view[start_byte]]
            if relative_rank < rank:
                return start_bit + SELECT_IN_BYTE[
                    256 * (relative_rank) + self._memory_view[start_byte]]
            relative_rank -= rank
            assert relative_rank >= 0
            start_bit += 8

        # Then the partial byte before end_bit, if any.
        if start_bit < end_bit:
            slack = end_bit - start_bit - 1
            start_byte = start_bit // 8
            rank = RANK_IN_BYTE[256 * slack + self._memory_view[start_byte]]
            if relative_rank < rank:
                return start_bit + SELECT_IN_BYTE[
                    256 * (relative_rank) + self._memory_view[start_byte]]
            relative_rank -= rank
            assert relative_rank >= 0

        # end_bit itself is not covered by the scans above; check it last.
        if relative_rank == 0 and self._bit_array[end_bit]:
            return end_bit

        return -1
Exemplo n.º 7
def test_popcount(bb: bytes) -> None:
    """Check ``popcount`` against a count of '1' digits in each byte.

    Args:
        bb: Arbitrary byte string whose set bits are counted.
    """
    expected = 0
    for byte in bb:
        # The binary rendering of a byte has one '1' digit per set bit.
        expected += bin(byte).count("1")
    assert popcount(bb) == expected