示例#1
0
 def read_header(self):
     """
     Parse the header at the start of the stream together with the
     metadata section it points at.

     Sets `metadata_offset`, `metadata`, `file_count` and `index_offset`
     on the instance.  Validation is done with `assert`, so an
     `AssertionError` is raised when the header magic or the index marker
     does not match (and no check happens when Python runs with `-O`).
     """
     stream = self.stream
     # The header occupies the first HEADER_SIZE bytes of the stream.
     stream.seek(0)
     raw_header = stream.read(HEADER_SIZE)
     fields = struct.unpack(HEADER, raw_header)
     magic = fields[0]
     assert magic == b'SBAsset6', 'Invalid header'
     self.metadata_offset = fields[1]
     # The metadata section must open with a 5-byte marker.
     stream.seek(self.metadata_offset)
     marker = stream.read(5)
     assert marker == b'INDEX', 'Invalid index data'
     self.metadata = sbon.read_map(stream)
     self.file_count = sbon.read_varint(stream)
     # The file index begins right after the count; remember where.
     self.index_offset = stream.tell()
示例#2
0
 def get(self, key):
     """
     Look up `key` in the B-tree and return the data stored for it.

     The header is loaded on first use.  Index blocks are descended by
     binary-searching their entries until a non-index block is reached;
     that block must be a leaf, whose entries are then scanned in order.

     Raises `AssertionError` if `key` does not have the expected length
     or the traversal does not end on a leaf, and `KeyError` (carrying
     the hex-encoded key) if no entry in the leaf matches.
     """
     if not hasattr(self, 'key_size'):
         self.read_header()
     assert len(key) == self.key_size, 'Invalid key length'
     stream = self.stream
     # An index entry is a key followed by a 4-byte block pointer.
     record_size = self.key_size + 4
     block_start = HEADER_SIZE + self.block_size * self.root_block
     stream.seek(block_start)
     block_type = stream.read(2)
     # Walk down through index blocks until something else shows up.
     while block_type == INDEX:
         # Index header: one ignored byte, the entry count, and the
         # pointer to follow when `key` sorts before every entry.
         _, upper, child = struct.unpack('>Bii', stream.read(9))
         # Entries begin after the 2-byte type and the 9-byte header.
         entries_start = block_start + 11
         lower = 0
         # Binary search for the number of entries whose key is <= `key`.
         while lower < upper:
             middle = (lower + upper) // 2
             stream.seek(entries_start + record_size * middle)
             probe = stream.read(self.key_size)
             if key < probe:
                 upper = middle
             else:
                 lower = middle + 1
         if lower > 0:
             # Follow the pointer stored with the last entry <= `key`.
             pointer_pos = entries_start + record_size * (lower - 1)
             stream.seek(pointer_pos + self.key_size)
             child = struct.unpack('>i', stream.read(4))[0]
         block_start = HEADER_SIZE + self.block_size * child
         stream.seek(block_start)
         block_type = stream.read(2)
     assert block_type == LEAF, 'Did not reach a leaf'
     # The stream now sits just past the leaf's type marker.
     leaf = LeafReader(self)
     entry_count = struct.unpack('>i', leaf.read(4))[0]
     for _ in range(entry_count):
         candidate = leaf.read(self.key_size)
         data_length = sbon.read_varint(leaf)
         if key != candidate:
             # Not the entry we want: step over its data.
             leaf.seek(data_length, 1)
             continue
         return leaf.read(data_length)
     # Every entry in the leaf was checked without a match.
     raise KeyError(binascii.hexlify(key))
示例#3
0
 def get_all_keys(self, start=None):
     """
     A generator which yields, one at a time, every key stored in the
     subtree whose block begins at the byte offset `start`.

     If `start` is `None` (or any other falsy value), iteration begins
     at the root of the tree.  The header is loaded first if that has
     not happened yet, the same way `get` does it.

     Raises `Exception` when a block with an unrecognised type marker
     is encountered.
     """
     # `key_size`, `block_size` and `root_block` are needed below; load
     # the header lazily so this works on a freshly created instance.
     if not hasattr(self, 'key_size'):
         self.read_header()
     s = self.stream
     if not start:
         start = HEADER_SIZE + self.block_size * self.root_block
     s.seek(start)
     block_type = s.read(2)
     if block_type == LEAF:
         reader = LeafReader(self)
         num_keys = struct.unpack('>i', reader.read(4))[0]
         for _ in range(num_keys):
             cur_key = reader.read(self.key_size)
             # We do a tell/seek here so that the user can read from
             # the file while this loop is still being run.
             cur_pos = s.tell()
             yield cur_key
             s.seek(cur_pos)
             # Skip over the data belonging to the key just yielded.
             length = sbon.read_varint(reader)
             reader.seek(length, 1)
     elif block_type == INDEX:
         (_, num_keys, first_child) = struct.unpack('>Bii', s.read(9))
         # Collect every child pointer before recursing, because the
         # recursive calls move the shared stream position.
         children = [first_child]
         for _ in range(num_keys):
             # Skip the key field.
             _ = s.read(self.key_size)
             # Read pointer to the child block.
             next_child = struct.unpack('>i', s.read(4))[0]
             children.append(next_child)
         for child_loc in children:
             child_start = HEADER_SIZE + self.block_size * child_loc
             for key in self.get_all_keys(child_start):
                 yield key
     elif block_type == FREE:
         # Free blocks hold no keys.
         pass
     else:
         raise Exception('Unhandled block type: {}'.format(block_type))