def read_header(self):
    """
    Parse the file header and the metadata section it points to.

    On success the following attributes are set on the instance:
    `metadata_offset` (second field of the unpacked header), `metadata`
    (result of `sbon.read_map`), `file_count` (result of
    `sbon.read_varint`) and `index_offset` (stream position right after
    the file count, i.e. where the file index starts).

    Raises `AssertionError` if the header magic is not `b'SBAsset6'` or
    if the metadata section does not begin with `b'INDEX'`.
    """
    stream = self.stream
    stream.seek(0)
    data = struct.unpack(HEADER, stream.read(HEADER_SIZE))
    # Validate explicitly instead of via `assert` so the check still runs
    # under `python -O`; the exception type is kept for existing callers.
    if data[0] != b'SBAsset6':
        raise AssertionError('Invalid header')
    self.metadata_offset = data[1]
    # Read the metadata as well.
    stream.seek(self.metadata_offset)
    # The marker must be consumed in its own statement: when the read lived
    # inside an `assert`, `python -O` dropped it and the metadata map was
    # then parsed starting 5 bytes too early.
    marker = stream.read(5)
    if marker != b'INDEX':
        raise AssertionError('Invalid index data')
    self.metadata = sbon.read_map(stream)
    self.file_count = sbon.read_varint(stream)
    # Store the offset of where the file index starts.
    self.index_offset = stream.tell()
def get(self, key):
    """
    Look up `key` in the B-tree and return the data stored for it.

    The header is read first if `key_size` has not been set yet.  `key`
    must be exactly `self.key_size` long.

    Raises `KeyError` (carrying the hex form of the key) when the leaf
    that should hold the key does not contain it.
    """
    if not hasattr(self, 'key_size'):
        self.read_header()
    assert len(key) == self.key_size, 'Invalid key length'

    stream = self.stream
    # Each index entry is a key followed by a 4-byte block pointer.
    entry_size = self.key_size + 4

    # Walk down from the root, following index blocks until a block of
    # any other type is found.
    position = HEADER_SIZE + self.block_size * self.root_block
    stream.seek(position)
    block_type = stream.read(2)
    while block_type == INDEX:
        # Index header: one ignored byte, the number of keys, and the
        # pointer used when the key sorts before every key in the block.
        _, key_count, child = struct.unpack('>Bii', stream.read(9))
        # Entries begin after the 2-byte type tag and the 9-byte header.
        entries_start = position + 11

        # Binary search for the first entry whose key is greater than
        # the one we are looking for.
        low = 0
        high = key_count
        while low < high:
            middle = (low + high) // 2
            stream.seek(entries_start + entry_size * middle)
            if key < stream.read(self.key_size):
                high = middle
            else:
                low = middle + 1

        if low > 0:
            # Follow the pointer stored with the entry just before it.
            stream.seek(entries_start + entry_size * (low - 1) + self.key_size)
            child = struct.unpack('>i', stream.read(4))[0]

        position = HEADER_SIZE + self.block_size * child
        stream.seek(position)
        block_type = stream.read(2)

    assert block_type == LEAF, 'Did not reach a leaf'

    # Go through the leaf entries one by one until the key matches.
    # NOTE(review): LeafReader presumably continues from the current
    # stream position (just past the type tag) -- confirm.
    leaf = LeafReader(self)
    entry_count = struct.unpack('>i', leaf.read(4))[0]
    for _ in range(entry_count):
        candidate = leaf.read(self.key_size)
        size = sbon.read_varint(leaf)
        if key == candidate:
            return leaf.read(size)
        # Not this one; skip over its data.
        leaf.seek(size, 1)

    # None of the keys in the leaf node matched.
    raise KeyError(binascii.hexlify(key))
def get_all_keys(self, start=None):
    """
    A generator which yields, one at a time, every key stored in the
    subtree whose block begins at the given `start` offset.

    If `start` is `None` (or any other falsy value), we will start from
    the root of the tree.  As in `get`, the header is read first when
    `key_size` has not been set yet, so the generator can be used on a
    freshly opened file.

    Raises `Exception` when a block is neither a leaf, an index nor a
    free block.
    """
    if not hasattr(self, 'key_size'):
        self.read_header()
    s = self.stream
    if not start:
        start = HEADER_SIZE + self.block_size * self.root_block
    s.seek(start)
    block_type = s.read(2)
    if block_type == LEAF:
        reader = LeafReader(self)
        num_keys = struct.unpack('>i', reader.read(4))[0]
        for _ in range(num_keys):
            cur_key = reader.read(self.key_size)
            # We do a tell/seek here so that the user can read from
            # the file while this loop is still being run.
            cur_pos = s.tell()
            yield cur_key
            s.seek(cur_pos)
            # Skip over the data stored for this key.
            length = sbon.read_varint(reader)
            reader.seek(length, 1)
    elif block_type == INDEX:
        # Index header: one ignored byte, the number of keys, and the
        # pointer to the first child block.
        (_, num_keys, first_child) = struct.unpack('>Bii', s.read(9))
        # Collect every child pointer before recursing, because the
        # recursive calls move the stream position.
        children = [first_child]
        for _ in range(num_keys):
            # Skip the key field.
            s.read(self.key_size)
            # Read pointer to the child block.
            next_child = struct.unpack('>i', s.read(4))[0]
            children.append(next_child)
        for child_loc in children:
            for key in self.get_all_keys(HEADER_SIZE + self.block_size * child_loc):
                yield key
    elif block_type == FREE:
        # Free blocks hold no keys.
        pass
    else:
        raise Exception('Unhandled block type: {}'.format(block_type))