def setUp(self):
    """Build the 'foobar' fixture file containing two serialized leaf nodes.

    Block 0 is zero-filled, block 1 receives the left leaf and block 2 the
    right leaf. The node class and the left leaf are stored on the test
    case for use by the tests.
    """
    Node = node_factory('<i')
    left = Node(True, [2, 7, 11], [32, 87, 43, 2])
    right = Node(True, [15, 24, 31], [45, 67, 89, 0])

    # Create (or truncate) the backing file before touching it via the buffer.
    with open('foobar', 'wb'):
        pass

    manager = BufferManager()
    payloads = (
        b'\0' * BufferManager.block_size,
        bytes(left),
        bytes(right),
    )
    for block_offset, payload in enumerate(payloads):
        block = manager.get_file_block('foobar', block_offset)
        with pin(block):
            block.write(payload)

    self.Node = Node
    self.left = left
def insert(self, attributes):
    """Insert the given record and return the record offset it was stored at.

    The header is re-read first. If the free list is non-empty
    (``first_free_rec >= 0``) the slot at its head is reused and the head
    advances to the slot's stored "next free" link; otherwise the record is
    appended after the current tail. The header is written back afterwards.

    :param attributes: record values, converted with ``convert_str_to_bytes``
    :return: the record offset of the newly stored record
    """
    # Trailing fields of every stored record: valid flag, next-free link.
    record_info = convert_str_to_bytes(attributes) + (b'1', -1)
    self.first_free_rec, self.rec_tail = self._parse_header()

    if self.first_free_rec >= 0:
        # There is space in the free list: reuse its first slot.
        position = self.first_free_rec
        block_offset, local_offset = self._calc(position)
        block = self.buffer_manager.get_file_block(self.filename,
                                                   block_offset)
        with pin(block):
            data = block.read()
            records = self._parse_block_data(data, block_offset)
            # ``records`` only holds this block's records, so it must be
            # indexed with the block-local offset, not the file-wide one.
            next_free_rec = records[local_offset][-1]
            records[local_offset] = record_info
            block.write(self._generate_new_data(records, block_offset))
        self.first_free_rec = next_free_rec
    else:
        # No space in the free list: append the record at the end of file.
        self.rec_tail += 1
        position = self.rec_tail
        block_offset, local_offset = self._calc(position)
        block = self.buffer_manager.get_file_block(self.filename,
                                                   block_offset)
        with pin(block):
            data = block.read()
            records = self._parse_block_data(data, block_offset)
            records.append(record_info)
            block.write(self._generate_new_data(records, block_offset))

    self._update_header()
    return position
def _delete_node(self, node, block):
    """Mark ``block`` as deleted by pushing it onto the deleted-block list.

    ``node`` is linked to the previous list head through ``next_deleted``,
    serialized into ``block``, and ``block`` becomes the new list head.
    """
    previous_head = self.first_deleted_block
    with pin(block):
        node.next_deleted = previous_head
        block.write(bytes(node))
        self.first_deleted_block = block.block_offset
def _update_header(self):
    """Write ``first_free_rec`` and ``rec_tail`` back into the file header.

    Only the first ``header_struct.size`` bytes of block 0 are replaced;
    the rest of the block is written back unchanged.
    """
    header_block = self.buffer_manager.get_file_block(self.filename, 0)
    with pin(header_block):
        contents = header_block.read()
        packed = self.header_struct.pack(self.first_free_rec, self.rec_tail)
        contents[:self.header_struct.size] = packed
        header_block.write(contents)
def _parse_header(self):
    """Read the file header from block 0 and return its fields as a tuple.

    The instance attributes are not modified here; callers assign the
    returned values themselves.
    """
    header_block = self.buffer_manager.get_file_block(self.filename, 0)
    with pin(header_block):
        raw = header_block.read()
        return self.header_struct.unpack_from(raw, 0)
def read(self, record_offset):
    """Return the record stored at ``record_offset``.

    The two trailing bookkeeping fields (valid flag, next-free link) are
    stripped before the remaining values go through ``convert_bytes_to_str``.

    :raises RuntimeError: if the slot's valid flag is ``b'0'``
    """
    block_offset, local_offset = self._calc(record_offset)
    block = self.buffer_manager.get_file_block(self.filename, block_offset)
    with pin(block):
        records = self._parse_block_data(block.read(), block_offset)
        record = records[local_offset]
        if record[-2] == b'0':
            raise RuntimeError('Cannot read an empty record')
    return convert_bytes_to_str(tuple(record[:-2]))
def dump_header(self):
    """Write the in-memory header fields to block 0 of the index file.

    MUST be called before the program exits, otherwise the header info in
    the file won't be updated. The packed header (total blocks, first
    deleted block, root, first leaf) is zero-padded to a full block.
    """
    meta_block = self._manager.get_file_block(self.index_file_path, 0)
    with pin(meta_block):
        header = self.meta_struct.pack(
            self.total_blocks,
            self.first_deleted_block,
            self.root,
            self.first_leaf,
        )
        padded = header.ljust(BufferManager.block_size, b'\0')
        meta_block.write(padded)
def _handle_overflow(self, node, block, path_to_parents):
    """Split an overflowing node and push the resulting entry upwards.

    ``node`` is split into itself and a new node stored in a freshly
    obtained block; both are written out. The ``(key, value)`` pair returned
    by ``node.split`` is then inserted into the parent. If there is no
    parent (``path_to_parents`` is empty) a new root is created instead; if
    the parent overflows in turn, the procedure recurses.

    :param node: the in-memory node that holds too many keys
    :param block: the block ``node`` is stored in
    :param path_to_parents: block offsets of the ancestors, root first;
        the last entry is popped as the direct parent
    """
    # Both the root and the non-root case start with the same split.
    new_block = self._get_free_block()
    new_node, key, value = node.split(new_block.block_offset)
    with pin(block), pin(new_block):
        block.write(bytes(node))
        new_block.write(bytes(new_node))

    if not path_to_parents:
        # The root overflowed: grow the tree by one level.
        new_root_block = self._get_free_block()
        with pin(new_root_block):
            new_root_node = self.Node(
                False, [key], [block.block_offset, new_block.block_offset])
            new_root_block.write(bytes(new_root_node))
        self.root = new_root_block.block_offset
        return

    parent_offset = path_to_parents.pop()
    parent_block = self._manager.get_file_block(self.index_file_path,
                                                parent_offset)
    # Keep the parent pinned across its read-modify-write, like every other
    # block access in this class.
    with pin(parent_block):
        parent_node = self.Node.frombytes(parent_block.read())
        parent_node.insert(key, value)
        if len(parent_node.keys) <= self.Node.n:
            parent_block.write(bytes(parent_node))
            return
    # The parent overflowed as well; the recursive call writes it out.
    self._handle_overflow(parent_node, parent_block, path_to_parents)
def modify(self, attributes, record_offset):
    """Overwrite the live record at ``record_offset`` with ``attributes``.

    :raises RuntimeError: if the slot's valid flag is ``b'0'``
    """
    block_offset, local_offset = self._calc(record_offset)
    block = self.buffer_manager.get_file_block(self.filename, block_offset)
    # An updated record is always a real one: valid flag set, no free link.
    replacement = convert_str_to_bytes(attributes) + (b'1', -1)
    with pin(block):
        records = self._parse_block_data(block.read(), block_offset)
        slot_is_empty = records[local_offset][-2] == b'0'
        if slot_is_empty:
            raise RuntimeError('Cannot update an empty record')
        records[local_offset] = replacement
        block.write(self._generate_new_data(records, block_offset))
def __next__(self):
    """Return the next ``(key, value)`` pair, moving on to the next node
    once the current one is exhausted.

    When every key of the current node has been yielded, the child entry at
    the current position is used as the block offset of the next node;
    an offset of 0 ends the iteration.
    """
    current = self.node
    position = self.key_position
    entry = current.children[position]

    if position < len(current.keys):
        key = current.keys[position]
        self.key_position = position + 1
        return key, entry

    # Jump: ``entry`` is the link to the next node (0 means there is none).
    if entry == 0:
        raise StopIteration

    next_block = self.manager.get_file_block(self.index_file_path, entry)
    with pin(next_block):
        self.node = self.Node.frombytes(next_block.read())
    self.key_position = 1
    return self.node.keys[0], self.node.children[0]
def _find_leaf(self, key):
    """Descend from the root to the first leaf where ``key`` may reside.

    The key is not guaranteed to be in the returned leaf; if it is not, the
    index file has no such key.

    :return: ``(leaf_node, leaf_block, path_to_parents)`` where
        ``path_to_parents`` lists the block offsets of the internal nodes
        visited, root first
    """
    key = _convert_to_tuple(key)
    path_to_parents = []
    offset = self.root
    while True:
        block = self._manager.get_file_block(self.index_file_path, offset)
        with pin(block):
            node = self.Node.frombytes(block.read())
        if node.is_leaf:
            return node, block, path_to_parents
        # Internal node: remember it and follow the matching child.
        path_to_parents.append(offset)
        offset = node.children[bisect.bisect_right(node.keys, key)]
def remove(self, record_offset):
    """Remove the record at ``record_offset`` and push its slot onto the
    free list.

    The header is re-read first. The slot's valid flag is cleared, its
    next-free link is set to the previous head of the free list, and the
    slot becomes the new head. The header is written back afterwards.

    :raises IndexError: if the offset points past the records of its block
    :raises RuntimeError: if the slot is already empty
    """
    self.first_free_rec, self.rec_tail = self._parse_header()
    block_offset, local_offset = self._calc(record_offset)
    block = self.buffer_manager.get_file_block(self.filename, block_offset)
    with pin(block):
        data = block.read()
        records = self._parse_block_data(data, block_offset)
        try:
            records[local_offset][-1]
        except IndexError as err:
            # Keep the original lookup failure attached as the cause.
            raise IndexError('The offset points to an empty space') from err
        record = records[local_offset]
        if record[-2] == b'0':
            raise RuntimeError('Cannot remove an empty record')
        # Link to the previous free-list head (may be negative when the
        # list was empty), then mark the slot as invalid.
        record[-1] = self.first_free_rec
        record[-2] = b'0'
        self.first_free_rec = record_offset  # new head of the free list
        block.write(self._generate_new_data(records, block_offset))
    self._update_header()
def insert(self, key, value):
    """Insert a key-value pair into the index file.

    An empty index (``root == 0``) gets a single leaf that becomes both the
    root and the first leaf. Otherwise the pair is inserted into the leaf
    found by ``_find_leaf``; if that leaf then holds more than ``node.n``
    keys it is split via ``_handle_overflow``.

    :raises ValueError: if ``key`` is already in the index
    """
    key = _convert_to_tuple(key)

    if self.root == 0:
        block = self._get_free_block()
        with pin(block):
            self.root = block.block_offset
            self.first_leaf = self.root
            node = self.Node(is_leaf=True, keys=[key], children=[value, 0])
            block.write(bytes(node))
        return

    node, node_block, path_to_parents = self._find_leaf(key)
    key_position = bisect.bisect_left(node.keys, key)
    if key_position < len(node.keys) and node.keys[key_position] == key:
        raise ValueError('duplicate key {}'.format(key))

    node.insert(key, value)
    if len(node.keys) <= node.n:
        # Pin while writing, consistent with every other block write here.
        with pin(node_block):
            node_block.write(bytes(node))
    else:
        # Too many keys: split the leaf.
        self._handle_overflow(node, node_block, path_to_parents)
def __init__(self, index_file_path, fmt):
    """Open the index file at ``index_file_path`` for keys of format ``fmt``.

    If the index file exists, its header is read from block 0; otherwise
    the file is created and initialized with a fresh header.
    Multiple index managers on the same file MUSTN'T exist simultaneously.
    """
    self.Node = node_factory(fmt)
    self.index_file_path = index_file_path
    self._manager = BufferManager()
    # Header layout: total blocks, offset of the first deleted block,
    # offset of the root node, offset of the first leaf.
    self.meta_struct = Struct('<4i')

    try:
        meta_block = self._manager.get_file_block(self.index_file_path, 0)
        with pin(meta_block):
            header = meta_block.read()[:self.meta_struct.size]
            (self.total_blocks,
             self.first_deleted_block,
             self.root,
             self.first_leaf) = self.meta_struct.unpack(header)
    except FileNotFoundError:
        # Create and initialize the index file if it does not exist.
        self.total_blocks = 1
        self.first_deleted_block = 0
        self.root = 0
        self.first_leaf = 0
        with open(index_file_path, 'wb') as f:
            fresh_header = self.meta_struct.pack(
                self.total_blocks,
                self.first_deleted_block,
                self.root,
                self.first_leaf,
            )
            f.write(fresh_header.ljust(BufferManager.block_size, b'\0'))
def _handle_underflow(self, node, block, path_to_parents):
    """Handle an underflow of ``node`` after a deletion.

    Strategies are tried in this order:

    1. transfer a key from the left sibling,
    2. transfer a key from the right sibling,
    3. fuse with the left sibling,
    4. fuse with the right sibling.

    A fusion removes a key from the parent; if the parent then underflows,
    the procedure recurses on it. The root is allowed to underflow; an
    empty root is deleted and replaced by its only child (or the index
    becomes empty when the root was a leaf).

    :param node: the in-memory node that holds too few keys
    :param block: the block ``node`` is stored in
    :param path_to_parents: block offsets of the ancestors, root first;
        the last entry is popped as the direct parent
    """
    if block.block_offset == self.root:
        if not node.keys:
            # Root has no key at all; this node is no longer needed.
            if node.is_leaf:
                self.root = 0
                self.first_leaf = 0
            else:
                self.root = node.children[0]
            self._delete_node(node, block)
        else:
            with pin(block):
                block.write(bytes(node))
        return  # root underflow is not a problem

    min_keys = ceil(node.n / 2)

    parent_offset = path_to_parents.pop()
    parent_block = self._manager.get_file_block(self.index_file_path,
                                                parent_offset)
    with pin(parent_block):
        parent = self.Node.frombytes(parent_block.read())
    # NOTE(review): node.keys[0] raises IndexError if the underflowing node
    # has no keys left -- confirm this cannot happen for the configured n.
    my_position = bisect.bisect_right(parent.keys, node.keys[0])

    left_sibling = None
    left_sibling_block = None
    if my_position > 0:
        # A left sibling exists: try to borrow from it.
        left_sibling_offset = parent.children[my_position - 1]
        left_sibling_block = self._manager.get_file_block(
            self.index_file_path, left_sibling_offset)
        with pin(left_sibling_block):
            left_sibling = self.Node.frombytes(left_sibling_block.read())
        if len(left_sibling.keys) > min_keys:  # a transfer is possible
            node.transfer_from_left(left_sibling, parent, my_position - 1)
            with pin(block), pin(left_sibling_block), pin(parent_block):
                block.write(bytes(node))
                left_sibling_block.write(bytes(left_sibling))
                parent_block.write(bytes(parent))
            return

    right_sibling = None
    right_sibling_block = None
    # The parent has len(keys) + 1 children, so a right sibling exists
    # whenever my_position is a valid key index.
    if my_position < len(parent.keys):
        right_sibling_offset = parent.children[my_position + 1]
        right_sibling_block = self._manager.get_file_block(
            self.index_file_path, right_sibling_offset)
        with pin(right_sibling_block):
            right_sibling = self.Node.frombytes(right_sibling_block.read())
        if len(right_sibling.keys) > min_keys:  # a transfer is possible
            node.transfer_from_right(right_sibling, parent, my_position)
            with pin(block), pin(right_sibling_block), pin(parent_block):
                block.write(bytes(node))
                right_sibling_block.write(bytes(right_sibling))
                parent_block.write(bytes(parent))
            return

    # No transfer was possible: fuse, preferring the left sibling.
    if left_sibling is not None:
        left_sibling.fuse_with(node, parent, my_position - 1)
        with pin(left_sibling_block):
            left_sibling_block.write(bytes(left_sibling))
        self._delete_node(node, block)
    else:
        node.fuse_with(right_sibling, parent, my_position)
        with pin(block):
            block.write(bytes(node))
        self._delete_node(right_sibling, right_sibling_block)

    if len(parent.keys) >= min_keys:
        # The fusion changed the parent; persist it, since nothing else
        # writes it when the recursion stops here.
        with pin(parent_block):
            parent_block.write(bytes(parent))
    else:
        # The recursive call takes care of writing (or deleting) the parent.
        self._handle_underflow(parent, parent_block, path_to_parents)
def test_pin(self):
    """``pin`` holds one pin inside the ``with`` body and releases it on exit."""
    pinned = Block(5, 'foo', 0)
    with pin(pinned):
        self.assertEqual(pinned.pin_count, 1)
        self.assertEqual(pinned.read(), b'Hello')
    # Leaving the context must drop the pin again.
    self.assertEqual(pinned.pin_count, 0)