def _normalize_branch_node(self, node):
    """
    Collapse a branch node that has at most one non-blank item.

    A branch with two or more truthy items is returned unchanged. Otherwise
    the branch is replaced by a two-item ``[key, value]`` node:

    - if only the value slot (``node[16]``) is set, the result is keyed by
      ``compute_leaf_key([])`` and carries that value;
    - if the single remaining child is a leaf or extension, its key is
      prefixed with the child's index in the branch;
    - if the single remaining child is itself a branch, it is persisted and
      referenced from a node keyed by the child's index alone.

    :raises StopIteration: if none of the first 16 items is truthy
    """
    # Both `any` calls drain the *same* iterator, so the second one can only
    # succeed when another truthy item follows the first.
    remaining_items = iter(node)
    if any(remaining_items) and any(remaining_items):
        return node

    branch_value = node[16]
    if branch_value:
        return [compute_leaf_key([]), branch_value]

    child_index, child_ref = next(
        (position, item)
        for position, item in enumerate(node[:16])
        if item
    )
    child = self.get_node(child_ref)
    child_type = get_node_type(child)

    # NOTE(review): the return value of `_prune_node` is discarded here;
    # confirm it is a plain function rather than a context manager.
    self._prune_node(child)

    if child_type in {NODE_TYPE_LEAF, NODE_TYPE_EXTENSION}:
        merged_nibbles = tuple(
            itertools.chain([child_index], decode_nibbles(child[0]))
        )
        return [encode_nibbles(merged_nibbles), child[1]]

    if child_type == NODE_TYPE_BRANCH:
        child_pointer = self._persist_node(child)
        return [encode_nibbles([child_index]), child_pointer]

    raise Exception("Invariant: this code block should be unreachable")
def _iter(self, node, key):
    """
    Search the subtree rooted at ``node`` for a key that sorts after ``key``.

    :param node: the node to search from
    :param key: the nibbles to search relative to, measured from ``node``
    :return: the nibbles of the key that was found (relative to ``node``),
        or ``None`` when the subtree has no such key
    """
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_BLANK:
        return None

    if node_type == NODE_TYPE_LEAF:
        leaf_key = extract_key(node)
        return leaf_key if leaf_key > key else None

    if node_type == NODE_TYPE_BRANCH:
        first_slot = 0
        if key:
            head = key[0]
            # First look further down the slot the key itself points into.
            found = self._iter(self.trie.get_node(node[head]), key[1:])
            if found is not None:
                return (head,) + found
            # Nothing there: only slots after it can hold a later key.
            first_slot = head + 1

        for slot in range(first_slot, 16):
            found = self._get_next(self.trie.get_node(node[slot]))
            if found is not None:
                return (slot,) + found
        return None

    if node_type == NODE_TYPE_EXTENSION:
        shared_key = extract_key(node)
        child = self.trie.get_node(node[1])
        key_tail = key[len(shared_key):]
        if key_starts_with(key, shared_key):
            found = self._iter(child, key_tail)
            if found is not None:
                return shared_key + found
        return None

    return None
def _get_proof(self, node, trie_key, proven_len=0, last_proof=tuple()):
    """
    Collect the nodes visited while following ``trie_key`` down from ``node``.

    :param node: the node to continue from
    :param trie_key: the full key being followed
    :param proven_len: how many leading items of ``trie_key`` were already
        consumed by the nodes in ``last_proof``
    :param last_proof: tuple of nodes collected so far
    :return: tuple of visited nodes; a blank node is not appended to it
    """
    node_type = get_node_type(node)
    proof_with_node = last_proof + (node,)
    key_left = trie_key[proven_len:]

    if node_type == NODE_TYPE_BLANK:
        return last_proof

    if node_type == NODE_TYPE_LEAF:
        return proof_with_node

    if node_type == NODE_TYPE_EXTENSION:
        extension_key = extract_key(node)
        if not key_starts_with(key_left, extension_key):
            # The key diverges inside this extension: stop here.
            return proof_with_node
        return self._get_proof(
            self.get_node(node[1]),
            trie_key,
            proven_len + len(extension_key),
            proof_with_node,
        )

    if node_type == NODE_TYPE_BRANCH:
        if not key_left:
            # Key fully consumed: this branch is the last node visited.
            return proof_with_node
        return self._get_proof(
            self.get_node(node[key_left[0]]),
            trie_key,
            proven_len + 1,
            proof_with_node,
        )

    raise Exception("Invariant: This shouldn't ever happen")
def _get_children(node, depth):
    """
    Gather the children of ``node``, descending into inlined sub-nodes.

    :param node: the node to inspect
    :param depth: depth value associated with ``node``
    :return: ``(references, leaves)`` where ``references`` is a list of
        ``(depth, 32-byte reference)`` pairs and ``leaves`` is a list of
        leaf values
    """
    references = []
    leaves = []

    def is_reference(item):
        # A 32-byte bytes item is treated as a pointer to another node.
        return isinstance(item, bytes) and len(item) == 32

    def absorb(inline_node, inline_depth):
        # Merge the children found inside an inlined node into our results.
        nested_references, nested_leaves = _get_children(
            inline_node, inline_depth)
        references.extend(nested_references)
        leaves.extend(nested_leaves)

    node_type = get_node_type(node)

    if node_type == NODE_TYPE_LEAF:
        leaves.append(node[1])

    elif node_type == NODE_TYPE_EXTENSION:
        child = node[1]
        if is_reference(child):
            references.append((depth + 1, child))
        elif isinstance(child, list):
            # the rlp encoding of the node is < 32 so rather than a 32-byte
            # reference, the actual rlp encoding of the node is inlined.
            absorb(child, depth + 1)
        else:
            raise Exception("Invariant")

    elif node_type == NODE_TYPE_BRANCH:
        for child in node[:16]:
            if is_reference(child):
                references.append((depth + 1, child))
            else:
                # Anything else is walked in place, at the same depth.
                absorb(child, depth)

        # The last item in a branch may contain a value.
        branch_value = node[16]
        if not is_blank_node(branch_value):
            leaves.append(branch_value)

    # NODE_TYPE_BLANK (and anything unrecognised) contributes nothing.
    return references, leaves
def _get(self, node, trie_key):
    """
    Look up ``trie_key`` starting from ``node``, dispatching on node type.

    :return: ``BLANK_NODE`` for a blank node, otherwise whatever the
        type-specific getter returns
    """
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_BLANK:
        return BLANK_NODE

    if node_type in {NODE_TYPE_LEAF, NODE_TYPE_EXTENSION}:
        # Leaf and extension nodes share the two-item key/value layout.
        return self._get_kv_node(node, trie_key)

    if node_type == NODE_TYPE_BRANCH:
        return self._get_branch_node(node, trie_key)

    raise Exception("Invariant: This shouldn't ever happen")
def get_children(self, request):
    """Return all children of the node retrieved by the given request.

    The node is decoded from ``request.data``. A leaf contributes its value,
    an extension contributes a reference to its child, and a branch
    contributes one entry per non-blank item plus its own value, if any.

    :param request: object exposing ``data`` (the encoded node) and ``depth``
    :rtype: A two-tuple with one list containing the children that reference
        other nodes and another containing the leaf children.
    :raises UnexpectedNodeType: if a two-item branch child is not a leaf
    :raises RuntimeError: if a branch child is itself an inlined branch
    """
    node = decode_node(request.data)
    node_type = get_node_type(node)
    references = []
    leaves = []

    if node_type == NODE_TYPE_LEAF:
        leaves.append(node[1])
    elif node_type == NODE_TYPE_EXTENSION:
        depth = request.depth + len(node[0])
        references.append((depth, node[1]))
    elif node_type == NODE_TYPE_BRANCH:
        depth = request.depth + 1
        for item in node[:16]:
            if is_blank_node(item):
                continue

            # In a branch, the first 16 items are either a node whose RLP-encoded
            # representation is under 32 bytes or a reference to another node.
            if len(item) == 2:
                if get_node_type(item) != NODE_TYPE_LEAF:
                    # Wrap `item` in a 1-tuple: `%` would otherwise unpack a
                    # tuple-typed item and raise TypeError instead.
                    raise UnexpectedNodeType(
                        "Expected a node of type leaf, but got %s" % (item,))
                leaves.append(item[1])
            elif len(item) == 17:
                # NOTE: This can happen only if the RLP representation of all branch items fit
                # in less than 32 bytes, which means the keys/values are extremely short, so
                # it's probably not worth supporting it.
                raise RuntimeError("If you get this, see the NOTE above")
            else:
                references.append((depth, item))

        # The last item in a branch may contain a value.
        if not is_blank_node(node[16]):
            leaves.append(node[16])

    return references, leaves
def _traverse_from(self, node: RawHexaryNode, trie_key) -> Tuple[RawHexaryNode, Nibbles]:
    """
    Traverse down the trie from the given node, using the trie_key to navigate.

    At each node, consume a prefix from the key, and navigate to its child. Repeat with that
    child node and so on, until:

    - there is no key remaining, or
    - the child node is a blank node, or
    - the child node is a leaf node

    :param node: the node to start traversing from
    :param trie_key: the key to follow, relative to ``node``
    :return: (the deepest child node, the unconsumed suffix of the key)
    :raises MissingTraversalNode: if a node body is missing from the database; the
        originating ``KeyError`` is attached as the exception's cause
    """
    remaining_key = trie_key
    while remaining_key:
        node_type = get_node_type(node)

        if node_type == NODE_TYPE_BLANK:
            return BLANK_NODE, ()  # type: ignore # mypy thinks BLANK_NODE != b''
        elif node_type == NODE_TYPE_LEAF:
            leaf_key = extract_key(node)
            if key_starts_with(leaf_key, remaining_key):
                return node, remaining_key
            else:
                # The trie key and leaf node key branch away from each other, so there
                # is no node at the specified key.
                return BLANK_NODE, ()  # type: ignore # mypy thinks BLANK_NODE != b''
        elif node_type == NODE_TYPE_EXTENSION:
            try:
                next_node_pointer, remaining_key = self._traverse_extension(node, remaining_key)
            except _PartialTraversal:
                # could only descend part-way into an extension node
                return node, remaining_key
        elif node_type == NODE_TYPE_BRANCH:
            next_node_pointer = node[remaining_key[0]]
            remaining_key = remaining_key[1:]
        else:
            raise Exception("Invariant: This shouldn't ever happen")

        try:
            node = self.get_node(next_node_pointer)
        except KeyError as exc:
            # Report how much of the key was consumed before the missing node.
            used_key = trie_key[:len(trie_key) - len(remaining_key)]
            # Chain explicitly so the KeyError is recorded as the direct cause.
            raise MissingTraversalNode(exc.args[0], used_key) from exc

    # navigated down the full key
    return node, Nibbles(())
def _delete(self, node, trie_key):
    """
    Remove ``trie_key`` from the subtree rooted at ``node``.

    The whole dispatch runs inside ``self._prune_node(node)``, so that
    context manager observes both normal returns and raised exceptions.

    :return: the node that should replace ``node`` after the deletion
    """
    node_type = get_node_type(node)

    with self._prune_node(node):
        if node_type == NODE_TYPE_BLANK:
            # ignore attempt to delete key from empty node
            return BLANK_NODE

        if node_type in {NODE_TYPE_LEAF, NODE_TYPE_EXTENSION}:
            return self._delete_kv_node(node, trie_key)

        if node_type == NODE_TYPE_BRANCH:
            return self._delete_branch_node(node, trie_key)

        raise Exception("Invariant: This shouldn't ever happen")
def _delete_kv_node(self, node, trie_key):
    """
    Remove ``trie_key`` from a leaf or extension ``node``.

    :return: ``node`` itself when nothing changes, ``BLANK_NODE`` when the
        node disappears, or a new two-item node otherwise
    """
    current_key = extract_key(node)

    if not key_starts_with(trie_key, current_key):
        # key not present?....
        return node

    if get_node_type(node) == NODE_TYPE_LEAF:
        # A leaf is removed only on an exact key match.
        return BLANK_NODE if trie_key == current_key else node

    # Extension node: delete from the child using the rest of the key.
    key_below = trie_key[len(current_key):]
    child = self.get_node(node[1])
    updated_child = self._delete(child, key_below)
    updated_child_pointer = self._persist_node(updated_child)

    if updated_child_pointer == node[1]:
        # The child did not change, so neither does this node.
        return node

    if updated_child == BLANK_NODE:
        return BLANK_NODE

    updated_child_type = get_node_type(updated_child)

    if updated_child_type in {NODE_TYPE_LEAF, NODE_TYPE_EXTENSION}:
        # NOTE(review): the return value of `_prune_node` is discarded here;
        # confirm it is a plain function rather than a context manager.
        self._prune_node(updated_child)
        # Fold the child's key into this node's key.
        merged_key = current_key + decode_nibbles(updated_child[0])
        return [encode_nibbles(merged_key), updated_child[1]]

    if updated_child_type == NODE_TYPE_BRANCH:
        return [encode_nibbles(current_key), updated_child_pointer]

    raise Exception("Invariant, this code path should not be reachable")
def _set(self, node, trie_key, value):
    """
    Store ``value`` under ``trie_key`` in the subtree rooted at ``node``.

    :return: the node that should replace ``node`` after the update
    """
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_BLANK:
        # Nothing here yet: a two-item node holding the whole key suffices.
        return [compute_leaf_key(trie_key), value]

    if node_type in {NODE_TYPE_LEAF, NODE_TYPE_EXTENSION}:
        return self._set_kv_node(node, trie_key, value)

    if node_type == NODE_TYPE_BRANCH:
        return self._set_branch_node(node, trie_key, value)

    raise Exception("Invariant: This shouldn't ever happen")
def _get_kv_node(self, node, trie_key):
    """
    Look up ``trie_key`` in a leaf or extension ``node``.

    :return: the stored value, or ``BLANK_NODE`` when the key does not match
    """
    current_key = extract_key(node)
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_LEAF:
        # A leaf only answers for an exact key match.
        return node[1] if trie_key == current_key else BLANK_NODE

    if node_type == NODE_TYPE_EXTENSION:
        if not key_starts_with(trie_key, current_key):
            return BLANK_NODE
        # Continue in the child with the part of the key not yet consumed.
        child = self.get_node(node[1])
        return self._get(child, trie_key[len(current_key):])

    raise Exception("Invariant: unreachable code path")
def traverse_node(db, path, nodehash, start: int):
    """
    Yield a ``Node`` for the node stored under ``nodehash`` and its descendants.

    This is a generator. Branch nodes are expanded through
    ``traverse_branch``; extension nodes recurse into their child.

    :param db: passed through to ``get_node``
    :param path: the path accumulated down to this node
    :param nodehash: identifies the node to load from ``db``
    :param start: forwarded to ``traverse_branch`` and sliced for the
        extension child
    """
    # NOTE(review): `start` is annotated `int` but is sliced (`start[1:]`)
    # below -- confirm the intended type.
    node_rlp, node = get_node(db, nodehash)
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_BRANCH:
        yield Node('branch', node_rlp, path)
        yield from traverse_branch(db, path, node, start)
        return

    if node_type == NODE_TYPE_LEAF:
        # TODO: also traverse the state root?
        yield Node('leaf', node_rlp, path + extract_key(node))
        return

    if node_type == NODE_TYPE_EXTENSION:
        # TODO: decide whether to yield this node, does it still match {start}
        # TODO: test that we're building this path correctly
        full_path = path + extract_key(node)
        yield Node('extension', node_rlp, full_path)
        yield from traverse_node(db, full_path, node[1], start[1:])
        return

    raise Exception(f"don't know how to handle type {node_type}")
def _get_children(node: Hash32, depth: int) -> Tuple[List[Tuple[int, Hash32]], List[bytes]]:
    """Return all children of the given node.

    NOTE(review): ``node`` is annotated ``Hash32`` but is indexed and passed to
    ``get_node_type`` like a decoded node -- confirm the annotation.

    :rtype: A two-tuple with one list containing the children that reference
        other nodes and another containing the leaf children.
    """
    references = []
    leaves = []
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_LEAF:
        leaves.append(node[1])

    elif node_type == NODE_TYPE_EXTENSION:
        child = node[1]
        if isinstance(child, bytes) and len(child) == 32:
            references.append((depth + 1, Hash32(child)))
        elif isinstance(child, list):
            # the rlp encoding of the node is < 32 so rather than a 32-byte
            # reference, the actual rlp encoding of the node is inlined.
            nested_references, nested_leaves = _get_children(child, depth + 1)
            references.extend(nested_references)
            leaves.extend(nested_leaves)
        else:
            raise Exception("Invariant")

    elif node_type == NODE_TYPE_BRANCH:
        for child in node[:16]:
            if isinstance(child, bytes) and len(child) == 32:
                # this is a reference to another node.
                references.append((depth + 1, child))
                continue

            # Anything else is walked in place, at the same depth.
            # TODO: Follow up on mypy confusion around `int`, `bytes` and `Hash32` here
            nested_references, nested_leaves = _get_children(child, depth)  # type: ignore
            references.extend(nested_references)
            leaves.extend(nested_leaves)  # type: ignore

        # The last item in a branch may contain a value.
        branch_value = node[16]
        if not is_blank_node(branch_value):
            leaves.append(branch_value)

    # NODE_TYPE_BLANK (and anything unrecognised) contributes nothing.
    return references, leaves  # type: ignore
def regenerate_ref_count(self):
    """
    Recount how often each node key is referenced, walking from the root.

    Starting at ``self.root_hash``, every key taken off the work list is
    counted once and the node behind it is loaded so its own references can
    be queued. Empty keys, list items and ``BLANK_NODE_HASH`` are skipped.

    :return: a ``defaultdict(int)`` mapping each counted key to its count
    """
    counts = defaultdict(int)
    pending = [self.root_hash]

    while pending:
        key = pending.pop()

        if key == b'' or isinstance(key, list) or key == BLANK_NODE_HASH:
            continue

        counts[key] += 1

        node = self.get_node(key)
        node_type = get_node_type(node)

        if node_type == NODE_TYPE_BRANCH:
            # Only the 16 child slots are queued, not the value slot.
            pending.extend(node[:16])
        elif node_type == NODE_TYPE_EXTENSION:
            pending.append(node[1])
        # Blank and leaf nodes reference nothing further.

    return counts
def _get_next(self, node):
    """
    Return the nibbles of the first key found under ``node``.

    :return: a tuple of nibbles relative to ``node``, or ``None`` if ``node``
        is blank. A branch that carries its own value yields ``(16,)``.
    """
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_BLANK:
        return None

    if node_type == NODE_TYPE_LEAF:
        return extract_key(node)

    if node_type == NODE_TYPE_EXTENSION:
        shared_key = extract_key(node)
        child = self.trie.get_node(node[1])
        return shared_key + self._get_next(child)

    if node_type == NODE_TYPE_BRANCH:
        # The branch's own value is reported before any of its children.
        if node[16]:
            return (16,)

        for slot in range(16):
            found = self._get_next(self.trie.get_node(node[slot]))
            if found is not None:
                return (slot,) + found

        raise Exception("Invariant: this means we have an empty branch node")

    raise Exception("Invariant: unknown node type {0}".format(node))
def _get(self, root_hash, trie_key):
    """
    Fetch the value stored under ``trie_key`` in the trie at ``root_hash``.

    The key is resolved with ``self._traverse`` and the result is interpreted
    according to the type of the node the traversal stopped at.

    :return: the stored value, or ``BLANK_NODE`` if there is none
    :raises ValidationError: if the traversal stopped at an extension or
        branch node while key material was still left over
    """
    node, remaining_key = self._traverse(root_hash, trie_key)
    node_type = get_node_type(node)

    if node_type == NODE_TYPE_BLANK:
        return BLANK_NODE

    if node_type == NODE_TYPE_LEAF:
        # Any remaining key that isn't an exact match for the leaf node must
        # be pointing to a value that doesn't exist.
        if remaining_key == extract_key(node):
            return node[1]
        return BLANK_NODE

    if node_type == NODE_TYPE_EXTENSION:
        if len(remaining_key) > 0:
            # Any remaining key should have traversed down into the extension's child.
            # (or returned a blank node if the key didn't match the extension)
            raise ValidationError(
                "Traverse should never return an extension node with remaining key, "
                f"but returned node {node!r} with remaining key {remaining_key}."
            )
        # An extension node carries no value of its own.
        return BLANK_NODE

    if node_type == NODE_TYPE_BRANCH:
        if len(remaining_key) > 0:
            # Any remaining key should have traversed down into the branch's child, even
            # if the branch had an empty child, which would then return a BLANK_NODE.
            raise ValidationError(
                "Traverse should never return a non-empty branch node with remaining key, "
                f"but returned node {node!r} with remaining key {remaining_key}."
            )
        # The final slot of a branch holds the value stored at this exact key.
        return node[-1]

    raise Exception("Invariant: This shouldn't ever happen")