def __init__(self, db: BaseAtomicDB, storage_root: Hash32, address: Address) -> None:
    """
    Database entries go through several pipes, like so...

    .. code::

        db -> _storage_lookup -> _storage_cache -> _journal_storage

    db is the raw database, we can assume it hits disk when written to.
    Keys are stored as node hashes and rlp-encoded node values.

    _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie),
    writes to storage lookup *are* immediately applied to a trie, generating
    the appropriate trie nodes and root hash (via the HexaryTrie). The
    writes are *not* persisted to db, until _storage_lookup is explicitly
    instructed to, via :meth:`StorageLookup.commit_to`

    _storage_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journal_storage, because the cache is only invalidated
    after a state root change. Otherwise, you will see data since the last
    storage root was calculated.

    Journaling batches writes at the _journal_storage layer, until persist is called.
    It manages all the checkpointing and rollbacks that happen during EVM execution.

    In both _storage_cache and _journal_storage, keys are set/retrieved as the
    big_endian encoding of the slot integer, and the rlp-encoded value.
    """
    self._address = address
    self._storage_lookup = StorageLookup(db, storage_root, address)
    self._storage_cache = CacheDB(self._storage_lookup)
    self._journal_storage = JournalDB(self._storage_cache)
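# A minimal sketch, assuming only the JournalDB and MemoryDB classes this
# module already uses, of the checkpoint/rollback behavior that the
# _journal_storage layer in the pipe above provides. The key and values
# are arbitrary examples.
from eth.db.backends.memory import MemoryDB
from eth.db.journal import JournalDB

raw = MemoryDB()
journal = JournalDB(raw)

journal[b'\x01'] = b'first'       # buffered in the journal only
checkpoint = journal.record()     # snapshot before a sub-call
journal[b'\x01'] = b'second'
journal.discard(checkpoint)       # roll the sub-call back
assert journal[b'\x01'] == b'first'

journal.persist()                 # flush to the wrapped database
assert raw[b'\x01'] == b'first'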
def __init__(self, db: AtomicDatabaseAPI, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
    r"""
    Internal implementation details (subject to rapid change):
    Database entries go through several pipes, like so...

    .. code::

        db > _batchdb ---------------------------> _journaldb ----------------> code lookups
         \
          -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

    Journaling sequesters writes at the _journal* attrs ^, until persist is called.

    _batchtrie enables us to prune all trie changes while building
    state, without deleting old trie roots.

    _batchdb and _batchtrie together enable us to make the state root,
    without saving everything to the database.

    _journaldb is a journaling of the keys and values used to store
    code and account storage.

    _trie is a hash-trie, used to generate the state root

    _trie_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journaltrie, because the cache is only invalidated
    after a state root change.

    _journaltrie is a journaling of the accounts (an address->rlp mapping,
    rather than the nodes stored by the trie). This enables
    a squashing of all account changes before pushing them into the trie.

    .. NOTE:: StorageDB works similarly

    AccountDB synchronizes the snapshot/revert/persist of both of the
    journals.
    """
    self._raw_store_db = KeyAccessLoggerAtomicDB(db, log_missing_keys=False)
    self._batchdb = BatchDB(self._raw_store_db)
    self._batchtrie = BatchDB(self._raw_store_db, read_through_deletes=True)
    self._journaldb = JournalDB(self._batchdb)
    self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
    self._trie_logger = KeyAccessLoggerDB(self._trie, log_missing_keys=False)
    self._trie_cache = CacheDB(self._trie_logger)
    self._journaltrie = JournalDB(self._trie_cache)
    self._account_cache = LRU(2048)
    self._account_stores: Dict[Address, AccountStorageDatabaseAPI] = {}
    self._dirty_accounts: Set[Address] = set()
    self._root_hash_at_last_persist = state_root
    self._accessed_accounts: Set[Address] = set()
    self._accessed_bytecodes: Set[Address] = set()
    # Track whether an account or slot has been accessed during a given transaction:
    self._reset_access_counters()
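# A hedged sketch of the key-access logging wrapper used above. It is not a
# stage in the pipe diagram itself: it behaves like the database it wraps,
# but remembers every key that was read, which is how the witness of trie
# nodes needed for a block can be assembled later (see the keys_read usage
# in _get_accessed_node_hashes). KeyAccessLoggerAtomicDB is assumed to be
# the same class this module already imports; the key/value are arbitrary.
from eth.db.atomic import AtomicDB
from eth.db.backends.memory import MemoryDB

logged_db = KeyAccessLoggerAtomicDB(AtomicDB(MemoryDB()), log_missing_keys=False)
logged_db[b'node-hash'] = b'node-body'
_ = logged_db[b'node-hash']           # this read is recorded
assert b'node-hash' in logged_db.keys_read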
def __init__(self, db: BaseDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
    r"""
    Internal implementation details (subject to rapid change):
    Database entries go through several pipes, like so...

    .. code::

              -> hash-trie -> storage lookups
            /
        db > _batchdb ---------------------------> _journaldb ----------------> code lookups
            \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

    Journaling sequesters writes at the _journal* attrs ^, until persist is called.

    _batchtrie enables us to prune all trie changes while building
    state, without deleting old trie roots.

    _batchdb and _batchtrie together enable us to make the state root,
    without saving everything to the database.

    _journaldb is a journaling of the keys and values used to store
    code and account storage.

    _trie is a hash-trie, used to generate the state root

    _trie_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journaltrie, because the cache is only invalidated
    after a state root change.

    _journaltrie is a journaling of the accounts (an address->rlp mapping,
    rather than the nodes stored by the trie). This enables
    a squashing of all account changes before pushing them into the trie.

    .. NOTE:: There is an opportunity to do something similar for storage

    AccountDB synchronizes the snapshot/revert/persist of both of the
    journals.
    """
    self._batchdb = BatchDB(db)
    self._batchtrie = BatchDB(db)
    self._journaldb = JournalDB(self._batchdb)
    self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
    self._trie_cache = CacheDB(self._trie)
    self._journaltrie = JournalDB(self._trie_cache)
    self._account_cache = LRU(2048)
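# A minimal sketch, assuming the same BatchDB/HexaryTrie classes wired up
# above, of how a state root can be generated without touching the raw
# database: the trie's node writes land in the batch, and prune=True drops
# obsolete interior nodes along the way. Keys and values are arbitrary.
from eth.db.backends.memory import MemoryDB
from eth.db.batch import BatchDB
from trie import HexaryTrie

raw = MemoryDB()
batch = BatchDB(raw)
trie = HexaryTrie(batch, prune=True)

trie[b'account-key'] = b'rlp-encoded-account'
root = trie.root_hash               # root node exists in the batch only
assert root not in raw

batch.commit(apply_deletes=False)   # flush the surviving nodes to the raw db
assert root in raw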
def __init__(self, db: AtomicDatabaseAPI, storage_root: Hash32, address: Address) -> None:
    """
    Database entries go through several pipes, like so...

    .. code::

        db -> _storage_lookup -> _storage_cache -> _locked_changes -> _journal_storage

    db is the raw database, we can assume it hits disk when written to.
    Keys are stored as node hashes and rlp-encoded node values.

    _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie),
    writes to storage lookup *are* immediately applied to a trie, generating
    the appropriate trie nodes and root hash (via the HexaryTrie). The
    writes are *not* persisted to db, until _storage_lookup is explicitly
    instructed to, via :meth:`StorageLookup.commit_to`

    _storage_cache is a cache tied to the state root of the trie. It
    is important that this cache is checked *after* looking for
    the key in _journal_storage, because the cache is only invalidated
    after a state root change. Otherwise, you will see data since the last
    storage root was calculated.

    _locked_changes is a batch database that includes only those values that are
    un-revertable in the EVM. Currently, that means changes that completed in a
    previous transaction.

    Journaling batches writes at the _journal_storage layer, until persist is called.
    It manages all the checkpointing and rollbacks that happen during EVM execution.

    In both _storage_cache and _journal_storage, keys are set/retrieved as the
    big_endian encoding of the slot integer, and the rlp-encoded value.
    """
    self._address = address
    self._storage_lookup = StorageLookup(db, storage_root, address)
    self._storage_cache = CacheDB(self._storage_lookup)
    self._locked_changes = JournalDB(self._storage_cache)
    self._journal_storage = JournalDB(self._locked_changes)
    self._accessed_slots: Set[int] = set()

    # Track how many times we have cleared the storage. This is journaled
    # in lockstep with other storage changes. That way, we can detect if a revert
    # causes us to revert past the previous storage deletion. The clear count is used
    # as an index to find the base trie from before the revert.
    self._clear_count = JournalDB(MemoryDB({CLEAR_COUNT_KEY_NAME: to_bytes(0)}))
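# A hedged sketch of the clear-count bookkeeping initialized above: the
# counter lives in its own journaled MemoryDB so that record/discard move it
# in lockstep with the storage journal. CLEAR_COUNT_KEY_NAME, to_bytes, and
# to_int are the same names this module already uses.
from eth.db.backends.memory import MemoryDB
from eth.db.journal import JournalDB

clear_count = JournalDB(MemoryDB({CLEAR_COUNT_KEY_NAME: to_bytes(0)}))

checkpoint = clear_count.record()
clear_count[CLEAR_COUNT_KEY_NAME] = to_bytes(1)   # roughly what delete() does
clear_count.discard(checkpoint)                   # a revert rolls the count back

assert to_int(clear_count[CLEAR_COUNT_KEY_NAME]) == 0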
import pytest

from eth.db.atomic import AtomicDB
from eth.db.backends.memory import MemoryDB
from eth.db.batch import BatchDB
from eth.db.cache import CacheDB
from eth.db.journal import JournalDB


# This fixture dispatches on request.param, so it must be parametrized over
# the wrapper classes it knows how to build.
@pytest.fixture(params=[JournalDB, BatchDB, MemoryDB, AtomicDB, CacheDB])
def db(request):
    base_db = MemoryDB()
    if request.param is JournalDB:
        yield JournalDB(base_db)
    elif request.param is BatchDB:
        yield BatchDB(base_db)
    elif request.param is MemoryDB:
        yield base_db
    elif request.param is AtomicDB:
        atomic_db = AtomicDB(base_db)
        with atomic_db.atomic_batch() as batch:
            yield batch
    elif request.param is CacheDB:
        yield CacheDB(base_db)
    else:
        raise Exception("Invariant")
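# A hedged usage sketch: because the fixture above is parametrized, a single
# test body runs once per database wrapper. The key/value pair is arbitrary.
def test_set_and_get(db):
    db[b'key'] = b'value'
    assert db[b'key'] == b'value'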
class AccountDB(BaseAccountDB):

    logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self, db: BaseDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

                  -> hash-trie -> storage lookups
                /
            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
                \
                  -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state, without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: There is an opportunity to do something similar for storage

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        self._trie_cache.reset_cache()
        self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account = self._get_account(address, from_journal)
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))
        if slot_as_key in storage:
            encoded_value = storage[slot_as_key]
            return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int)
        else:
            return 0

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))
        if value:
            encoded_value = rlp.encode(value)
            storage[slot_as_key] = encoded_value
        else:
            del storage[slot_as_key]

        self._set_account(address, account.copy(storage_root=storage.root_hash))

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=BLANK_ROOT_HASH))

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")
        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")
        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")
        try:
            return self._journaldb[self.get_code_hash(address)]
        except KeyError:
            return b""

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)
        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        return self._journaltrie.get(address, b'') != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_account(self, address: Address, from_journal: bool = True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]

        rlp_account = (self._journaltrie if from_journal else self._trie_cache).get(address, b'')
        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> Tuple[UUID, UUID]:
        return (self._journaldb.record(), self._journaltrie.record())

    def discard(self, changeset: Tuple[UUID, UUID]) -> None:
        db_changeset, trie_changeset = changeset
        self._journaldb.discard(db_changeset)
        self._journaltrie.discard(trie_changeset)
        self._account_cache.clear()

    def commit(self, changeset: Tuple[UUID, UUID]) -> None:
        db_changeset, trie_changeset = changeset
        self._journaldb.commit(db_changeset)
        self._journaltrie.commit(trie_changeset)

    def make_state_root(self) -> Hash32:
        self.logger.debug2("Generating AccountDB trie")
        self._journaldb.persist()
        self._journaltrie.persist()
        return self.state_root

    def persist(self) -> None:
        self.make_state_root()
        self._batchtrie.commit(apply_deletes=False)
        self._batchdb.commit(apply_deletes=True)

    def _log_pending_accounts(self) -> None:
        accounts_displayed = set()  # type: Set[bytes]
        queued_changes = self._journaltrie.journal.journal_data.items()
        # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078
        for _, accounts in reversed(queued_changes):
            for address in accounts:
                if address in accounts_displayed:
                    continue
                else:
                    accounts_displayed.add(address)
                    account = self._get_account(Address(address))
                    self.logger.debug2(
                        "Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                        encode_hex(address),
                        account.balance,
                        account.nonce,
                        encode_hex(account.storage_root),
                        encode_hex(account.code_hash),
                    )
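# A hedged sketch of the record/discard cycle on the class above: record()
# snapshots both journals into an opaque changeset, and discard() rolls both
# back together. MemoryDB is the in-memory backend used elsewhere in this
# file; the example address is illustrative.
from eth.db.backends.memory import MemoryDB
from eth_typing import Address

EXAMPLE_ADDRESS = Address(20 * b'\x00')

account_db = AccountDB(MemoryDB())
changeset = account_db.record()
account_db.set_balance(EXAMPLE_ADDRESS, 10)
account_db.discard(changeset)
assert account_db.get_balance(EXAMPLE_ADDRESS) == 0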
class AccountStorageDB(AccountStorageDatabaseAPI):
    logger = get_extended_debug_logger("eth.db.storage.AccountStorageDB")

    def __init__(self, db: AtomicDatabaseAPI, storage_root: Hash32, address: Address) -> None:
        """
        Database entries go through several pipes, like so...

        .. code::

            db -> _storage_lookup -> _storage_cache -> _locked_changes -> _journal_storage

        db is the raw database, we can assume it hits disk when written to.
        Keys are stored as node hashes and rlp-encoded node values.

        _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie),
        writes to storage lookup *are* immediately applied to a trie, generating
        the appropriate trie nodes and root hash (via the HexaryTrie). The
        writes are *not* persisted to db, until _storage_lookup is explicitly
        instructed to, via :meth:`StorageLookup.commit_to`

        _storage_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journal_storage, because the cache is only invalidated
        after a state root change. Otherwise, you will see data since the last
        storage root was calculated.

        _locked_changes is a batch database that includes only those values that are
        un-revertable in the EVM. Currently, that means changes that completed in a
        previous transaction.

        Journaling batches writes at the _journal_storage layer, until persist is called.
        It manages all the checkpointing and rollbacks that happen during EVM execution.

        In both _storage_cache and _journal_storage, keys are set/retrieved as the
        big_endian encoding of the slot integer, and the rlp-encoded value.
        """
        self._address = address
        self._storage_lookup = StorageLookup(db, storage_root, address)
        self._storage_cache = CacheDB(self._storage_lookup)
        self._locked_changes = JournalDB(self._storage_cache)
        self._journal_storage = JournalDB(self._locked_changes)

        # Track how many times we have cleared the storage. This is journaled
        # in lockstep with other storage changes. That way, we can detect if a revert
        # causes us to revert past the previous storage deletion. The clear count is used
        # as an index to find the base trie from before the revert.
        self._clear_count = JournalDB(MemoryDB({CLEAR_COUNT_KEY_NAME: to_bytes(0)}))

    def get(self, slot: int, from_journal: bool = True) -> int:
        key = int_to_big_endian(slot)
        lookup_db = self._journal_storage if from_journal else self._locked_changes
        try:
            encoded_value = lookup_db[key]
        except MissingStorageTrieNode:
            raise
        except KeyError:
            return 0

        if encoded_value == b'':
            return 0
        else:
            return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int)

    def set(self, slot: int, value: int) -> None:
        key = int_to_big_endian(slot)
        if value:
            self._journal_storage[key] = rlp.encode(value)
        else:
            try:
                current_val = self._journal_storage[key]
            except KeyError:
                # deleting an empty key has no effect
                return
            else:
                if current_val != b'':
                    # only try to delete the value if it's present
                    del self._journal_storage[key]

    def delete(self) -> None:
        self.logger.debug2(
            "Deleting all storage in account 0x%s",
            self._address.hex(),
        )
        self._journal_storage.clear()
        self._storage_cache.reset_cache()

        # Empty out the storage lookup trie (keeping history, in case of a revert)
        new_clear_count = self._storage_lookup.new_trie()

        # Look up the previous count of how many times the account has been deleted.
        # This can happen multiple times in one block, via CREATE2.
        old_clear_count = to_int(self._clear_count[CLEAR_COUNT_KEY_NAME])

        # Gut check that we have incremented correctly
        if new_clear_count != old_clear_count + 1:
            raise ValidationError(
                f"Must increase clear count by one on each delete. Instead, went from"
                f" {old_clear_count} -> {new_clear_count} in account 0x{self._address.hex()}"
            )

        # Save the new count, i.e. the index used for a future revert.
        self._clear_count[CLEAR_COUNT_KEY_NAME] = to_bytes(new_clear_count)

    def record(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journal_storage.record(checkpoint)
        self._clear_count.record(checkpoint)

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        self.logger.debug2('discard checkpoint %r', checkpoint)
        latest_clear_count = to_int(self._clear_count[CLEAR_COUNT_KEY_NAME])

        if self._journal_storage.has_checkpoint(checkpoint):
            self._journal_storage.discard(checkpoint)
            self._clear_count.discard(checkpoint)
        else:
            # if the checkpoint comes before this account started tracking,
            # then simply reset to the beginning
            self._journal_storage.reset()
            self._clear_count.reset()
        self._storage_cache.reset_cache()

        reverted_clear_count = to_int(self._clear_count[CLEAR_COUNT_KEY_NAME])

        if reverted_clear_count == latest_clear_count - 1:
            # This revert rewinds past a trie deletion, so roll back to the trie at
            # that point. We use the clear count as an index to get back to the
            # old base trie.
            self._storage_lookup.rollback_trie(reverted_clear_count)
        elif reverted_clear_count == latest_clear_count:
            # No change in the base trie, take no action
            pass
        else:
            # Although CREATE2 permits multiple creates and deletes in a single block,
            # you can still only revert across a single delete. That's because delete
            # is only triggered at the end of the transaction.
            raise ValidationError(
                f"This revert has changed the clear count in an invalid way, from"
                f" {latest_clear_count} to {reverted_clear_count}, in 0x{self._address.hex()}"
            )

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        if self._journal_storage.has_checkpoint(checkpoint):
            self._journal_storage.commit(checkpoint)
            self._clear_count.commit(checkpoint)
        else:
            # if the checkpoint comes before this account started tracking,
            # then flatten all changes, without persisting
            self._journal_storage.flatten()
            self._clear_count.flatten()

    def lock_changes(self) -> None:
        if self._journal_storage.has_clear():
            self._locked_changes.clear()
        self._journal_storage.persist()

    def make_storage_root(self) -> None:
        self.lock_changes()
        self._locked_changes.persist()

    def _validate_flushed(self) -> None:
        """
        Will raise an exception if any changes were made since the last persist.
        """
        journal_diff = self._journal_storage.diff()
        if len(journal_diff) > 0:
            raise ValidationError(
                f"StorageDB had a dirty journal when it needed to be clean: {journal_diff!r}"
            )

    @property
    def has_changed_root(self) -> bool:
        return self._storage_lookup.has_changed_root

    def get_changed_root(self) -> Hash32:
        return self._storage_lookup.get_changed_root()

    def persist(self, db: DatabaseAPI) -> None:
        self._validate_flushed()
        if self._storage_lookup.has_changed_root:
            self._storage_lookup.commit_to(db)
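# A hedged sketch of the key/value encoding described in the docstring above:
# slots are keyed by their big-endian byte encoding, and values are stored
# rlp-encoded. int_to_big_endian comes from eth_utils; rlp is the pyrlp
# package, both already used by this module. Slot 7 / value 42 are arbitrary.
import rlp
from eth_utils import int_to_big_endian

key = int_to_big_endian(7)     # b'\x07'
encoded = rlp.encode(42)       # b'*'
assert rlp.decode(encoded, sedes=rlp.sedes.big_endian_int) == 42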
class AccountDB(BaseAccountDB):

    logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self, db: BaseAtomicDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state, without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: StorageDB works similarly

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._raw_store_db = db
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db, read_through_deletes=True)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)
        self._account_stores = {}  # type: Dict[Address, AccountStorageDB]
        self._dirty_accounts = set()  # type: Set[Address]
        self._root_hash_at_last_persist = state_root

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        if self._trie.root_hash != value:
            self._trie_cache.reset_cache()
            self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")
        account_store = self._get_address_store(address)
        return account_store.get(slot, from_journal)

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.set(slot, value)

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        self._set_storage_root(address, BLANK_ROOT_HASH)
        self._wipe_storage(address)

    def _wipe_storage(self, address: Address) -> None:
        """
        Wipe out the storage, without explicitly handling the storage root update
        """
        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.delete()

    def _get_address_store(self, address: Address) -> AccountStorageDB:
        if address in self._account_stores:
            store = self._account_stores[address]
        else:
            storage_root = self._get_storage_root(address)
            store = AccountStorageDB(self._raw_store_db, storage_root, address)
            self._account_stores[address] = store
        return store

    def _dirty_account_stores(self) -> Iterable[Tuple[Address, AccountStorageDB]]:
        for address in self._dirty_accounts:
            store = self._account_stores[address]
            yield address, store

    @to_tuple
    def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]:
        # list all the accounts that were changed, and their new storage roots
        for address, store in self._dirty_account_stores():
            if store.has_changed_root:
                yield address, store.get_changed_root()

    def _get_storage_root(self, address: Address) -> Hash32:
        account = self._get_account(address)
        return account.storage_root

    def _set_storage_root(self, address: Address, new_storage_root: Hash32) -> None:
        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=new_storage_root))

    def _validate_flushed_storage(self, address: Address, store: AccountStorageDB) -> None:
        if store.has_changed_root:
            actual_storage_root = self._get_storage_root(address)
            expected_storage_root = store.get_changed_root()
            if expected_storage_root != actual_storage_root:
                raise ValidationError(
                    "Storage root was not saved to account before trying to persist roots. "
                    "Account %r had storage %r, but should be %r." % (
                        address,
                        actual_storage_root,
                        expected_storage_root,
                    )
                )

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")
        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")
        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")
        code_hash = self.get_code_hash(address)
        if code_hash == EMPTY_SHA3:
            return b''
        else:
            try:
                return self._journaldb[code_hash]
            except KeyError:
                raise MissingBytecode(code_hash) from KeyError

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)
        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

        self._wipe_storage(address)

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        account_rlp = self._get_encoded_account(address, from_journal=True)
        return account_rlp != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_encoded_account(self, address: Address, from_journal: bool = True) -> bytes:
        lookup_trie = self._journaltrie if from_journal else self._trie_cache

        try:
            return lookup_trie[address]
        except trie_exceptions.MissingTrieNode as exc:
            raise MissingAccountTrieNode(*exc.args) from exc
        except KeyError:
            # In case the account is deleted in the JournalDB
            return b''

    def _get_account(self, address: Address, from_journal: bool = True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]

        rlp_account = self._get_encoded_account(address, from_journal)

        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> JournalDBCheckpoint:
        checkpoint = self._journaldb.record()
        self._journaltrie.record(checkpoint)
        for _, store in self._dirty_account_stores():
            store.record(checkpoint)
        return checkpoint

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.discard(checkpoint)
        self._journaltrie.discard(checkpoint)
        self._account_cache.clear()
        for _, store in self._dirty_account_stores():
            store.discard(checkpoint)

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.commit(checkpoint)
        self._journaltrie.commit(checkpoint)
        for _, store in self._dirty_account_stores():
            store.commit(checkpoint)

    def make_state_root(self) -> Hash32:
        for _, store in self._dirty_account_stores():
            store.make_storage_root()

        for address, storage_root in self._get_changed_roots():
            self.logger.debug2(
                "Updating account 0x%s to storage root 0x%s",
                address.hex(),
                storage_root.hex(),
            )
            self._set_storage_root(address, storage_root)

        self._journaldb.persist()

        diff = self._journaltrie.diff()
        # In addition to squashing (which is redundant here), this context manager causes
        # an atomic commit of the changes, so exceptions will revert the trie
        with self._trie.squash_changes() as memory_trie:
            self._apply_account_diff_without_proof(diff, memory_trie)

        self._journaltrie.reset()
        self._trie_cache.reset_cache()

        return self.state_root

    def persist(self) -> None:
        self.make_state_root()

        # persist storage
        with self._raw_store_db.atomic_batch() as write_batch:
            for address, store in self._dirty_account_stores():
                self._validate_flushed_storage(address, store)
                store.persist(write_batch)

        for address, new_root in self._get_changed_roots():
            if new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH:
                raise ValidationError(
                    "After persisting storage trie, a root node was not found. "
                    "State root for account 0x%s is missing for hash 0x%s." % (
                        address.hex(),
                        new_root.hex(),
                    )
                )

        # reset local storage trackers
        self._account_stores = {}
        self._dirty_accounts = set()

        # persist accounts
        self._validate_generated_root()
        new_root_hash = self.state_root
        self.logger.debug2("Persisting new state root: 0x%s", new_root_hash.hex())
        with self._raw_store_db.atomic_batch() as write_batch:
            self._batchtrie.commit_to(write_batch, apply_deletes=False)
            self._batchdb.commit_to(write_batch, apply_deletes=False)
        self._root_hash_at_last_persist = new_root_hash

    def _validate_generated_root(self) -> None:
        db_diff = self._journaldb.diff()
        if len(db_diff):
            raise ValidationError(
                "AccountDB had a dirty db when it needed to be clean: %r" % db_diff
            )
        trie_diff = self._journaltrie.diff()
        if len(trie_diff):
            raise ValidationError(
                "AccountDB had a dirty trie when it needed to be clean: %r" % trie_diff
            )

    def _log_pending_accounts(self) -> None:
        diff = self._journaltrie.diff()
        for address in sorted(diff.pending_keys()):
            account = self._get_account(Address(address))
            self.logger.debug2(
                "Pending Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                to_checksum_address(address),
                account.balance,
                account.nonce,
                encode_hex(account.storage_root),
                encode_hex(account.code_hash),
            )
        for deleted_address in sorted(diff.deleted_keys()):
            cast_deleted_address = Address(deleted_address)
            self.logger.debug2(
                "Deleted Account %s, empty? %s, exists? %s",
                to_checksum_address(deleted_address),
                self.account_is_empty(cast_deleted_address),
                self.account_exists(cast_deleted_address),
            )

    def _apply_account_diff_without_proof(self, diff: DBDiff, trie: BaseDB) -> None:
        """
        Apply diff of trie updates, when original nodes might be missing.
        Note that doing this naively will raise exceptions about missing nodes
        from *intermediate* trie roots. This captures exceptions and uses the previous
        trie root hash that will be recognized by other nodes.
        """
        # It's fairly common that when an account is deleted, we need to retrieve nodes
        # for accounts that were not needed during normal execution. We only need these
        # nodes to refactor the trie.
        for delete_key in diff.deleted_keys():
            try:
                del trie[delete_key]
            except trie_exceptions.MissingTrieNode as exc:
                self.logger.debug(
                    "Missing node while deleting account with key %s: %s",
                    encode_hex(delete_key),
                    exc,
                )
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc

        # It's fairly unusual, but possible, that setting an account will need unknown
        # nodes during a trie refactor. Here is an example that seems to cause it:
        #
        # Setup:
        # - Root node is a branch, with 0 pointing to a leaf
        # - The complete leaf key is (0, 1, 2), so (1, 2) is in the leaf node
        # - We know the leaf node hash but not the leaf node body
        # Refactor that triggers missing node:
        # - Add value with key (0, 3, 4)
        # - We need to replace the current leaf node with a branch that points leaves at 1 and 3
        # - The leaf for key (0, 1, 2) now contains only the (2) part, so needs to be rebuilt
        # - We need the full body of the old (1, 2) leaf node, to rebuild
        for key, val in diff.pending_items():
            try:
                trie[key] = val
            except trie_exceptions.MissingTrieNode as exc:
                self.logger.debug(
                    "Missing node on account update key %s to %s: %s",
                    encode_hex(key),
                    encode_hex(val),
                    exc,
                )
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc
class AccountStorageDB(AccountStorageDatabaseAPI):
    logger = get_extended_debug_logger("eth.db.storage.AccountStorageDB")

    def __init__(self, db: AtomicDatabaseAPI, storage_root: Hash32, address: Address) -> None:
        """
        Database entries go through several pipes, like so...

        .. code::

            db -> _storage_lookup -> _storage_cache -> _locked_changes -> _journal_storage

        db is the raw database, we can assume it hits disk when written to.
        Keys are stored as node hashes and rlp-encoded node values.

        _storage_lookup is itself a pair of databases: (BatchDB -> HexaryTrie),
        writes to storage lookup *are* immediately applied to a trie, generating
        the appropriate trie nodes and root hash (via the HexaryTrie). The
        writes are *not* persisted to db, until _storage_lookup is explicitly
        instructed to, via :meth:`StorageLookup.commit_to`

        _storage_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journal_storage, because the cache is only invalidated
        after a state root change. Otherwise, you will see data since the last
        storage root was calculated.

        _locked_changes is a batch database that includes only those values that are
        un-revertable in the EVM. Currently, that means changes that completed in a
        previous transaction.

        Journaling batches writes at the _journal_storage layer, until persist is called.
        It manages all the checkpointing and rollbacks that happen during EVM execution.

        In both _storage_cache and _journal_storage, keys are set/retrieved as the
        big_endian encoding of the slot integer, and the rlp-encoded value.
        """
        self._address = address
        self._storage_lookup = StorageLookup(db, storage_root, address)
        self._storage_cache = CacheDB(self._storage_lookup)
        self._locked_changes = BatchDB(self._storage_cache)
        self._journal_storage = JournalDB(self._locked_changes)

    def get(self, slot: int, from_journal: bool = True) -> int:
        key = int_to_big_endian(slot)
        lookup_db = self._journal_storage if from_journal else self._locked_changes
        try:
            encoded_value = lookup_db[key]
        except MissingStorageTrieNode:
            raise
        except KeyError:
            return 0

        if encoded_value == b'':
            return 0
        else:
            return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int)

    def set(self, slot: int, value: int) -> None:
        key = int_to_big_endian(slot)
        if value:
            self._journal_storage[key] = rlp.encode(value)
        else:
            del self._journal_storage[key]

    def delete(self) -> None:
        self.logger.debug2(
            "Deleting all storage in account 0x%s, hashed 0x%s",
            self._address.hex(),
            keccak(self._address).hex(),
        )
        self._journal_storage.clear()
        self._storage_cache.reset_cache()

    def record(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journal_storage.record(checkpoint)

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        self.logger.debug2('discard checkpoint %r', checkpoint)
        if self._journal_storage.has_checkpoint(checkpoint):
            self._journal_storage.discard(checkpoint)
        else:
            # if the checkpoint comes before this account started tracking,
            # then simply reset to the beginning
            self._journal_storage.reset()
        self._storage_cache.reset_cache()

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        if self._journal_storage.has_checkpoint(checkpoint):
            self._journal_storage.commit(checkpoint)
        else:
            # if the checkpoint comes before this account started tracking,
            # then flatten all changes, without persisting
            self._journal_storage.flatten()

    def lock_changes(self) -> None:
        self._journal_storage.persist()

    def make_storage_root(self) -> None:
        self.lock_changes()
        self._locked_changes.commit(apply_deletes=True)

    def _validate_flushed(self) -> None:
        """
        Will raise an exception if any changes were made since the last persist.
        """
        journal_diff = self._journal_storage.diff()
        if len(journal_diff) > 0:
            raise ValidationError(
                f"StorageDB had a dirty journal when it needed to be clean: {journal_diff!r}"
            )

    @property
    def has_changed_root(self) -> bool:
        return self._storage_lookup.has_changed_root

    def get_changed_root(self) -> Hash32:
        return self._storage_lookup.get_changed_root()

    def persist(self, db: DatabaseAPI) -> None:
        self._validate_flushed()
        if self._storage_lookup.has_changed_root:
            self._storage_lookup.commit_to(db)
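# A hedged sketch of lock_changes() above: once a transaction completes, its
# writes drop into _locked_changes and a later discard cannot revert them.
# The scratch JournalDB is only used here to mint a checkpoint token, which
# is an assumption about how checkpoints are normally handed down from
# AccountDB; slot and value numbers are arbitrary.
from eth.constants import BLANK_ROOT_HASH
from eth.db.atomic import AtomicDB
from eth.db.backends.memory import MemoryDB
from eth.db.journal import JournalDB
from eth_typing import Address

storage_db = AccountStorageDB(AtomicDB(MemoryDB()), BLANK_ROOT_HASH, Address(20 * b'\x02'))

storage_db.set(1, 42)
storage_db.lock_changes()              # end of transaction: 42 is un-revertable

checkpoint = JournalDB(MemoryDB()).record()
storage_db.record(checkpoint)
storage_db.set(1, 99)
storage_db.discard(checkpoint)         # reverts only the in-flight write
assert storage_db.get(1) == 42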
class AccountDB(AccountDatabaseAPI):
    logger = get_extended_debug_logger('eth.db.account.AccountDB')

    def __init__(self, db: AtomicDatabaseAPI, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state, without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: StorageDB works similarly

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._raw_store_db = KeyAccessLoggerAtomicDB(db, log_missing_keys=False)
        self._batchdb = BatchDB(self._raw_store_db)
        self._batchtrie = BatchDB(self._raw_store_db, read_through_deletes=True)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_logger = KeyAccessLoggerDB(self._trie, log_missing_keys=False)
        self._trie_cache = CacheDB(self._trie_logger)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)
        self._account_stores: Dict[Address, AccountStorageDatabaseAPI] = {}
        self._dirty_accounts: Set[Address] = set()
        self._root_hash_at_last_persist = state_root
        self._accessed_accounts: Set[Address] = set()
        self._accessed_bytecodes: Set[Address] = set()
        # Track whether an account or slot has been accessed during a given transaction:
        self._reset_access_counters()

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        if self._trie.root_hash != value:
            self._trie_cache.reset_cache()
            self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")
        account_store = self._get_address_store(address)
        return account_store.get(slot, from_journal)

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.set(slot, value)

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        self._set_storage_root(address, BLANK_ROOT_HASH)
        self._wipe_storage(address)

    def is_storage_warm(self, address: Address, slot: int) -> bool:
        key = self._get_storage_tracker_key(address, slot)
        return key in self._journal_accessed_state

    def mark_storage_warm(self, address: Address, slot: int) -> None:
        key = self._get_storage_tracker_key(address, slot)
        if key not in self._journal_accessed_state:
            self._journal_accessed_state[key] = IS_PRESENT_VALUE

    def _get_storage_tracker_key(self, address: Address, slot: int) -> bytes:
        """
        Get the key used to track whether a storage slot has been accessed
        during this transaction.
        """
        return address + int_to_big_endian(slot)

    def _wipe_storage(self, address: Address) -> None:
        """
        Wipe out the storage, without explicitly handling the storage root update
        """
        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.delete()

    def _get_address_store(self, address: Address) -> AccountStorageDatabaseAPI:
        if address in self._account_stores:
            store = self._account_stores[address]
        else:
            storage_root = self._get_storage_root(address)
            store = AccountStorageDB(self._raw_store_db, storage_root, address)
            self._account_stores[address] = store
        return store

    def _dirty_account_stores(self) -> Iterable[Tuple[Address, AccountStorageDatabaseAPI]]:
        for address in self._dirty_accounts:
            store = self._account_stores[address]
            yield address, store

    @to_tuple
    def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]:
        # list all the accounts that were changed, and their new storage roots
        for address, store in self._dirty_account_stores():
            if store.has_changed_root:
                yield address, store.get_changed_root()

    def _get_storage_root(self, address: Address) -> Hash32:
        account = self._get_account(address)
        return account.storage_root

    def _set_storage_root(self, address: Address, new_storage_root: Hash32) -> None:
        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=new_storage_root))

    def _validate_flushed_storage(self, address: Address, store: AccountStorageDatabaseAPI) -> None:
        if store.has_changed_root:
            actual_storage_root = self._get_storage_root(address)
            expected_storage_root = store.get_changed_root()
            if expected_storage_root != actual_storage_root:
                raise ValidationError(
                    "Storage root was not saved to account before trying to persist roots. "
                    f"Account {address!r} had storage {actual_storage_root!r}, "
                    f"but should be {expected_storage_root!r}."
                )

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")
        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")
        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")

        code_hash = self.get_code_hash(address)
        if code_hash == EMPTY_SHA3:
            return b''
        else:
            try:
                return self._journaldb[code_hash]
            except KeyError:
                raise MissingBytecode(code_hash) from KeyError
            finally:
                if code_hash in self._get_accessed_node_hashes():
                    self._accessed_bytecodes.add(address)

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)
        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        # We must wipe the storage first, because if it's the first time we load it,
        # then we want to load it with the original storage root hash, not the
        # empty one. (in case of a later revert, we don't want to poison the storage cache)
        self._wipe_storage(address)

        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        account_rlp = self._get_encoded_account(address, from_journal=True)
        return account_rlp != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(address) and self.get_balance(address) == 0

    def is_address_warm(self, address: Address) -> bool:
        return address in self._journal_accessed_state

    def mark_address_warm(self, address: Address) -> None:
        if address not in self._journal_accessed_state:
            self._journal_accessed_state[address] = IS_PRESENT_VALUE

    #
    # Internal
    #
    def _get_encoded_account(self, address: Address, from_journal: bool = True) -> bytes:
        self._accessed_accounts.add(address)
        lookup_trie = self._journaltrie if from_journal else self._trie_cache

        try:
            return lookup_trie[address]
        except trie_exceptions.MissingTrieNode as exc:
            raise MissingAccountTrieNode(*exc.args) from exc
        except KeyError:
            # In case the account is deleted in the JournalDB
            return b''

    def _get_account(self, address: Address, from_journal: bool = True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]

        rlp_account = self._get_encoded_account(address, from_journal)

        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    def _reset_access_counters(self) -> None:
        # Account accesses and storage accesses are recorded in the same journal.
        # Accounts just use the address as the key (and an empty value as a flag).
        # Storage slots use a concatenation of address and slot converted to bytes
        # (and an empty value).
        self._journal_accessed_state = JournalDB(MemoryDB())

    #
    # Record and discard API
    #
    def record(self) -> JournalDBCheckpoint:
        checkpoint = self._journaldb.record()
        self._journaltrie.record(checkpoint)
        self._journal_accessed_state.record(checkpoint)

        for _, store in self._dirty_account_stores():
            store.record(checkpoint)
        return checkpoint

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.discard(checkpoint)
        self._journaltrie.discard(checkpoint)
        self._journal_accessed_state.discard(checkpoint)
        self._account_cache.clear()
        for _, store in self._dirty_account_stores():
            store.discard(checkpoint)

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.commit(checkpoint)
        self._journaltrie.commit(checkpoint)
        self._journal_accessed_state.commit(checkpoint)
        for _, store in self._dirty_account_stores():
            store.commit(checkpoint)

    def lock_changes(self) -> None:
        for _, store in self._dirty_account_stores():
            store.lock_changes()
        self._reset_access_counters()

    def make_state_root(self) -> Hash32:
        for _, store in self._dirty_account_stores():
            store.make_storage_root()

        for address, storage_root in self._get_changed_roots():
            self.logger.debug2(
                "Updating account 0x%s to storage root 0x%s",
                address.hex(),
                storage_root.hex(),
            )
            if self.account_exists(address) or storage_root != BLANK_ROOT_HASH:
                self._set_storage_root(address, storage_root)

        self._journaldb.persist()

        diff = self._journaltrie.diff()
        if diff.deleted_keys() or diff.pending_items():
            # In addition to squashing (which is redundant here), this context manager causes
            # an atomic commit of the changes, so exceptions will revert the trie
            with self._trie.squash_changes() as memory_trie:
                self._apply_account_diff_without_proof(diff, memory_trie)

        self._journaltrie.reset()
        self._trie_cache.reset_cache()

        return self.state_root

    def persist(self) -> MetaWitnessAPI:
        self.make_state_root()

        # persist storage
        with self._raw_store_db.atomic_batch() as write_batch:
            for address, store in self._dirty_account_stores():
                self._validate_flushed_storage(address, store)
                store.persist(write_batch)

        for address, new_root in self._get_changed_roots():
            if new_root is None:
                raise ValidationError(
                    f"Cannot validate new root of account 0x{address.hex()} "
                    f"which has a new root hash of None"
                )
            elif new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH:
                raise ValidationError(
                    "After persisting storage trie, a root node was not found. "
                    f"State root for account 0x{address.hex()} "
                    f"is missing for hash 0x{new_root.hex()}."
                )

        # generate witness (copy) before clearing the underlying data
        meta_witness = self._get_meta_witness()

        # reset local storage trackers
        self._account_stores = {}
        self._dirty_accounts = set()
        self._accessed_accounts = set()
        self._accessed_bytecodes = set()
        # We have to clear the account cache here so that future account accesses
        # will get added to _accessed_accounts correctly. Account accesses that
        # are cached do not add the address to the list of accessed accounts.
        self._account_cache.clear()

        # persist accounts
        self._validate_generated_root()
        new_root_hash = self.state_root
        self.logger.debug2("Persisting new state root: 0x%s", new_root_hash.hex())
        with self._raw_store_db.atomic_batch() as write_batch:
            self._batchtrie.commit_to(write_batch, apply_deletes=False)
            self._batchdb.commit_to(write_batch, apply_deletes=False)
        self._root_hash_at_last_persist = new_root_hash

        return meta_witness

    def _get_accessed_node_hashes(self) -> Set[Hash32]:
        return cast(Set[Hash32], self._raw_store_db.keys_read)

    @to_dict
    def _get_access_list(self) -> Iterable[Tuple[Address, AccountQueryTracker]]:
        """
        Get the list of addresses that were accessed, whether the bytecode was accessed,
        and which storage slots were accessed.
        """
        for address in self._accessed_accounts:
            did_access_bytecode = address in self._accessed_bytecodes
            if address in self._account_stores:
                accessed_storage_slots = self._account_stores[address].get_accessed_slots()
            else:
                accessed_storage_slots = frozenset()
            yield address, AccountQueryTracker(did_access_bytecode, accessed_storage_slots)

    def _get_meta_witness(self) -> MetaWitness:
        """
        Get a variety of metadata about the state witness needed to execute the block.

        This creates a copy, so that underlying changes do not affect the returned
        MetaWitness.
        """
        return MetaWitness(self._get_accessed_node_hashes(), self._get_access_list())

    def _validate_generated_root(self) -> None:
        db_diff = self._journaldb.diff()
        if len(db_diff):
            raise ValidationError(
                f"AccountDB had a dirty db when it needed to be clean: {db_diff!r}"
            )
        trie_diff = self._journaltrie.diff()
        if len(trie_diff):
            raise ValidationError(
                f"AccountDB had a dirty trie when it needed to be clean: {trie_diff!r}"
            )

    def _log_pending_accounts(self) -> None:
        # This entire method is about debug2 logging, so skip it if debug2 is disabled
        if not self.logger.show_debug2:
            return

        diff = self._journaltrie.diff()
        for address in sorted(diff.pending_keys()):
            account = self._get_account(Address(address))
            self.logger.debug2(
                "Pending Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                to_checksum_address(address),
                account.balance,
                account.nonce,
                encode_hex(account.storage_root),
                encode_hex(account.code_hash),
            )
        for deleted_address in sorted(diff.deleted_keys()):
            # Check if the account was accessed before accessing/logging info about the address
            was_account_accessed = deleted_address in self._accessed_accounts
            cast_deleted_address = Address(deleted_address)
            self.logger.debug2(
                "Deleted Account %s, empty? %s, exists? %s",
                to_checksum_address(deleted_address),
                self.account_is_empty(cast_deleted_address),
                self.account_exists(cast_deleted_address),
            )
            # If the account was not accessed prior to this log, (re)mark it as not accessed
            if not was_account_accessed:
                self._accessed_accounts.remove(cast_deleted_address)

    def _apply_account_diff_without_proof(self, diff: DBDiff, trie: DatabaseAPI) -> None:
        """
        Apply diff of trie updates, when original nodes might be missing.
        Note that doing this naively will raise exceptions about missing nodes
        from *intermediate* trie roots. This captures exceptions and uses the previous
        trie root hash that will be recognized by other nodes.
        """
        # It's fairly common that when an account is deleted, we need to retrieve nodes
        # for accounts that were not needed during normal execution. We only need these
        # nodes to refactor the trie.
        for delete_key in diff.deleted_keys():
            try:
                del trie[delete_key]
            except trie_exceptions.MissingTrieNode as exc:
                self.logger.debug(
                    "Missing node while deleting account with key %s: %s",
                    encode_hex(delete_key),
                    exc,
                )
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc

        # It's fairly unusual, but possible, that setting an account will need unknown
        # nodes during a trie refactor. Here is an example that seems to cause it:
        #
        # Setup:
        # - Root node is a branch, with 0 pointing to a leaf
        # - The complete leaf key is (0, 1, 2), so (1, 2) is in the leaf node
        # - We know the leaf node hash but not the leaf node body
        # Refactor that triggers missing node:
        # - Add value with key (0, 3, 4)
        # - We need to replace the current leaf node with a branch that points leaves at 1 and 3
        # - The leaf for key (0, 1, 2) now contains only the (2) part, so needs to be rebuilt
        # - We need the full body of the old (1, 2) leaf node, to rebuild
        for key, val in diff.pending_items():
            try:
                trie[key] = val
            except trie_exceptions.MissingTrieNode as exc:
                self.logger.debug(
                    "Missing node on account update key %s to %s: %s",
                    encode_hex(key),
                    encode_hex(val),
                    exc,
                )
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc
class AccountDB(BaseAccountDB):

    logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self, db: BaseAtomicDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):

        Database entries go through several pipes, like so...

        .. code::

            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state, without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root.

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: StorageDB works similarly

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._raw_store_db = db
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)
        self._account_stores = {}  # type: Dict[Address, AccountStorageDB]
        self._dirty_accounts = set()  # type: Set[Address]

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        if self._trie.root_hash != value:
            self._trie_cache.reset_cache()
            self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account_store = self._get_address_store(address)
        return account_store.get(slot, from_journal)

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.set(slot, value)

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        self._set_storage_root(address, BLANK_ROOT_HASH)
        self._wipe_storage(address)

    def _wipe_storage(self, address: Address) -> None:
        """
        Wipe out the storage, without explicitly handling the storage root update
        """
        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.delete()
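    # Hedged usage sketch, not from the original source (`addr` stands in for
    # any canonical 20-byte address). Unset slots read as zero, and deleting
    # storage resets the account's storage root to BLANK_ROOT_HASH:
    #
    #     account_db.set_storage(addr, slot=0, value=42)
    #     assert account_db.get_storage(addr, slot=0) == 42
    #     account_db.delete_storage(addr)
    #     assert account_db.get_storage(addr, slot=0) == 0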
    def _get_address_store(self, address: Address) -> AccountStorageDB:
        if address in self._account_stores:
            store = self._account_stores[address]
        else:
            storage_root = self._get_storage_root(address)
            store = AccountStorageDB(self._raw_store_db, storage_root, address)
            self._account_stores[address] = store
        return store

    def _dirty_account_stores(self) -> Iterable[Tuple[Address, AccountStorageDB]]:
        for address in self._dirty_accounts:
            store = self._account_stores[address]
            yield address, store

    @to_tuple
    def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]:
        # list all the accounts that were changed, and their new storage roots
        for address, store in self._dirty_account_stores():
            if store.has_changed_root:
                yield address, store.get_changed_root()

    def _get_storage_root(self, address: Address) -> Hash32:
        account = self._get_account(address)
        return account.storage_root

    def _set_storage_root(self, address: Address, new_storage_root: Hash32) -> None:
        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=new_storage_root))

    def _validate_flushed_storage(self, address: Address, store: AccountStorageDB) -> None:
        if store.has_changed_root:
            actual_storage_root = self._get_storage_root(address)
            expected_storage_root = store.get_changed_root()
            if expected_storage_root != actual_storage_root:
                raise ValidationError(
                    "Storage root was not saved to account before trying to persist roots. "
                    "Account %r had storage %r, but should be %r." % (
                        address,
                        actual_storage_root,
                        expected_storage_root,
                    )
                )

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")

        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")

        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")

        try:
            return self._journaldb[self.get_code_hash(address)]
        except KeyError:
            return b""

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)

        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(address) != EMPTY_SHA3
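    # Hedged sketch, not from the original source: bytecode is content-addressed
    # under keccak(code) in _journaldb, so these round-trips should hold
    # (`addr` is a placeholder address):
    #
    #     account_db.set_code(addr, b'\x60\x00')
    #     assert account_db.get_code_hash(addr) == keccak(b'\x60\x00')
    #     assert account_db.get_code(addr) == b'\x60\x00'
    #     account_db.delete_code(addr)  # code_hash reset to EMPTY_SHA3
    #     assert account_db.get_code(addr) == b''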
    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

        self._wipe_storage(address)

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        return self._journaltrie.get(address, b'') != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_account(self, address: Address, from_journal: bool = True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]

        rlp_account = (self._journaltrie if from_journal else self._trie_cache).get(address, b'')

        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> UUID:
        changeset_id = self._journaldb.record()
        self._journaltrie.record(changeset_id)

        for _, store in self._dirty_account_stores():
            store.record(changeset_id)
        return changeset_id

    def discard(self, changeset: UUID) -> None:
        self._journaldb.discard(changeset)
        self._journaltrie.discard(changeset)
        self._account_cache.clear()

        for _, store in self._dirty_account_stores():
            store.discard(changeset)

    def commit(self, changeset: UUID) -> None:
        self._journaldb.commit(changeset)
        self._journaltrie.commit(changeset)

        for _, store in self._dirty_account_stores():
            store.commit(changeset)

    def make_state_root(self) -> Hash32:
        for _, store in self._dirty_account_stores():
            store.make_storage_root()

        for address, storage_root in self._get_changed_roots():
            self.logger.debug2(
                "Updating account 0x%s to storage root 0x%s",
                address.hex(),
                storage_root.hex(),
            )
            self._set_storage_root(address, storage_root)

        self._journaldb.persist()
        self._journaltrie.persist()
        return self.state_root
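    # Hedged sketch of the journaling API above, not from the original source:
    # `record` checkpoints the account and storage journals together,
    # `discard` rolls back to a checkpoint, and `commit` folds a checkpoint
    # into its parent (`addr` is a placeholder for a fresh address):
    #
    #     checkpoint = account_db.record()
    #     account_db.set_balance(addr, 1337)
    #     account_db.discard(checkpoint)  # the balance change is rolled back
    #     assert account_db.get_balance(addr) == 0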
    def persist(self) -> None:
        self.make_state_root()

        # persist storage
        with self._raw_store_db.atomic_batch() as write_batch:
            for address, store in self._dirty_account_stores():
                self._validate_flushed_storage(address, store)
                store.persist(write_batch)

        for address, new_root in self._get_changed_roots():
            if new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH:
                raise ValidationError(
                    "After persisting storage trie, a root node was not found. "
                    "State root for account 0x%s is missing for hash 0x%s." % (
                        address.hex(),
                        new_root.hex(),
                    )
                )

        # reset local storage trackers
        self._account_stores = {}
        self._dirty_accounts = set()

        # persist accounts
        self._validate_generated_root()
        with self._raw_store_db.atomic_batch() as write_batch:
            self._batchtrie.commit_to(write_batch, apply_deletes=False)
            self._batchdb.commit_to(write_batch, apply_deletes=False)

    def _validate_generated_root(self) -> None:
        db_diff = self._journaldb.diff()
        if len(db_diff):
            raise ValidationError(
                "AccountDB had a dirty db when it needed to be clean: %r" % db_diff
            )
        trie_diff = self._journaltrie.diff()
        if len(trie_diff):
            raise ValidationError(
                "AccountDB had a dirty trie when it needed to be clean: %r" % trie_diff
            )

    def _log_pending_accounts(self) -> None:
        accounts_displayed = set()  # type: Set[bytes]
        queued_changes = self._journaltrie.journal.journal_data.items()
        # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078
        for _, accounts in reversed(queued_changes):
            for address in accounts:
                if address in accounts_displayed:
                    continue
                else:
                    accounts_displayed.add(address)
                    account = self._get_account(Address(address))
                    self.logger.debug2(
                        "Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                        encode_hex(address),
                        account.balance,
                        account.nonce,
                        encode_hex(account.storage_root),
                        encode_hex(account.code_hash),
                    )
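# ---------------------------------------------------------------------------
# Hedged end-to-end sketch, not part of the original source. It assumes
# py-evm's in-memory AtomicDB is importable as below; the helper name is
# hypothetical.
# ---------------------------------------------------------------------------
def _demo_account_db_lifecycle() -> None:
    from eth.db.atomic import AtomicDB

    db = AtomicDB()
    account_db = AccountDB(db)
    addr = Address(20 * b'\x01')

    account_db.set_balance(addr, 10)
    account_db.set_storage(addr, slot=0, value=42)

    state_root = account_db.make_state_root()
    account_db.persist()

    # A fresh AccountDB over the same database sees the persisted state:
    fresh = AccountDB(db, state_root)
    assert fresh.get_balance(addr) == 10
    assert fresh.get_storage(addr, slot=0) == 42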