Example 1
    def test_hits(self):
        for size in SIZES:
            l = LRU(size)
            for i in range(size):
                l[i] = str(i)

            val = l[0]
            self.assertTrue(l.get_hits() == 1)
            self.assertTrue(l.get_misses() == 0)

            val = l.get(0, None)
            self.assertTrue(l.get_hits() == 2)
            self.assertTrue(l.get_misses() == 0)

            val = l.get(-1, None)
            self.assertTrue(l.get_hits() == 2)
            self.assertTrue(l.get_misses() == 1)

            try:
                val = l[-1]
            except KeyError:
                pass

            self.assertTrue(l.get_hits() == 2)
            self.assertTrue(l.get_misses() == 2)

            l.clear()
            self.assertTrue(len(l) == 0)
            self.assertTrue(l.get_hits() == 0)
            self.assertTrue(l.get_misses() == 0)
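The counters exercised above follow the pattern shown in the assertions: __getitem__ and get() on a present key count as hits, lookups of an absent key count as misses, insertions leave both untouched, and clear() resets them. A minimal sketch against the same lru.LRU API (keys are illustrative):

from lru import LRU

l = LRU(2)
l[1] = '1'                            # insertions do not touch the counters
_ = l[1]                              # lookup of a present key -> one hit
_ = l.get(2)                          # get() on an absent key -> one miss (returns None)
print(l.get_hits(), l.get_misses())   # 1 1
l.clear()                             # empties the cache and resets both counters to 0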
Example 2
    def test_stats(self):
        for size in SIZES:
            l = LRU(size)
            for i in range(size):
                l[i] = str(i)

            self.assertTrue(l.get_stats() == (0, 0))

            val = l[0]
            self.assertTrue(l.get_stats() == (1, 0))

            val = l.get(0, None)
            self.assertTrue(l.get_stats() == (2, 0))

            val = l.get(-1, None)
            self.assertTrue(l.get_stats() == (2, 1))

            try:
                val = l[-1]
            except KeyError:
                pass

            self.assertTrue(l.get_stats() == (2, 2))

            l.clear()
            self.assertTrue(len(l) == 0)
            self.assertTrue(l.get_stats() == (0, 0))
Example 3
class CacheMixin(object):
    MAX_CACHE_SPACE = 40000000
    CACHE_LENGTH = 10000

    def __init__(self, *args, **kwargs):
        super(CacheMixin, self).__init__(*args, **kwargs)
        self._caches = None
        self.clear_cache()
        self._raw_history_bars = self.raw_history_bars
        self.raw_history_bars = self.decorator_raw_history_bars(self.raw_history_bars)

    @classmethod
    def set_cache_length(cls, value):
        cls.CACHE_LENGTH = value

    @classmethod
    def set_max_cache_space(cls, value):
        cls.MAX_CACHE_SPACE = value

    def clear_cache(self):
        if self._caches is None:
            self._caches = LRU(self.MAX_CACHE_SPACE // self.CACHE_LENGTH)
        else:
            self._caches.clear()

    def update_cache(self, cache, dt):
        if len(cache):
            last = cache.last_dt + timedelta(seconds=1)
        else:
            bar_data = self._raw_history_bars(cache.instrument, cache.frequency,
                                              end_dt=dt - timedelta(seconds=1), length=cache.chunk)
            if bar_data is not None and len(bar_data):
                cache.update_bars(bar_data, len(bar_data))
            last = dt
        bar_data = self._raw_history_bars(cache.instrument, cache.frequency, start_dt=last, length=cache.chunk)
        if bar_data is not None and len(bar_data):
            cache.update_bars(bar_data, cache.chunk)
        else:
            cache.close()

    def decorator_raw_history_bars(self, func):
        @functools.wraps(func)
        def wrapped(instrument, frequency, start_dt=None, end_dt=None, length=None):
            key = (instrument.order_book_id, frequency)
            if key not in self._caches:
                self._caches[key] = Cache(self, self.CACHE_LENGTH, instrument, frequency)
            data = self._caches[key].raw_history_bars(start_dt, end_dt, length)
            if data is not None:
                return data
            else:
                system_log.debug("Cache miss: instrument [{}] frequency [{}] from {} to {}, length {}".format(
                    instrument.order_book_id, frequency, start_dt, end_dt, length
                ))
                return func(instrument, frequency, start_dt=start_dt, end_dt=end_dt, length=length)

        return wrapped

    def raw_history_bars(self, instrument, frequency, start_dt=None, end_dt=None, length=None):
        raise NotImplementedError
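Two class-level knobs drive the cache geometry: CACHE_LENGTH is the chunk size handed to each Cache object, and MAX_CACHE_SPACE // CACHE_LENGTH is the number of (order_book_id, frequency) entries the LRU created in clear_cache() will hold. A hedged sketch of tuning them before constructing a data source that mixes in CacheMixin (the concrete subclass is hypothetical):

CacheMixin.set_cache_length(5000)             # bars kept per cached instrument/frequency pair
CacheMixin.set_max_cache_space(20_000_000)    # total budget; the LRU holds space // length entries

# A concrete class would then be declared roughly as
#     class MyDataSource(CacheMixin, BaseDataSource): ...
# with CacheMixin listed first so its __init__ runs and wraps raw_history_bars.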
Example 4
 def test_clear(self):
     for size in SIZES:
         l = LRU(size)
         for i in range(size+5):
             l[i] = str(i)
         l.clear()
         for i in range(size):
             l[i] = str(i)
         for i in range(size):
             _ = l[random.randint(0, size-1)]
         l.clear()
         self.assertTrue(len(l) == 0)
Example 5
 def test_clear(self):
     for size in SIZES:
         l = LRU(size)
         for i in range(size+5):
             l[i] = str(i)
         l.clear()
         for i in range(size):
             l[i] = str(i)
         for i in xrange(size):
             _ = l[random.randint(0, size-1)]
         l.clear()
         self.assertTrue(len(l) == 0)
Example 6
 def test_setdefault(self):
     l = LRU(2)
     l[1] = '1'
     val = l.setdefault(1)
     self.assertEqual('1', val)
     self.assertEqual((1, 0), l.get_stats())
     val = l.setdefault(2, '2')
     self.assertEqual('2', val)
     self.assertEqual((1, 1), l.get_stats())
     self.assertEqual(val, l[2])
     l.clear()
     val = 'long string' * 512
     l.setdefault(1, val)
     l[2] = '2'
     l[3] = '3'
     self.assertTrue(val)
Example 7
class CacheRecord(BaseDB):
    def __init__(self, db: BaseDB, size=1024):
        self._db = db
        self._size = size
        self._cached = LRU(self._size)

    def cache_clear(self):
        self._cached.clear()

    def __setitem__(self, key, value):
        self._cached[key] = value
        self._db[key] = value

    def __getitem__(self, key):
        if key not in self._cached:
            self._cached[key] = self._db[key]
        return self._cached[key]

    def __contains__(self, key):
        return key in self._db
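A hedged usage sketch: CacheRecord writes through to the wrapped store and fills its LRU lazily on reads, so any dict-like object can stand in for the BaseDB instance here (the backing store below is illustrative only):

backing = {}                      # stand-in for a real BaseDB-backed store
records = CacheRecord(backing, size=128)

records["k"] = b"value"           # written to both the LRU and the backing store
assert backing["k"] == b"value"
assert records["k"] == b"value"   # later reads are served from the LRU
assert "k" in records             # __contains__ consults the backing store only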
Example 8
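This snippet continues an interactive walkthrough of the lru-dict API, so its opening comment refers to state built up by statements that are not shown. One hedged reconstruction that produces the same starting state (using only standard lru.LRU calls) is:

from lru import LRU

l = LRU(3)                 # capacity of three items
for k in (2, 5, 3):        # inserted oldest-to-newest, so MRU order is 3, 5, 2
    l[k] = str(k)
_ = l[3]                   # one recorded hit, matching the stats reported below

print(l.items())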
# Would print [(3, '3'), (5, '5'), (2, '2')]
print(l.get_size())
# Would print 3
print(l.has_key(5))
# Would print True
print(2 in l)
# Would print True

print(l.get_stats())
# Would print (1, 0)

l.update({5: '0'})  # Update an item
print(l.items())
# Would print [(5, '0'), (3, '3'), (2, '2')]

l.clear()
print(l.items())
# Would print []


def evicted(key, value):
    print "removing: %s, %s" % (key, value)


l = LRU(1, callback=evicted)

l[1] = '1'
l[2] = '2'
# callback would print removing: 1, 1

l[2] = '3'
Example 9
class AccountDB(AccountDatabaseAPI):
    logger = get_extended_debug_logger('eth.db.account.AccountDB')

    def __init__(self,
                 db: AtomicDatabaseAPI,
                 state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state,  without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: StorageDB works similarly

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._raw_store_db = KeyAccessLoggerAtomicDB(db,
                                                     log_missing_keys=False)
        self._batchdb = BatchDB(self._raw_store_db)
        self._batchtrie = BatchDB(self._raw_store_db,
                                  read_through_deletes=True)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(
            HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_logger = KeyAccessLoggerDB(self._trie,
                                              log_missing_keys=False)
        self._trie_cache = CacheDB(self._trie_logger)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)
        self._account_stores: Dict[Address, AccountStorageDatabaseAPI] = {}
        self._dirty_accounts: Set[Address] = set()
        self._root_hash_at_last_persist = state_root
        self._accessed_accounts: Set[Address] = set()
        self._accessed_bytecodes: Set[Address] = set()

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        if self._trie.root_hash != value:
            self._trie_cache.reset_cache()
            self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self,
                    address: Address,
                    slot: int,
                    from_journal: bool = True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account_store = self._get_address_store(address)
        return account_store.get(slot, from_journal)

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.set(slot, value)

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        self._set_storage_root(address, BLANK_ROOT_HASH)
        self._wipe_storage(address)

    def _wipe_storage(self, address: Address) -> None:
        """
        Wipe out the storage, without explicitly handling the storage root update
        """
        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.delete()

    def _get_address_store(self,
                           address: Address) -> AccountStorageDatabaseAPI:
        if address in self._account_stores:
            store = self._account_stores[address]
        else:
            storage_root = self._get_storage_root(address)
            store = AccountStorageDB(self._raw_store_db, storage_root, address)
            self._account_stores[address] = store
        return store

    def _dirty_account_stores(
            self) -> Iterable[Tuple[Address, AccountStorageDatabaseAPI]]:
        for address in self._dirty_accounts:
            store = self._account_stores[address]
            yield address, store

    @to_tuple
    def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]:
        # list all the accounts that were changed, and their new storage roots
        for address, store in self._dirty_account_stores():
            if store.has_changed_root:
                yield address, store.get_changed_root()

    def _get_storage_root(self, address: Address) -> Hash32:
        account = self._get_account(address)
        return account.storage_root

    def _set_storage_root(self, address: Address,
                          new_storage_root: Hash32) -> None:
        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=new_storage_root))

    def _validate_flushed_storage(self, address: Address,
                                  store: AccountStorageDatabaseAPI) -> None:
        if store.has_changed_root:
            actual_storage_root = self._get_storage_root(address)
            expected_storage_root = store.get_changed_root()
            if expected_storage_root != actual_storage_root:
                raise ValidationError(
                    "Storage root was not saved to account before trying to persist roots. "
                    f"Account {address!r} had storage {actual_storage_root!r}, "
                    f"but should be {expected_storage_root!r}.")

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")

        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")

        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")

        code_hash = self.get_code_hash(address)
        if code_hash == EMPTY_SHA3:
            return b''
        else:
            try:
                return self._journaldb[code_hash]
            except KeyError:
                raise MissingBytecode(code_hash) from KeyError
            finally:
                if code_hash in self._get_accessed_node_hashes():
                    self._accessed_bytecodes.add(address)

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)

        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(
            address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        # We must wipe the storage first, because if it's the first time we load it,
        #   then we want to load it with the original storage root hash, not the
        #   empty one. (in case of a later revert, we don't want to poison the storage cache)
        self._wipe_storage(address)

        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        account_rlp = self._get_encoded_account(address, from_journal=True)
        return account_rlp != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(
            address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_encoded_account(self,
                             address: Address,
                             from_journal: bool = True) -> bytes:
        self._accessed_accounts.add(address)
        lookup_trie = self._journaltrie if from_journal else self._trie_cache

        try:
            return lookup_trie[address]
        except trie_exceptions.MissingTrieNode as exc:
            raise MissingAccountTrieNode(*exc.args) from exc
        except KeyError:
            # In case the account is deleted in the JournalDB
            return b''

    def _get_account(self,
                     address: Address,
                     from_journal: bool = True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]

        rlp_account = self._get_encoded_account(address, from_journal)

        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> JournalDBCheckpoint:
        checkpoint = self._journaldb.record()
        self._journaltrie.record(checkpoint)

        for _, store in self._dirty_account_stores():
            store.record(checkpoint)
        return checkpoint

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.discard(checkpoint)
        self._journaltrie.discard(checkpoint)
        self._account_cache.clear()
        for _, store in self._dirty_account_stores():
            store.discard(checkpoint)

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.commit(checkpoint)
        self._journaltrie.commit(checkpoint)
        for _, store in self._dirty_account_stores():
            store.commit(checkpoint)

    def lock_changes(self) -> None:
        for _, store in self._dirty_account_stores():
            store.lock_changes()

    def make_state_root(self) -> Hash32:
        for _, store in self._dirty_account_stores():
            store.make_storage_root()

        for address, storage_root in self._get_changed_roots():
            if self.account_exists(address) or storage_root != BLANK_ROOT_HASH:
                self._set_storage_root(address, storage_root)

        self._journaldb.persist()

        diff = self._journaltrie.diff()
        if diff.deleted_keys() or diff.pending_items():
            # In addition to squashing (which is redundant here), this context manager causes
            # an atomic commit of the changes, so exceptions will revert the trie
            with self._trie.squash_changes() as memory_trie:
                self._apply_account_diff_without_proof(diff, memory_trie)

        self._journaltrie.reset()
        self._trie_cache.reset_cache()

        return self.state_root

    def persist(self) -> MetaWitnessAPI:
        self.make_state_root()

        # persist storage
        with self._raw_store_db.atomic_batch() as write_batch:
            for address, store in self._dirty_account_stores():
                self._validate_flushed_storage(address, store)
                store.persist(write_batch)

        for address, new_root in self._get_changed_roots():
            if new_root is None:
                raise ValidationError(
                    f"Cannot validate new root of account 0x{address.hex()} "
                    f"which has a new root hash of None")
            elif new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH:
                raise ValidationError(
                    "After persisting storage trie, a root node was not found. "
                    f"State root for account 0x{address.hex()} "
                    f"is missing for hash 0x{new_root.hex()}.")

        # generate witness (copy) before clearing the underlying data
        meta_witness = self._get_meta_witness()

        # reset local storage trackers
        self._account_stores = {}
        self._dirty_accounts = set()
        self._accessed_accounts = set()
        self._accessed_bytecodes = set()
        # We have to clear the account cache here so that future account accesses
        #   will get added to _accessed_accounts correctly. Account accesses that
        #   are cached do not add the address to the list of accessed accounts.
        self._account_cache.clear()

        # persist accounts
        self._validate_generated_root()
        new_root_hash = self.state_root
        with self._raw_store_db.atomic_batch() as write_batch:
            self._batchtrie.commit_to(write_batch, apply_deletes=False)
            self._batchdb.commit_to(write_batch, apply_deletes=False)
        self._root_hash_at_last_persist = new_root_hash

        return meta_witness

    def _get_accessed_node_hashes(self) -> Set[Hash32]:
        return cast(Set[Hash32], self._raw_store_db.keys_read)

    @to_dict
    def _get_access_list(
            self) -> Iterable[Tuple[Address, AccountQueryTracker]]:
        """
        Get the list of addresses that were accessed, whether the bytecode was accessed, and
        which storage slots were accessed.
        """
        for address in self._accessed_accounts:
            did_access_bytecode = address in self._accessed_bytecodes
            if address in self._account_stores:
                accessed_storage_slots = self._account_stores[
                    address].get_accessed_slots()
            else:
                accessed_storage_slots = frozenset()
            yield address, AccountQueryTracker(did_access_bytecode,
                                               accessed_storage_slots)

    def _get_meta_witness(self) -> MetaWitness:
        """
        Get a variety of metadata about the state witness needed to execute the block.

        This creates a copy, so that underlying changes do not affect the returned MetaWitness.
        """
        return MetaWitness(self._get_accessed_node_hashes(),
                           self._get_access_list())

    def _validate_generated_root(self) -> None:
        db_diff = self._journaldb.diff()
        if len(db_diff):
            raise ValidationError(
                f"AccountDB had a dirty db when it needed to be clean: {db_diff!r}"
            )
        trie_diff = self._journaltrie.diff()
        if len(trie_diff):
            raise ValidationError(
                f"AccountDB had a dirty trie when it needed to be clean: {trie_diff!r}"
            )

    def _apply_account_diff_without_proof(self, diff: DBDiff,
                                          trie: DatabaseAPI) -> None:
        """
        Apply diff of trie updates, when original nodes might be missing.
        Note that doing this naively will raise exceptions about missing nodes
        from *intermediate* trie roots. This captures exceptions and uses the previous
        trie root hash that will be recognized by other nodes.
        """
        # It's fairly common that when an account is deleted, we need to retrieve nodes
        # for accounts that were not needed during normal execution. We only need these
        # nodes to refactor the trie.
        for delete_key in diff.deleted_keys():
            try:
                del trie[delete_key]
            except trie_exceptions.MissingTrieNode as exc:
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc

        # It's fairly unusual, but possible, that setting an account will need unknown
        # nodes during a trie refactor. Here is an example that seems to cause it:
        #
        # Setup:
        #   - Root node is a branch, with 0 pointing to a leaf
        #   - The complete leaf key is (0, 1, 2), so (1, 2) is in the leaf node
        #   - We know the leaf node hash but not the leaf node body
        # Refactor that triggers missing node:
        #   - Add value with key (0, 3, 4)
        #   - We need to replace the current leaf node with a branch that points leaves at 1 and 3
        #   - The leaf for key (0, 1, 2) now contains only the (2) part, so needs to be rebuilt
        #   - We need the full body of the old (1, 2) leaf node, to rebuild

        for key, val in diff.pending_items():
            try:
                trie[key] = val
            except trie_exceptions.MissingTrieNode as exc:
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc
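A rough usage sketch for the class above, assuming the usual py-evm layout in which AtomicDB lives in eth.db.atomic and Address comes from eth_typing; the address value is arbitrary:

from eth.db.atomic import AtomicDB
from eth_typing import Address

account_db = AccountDB(AtomicDB())      # fresh in-memory backend, blank state root
addr = Address(b'\x01' * 20)

account_db.set_balance(addr, 10**18)
assert account_db.get_balance(addr) == 10**18

account_db.persist()                    # flushes both journals and writes the new state root
print(account_db.state_root.hex())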
Example 10
class AccountDB(BaseAccountDB):

    logger = cast(ExtendedDebugLogger,
                  logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self,
                 db: BaseAtomicDB,
                 state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state,  without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: StorageDB works similarly

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._raw_store_db = db
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db, read_through_deletes=True)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(
            HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)
        self._account_stores = {}  # type: Dict[Address, AccountStorageDB]
        self._dirty_accounts = set()  # type: Set[Address]
        self._root_hash_at_last_persist = state_root

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        if self._trie.root_hash != value:
            self._trie_cache.reset_cache()
            self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self,
                    address: Address,
                    slot: int,
                    from_journal: bool = True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account_store = self._get_address_store(address)
        return account_store.get(slot, from_journal)

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.set(slot, value)

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        self._set_storage_root(address, BLANK_ROOT_HASH)
        self._wipe_storage(address)

    def _wipe_storage(self, address: Address) -> None:
        """
        Wipe out the storage, without explicitly handling the storage root update
        """
        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.delete()

    def _get_address_store(self, address: Address) -> AccountStorageDB:
        if address in self._account_stores:
            store = self._account_stores[address]
        else:
            storage_root = self._get_storage_root(address)
            store = AccountStorageDB(self._raw_store_db, storage_root, address)
            self._account_stores[address] = store
        return store

    def _dirty_account_stores(
            self) -> Iterable[Tuple[Address, AccountStorageDB]]:
        for address in self._dirty_accounts:
            store = self._account_stores[address]
            yield address, store

    @to_tuple
    def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]:
        # list all the accounts that were changed, and their new storage roots
        for address, store in self._dirty_account_stores():
            if store.has_changed_root:
                yield address, store.get_changed_root()

    def _get_storage_root(self, address: Address) -> Hash32:
        account = self._get_account(address)
        return account.storage_root

    def _set_storage_root(self, address: Address,
                          new_storage_root: Hash32) -> None:
        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=new_storage_root))

    def _validate_flushed_storage(self, address: Address,
                                  store: AccountStorageDB) -> None:
        if store.has_changed_root:
            actual_storage_root = self._get_storage_root(address)
            expected_storage_root = store.get_changed_root()
            if expected_storage_root != actual_storage_root:
                raise ValidationError(
                    "Storage root was not saved to account before trying to persist roots. "
                    "Account %r had storage %r, but should be %r." % (
                        address,
                        actual_storage_root,
                        expected_storage_root,
                    ))

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")

        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")

        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")

        code_hash = self.get_code_hash(address)
        if code_hash == EMPTY_SHA3:
            return b''
        else:
            try:
                return self._journaldb[code_hash]
            except KeyError:
                raise MissingBytecode(code_hash) from KeyError

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)

        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(
            address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

        self._wipe_storage(address)

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        account_rlp = self._get_encoded_account(address, from_journal=True)
        return account_rlp != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(
            address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_encoded_account(self,
                             address: Address,
                             from_journal: bool = True) -> bytes:
        lookup_trie = self._journaltrie if from_journal else self._trie_cache

        try:
            return lookup_trie[address]
        except trie_exceptions.MissingTrieNode as exc:
            raise MissingAccountTrieNode(*exc.args) from exc
        except KeyError:
            # In case the account is deleted in the JournalDB
            return b''

    def _get_account(self,
                     address: Address,
                     from_journal: bool = True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]

        rlp_account = self._get_encoded_account(address, from_journal)

        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> JournalDBCheckpoint:
        checkpoint = self._journaldb.record()
        self._journaltrie.record(checkpoint)

        for _, store in self._dirty_account_stores():
            store.record(checkpoint)
        return checkpoint

    def discard(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.discard(checkpoint)
        self._journaltrie.discard(checkpoint)
        self._account_cache.clear()
        for _, store in self._dirty_account_stores():
            store.discard(checkpoint)

    def commit(self, checkpoint: JournalDBCheckpoint) -> None:
        self._journaldb.commit(checkpoint)
        self._journaltrie.commit(checkpoint)
        for _, store in self._dirty_account_stores():
            store.commit(checkpoint)

    def make_state_root(self) -> Hash32:
        for _, store in self._dirty_account_stores():
            store.make_storage_root()

        for address, storage_root in self._get_changed_roots():
            self.logger.debug2(
                "Updating account 0x%s to storage root 0x%s",
                address.hex(),
                storage_root.hex(),
            )
            self._set_storage_root(address, storage_root)

        self._journaldb.persist()

        diff = self._journaltrie.diff()
        # In addition to squashing (which is redundant here), this context manager causes
        # an atomic commit of the changes, so exceptions will revert the trie
        with self._trie.squash_changes() as memory_trie:
            self._apply_account_diff_without_proof(diff, memory_trie)

        self._journaltrie.reset()
        self._trie_cache.reset_cache()

        return self.state_root

    def persist(self) -> None:
        self.make_state_root()

        # persist storage
        with self._raw_store_db.atomic_batch() as write_batch:
            for address, store in self._dirty_account_stores():
                self._validate_flushed_storage(address, store)
                store.persist(write_batch)

        for address, new_root in self._get_changed_roots():
            if new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH:
                raise ValidationError(
                    "After persisting storage trie, a root node was not found. "
                    "State root for account 0x%s is missing for hash 0x%s." % (
                        address.hex(),
                        new_root.hex(),
                    ))

        # reset local storage trackers
        self._account_stores = {}
        self._dirty_accounts = set()

        # persist accounts
        self._validate_generated_root()
        new_root_hash = self.state_root
        self.logger.debug2("Persisting new state root: 0x%s",
                           new_root_hash.hex())
        with self._raw_store_db.atomic_batch() as write_batch:
            self._batchtrie.commit_to(write_batch, apply_deletes=False)
            self._batchdb.commit_to(write_batch, apply_deletes=False)
        self._root_hash_at_last_persist = new_root_hash

    def _validate_generated_root(self) -> None:
        db_diff = self._journaldb.diff()
        if len(db_diff):
            raise ValidationError(
                "AccountDB had a dirty db when it needed to be clean: %r" %
                db_diff)
        trie_diff = self._journaltrie.diff()
        if len(trie_diff):
            raise ValidationError(
                "AccountDB had a dirty trie when it needed to be clean: %r" %
                trie_diff)

    def _log_pending_accounts(self) -> None:
        diff = self._journaltrie.diff()
        for address in sorted(diff.pending_keys()):
            account = self._get_account(Address(address))
            self.logger.debug2(
                "Pending Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                to_checksum_address(address),
                account.balance,
                account.nonce,
                encode_hex(account.storage_root),
                encode_hex(account.code_hash),
            )
        for deleted_address in sorted(diff.deleted_keys()):
            cast_deleted_address = Address(deleted_address)
            self.logger.debug2(
                "Deleted Account %s, empty? %s, exists? %s",
                to_checksum_address(deleted_address),
                self.account_is_empty(cast_deleted_address),
                self.account_exists(cast_deleted_address),
            )

    def _apply_account_diff_without_proof(self, diff: DBDiff,
                                          trie: BaseDB) -> None:
        """
        Apply diff of trie updates, when original nodes might be missing.
        Note that doing this naively will raise exceptions about missing nodes
        from *intermediate* trie roots. This captures exceptions and uses the previous
        trie root hash that will be recognized by other nodes.
        """
        # It's fairly common that when an account is deleted, we need to retrieve nodes
        # for accounts that were not needed during normal execution. We only need these
        # nodes to refactor the trie.
        for delete_key in diff.deleted_keys():
            try:
                del trie[delete_key]
            except trie_exceptions.MissingTrieNode as exc:
                self.logger.debug(
                    "Missing node while deleting account with key %s: %s",
                    encode_hex(delete_key),
                    exc,
                )
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc

        # It's fairly unusual, but possible, that setting an account will need unknown
        # nodes during a trie refactor. Here is an example that seems to cause it:
        #
        # Setup:
        #   - Root node is a branch, with 0 pointing to a leaf
        #   - The complete leaf key is (0, 1, 2), so (1, 2) is in the leaf node
        #   - We know the leaf node hash but not the leaf node body
        # Refactor that triggers missing node:
        #   - Add value with key (0, 3, 4)
        #   - We need to replace the current leaf node with a branch that points leaves at 1 and 3
        #   - The leaf for key (0, 1, 2) now contains only the (2) part, so needs to be rebuilt
        #   - We need the full body of the old (1, 2) leaf node, to rebuild

        for key, val in diff.pending_items():
            try:
                trie[key] = val
            except trie_exceptions.MissingTrieNode as exc:
                self.logger.debug(
                    "Missing node on account update key %s to %s: %s",
                    encode_hex(key),
                    encode_hex(val),
                    exc,
                )
                raise MissingAccountTrieNode(
                    exc.missing_node_hash,
                    self._root_hash_at_last_persist,
                    exc.requested_key,
                ) from exc
Example 11
class AlchemyModel(t.Generic[T],
                   QtCore.QAbstractTableModel,
                   metaclass=_SqlAlchemyTableModelMeta):
    _columns: t.Sequence[ConvertibleColumn]

    def __init__(
        self,
        model_type: t.Type[T],
        order_by: QueryableAttribute,
        *,
        columns: t.Optional[t.Sequence[ConvertibleColumn]] = None,
        page_size: int = 64,
        auto_commit: bool = True,
    ):
        super().__init__()
        self._model_type = model_type
        self._order_by_column = order_by
        self._page_size = page_size
        self._auto_commit = auto_commit

        if columns is not None:
            self._columns = columns

        if not self._columns:
            raise ValueError('Specify at least one column')

        self._header_names = tuple(' '.join(v.capitalize()
                                            for v in c.column.name.split('_'))
                                   for c in self._columns)

        self._cache = LRU(int(page_size * 2))
        self._cached_size = None

    def filter_query(self, query: Query) -> Query:
        return query

    def get_query(self) -> Query:
        return self.filter_query(EDB.Session.query(self._model_type))

    def clear_cache(self) -> None:
        self._cache.clear()
        self._cached_size = None

    def _load_page(self, offset: int, limit: int) -> None:
        for idx, model in enumerate(self.get_query().order_by(
                self._order_by_column).limit(limit).offset(offset)):
            self._cache[idx + offset] = model

    def get_item_at_index(self, index: int) -> t.Optional[T]:
        if index < 0:
            return None
        try:
            return self._cache[index]
        except KeyError:
            self._load_page(index, self._page_size)
            return self._cache.get(index, None)

    def rowCount(self, parent: QModelIndex = ...) -> int:
        if self._cached_size is None:
            self._cached_size = self.get_query().count()
        return self._cached_size

    def columnCount(self, parent: QModelIndex = ...) -> int:
        return len(self._columns)

    def data(self, index: QModelIndex, role: int = ...) -> t.Any:
        if role not in (Qt.DisplayRole, Qt.EditRole):
            return None

        row = self.get_item_at_index(index.row())
        if not row:
            return None

        column = self._columns[index.column()]

        return column.to_primitive(getattr(row, column.column.name))

    def setData(self,
                index: QModelIndex,
                value: t.Any,
                role: int = ...) -> bool:
        if role != Qt.EditRole:
            return False

        row = self.get_item_at_index(index.row())
        if not row:
            return False

        column = self._columns[index.column()]

        setattr(row, column.column.name, column.from_primitive(value))

        if self._auto_commit:
            EDB.Session.commit()

        if self._columns[index.column()] == self._order_by_column:
            self.clear_cache()

        return True

    def flags(self, index: QModelIndex) -> Qt.ItemFlags:
        return Qt.ItemIsSelectable | Qt.ItemIsEditable | Qt.ItemIsEnabled

    def headerData(self,
                   section: int,
                   orientation: Qt.Orientation,
                   role: int = ...) -> t.Any:
        if role != Qt.DisplayRole:
            return None

        if orientation == Qt.Vertical:
            return str(section + 1)

        return self._header_names[section]

    def removeRows(self,
                   row: int,
                   count: int,
                   parent: QModelIndex = ...) -> bool:
        self.beginRemoveRows(parent, row, row - 1 + count)
        pk_column = self._model_type.__mapper__.primary_key[0]
        items = list(
            filter(lambda i: i is not None,
                   (getattr(self.get_item_at_index(idx), pk_column.name)
                    for idx in range(row, row + count))))

        if not items:
            return False

        EDB.Session.query(self._model_type).filter(
            pk_column.in_(items)).delete(synchronize_session='fetch')

        if self._auto_commit:
            EDB.Session.commit()

        self.clear_cache()
        self.endRemoveRows()
        return True

    def pop(self, row: int) -> t.Optional[T]:
        model = self.get_item_at_index(row)
        if not model:
            return
        self.removeRows(row, 1)
        return model

    def moveRows(
        self,
        sourceParent: QModelIndex,
        sourceRow: int,
        count: int,
        destinationParent: QModelIndex,
        destinationChild: int,
    ) -> bool:
        self.beginMoveRows(QModelIndex(), sourceRow, sourceRow + count - 1,
                           QModelIndex(), destinationChild)

        floor = min(sourceRow, destinationChild)

        items = [
            _item for _item in (
                self.get_item_at_index(idx)
                for idx in range(floor,
                                 max(sourceRow, destinationChild) + count))
            if _item is not None
        ]
        old_values = [
            getattr(_item, self._order_by_column.name) for _item in items
        ]

        for _ in range(count):
            items.insert(destinationChild - floor,
                         items.pop(sourceRow - floor))

        for item, new_value in zip(items, old_values):
            setattr(item, self._order_by_column.name, new_value)

        if self._auto_commit:
            EDB.Session.commit()

        self.clear_cache()

        self.endMoveRows()
        return True

    def reset(self) -> None:
        self.beginResetModel()
        self.clear_cache()
        self.endResetModel()
Example 12
class SpotifyCache:
    def __init__(self) -> None:
        self._tracks = LRU(50)
        self._artists = LRU(50)
        self._audio_features = LRU(50)
        self._top_tracks = LRU(50)
        self._albums = LRU(50)
        self._queries: typing.Dict[str, LRU] = {
            i: LRU(50)
            for i in ("artist", "track", "album")
        }
        self._task: asyncio.Task[None] = asyncio.create_task(
            self.start_clear_loop())

    def __del__(self) -> None:
        self._task.cancel()

    # pylint: disable=redefined-builtin
    def get_container(self, type: str) -> LRU:
        return getattr(self, f"{type}{'s'*(not type.endswith('s'))}")

    def update_items(self, items: typing.Sequence[BaseSpotify]) -> None:
        if not items:
            return

        self.get_container(items[0].type).update({
            item.id: item
            for item in items
            if not item.__class__.__name__.startswith("Partial")
        })

    def set_item(self, item: T) -> T:
        if item.__class__.__name__.startswith("Partial"):
            return item

        self.get_container(item.type)[item.id] = item
        return item

    @property
    def albums(self) -> LRU:
        return self._albums

    @property
    def tracks(self) -> LRU:
        return self._tracks

    @property
    def artists(self) -> LRU:
        return self._artists

    @property
    def audio_features(self) -> LRU:
        return self._audio_features

    @property
    def top_tracks(self) -> LRU:
        return self._top_tracks

    @property
    def queries(self) -> typing.Dict[str, LRU]:
        return self._queries

    def get_queries(self, type_name: str) -> LRU:
        return self._queries[type_name]

    async def start_clear_loop(self) -> None:
        while 1:
            await asyncio.sleep(86400)
            self._tracks.clear()
            self._artists.clear()
            self._audio_features.clear()
            self._top_tracks.clear()
            self._albums.clear()
            _ = [i.clear() for i in self._queries.values()]
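Because __init__ schedules start_clear_loop() with asyncio.create_task(), a SpotifyCache has to be created while an event loop is running. A minimal hedged sketch:

import asyncio

async def main() -> None:
    cache = SpotifyCache()
    tracks = cache.get_container("track")   # the name mangling resolves to the _tracks LRU
    print(len(tracks), list(cache.queries))
    del cache                               # __del__ cancels the daily clear task

asyncio.run(main())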
Example 13
class GelbooruViewer:
    API_URL = "https://gelbooru.com/index.php?page=dapi&s=post&q=index"
    MAX_ID = 1
    MAX_ID_LOCK = Lock()
    MAX_CACHE_SIZE = 32
    MAX_CACHE_TIME = 24 * 60  # minutes
    PICTURES_PER_TAG = 200

    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update(
            {
                'Accept': 'application/json, application/xml',
                'Accept-Language': 'en-US',
                'User-Agent': 'Mozilla/5.0 GelbooruViewer/1.0 (+https://github.com/ArchieMeng/GelbooruViewer)'
            }
        )
        # only cache for get_all with tags while pid is 0!!!

        if importlib.find_loader('lru'):
            from lru import LRU
            self.cache = LRU(GelbooruViewer.MAX_CACHE_SIZE)
        else:
            self.cache = dict()

        self.cache_lock = Lock()
        # occasionally update cache
        self.last_cache_used = time()
        self.update_cache_thread = Thread(target=self._update_cache_loop, daemon=True)
        self.update_cache_thread.start()

        # get latest image to update MAX_ID
        self.get(limit=0)

    def _update_cache(self, tags, num=None):
        """
        Do the update cache task
        :param tags: tags of picture to update to cache
        :param num:  amount of pictures
        :return:
        """
        if tags:
            result = [*self.get_all_generator(tags, 0, num, thread_limit=1, limit=100)]
            if result:
                key = '+'.join(tags)
                with self.cache_lock:
                    self.cache[key] = result

    def _update_cache_loop(self):
        """
        Occasionally refresh cache. Clear cache if unused for a long time.
        :return:
        """
        minutes = 2 * 60
        while True:
            sleep(60 * minutes)
            if time() - self.last_cache_used > self.MAX_CACHE_TIME * 60:
                self.cache.clear()
                gc.collect()
                continue
            with self.cache_lock:
                keys = self.cache.keys()
            with ThreadPoolExecutor(max_workers=2) as executor:
                futures = [executor.submit(self._update_cache, key.split('+'), GelbooruViewer.PICTURES_PER_TAG) for key in keys]
                for future in as_completed(futures):
                    try:
                        result = future.result()
                        print(result)
                    except Exception as e:
                        print("Exception happened in GelbooruViewer._update_cache_loop", type(e), e)

    def get_raw_content(self, **kwargs):
        content = None
        with self.session as session:
            response = session.get(GelbooruViewer.API_URL, params=kwargs)
            try:
                content = response.content
            except Exception as e:
                logging.error(str(e))
                pass
        return content

    def get(self, **kwargs) -> list:
        """
        use Gelbooru api to fetch picture info.

        :param kwargs: allowed args includes
        limit: How many posts you want to retrieve. There is a hard limit of 100 posts per request.

        pid: The page number.

        cid: Change ID of the post.
        This is in Unix time so there are likely others with the same value if updated at the same time.

        tags: The tags to search for. Any tag combination that works on the web site will work here.
        This includes all the meta-tags. See cheatsheet for more information.

        :return: a list of GelbooruPicture; if something goes wrong, an empty list is returned
        """
        attempt = 0
        content = None
        while attempt < 3 and content is None:
            attempt += 1
            content = self.get_raw_content(**kwargs)

        if content is None:
            return []
        if isinstance(content, bytes):
            xml_str = content.decode('utf-8')
        else:
            xml_str = content

        root = ElementTree.fromstring(xml_str)
        posts = root.findall('post')
        picture_list = []

        if posts:
            cur_max_id = int(posts[0].attrib['id'])
            with GelbooruViewer.MAX_ID_LOCK:
                GelbooruViewer.MAX_ID = max(GelbooruViewer.MAX_ID, cur_max_id)
        else:
            return None

        for post in posts:
            info = post.attrib
            picture_list.append(
                GelbooruPicture(
                    info['width'],
                    info['height'],
                    info['score'],
                    info['source'],
                    "https:"+info['preview_url'],
                    "https:"+info['sample_url'],
                    "https:"+info['file_url'],
                    info['created_at'],
                    info['creator_id'],
                    [tag for tag in info['tags'].split(' ') if tag and not tag.isspace()],
                    info['id'],
                    info['rating']
                )
            )
        return picture_list

    def get_all(self, tags: list, pid=0, num=None, thread_limit=5, use_cache=True, limit=25):
        """
        Regardless of the official per-request limit, use threading to fetch as many pictures as you want.

        When pictures are found in the cache, a list is returned.

        When pictures are found but not cached, a generator is returned.

        Otherwise, None is returned.

        :param limit: number of pictures per request

        :param use_cache: whether to prefer the internal cache

        :param thread_limit: number of threads running at the same time

        :param tags: tags must be provided

        :param pid: beginning page id, indexed from 0

        :param num: number of pictures you want.
        Fewer pictures may be returned if Gelbooru does not have enough.

        :return: a generator of GelbooruPicture, a list, or None

        """
        tags.sort()
        if use_cache and pid == 0:
            with self.cache_lock:
                key = '+'.join(tags)
                if key in self.cache and isinstance(self.cache[key], list):
                    self.last_cache_used = time()
                    if not num:
                        return self.cache[key]
                    else:
                        return self.cache[key][:num]
                elif key not in self.cache or isinstance(self.cache[key], str):
                    self.last_cache_used = time()
                    # only one update thread runs at a time; while it runs, a str marker is kept in the cache
                    self.cache[key] = "executing"
                    # cache size is currently limited to guard against memory leaks
                    thread = Thread(
                        target=self._update_cache,
                        args=(tags, GelbooruViewer.PICTURES_PER_TAG),
                        daemon=True
                    )
                    thread.start()

        content = self.get_raw_content(tags=tags, limit=0)
        try:
            xml_str = content.decode('utf-8')
            root = ElementTree.fromstring(xml_str)
            total = int(root.attrib['count'])
        except Exception:
            return None
        if total > 0:
            return self.get_all_generator(tags, pid, num, thread_limit, total, limit)
        else:
            return None

    def get_all_generator(
            self,
            tags: list,
            pid=0,
            num=None,
            thread_limit=5,
            total=None,
            limit=25
    ):
        """
        Underlying implementation of get_all; a generator is returned.
        :param thread_limit: max number of threads fetching pictures at one time
        :param tags: tags of pictures

        :param pid: beginning page id, indexed from 0

        :param num: number of pictures you want.
        Fewer pictures may be returned if Gelbooru does not have enough.

        :param total: total number of pictures; pass None if unknown. This is used internally.
        :param limit: number of pictures per request.
        Roughly, limit=10 costs 1.2 s per request, 25 costs 1.4 s, 50 costs 2.2 s and 100 costs 2.6 s.
        A larger limit gives better per-picture throughput per request, but each request takes longer and the overall get_all time can grow.

        :return:
        """
        if limit <= 0 or limit > 100:
            limit = 10

        def _get(tags, pid):
            content = self.get_raw_content(tags=tags, limit=limit, pid=pid)
            xml_string = content.decode()
            posts = ElementTree.fromstring(xml_string).findall('post')
            return posts
        if total is None:
            content = self.get_raw_content(tags=tags, limit=0)
            xml_str = content.decode('utf-8')
            root = ElementTree.fromstring(xml_str)
            total = int(root.attrib['count'])
        if isinstance(num, int):
            if num > 0:
                # if total amount is too large, use num instead.
                total = min(total, num)
        if tags and total > 0:
            with ThreadPoolExecutor(max_workers=thread_limit) as executor:
                final_pid = int(total / limit)
                start = pid
                tasks = []
                while start < final_pid + 1:
                    futures2idx = {
                        executor.submit(_get, tags, i): i
                        for i in tasks + [j for j in range(start, min(start + thread_limit, final_pid + 1))]
                    }
                    tasks = []
                    for future in as_completed(futures2idx):
                        idx = futures2idx[future]
                        try:
                            posts = future.result()
                            for post in posts:
                                info = post.attrib
                                yield GelbooruPicture(
                                    info['width'],
                                    info['height'],
                                    info['score'],
                                    info['source'],
                                    "https:" + info['preview_url'],
                                    "https:" + info['sample_url'],
                                    "https:" + info['file_url'],
                                    info['created_at'],
                                    info['creator_id'],
                                    [tag for tag in info['tags'].split(' ') if tag and not tag.isspace()],
                                    info['id'],
                                    info['rating']
                                )
                        except Exception as e:
                            print("GelbooruViewer.get_all_generators raise", type(e), e)
                            tasks.append(idx)
                    start += thread_limit
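
For reference, a minimal, self-contained sketch of the tag-keyed cache pattern used above (TagCache and fetch are illustrative names, not part of GelbooruViewer): return the cached list when one is present, otherwise put a string marker into the LRU so only one background refresh is started per key.

from threading import Lock, Thread
from lru import LRU


class TagCache:
    def __init__(self, size=128):
        self.cache = LRU(size)      # '+'-joined tags -> list of pictures
        self.lock = Lock()

    def get(self, tags, fetch):
        key = '+'.join(sorted(tags))
        with self.lock:
            hit = self.cache.get(key)
            if isinstance(hit, list):
                return hit          # served from cache
            if hit is None:
                # str marker: only one refresh thread is started per key
                self.cache[key] = "executing"
                Thread(target=self._refresh, args=(key, tags, fetch), daemon=True).start()
        return None                 # caller falls back to a direct request

    def _refresh(self, key, tags, fetch):
        result = list(fetch(tags))
        with self.lock:
            self.cache[key] = result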
Exemplo n.º 15
0
class FCP(BaseTask):
    def __init__(self, circle, src, dest,
                 treewalk=None,
                 totalsize=0,
                 hostcnt=0,
                 prune=False,
                 verify=False,
                 resume=False,
                 workq=None):
        BaseTask.__init__(self, circle)
        self.circle = circle
        self.treewalk = treewalk
        self.totalsize = totalsize
        self.prune = prune
        self.workq = workq
        self.resume = resume
        self.checkpoint_file = None
        self.src = src
        self.dest = os.path.abspath(dest)

        # cache, keep the size conservative
        # TODO: we need a more portable LRU size

        # defaults so the sanity check below also applies when hostcnt == 0
        self._read_cache_limit = 0
        self._write_cache_limit = 0
        if hostcnt != 0:
            max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
            procs_per_host = self.circle.size // hostcnt
            self._read_cache_limit = ((max_ofile - 64) // procs_per_host) // 3
            self._write_cache_limit = ((max_ofile - 64) // procs_per_host) * 2 // 3

        if self._read_cache_limit <= 0 or self._write_cache_limit <= 0:
            self._read_cache_limit = 1
            self._write_cache_limit = 8

        self.rfd_cache = LRU(self._read_cache_limit)
        self.wfd_cache = LRU(self._write_cache_limit)

        self.cnt_filesize_prior = 0
        self.cnt_filesize = 0

        self.blocksize = 1024 * 1024
        self.chunksize = 1024 * 1024

        # debug
        self.d = {"rank": "rank %s" % circle.rank}
        self.wtime_started = MPI.Wtime()
        self.wtime_ended = None
        self.workcnt = 0  # this is the cnt for the enqueued items
        self.reduce_items = 0  # this is the cnt for processed items
        if self.treewalk:
            log.debug("treewalk files = %s" % treewalk.flist, extra=self.d)

        # fini_check
        self.fini_cnt = Counter()

        # verify
        self.verify = verify
        self.chunksums = []

        # checkpointing
        self.checkpoint_interval = sys.maxsize
        self.checkpoint_last = MPI.Wtime()

        if self.circle.rank == 0:
            print("Start copying process ...")

    def rw_cache_limit(self):
        return (self._read_cache_limit, self._write_cache_limit)

    def set_fixed_chunksize(self, sz):
        self.chunksize = sz

    def set_adaptive_chunksize(self, totalsz):
        self.chunksize = utils.calc_chunksize(totalsz)
        if self.circle.rank == 0:
            print("Adaptive chunksize: %s" % bytes_fmt(self.chunksize))

    def cleanup(self):
        for f in self.rfd_cache.values():
            try:
                os.close(f)
            except OSError as e:
                pass

        for f in self.wfd_cache.values():
            try:
                os.close(f)
            except OSError as e:
                pass

        # remove checkpoint file
        if self.checkpoint_file and os.path.exists(self.checkpoint_file):
            os.remove(self.checkpoint_file)

        # we need to do this because if the last job didn't finish cleanly,
        # the fwalk files can be left behind,
        # and if fcp cleanup gets a chance, it should clean them up

        fwalk = "%s/fwalk.%s" % (self.circle.tempdir, self.circle.rank)
        if os.path.exists(fwalk):
            os.remove(fwalk)

    def new_fchunk(self, fitem):
        fchunk = FileChunk()  # default cmd = copy
        fchunk.src = fitem.path
        fchunk.dest = destpath(fitem, self.dest)
        return fchunk

    def enq_file(self, fi):
        """ Process a single file, represented by "fi" - FileItem
        It involves chunking this file and enqueueing all chunks. """

        chunks = fi.st_size // self.chunksize
        remaining = fi.st_size % self.chunksize

        workcnt = 0

        if fi.st_size == 0:  # empty file
            fchunk = self.new_fchunk(fi)
            fchunk.offset = 0
            fchunk.length = 0
            self.enq(fchunk)
            workcnt += 1
        else:
            for i in range(chunks):
                fchunk = self.new_fchunk(fi)
                fchunk.offset = i * self.chunksize
                fchunk.length = self.chunksize
                self.enq(fchunk)
            workcnt += chunks

        if remaining > 0:
            # send remainder
            fchunk = self.new_fchunk(fi)
            fchunk.offset = chunks * self.chunksize
            fchunk.length = remaining
            self.enq(fchunk)
            workcnt += 1

        # save work cnt
        self.workcnt += workcnt

        log.debug("enq_file(): %s, size = %s, workcnt = %s" % (fi.path, fi.st_size, workcnt),
                     extra=self.d)

    def handle_fitem(self, fi):
        if os.path.islink(fi.path):
            dest = destpath(fi, self.dest)
            linkto = os.readlink(fi.path)
            try:
                os.symlink(linkto, dest)
            except Exception as e:
                log.debug("%s, skipping sym link %s" % (e, fi.path), extra=self.d)
        elif stat.S_ISREG(fi.st_mode):
            self.enq_file(fi)  # where chunking takes place

    def create(self):
        """ Each task has one create(), which is invoked by circle ONCE.
        For FCP, each task will handle_fitem() -> enq_file()
        to process each file gathered during the treewalk stage. """

        if not G.use_store and self.workq:  # restart
            self.setq(self.workq)
            return

        if self.resume:
            return

        # construct and enable all copy operations
        # we batch operation hard-coded
        log.info("create() starts, flist length = %s" % len(self.treewalk.flist),
                    extra=self.d)

        if G.use_store:
            while self.treewalk.flist.qsize > 0:
                fitems, _ = self.treewalk.flist.mget(G.DB_BUFSIZE)
                for fi in fitems:
                    self.handle_fitem(fi)
                self.treewalk.flist.mdel(G.DB_BUFSIZE)

            # store checkpoint
            log.debug("dbname = %s" % self.circle.dbname)
            dirname = os.path.dirname(self.circle.dbname)
            basename = os.path.basename(self.circle.dbname)
            chkpointname = basename + ".CHECK_OK"
            self.checkpoint_file = os.path.join(dirname, chkpointname)
            with open(self.checkpoint_file, "w") as f:
                f.write("%s" % self.totalsize)

        else:  # use memory
            for fi in self.treewalk.flist:
                self.handle_fitem(fi)

            # memory-checkpoint
            if self.checkpoint_file:
                self.do_no_interrupt_checkpoint()
                self.checkpoint_last = MPI.Wtime()

    def do_open(self, k, d, flag, limit):
        """
        @param k: the file path
        @param d: dictionary of <path, file descriptor>
        @return: file descriptor
        """
        if d.has_key(k):
            return d[k]

        if len(d.keys()) >= limit:
            # over the limit
            # clean up the least used
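            # with lru-dict, items() is ordered from most- to least-recently used,
            # so items()[-1] is the entry that will be evicted on the next insert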
            old_k, old_v = d.items()[-1]
            try:
                os.close(old_v)
            except OSError as e:
                log.warn("FD for %s not valid when closing" % old_k, extra=self.d)

        fd = -1
        try:
            fd = os.open(k, flag)
        except OSError as e:
            if e.errno == 28:  # no space left
                log.error("Critical error: %s, exit!" % e, extra=self.d)
                self.circle.exit(0)  # should abort
            else:
                log.error("OSError({0}):{1}, skipping {2}".format(e.errno, e.strerror, k), extra=self.d)
        else:
            if fd > 0:
                d[k] = fd
        finally:
            return fd

    @staticmethod
    def do_mkdir(work):
        src = work.src
        dest = work.dest
        if not os.path.exists(dest):
            os.makedirs(dest)

    def do_copy(self, work):
        src = work.src
        dest = work.dest

        basedir = os.path.dirname(dest)
        if not os.path.exists(basedir):
            os.makedirs(basedir)

        rfd = self.do_open(src, self.rfd_cache, os.O_RDONLY, self._read_cache_limit)
        if rfd < 0:
            return False
        wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY | os.O_CREAT, self._write_cache_limit)
        if wfd < 0:
            if args.force:
                try:
                    os.unlink(dest)
                except OSError as e:
                    log.error("Failed to unlink %s, %s " % (dest, e), extra=self.d)
                    return False
                else:
                    wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY, self._write_cache_limit)
            else:
                log.error("Failed to create output file %s" % dest, extra=self.d)
                return False

        # do the actual copy
        self.write_bytes(rfd, wfd, work)

        # update tally
        self.cnt_filesize += work.length

        if G.verbosity > 2:
            log.debug("Transferred %s bytes from:\n\t [%s] to [%s]" %
                         (self.cnt_filesize, src, dest), extra=self.d)

        return True

    def do_no_interrupt_checkpoint(self):
        a = Thread(target=self.do_checkpoint)
        a.start()
        a.join()
        log.debug("checkpoint: %s" % self.checkpoint_file, extra=self.d)

    def do_checkpoint(self):
        for k in self.wfd_cache.keys():
            os.close(self.wfd_cache[k])

        # clear the cache
        self.wfd_cache.clear()

        tmp_file = self.checkpoint_file + ".part"
        with open(tmp_file, "wb") as f:
            cobj = Checkpoint(self.src, self.dest, self.get_workq(), self.totalsize)
            pickle.dump(cobj, f, pickle.HIGHEST_PROTOCOL)
        # POSIX requires rename to be atomic
        os.rename(tmp_file, self.checkpoint_file)

    def process(self):
        """
        The only work is "copy"
        TODO: clean up other actions such as mkdir/fini_check
        """
        if not G.use_store:
            curtime = MPI.Wtime()
            if curtime - self.checkpoint_last > self.checkpoint_interval:
                self.do_no_interrupt_checkpoint()
                log.info("Checkpointing done ...", extra=self.d)
                self.checkpoint_last = curtime

        work = self.deq()
        self.reduce_items += 1
        if isinstance(work, FileChunk):
            self.do_copy(work)
        else:
            log.warn("Unknown work object: %s" % work, extra=self.d)

    def reduce_init(self, buf):
        buf['cnt_filesize'] = self.cnt_filesize

    def reduce(self, buf1, buf2):
        buf1['cnt_filesize'] += buf2['cnt_filesize']
        return buf1

    def reduce_report(self, buf):
        out = ""
        if self.totalsize != 0:
            out += "%.2f %% finished, " % (100 * float(buf['cnt_filesize']) / self.totalsize)

        out += "%s copied" % bytes_fmt(buf['cnt_filesize'])

        if self.circle.reduce_time_interval != 0:
            rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / self.circle.reduce_time_interval
            self.cnt_filesize_prior = buf['cnt_filesize']
            out += ", estimated transfer rate: %s/s" % bytes_fmt(rate)

        print(out)

    def reduce_finish(self, buf):
        # self.reduce_report(buf)
        pass

    def epilogue(self):
        global taskloads
        self.wtime_ended = MPI.Wtime()
        taskloads = self.circle.comm.gather(self.reduce_items)
        if self.circle.rank == 0:
            if self.totalsize == 0:
                print("\nZero filesize detected, done.\n")
                return
            tlapse = self.wtime_ended - self.wtime_started
            rate = float(self.totalsize) / tlapse
            print("\nFCP Epilogue:\n")
            print("\t{:<20}{:<20}".format("Ending at:", utils.current_time()))
            print("\t{:<20}{:<20}".format("Completed in:", utils.conv_time(tlapse)))
            print("\t{:<20}{:<20}".format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
            print("\t{:<20}{:<20}".format("FCP Loads:", "%s" % taskloads))

    def read_then_write(self, rfd, wfd, work, num_of_bytes, m):
        """ core entry point for copy action: first read then write.

        @param num_of_bytes: the exact amount of bytes we will copy
        @return: False if unsuccessful.

        """
        buf = None
        try:
            buf = readn(rfd, num_of_bytes)
        except IOError:
            self.logger.error("Failed to read %s", work.src, extra=self.d)
            return False

        try:
            writen(wfd, buf)
        except IOError:
            self.logger.error("Failed to write %s", work.dest, extra=self.d)
            return False

        if m:
            m.update(buf)

        return True

    def write_bytes(self, rfd, wfd, work):
        os.lseek(rfd, work.offset, os.SEEK_SET)
        os.lseek(wfd, work.offset, os.SEEK_SET)

        m = None
        if self.verify:
            m = hashlib.sha1()

        remaining = work.length
        while remaining != 0:
            if remaining >= self.blocksize:
                self.read_then_write(rfd, wfd, work, self.blocksize, m)
                remaining -= self.blocksize
            else:
                self.read_then_write(rfd, wfd, work, remaining, m)
                remaining = 0

        if self.verify:
            # use src path here
            ck = ChunkSum(work.src, offset=work.offset, length=work.length,
                          digest=m.hexdigest())
            self.chunksums.append(ck)
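
A condensed sketch of the descriptor-cache idea in do_open above (open_cached is an illustrative helper, not part of FCP): an lru-dict LRU keyed by path holds open file descriptors, and the coldest descriptor is closed before a new one is opened once the limit is reached.

import os

from lru import LRU


def open_cached(path, fd_cache, flags, mode=0o644):
    """Return an open fd for path, bounding open descriptors with an LRU."""
    if fd_cache.has_key(path):
        return fd_cache[path]
    if len(fd_cache) >= fd_cache.get_size():
        # items() is ordered most- to least-recently used: close the coldest fd
        old_path, old_fd = fd_cache.items()[-1]
        os.close(old_fd)
        del fd_cache[old_path]
    fd = os.open(path, flags, mode)
    fd_cache[path] = fd
    return fd


rfd_cache = LRU(4)  # e.g. a read-side cache; FCP sizes this from RLIMIT_NOFILE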
Exemplo n.º 16
0
class Cache:
    """Class representing D3N."""

    # Replacement policies
    LRU = "LRU"
    LFU = "LFU"
    LRU_S = "LRU_S"
    FIFO = "FIFO"
    RAND = "RAND"

    # Write policies
    WRITE_BACK = "WB"
    WRITE_THROUGH = "WT"

    # Layer
    L1 = "L1"
    L2 = "L2"

    consistent = "consistent"
    rendezvous = "rendezvous"
    rr = "rr"

    def __init__(self, layer, size, replace_pol, write_pol, hash_ring,
                 hash_type, obj_size, full_size, logger):
        self._replace_pol = replace_pol  # Replacement policy
        self._write_pol = write_pol  # Write policy
        self._layer = layer  # Layer info
        self._size = size  # Cache size
        self.spaceLeft = size  # Cache size
        self._logger = logger
        self.hashmap = {}  # Mapping
        self.hash_ring = hash_ring
        self._hash_type = hash_type
        self._obj_size = obj_size

        if (self._size == 0):
            self.zerosize = True
            self._size = 1
        else:
            self.zerosize = False

        if (self._replace_pol == Cache.LRU):
            self.cache = LRU(self._size)
        elif (self._replace_pol == Cache.FIFO):
            self.cache = deque()
        elif (self._replace_pol == Cache.LRU_S):
            self.cache = LRU(self._size)
            self.shadow = LRU(full_size)
            self.hist = []
            for i in range(full_size):
                self.hist.append(0)

        # Statistics
        self._hit_count = 0
        self._miss_count = 0
        self._backend_bw = 0
        self._crossrack_bw = 0
        self._intrarack_bw = 0
        self.miss_lat = 0
        self.lat_count = 0

    def _insert1(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.shadow[key] = 1

            if (int(size) <= self.spaceLeft):
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)
            else:
                while (int(size) > self.spaceLeft):
                    self._evict()
                if (self._replace_pol == Cache.LRU):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.LRU_S):
                    self.cache[key] = int(size)
                elif (self._replace_pol == Cache.FIFO):
                    self.cache.append(key)
                self.hashmap[key] = int(size)
                self.spaceLeft -= int(size)

    def _insert(self, key, size):
        # No eviction
        if not self.zerosize:
            if (self._replace_pol == Cache.LRU_S):
                self.cache[key] = int(size)
                self.shadow[key] = int(size)
            elif (self._replace_pol == Cache.LRU):
                self.cache[key] = int(size)
            else:
                if (int(size) <= self.spaceLeft):
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)
                else:
                    while (int(size) > self.spaceLeft):
                        self._evict()
                    if (self._replace_pol == Cache.LRU):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.LRU_S):
                        self.cache[key] = int(size)
                    elif (self._replace_pol == Cache.FIFO):
                        self.cache.append(key)
                    self.hashmap[key] = int(size)
                    self.spaceLeft -= int(size)

    def read1(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize:
            return None
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        if key in self.hashmap:
            if (self._replace_pol == Cache.LRU):
                self._update_use(key)
            elif (self._replace_pol == Cache.LRU_S):
                self._update_use(key)
            self._hit_count += 1
            r = 1
        else:
            self._miss_count += 1
        return r

    def read(self, key, size):
        """Read an object from the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize:
            return None
        r = None

        if (self._replace_pol == Cache.LRU_S):
            if self.cache.has_key(key):
                self._hit_count += 1
                self.cache[key] = self.cache[key]
                r = 1
            else:
                self._miss_count += 1

            if self.shadow.has_key(key):
                count = 0
                for i in self.shadow.keys():
                    if i == key:
                        self.hist[count] += 1
                        break
                    count += 1
                self.shadow[key] = 1

        else:
            if key in self.hashmap:
                if (self._replace_pol == Cache.LRU):
                    self._update_use(key)
                elif (self._replace_pol == Cache.LRU_S):
                    self._update_use(key)
                self._hit_count += 1
                r = 1
            else:
                self._miss_count += 1
        return r

    def checkKey(self, key):
        """Check whether an object is present in the cache."""
        if self._layer == "BE":
            return 1
        if self.zerosize:
            return 0
        r = 0

        if (self._replace_pol == Cache.LRU_S) or (self._replace_pol
                                                  == Cache.LRU):
            if self.cache.has_key(key):
                r = 1
            else:
                r = 0
        return r

    def _evict(self):
        if (self._replace_pol == Cache.LRU):
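            # peek_last_item() returns the (key, value) pair of the least-recently-used
            # entry without refreshing its position in the LRU order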
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.LRU_S):
            id = self.cache.peek_last_item()[0]
            del self.cache[id]
        elif (self._replace_pol == Cache.FIFO):
            id = self.cache.popleft()
        self.spaceLeft += int(self.hashmap[id])
        del self.hashmap[id]

    def _update_use(self, key):
        """Update the use of a cache."""
        if (self._replace_pol == Cache.LRU):
            self.cache[key] = self.hashmap[key]
        if (self._replace_pol == Cache.LRU_S):
            self.cache[key] = self.hashmap[key]

    def set_cache_size(self, size):
        new_size = self.cache.get_size() + int(size)
        self.cache.set_size(int(new_size))

    def set_backend_bw(self, value):
        self._backend_bw += value

    def set_crossrack_bw(self, value):
        self._crossrack_bw += value

    def set_intrarack_bw(self, value):
        self._intrarack_bw += value

    def get_backend_bw(self):
        return self._backend_bw

    def get_crossrack_bw(self):
        return self._crossrack_bw

    def get_intrarack_bw(self):
        return self._intrarack_bw

    def get_replace_pol(self):
        return self._replace_pol

    def get_hit_count(self):
        return self._hit_count

    def get_miss_count(self):
        return self._miss_count

    def get_available_space(self):
        return self.spaceLeft

    def get_replace_poll(self):
        return self._replace_pol

    def reset_shadow_cache(self):
        self.shadow.clear()

    def print_cache(self):
        print(self.cache)

    def get_l2_address(self, key):
        if (self._hash_type == Cache.consistent):
            return self.hash_ring.get_node(key)
        elif (self._hash_type == Cache.rendezvous):
            return self.hash_ring.find_node(key)
        elif (self._hash_type == Cache.rr):
            val = key.split("_")[1]
            res = int(val) % int(self.hash_ring)
            return res
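
As a side note, a short sketch of what lru-dict already tracks on its own; the manual _hit_count/_miss_count bookkeeping and _evict() above re-implement this (plus size awareness) for the LRU policies.

from lru import LRU

cache = LRU(2)
cache["a"] = 1
cache["b"] = 2
_ = cache["a"]            # hit; "a" becomes the most recently used key
_ = cache.get("missing")  # miss
print(cache.get_stats())  # (1, 1) -> (hits, misses); inserts are not counted
cache["c"] = 3            # size limit reached: "b", the LRU entry, is evicted
print(cache.keys())       # ['c', 'a'], ordered from most to least recently used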
Exemplo n.º 17
0
class PolygonIndex(object):
    include_only_properties = None
    simplify_tolerance = 0.0001
    preserve_topology = True
    persistent_polygons = False
    cache_size = 0
    fix_invalid_polygons = False

    INDEX_FILENAME = None
    POLYGONS_DB_DIR = 'polygons'

    def __init__(self, index=None, polygons=None, polygons_db=None, save_dir=None,
                 index_filename=None,
                 polygons_db_path=None,
                 include_only_properties=None):
        if save_dir:
            self.save_dir = save_dir
        else:
            self.save_dir = None

        if not index_filename:
            index_filename = self.INDEX_FILENAME

        self.index_path = os.path.join(save_dir or '.', index_filename)

        if not index:
            self.create_index(overwrite=True)
        else:
            self.index = index

        if include_only_properties and hasattr(include_only_properties, '__contains__'):
            self.include_only_properties = include_only_properties

        if not polygons and not self.persistent_polygons:
            self.polygons = {}
        elif polygons and not self.persistent_polygons:
            self.polygons = polygons
        elif self.persistent_polygons and self.cache_size > 0:
            self.polygons = LRU(self.cache_size)
            if polygons:
                for key, value in six.iteritems(polygons):
                    self.polygons[key] = value

            self.cache_hits = 0
            self.cache_misses = 0

            self.get_polygon = self.get_polygon_cached

        if not polygons_db_path:
            polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR)

        if not polygons_db:
            self.polygons_db = LevelDB(polygons_db_path)
        else:
            self.polygons_db = polygons_db

        self.setup()

        self.i = 0

    def create_index(self, overwrite=False):
        raise NotImplementedError('Children must implement')

    def index_polygon(self, polygon):
        raise NotImplementedError('Children must implement')

    def setup(self):
        pass

    def clear_cache(self, garbage_collect=True):
        if self.persistent_polygons and self.cache_size > 0:
            self.polygons.clear()
            if garbage_collect:
                gc.collect()

    def simplify_polygon(self, poly, simplify_tolerance=None, preserve_topology=None):
        if simplify_tolerance is None:
            simplify_tolerance = self.simplify_tolerance
        if preserve_topology is None:
            preserve_topology = self.preserve_topology
        return poly.simplify(simplify_tolerance, preserve_topology=preserve_topology)

    def index_polygon_properties(self, properties):
        pass

    def polygon_geojson(self, poly, properties):
        return {
            'type': 'Feature',
            'geometry': mapping(poly),
        }

    def add_polygon(self, poly, properties, cache=False, include_only_properties=None):
        if include_only_properties is not None:
            properties = {k: v for k, v in properties.iteritems() if k in include_only_properties}

        if not self.persistent_polygons or cache:
            self.polygons[self.i] = prep(poly)

        if self.persistent_polygons:
            self.polygons_db.Put(self.polygon_key(self.i), json.dumps(self.polygon_geojson(poly, properties)))

        self.polygons_db.Put(self.properties_key(self.i), json.dumps(properties))
        self.index_polygon_properties(properties)
        self.i += 1

    @classmethod
    def create_from_shapefiles(cls, inputs, output_dir,
                               index_filename=None,
                               include_only_properties=None):
        index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
        for input_file in inputs:
            if include_only_properties is not None:
                include_props = include_only_properties.get(input_file, cls.include_only_properties)
            else:
                include_props = cls.include_only_properties

            f = fiona.open(input_file)

            index.add_geojson_like_file(f)

        return index

    @classmethod
    def fix_polygon(cls, poly):
        '''
        Coerce to valid polygon
        '''
        if not poly.is_valid:
            poly = poly.buffer(0)
            if not poly.is_valid:
                return None
        return poly

    @classmethod
    def to_polygon(cls, coords, holes=None, test_point=None):
        '''
        Create shapely polygon from list of coordinate tuples if valid
        '''
        if not coords or len(coords) < 3:
            return None

        # Fix for polygons crossing the 180th meridian
        lons = [lon for lon, lat in coords]
        if (max(lons) - min(lons) > 180):
            coords = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in coords]
            if holes:
                holes = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in holes]

        poly = Polygon(coords, holes)
        try:
            if test_point is None:
                test_point = poly.representative_point()
            invalid = cls.fix_invalid_polygons and not poly.is_valid and not poly.contains(test_point)
        except Exception:
            invalid = True

        if invalid:
            try:
                poly_fix = cls.fix_polygon(poly)

                if poly_fix is not None and poly_fix.bounds and len(poly_fix.bounds) == 4 and poly_fix.is_valid and poly_fix.type == poly.type:
                    if test_point is None:
                        test_point = poly_fix.representative_point()

                    if poly_fix.contains(test_point):
                        poly = poly_fix
            except Exception:
                pass

        return poly

    def add_geojson_like_record(self, rec, include_only_properties=None):
        if not rec or not rec.get('geometry') or 'type' not in rec['geometry']:
            return
        poly_type = rec['geometry']['type']
        if poly_type == 'Polygon':
            coords = rec['geometry']['coordinates'][0]
            poly = self.to_polygon(coords)
            if poly is None or not poly.bounds or len(poly.bounds) != 4:
                return
            self.index_polygon(poly)
            self.add_polygon(poly, rec['properties'], include_only_properties=include_only_properties)
        elif poly_type == 'MultiPolygon':
            polys = []
            poly_coords = rec['geometry']['coordinates']
            for coords in poly_coords:
                poly = self.to_polygon(coords[0])
                if poly is None or not poly.bounds or len(poly.bounds) != 4:
                    continue
                polys.append(poly)
                self.index_polygon(poly)

            self.add_polygon(MultiPolygon(polys), rec['properties'], include_only_properties=include_only_properties)
        else:
            return

    def add_geojson_like_file(self, f, include_only_properties=None):
        '''
        Add either GeoJSON or a shapefile record to the index
        '''

        for rec in f:
            self.add_geojson_like_record(rec, include_only_properties=include_only_properties)

    @classmethod
    def create_from_geojson_files(cls, inputs, output_dir,
                                  index_filename=None,
                                  polys_filename=DEFAULT_POLYS_FILENAME,
                                  include_only_properties=None):
        index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME)
        for input_file in inputs:
            if include_only_properties is not None:
                include_props = include_only_properties.get(input_file, cls.include_only_properties)
            else:
                include_props = cls.include_only_properties

            f = json.load(open(input_file))

            index.add_geojson_like_file(f['features'], include_only_properties=include_props)

        return index

    def compact_polygons_db(self):
        self.polygons_db.CompactRange('\x00', '\xff')

    def save(self):
        self.save_index()
        self.save_properties(os.path.join(self.save_dir, DEFAULT_PROPS_FILENAME))
        if not self.persistent_polygons:
            self.save_polygons(os.path.join(self.save_dir, DEFAULT_POLYS_FILENAME))
        self.compact_polygons_db()
        self.save_polygon_properties(self.save_dir)

    def load_properties(self, filename):
        properties = json.load(open(filename))
        self.i = int(properties.get('num_polygons', self.i))

    def save_properties(self, out_filename):
        out = open(out_filename, 'w')
        json.dump({'num_polygons': str(self.i)}, out)

    def save_polygons(self, out_filename):
        out = open(out_filename, 'w')
        for i in xrange(self.i):
            poly = self.polygons[i]
            feature = {
                'type': 'Feature',
                'geometry': mapping(poly.context),
            }
            out.write(json.dumps(feature) + u'\n')

    def save_index(self):
        raise NotImplementedError('Children must implement')

    def load_polygon_properties(self, d):
        pass

    def save_polygon_properties(self, d):
        pass

    @classmethod
    def polygon_from_geojson(cls, feature):
        poly_type = feature['geometry']['type']
        if poly_type == 'Polygon':
            coords = feature['geometry']['coordinates']
            poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
            return poly
        elif poly_type == 'MultiPolygon':
            polys = []
            for coords in feature['geometry']['coordinates']:
                poly = cls.to_polygon(coords[0], holes=coords[1:] or None)
                polys.append(poly)

            return MultiPolygon(polys)

    @classmethod
    def load_polygons(cls, filename):
        f = open(filename)
        polygons = {}
        cls.i = 0
        for line in f:
            feature = json.loads(line.rstrip())
            polygons[cls.i] = prep(cls.polygon_from_geojson(feature))
            cls.i += 1
        return polygons

    @classmethod
    def load_index(cls, d, index_name=None):
        raise NotImplementedError('Children must implement')

    @classmethod
    def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME,
             properties_filename=DEFAULT_PROPS_FILENAME,
             polys_db_dir=POLYGONS_DB_DIR):
        index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME)
        if not cls.persistent_polygons:
            polys = cls.load_polygons(os.path.join(d, polys_filename))
        else:
            polys = None
        polygons_db = LevelDB(os.path.join(d, polys_db_dir))
        polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d)
        polygon_index.load_properties(os.path.join(d, properties_filename))
        polygon_index.load_polygon_properties(d)
        return polygon_index

    def get_candidate_polygons(self, lat, lon):
        raise NotImplementedError('Children must implement')

    def get_properties(self, i):
        return json.loads(self.polygons_db.Get(self.properties_key(i)))

    def get_polygon(self, i):
        return self.polygons[i]

    def get_polygon_cached(self, i):
        poly = self.polygons.get(i, None)
        if poly is None:
            data = json.loads(self.polygons_db.Get(self.polygon_key(i)))
            poly = prep(self.polygon_from_geojson(data))
            self.polygons[i] = poly
            self.cache_misses += 1
        else:
            self.cache_hits += 1
        return poly

    def __iter__(self):
        for i in xrange(self.i):
            yield self.get_properties(i), self.get_polygon(i)

    def __len__(self):
        return self.i

    def polygons_contain(self, candidates, point, return_all=False):
        containing = None
        if return_all:
            containing = []
        for i in candidates:
            poly = self.get_polygon(i)
            contains = poly.contains(point)
            if contains:
                properties = self.get_properties(i)
                if not return_all:
                    return properties
                else:
                    containing.append(properties)
        return containing

    def polygon_key(self, i):
        return 'poly:{}'.format(i)

    def properties_key(self, i):
        return 'props:{}'.format(i)

    def point_in_poly(self, lat, lon, return_all=False):
        candidates = self.get_candidate_polygons(lat, lon)
        point = Point(lon, lat)
        return self.polygons_contain(candidates, point, return_all=return_all)
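
A stripped-down sketch of the read-through pattern in get_polygon_cached above (ReadThroughCache and fetch_from_store are illustrative stand-ins for the LevelDB/GeoJSON load): look in a bounded LRU first, fall back to the slow store on a miss, and remember what was loaded.

from lru import LRU


class ReadThroughCache:
    def __init__(self, size, fetch_from_store):
        self.cache = LRU(size)          # bounded: old entries are evicted automatically
        self.fetch = fetch_from_store   # slow path, e.g. a database Get + parse
        self.cache_hits = 0
        self.cache_misses = 0

    def get(self, key):
        value = self.cache.get(key)
        if value is None:
            value = self.fetch(key)
            self.cache[key] = value
            self.cache_misses += 1
        else:
            self.cache_hits += 1
        return value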
Exemplo n.º 18
0
class SecurityPolicy:
    def __init__(self, principal: IPrincipal):
        self.principal = principal
        self._cache = LRU(100)

    def invalidate_cache(self):
        self._cache.clear()

    @profilable
    def check_permission(self, permission, obj):
        # Always allow public attributes
        if permission is Public:
            return True

        if IView.providedBy(obj):
            obj = obj.__parent__

        # Iterate through participations ('principals')
        # and check permissions they give
        if self.principal is not None:

            # System user always has access
            if self.principal is SystemUser:
                return True

            # Check the permission
            groups = getattr(self.principal, "groups", None) or []
            if self.cached_decision(obj, self.principal.id, groups, permission):
                return True

        return False

    def cache(self, parent, level=""):
        serial = getattr(parent, "__serial__", "")
        oid = getattr(parent, "__uuid__", "")
        cache_key = f"{id(parent)}-{oid}-{serial}-{level}"
        cache = self._cache.get(cache_key)
        if cache is None:
            cache = CacheEntry()
            self._cache[cache_key] = cache
        return cache

    @profilable
    def cached_decision(self, parent, principal, groups, permission):
        # Return the decision for a principal and permission
        cache = self.cache(parent)
        try:
            cache_decision = cache.decision
        except AttributeError:
            cache_decision = cache.decision = {}

        cache_decision_prin = cache_decision.get(principal)
        if not cache_decision_prin:
            cache_decision_prin = cache_decision[principal] = {}

        try:
            return cache_decision_prin[permission]
        except KeyError:
            pass

        # cache_decision_prin[permission] is the cached decision for a
        # principal and permission.

        # Check direct permissions
        # First recursive function to get the permissions of a principal
        decision = self.cached_principal_permission(parent, principal, groups, permission, "o")

        if decision is not None:
            cache_decision_prin[permission] = decision
            return decision

        # Check Roles permission
        # First get the Roles needed
        roles = cached_roles(parent, permission, "o")
        if roles:
            # Get the roles from the user
            prin_roles = self.cached_principal_roles(parent, principal, groups, "o")
            for role, setting in prin_roles.items():
                if setting and (role in roles):
                    cache_decision_prin[permission] = decision = True
                    return decision

        cache_decision_prin[permission] = decision = False
        return decision

    @profilable
    def cached_principal_permission(self, parent, principal, groups, permission, level):
        # Compute the permission, if any, for the principal.
        cache = self.cache(parent, level)

        try:
            # We need two caches: one for the first level and one for Allow Single
            if level == "o":
                cache_prin = cache.prino
            else:
                cache_prin = cache.prin
        except AttributeError:
            if level == "o":
                cache_prin = cache.prino = {}
            else:
                cache_prin = cache.prin = {}

        cache_prin_per = cache_prin.get(principal)
        if not cache_prin_per:
            cache_prin_per = cache_prin[principal] = {}

        try:
            return cache_prin_per[permission]
        except KeyError:
            pass

        # We reached the end of the recursion; check global / local settings
        if parent is None:
            # We check the global configuration of the user and groups
            prinper = self._global_permissions_for(principal, permission)
            if prinper is not None:
                cache_prin_per[permission] = prinper
                return prinper

            # If we did not find the permission for the user, look at the code-level settings
            prinper = SettingAsBoolean[code_principal_permission_setting(permission, principal, None)]
            # Now look for the group ids
            if prinper is None:
                for group in groups:
                    prinper = SettingAsBoolean[code_principal_permission_setting(permission, group, None)]
                    if prinper is not None:
                        continue
            cache_prin_per[permission] = prinper
            return prinper

        # Get the local map of the permissions
        # As we want to quit as soon as possible we check first locally
        prinper_map = IPrincipalPermissionMap(parent, None)
        if prinper_map is not None:
            prinper = level_setting_as_boolean(level, prinper_map.get_setting(permission, principal, None))
            if prinper is None:
                for group in groups:
                    prinper = level_setting_as_boolean(
                        level, prinper_map.get_setting(permission, group, None)
                    )
                    if prinper is not None:
                        # Once we conclude we exit
                        # May happen that first group Deny and second
                        # allows which will result on Deny for the first
                        break
            if prinper is not None:
                return prinper

        # Find the permission set recursively for a user
        parent = getattr(parent, "__parent__", None)
        prinper = self.cached_principal_permission(parent, principal, groups, permission, "p")
        cache_prin_per[permission] = prinper
        return prinper

    def global_principal_roles(self, principal, groups):
        roles = dict(
            [(role, SettingAsBoolean[setting]) for (role, setting) in code_roles_for_principal(principal)]
        )
        for group in groups:
            for role, settings in code_roles_for_principal(group):
                roles[role] = SettingAsBoolean[settings]
        roles["guillotina.Anonymous"] = True  # Everybody has Anonymous

        # First the global roles from user + group
        groles = self._global_roles_for(principal)
        roles.update(groles)
        return roles

    def cached_principal_roles(self, parent, principal, groups, level):
        # Redefine it to get global roles
        cache = self.cache(parent, level)
        try:
            cache_principal_roles = cache.principal_roles
        except AttributeError:
            cache_principal_roles = cache.principal_roles = {}
        try:
            return cache_principal_roles[principal]
        except KeyError:
            pass

        # We reached the end so we go to see the global ones
        if parent is None:
            # Then the code roles
            roles = self.global_principal_roles(principal, groups)

            cache_principal_roles[principal] = roles
            return roles

        roles = self.cached_principal_roles(getattr(parent, "__parent__", None), principal, groups, "p")

        # We check the local map of roles
        prinrole = IPrincipalRoleMap(parent, None)

        if prinrole:
            roles = roles.copy()
            for role, setting in prinrole.get_roles_for_principal(principal):
                roles[role] = level_setting_as_boolean(level, setting)
            for group in groups:
                for role, setting in prinrole.get_roles_for_principal(group):
                    roles[role] = level_setting_as_boolean(level, setting)

        cache_principal_roles[principal] = roles
        return roles

    def _global_roles_for(self, principal):
        """On a principal (user/group) get global roles."""
        roles = {}
        groups = get_utility(IGroups)
        if self.principal and principal == self.principal.id:
            # It's the actual user id
            # We return all the global roles (including group)
            roles = self.principal.roles.copy()

            for group in self.principal.groups:
                roles.update(groups.get_principal(group, self.principal).roles)
            return roles

        # We are asking for group id so only group roles
        if groups:
            group = groups.get_principal(principal)
            return group.roles.copy()

    def _global_permissions_for(self, principal, permission):
        """On a principal (user + group) get global permissions."""
        groups = get_utility(IGroups)
        if self.principal and principal == self.principal.id:
            # It's the actual user
            permissions = self.principal.permissions.copy()
            if permission in permissions:
                return level_setting_as_boolean("p", permissions[permission])

            for group in self.principal.groups:
                permissions = groups.get_principal(group, self.principal).permissions
                if permission in permissions:
                    return level_setting_as_boolean("p", permissions[permission])
        return None
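
A compact sketch of the decision-memoisation idea above (DecisionCache and compute_decision are illustrative names, not guillotina API): decisions are cached in an LRU under a key that includes the object's serial, so a modified object naturally misses and gets a fresh entry, while invalidate() simply clears the whole LRU, as invalidate_cache() does above.

from lru import LRU


class DecisionCache:
    def __init__(self, size=100):
        self._cache = LRU(size)

    def check(self, obj, principal, permission, compute_decision):
        serial = getattr(obj, "__serial__", "")
        key = (id(obj), serial, principal, permission)
        decision = self._cache.get(key)
        if decision is None:
            # slow path: walk principals/roles the way cached_decision() does
            decision = compute_decision(obj, principal, permission)
            self._cache[key] = decision
        return decision

    def invalidate(self):
        self._cache.clear()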
Exemplo n.º 19
0
class FCP(BaseTask):
    def __init__(self, circle, src, dest,
                 treewalk=None,
                 totalsize=0,
                 hostcnt=0,
                 prune=False,
                 verify=False,
                 resume=False,
                 workq=None):
        BaseTask.__init__(self, circle)
        self.circle = circle
        self.treewalk = treewalk
        self.totalsize = totalsize
        self.prune = prune
        self.workq = workq
        self.resume = resume
        self.checkpoint_file = None
        self.checkpoint_db = None
        self.src = src
        self.dest = os.path.abspath(dest)

        # cache, keep the size conservative
        # TODO: we need a more portable LRU size

        # defaults so the sanity check below also applies when hostcnt == 0
        self._read_cache_limit = 0
        self._write_cache_limit = 0
        if hostcnt != 0:
            max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
            procs_per_host = self.circle.size // hostcnt
            self._read_cache_limit = ((max_ofile - 64) // procs_per_host) // 3
            self._write_cache_limit = ((max_ofile - 64) // procs_per_host) * 2 // 3

        if self._read_cache_limit <= 0 or self._write_cache_limit <= 0:
            self._read_cache_limit = 1
            self._write_cache_limit = 8

        self.rfd_cache = LRU(self._read_cache_limit)
        self.wfd_cache = LRU(self._write_cache_limit)

        self.cnt_filesize_prior = 0
        self.cnt_filesize = 0

        self.blocksize = 1024 * 1024
        self.chunksize = 1024 * 1024

        # debug
        self.d = {"rank": "rank %s" % circle.rank}
        self.wtime_started = MPI.Wtime()
        self.wtime_ended = None
        self.workcnt = 0  # this is the cnt for the enqueued items
        self.reduce_items = 0  # this is the cnt for processed items
        if self.treewalk:
            log.debug("treewalk files = %s" % treewalk.flist, extra=self.d)

        # fini_check
        self.fini_cnt = Counter()

        # verify
        self.verify = verify
        self.use_store = False
        if self.verify:
            self.chunksums_mem = []
            self.chunksums_buf = []

        # checkpointing
        self.checkpoint_interval = sys.maxsize
        self.checkpoint_last = MPI.Wtime()

        if self.circle.rank == 0:
            print("Start copying process ...")

    def rw_cache_limit(self):
        return (self._read_cache_limit, self._write_cache_limit)

    def set_fixed_chunksize(self, sz):
        self.chunksize = sz

    def set_adaptive_chunksize(self, totalsz):
        self.chunksize = utils.calc_chunksize(totalsz)
        if self.circle.rank == 0:
            print("Adaptive chunksize: %s" % bytes_fmt(self.chunksize))

    def cleanup(self):
        for f in self.rfd_cache.values():
            try:
                os.close(f)
            except OSError as e:
                pass

        for f in self.wfd_cache.values():
            try:
                os.close(f)
            except OSError as e:
                pass

        # remove checkpoint file
        if self.checkpoint_file and os.path.exists(self.checkpoint_file):
            os.remove(self.checkpoint_file)
        if self.checkpoint_db and os.path.exists(self.checkpoint_db):
            os.remove(self.checkpoint_db)

        # remove provided checkpoint file
        if G.resume and G.chk_file and os.path.exists(G.chk_file):
            os.remove(G.chk_file)
        if G.resume and G.chk_file_db and os.path.exists(G.chk_file_db):
            os.remove(G.chk_file_db)

        # remove chunksums file
        if self.verify:
            if hasattr(self, "chunksums_db"):
                self.chunksums_db.cleanup()

        # we need to do this because if the last job didn't finish cleanly,
        # the fwalk files can be left behind,
        # and if fcp cleanup gets a chance, it should clean them up
        """
        fwalk = "%s/fwalk.%s" % (G.tempdir, self.circle.rank)
        if os.path.exists(fwalk):
            os.remove(fwalk)
        """

    def new_fchunk(self, fitem):
        fchunk = FileChunk()  # default cmd = copy
        fchunk.src = fitem.path
        fchunk.dest = destpath(fitem, self.dest)
        return fchunk

    def enq_file(self, fi):
        """ Process a single file, represented by "fi" - FileItem
        It involves chunking this file and enqueueing all chunks. """

        chunks = fi.st_size // self.chunksize
        remaining = fi.st_size % self.chunksize

        workcnt = 0

        if fi.st_size == 0:  # empty file
            fchunk = self.new_fchunk(fi)
            fchunk.offset = 0
            fchunk.length = 0
            self.enq(fchunk)
            workcnt += 1
        else:
            for i in range(chunks):
                fchunk = self.new_fchunk(fi)
                fchunk.offset = i * self.chunksize
                fchunk.length = self.chunksize
                self.enq(fchunk)
            workcnt += chunks

        if remaining > 0:
            # send remainder
            fchunk = self.new_fchunk(fi)
            fchunk.offset = chunks * self.chunksize
            fchunk.length = remaining
            self.enq(fchunk)
            workcnt += 1

        # save work cnt
        self.workcnt += workcnt

        log.debug("enq_file(): %s, size = %s, workcnt = %s" % (fi.path, fi.st_size, workcnt),
                     extra=self.d)

    def handle_fitem(self, fi):
        if os.path.islink(fi.path):
            dest = destpath(fi, self.dest)
            linkto = os.readlink(fi.path)
            try:
                os.symlink(linkto, dest)
            except Exception as e:
                log.debug("%s, skipping sym link %s" % (e, fi.path), extra=self.d)
        elif stat.S_ISREG(fi.st_mode):
            self.enq_file(fi)  # where chunking takes place

    def create(self):
        """ Each task has one create(), which is invoked by circle ONCE.
        For FCP, each task will handle_fitem() -> enq_file()
        to process each file gathered during the treewalk stage. """

        if not G.use_store and self.workq:  # restart
            self.setq(self.workq)
            return

        if self.resume:
            return

        # construct and enable all copy operations
        # we batch operation hard-coded
        log.info("create() starts, flist length = %s" % len(self.treewalk.flist),
                    extra=self.d)

        # flist in memory
        if len(self.treewalk.flist) > 0:
            for fi in self.treewalk.flist:
                self.handle_fitem(fi)

        # flist in buf
        if len(self.treewalk.flist_buf) > 0:
            for fi in self.treewalk.flist_buf:
                self.handle_fitem(fi)

        # flist in database
        if self.treewalk.use_store:
            while self.treewalk.flist_db.qsize > 0:
                fitems, _ = self.treewalk.flist_db.mget(G.DB_BUFSIZE)
                for fi in fitems:
                    self.handle_fitem(fi)
                self.treewalk.flist_db.mdel(G.DB_BUFSIZE)

        # checkpoint both the in-memory and the database state
        if self.checkpoint_file:
            self.do_no_interrupt_checkpoint()
            self.checkpoint_last = MPI.Wtime()

        # gather total_chunks
        self.circle.comm.barrier()
        G.total_chunks = self.circle.comm.allreduce(self.workcnt, op=MPI.SUM)
        #G.total_chunks = self.circle.comm.bcast(G.total_chunks)
        #print("Total chunks: ",G.total_chunks)

    def do_open(self, k, d, flag, limit):
        """
        @param k: the file path
        @param d: dictionary of <path, file descriptor>
        @return: file descriptor
        """
        if k in d:
            return d[k]

        if len(d) >= limit:
            # over the limit: close the least-recently-used descriptor
            # (items() is assumed to be ordered from most- to least-recently
            # used, as with an lru-dict LRU mapping)
            old_k, old_v = d.items()[-1]
            try:
                os.close(old_v)
            except OSError as e:
                log.warn("FD for %s not valid when closing" % old_k, extra=self.d)

        fd = -1
        try:
            fd = os.open(k, flag)
        except OSError as e:
            if e.errno == 28:  # no space left
                log.error("Critical error: %s, exit!" % e, extra=self.d)
                self.circle.exit(0)  # should abort
            else:
                log.error("OSError({0}):{1}, skipping {2}".format(e.errno, e.strerror, k), extra=self.d)
        else:
            if fd > 0:
                d[k] = fd
        finally:
            return fd

    @staticmethod
    def do_mkdir(work):
        src = work.src
        dest = work.dest
        if not os.path.exists(dest):
            os.makedirs(dest)

    def do_copy(self, work):
        src = work.src
        dest = work.dest

        basedir = os.path.dirname(dest)
        if not os.path.exists(basedir):
            os.makedirs(basedir)

        rfd = self.do_open(src, self.rfd_cache, os.O_RDONLY, self._read_cache_limit)
        if rfd < 0:
            return False
        wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY | os.O_CREAT, self._write_cache_limit)
        if wfd < 0:
            if args.force:
                try:
                    os.unlink(dest)
                except OSError as e:
                    log.error("Failed to unlink %s, %s " % (dest, e), extra=self.d)
                    return False
                else:
                    wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY, self._write_cache_limit)
            else:
                log.error("Failed to create output file %s" % dest, extra=self.d)
                return False

        # do the actual copy
        self.write_bytes(rfd, wfd, work)

        # update tally
        self.cnt_filesize += work.length

        if G.verbosity > 2:
            log.debug("Transferred %s bytes from:\n\t [%s] to [%s]" %
                         (self.cnt_filesize, src, dest), extra=self.d)

        return True

    def do_no_interrupt_checkpoint(self):
        a = Thread(target=self.do_checkpoint)
        a.start()
        a.join()
        log.debug("checkpoint: %s" % self.checkpoint_file, extra=self.d)
        print("\nMake checkpoint files: ", self.checkpoint_file)

    def do_checkpoint(self):
        # when making a checkpoint, first write workq and workq_buf into the
        # checkpoint file, then make a copy of workq_db if it exists
        for k in self.wfd_cache.keys():
            os.close(self.wfd_cache[k])

        # clear the cache
        self.wfd_cache.clear()

        tmp_file = self.checkpoint_file + ".part"
        with open(tmp_file, "wb") as f:
            self.circle.workq.extend(self.circle.workq_buf)
            self.circle.workq_buf.clear()
            cobj = Checkpoint(self.src, self.dest, self.get_workq(), self.totalsize)
            pickle.dump(cobj, f, pickle.HIGHEST_PROTOCOL)
        # POSIX requires rename to be atomic
        os.rename(tmp_file, self.checkpoint_file)

        # copy workq_db database file
        if hasattr(self.circle, "workq_db") and len(self.circle.workq_db) > 0:
            self.checkpoint_db = self.checkpoint_file + ".db"
            if not G.resume:
                shutil.copy2(self.circle.dbname, self.checkpoint_db)
            else:
                # in resume mode, copy the current workq db file, i.e. the
                # checkpoint db file that was provided for the resume
                self.workdir = os.getcwd()
                existingCheckpoint = os.path.join(self.workdir, ".pcp_workq.%s.%s.db" % (G.rid, self.circle.rank))
                shutil.copy2(existingCheckpoint, self.checkpoint_db)

    def process(self):
        """
        The only work is "copy"
        TODO: clean up other actions such as mkdir/fini_check
        """
        if not G.use_store:
            curtime = MPI.Wtime()
            if curtime - self.checkpoint_last > self.checkpoint_interval:
                self.do_no_interrupt_checkpoint()
                log.info("Checkpointing done ...", extra=self.d)
                self.checkpoint_last = curtime

        work = self.deq()
        self.reduce_items += 1
        if isinstance(work, FileChunk):
            self.do_copy(work)
        else:
            log.warn("Unknown work object: %s" % work, extra=self.d)
            err_and_exit("Not a correct workq format")

    def reduce_init(self, buf):
        buf['cnt_filesize'] = self.cnt_filesize
        if sys.platform == 'darwin':
            buf['mem_snapshot'] = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        else:
            buf['mem_snapshot'] = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024

    def reduce(self, buf1, buf2):
        buf1['cnt_filesize'] += buf2['cnt_filesize']
        buf1['mem_snapshot'] += buf2['mem_snapshot']
        return buf1

    def reduce_report(self, buf):
        out = ""
        if self.totalsize != 0:
            out += "%.2f %% finished, " % (100 * float(buf['cnt_filesize']) / self.totalsize)

        out += "%s copied" % bytes_fmt(buf['cnt_filesize'])

        if self.circle.reduce_time_interval != 0:
            rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / self.circle.reduce_time_interval
            self.cnt_filesize_prior = buf['cnt_filesize']
            out += ", estimated transfer rate: %s/s" % bytes_fmt(rate)

        out += ", memory usage: %s" % bytes_fmt(buf['mem_snapshot'])
        print(out)

    def reduce_finish(self, buf):
        # self.reduce_report(buf)
        pass

    def epilogue(self):
        global taskloads
        self.wtime_ended = MPI.Wtime()
        taskloads = self.circle.comm.gather(self.reduce_items)
        if self.circle.rank == 0:
            if self.totalsize == 0:
                print("\nZero filesize detected, done.\n")
                return
            tlapse = self.wtime_ended - self.wtime_started
            rate = float(self.totalsize) / tlapse
            print("\nFCP Epilogue:\n")
            print("\t{:<20}{:<20}".format("Ending at:", utils.current_time()))
            print("\t{:<20}{:<20}".format("Completed in:", utils.conv_time(tlapse)))
            print("\t{:<20}{:<20}".format("Transfer Rate:", "%s/s" % bytes_fmt(rate)))
            print("\t{:<20}{:<20}".format("Use store chunksums:", "%s" % self.use_store))
            print("\t{:<20}{:<20}".format("Use store workq:", "%s" % self.circle.use_store))
            print("\t{:<20}{:<20}".format("FCP Loads:", "%s" % taskloads))

    def read_then_write(self, rfd, wfd, work, num_of_bytes, m):
        """ core entry point for copy action: first read then write.

        @param num_of_bytes: the exact amount of bytes we will copy
        @return: False if unsuccessful.

        """
        buf = None
        try:
            buf = readn(rfd, num_of_bytes)
        except IOError:
            self.logger.error("Failed to read %s", work.src, extra=self.d)
            return False

        try:
            writen(wfd, buf)
        except IOError:
            self.logger.error("Failed to write %s", work.dest, extra=self.d)
            return False

        if m:
            m.update(buf)

        return True

    def write_bytes(self, rfd, wfd, work):
        os.lseek(rfd, work.offset, os.SEEK_SET)
        os.lseek(wfd, work.offset, os.SEEK_SET)

        m = None
        if self.verify:
            m = hashlib.sha1()

        remaining = work.length
        while remaining != 0:
            if remaining >= self.blocksize:
                self.read_then_write(rfd, wfd, work, self.blocksize, m)
                remaining -= self.blocksize
            else:
                self.read_then_write(rfd, wfd, work, remaining, m)
                remaining = 0

        if self.verify:
            # the chunksum record is keyed on the destination path
            ck = ChunkSum(work.dest, offset=work.offset, length=work.length,
                          digest=m.hexdigest())

            if len(self.chunksums_mem) < G.memitem_threshold:
                self.chunksums_mem.append(ck)
            else:
                self.chunksums_buf.append(ck)
                if len(self.chunksums_buf) == G.DB_BUFSIZE:
                    if not self.use_store:
                        self.workdir = os.getcwd()
                        self.chunksums_dbname = "%s/chunksums.%s" % (G.tempdir, self.circle.rank)
                        self.chunksums_db = DbStore(dbname=self.chunksums_dbname)
                        self.use_store = True
                    self.chunksums_db.mput(self.chunksums_buf)
                    del self.chunksums_buf[:]
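
# A minimal, self-contained sketch of the descriptor-cache pattern used by
# do_open()/do_copy() above. Illustrative only: fd_cache_open and the plain
# dict cache are hypothetical stand-ins, not part of fcp itself.
import os

def fd_cache_open(path, cache, flags, limit):
    """Return a cached file descriptor for path, closing one cached
    descriptor whenever the cache has already reached `limit` entries."""
    if path in cache:
        return cache[path]
    if len(cache) >= limit:
        # evict one cached descriptor; an LRU mapping such as lru.LRU
        # would let us evict the least-recently-used entry instead
        old_path, old_fd = cache.popitem()
        try:
            os.close(old_fd)
        except OSError:
            pass
    fd = os.open(path, flags)
    cache[path] = fd
    return fd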
Exemplo n.º 20
0
class AccountDB(BaseAccountDB):

    logger = cast(ExtendedDebugLogger,
                  logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self,
                 db: BaseDB,
                 state_root: Hash32 = BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

                                                                    -> hash-trie -> storage lookups
                                                                  /
            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state, without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: There is an opportunity to do something similar for storage

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(
            HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        self._trie_cache.reset_cache()
        self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self,
                    address: Address,
                    slot: int,
                    from_journal: bool = True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account = self._get_account(address, from_journal)
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))

        if slot_as_key in storage:
            encoded_value = storage[slot_as_key]
            return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int)
        else:
            return 0

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root))

        slot_as_key = pad32(int_to_big_endian(slot))

        if value:
            encoded_value = rlp.encode(value)
            storage[slot_as_key] = encoded_value
        else:
            del storage[slot_as_key]

        self._set_account(address,
                          account.copy(storage_root=storage.root_hash))

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=BLANK_ROOT_HASH))

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")

        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")

        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")

        try:
            return self._journaldb[self.get_code_hash(address)]
        except KeyError:
            return b""

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)

        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(
            address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")
        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        return self._journaltrie.get(address, b'') != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(
            address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_account(self,
                     address: Address,
                     from_journal: bool = True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]
        rlp_account = (self._journaltrie if from_journal else
                       self._trie_cache).get(address, b'')
        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> Tuple[UUID, UUID]:
        return (self._journaldb.record(), self._journaltrie.record())

    def discard(self, changeset: Tuple[UUID, UUID]) -> None:
        db_changeset, trie_changeset = changeset
        self._journaldb.discard(db_changeset)
        self._journaltrie.discard(trie_changeset)
        self._account_cache.clear()

    def commit(self, changeset: Tuple[UUID, UUID]) -> None:
        db_changeset, trie_changeset = changeset
        self._journaldb.commit(db_changeset)
        self._journaltrie.commit(trie_changeset)

    def make_state_root(self) -> Hash32:
        self.logger.debug2("Generating AccountDB trie")
        self._journaldb.persist()
        self._journaltrie.persist()
        return self.state_root

    def persist(self) -> None:
        self.make_state_root()
        self._batchtrie.commit(apply_deletes=False)
        self._batchdb.commit(apply_deletes=True)

    def _log_pending_accounts(self) -> None:
        accounts_displayed = set()  # type: Set[bytes]
        queued_changes = self._journaltrie.journal.journal_data.items()
        # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078
        for _, accounts in reversed(queued_changes):
            for address in accounts:
                if address in accounts_displayed:
                    continue
                else:
                    accounts_displayed.add(address)
                    account = self._get_account(Address(address))
                    self.logger.debug2(
                        "Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                        encode_hex(address),
                        account.balance,
                        account.nonce,
                        encode_hex(account.storage_root),
                        encode_hex(account.code_hash),
                    )
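
# A stripped-down sketch of the read-through pattern behind _get_account()
# above: check the LRU cache first, fall back to the backing store, and
# populate the cache on a miss. backing_store, decode and default are
# hypothetical stand-ins for the journaled trie, rlp.decode and Account().
from lru import LRU

account_cache = LRU(2048)

def get_account(address, backing_store, decode, default):
    if address in account_cache:
        return account_cache[address]
    raw = backing_store.get(address, b'')
    account = decode(raw) if raw else default()
    account_cache[address] = account
    return account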
Exemplo n.º 21
0
class BaseSerializable(collections.abc.Sequence):
    cache = None

    def __init__(self, *args, cache=None, **kwargs):
        arg_names = self._meta.field_names or ()
        validate_args_and_kwargs(args, kwargs, arg_names)
        field_values = merge_kwargs_to_args(args, kwargs, arg_names)

        # Ensure that all the fields have been given values in initialization
        if len(field_values) != len(arg_names):
            raise TypeError(
                "Argument count mismatch. expected {0} - got {1} - missing {2}"
                .format(
                    len(arg_names),
                    len(field_values),
                    ",".join(arg_names[len(field_values):]),
                ))

        for value, attr in zip(field_values, self._meta.field_attrs or ()):
            setattr(self, attr, make_immutable(value))

        self.cache = LRU(DEFAULT_CACHE_SIZE) if cache is None else cache

    def as_dict(self):
        return dict((field, value)
                    for field, value in zip(self._meta.field_names, self))

    def __iter__(self):
        for attr in self._meta.field_attrs:
            yield getattr(self, attr)

    def __getitem__(self, index):
        if isinstance(index, int):
            attr = self._meta.field_attrs[index]
            return getattr(self, attr)
        elif isinstance(index, slice):
            field_slice = self._meta.field_attrs[index]
            return tuple(getattr(self, field) for field in field_slice)
        elif isinstance(index, str):
            return getattr(self, index)
        else:
            raise IndexError("Unsupported type for __getitem__: {0}".format(
                type(index)))

    def __len__(self):
        return len(self._meta.fields)

    def __eq__(self, other):
        satisfies_class_relationship = issubclass(
            self.__class__, other.__class__) or issubclass(
                other.__class__, self.__class__)

        if not satisfies_class_relationship:
            return False
        else:
            return self.hash_tree_root == other.hash_tree_root

    def __getstate__(self):
        state = self.__dict__.copy()
        # The hash() builtin is not stable across processes
        # (https://docs.python.org/3/reference/datamodel.html#object.__hash__), so we do this here
        # to ensure pickled instances don't carry the cached hash() as that may cause issues like
        # https://github.com/ethereum/py-evm/issues/1318
        state["_hash_cache"] = None
        return state

    _hash_cache = None

    def __hash__(self):
        if self._hash_cache is None:
            self._hash_cache = hash(self.__class__) * int.from_bytes(
                self.hash_tree_root, "little")
        return self._hash_cache

    def copy(self, *args, **kwargs):
        missing_overrides = (set(self._meta.field_names).difference(
            kwargs.keys()).difference(self._meta.field_names[:len(args)]))
        unchanged_kwargs = {
            key:
            value if is_immutable_field_value(value) else copy.deepcopy(value)
            for key, value in self.as_dict().items()
            if key in missing_overrides
        }

        combined_kwargs = dict(**unchanged_kwargs, **kwargs)
        all_kwargs = merge_args_to_kwargs(args, combined_kwargs,
                                          self._meta.field_names)

        result = type(self)(**all_kwargs)
        result.cache = self.cache

        return result

    def reset_cache(self):
        self.cache.clear()
        self._fixed_size_section_length_cache = None
        self._serialize_cache = None

    def __copy__(self):
        return self.copy()

    def __deepcopy__(self, memodict=None):
        if memodict is None:
            memodict = {}

        cls = self.__class__
        result = cls.__new__(cls)
        memodict[id(self)] = result

        for k, v in self.__dict__.items():
            if k != "cache":
                setattr(result, k, copy.deepcopy(v, memodict))

        result.cache = self.cache
        result._fixed_size_section_length_cache = self._fixed_size_section_length_cache

        return result

    _fixed_size_section_length_cache = None
    _serialize_cache = None

    @property
    def hash_tree_root(self):
        return self.__class__.get_hash_tree_root(self, cache=True)

    @classmethod
    def get_sedes_id(cls) -> str:
        # Serializable implementation name should be unique
        return cls.__name__

    def get_key(self) -> bytes:
        # Serialize with self._meta.container_sedes
        key = get_base_key(self._meta.container_sedes, self).hex()

        if len(key) == 0:
            key = ""

        return f"{self.__class__.get_sedes_id()}{key}"
Exemplo n.º 22
0
Arquivo: base.py Projeto: aquadrop/py
class BaseKernel:
    appid = "841e6cd456e05713213f413e8765648e"
    user_ids = np.array([
        '0112DBCD5299791D5A53287D27F4E18A5',
        '0480704B8A3471FF360DD22AB5C3D9F8E',
        '09B78AFBFCF3F97F34F12F945769FBD8B'
    ])

    def __init__(self):
        # if not self.user_ids or self.user_ids.size == 0:
        #     self.uid = self.register()
        # else:
        self.uid = self.user_ids[1]  #np.random.choice(self.user_ids, 1)[0]
        self.cache = LRU(300)
        # thread = Thread(target=self.schedule)
        # thread.start()

    def kernel(self, q):
        # if q in self.cache:
        #     return self.cache[q]
        answer = self.chat(q)
        # self.cache[q] = answer
        return answer

    def register(self):
        register_data = {"cmd": "register", "appid": self.appid}
        url = "http://idc.emotibot.com/api/ApiKey/openapi.php"
        r = requests.post(url, params=register_data)
        response = json.dumps(r.json(), ensure_ascii=False)
        jsondata = json.loads(response)
        datas = jsondata.get('data')
        for data in datas:
            return data.get('value')

    def chat(self, q):
        try:
            register_data = {
                "cmd": "chat",
                "appid": self.appid,
                "userid": self.uid,
                "text": q,
                "location": "南京"
            }
            url = "http://idc.emotibot.com/api/ApiKey/openapi.php"
            r = requests.post(url, params=register_data)
            response = json.dumps(r.json(), ensure_ascii=False)
            jsondata = json.loads(response)
            datas = jsondata.get("data")
            for data in datas:
                response = data.get('value')
                if response:
                    break
            return response
        except Exception:
            return 'base kernel is detached'

    def clear_cache(self):
        print('clear')
        self.cache.clear()

    def schedule(self):
        schedule.every().hour.do(self.clear_cache)
        while True:
            schedule.run_pending()
            time.sleep(60)
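
# If the memoisation commented out in BaseKernel.kernel() were enabled, it
# would behave roughly like this sketch: answers are served from the LRU(300)
# cache when present, and the hourly clear_cache() job drops the whole cache.
# expensive_chat is a hypothetical stand-in for the remote chat() call.
from lru import LRU

cache = LRU(300)

def kernel(q, expensive_chat):
    if q in cache:
        return cache[q]
    answer = expensive_chat(q)
    cache[q] = answer
    return answer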
Exemplo n.º 23
0
class AccountDB(BaseAccountDB):

    logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB'))

    def __init__(self, db: BaseAtomicDB, state_root: Hash32=BLANK_ROOT_HASH) -> None:
        r"""
        Internal implementation details (subject to rapid change):
        Database entries go through several pipes, like so...

        .. code::

            db > _batchdb ---------------------------> _journaldb ----------------> code lookups
             \
              -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups

        Journaling sequesters writes at the _journal* attrs ^, until persist is called.

        _batchtrie enables us to prune all trie changes while building
        state, without deleting old trie roots.

        _batchdb and _batchtrie together enable us to make the state root,
        without saving everything to the database.

        _journaldb is a journaling of the keys and values used to store
        code and account storage.

        _trie is a hash-trie, used to generate the state root

        _trie_cache is a cache tied to the state root of the trie. It
        is important that this cache is checked *after* looking for
        the key in _journaltrie, because the cache is only invalidated
        after a state root change.

        _journaltrie is a journaling of the accounts (an address->rlp mapping,
        rather than the nodes stored by the trie). This enables
        a squashing of all account changes before pushing them into the trie.

        .. NOTE:: StorageDB works similarly

        AccountDB synchronizes the snapshot/revert/persist of both of the
        journals.
        """
        self._raw_store_db = db
        self._batchdb = BatchDB(db)
        self._batchtrie = BatchDB(db)
        self._journaldb = JournalDB(self._batchdb)
        self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True))
        self._trie_cache = CacheDB(self._trie)
        self._journaltrie = JournalDB(self._trie_cache)
        self._account_cache = LRU(2048)
        self._account_stores = {}  # type: Dict[Address, AccountStorageDB]
        self._dirty_accounts = set()  # type: Set[Address]

    @property
    def state_root(self) -> Hash32:
        return self._trie.root_hash

    @state_root.setter
    def state_root(self, value: Hash32) -> None:
        if self._trie.root_hash != value:
            self._trie_cache.reset_cache()
            self._trie.root_hash = value

    def has_root(self, state_root: bytes) -> bool:
        return state_root in self._batchtrie

    #
    # Storage
    #
    def get_storage(self, address: Address, slot: int, from_journal: bool=True) -> int:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(slot, title="Storage Slot")

        account_store = self._get_address_store(address)
        return account_store.get(slot, from_journal)

    def set_storage(self, address: Address, slot: int, value: int) -> None:
        validate_uint256(value, title="Storage Value")
        validate_uint256(slot, title="Storage Slot")
        validate_canonical_address(address, title="Storage Address")

        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.set(slot, value)

    def delete_storage(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        self._set_storage_root(address, BLANK_ROOT_HASH)
        self._wipe_storage(address)

    def _wipe_storage(self, address: Address) -> None:
        """
        Wipe out the storage, without explicitly handling the storage root update
        """
        account_store = self._get_address_store(address)
        self._dirty_accounts.add(address)
        account_store.delete()

    def _get_address_store(self, address: Address) -> AccountStorageDB:
        if address in self._account_stores:
            store = self._account_stores[address]
        else:
            storage_root = self._get_storage_root(address)
            store = AccountStorageDB(self._raw_store_db, storage_root, address)
            self._account_stores[address] = store
        return store

    def _dirty_account_stores(self) -> Iterable[Tuple[Address, AccountStorageDB]]:
        for address in self._dirty_accounts:
            store = self._account_stores[address]
            yield address, store

    @to_tuple
    def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]:
        # list all the accounts that were changed, and their new storage roots
        for address, store in self._dirty_account_stores():
            if store.has_changed_root:
                yield address, store.get_changed_root()

    def _get_storage_root(self, address: Address) -> Hash32:
        account = self._get_account(address)
        return account.storage_root

    def _set_storage_root(self, address: Address, new_storage_root: Hash32) -> None:
        account = self._get_account(address)
        self._set_account(address, account.copy(storage_root=new_storage_root))

    def _validate_flushed_storage(self, address: Address, store: AccountStorageDB) -> None:
        if store.has_changed_root:
            actual_storage_root = self._get_storage_root(address)
            expected_storage_root = store.get_changed_root()
            if expected_storage_root != actual_storage_root:
                raise ValidationError(
                    "Storage root was not saved to account before trying to persist roots. "
                    "Account %r had storage %r, but should be %r." % (
                        address,
                        actual_storage_root,
                        expected_storage_root,
                    )
                )

    #
    # Balance
    #
    def get_balance(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.balance

    def set_balance(self, address: Address, balance: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(balance, title="Account Balance")

        account = self._get_account(address)
        self._set_account(address, account.copy(balance=balance))

    #
    # Nonce
    #
    def get_nonce(self, address: Address) -> int:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.nonce

    def set_nonce(self, address: Address, nonce: int) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_uint256(nonce, title="Nonce")

        account = self._get_account(address)
        self._set_account(address, account.copy(nonce=nonce))

    def increment_nonce(self, address: Address) -> None:
        current_nonce = self.get_nonce(address)
        self.set_nonce(address, current_nonce + 1)

    #
    # Code
    #
    def get_code(self, address: Address) -> bytes:
        validate_canonical_address(address, title="Storage Address")

        try:
            return self._journaldb[self.get_code_hash(address)]
        except KeyError:
            return b""

    def set_code(self, address: Address, code: bytes) -> None:
        validate_canonical_address(address, title="Storage Address")
        validate_is_bytes(code, title="Code")

        account = self._get_account(address)

        code_hash = keccak(code)
        self._journaldb[code_hash] = code
        self._set_account(address, account.copy(code_hash=code_hash))

    def get_code_hash(self, address: Address) -> Hash32:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        return account.code_hash

    def delete_code(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account.copy(code_hash=EMPTY_SHA3))

    #
    # Account Methods
    #
    def account_has_code_or_nonce(self, address: Address) -> bool:
        return self.get_nonce(address) != 0 or self.get_code_hash(address) != EMPTY_SHA3

    def delete_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        if address in self._account_cache:
            del self._account_cache[address]
        del self._journaltrie[address]

        self._wipe_storage(address)

    def account_exists(self, address: Address) -> bool:
        validate_canonical_address(address, title="Storage Address")
        return self._journaltrie.get(address, b'') != b''

    def touch_account(self, address: Address) -> None:
        validate_canonical_address(address, title="Storage Address")

        account = self._get_account(address)
        self._set_account(address, account)

    def account_is_empty(self, address: Address) -> bool:
        return not self.account_has_code_or_nonce(address) and self.get_balance(address) == 0

    #
    # Internal
    #
    def _get_account(self, address: Address, from_journal: bool=True) -> Account:
        if from_journal and address in self._account_cache:
            return self._account_cache[address]
        rlp_account = (self._journaltrie if from_journal else self._trie_cache).get(address, b'')
        if rlp_account:
            account = rlp.decode(rlp_account, sedes=Account)
        else:
            account = Account()
        if from_journal:
            self._account_cache[address] = account
        return account

    def _set_account(self, address: Address, account: Account) -> None:
        self._account_cache[address] = account
        rlp_account = rlp.encode(account, sedes=Account)
        self._journaltrie[address] = rlp_account

    #
    # Record and discard API
    #
    def record(self) -> UUID:
        changeset_id = self._journaldb.record()
        self._journaltrie.record(changeset_id)

        for _, store in self._dirty_account_stores():
            store.record(changeset_id)
        return changeset_id

    def discard(self, changeset: UUID) -> None:
        self._journaldb.discard(changeset)
        self._journaltrie.discard(changeset)
        self._account_cache.clear()
        for _, store in self._dirty_account_stores():
            store.discard(changeset)

    def commit(self, changeset: UUID) -> None:
        self._journaldb.commit(changeset)
        self._journaltrie.commit(changeset)
        for _, store in self._dirty_account_stores():
            store.commit(changeset)

    def make_state_root(self) -> Hash32:
        for _, store in self._dirty_account_stores():
            store.make_storage_root()

        for address, storage_root in self._get_changed_roots():
            self.logger.debug2(
                "Updating account 0x%s to storage root 0x%s",
                address.hex(),
                storage_root.hex(),
            )
            self._set_storage_root(address, storage_root)

        self._journaldb.persist()
        self._journaltrie.persist()
        return self.state_root

    def persist(self) -> None:
        self.make_state_root()

        # persist storage
        with self._raw_store_db.atomic_batch() as write_batch:
            for address, store in self._dirty_account_stores():
                self._validate_flushed_storage(address, store)
                store.persist(write_batch)

        for address, new_root in self._get_changed_roots():
            if new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH:
                raise ValidationError(
                    "After persisting storage trie, a root node was not found. "
                    "State root for account 0x%s is missing for hash 0x%s." % (
                        address.hex(),
                        new_root.hex(),
                    )
                )

        # reset local storage trackers
        self._account_stores = {}
        self._dirty_accounts = set()

        # persist accounts
        self._validate_generated_root()
        with self._raw_store_db.atomic_batch() as write_batch:
            self._batchtrie.commit_to(write_batch, apply_deletes=False)
            self._batchdb.commit_to(write_batch, apply_deletes=False)

    def _validate_generated_root(self) -> None:
        db_diff = self._journaldb.diff()
        if len(db_diff):
            raise ValidationError(
                "AccountDB had a dirty db when it needed to be clean: %r" % db_diff
            )
        trie_diff = self._journaltrie.diff()
        if len(trie_diff):
            raise ValidationError(
                "AccountDB had a dirty trie when it needed to be clean: %r" % trie_diff
            )

    def _log_pending_accounts(self) -> None:
        accounts_displayed = set()  # type: Set[bytes]
        queued_changes = self._journaltrie.journal.journal_data.items()
        # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078
        for _, accounts in reversed(queued_changes):
            for address in accounts:
                if address in accounts_displayed:
                    continue
                else:
                    accounts_displayed.add(address)
                    account = self._get_account(Address(address))
                    self.logger.debug2(
                        "Account %s: balance %d, nonce %d, storage root %s, code hash %s",
                        encode_hex(address),
                        account.balance,
                        account.nonce,
                        encode_hex(account.storage_root),
                        encode_hex(account.code_hash),
                    )
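
# Why discard() above clears the LRU account cache: after a changeset is
# thrown away, cached Account objects may describe state that the journal no
# longer holds, so the safe move is to drop the whole cache and let
# _get_account() repopulate it lazily. A minimal sketch; journaldb and
# journaltrie stand in for the journaled wrappers used above.
from lru import LRU

account_cache = LRU(2048)

def discard_changeset(journaldb, journaltrie, changeset):
    journaldb.discard(changeset)
    journaltrie.discard(changeset)
    # anything cached before the discard may now be stale
    account_cache.clear()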