Example #1
class StorageCache(object):
    """RelStorage integration with memcached or similar.

    Holds a list of memcache clients in order from most local to
    most global.  The first is a LocalClient, which stores the cache
    in the Python process, but shares the cache between threads.
    """
    # pylint:disable=too-many-instance-attributes,too-many-public-methods

    # queue is a _TemporaryStorage used during commit
    temp_objects = None
    # store_temp and read_temp are methods copied from the queue while
    # we are committing.
    store_temp = None
    read_temp = None

    # checkpoints, when set, is a tuple containing the integer
    # transaction IDs of the two current checkpoints. checkpoint0 is
    # greater than or equal to checkpoint1.
    checkpoints = None

    # current_tid contains the last polled transaction ID. Invariant:
    # when self.checkpoints is not None, self.delta_after0 has info
    # from *all* transactions in the range:
    #
    #   (self.checkpoints[0], self.current_tid]
    #
    # (That is, `tid > self.checkpoints[0] and tid <= self.current_tid`)
    #
    # We assign to this *only* after executing a poll, or
    # when reading data from the persistent cache (which happens at
    # startup, and usually also when someone calls clear())
    #
    # Start with None so we can distinguish the case of never polled/
    # no tid in persistent cache from a TID of 0, which can happen in
    # tests.
    current_tid = None

    _tracer = None

    _delta_map_type = OID_TID_MAP_TYPE

    def __init__(self,
                 adapter,
                 options,
                 prefix,
                 local_client=None,
                 _tracer=None):
        self.adapter = adapter
        self.options = options
        self.prefix = prefix or ''

        # delta_after0 contains {oid: tid} *after* checkpoint 0
        # and before or at self.current_tid.
        self.delta_after0 = self._delta_map_type()

        # delta_after1 contains {oid: tid} *after* checkpoint 1 and
        # *before* or at checkpoint 0. The content of delta_after1 only
        # changes when checkpoints shift and we rebuild it.
        self.delta_after1 = self._delta_map_type()
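        # Illustrative example (numbers made up): with checkpoints ==
        # (5000, 4000) and current_tid == 5005, delta_after0 holds
        # {oid: tid} entries for every change with 5000 < tid <= 5005,
        # while delta_after1 holds entries with 4000 < tid <= 5000.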

        # delta_size_limit places an approximate limit on the number of
        # entries in the delta_after maps.
        self.delta_size_limit = options.cache_delta_size_limit

        if local_client is None:
            self.local_client = LocalClient(options, self.prefix)
        else:
            self.local_client = local_client

        shared_cache = MemcacheStateCache.from_options(options, self.prefix)
        if shared_cache is not None:
            self.cache = MultiStateCache(self.local_client, shared_cache)
        else:
            self.cache = self.local_client

        if local_client is None:
            self.restore()

        if _tracer is None:
            tracefile = persistence.trace_file(options, self.prefix)
            if tracefile:
                _tracer = ZEOTracer(tracefile)
                _tracer.trace(0x00)

        self._tracer = _tracer
        if hasattr(self._tracer, 'trace_store_current'):
            self.cache = TracingStateCache(self.cache, _tracer)

    # XXX: Note that our __bool__ and __len__ are NOT consistent
    def __bool__(self):
        return True

    __nonzero__ = __bool__

    def __len__(self):
        return len(self.local_client)

    @property
    def size(self):
        return self.local_client.size

    @property
    def limit(self):
        return self.local_client.limit

    def stats(self):
        """
        Return stats. This is a debugging aid only. The format is undefined and intended
        for human inspection only.
        """
        return self.local_client.stats()

    def __repr__(self):
        return "<%s at %x size=%d len=%d>" % (self.__class__.__name__,
                                              id(self), self.size, len(self))

    def reset_stats(self):
        self.local_client.reset_stats()

    def new_instance(self):
        """
        Return a copy of this instance sharing the same local client.
        """
        local_client = self.local_client if self.options.share_local_cache else None

        cache = type(self)(self.adapter,
                           self.options,
                           self.prefix,
                           local_client,
                           _tracer=self._tracer or False)

        # The delta maps get more and more stale the longer time goes on.
        # Maybe we want to try to re-create them based on the local max tids?
        # Also, if there have been enough changes that someone has shifted the
        # checkpoints, cache.checkpoints won't match the global checkpoints
        # and they will wind up discarding the delta maps on the first poll.
        #
        # Alternately, we could watch our children created here, and see
        # which one is still alive and has the highest `current_tid` indicating the
        # most recent poll, and copy that information.
        cache.checkpoints = self.checkpoints
        cache.delta_after0 = self._delta_map_type(self.delta_after0)
        cache.delta_after1 = self._delta_map_type(self.delta_after1)
        cache.current_tid = self.current_tid
        return cache

    def release(self):
        """
        Release resources held by this instance.

        These are usually memcache connections, if they're in use.
        """
        self.cache.close()
        # Release our clients. If we had a non-shared local cache,
        # this will also allow it to release any memory it's holding.
        self.local_client = self.cache = _UsedAfterRelease

    def save(self, **save_args):
        """
        Store any persistent client data.
        """
        if self.options.cache_local_dir and len(self):  # pylint:disable=len-as-condition
            # (our __bool__ is not consistent with our len)
            stats = self.local_client.stats()
            if stats['hits'] or stats['sets']:
                # Only write this out if (1) it proved useful OR (2)
                # we've made modifications. Otherwise, we're writing a consolidated
                # file for no good reason.
                # TODO: Consider the correctness here, now that we have a
                # more accurate cache. Should that maybe be AND?
                return self.local_client.save(**save_args)
            logger.debug(
                "Cannot justify writing cache file, no hits or misses")

    def restore(self):
        # We must only restore into an empty cache.
        assert not len(self.local_client)  # pylint:disable=len-as-condition
        assert not self.checkpoints

        # Note that there may have been a tiny amount of data in the
        # file that we didn't get to actually store but that still
        # comes back in the delta_map; that's ok.
        row_filter = _PersistentRowFilter(self.adapter, self._delta_map_type)
        self.local_client.restore(row_filter)
        self.local_client.remove_invalid_persistent_oids(
            row_filter.polled_invalid_oids)

        self.checkpoints = self.local_client.get_checkpoints()
        if self.checkpoints:
            # No point keeping the delta maps otherwise,
            # we have to poll. If there were no checkpoints, it means
            # we saved without having ever completed a poll.
            #
            # We choose the cp0 as our beginning TID at which to
            # resume polling. We have information on cached data as it
            # relates to those checkpoints. (TODO: Are we sure that
            # the delta maps we've just built are actually accurate
            # as-of this particular TID we're choosing to poll from?)
            #
            self.current_tid = self.checkpoints[0]
            self.delta_after0 = row_filter.delta_after0
            self.delta_after1 = row_filter.delta_after1

        logger.debug(
            "Restored with current_tid %s and checkpoints %s and deltas %s %s",
            self.current_tid, self.checkpoints, len(self.delta_after0),
            len(self.delta_after1))

    def close(self, **save_args):
        """
        Release resources held by this instance, and
        save any persistent data necessary.
        """
        self.save(**save_args)
        self.release()

        if self._tracer:
            # Note we can't do this in release(). Release is called on
            # all instances, while close() is only called on the main one.
            self._tracer.close()
            del self._tracer

    def _reset(self, message=None):
        """
        Reset the transaction state of only this instance.

        If this is being done in a transactional way, it must be followed
        by raising an exception. If the *message* parameter is provided,
        then a ``CacheConsistencyError`` will be raised when this
        method returns.
        """
        # As if we've never polled
        for name in ('checkpoints', 'current_tid'):
            try:
                delattr(self, name)
            except AttributeError:
                pass
        self.delta_after0 = self._delta_map_type()
        self.delta_after1 = self._delta_map_type()
        if message:
            raise CacheConsistencyError(message)

    def clear(self, load_persistent=True):
        """
        Remove all data from the cache, both locally (and shared among
        other instances), and globally.

        Called by speed tests.

        Starting from the introduction of persistent cache files, this
        also results in the local client being repopulated with the
        current set of persistent data. The *load_persistent* keyword
        can be used to control this.

        .. versionchanged:: 2.0b6 Added the ``load_persistent``
           keyword. This argument is provisional.
        """
        self._reset()
        # After this our current_tid is probably out of sync with the
        # storage's current_tid. Whether or not we load data from
        # persistent caches, it's probably in the past of what the
        # storage thinks.
        # XXX: Ideally, we should be able to populate that information
        # back up so that we get the right polls.

        self.cache.flush_all()

        if load_persistent:
            self.restore()

    def zap_all(self):
        """
        Remove all data from the cache, both locally (and shared among
        other instances) and globally; in addition, remove any
        persistent cache files on disk.
        """
        self.clear(load_persistent=False)
        self.local_client.zap_all()

    def _check_tid_after_load(self,
                              oid_int,
                              actual_tid_int,
                              expect_tid_int=None):
        """Verify the tid of an object loaded from the database is sane."""
        if actual_tid_int > self.current_tid:
            # Strangely, the database just gave us data from a future
            # transaction. We can't give the data to ZODB because that
            # would be a consistency violation. However, the cause is
            # hard to track down, so issue a ReadConflictError and
            # hope that the application retries successfully.
            msg = ("Got data for OID 0x%(oid_int)x from "
                   "future transaction %(actual_tid_int)d (%(got_ts)s).  "
                   "Current transaction is %(current_tid)d (%(current_ts)s)." %
                   {
                       'oid_int': oid_int,
                       'actual_tid_int': actual_tid_int,
                       'current_tid': self.current_tid,
                       'got_ts': str(TimeStamp(p64(actual_tid_int))),
                       'current_ts': str(TimeStamp(p64(self.current_tid))),
                   })
            raise ReadConflictError(msg)

        if expect_tid_int is not None and actual_tid_int != expect_tid_int:
            # Uh-oh, the cache is inconsistent with the database.
            # We didn't get a TID from the future, but it's not what we
            # had in our delta_after0 map, which means...we missed a change
            # somewhere.
            #
            # Possible causes:
            #
            # - The database MUST provide a snapshot view for each
            #   session; this error can occur if that requirement is
            #   violated. For example, MySQL's MyISAM engine is not
            #   sufficient for the object_state table because MyISAM
            #   can not provide a snapshot view. (InnoDB is
            #   sufficient.)
            #
            # - (Similar to the last one.) Using too low an
            #   isolation level for the database connection and
            #   viewing unrelated data.
            #
            # - Something could be writing to the database out
            #   of order, such as a version of RelStorage that
            #   acquires a different commit lock.
            #
            # - A software bug. In the past, there was a subtle bug
            #   in after_poll() that caused it to ignore the
            #   transaction order, leading it to sometimes put the
            #   wrong tid in delta_after*.
            #
            # - Restarting a load connection at a future point we hadn't
            #   actually polled to, such that our current_tid is out of sync
            #   with the connection's *actual* viewable tid?
            cp0, cp1 = self.checkpoints

            msg = ("Detected an inconsistency "
                   "between the RelStorage cache and the database "
                   "while loading an object using the delta_after0 dict.  "
                   "Please verify the database is configured for "
                   "ACID compliance and that all clients are using "
                   "the same commit lock.  "
                   "(oid_int=%(oid_int)r, expect_tid_int=%(expect_tid_int)r, "
                   "actual_tid_int=%(actual_tid_int)r, "
                   "current_tid=%(current_tid)r, cp0=%(cp0)r, cp1=%(cp1)r, "
                   "len(delta_after0)=%(lda0)r, len(delta_after1)=%(lda1)r, "
                   "pid=%(pid)r, thread_ident=%(thread_ident)r)" % {
                       'oid_int': oid_int,
                       'expect_tid_int': expect_tid_int,
                       'actual_tid_int': actual_tid_int,
                       'current_tid': self.current_tid,
                       'cp0': cp0,
                       'cp1': cp1,
                       'lda0': len(self.delta_after0),
                       'lda1': len(self.delta_after1),
                       'pid': os.getpid(),
                       'thread_ident': threading.current_thread(),
                   })
            # We reset ourself as if we hadn't polled, and hope the transient
            # error gets retried in a working, consistent view.
            self._reset(msg)

    def loadSerial(self, oid_int, tid_int):
        """
        Return the locally cached state for the object *oid_int* as-of
        exactly *tid_int*.

        If that state is not available in the local cache, return
        nothing.

        If we're history free, and the tid_int doesn't match our
        knowledge of what the latest tid for the object should be,
        return nothing.
        """
        # We use only the local client because, for history-free storages,
        # it's the only one we can be reasonably sure has been
        # invalidated by a local pack. Also, our point here is to avoid
        # network traffic, so it's no good going to memcache for what may be
        # a stale answer.

        if not self.options.keep_history:
            # For history-free, we can only have one state. If we
            # think we know what it is, but they ask for something different,
            # then there's no way it can be found.
            known_tid_int = self.delta_after0.get(oid_int)
            if known_tid_int is not None and known_tid_int != tid_int:
                return None

        # If we've seen this object, it could be in a few places:
        # (oid, tid) (if it was ever in a delta), or (oid, cp0)
        # if it has fallen behind. Regardless, we can only use it if
        # the tids match.
        #
        # We have a multi-query method, but we don't use it because we
        # don't want to move keys around.
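        # Illustrative example: with checkpoints == (5000, 4000), a call
        # loadSerial(oid, 4200) checks key (oid, 4200) and then key
        # (oid, 5000); either one only counts as a hit if the cached
        # entry's tid is exactly 4200.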
        cache = self.local_client
        for tid in (tid_int,
                    self.checkpoints[0] if self.checkpoints else None):
            if not tid:
                break
            cache_data = cache[(oid_int, tid)]
            if cache_data and cache_data[1] == tid_int:
                return cache_data[0]

    def load(self, cursor, oid_int):
        """
        Load the given object from cache if possible.

        Fall back to loading from the database.

        Returns (state_bytes, tid_int).
        """
        # pylint:disable=too-many-statements,too-many-branches,too-many-locals
        if not self.checkpoints:
            # No poll has occurred yet. For safety, don't use the cache.
            # Note that without going through the cache, we can't
            # go through tracing either.
            return self.adapter.mover.load_current(cursor, oid_int)

        # Get the object from the transaction specified
        # by the following values, in order:
        #
        #   1. delta_after0[oid_int]
        #   2. checkpoints[0]
        #   3. delta_after1[oid_int]
        #   4. checkpoints[1]
        #   5. The database.
        #
        # checkpoints[0] is the preferred location.
        #
        # If delta_after0 contains oid_int, we should not look at any
        # other cache keys, since the tid_int specified in
        # delta_after0 replaces all older transaction IDs. We *know*
        # that oid_int should be at (exactly) tid_int because we
        # either made that change ourself (after_tpc_finish) *or* we
        # have polled within our current database transaction (or a
        # previous one) and been told that the oid changed in tid.
        #
        # Similarly, if delta_after1 contains oid_int, we should not
        # look at checkpoints[1]. Also, when both checkpoints are set
        # to the same transaction ID, we don't need to ask for the
        # same key twice.
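        # Illustrative example: with checkpoints == (5000, 4000), an oid
        # recorded in delta_after0 at tid 5003 is looked up only at the
        # exact key (oid, 5003); an oid recorded in delta_after1 at tid
        # 4500 is queried at (oid, 5000) and (oid, 4500) together, and a
        # miss falls back to the database, storing under (oid, 5000).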
        cache = self.cache
        tid_int = self.delta_after0.get(oid_int)
        if tid_int:
            # This object changed after checkpoint0, so
            # there is only one place to look for its state: the exact key.
            key = (oid_int, tid_int)
            cache_data = cache[key]
            if cache_data:
                # Cache hit.
                assert cache_data[1] == tid_int, (cache_data[1], key)
                return cache_data

            # Cache miss.
            state, actual_tid_int = self.adapter.mover.load_current(
                cursor, oid_int)
            if state and actual_tid_int:
                # If either is None, the object was deleted.
                self._check_tid_after_load(oid_int, actual_tid_int, tid_int)

                # At this point we know that tid_int == actual_tid_int
                # XXX: Previously, we did not trace this as a store into the cache.
                # Why?
                cache[key] = (state, actual_tid_int)
            return state, tid_int

        # Make a list of cache keys to query. The list will have either
        # 1 or 2 keys.
        cp0, cp1 = self.checkpoints
        tid2 = None
        tid_int = self.delta_after1.get(oid_int)
        if tid_int:
            tid2 = tid_int
        elif cp1 != cp0:
            tid2 = cp1

        preferred_key = (oid_int, cp0)

        # Query the cache. Query multiple keys simultaneously to
        # minimize latency. The client is responsible for moving
        # the data to the preferred key if it wasn't found there.
        response = cache(oid_int, cp0, tid2)
        if response:  # We have a hit!
            state, actual_tid = response
            return state, actual_tid

        # Cache miss.
        state, tid_int = self.adapter.mover.load_current(cursor, oid_int)
        if tid_int:
            self._check_tid_after_load(oid_int, tid_int)
            cache[preferred_key] = (state, tid_int)
        return state, tid_int

    def prefetch(self, cursor, oid_ints):
        # Just like load(), but we only fetch the OIDs
        # we can't find in the cache.
        if not self.checkpoints:
            # No point even trying, we would just throw the results away
            return

        to_fetch = OID_OBJECT_MAP_TYPE()  # {oid: cache key}
        cache = self.cache
        cp0, cp1 = self.checkpoints
        delta_after0 = self.delta_after0.get
        delta_after1 = self.delta_after1.get
        for oid_int in oid_ints:
            tid_int = delta_after0(oid_int)
            if tid_int:
                key = (oid_int, tid_int)
                cache_data = cache[key]
                if not cache_data:
                    # That was our one place, so we must fetch
                    to_fetch[oid_int] = key
                continue

            tid2 = None
            tid_int = delta_after1(oid_int)
            if tid_int:
                tid2 = tid_int
            elif cp1 != cp0:
                tid2 = cp1

            cache_data = cache(oid_int, cp0, tid2)
            if not cache_data:
                preferred_key = (oid_int, cp0)
                to_fetch[oid_int] = preferred_key

        if not to_fetch:
            return

        for oid, state, tid_int in self.adapter.mover.load_currents(
                cursor, to_fetch):
            key = to_fetch[oid]
            # Note that we're losing the knowledge of whether the TID
            # in the key came from delta_after0 or not, so we're not
            # validating that part.
            self._check_tid_after_load(oid, tid_int)
            cache[key] = (state, tid_int)

    def invalidate(self, oid_int, tid_int):
        del self.cache[(oid_int, tid_int)]
        if self.delta_after0.get(oid_int) == tid_int:
            del self.delta_after0[oid_int]

    def invalidate_all(self, oids):
        """
        In the local cache only, invalidate all cached data for the
        given OIDs.
        """
        self.local_client.invalidate_all(oids)
        deltas = self.delta_after0, self.delta_after1
        for oid in oids:
            for delta in deltas:
                try:
                    del delta[oid]
                except KeyError:
                    pass

    def tpc_begin(self):
        """Prepare temp space for objects to cache."""
        q = self.temp_objects = _TemporaryStorage()
        self.store_temp = q.store_temp
        self.read_temp = q.read_temp

    def _send_queue(self, tid):
        """
        Now that this tid is known, send all queued objects to the
        cache. The cache will have an ``(oid, tid)`` entry for each object
        we have been holding on to (though in a big transaction, some of them
        might not actually get stored in the cache; but we try!)
        """
        tid_int = u64(tid)

        self.cache.set_all_for_tid(tid_int, self.temp_objects)
        # We only do this because cache_trace_analysis uses us
        # in ways that aren't quite accurate. We'd prefer to call clear_temp()
        # at this point.
        self.temp_objects.reset()

    def after_tpc_finish(self, tid):
        """
        Flush queued changes.

        This is called after the database commit lock is released,
        but before releasing the storage lock that will allow other
        threads to use this instance.
        """
        tid_int = u64(tid)

        if self.checkpoints:
            for oid_int in self.temp_objects.stored_oids:
                # Future cache lookups for oid_int should now use
                # the tid just committed. We're about to flush that
                # data to the cache.
                self.delta_after0[oid_int] = tid_int
        # Under what circumstances would we get here (after committing
        # a transaction) without ever having polled to establish
        # checkpoints? Turns out that database-level APIs like
        # db.undo() use new storage instances in an unusual way, and
        # will not necessarily have polled by the time they commit.
        #
        # Of course, if we restored from persistent cache files the master
        # could have checkpoints we copied down.
        #
        # TODO: Create a special subclass for MVCC instances and separate
        # the state handling.

        self._send_queue(tid)

    def clear_temp(self):
        """Discard all transaction-specific temporary data.

        Called after transaction finish or abort.
        """
        if self.temp_objects is not None:
            self.store_temp = None
            self.read_temp = None
            self.temp_objects.close()
            self.temp_objects = None

    def after_poll(self, cursor, prev_tid_int, new_tid_int, changes):
        """
        Update checkpoint data after a database poll.

        *cursor* is connected to a load connection.

        *prev_tid_int* is the tid that was last polled (that is, it
        was the *new_tid_int* the last time this was called).

        *changes* lists all ``(oid_int, tid_int)`` pairs changed after
        *prev_tid_int*, up to and including *new_tid_int*, excluding
        the changes last committed by the associated storage instance.

        *changes* can be None to indicate that the cache is definitely
        in an inconsistent state: too much changed to be specific,
        there is no data at all (in which case *new_tid_int* should be
        0), or the database connection is stale.

        *prev_tid_int* can be None, in which case the *changes*
        parameter will be ignored. *new_tid_int* cannot be None.

        If *changes* was not None, this method returns a collection of
        OID integers from it. (Because changes is only required to be
        an iterable, you may not be able to iterate it again.)
        """
        my_prev_tid_int = self.current_tid or 0
        self.current_tid = new_tid_int

        global_checkpoints = self.cache.get_checkpoints()

        if not global_checkpoints:
            # No other instance has established an opinion yet,
            # so I get to.
            self.__poll_establish_global_checkpoints(new_tid_int)
            return

        global_checkpoints_in_future = global_checkpoints[0] > new_tid_int
        if global_checkpoints_in_future:
            # checkpoint0 is in a future that this instance can't yet
            # see. Ignore the checkpoint change for now, continue
            # with our own.
            global_checkpoints = self.checkpoints
            if not self.checkpoints:
                # How nice, this was our first poll, and yet
                # somehow we are still behind the global
                # checkpoints. The global checkpoints are probably
                # wrong (maybe there's a replica involved and the global
                # cache is now stale).
                global_checkpoints = (new_tid_int, new_tid_int)

        # We want to keep the current checkpoints for speed, but we
        # have to replace them (to avoid consistency violations)
        # if certain conditions happen (like emptying the ZODB Connection cache
        # which happens when `changes` is None).
        if (global_checkpoints == self.checkpoints  # In sync with the world
                # Poller didn't give up, and there was data in the database
                and changes is not None
                # The storage had polled before and gotten a response
                # other than 0, meaning no data in the database.
                and prev_tid_int
                # And what we think is the last time we polled
                # is at *least* as new as the last time the storage
                # thinks it polled.
                # Since we only assign to current_tid here (and when we read
                # persistent cache data, which also ultimately came from here)
                # it's not clear how we could get ahead.
                and my_prev_tid_int >= prev_tid_int
                # And the transaction that was just polled is
                # current or in the future. If we went backwards,
                # it's because the underlying data went backwards
                # (possibly we switched to a replica that's out of date)
                # and the user configured `revert-when-stale` to be on.
                # In that case, `changes` should also be None and we really shouldn't
                # get here.
                and new_tid_int >= my_prev_tid_int):

            # All the conditions for keeping the checkpoints were met,
            # so just update self.delta_after0 and self.current_tid.
            try:
                changes = self.__poll_update_delta0_from_changes(changes)
            except:
                consume(changes)
                raise
        else:
            log.debug(
                "Using new checkpoints: %s. Current cp: %s. "
                "Too many changes? %s. prev_tid_int: %s. my_prev_tid_int: %s. "
                "new_tid_int: %s", global_checkpoints, self.checkpoints,
                changes is None, prev_tid_int, my_prev_tid_int, new_tid_int)
            if changes is not None:
                changes = OID_SET_TYPE([oid for oid, _tid in changes])

            self.__poll_replace_checkpoints(cursor, global_checkpoints,
                                            new_tid_int)

        if (not global_checkpoints_in_future
                and self._should_suggest_shifted_checkpoints()):
            self._suggest_shifted_checkpoints()

        return changes

    #: By default, a 70% chance when we're full.
    CP_REPLACEMENT_CHANCE_WHEN_FULL = float(
        os.environ.get('RELSTORAGE_CP_REPLACEMENT_CHANCE_WHEN_FULL', "0.7"))

    #: If we're just close, a 20% chance.
    CP_REPLACEMENT_CHANCE_WHEN_CLOSE = float(
        os.environ.get('RELSTORAGE_CP_REPLACEMENT_CHANCE_WHEN_CLOSE', "0.2"))

    #: Start considering that we're close when we're 80% full.
    CP_REPLACEMENT_BEGIN_CONSIDERING_PERCENT = float(
        os.environ.get('RELSTORAGE_CP_REPLACEMENT_BEGIN_CONSIDERING_PERCENT',
                       "0.8"))

    def _should_suggest_shifted_checkpoints(self, _random=random.random):
        """
        Take the size of the checkpoints and our thresholds into account
        and determine whether we should try to replace them.
        """

        # Use the global state-sharing default random generator by
        # default (allow replacement for testing). This ensures our
        # uniform odds are truly uniform (unless someone reseeds the
        # generator) across all instances. (Interestingly, Python 3.7
        # automatically reseeds the generator on fork.) A single shared instance
        # of SystemRandom would get all workers on a single machine sharing the same
        # sequence, but it costs a system call.

        delta_size = len(self.delta_after0)
        limit = self.delta_size_limit

        if delta_size < (limit *
                         self.CP_REPLACEMENT_BEGIN_CONSIDERING_PERCENT):
            return False

        if delta_size >= limit:
            chances = self.CP_REPLACEMENT_CHANCE_WHEN_FULL
            when_dice_not_used = True
        else:
            chances = self.CP_REPLACEMENT_CHANCE_WHEN_CLOSE
            when_dice_not_used = False

        if chances < 1:
            # e.g., for a 90% chance, only 10% of the range of random
            # numbers (uniformly generated in the range [0.0, 1.0))
            # should lead to a false return.
            # 0.0 -- 0.89 < 0.9: True
            # 0.9 -- 0.99 >= 0.9: False
            return _random() < chances

        return when_dice_not_used

    def __poll_establish_global_checkpoints(self, new_tid_int):
        # Because we *always* have checkpoints in our local_client,
        # once we've set them, not being able to find them there also
        # means that it was our first poll, and so we shouldn't have
        # checkpoints ourself. Of course, with multi-threaded race
        # conditions, that might not actually be the case.

        # assert not self.checkpoints

        # Initialize the checkpoints; we've never polled before.
        log.debug("Initializing checkpoints: %s", new_tid_int)

        self.checkpoints = self.cache.store_checkpoints(
            new_tid_int, new_tid_int)

    @metricmethod_sampled
    def __poll_update_delta0_from_changes(self, changes):
        m = self.cache.updating_delta_map(self.delta_after0)
        m_get = m.get
        changed_oids = OID_SET_TYPE()
        for oid_int, tid_int in changes:
            changed_oids.add(oid_int)
            my_tid_int = m_get(oid_int, -1)
            if tid_int > my_tid_int:
                m[oid_int] = tid_int
        return changed_oids

    @metricmethod
    def __poll_replace_checkpoints(self, cursor, new_checkpoints, new_tid_int):
        # We have to replace the checkpoints.
        cp0, cp1 = new_checkpoints

        # Use the checkpoints specified by the cache (or equal to new_tid_int,
        # if the cache was in the future.)

        # Rebuild delta_after0 and delta_after1, if we can.
        # If we can't, because we don't actually have a range, do nothing.
        # In the case that the checkpoints are (new_tid, new_tid),
        # we'll do nothing and have no delta maps. This is because, hopefully,
        # next time we poll we'll be able to use the global checkpoints and
        # catch up then.
        new_delta_after0 = self._delta_map_type()
        new_delta_after1 = self._delta_map_type()
        if cp1 < new_tid_int:
            # poller.list_changes(cp1, new_tid_int) provides an iterator of
            # (oid, tid) where tid > cp1 and tid <= new_tid_int. It is guaranteed
            # that each oid shows up only once.
            change_list = self.adapter.poller.list_changes(
                cursor, cp1, new_tid_int)

            # Put the changes in new_delta_after*.
            # Let the backing cache know about this (this is only done
            # for tracing).
            updating_0 = self.cache.updating_delta_map(new_delta_after0)
            updating_1 = self.cache.updating_delta_map(new_delta_after1)
            try:
                for oid_int, tid_int in change_list:
                    if tid_int <= cp1 or tid_int > new_tid_int:
                        self._reset("Requested changes %d < tid <= %d "
                                    "but change %d for OID %d out of range." %
                                    (cp1, new_tid_int, tid_int, oid_int))

                    d = updating_0 if tid_int > cp0 else updating_1
                    d[oid_int] = tid_int
            except:
                consume(change_list)
                raise

            # Everybody has a home (we didn't get duplicate entries
            # or multiple entries for the same OID with different TID).
            # This is guaranteed by the IPoller interface, so we don't waste
            # time tracking it here.
        logger.debug(
            "Built new deltas from cp1 %s to current_tid %s of sizes %d (0) and %d (1)",
            cp1, new_tid_int, len(new_delta_after0), len(new_delta_after1))

        self.checkpoints = new_checkpoints
        self.delta_after0 = new_delta_after0
        self.delta_after1 = new_delta_after1

    def _suggest_shifted_checkpoints(self):
        """Suggest that future polls use a new pair of checkpoints.

        This does nothing if another instance has already shifted
        the checkpoints.

        checkpoint0 shifts to checkpoint1 and the tid just committed
        becomes checkpoint0.
        """
        cp0, _cp1 = self.checkpoints
        tid_int = self.current_tid  # transaction we just committed.
        assert tid_int >= cp0

        # delta_after0 has reached its limit. The way to shrink it
        # is to shift the checkpoints. Suggest shifted checkpoints
        # for future polls. If delta_after0 is far over the limit
        # (caused by a large transaction), suggest starting new
        # checkpoints instead of shifting.
        delta_size = len(self.delta_after0)
        huge = (delta_size >= self.delta_size_limit * 2)

        if huge:
            # start new checkpoints
            change_to = (tid_int, tid_int)
        else:
            # shift the existing checkpoints
            change_to = (tid_int, cp0)
        expect = self.checkpoints

        logger.debug(
            "Broadcasting shift of checkpoints to %s. "
            "len(delta_after0) == %d.", change_to, delta_size)

        # The poll code will later see the new checkpoints
        # and update self.checkpoints and self.delta_after(0|1).
        return self.cache.replace_checkpoints(expect, change_to)
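
A minimal lifecycle sketch for the class above, assuming hypothetical adapter, options, cursor, oid_int, and committed_tid objects normally supplied by the surrounding RelStorage machinery; only methods defined in the class are exercised.

# Hypothetical wiring; adapter and options come from RelStorage configuration.
cache = StorageCache(adapter, options, prefix='myzodb')

# Each ZODB connection works against its own instance, sharing the local client.
conn_cache = cache.new_instance()

# Reads go through the cache first, falling back to adapter.mover.load_current().
state, tid_int = conn_cache.load(cursor, oid_int)

# Commit: stage objects in temporary storage, flush once the tid is known.
conn_cache.tpc_begin()
# ... conn_cache.store_temp(...) for each stored object ...
conn_cache.after_tpc_finish(committed_tid)  # committed_tid is an 8-byte tid
conn_cache.clear_temp()

conn_cache.release()
cache.close()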
Example #2
class StorageCache(DetachableMVCCDatabaseViewer):
    """RelStorage integration with memcached or similar.

    Holds a list of memcache clients in order from most local to
    most global.  The first is a LocalClient, which stores the cache
    in the Python process, but shares the cache between threads.
    """
    # pylint:disable=too-many-instance-attributes,too-many-public-methods

    __slots__ = (
        'adapter',
        'options',
        'keep_history',
        'prefix',
        'polling_state',
        'local_client',
        'cache',
        'object_index',
    )

    if IN_TESTRUNNER:

        class MVCCInternalConsistencyError(Exception):
            "This can never be raised or caught."
    else:
        MVCCInternalConsistencyError = AssertionError

    def __init__(self, adapter, options, prefix, _parent=None):
        super(StorageCache, self).__init__()
        self.adapter = adapter
        self.options = options
        self.keep_history = options.keep_history
        self.prefix = prefix or ''

        if _parent is None:
            # I must be the master!

            # This is shared between all instances of a cache in a tree,
            # including the master, so that they can share information about
            # polling.
            self.polling_state = MVCCDatabaseCoordinator(self.options)
            self.local_client = LocalClient(options, self.prefix)

            shared_cache = MemcacheStateCache.from_options(
                options, self.prefix)
            if shared_cache is not None:
                self.cache = MultiStateCache(self.local_client, shared_cache)
            else:
                self.cache = self.local_client

            tracefile = persistence.trace_file(options, self.prefix)
            if tracefile:
                tracer = ZEOTracer(tracefile)
                tracer.trace(0x00)
                self.cache = TracingStateCache(self.cache, tracer)
        else:
            self.polling_state = _parent.polling_state  # type: MVCCDatabaseCoordinator
            self.local_client = _parent.local_client.new_instance()
            self.cache = _parent.cache.new_instance()

        # Once we have registered with the MVCCDatabaseCoordinator,
        # we cannot make any changes to our own mvcc state without
        # letting it know about them. In particular, that means we must
        # not just assign to this object (except under careful circumstances
        # where we're sure to be single threaded.)
        # This object can be None
        self.object_index = None

        # It is also important not to register with the coordinator until
        # we are fully initialized; we could be constructing a new_instance
        # in a separate thread while polling is going on in other threads.
        # We can get strange AttributeError if a partially constructed instance
        # is exposed.
        self.polling_state.register(self)

        if _parent is None:
            self.restore()

    @property
    def current_tid(self):
        # testing
        return self.highest_visible_tid

    # XXX: Note that our __bool__ and __len__ are NOT consistent
    def __bool__(self):
        return True

    __nonzero__ = __bool__

    def __len__(self):
        return len(self.local_client)

    @property
    def size(self):
        return self.local_client.size

    @property
    def limit(self):
        return self.local_client.limit

    def stats(self):
        """
        Return stats. This is a debugging aid only. The format is undefined and intended
        for human inspection only.
        """
        stats = self.local_client.stats()
        stats['local_index_stats'] = (
            self.object_index.stats() if self.object_index else None)
        stats['global_index_stats'] = self.polling_state.stats()
        return stats

    def __repr__(self):
        return "<%s at 0x%x hvt=%s bytes=%d len=%d state=%r>" % (
            self.__class__.__name__,
            id(self),
            self.highest_visible_tid,
            self.size,
            len(self),
            self.polling_state,
        )

    def reset_stats(self):
        self.local_client.reset_stats()

    def new_instance(self, before=None, adapter=None):
        """
        Return a copy of this instance sharing the same local client
        and having the most current view of the database as collected
        by any instance.

        If *before* is given, the new cache will use a distinct
        :class:`MVCCDatabaseCoordinator` so that
        its usage pattern does not interfere.
        """
        klass = type(self) if before is None else _BeforeStorageCache
        cache = klass(adapter or self.adapter,
                      self.options,
                      self.prefix,
                      _parent=self)
        return cache

    def release(self):
        """
        Release resources held by this instance.

        This does not corrupt shared state, and must be called
        on each instance that's not the root.

        These are usually memcache connections, if they're in use.
        """
        self.cache.release()
        # Release our clients. If we had a non-shared local cache,
        # this will also allow it to release any memory it's holding.
        self.local_client = self.cache = _UsedAfterRelease
        self.polling_state.unregister(self)
        self.polling_state = _UsedAfterRelease
        self.object_index = None
        self.highest_visible_tid = None

    def close(self, **save_args):
        """
        Release resources held by this instance, and
        save any persistent data necessary.

        This is only called on the root. If there are still instances
        that haven't been released, they'll be broken.
        """
        # grab things that will be reset in release()
        cache = self.cache
        polling_state = self.polling_state

        # Go ahead and release our polling_state now, in case
        # it helps to vacuum for save.
        self.polling_state.unregister(self)
        self.save(**save_args)
        self.release()
        cache.close()
        polling_state.close()

    def save(self, **save_args):
        """
        Store any persistent client data.
        """
        if self.options.cache_local_dir and len(self) > 0:  # pylint:disable=len-as-condition
            # (our __bool__ is not consistent with our len)
            stats = self.local_client.stats()
            if stats['hits'] or stats['sets']:
                # Only write this out if (1) it proved useful OR (2)
                # we've made modifications. Otherwise, we're writing a consolidated
                # file for no good reason.
                # TODO: Consider the correctness here, now that we have a
                # more accurate cache. Should that maybe be AND?
                return self.polling_state.save(self, save_args)
            logger.debug(
                "Cannot justify writing cache file, no hits or misses")

    def restore(self):
        # We must only restore into an empty cache.
        state = self.polling_state
        assert not self.local_client
        state.restore(self.adapter, self.local_client)

    def _reset(self, message=None):
        """
        Reset the transaction state of only this instance.

        If this is being done in a transactional way, it must be followed
        by raising an exception. If the *message* parameter is provided,
        then a ``CacheConsistencyError`` will be raised when this
        method returns.
        """
        # As if we've never polled
        self.polling_state.reset_viewer(self)
        self.polling_state.flush_all()
        if message:
            raise CacheConsistencyError(message)

    def clear(self, load_persistent=True):
        """
        Remove all data from the cache, both locally (and shared among
        other instances), and globally.

        Called by speed tests.

        Starting from the introduction of persistent cache files, this
        also results in the local client being repopulated with the
        current set of persistent data. The *load_persistent* keyword
        can be used to control this.

        .. versionchanged:: 2.0b6 Added the ``load_persistent``
           keyword. This argument is provisional.
        """
        self._reset()
        self.polling_state.flush_all()
        self.cache.flush_all()

        if load_persistent:
            self.restore()

    def zap_all(self):
        """
        Remove all data from the cache, both locally (and shared among
        other instances) and globally; in addition, remove any
        persistent cache files on disk.
        """
        self.local_client.zap_all()
        self.clear(load_persistent=False)

    def _check_tid_after_load(self,
                              oid_int,
                              actual_tid_int,
                              expect_tid_int=None,
                              cursor=None):
        """Verify the tid of an object loaded from the database is sane."""
        if actual_tid_int is not None and actual_tid_int > self.highest_visible_tid:
            # Strangely, the database just gave us data from a future
            # transaction. We can't give the data to ZODB because that
            # would be a consistency violation. However, the cause is
            # hard to track down, so issue a ReadConflictError and
            # hope that the application retries successfully.
            msg = ("Got data for OID 0x%(oid_int)x from "
                   "future transaction %(actual_tid_int)d (%(got_ts)s).  "
                   "Current transaction is %(hvt)s (%(current_ts)s)." % {
                       'oid_int': oid_int,
                       'actual_tid_int': actual_tid_int,
                       'hvt': self.highest_visible_tid,
                       'got_ts': str(TimeStamp(p64(actual_tid_int))),
                       'current_ts': str(
                           TimeStamp(p64(self.highest_visible_tid))),
                   })
            raise ReadConflictError(msg)

        if expect_tid_int is not None and actual_tid_int != expect_tid_int:
            # Uh-oh, the cache is inconsistent with the database.
            # We didn't get a TID from the future, but it's not what we
            # had in our delta_after0 map, which means...we missed a change
            # somewhere.
            #
            # Possible causes:
            #
            # - The database MUST provide a snapshot view for each
            #   session; this error can occur if that requirement is
            #   violated. For example, MySQL's MyISAM engine is not
            #   sufficient for the object_state table because MyISAM
            #   can not provide a snapshot view. (InnoDB is
            #   sufficient.)
            #
            # - (Similar to the last one.) Using too low an
            #   isolation level for the database connection and
            #   viewing unrelated data.
            #
            # - Something could be writing to the database out
            #   of order, such as a version of RelStorage that
            #   acquires a different commit lock.
            #
            # - A software bug. In the past, there was a subtle bug
            #   in after_poll() that caused it to ignore the
            #   transaction order, leading it to sometimes put the
            #   wrong tid in delta_after*.
            #
            # - Restarting a load connection at a future point we hadn't
            #   actually polled to, such that our current_tid is out of sync
            #   with the connection's *actual* viewable tid?
            from pprint import pformat
            from relstorage._util import int64_to_8bytes
            msg = (
                "Detected an inconsistency "
                "between the RelStorage cache and the database "
                "while loading an object using the MVCC index.  "
                "Please verify the database is configured for "
                "ACID compliance and that all clients are using "
                "the same commit lock. Info:\n%s" % pformat({
                    'oid_int': oid_int,
                    'expect_tid_int': expect_tid_int,
                    'actual_tid_int': actual_tid_int,
                    # Typically if this happens we get something newer than we expect.
                    'actual_expect_delta': actual_tid_int - expect_tid_int,
                    'expect_tid': str(TimeStamp(int64_to_8bytes(expect_tid_int))),
                    'actual_tid': str(TimeStamp(int64_to_8bytes(actual_tid_int))),
                    'current_tid': self.highest_visible_tid,
                    'pid': os.getpid(),
                    'thread_ident': threading.current_thread(),
                    'cursor': cursor,
                }))
            # We reset ourself as if we hadn't polled, and hope the transient
            # error gets retried in a working, consistent view.
            self._reset(msg)

    def loadSerial(self, oid_int, tid_int):
        """
        Return the locally cached state for the object *oid_int* as-of
        exactly *tid_int*.

        If that state is not available in the local cache, return
        nothing.

        This is independent of the current transaction and polling state, and
        may return data from the future.

        If the storage hasn't polled invalidations, or if there are other viewers
        open at transactions in the past, it may also return data from the past
        that has been overwritten (in history-free storages).
        """
        # We use only the local client because, for history-free storages,
        # it's the only one we can be reasonably sure has been
        # invalidated by a local pack. Also, our point here is to avoid
        # network traffic, so it's no good going to memcache for what may be
        # a stale answer.

        cache = self.local_client
        # Don't take this as an MRU hit; if we succeed, we'll
        # put new cached data in for this OID and do that anyway.
        cache_data = cache.get((oid_int, tid_int), False)
        if cache_data and cache_data[1] == tid_int:
            return cache_data[0]

    def load(self, cursor, oid_int):
        """
        Load the given object from cache if possible.

        Fall back to loading from the database.

        Returns (state_bytes, tid_int).
        """
        # pylint:disable=too-many-statements,too-many-branches,too-many-locals
        if not self.object_index:
            # No poll has occurred yet. For safety, don't use the cache.
            # Note that without going through the cache, we can't
            # go through tracing either.
            return self.adapter.mover.load_current(cursor, oid_int)

        # Get the object from the transaction specified
        # by the following values, in order:
        #
        #   1. self.object_index[oid_int]
        #
        # An entry in object_index means we've polled for and know the exact
        # TID for this object, either because we polled, or because someone
        # loaded it and put it in the index. If we know a TID, we must *never*
        # use the wildcard frozen value (it's possible to have an older frozen tid that's
        # valid for older transactions, but out of date for this one.) That's handled
        # internally in the clients.
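        # Illustrative example: if the index maps this oid to tid 5003,
        # we query exactly (oid, 5003); if the index holds None (the
        # frozen/wildcard marker), we query (oid, None) and only trust a
        # hit whose tid is not newer than our highest_visible_tid.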

        cache = self.cache
        index = self.object_index
        indexed_tid_int = index[oid_int]  # Could be None

        key = (oid_int, indexed_tid_int)
        cache_data = cache[key]
        if (cache_data and indexed_tid_int is None
                and cache_data[1] > self.highest_visible_tid):
            # Cache hit on a wildcard, but we need to verify the wildcard
            # and it didn't pass. This situation should be impossible.
            cache_data = None

        if cache_data:
            # Cache hit, non-wildcard or wildcard matched.
            return cache_data

        # Cache miss.
        state, actual_tid_int = self.adapter.mover.load_current(
            cursor, oid_int)
        if actual_tid_int:
            # If either is None, the object was deleted.
            self._check_tid_after_load(oid_int, actual_tid_int,
                                       indexed_tid_int, cursor)

            # We may or may not have had an index entry, but make sure we do now.
            # Eventually this will age to be frozen again if needed.
            index[oid_int] = actual_tid_int
            cache[(oid_int, actual_tid_int)] = (state, actual_tid_int)
            return state, actual_tid_int

        # This is in the bytecode as a LOAD_CONST
        return None, None

    def prefetch(self, cursor, oid_ints):
        # Just like load(), but we only fetch the OIDs
        # we can't find in the cache.
        if not self.object_index:
            # No point even trying, we would just throw the results away
            return

        cache = self.cache
        if cache is self.local_client and not cache.limit:
            # No point.
            return

        index = self.object_index
        # We don't actually need the cache data, so avoid asking
        # for it. That would trigger stats updates (hits/misses)
        # and move it to the front of the LRU list. But this is just
        # in advance, we don't know if it will actually be used.
        # `in` has a race condition (it could be evicted soon), but
        # if it is, there was probably something else more important
        # going on.
        to_fetch = {
            oid_int
            for oid_int in oid_ints if (oid_int, index[oid_int]) not in cache
        }

        if not to_fetch:
            return

        for oid, state, tid_int in self.adapter.mover.load_currents(
                cursor, to_fetch):
            key = (oid, tid_int)
            self._check_tid_after_load(oid, tid_int, cursor=cursor)
            cache[key] = (state, tid_int)
            index[oid] = tid_int

    def prefetch_for_conflicts(self, cursor, oid_tid_pairs):
        results = {}
        to_fetch = OidTMap()
        cache_get = self.cache.get

        # If we've never polled, we can't actually use our cache; we'll
        # just have to make a bulk query.
        if not self.object_index:
            to_fetch = OidTMap(oid_tid_pairs)
        else:
            for key in oid_tid_pairs:
                # Don't update stats/MRU, just as with normal prefetch().
                # It's also important here to avoid taking the lock.
                # We don't store this prefetched data back into the cache because
                # we're just about to overwrite it; we'd have to have multiple writers
                # all with the same initial starting TID lined up to write to the object
                # for that to have any benefit.
                cache_data = cache_get(key, peek=True)
                if not cache_data:
                    to_fetch[key[0]] = key[1]
                else:
                    results[key[0]] = cache_data
                    assert cache_data[1] == key[1]

        if to_fetch:
            check = self._check_tid_after_load if self.object_index else lambda *_, **kw__: None
            for oid, state, tid_int in self.adapter.mover.load_currents(
                    cursor, to_fetch):
                check(oid, tid_int, to_fetch[oid], cursor=cursor)
                results[oid] = (state, tid_int)

        return results

    def remove_cached_data(self, oid_int, tid_int):
        """
        See notes in `invalidate_all`.
        """
        del self.cache[(oid_int, tid_int)]

    def remove_all_cached_data_for_oids(self, oids):
        """
        Invalidate all cached data for the given OIDs.

        This isn't transactional or locked so it may still result in
        this or others seeing invalid (ha!) states.

        This is a specialized API. It allows violation of our internal
        consistency constraints. It should only be used when the
        database is being manipulated at a low level, such as during
        pack or undo.
        """
        # Erase our knowledge of where to look
        # self._invalidate_all(oids)
        # Remove the data too.
        self.cache.invalidate_all(oids)

    def after_tpc_finish(self, tid, temp_storage):
        """
        Flush queued changes.

        This is called after the database commit lock is released,
        but before control is returned to the Connection.

        Now that this tid is known, send all queued objects to the
        cache. The cache will have an ``(oid, tid)`` entry for each object
        we have been holding on to (though in a big transaction, some of them
        might not actually get stored in the cache; but we try!)
        """
        tid_int = bytes8_to_int64(tid)
        self.cache.set_all_for_tid(tid_int, temp_storage)

    def poll(self, conn, cursor, ignore_tid):
        try:
            changes = self.polling_state.poll(self, conn, cursor)
        except self.MVCCInternalConsistencyError:  # pragma: no cover
            logger.critical(
                "Internal consistency violation in the MVCC coordinator. "
                "Please report a bug to the RelStorage maintainers. "
                "Flushing caches for safety. ",
                exc_info=True)
            self._reset("Unknown internal violation")

        if changes is not None:
            return OIDSet(oid for oid, tid in changes if tid != ignore_tid)
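
A minimal lifecycle sketch for this MVCC-coordinator variant, assuming hypothetical adapter, options, conn, cursor, and oid_int objects supplied by the surrounding RelStorage machinery; only methods defined above are exercised.

# The root instance owns the LocalClient and the MVCCDatabaseCoordinator.
root = StorageCache(adapter, options, prefix='myzodb')

# Per-connection viewers register with the shared coordinator.
viewer = root.new_instance()

# Polling establishes the viewer's object_index; until then, loads bypass the cache.
changed_oids = viewer.poll(conn, cursor, ignore_tid=None)

# Reads go through the cache first, falling back to adapter.mover.load_current().
state, tid_int = viewer.load(cursor, oid_int)

viewer.release()  # unregister this viewer from the coordinator
root.close()      # save persistent data and shut everything down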