示例#1
0
    def test_delete_stale_objects_on_save(self):
        from relstorage.cache.local_database import Database
        from relstorage.cache.persistence import sqlite_connect
        c = self._makeOne(cache_local_dir=":memory:")

        conn = sqlite_connect(c.options, 'ignored-pfx', close_async=False)
        self.addCleanup(conn.close)
        db = Database.from_connection(conn)
        db.store_temp([(0, 0, b'state', 0)])
        db.move_from_temp()
        self.assertEqual(dict(db.oid_to_tid), {0: 0})
        conn.commit()
        # Pretend we loaded this from the db
        c[(0, 0)] = (b'state', 0)
        c._min_allowed_writeback[0] = 0

        # Pass a newer version through
        c[(0, 1)] = (b'state', 1)
        self.assertEqual(c._min_allowed_writeback[0], 1)
        # Evict it so we don't have it to write.
        del c._bucket0[(0, 1)]

        # But it gets removed based on having seen it and knowing
        # it's there.
        conn._rs_has_closed = True # Prevent closing by the write method
        try:
            c.write_to_sqlite(conn)
        finally:
            conn._rs_has_closed = False

        self.assertEmpty(db.oid_to_tid)
示例#2
0
    def _makeOne(self):
        db = Database.from_connection(
            self.connection,
            use_upsert=self.USE_UPSERT,
        )
        ost = db.store_temp

        def store_temp(rows):
            return ost([(r[0], r[1], 0, r[2], r[3]) for r in rows])

        db.store_temp = store_temp
        db.real_store_temp = ost
        return db
示例#3
0
    def remove_invalid_persistent_oids(self, bad_oids):
        """
        Remove data from the persistent cache for the given oids.
        """
        options = self.options
        if not options.cache_local_dir:
            return

        count_removed = 0
        conn = '(no oids to remove)'
        if bad_oids:
            conn = sqlite_connect(options, self.prefix, close_async=False)
            with closing(conn):
                db = Database.from_connection(conn)
                count_removed = db.remove_invalid_persistent_oids(bad_oids)
        logger.debug("Removed %d invalid OIDs from %s", count_removed, conn)
示例#4
0
    def read_from_sqlite(self, connection):
        import gc
        gc.collect()  # Free memory, we're about to make big allocations.
        mem_before = get_memory_usage()

        db = Database.from_connection(connection)
        checkpoints = db.checkpoints

        @_log_timed
        def fetch_and_filter_rows():
            # Do this here so that we don't have the result
            # in a local variable and it can be collected
            # before we measure the memory delta.
            #
            # In large benchmarks, this function accounts for 57%
            # of the total time to load data. 26% of the total is
            # fetching rows from sqlite, and 18% of the total is allocating
            # storage for the blob state.
            #
            # We make one call into sqlite and let it handle the iterating.
            # Items are (oid, key_tid, state, actual_tid).
            # key_tid may equal the actual tid, or be -1 when the row was previously
            # frozen;
            # That doesn't matter to us, we always freeze all rows.
            size = 0
            limit = self.limit
            items = []
            rows = db.fetch_rows_by_priority()
            for oid, frozen, state, actual_tid, frequency in rows:
                size += len(state)
                if size > limit:
                    break
                items.append((oid, (state, actual_tid, frozen, frequency)))
            consume(rows)
            # Rows came to us MRU to LRU, but we need to feed them the other way.
            items.reverse()
            return items

        # In the large benchmark, this is 25% of the total time.
        # 18% of the total time is preallocating the entry nodes.
        self._bulk_update(fetch_and_filter_rows(),
                          source=connection,
                          mem_usage_before=mem_before)
        return checkpoints
示例#5
0
 def _makeOne(self, conn=None):
     return Database.from_connection(conn or self.connection, )
示例#6
0
 def _makeOne(self):
     return Database.from_connection(
         self.connection,
         use_upsert=self.USE_UPSERT,
     )
示例#7
0
    def write_to_sqlite(self, connection):
        # pylint:disable=too-many-locals
        mem_before = get_memory_usage()

        cur = connection.cursor()

        db = Database.from_connection(connection)
        begin = time.time()

        # In a large benchmark, store_temp() accounts for 32%
        # of the total time, while move_from_temp accounts for 49%.

        # The batch size depends on how many params a stored proc can
        # have; if we go too big we get OperationalError: too many SQL
        # variables. The default is 999.
        # Note that the multiple-value syntax was added in
        # 3.7.11, 2012-03-20.
        with _timer() as batch_timer:
            cur.execute('BEGIN')
            stored_oid_tid = db.oid_to_tid
            fetch_current = time.time()
            count_written, _ = db.store_temp(
                self._items_to_write(stored_oid_tid))
            cur.execute("COMMIT")

        # Take out (reserved write) locks now; if we don't, we can get
        # 'OperationalError: database is locked' (the SQLITE_BUSY
        # error code; the SQLITE_LOCKED error code is about table
        # locks and has the string 'database table is locked') But
        # beginning immediate lets us stand in line.
        #
        # Note that the SQLITE_BUSY error on COMMIT happens "if an
        # another thread or process has a shared lock on the database
        # that prevented the database from being updated.", But "When
        # COMMIT fails in this way, the transaction remains active and
        # the COMMIT can be retried later after the reader has had a
        # chance to clear." So we could retry the commit a couple
        # times and give up if can't get there. It looks like one
        # production database takes about 2.5s to execute this entire function;
        # it seems like most instances that are shutting down get to this place
        # at roughly the same time and stack up waiting:
        #
        # Instance | Save Time | write_to_sqlite time
        #     1    |   2.36s   |   2.35s
        #     2    |   5.31s   |   5.30s
        #     3    |   6.51s   |   6.30s
        #     4    |   7.91s   |   7.90s
        #
        # That either suggests that all waiting happens just to acquire this lock and
        # commit this transaction (and that subsequent operations are inconsequential
        # in terms of time) OR that the exclusive lock isn't truly getting dropped,
        # OR that some function in subsequent processes is taking longer and longer.
        # And indeed, our logging showed that a gc.collect() at the end of this function was
        # taking more and more time:
        #
        # Instance | GC Time  | Reported memory usage after GC
        #    1     |   2.00s  |     34.4MB
        #    2     |   3.50s  |     83.2MB
        #    3     |   4.94s  |    125.9MB
        #    4     |   6.44   |    202.1MB
        #
        # The exclusive lock time did vary, but not as much; trim time
        # didn't really vary:
        #
        # Instance |  Exclusive Write Time | Batch Insert Time | Fetch Current
        #    1     |   0.09s               |   0.12s           |   0.008s
        #    2     |   1.19s               |   0.44s           |   0.008s
        #    3     |   0.91s               |   0.43s           |   0.026s
        #    4     |   0.92s               |   0.34s           |   0.026s
        with _timer() as exclusive_timer:
            cur.execute('BEGIN IMMEDIATE')
            # During the time it took for us to commit, and the time that we
            # got the lock, it's possible that someone else committed and
            # changed the data in object_state
            # Things might have changed in the database since our
            # snapshot where we put temp objects in, so we can't rely on
            # just assuming that's the truth anymore.
            rows_inserted = db.move_from_temp()
            if self.checkpoints:
                db.update_checkpoints(*self.checkpoints)

            cur.execute('COMMIT')
        # TODO: Maybe use BTrees.family.intersection to get the common keys?
        # Or maybe use a SQLite Python function?
        # 'DELETE from object_state where is_stale(zoid, tid)'
        # What's fastest? The custom function version would require scanning the entire
        # table; unfortunately this sqlite interface doesn't allow defining virtual tables.
        with _timer() as trim_timer:
            # Delete anything we still know is stale, because we
            # saw a new TID for it, but we had nothing to replace it with.
            min_allowed_writeback = OID_TID_MAP_TYPE()
            for k, v in self._min_allowed_writeback.items():
                if stored_oid_tid.get(k, MAX_TID) < v:
                    min_allowed_writeback[k] = v
            db.trim_to_size(self.limit, min_allowed_writeback)
            del min_allowed_writeback
        # Typically we write when we're closing and don't expect to do
        # anything else, so there's no need to keep tracking this info.
        self._min_allowed_writeback = OID_TID_MAP_TYPE()

        # Cleanups
        cur.close()
        del cur
        db.close()  # closes the connection.
        del db
        del stored_oid_tid

        # We're probably shutting down, don't perform a GC; we see
        # that can get quite lengthy.

        end = time.time()
        stats = self.stats()
        mem_after = get_memory_usage()
        logger.info(
            "Wrote %d items to %s in %s "
            "(%s to fetch current, %s to insert batch (%d rows), %s to write, %s to trim). "
            "Used %s memory. (Storage: %s) "
            "Total hits %s; misses %s; ratio %s (stores: %s)", count_written,
            connection, end - begin, fetch_current - begin,
            batch_timer.duration, rows_inserted,
            exclusive_timer.duration, trim_timer.duration,
            byte_display(mem_after - mem_before), self._bucket0, stats['hits'],
            stats['misses'], stats['ratio'], stats['sets'])

        return count_written
示例#8
0
    def read_from_sqlite(self, connection, row_filter=None):
        import gc
        gc.collect()  # Free memory, we're about to make big allocations.
        mem_before = get_memory_usage()

        db = Database.from_connection(connection)
        checkpoints = db.checkpoints
        if checkpoints:
            self.store_checkpoints(*checkpoints)

        self._min_allowed_writeback = db.oid_to_tid

        # XXX: The cache_ring is going to consume all the items we
        # give it, even if it doesn't actually add them to the cache.
        # It also creates a very large preallocated array for all to
        # hold the `total_count` of items. We don't want to read more
        # rows than necessary, to keep the delta maps small and to
        # avoid the overhead; we could pass the COUNT(zoid) to
        # `bulk_update`, and have our row iterable be a generator that
        # reads a row and breaks when it reaches the limit, but then
        # we have that whole preallocated array hanging around, which
        # is probably much bigger than we need. So we need to give an
        # accurate count, and the easiest way to do that is to materialize
        # the list of rows.
        #
        # In practice, we can assume that the limit hasn't changed between this
        # run and the last run, so the database will already have been trimmed
        # to fit the desired size, so essentially all the rows in the database
        # should go in our cache.
        @_log_timed
        def fetch_and_filter_rows():
            # Do this here so that we don't have the result
            # in a local variable and it can be collected
            # before we measure the memory delta.
            #
            # In large benchmarks, this function accounts for 57%
            # of the total time to load data. 26% of the total is
            # fetching rows from sqlite, and 18% of the total is allocating
            # storage for the blob state.
            #
            # We make one call into sqlite and let it handle the iterating.
            # Items are (oid, key_tid, state, actual_tid). Currently,
            # key_tid == actual_tid
            items = db.list_rows_by_priority()
            # row_filter produces the sequence ((oid, key_tid) (state, actual_tid))
            if row_filter is not None:
                row_iter = row_filter(checkpoints, items)
                items = list(row_iter)
            else:
                items = [(r[:2], r[2:]) for r in items]

            # Look at them from most to least recent,
            # but insert them the other way.
            items.reverse()
            return items

        f = fetch_and_filter_rows.__wrapped__
        f.__name__ = f.__name__ + ':' + (str(row_filter) if row_filter
                                         is not None else '<no filter>')

        # In the large benchmark, this is 25% of the total time.
        # 18% of the total time is preallocating the entry nodes.
        self.__bucket.bulk_update(fetch_and_filter_rows(),
                                  source=connection,
                                  log_count=len(self._min_allowed_writeback),
                                  mem_usage_before=mem_before)