def fetch_and_filter_rows():
    # Do this here so that we don't have the result
    # in a local variable and it can be collected
    # before we measure the memory delta.
    #
    # In large benchmarks, this function accounts for 57%
    # of the total time to load data. 26% of the total is
    # fetching rows from sqlite, and 18% of the total is allocating
    # storage for the blob state.
    #
    # We make one call into sqlite and let it handle the iterating.
    # Items are (oid, key_tid, state, actual_tid).
    # key_tid may equal the actual tid, or be -1 when the row was previously
    # frozen; that doesn't matter to us, we always freeze all rows.
    size = 0
    limit = self.limit
    items = []
    rows = db.fetch_rows_by_priority()
    for oid, frozen, state, actual_tid, frequency in rows:
        size += len(state)
        if size > limit:
            break
        items.append((oid, (state, actual_tid, frozen, frequency)))
    consume(rows)
    # Rows came to us MRU to LRU, but we need to feed them the other way.
    items.reverse()
    return items
def _lock_readCurrent_oids_for_share(self, cursor, current_oids, shared_locks_block):
    _, table = self._get_current_objects_query
    oids_to_lock = sorted(set(current_oids))
    batcher = self.make_batcher(cursor)
    locking_suffix = ' %s ' % (
        self._lock_share_clause
        if shared_locks_block
        else self._lock_share_clause_nowait
    )
    try:
        rows = batcher.select_from(
            ('zoid',), table,
            suffix=locking_suffix,
            **{'zoid': oids_to_lock}
        )
        consume(rows)
    except self.illegal_operation_exceptions: # pragma: no cover
        # Bug in our code
        raise
    except self.lock_exceptions:
        self.reraise_commit_lock_error(
            cursor,
            'SELECT zoid FROM {table} WHERE zoid IN () {lock}'.format(
                table=table,
                lock=locking_suffix
            ),
            UnableToLockRowsToReadCurrentError
        )
def _connect(self):
    if self._connection is None:
        conn = self.driver.connect_to_file(
            self.db_path,
            # try to keep the whole thing in memory.
            mmap_size=1024 * 1024 * 10,
            override_pragmas={
                # We can always reconstruct the contents of this file from the database
                # itself, and speed is utterly critical.
                'journal_mode': 'off',
                'synchronous': 'off',
            }
        )
        try:
            consume(conn.execute('SELECT count(*) from new_oid'))
        except sqlite3.OperationalError:
            conn.executescript(self._new_oid_query + """
            INSERT OR REPLACE INTO new_oid
            SELECT MAX(x) FROM (
                SELECT 0 x
                UNION ALL
                SELECT MAX(zoid) FROM new_oid
            )
            """)
        self._connection = conn
    return self._connection
def new_oids_no_cursor(self):
    with self.lock:
        conn = self._connect()
        consume(conn.execute('BEGIN IMMEDIATE TRANSACTION'))
        row, = conn.execute('SELECT zoid FROM new_oid')
        conn.execute('UPDATE new_oid SET zoid = zoid + 1')
        conn.commit()
        return self._oid_range_around(row[0] + 1)
def _lock_rows_being_modified(self, cursor):
    stmt = self._lock_current_objects_query
    try:
        cursor.execute(stmt)
        rows = cursor
        consume(rows)
    except self.illegal_operation_exceptions: # pragma: no cover
        # Bug in our code
        raise
    except self.lock_exceptions:
        self.reraise_commit_lock_error(cursor, stmt, UnableToLockRowsToModifyError)
def __poll_replace_checkpoints(self, cursor, new_checkpoints, new_tid_int):
    # We have to replace the checkpoints.
    cp0, cp1 = new_checkpoints
    # Use the checkpoints specified by the cache (or equal to new_tid_int,
    # if the cache was in the future.)

    # Rebuild delta_after0 and delta_after1, if we can.
    # If we can't, because we don't actually have a range, do nothing.
    # In the case that the checkpoints are (new_tid, new_tid),
    # we'll do nothing and have no delta maps. This is because, hopefully,
    # next time we poll we'll be able to use the global checkpoints and
    # catch up then.
    new_delta_after0 = self._delta_map_type()
    new_delta_after1 = self._delta_map_type()
    if cp1 < new_tid_int:
        # poller.list_changes(cp1, new_tid_int) provides an iterator of
        # (oid, tid) where tid > cp1 and tid <= new_tid_int. It is guaranteed
        # that each oid shows up only once.
        change_list = self.adapter.poller.list_changes(
            cursor, cp1, new_tid_int)

        # Put the changes in new_delta_after*.
        # Let the backing cache know about this (this is only done
        # for tracing).
        updating_0 = self.cache.updating_delta_map(new_delta_after0)
        updating_1 = self.cache.updating_delta_map(new_delta_after1)
        try:
            for oid_int, tid_int in change_list:
                if tid_int <= cp1 or tid_int > new_tid_int:
                    self._reset("Requested changes %d < tid <= %d "
                                "but change %d for OID %d out of range." % (
                                    cp1, new_tid_int, tid_int, oid_int))
                d = updating_0 if tid_int > cp0 else updating_1
                d[oid_int] = tid_int
        except:
            consume(change_list)
            raise

        # Everybody has a home (we didn't get duplicate entries
        # or multiple entries for the same OID with different TID)
        # This is guaranteed by the IPoller interface, so we don't waste
        # time tracking it here.

    logger.debug(
        "Built new deltas from cp1 %s to current_tid %s of sizes %d (0) and %d (1)",
        cp1, new_tid_int, len(new_delta_after0), len(new_delta_after1))

    self.checkpoints = new_checkpoints
    self.delta_after0 = new_delta_after0
    self.delta_after1 = new_delta_after1
def _set_min_oid_from_range(self, cursor, n):
    # Recall that the very first write to the database will cause
    # the file to be locked against further writes. So there's some
    # benefit in avoiding writes if they're not needed. Because we
    # keep this in a separate database as well, we can keep the connection
    # in autocommit mode.
    with self.lock:
        # We've left the underlying connection in autocommit mode.
        conn = self._connect()
        rows = conn.execute(
            'SELECT zoid FROM new_oid WHERE zoid < ?', (n,)).fetchall()
        if rows:
            # Narf, we need to execute a write transaction.
            consume(conn.execute(
                'UPDATE new_oid SET zoid = :new WHERE zoid < :new',
                {'new': n}))
def after_poll(self, cursor, prev_tid_int, new_tid_int, changes):
    """
    Update checkpoint data after a database poll.

    *cursor* is connected to a load connection.

    *prev_tid_int* is the tid that was last polled (that is, it was
    the *new_tid_int* the last time this was called).

    *changes* lists all [(oid_int, tid_int)] changed after
    *prev_tid_int*, up to and including *new_tid_int*, excluding the
    changes last committed by the associated storage instance.

    *changes* can be None to indicate that the cache is definitely
    in an inconsistent state: too much changed to be specific, there
    is no data at all (in which case *new_tid_int* should be 0), or
    the database connection is stale.

    *prev_tid_int* can be None, in which case the changes parameter
    will be ignored. new_tid_int can not be None.

    If *changes* was not None, this method returns a collection of
    OID integers from it. (Because changes is only required to be an
    iterable, you may not be able to iterate it again.)
    """
    my_prev_tid_int = self.current_tid or 0
    self.current_tid = new_tid_int

    global_checkpoints = self.cache.get_checkpoints()

    if not global_checkpoints:
        # No other instance has established an opinion yet,
        # so I get to.
        self.__poll_establish_global_checkpoints(new_tid_int)
        return

    global_checkpoints_in_future = global_checkpoints[0] > new_tid_int
    if global_checkpoints_in_future:
        # checkpoint0 is in a future that this instance can't yet
        # see. Ignore the checkpoint change for now, continue
        # with our own.
        global_checkpoints = self.checkpoints
        if not self.checkpoints:
            # How nice, this was our first poll, but
            # yet somehow we are still behind the global
            # checkpoints. The global checkpoints are probably
            # wrong (maybe there's a replica involved and the global
            # cache is now stale).
            global_checkpoints = (new_tid_int, new_tid_int)

    # We want to keep the current checkpoints for speed, but we
    # have to replace them (to avoid consistency violations)
    # if certain conditions happen (like emptying the ZODB Connection cache
    # which happens when `changes` is None).
    if (global_checkpoints == self.checkpoints
            # In sync with the world
            # Poller didn't give up, and there was data in the database
            and changes is not None
            # The storage had polled before and gotten a response
            # other than 0, meaning no data in the database.
            and prev_tid_int
            # And what we think is the last time we polled
            # is at *least* as new as the last time the storage
            # thinks it polled.
            # Since we only assign to current_tid here (and when we read
            # persistent cache data, which also ultimately came from here)
            # it's not clear how we could get ahead.
            and my_prev_tid_int >= prev_tid_int
            # And the transaction that was just polled is
            # current or in the future. If we went backwards,
            # it's because the underlying data went backwards
            # (possibly we switched to a replica that's out of date)
            # and the user configured `revert-when-stale` to be on.
            # In that case, `changes` should also be None and we really shouldn't
            # get here.
            and new_tid_int >= my_prev_tid_int):
        # All the conditions for keeping the checkpoints were met,
        # so just update self.delta_after0 and self.current_tid.
        try:
            changes = self.__poll_update_delta0_from_changes(changes)
        except:
            consume(changes)
            raise
    else:
        log.debug(
            "Using new checkpoints: %s. Current cp: %s. "
            "Too many changes? %s. prev_tid_int: %s. my_prev_tid_int: %s. "
            "new_tid_int: %s",
            global_checkpoints, self.checkpoints,
            changes is None, prev_tid_int, my_prev_tid_int, new_tid_int)
        if changes is not None:
            changes = OID_SET_TYPE([oid for oid, _tid in changes])
        self.__poll_replace_checkpoints(cursor, global_checkpoints, new_tid_int)

    if not global_checkpoints_in_future and self._should_suggest_shifted_checkpoints():
        self._suggest_shifted_checkpoints()

    return changes
def _before_commit(self, connection, _rolling_back=None):
    # Regardless of whether we're rolling back or not, we need to delete
    # everything to freshen the connection. DELETE with no WHERE clause
    # in SQLite is optimized like a TRUNCATE.
    consume(connection.execute('DELETE FROM temp_store'))
    consume(connection.execute('DELETE FROM temp_blob_chunk'))
def _lock_consume_rows_for_readCurrent(self, rows, shared_locks_block):
    # subclasses use the argument
    # pylint:disable=unused-argument
    consume(rows)
def test_select_one(self):
    cursor = MockCursor()
    batcher = self.getClass()(cursor)
    consume(
        batcher.select_from(('zoid', 'tid'), 'object_state', oids=(1,)))
    self.assertEqual(cursor.executed,
                     [(self.select_one, self._in(1,))])
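Every snippet above routes a cursor or iterator through consume() instead of silently dropping it, so the driver finishes fetching and can release locks and buffers even when the results themselves are unwanted. The helper's definition is not shown in this section; as a minimal sketch, assuming it behaves like the standard "exhaust an iterator entirely" recipe (an assumption, not the project's actual implementation), it could look like this:

from collections import deque

def consume(iterator):
    # Sketch only: feed the iterator into a zero-length deque, which
    # exhausts it at C speed without retaining any of the items.
    deque(iterator, maxlen=0)

Used this way, consume(cursor.execute(...)) guarantees the statement's result set is fully drained regardless of whether any rows are needed.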