def move_from_temp(self, cursor, tid, txn_has_blobs):
    """Move the temporarily stored objects to permanent storage.

    Returns the list of oids stored.
    """
    if self.keep_history:
        stmt = self._move_from_temp_hp_insert_query
        cursor.execute(stmt, (tid,))
    else:
        self._move_from_temp_object_state(cursor, tid)

        if txn_has_blobs:
            stmt = """
            DELETE FROM blob_chunk
            WHERE zoid IN (SELECT zoid FROM temp_store)
            """
            cursor.execute(stmt)

    if txn_has_blobs:
        stmt = self._move_from_temp_copy_blob_query
        cursor.execute(stmt, (tid,))

    stmt = """
    SELECT zoid FROM temp_store
    """
    cursor.execute(stmt)
    return [oid for (oid,) in fetchmany(cursor)]

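# Note: `_move_from_temp_hp_insert_query` and `_move_from_temp_copy_blob_query`
# are adapter-defined attributes not shown in this section.  Judging from
# `generic_move_from_temp` below, the history-preserving insert presumably
# looks roughly like this (a sketch, not the authoritative definition):
#
#   INSERT INTO object_state
#       (zoid, tid, prev_tid, md5, state_size, state)
#   SELECT zoid, %s, prev_tid, md5, COALESCE(LENGTH(state), 0), state
#   FROM temp_store
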
def _add_refs_for_oids(self, cursor, oids, get_references):
    """Fill object_refs with the states for some objects.

    Returns the number of references added.
    """
    oid_list = ','.join(str(oid) for oid in oids)
    use_base64 = (self.database_type == 'postgresql')

    if use_base64:
        stmt = """
        SELECT zoid, tid, encode(state, 'base64')
        FROM object_state
        WHERE zoid IN (%s)
        """ % oid_list
    else:
        stmt = """
        SELECT zoid, tid, state
        FROM object_state
        WHERE zoid IN (%s)
        """ % oid_list
    self.runner.run_script_stmt(cursor, stmt)

    add_objects = []
    add_refs = []
    for from_oid, tid, state in fetchmany(cursor):
        if hasattr(state, 'read'):
            # Oracle
            state = state.read()
        add_objects.append((from_oid, tid))
        if state:
            state = decode_bytes_param(state, use_base64)
            try:
                to_oids = get_references(state)
            except:
                log.error("pre_pack: can't unpickle "
                          "object %d in transaction %d; state length = %d",
                          from_oid, tid, len(state))
                raise
            for to_oid in to_oids:
                add_refs.append((from_oid, tid, to_oid))

    if not add_objects:
        return 0

    stmt = "DELETE FROM object_refs_added WHERE zoid IN (%s)" % oid_list
    self.runner.run_script_stmt(cursor, stmt)
    stmt = "DELETE FROM object_ref WHERE zoid IN (%s)" % oid_list
    self.runner.run_script_stmt(cursor, stmt)

    stmt = """
    INSERT INTO object_ref (zoid, tid, to_zoid)
    VALUES (%s, %s, %s)
    """
    self.runner.run_many(cursor, stmt, add_refs)

    stmt = """
    INSERT INTO object_refs_added (zoid, tid)
    VALUES (%s, %s)
    """
    self.runner.run_many(cursor, stmt, add_objects)

    return len(add_refs)

def _add_refs_for_tid(self, cursor, tid, get_references):
    """Fill object_refs with all states for a transaction.

    Returns the number of references added.
    """
    log.debug("pre_pack: transaction %d: computing references", tid)
    from_count = 0
    stmt = """
    SELECT zoid, state
    FROM object_state
    WHERE tid = %(tid)s
    """
    self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

    add_rows = []  # [(from_oid, tid, to_oid)]
    for from_oid, state in fetchmany(cursor):
        state = db_binary_to_bytes(state)
        if hasattr(state, 'read'):
            # Oracle
            state = state.read()
        if state:
            assert isinstance(state, bytes), type(state)  # PY3: used to be str(state)
            from_count += 1
            try:
                to_oids = get_references(state)
            except:
                log.error("pre_pack: can't unpickle "
                          "object %d in transaction %d; state length = %d",
                          from_oid, tid, len(state))
                raise
            for to_oid in to_oids:
                add_rows.append((from_oid, tid, to_oid))

    # A previous pre-pack may have been interrupted.  Delete rows
    # from the interrupted attempt.
    stmt = "DELETE FROM object_ref WHERE tid = %(tid)s"
    self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

    # Add the new references.
    stmt = """
    INSERT INTO object_ref (zoid, tid, to_zoid)
    VALUES (%s, %s, %s)
    """
    self.runner.run_many(cursor, stmt, add_rows)

    # The references have been computed for this transaction.
    stmt = """
    INSERT INTO object_refs_added (tid)
    VALUES (%(tid)s)
    """
    self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

    to_count = len(add_rows)
    log.debug("pre_pack: transaction %d: has %d reference(s) "
              "from %d object(s)", tid, to_count, from_count)
    return to_count

def fill_object_refs(self, conn, cursor, get_references):
    """Update the object_refs table by analyzing new object states.

    Note that ZODB connections can change the object states while this
    method is running, possibly obscuring object references, so this
    method runs repeatedly until it detects no changes between two passes.
    """
    holding_commit = False
    attempt = 0
    while True:
        attempt += 1
        if attempt >= 3 and not holding_commit:
            # Starting with the third attempt, hold the commit lock
            # to prevent changes.
            holding_commit = True
            self.locker.hold_commit_lock(cursor)

        stmt = """
        SELECT object_state.zoid FROM object_state
            LEFT JOIN object_refs_added
                ON (object_state.zoid = object_refs_added.zoid)
        WHERE object_refs_added.tid IS NULL
            OR object_refs_added.tid != object_state.tid
        ORDER BY object_state.zoid
        """
        self.runner.run_script_stmt(cursor, stmt)
        oids = [oid for (oid,) in fetchmany(cursor)]
        log_at = time.time() + 60
        if oids:
            if attempt == 1:
                self.on_filling_object_refs()
            oid_count = len(oids)
            oids_done = 0
            log.info("pre_pack: analyzing references from %d object(s)",
                     oid_count)
            while oids:
                batch = oids[:100]
                oids = oids[100:]
                self._add_refs_for_oids(cursor, batch, get_references)
                oids_done += len(batch)
                now = time.time()
                if now >= log_at:
                    # Save the work done so far.
                    conn.commit()
                    log_at = now + 60
                    log.info("pre_pack: objects analyzed: %d/%d",
                             oids_done, oid_count)
            conn.commit()
            log.info("pre_pack: objects analyzed: %d/%d",
                     oids_done, oid_count)
        else:
            # No changes since last pass.
            break

    if holding_commit:
        self.locker.release_commit_lock(cursor)
        log.info("attempts: %d - lock released", attempt)

def _pack_transaction(self, cursor, pack_tid, tid, packed,
                      has_removable, packed_list):
    """Pack one transaction.  Requires populated pack tables."""
    log.debug("pack: transaction %d: packing", tid)
    removed_objects = 0
    removed_states = 0

    if has_removable:
        stmt = self._script_pack_current_object
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
        removed_objects = cursor.rowcount

        stmt = self._script_pack_object_state
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
        removed_states = cursor.rowcount

        # Terminate prev_tid chains
        stmt = """
        UPDATE object_state SET prev_tid = 0
        WHERE prev_tid = %(tid)s
            AND tid <= %(pack_tid)s
        """
        self.runner.run_script_stmt(cursor, stmt,
                                    {'pack_tid': pack_tid, 'tid': tid})

        stmt = """
        SELECT pack_state.zoid
        FROM pack_state
        WHERE pack_state.tid = %(tid)s
        """
        self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
        for (oid,) in fetchmany(cursor):
            packed_list.append((oid, tid))

    # Find out whether the transaction is empty
    stmt = self._script_transaction_has_data
    self.runner.run_script_stmt(cursor, stmt, {'tid': tid})
    empty = not list(cursor)

    # Mark the transaction packed and possibly empty
    if empty:
        clause = 'empty = %(TRUE)s'
        state = 'empty'
    else:
        clause = 'empty = %(FALSE)s'
        state = 'not empty'
    stmt = "UPDATE transaction SET packed = %(TRUE)s, " + clause
    stmt += " WHERE tid = %(tid)s"
    self.runner.run_script_stmt(cursor, stmt, {'tid': tid})

    log.debug(
        "pack: transaction %d (%s): removed %d object(s) and %d state(s)",
        tid, state, removed_objects, removed_states)

def current_object_tids(self, cursor, oids):
    """Returns the current {oid: tid} for specified object ids."""
    res = {}
    _stmt = self._current_object_tids_query
    oids = list(oids)
    while oids:
        # XXX: Dangerous (SQL injection)!  And probably slow.
        # Can we do better?
        oid_list = ','.join(str(oid) for oid in oids[:1000])
        del oids[:1000]
        stmt = _stmt % (oid_list,)
        cursor.execute(stmt)
        for oid, tid in fetchmany(cursor):
            res[oid] = tid
    return res

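# A minimal sketch of the safer alternative hinted at by the XXX comment in
# `current_object_tids`: the same batching, but with driver-side parameter
# binding instead of string interpolation.  This helper is hypothetical (not
# part of the original API) and assumes a driver with the '%s' paramstyle
# (e.g. psycopg2 or MySQLdb) and a plain object_state lookup.
def _current_object_tids_parameterized(self, cursor, oids):
    res = {}
    oids = list(oids)
    while oids:
        batch = oids[:1000]
        del oids[:1000]
        # One placeholder per oid; the driver performs the escaping.
        placeholders = ','.join(['%s'] * len(batch))
        stmt = "SELECT zoid, tid FROM object_state WHERE zoid IN (%s)" % (
            placeholders,)
        cursor.execute(stmt, batch)
        for oid, tid in fetchmany(cursor):
            res[oid] = tid
    return res
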
def generic_current_object_tids(self, cursor, oids):
    """Returns the current {oid: tid} for specified object ids."""
    res = {}
    if self.keep_history:
        table = 'current_object'
    else:
        table = 'object_state'
    oids = list(oids)
    while oids:
        oid_list = ','.join(str(oid) for oid in oids[:1000])
        del oids[:1000]
        stmt = "SELECT zoid, tid FROM %s WHERE zoid IN (%s)" % (
            table, oid_list)
        cursor.execute(stmt)
        for oid, tid in fetchmany(cursor):
            res[oid] = tid
    return res

def _traverse_graph(self, cursor):
    """Visit the entire object graph to find out what should be kept.

    Sets the pack_object.keep flags.
    """
    log.info("pre_pack: downloading pack_object and object_ref.")

    # Note: TreeSet can be updated at random much faster than Set,
    # but TreeSet consumes more memory.  (Random TreeSet updates are
    # probably O(log n) while random Set updates are probably O(n).
    # OTOH, adding to Sets or TreeSets in order is an O(1) operation.)
    Set = BTrees.family64.II.Set
    TreeSet = BTrees.family64.II.TreeSet
    Bucket = BTrees.family64.IO.Bucket
    set_difference = BTrees.family64.II.difference

    # Download the list of root objects to keep from pack_object.
    keep_set = TreeSet()
    stmt = """
    SELECT zoid
    FROM pack_object
    WHERE keep = %(TRUE)s
    """
    self.runner.run_script_stmt(cursor, stmt)
    for (from_oid,) in fetchmany(cursor):
        keep_set.insert(from_oid)

    # Download the list of object references into all_refs.
    all_refs = Bucket()  # {from_oid: set([to_oid])}
    # Note the Oracle optimizer hints in the following statement; MySQL
    # and PostgreSQL ignore these.  Oracle fails to notice that pack_object
    # is now filled and chooses the wrong execution plan, completely
    # killing this query on large RelStorage databases, unless these hints
    # are included.
    stmt = """
    SELECT /*+ FULL(object_ref) */ /*+ FULL(pack_object) */
        object_ref.zoid, object_ref.to_zoid
    FROM object_ref
        JOIN pack_object ON (object_ref.zoid = pack_object.zoid)
    WHERE object_ref.tid >= pack_object.keep_tid
    ORDER BY object_ref.zoid, object_ref.to_zoid
    """
    # While downloading the OIDs, move them to Set and Bucket
    # objects.  A Set takes a lot less RAM than Python integer sets.
    # Grouped by object_ref.zoid, store all object_ref.to_zoid in sets.
    self.runner.run_script_stmt(cursor, stmt)
    for from_oid, rows in groupby(fetchmany(cursor), itemgetter(0)):
        d = all_refs.get(from_oid)
        if d is None:
            all_refs[from_oid] = d = Set()
        d.update(row[1] for row in rows)

    # Traverse the object graph.  Add all of the reachable OIDs
    # to keep_set.
    log.info("pre_pack: traversing the object graph "
             "to find reachable objects.")
    parents = Set(keep_set)
    pass_num = 0
    while parents:
        pass_num += 1
        children = TreeSet()
        for parent in parents:
            to_oids = all_refs.get(parent)
            if to_oids:
                children.update(to_oids)
        parents = set_difference(children, keep_set)
        keep_set.update(parents)
        log.debug("pre_pack: found %d more referenced object(s) in "
                  "pass %d", len(parents), pass_num)

    # Set pack_object.keep for all OIDs in keep_set.
    del all_refs  # Free some RAM
    log.info("pre_pack: marking objects reachable: %d", len(keep_set))
    batch = []

    def upload_batch():
        oids_str = ','.join(str(oid) for oid in batch)
        del batch[:]
        stmt = """
        UPDATE pack_object SET keep = %%(TRUE)s, visited = %%(TRUE)s
        WHERE zoid IN (%s)
        """ % oids_str
        self.runner.run_script_stmt(cursor, stmt)

    for oid in keep_set:
        batch.append(oid)
        if len(batch) >= 1000:
            upload_batch()
    if batch:
        upload_batch()

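# For reference, a rough illustration of how the BTrees structures above
# behave (assuming the BTrees package is importable; this is what makes
# each traversal pass terminate, since `parents` shrinks to only the
# newly discovered OIDs):
#
#   from BTrees import family64
#   keep = family64.II.Set([1, 2, 3])
#   children = family64.II.TreeSet([2, 3, 4])
#   list(family64.II.difference(children, keep))  # -> [4]
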
def generic_move_from_temp(self, cursor, tid, txn_has_blobs):
    """Move the temporarily stored objects to permanent storage.

    Returns the list of oids stored.
    """
    if self.keep_history:
        if self.database_type == 'oracle':
            stmt = """
            INSERT INTO object_state
                (zoid, tid, prev_tid, md5, state_size, state)
            SELECT zoid, :1, prev_tid, md5,
                COALESCE(LENGTH(state), 0), state
            FROM temp_store
            """
        else:
            stmt = """
            INSERT INTO object_state
                (zoid, tid, prev_tid, md5, state_size, state)
            SELECT zoid, %s, prev_tid, md5,
                COALESCE(LENGTH(state), 0), state
            FROM temp_store
            """
        cursor.execute(stmt, (tid,))
    else:
        if self.database_type == 'mysql':
            stmt = """
            REPLACE INTO object_state (zoid, tid, state_size, state)
            SELECT zoid, %s, COALESCE(LENGTH(state), 0), state
            FROM temp_store
            """
            cursor.execute(stmt, (tid,))
        else:
            stmt = """
            DELETE FROM object_state
            WHERE zoid IN (SELECT zoid FROM temp_store)
            """
            cursor.execute(stmt)
            if self.database_type == 'oracle':
                stmt = """
                INSERT INTO object_state (zoid, tid, state_size, state)
                SELECT zoid, :1, COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
            else:
                stmt = """
                INSERT INTO object_state (zoid, tid, state_size, state)
                SELECT zoid, %s, COALESCE(LENGTH(state), 0), state
                FROM temp_store
                """
            cursor.execute(stmt, (tid,))

        if txn_has_blobs:
            stmt = """
            DELETE FROM blob_chunk
            WHERE zoid IN (SELECT zoid FROM temp_store)
            """
            cursor.execute(stmt)

    if txn_has_blobs:
        if self.database_type == 'oracle':
            stmt = """
            INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
            SELECT zoid, :1, chunk_num, chunk
            FROM temp_blob_chunk
            """
        else:
            stmt = """
            INSERT INTO blob_chunk (zoid, tid, chunk_num, chunk)
            SELECT zoid, %s, chunk_num, chunk
            FROM temp_blob_chunk
            """
        cursor.execute(stmt, (tid,))

    stmt = """
    SELECT zoid FROM temp_store
    """
    cursor.execute(stmt)
    return [oid for (oid,) in fetchmany(cursor)]

def pack(self, pack_tid, sleep=None, packed_func=None):
    """Pack.  Requires the information provided by pre_pack."""
    # Read committed mode is sufficient.
    conn, cursor = self.connmanager.open()
    try:
        try:
            stmt = """
            SELECT transaction.tid,
                CASE WHEN packed = %(TRUE)s THEN 1 ELSE 0 END,
                CASE WHEN pack_state_tid.tid IS NOT NULL THEN 1 ELSE 0 END
            FROM transaction
                LEFT JOIN pack_state_tid ON (
                    transaction.tid = pack_state_tid.tid)
            WHERE transaction.tid > 0
                AND transaction.tid <= %(pack_tid)s
                AND (packed = %(FALSE)s OR pack_state_tid.tid IS NOT NULL)
            """
            self.runner.run_script_stmt(
                cursor, stmt, {'pack_tid': pack_tid})
            tid_rows = list(fetchmany(cursor))
            tid_rows.sort()  # oldest first

            total = len(tid_rows)
            log.info("pack: will pack %d transaction(s)", total)

            stmt = self._script_create_temp_pack_visit
            if stmt:
                self.runner.run_script(cursor, stmt)

            # Hold the commit lock while packing to prevent deadlocks.
            # Pack in small batches of transactions only after we are able
            # to obtain a commit lock in order to minimize the
            # interruption of concurrent write operations.
            start = time.time()
            packed_list = []
            counter, lastreport, statecounter = 0, 0, 0
            # We'll report on progress in at most .1% step increments.
            # (Integer division keeps the step an int on Python 3.)
            reportstep = max(total // 1000, 1)

            self._pause_pack_until_lock(cursor, sleep)
            for tid, packed, has_removable in tid_rows:
                self._pack_transaction(
                    cursor, pack_tid, tid, packed, has_removable,
                    packed_list)
                counter += 1
                if time.time() >= start + self.options.pack_batch_timeout:
                    conn.commit()
                    if packed_func is not None:
                        for oid, tid in packed_list:
                            packed_func(oid, tid)
                    statecounter += len(packed_list)
                    if counter >= lastreport + reportstep:
                        log.info("pack: packed %d (%.1f%%) transaction(s), "
                                 "affecting %d states",
                                 counter, counter / float(total) * 100,
                                 statecounter)
                        lastreport = counter // reportstep * reportstep
                    del packed_list[:]
                    self.locker.release_commit_lock(cursor)
                    self._pause_pack_until_lock(cursor, sleep)
                    start = time.time()
            if packed_func is not None:
                for oid, tid in packed_list:
                    packed_func(oid, tid)
            packed_list = None

            self._pack_cleanup(conn, cursor, sleep)

        except:
            log.exception("pack: failed")
            conn.rollback()
            raise

        else:
            log.info("pack: finished successfully")
            conn.commit()
    finally:
        self.connmanager.close(conn, cursor)

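# Hypothetical usage sketch (wiring assumed; the pre_pack step is referenced
# by the docstring above but not shown in this section):
#
#   packundo.pre_pack(pack_tid, get_references)     # populates the pack tables
#   packundo.pack(pack_tid, packed_func=on_packed)  # consumes them
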
def _fetchmany(self, cursor):
    return fetchmany(cursor)

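# `fetchmany` (a module-level helper imported elsewhere) is assumed to
# iterate a cursor in fixed-size batches, roughly equivalent to this sketch:
#
#   def fetchmany(cursor):
#       while True:
#           rows = cursor.fetchmany(1000)
#           if not rows:
#               break
#           for row in rows:
#               yield row
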
def pack(self, pack_tid, sleep=None, packed_func=None):
    """Run garbage collection.

    Requires the information provided by pre_pack.
    """
    # Read committed mode is sufficient.
    conn, cursor = self.connmanager.open()
    try:
        try:
            stmt = """
            SELECT zoid, keep_tid
            FROM pack_object
            WHERE keep = %(FALSE)s
            """
            self.runner.run_script_stmt(cursor, stmt)
            to_remove = list(fetchmany(cursor))

            total = len(to_remove)
            log.info("pack: will remove %d object(s)", total)

            # Hold the commit lock while packing to prevent deadlocks.
            # Pack in small batches of transactions only after we are able
            # to obtain a commit lock in order to minimize the
            # interruption of concurrent write operations.
            start = time.time()
            packed_list = []
            # We'll report on progress in at most .1% step increments.
            # (Integer division keeps the step an int on Python 3.)
            lastreport, reportstep = 0, max(total // 1000, 1)

            self._pause_pack_until_lock(cursor, sleep)
            while to_remove:
                items = to_remove[:100]
                del to_remove[:100]
                stmt = """
                DELETE FROM object_state
                WHERE zoid = %s AND tid = %s
                """
                self.runner.run_many(cursor, stmt, items)
                packed_list.extend(items)

                if time.time() >= start + self.options.pack_batch_timeout:
                    conn.commit()
                    if packed_func is not None:
                        for oid, tid in packed_list:
                            packed_func(oid, tid)
                    del packed_list[:]
                    counter = total - len(to_remove)
                    if counter >= lastreport + reportstep:
                        log.info("pack: removed %d (%.1f%%) state(s)",
                                 counter, counter / float(total) * 100)
                        lastreport = counter // reportstep * reportstep
                    self.locker.release_commit_lock(cursor)
                    self._pause_pack_until_lock(cursor, sleep)
                    start = time.time()

            if packed_func is not None:
                for oid, tid in packed_list:
                    packed_func(oid, tid)
            packed_list = None

            self._pack_cleanup(conn, cursor)

        except:
            log.exception("pack: failed")
            conn.rollback()
            raise

        else:
            log.info("pack: finished successfully")
            conn.commit()
    finally:
        self.connmanager.close(conn, cursor)