def main(self):
    """Run the librarian garbage collection passes selected by the options.

    Each pass is skipped when its corresponding ``skip_*`` option is set.
    The passes issue their own commits as appropriate, which keeps this
    script transaction friendly.
    """
    librariangc.log = self.logger
    options = self.options
    if options.loglevel <= logging.DEBUG:
        librariangc.debug = True

    # XXX wgrant 2011-09-18 bug=853066: Using Storm's raw connection
    # here is wrong. We should either create our own or use
    # Store.execute or cursor() and the transaction module.
    conn = IStore(LibraryFileAlias)._connection._raw_connection

    # Bail out early when the librarian and database clocks disagree
    # significantly.
    librariangc.confirm_no_clock_skew(conn)

    # Ordered (skip option, librariangc function) pairs. The order matters:
    # unreferenced content is swept once before and once after the alias
    # and duplicate passes. Names are resolved lazily so that a skipped
    # pass is never even looked up.
    passes = (
        ('skip_expiry', 'expire_aliases'),
        ('skip_content', 'delete_unreferenced_content'),  # First sweep.
        ('skip_blobs', 'delete_expired_blobs'),
        ('skip_duplicates', 'merge_duplicates'),
        ('skip_aliases', 'delete_unreferenced_aliases'),
        ('skip_content', 'delete_unreferenced_content'),  # Second sweep.
        ('skip_files', 'delete_unwanted_files'),
    )
    for skip_option, pass_name in passes:
        if getattr(options, skip_option):
            continue
        getattr(librariangc, pass_name)(conn)
def test_DoneDeleteUnreferencedButNotExpiredAliases(self):
    # LibraryFileAliases may only be removed once they have expired. An
    # alias with an explicit expiry in the recent past (currently up to
    # one week ago) has to hang around.

    # After merging, both example aliases share one LibraryFileContent.
    librariangc.merge_duplicates(self.con)

    # Give one of the aliases an expiry date in the recent past.
    self.ztm.begin()
    recently_expired = LibraryFileAlias.get(self.f1_id)
    recently_expired.expires = self.recent_past
    del recently_expired
    self.ztm.commit()

    # Collecting unreferenced aliases should leave our example aliases
    # alone: one is unreferenced with a NULL expiry and the other is
    # unreferenced with an expiry in the recent past.
    librariangc.delete_unreferenced_aliases(self.con)

    # Only the recently expired alias is actually checked here; the
    # lookup itself is the assertion that it is still available.
    self.ztm.begin()
    LibraryFileAlias.get(self.f1_id)
def test_DeleteUnreferencedAliases2(self):
    # A LibraryFileAlias that was created recently must not be deleted.

    # After merging, both example aliases share one LibraryFileContent.
    librariangc.merge_duplicates(self.con)

    # Confirm the two aliases really do share their content.
    self.ztm.begin()
    recent_alias = LibraryFileAlias.get(self.f1_id)
    other_alias = LibraryFileAlias.get(self.f2_id)
    self.assertEqual(recent_alias.content, other_alias.content)

    # Mark the first alias as having been created recently.
    recent_alias.date_created = self.recent_past
    del recent_alias
    del other_alias
    self.ztm.commit()

    # Collecting unreferenced aliases should remove the alias with the
    # ID self.f2_id, while the recently created one stays.
    librariangc.delete_unreferenced_aliases(self.con)

    self.ztm.begin()
    # The lookup itself is the check that the first alias survived.
    LibraryFileAlias.get(self.f1_id)
    self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)
def test_MergeDuplicates(self):
    # merge_duplicates should leave both example aliases pointing at
    # the same LibraryFileContent.

    # Merge the duplicates
    librariangc.merge_duplicates(self.con)

    # merge_duplicates should have committed, so rolling back a fresh
    # transaction here should not lose the merge.
    self.ztm.begin()
    self.ztm.abort()

    # Confirm that the duplicates have been merged
    self.ztm.begin()
    f1 = LibraryFileAlias.get(self.f1_id)
    f2 = LibraryFileAlias.get(self.f2_id)
    # assertEqual rather than the deprecated failUnlessEqual alias, which
    # matches the other tests in this file.
    self.assertEqual(f1.contentID, f2.contentID)
def test_DeleteUnreferencedContent(self):
    # delete_unreferenced_content should remove unreferenced
    # LibraryFileContent rows and their files on disk.

    # Merge the duplicates. This creates an
    # unreferenced LibraryFileContent
    librariangc.merge_duplicates(self.con)

    self.ztm.begin()

    # Locate the unreferenced LibraryFileContent
    # NOTE(review): f1_id/f2_id are used as LibraryFileAlias ids elsewhere
    # in these tests but filter LibraryFileContent.id here -- presumably
    # the fixture creates matching ids; confirm.
    cur = cursor()
    cur.execute(
        """ SELECT LibraryFileContent.id FROM LibraryFileContent LEFT OUTER JOIN LibraryFileAlias ON LibraryFileContent.id = LibraryFileAlias.content WHERE LibraryFileAlias.id IS NULL AND LibraryFileContent.id IN (%d, %d) """
        % (self.f1_id, self.f2_id))
    results = cur.fetchall()
    self.assertEqual(len(results), 1)
    unreferenced_id = results[0][0]

    self.ztm.abort()

    # Make sure the file exists on disk
    path = librariangc.get_file_path(unreferenced_id)
    self.assertTrue(os.path.exists(path))

    # Delete unreferenced content
    librariangc.delete_unreferenced_content(self.con)

    # Make sure the file is gone
    self.assertFalse(os.path.exists(path))

    # delete_unreferenced_content should have committed
    self.ztm.begin()

    # Make sure the unreferenced entries have all gone
    cur = cursor()
    cur.execute(
        """ SELECT LibraryFileContent.id FROM LibraryFileContent LEFT OUTER JOIN LibraryFileAlias ON LibraryFileContent.id = LibraryFileAlias.content WHERE LibraryFileAlias.id IS NULL """)
    results = list(cur.fetchall())
    self.assertEqual(
        len(results), 0, 'Too many results %r' % (results,)
        )
def test_DeleteUnreferencedAndWellExpiredAliases(self):
    # Once a LibraryFileAlias has expired it can be removed.

    # After merging, both example aliases share one LibraryFileContent.
    librariangc.merge_duplicates(self.con)

    # Give one of the aliases an expiry date well in the past.
    self.ztm.begin()
    long_expired = LibraryFileAlias.get(self.f1_id)
    long_expired.expires = self.ancient_past
    del long_expired
    self.ztm.commit()

    # Collecting unreferenced aliases should remove both example
    # aliases: one is unreferenced with a NULL expiry and the other is
    # unreferenced with an expiry in the past.
    librariangc.delete_unreferenced_aliases(self.con)

    # Neither alias should be retrievable any more.
    self.ztm.begin()
    for alias_id in (self.f1_id, self.f2_id):
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, alias_id)
def test_DeleteUnreferencedContent2(self):
    # Like test_DeleteUnreferencedContent, except that the file is
    # removed from disk before attempting to remove the unreferenced
    # LibraryFileContent.
    #
    # Because the garbage collector will remove an unreferenced file from
    # disk before it commits the database changes, it is possible that the
    # db removal will fail (eg. an exception was raised on COMMIT) leaving
    # the rows untouched in the database but no file on disk.
    # This is fine, as the next gc run will attempt it again and
    # nothing can use unreferenced files anyway. This test ensures
    # that this all works.

    # Merge the duplicates. This creates an
    # unreferenced LibraryFileContent
    librariangc.merge_duplicates(self.con)

    self.ztm.begin()

    # Locate the unreferenced LibraryFileContent
    # NOTE(review): f1_id/f2_id are used as LibraryFileAlias ids elsewhere
    # in these tests but filter LibraryFileContent.id here -- presumably
    # the fixture creates matching ids; confirm.
    cur = cursor()
    cur.execute(
        """ SELECT LibraryFileContent.id FROM LibraryFileContent LEFT OUTER JOIN LibraryFileAlias ON LibraryFileContent.id = LibraryFileAlias.content WHERE LibraryFileAlias.id IS NULL AND LibraryFileContent.id IN (%d, %d) """
        % (self.f1_id, self.f2_id))
    results = cur.fetchall()
    self.assertEqual(len(results), 1)
    unreferenced_id = results[0][0]

    self.ztm.abort()

    # Make sure the file exists on disk
    path = librariangc.get_file_path(unreferenced_id)
    self.assertTrue(os.path.exists(path))

    # Remove the file from disk, simulating an earlier gc run that
    # deleted the file but did not get its database changes committed.
    os.unlink(path)
    self.assertFalse(os.path.exists(path))

    # Delete unreferenced content
    librariangc.delete_unreferenced_content(self.con)

    # Make sure the file is gone
    self.assertFalse(os.path.exists(path))

    # delete_unreferenced_content should have committed
    self.ztm.begin()

    # Make sure the unreferenced entries have all gone
    cur = cursor()
    cur.execute(
        """ SELECT LibraryFileContent.id FROM LibraryFileContent LEFT OUTER JOIN LibraryFileAlias ON LibraryFileContent.id = LibraryFileAlias.content WHERE LibraryFileAlias.id IS NULL """)
    results = list(cur.fetchall())
    self.assertEqual(
        len(results), 0, 'Too many results %r' % (results,)
        )