def test_DoneDeleteUnreferencedButNotExpiredAliases(self):
    # LibraryFileAliases can be removed only after they have expired.
    # If an explicit expiry is set and in recent past (currently up to
    # one week ago), the files hang around.

    # Merge the duplicates so both aliases share one LibraryFileContent.
    librariangc.merge_duplicates(self.con)

    # Give one of our LibraryFileAliases an expiry date in the
    # recent past.
    self.ztm.begin()
    recently_expired = LibraryFileAlias.get(self.f1_id)
    recently_expired.expires = self.recent_past
    del recently_expired
    self.ztm.commit()

    # Delete unreferenced LibraryFileAliases.
    librariangc.delete_unreferenced_aliases(self.con)

    # Our recently expired LibraryFileAlias is within its stay of
    # execution and must still be available.
    self.ztm.begin()
    LibraryFileAlias.get(self.f1_id)
def test_cronscript(self):
    # Run the librarian-gc cron script and confirm it garbage collects
    # our example files while leaving referenced data alone.
    script_path = os.path.join(config.root, 'cronscripts', 'librarian-gc.py')
    process = Popen(
        [sys.executable, script_path, '-q'],
        stdout=PIPE, stderr=STDOUT, stdin=PIPE)
    script_output, _empty = process.communicate()
    self.failUnlessEqual(
        process.returncode, 0, 'Error: %s' % script_output)
    self.failUnlessEqual(script_output, '')

    # Make sure that our example files have been garbage collected.
    self.ztm.begin()
    self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
    self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

    # And make sure stuff that *is* referenced remains.
    LibraryFileAlias.get(2)
    cur = cursor()
    for table in ('LibraryFileAlias', 'LibraryFileContent'):
        cur.execute("SELECT count(*) FROM %s" % table)
        self.failIfEqual(cur.fetchone()[0], 0)
def test_DeleteUnreferencedAliases2(self):
    # Don't delete LibraryFileAliases accessed recently.

    # Merge the duplicates so both aliases point at the same
    # LibraryFileContent.
    librariangc.merge_duplicates(self.con)

    # The two aliases now share a single content record.
    self.ztm.begin()
    first = LibraryFileAlias.get(self.f1_id)
    second = LibraryFileAlias.get(self.f2_id)
    self.assertEqual(first.content, second.content)

    # Mark the first alias as having been created recently.
    first.date_created = self.recent_past
    del first
    del second
    self.ztm.commit()

    # Delete unreferenced LibraryFileAliases. The alias with ID
    # self.f2_id goes, but the recently accessed one must survive.
    librariangc.delete_unreferenced_aliases(self.con)

    self.ztm.begin()
    LibraryFileAlias.get(self.f1_id)
    self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)
def test_DeleteUnreferencedAliases(self):
    self.ztm.begin()

    # Confirm that our sample files are there, and note the content IDs
    # behind these unreferenced LibraryFileAliases.
    alias_one = LibraryFileAlias.get(self.f1_id)
    alias_two = LibraryFileAlias.get(self.f2_id)
    content_ids = (alias_one.contentID, alias_two.contentID)
    del alias_one, alias_two
    self.ztm.abort()

    # Delete unreferenced aliases; this commits on our behalf.
    librariangc.delete_unreferenced_aliases(self.con)

    self.ztm.begin()
    # The LibraryFileContents are still there ...
    for content_id in content_ids:
        LibraryFileContent.get(content_id)
    # ... but the LibraryFileAliases are gone.
    for alias_id in (self.f1_id, self.f2_id):
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, alias_id)
def test_MergeDuplicates(self):
    # Merge the duplicates.
    librariangc.merge_duplicates(self.con)

    # merge_duplicates should have committed, so a fresh transaction
    # can be opened and discarded without losing its work.
    self.ztm.begin()
    self.ztm.abort()

    # Confirm that the duplicates have been merged: both aliases now
    # reference the same content row.
    self.ztm.begin()
    first = LibraryFileAlias.get(self.f1_id)
    second = LibraryFileAlias.get(self.f2_id)
    self.failUnlessEqual(first.contentID, second.contentID)
def test__getURLForDownload(self):
    # This protected method is used by getFileByAlias. It is supposed to
    # use the internal host and port rather than the external, proxied
    # host and port. This is to provide relief for our own issues with
    # the problems reported in bug 317482.
    #
    # (Set up:)
    client = LibrarianClient()
    alias_id = client.addFile(
        'sample.txt', 6, StringIO('sample'), 'text/plain')
    config.push('test config', textwrap.dedent('''\
        [librarian]
        download_host: example.org
        download_port: 1234
        '''))
    try:
        # (Test:)
        # The LibrarianClient should use the download_host and
        # download_port.
        expected_prefix = 'http://example.org:1234/'
        url = client._getURLForDownload(alias_id)
        self.assertTrue(
            url.startswith(expected_prefix),
            'expected %s to start with %s' % (url, expected_prefix))
        # If the alias has been deleted, _getURLForDownload returns None.
        alias = LibraryFileAlias.get(alias_id)
        alias.content = None
        # Prevent a ProgrammingError.
        call = block_implicit_flushes(LibrarianClient._getURLForDownload)
        self.assertEqual(call(client, alias_id), None)
    finally:
        # (Tear down:)
        config.pop('test config')
def test_restricted_getURLForDownload(self):
    # The RestrictedLibrarianClient should use the
    # restricted_download_host and restricted_download_port, but is
    # otherwise identical to the behaviour of the LibrarianClient
    # discussed and demonstrated above.
    #
    # (Set up:)
    client = RestrictedLibrarianClient()
    alias_id = client.addFile(
        'sample.txt', 6, StringIO('sample'), 'text/plain')
    config.push('test config', textwrap.dedent('''\
        [librarian]
        restricted_download_host: example.com
        restricted_download_port: 5678
        '''))
    try:
        # (Test:)
        # The client should use the restricted download host and port.
        expected_prefix = 'http://example.com:5678/'
        url = client._getURLForDownload(alias_id)
        self.assertTrue(
            url.startswith(expected_prefix),
            'expected %s to start with %s' % (url, expected_prefix))
        # If the alias has been deleted, _getURLForDownload returns None.
        alias = LibraryFileAlias.get(alias_id)
        alias.content = None
        # Prevent a ProgrammingError.
        call = block_implicit_flushes(
            RestrictedLibrarianClient._getURLForDownload)
        self.assertEqual(call(client, alias_id), None)
    finally:
        # (Tear down:)
        config.pop('test config')
def getAlias(self, aliasid, token, path):
    """Returns a LibraryFileAlias, or raises LookupError.

    A LookupError is raised if no record with the given ID exists
    or if no related LibraryFileContent exists.

    :param token: The token for the file. If None no token is present.
        When a token is supplied, it is looked up with path.
    :param path: The path the request is for, unused unless a token
        is supplied; when supplied it must match the token. The
        value of path is expected to be that from a twisted request.args
        e.g. /foo/bar.
    """
    restricted = self.restricted
    if token and path:
        # With a token and a path we may be able to serve restricted
        # files on the public port.
        store = session_store()
        # is_empty() is True when NO matching token exists; the
        # original bound this to a variable named 'token_found',
        # inverting its meaning for readers.  Name it for what it is.
        token_missing = store.find(
            TimeLimitedToken,
            SQL("age(created) < interval '1 day'"),
            TimeLimitedToken.token == token,
            TimeLimitedToken.path == path).is_empty()
        store.reset()
        if token_missing:
            raise LookupError("Token stale/pruned/path mismatch")
        restricted = True
    alias = LibraryFileAlias.selectOne(And(
        LibraryFileAlias.id == aliasid,
        LibraryFileAlias.contentID == LibraryFileContent.q.id,
        LibraryFileAlias.restricted == restricted))
    if alias is None:
        raise LookupError("No file alias with LibraryFileContent")
    return alias
def addAlias(self, fileid, filename, mimetype, expires=None):
    """Add an alias, and return its ID.

    If a matching alias already exists, it will return that ID instead.
    """
    # Restriction status is inherited from this storage's own setting.
    alias = LibraryFileAlias(
        contentID=fileid, filename=filename, mimetype=mimetype,
        expires=expires, restricted=self.restricted)
    return alias.id
def test_deleteWellExpiredAliases(self):
    # LibraryFileAlias records that are expired are unlinked from their
    # content.

    # Flag one of our LibraryFileAliases with an expiry date in the past.
    self.ztm.begin()
    f1 = LibraryFileAlias.get(self.f1_id)
    f1.expires = self.ancient_past
    del f1
    self.ztm.commit()

    # Unlink expired LibraryFileAliases.
    librariangc.expire_aliases(self.con)

    self.ztm.begin()
    # Make sure the well expired f1 is still there, but has no content.
    # (self.assert_ is a deprecated unittest alias, removed in Python 3;
    # use the explicit assertions instead.)
    f1 = LibraryFileAlias.get(self.f1_id)
    self.assertIsNone(f1.content)
    # f2 should still have content, as it isn't flagged for expiry.
    f2 = LibraryFileAlias.get(self.f2_id)
    self.assertIsNotNone(f2.content)
def getAlias(self, aliasid, token, path):
    """Returns a LibraryFileAlias, or raises LookupError.

    A LookupError is raised if no record with the given ID exists or
    if no related LibraryFileContent exists.

    :param aliasid: A `LibraryFileAlias` ID.
    :param token: The token for the file. If None no token is present.
        When a token is supplied, it is looked up with path.
    :param path: The path the request is for, unused unless a token
        is supplied; when supplied it must match the token. The
        value of path is expected to be that from a twisted request.args
        e.g. /foo/bar.
    """
    restricted = self.restricted
    if token and path:
        # With a token and a path we may be able to serve restricted
        # files on the public port.
        if isinstance(token, Macaroon):
            # Macaroons have enough other constraints that they don't
            # need to be path-specific; it's simpler and faster to just
            # check the alias ID.
            authorised = threads.blockingCallFromThread(
                default_reactor, self._verifyMacaroon, token, aliasid)
        else:
            # The URL-encoding of the path may have changed somewhere
            # along the line, so reencode it canonically. LFA.filename
            # can't contain slashes, so they're safe to leave unencoded.
            # And urllib.quote erroneously excludes ~ from its safe set,
            # while RFC 3986 says it should be unescaped and Chromium
            # forcibly decodes it in any URL that it sees.
            #
            # This needs to match url_path_quote.
            normalised_path = urllib.quote(
                urllib.unquote(path), safe='/~+')
            store = session_store()
            matches = store.find(
                TimeLimitedToken,
                SQL("age(created) < interval '1 day'"),
                TimeLimitedToken.token == hashlib.sha256(token).hexdigest(),
                TimeLimitedToken.path == normalised_path)
            authorised = not matches.is_empty()
            store.reset()
        if not authorised:
            raise LookupError("Token stale/pruned/path mismatch")
        restricted = True
    alias = LibraryFileAlias.selectOne(
        And(LibraryFileAlias.id == aliasid,
            LibraryFileAlias.contentID == LibraryFileContent.q.id,
            LibraryFileAlias.restricted == restricted))
    if alias is None:
        raise LookupError("No file alias with LibraryFileContent")
    return alias
def test_ignoreRecentlyExpiredAliases(self):
    # LibraryFileAlias records that have expired recently are not
    # garbage collected.

    # Flag one of our LibraryFileAliases with an expiry date in the
    # recent past.
    self.ztm.begin()
    f1 = LibraryFileAlias.get(self.f1_id)
    f1.expires = self.recent_past  # Within stay of execution.
    del f1
    self.ztm.commit()

    # Unlink expired LibraryFileAliases.
    librariangc.expire_aliases(self.con)

    self.ztm.begin()
    # Make sure f1 is still there and has content. This ensures that
    # our stay of execution is still working.
    # (self.assert_ is a deprecated unittest alias, removed in Python 3;
    # use the explicit assertion instead.)
    f1 = LibraryFileAlias.get(self.f1_id)
    self.assertIsNotNone(f1.content)
    # f2 should still have content, as it isn't flagged for expiry.
    f2 = LibraryFileAlias.get(self.f2_id)
    self.assertIsNotNone(f2.content)
def test_addFile_hashes(self):
    # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
    # LibraryFileContent record.
    data = "i am some data"
    expected_md5 = hashlib.md5(data).hexdigest()
    expected_sha1 = hashlib.sha1(data).hexdigest()
    expected_sha256 = hashlib.sha256(data).hexdigest()

    client = LibrarianClient()
    alias_id = client.addFile("file", len(data), StringIO(data), "text/plain")
    lfa = LibraryFileAlias.get(alias_id)

    self.assertEqual(expected_md5, lfa.content.md5)
    self.assertEqual(expected_sha1, lfa.content.sha1)
    self.assertEqual(expected_sha256, lfa.content.sha256)
def _makeDupes(self):
    """Create two duplicate LibraryFileContent entries with one
    LibraryFileAlias each. Return the two LibraryFileAlias ids as a
    tuple.
    """
    # Connect to the database as a user with file upload privileges,
    # in this case the PostgreSQL default user who happens to be an
    # administrator on launchpad development boxes.
    switch_dbuser('testadmin')
    ztm = self.layer.txn
    ztm.begin()

    # Add some duplicate files.
    content = 'This is some content'
    f1_id = self.client.addFile(
        'foo.txt', len(content), StringIO(content), 'text/plain')
    f2_id = self.client.addFile(
        'foo.txt', len(content), StringIO(content), 'text/plain')
    dupes = [LibraryFileAlias.get(f1_id), LibraryFileAlias.get(f2_id)]

    # Make sure the duplicates really are distinct.
    self.failIfEqual(f1_id, f2_id)
    self.failIfEqual(dupes[0].contentID, dupes[1].contentID)

    # Backdate the aliases and their content so the garbage collector
    # will consider them.
    for dupe in dupes:
        dupe.date_created = self.ancient_past
        dupe.content.datecreated = self.ancient_past

    del dupes
    ztm.commit()
    return f1_id, f2_id
def test_addFile_hashes(self):
    # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
    # LibraryFileContent record.
    data = 'i am some data'
    client = LibrarianClient()
    alias_id = client.addFile('file', len(data), StringIO(data), 'text/plain')
    content = LibraryFileAlias.get(alias_id).content
    self.assertEqual(hashlib.md5(data).hexdigest(), content.md5)
    self.assertEqual(hashlib.sha1(data).hexdigest(), content.sha1)
    self.assertEqual(hashlib.sha256(data).hexdigest(), content.sha256)
def prepareHoaryForUploads(test):
    """Prepare ubuntu/hoary to receive uploads.

    Ensure ubuntu/hoary is ready to receive and build new uploads in
    the RELEASE pocket (they are auto-overridden to the 'universe'
    component).
    """
    hoary = getUtility(IDistributionSet)['ubuntu']['hoary']

    # Allow uploads to the universe component.
    ComponentSelection(
        distroseries=hoary,
        component=getUtility(IComponentSet)['universe'])

    # Create a fake hoary/i386 chroot.
    hoary['i386'].addOrUpdateChroot(LibraryFileAlias.get(1))

    LaunchpadZopelessLayer.txn.commit()
def _getAlias(self, aliasID, secure=False):
    """Retrieve the `LibraryFileAlias` with the given id.

    :param aliasID: A unique ID for the alias.
    :param secure: Controls the behaviour when looking up restricted
        files.  If False restricted files are only permitted when
        self.restricted is True.  See `getURLForAlias`.
    :returns: A `LibraryFileAlias`.
    :raises: `DownloadFailed` if the alias is invalid or inaccessible.
    """
    # Import here to avoid circular imports at module load time.
    from lp.services.librarian.model import LibraryFileAlias
    from sqlobject import SQLObjectNotFound

    try:
        lfa = LibraryFileAlias.get(aliasID)
    except SQLObjectNotFound:
        # Translate the storage-level error into this API's exception
        # directly, instead of the original's redundant None-sentinel
        # dance (set lfa = None, then test for None afterwards).
        raise DownloadFailed('Alias %d not found' % aliasID)
    self._checkAliasAccess(lfa, secure=secure)
    return lfa
def test_DeleteUnreferencedAndWellExpiredAliases(self):
    # LibraryFileAliases can be removed after they have expired.

    # Merge the duplicates so both aliases share one LibraryFileContent.
    librariangc.merge_duplicates(self.con)

    # Give one of our LibraryFileAliases an expiry date well in the past.
    self.ztm.begin()
    expired_alias = LibraryFileAlias.get(self.f1_id)
    expired_alias.expires = self.ancient_past
    del expired_alias
    self.ztm.commit()

    # Delete unreferenced LibraryFileAliases. This should remove both
    # example aliases: one is unreferenced with a NULL expiry, the
    # other is unreferenced with an expiry in the past.
    librariangc.delete_unreferenced_aliases(self.con)

    # Make sure both our example files are gone.
    self.ztm.begin()
    for alias_id in (self.f1_id, self.f2_id):
        self.assertRaises(
            SQLObjectNotFound, LibraryFileAlias.get, alias_id)
def test__getURLForDownload(self):
    # This protected method is used by getFileByAlias. It is supposed to
    # use the internal host and port rather than the external, proxied
    # host and port. This is to provide relief for our own issues with the
    # problems reported in bug 317482.
    #
    # (Set up:)
    client = LibrarianClient()
    alias_id = client.addFile(
        "sample.txt", 6, StringIO("sample"), "text/plain")
    config.push(
        "test config",
        textwrap.dedent(
            """\
            [librarian]
            download_host: example.org
            download_port: 1234
            """
        ),
    )
    try:
        # (Test:)
        # The LibrarianClient should use the download_host and
        # download_port.
        expected_host = "http://example.org:1234/"
        download_url = client._getURLForDownload(alias_id)
        # failUnless is a deprecated unittest alias (removed in
        # Python 3); assertTrue matches the sibling version of this
        # test elsewhere in the tree.
        self.assertTrue(
            download_url.startswith(expected_host),
            "expected %s to start with %s" % (download_url, expected_host),
        )
        # If the alias has been deleted, _getURLForDownload returns None.
        lfa = LibraryFileAlias.get(alias_id)
        lfa.content = None
        # Prevent a ProgrammingError.
        call = block_implicit_flushes(LibrarianClient._getURLForDownload)
        self.assertEqual(call(client, alias_id), None)
    finally:
        # (Tear down:)
        config.pop("test config")
def test_restricted_getURLForDownload(self):
    # The RestrictedLibrarianClient should use the
    # restricted_download_host and restricted_download_port, but is
    # otherwise identical to the behavior of the LibrarianClient discussed
    # and demonstrated above.
    #
    # (Set up:)
    client = RestrictedLibrarianClient()
    alias_id = client.addFile(
        "sample.txt", 6, StringIO("sample"), "text/plain")
    config.push(
        "test config",
        textwrap.dedent(
            """\
            [librarian]
            restricted_download_host: example.com
            restricted_download_port: 5678
            """
        ),
    )
    try:
        # (Test:)
        # The client should use the restricted download host and port.
        expected_host = "http://example.com:5678/"
        download_url = client._getURLForDownload(alias_id)
        # failUnless is a deprecated unittest alias (removed in
        # Python 3); assertTrue matches the sibling version of this
        # test elsewhere in the tree.
        self.assertTrue(
            download_url.startswith(expected_host),
            "expected %s to start with %s" % (download_url, expected_host),
        )
        # If the alias has been deleted, _getURLForDownload returns None.
        lfa = LibraryFileAlias.get(alias_id)
        lfa.content = None
        # Prevent a ProgrammingError.
        call = block_implicit_flushes(
            RestrictedLibrarianClient._getURLForDownload)
        self.assertEqual(call(client, alias_id), None)
    finally:
        # (Tear down:)
        config.pop("test config")
def addFile(self, name, size, file, contentType, expires=None,
            debugID=None, allow_zero_length=False):
    """Add a file to the librarian.

    Streams the file to the librarian server over this client's
    connection while hashing it, then records the matching
    LibraryFileContent and LibraryFileAlias rows in the database.

    :param name: Name to store the file as
    :param size: Size of the file
    :param file: File-like object with the content in it
    :param contentType: mime-type, e.g. text/plain
    :param expires: Expiry time of file. See LibrarianGarbageCollection.
        Set to None to only expire when it is no longer referenced.
    :param debugID: Optional.  If set, causes extra logging for this
        request on the server, which will be marked with the value
        given.
    :param allow_zero_length: If True permit zero length files.
    :returns: aliasID as an integer
    :raises UploadFailed: If the server rejects the upload for some
        reason.
    """
    if file is None:
        raise TypeError('Bad File Descriptor: %s' % repr(file))
    # Zero-length files are only acceptable when explicitly allowed;
    # otherwise any size <= 0 is rejected up front.
    if allow_zero_length:
        min_size = -1
    else:
        min_size = 0
    if size <= min_size:
        raise UploadFailed('Invalid length: %d' % size)

    # The wire protocol deals in bytes.
    name = six.ensure_binary(name)

    # Import in this method to avoid a circular import
    from lp.services.librarian.model import LibraryFileContent
    from lp.services.librarian.model import LibraryFileAlias

    self._connect()
    try:
        # Get the name of the database the client is using, so that
        # the server can check that the client is using the same
        # database as the server.
        store = IMasterStore(LibraryFileAlias)
        databaseName = self._getDatabaseName(store)

        # Generate new content and alias IDs.
        # (we'll create rows with these IDs later, but not yet)
        contentID = store.execute(
            "SELECT nextval('libraryfilecontent_id_seq')").get_one()[0]
        aliasID = store.execute(
            "SELECT nextval('libraryfilealias_id_seq')").get_one()[0]

        # Send command
        self._sendLine('STORE %d %s' % (size, name))

        # Send headers
        self._sendHeader('Database-Name', databaseName)
        self._sendHeader('File-Content-ID', contentID)
        self._sendHeader('File-Alias-ID', aliasID)

        if debugID is not None:
            self._sendHeader('Debug-ID', debugID)

        # Send blank line. Do not check for a response from the
        # server when no data will be sent. Otherwise
        # _checkError() might consume the "200" response which
        # is supposed to be read below in this method.
        self._sendLine('', check_for_error_responses=(size > 0))

        # Prepare to the upload the file
        md5_digester = hashlib.md5()
        sha1_digester = hashlib.sha1()
        sha256_digester = hashlib.sha256()
        bytesWritten = 0

        # Read in and upload the file 64kb at a time, by using the two-arg
        # form of iter (see
        # /usr/share/doc/python/html/library/functions.html#iter).
        for chunk in iter(lambda: file.read(1024 * 64), ''):
            self.state.f.write(chunk)
            bytesWritten += len(chunk)
            md5_digester.update(chunk)
            sha1_digester.update(chunk)
            sha256_digester.update(chunk)

        # Guard against short reads: the server was told to expect
        # exactly `size` bytes in the STORE command above.
        assert bytesWritten == size, (
            'size is %d, but %d were read from the file'
            % (size, bytesWritten))
        self.state.f.flush()

        # Read response
        response = self.state.f.readline().strip()
        if response != '200':
            raise UploadFailed('Server said: ' + response)

        # Add rows to DB
        content = LibraryFileContent(
            id=contentID, filesize=size,
            sha256=sha256_digester.hexdigest(),
            sha1=sha1_digester.hexdigest(),
            md5=md5_digester.hexdigest())
        LibraryFileAlias(
            id=aliasID, content=content, filename=name.decode('UTF-8'),
            mimetype=contentType, expires=expires,
            restricted=self.restricted)

        Store.of(content).flush()

        assert isinstance(aliasID, (int, long)), \
            "aliasID %r not an integer" % (aliasID, )
        return aliasID
    finally:
        # Always release the connection, even on upload failure.
        self._close()