def test__getURLForDownload(self):
    # This protected method is used by getFileByAlias. It is supposed to
    # use the internal host and port rather than the external, proxied
    # host and port. This is to provide relief for our own issues with the
    # problems reported in bug 317482.
    #
    # (Set up:)
    client = LibrarianClient()
    alias_id = client.addFile('sample.txt', 6, StringIO('sample'),
                              'text/plain')
    # Override the librarian config for the duration of the test;
    # popped again in the finally block below.
    config.push(
        'test config',
        textwrap.dedent('''\
            [librarian]
            download_host: example.org
            download_port: 1234
            '''))
    try:
        # (Test:)
        # The LibrarianClient should use the download_host and
        # download_port.
        expected_host = 'http://example.org:1234/'
        download_url = client._getURLForDownload(alias_id)
        self.assertTrue(
            download_url.startswith(expected_host),
            'expected %s to start with %s' % (download_url, expected_host))
        # If the alias has been deleted, _getURLForDownload returns None.
        lfa = LibraryFileAlias.get(alias_id)
        lfa.content = None
        call = block_implicit_flushes(  # Prevent a ProgrammingError
            LibrarianClient._getURLForDownload)
        self.assertEqual(call(client, alias_id), None)
    finally:
        # (Tear down:) restore the original librarian config.
        config.pop('test config')
def setUp(self):
    super(TestFeedSwift, self).setUp()
    self.swift_fixture = self.useFixture(SwiftFixture())
    self.useFixture(FeatureFixture({'librarian.swift.enabled': True}))
    transaction.commit()
    # NOTE(review): presumably clears pooled Swift connections after the
    # test so later tests don't reuse connections to the torn-down
    # fixture — confirm against swift.connection_pool.
    self.addCleanup(swift.connection_pool.clear)
    # Restart the Librarian so it picks up the OS_* environment
    # variables.
    LibrarianLayer.librarian_fixture.killTac()
    LibrarianLayer.librarian_fixture.setUp()
    # Add some files. These common sample files all have their
    # modification times set to the past so they will not be
    # considered potential in-progress uploads.
    the_past = time.time() - 25 * 60 * 60  # 25 hours ago
    self.librarian_client = LibrarianClient()
    # Contents '1', '22', '333', '4444' — distinct sizes and bytes.
    self.contents = [str(i) * i for i in range(1, 5)]
    self.lfa_ids = [
        self.add_file('file_{0}'.format(i), content, when=the_past)
        for i, content in enumerate(self.contents)]
    self.lfas = [
        IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
        for lfa_id in self.lfa_ids]
    self.lfcs = [lfa.content for lfa in self.lfas]
def setUp(self):
    super(TestLibrarianDBOutage, self).setUp()
    # NOTE(review): the pgbouncer fixture is presumably stopped/started
    # by the tests to simulate a database outage — confirm in the test
    # bodies.
    self.pgbouncer = PGBouncerLibrarianLayer.pgbouncer_fixture
    self.client = LibrarianClient()
    # Add a file to the Librarian so we can download it.
    self.url = self._makeLibraryFileUrl()
def test_missing_storage(self):
    # When a file exists in the DB but is missing from disk, a 404
    # is just confusing. It's an internal error, so 500 instead.
    client = LibrarianClient()
    # Upload a file so we can retrieve it.
    sample_data = b'blah'
    file_alias_id = client.addFile('sample', len(sample_data),
                                   BytesIO(sample_data),
                                   contentType='text/plain')
    url = client.getURLForAlias(file_alias_id)
    # Fetch the alias record: its contentID is needed below to locate
    # the file on disk.
    file_alias = IMasterStore(LibraryFileAlias).get(
        LibraryFileAlias, file_alias_id)
    # Commit so the file is available from the Librarian.
    self.commit()
    # Fetch the file via HTTP to confirm it is served initially.
    response = requests.get(url)
    response.raise_for_status()
    # Delete the on-disk file.
    storage = LibrarianStorage(config.librarian_server.root, None)
    os.remove(storage._fileLocation(file_alias.contentID))
    # The URL now 500s, since the DB says it should exist.
    response = requests.get(url)
    self.assertEqual(500, response.status_code)
    self.assertIn('Server', response.headers)
    self.assertNotIn('Last-Modified', response.headers)
    self.assertNotIn('Cache-Control', response.headers)
def test_checkGzipEncoding(self):
    # Files that end in ".txt.gz" are treated special and are returned
    # with an encoding of "gzip" or "x-gzip" to accommodate requirements
    # of displaying Ubuntu build logs in the browser. The mimetype should
    # be "text/plain" for these files.
    client = LibrarianClient()
    contents = u'Build log \N{SNOWMAN}...'.encode('UTF-8')
    build_log = BytesIO()
    with GzipFile(mode='wb', fileobj=build_log) as f:
        f.write(contents)
    build_log.seek(0)
    alias_id = client.addFile(name="build_log.txt.gz",
                              size=len(build_log.getvalue()),
                              file=build_log, contentType="text/plain")
    self.commit()
    url = client.getURLForAlias(alias_id)
    response = requests.get(url)
    response.raise_for_status()
    # assertEqual reports both values on failure, unlike the old
    # assertTrue(x == y, msg) form which also carried a message that
    # contradicted the expected value.
    self.assertEqual(
        "text/plain; charset=utf-8", response.headers['content-type'])
    self.assertEqual("gzip", response.headers['content-encoding'])
    # The response body is transparently decompressed and decoded.
    self.assertEqual(contents.decode('UTF-8'), response.text)
def __init__(self, log, config, diskpool, archive, allowed_suites=None,
             library=None):
    """Initialize a publisher.

    Publishers need the pool root dir and a DiskPool object.

    Optionally we can pass a list of tuples, (distroseries.name, pocket),
    which will restrict the publisher actions, only suites listed in
    allowed_suites will be modified.
    """
    self.log = log
    self._config = config
    self.distro = archive.distribution
    self.archive = archive
    self.allowed_suites = allowed_suites
    self._diskpool = diskpool
    # Fall back to a real Librarian client when none was supplied.
    self._library = LibrarianClient() if library is None else library
    # Track which distroseries pockets have been dirtied by a
    # change, and therefore need domination/apt-ftparchive work.
    # This is a set of tuples in the form (distroseries.name, pocket)
    self.dirty_pockets = set()
    # Track which pockets need release files. This will contain more
    # than dirty_pockets in the case of a careful index run.
    # This is a set of tuples in the form (distroseries.name, pocket)
    self.release_files_needed = set()
def testUploadsSucceed(self):
    # This layer is able to be used on its own as it depends on
    # DatabaseLayer. remoteAddFile does not need the component
    # architecture loaded, so it is used for this check.
    librarian = LibrarianClient()
    payload = 'This is a test'
    librarian.remoteAddFile(
        'foo.txt', len(payload), StringIO(payload), 'text/plain')
def test_duplicateuploads(self):
    # Uploading identical content twice yields two distinct aliases,
    # both of which serve the same bytes.
    client = LibrarianClient()
    filename = 'sample.txt'
    first = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
    second = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
    self.assertNotEqual(first, second, 'Got allocated the same id!')
    self.commit()
    for alias_id in (first, second):
        self.assertEqual(b'sample', client.getFileByAlias(alias_id).read())
def test_getURLForAliasObject(self):
    # getURLForAliasObject returns the same URL as getURLForAlias.
    client = LibrarianClient()
    content = b"Test content"
    alias_id = client.addFile('test.txt', len(content), BytesIO(content),
                              contentType='text/plain')
    self.commit()
    # Look up the alias object and compare the two URL spellings.
    alias = getUtility(ILibraryFileAliasSet)[alias_id]
    expected_url = client.getURLForAlias(alias_id)
    self.assertEqual(expected_url, client.getURLForAliasObject(alias))
def test_addFile_uses_master(self):
    # addFile is a write operation, so it should always use the
    # master store, even if the slave is the default. Close the
    # slave store and try to add a file, verifying that the master
    # is used.
    client = LibrarianClient()
    ISlaveStore(LibraryFileAlias).close()
    with SlaveDatabasePolicy():
        # If addFile consulted the (closed) slave store, this call
        # would blow up.
        alias_id = client.addFile('sample.txt', 6, StringIO('sample'),
                                  'text/plain')
    transaction.commit()
    f = client.getFileByAlias(alias_id)
    self.assertEqual(f.read(), 'sample')
def test_clientWrongDatabase(self):
    # If the client is using the wrong database, the server should refuse
    # the upload, causing LibrarianClient to raise UploadFailed.
    client = LibrarianClient()
    # Force the client to mis-report its database.
    client._getDatabaseName = lambda cur: 'wrong_database'
    # assertRaises as a context manager (unittest 2.7+) replaces the
    # manual try/except/else/fail pattern and still fails the test if
    # no exception is raised.
    with self.assertRaises(UploadFailed) as raised:
        client.addFile('sample.txt', 6, StringIO('sample'), 'text/plain')
    msg = raised.exception.args[0]
    self.assertTrue(msg.startswith('Server said: 400 Wrong database'),
                    'Unexpected UploadFailed error: ' + msg)
def test_addFile_hashes(self):
    # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
    # LibraryFileContent record.
    data = 'i am some data'
    client = LibrarianClient()
    lfa = LibraryFileAlias.get(
        client.addFile('file', len(data), StringIO(data), 'text/plain'))
    # Compare each stored digest against one computed locally.
    self.assertEqual(hashlib.md5(data).hexdigest(), lfa.content.md5)
    self.assertEqual(hashlib.sha1(data).hexdigest(), lfa.content.sha1)
    self.assertEqual(hashlib.sha256(data).hexdigest(), lfa.content.sha256)
def test_restricted_subdomain_must_match_file_alias(self):
    # IFF there is a .restricted. in the host, then the library file alias
    # in the subdomain must match that in the path.
    client = LibrarianClient()
    fileAlias = client.addFile('sample', 12, BytesIO(b'a' * 12),
                               contentType='text/plain')
    fileAlias2 = client.addFile('sample', 12, BytesIO(b'b' * 12),
                                contentType='text/plain')
    self.commit()
    url = client.getURLForAlias(fileAlias)
    download_host = urlparse(config.librarian.download_url)[1]
    # Strip any :port suffix from the netloc.
    if ':' in download_host:
        download_host = download_host[:download_host.find(':')]
    # '%%d' survives this interpolation so the alias id can be
    # substituted in below.
    template_host = 'i%%d.restricted.%s' % download_host
    path = get_libraryfilealias_download_path(fileAlias, 'sample')
    # The basic URL must work.
    response = requests.get(url)
    response.raise_for_status()
    # Use the network level protocol because DNS resolution won't work
    # here (no wildcard support)
    connection = httplib.HTTPConnection(config.librarian.download_host,
                                        config.librarian.download_port)
    # A valid subdomain based URL must work.
    good_host = template_host % fileAlias
    connection.request("GET", path, headers={'Host': good_host})
    response = connection.getresponse()
    # Drain the body so the connection can be reused for the next
    # request on this keep-alive connection.
    response.read()
    self.assertEqual(200, response.status, response)
    # A subdomain based URL trying to put fileAlias into the restricted
    # domain of fileAlias2 must not work.
    hostile_host = template_host % fileAlias2
    connection.request("GET", path, headers={'Host': hostile_host})
    response = connection.getresponse()
    response.read()
    self.assertEqual(404, response.status)
    # A subdomain which matches the LFA but is nested under one that
    # doesn't is also treated as hostile.
    nested_host = 'i%d.restricted.i%d.restricted.%s' % (
        fileAlias, fileAlias2, download_host)
    connection.request("GET", path, headers={'Host': nested_host})
    response = connection.getresponse()
    response.read()
    self.assertEqual(404, response.status)
def testHideLibrarian(self):
    # Uploads fail while the Librarian is hidden and succeed again
    # once it is revealed.
    # First perform a successful upload:
    client = LibrarianClient()
    data = 'foo'
    client.remoteAddFile('foo', len(data), StringIO(data), 'text/plain')
    # The database was committed to, but not by this process, so we need
    # to ensure that it is fully torn down and recreated.
    DatabaseLayer.force_dirty_database()
    # Hide the librarian, and show that the upload fails:
    LibrarianLayer.hide()
    self.assertRaises(UploadFailed, client.remoteAddFile, 'foo',
                      len(data), StringIO(data), 'text/plain')
    # Reveal the librarian again, allowing uploads:
    LibrarianLayer.reveal()
    client.remoteAddFile('foo', len(data), StringIO(data), 'text/plain')
def test_librarian_is_reset(self):
    # Add a file. We use remoteAddFile because it does not need the CA
    # loaded to work.
    client = LibrarianClient()
    LibrarianTestCase.url = client.remoteAddFile(
        self.sample_data, len(self.sample_data),
        StringIO(self.sample_data), 'text/plain')
    # failUnlessEqual is a deprecated alias of assertEqual.
    self.assertEqual(
        urlopen(LibrarianTestCase.url).read(), self.sample_data)
    # Perform the librarian specific between-test code:
    LibrarianLayer.testTearDown()
    LibrarianLayer.testSetUp()
    # Which should have nuked the old file.
    # XXX: StuartBishop 2006-06-30 Bug=51370:
    # We should get a DownloadFailed exception here.
    data = urlopen(LibrarianTestCase.url).read()
    # failIfEqual is a deprecated alias of assertNotEqual.
    self.assertNotEqual(data, self.sample_data)
def setUp(self):
    super(TestLibrarianGarbageCollection, self).setUp()
    self.client = LibrarianClient()
    self.patch(librariangc, 'log', BufferLogger())

    # A value we use in a number of tests. This represents the
    # stay of execution hard coded into the garbage collector.
    # We don't destroy any data unless it has been waiting to be
    # destroyed for longer than this period. We pick a value
    # that is close enough to the stay of execution so that
    # forgetting timezone information will break things, but
    # far enough so that how long it takes the test to run
    # is not an issue. 'stay_of_execution - 1 hour' fits these
    # criteria.
    self.recent_past = utc_now() - timedelta(days=6, hours=23)
    # A time beyond the stay of execution.
    self.ancient_past = utc_now() - timedelta(days=30)

    self.f1_id, self.f2_id = self._makeDupes()
    switch_dbuser(config.librarian_gc.dbuser)
    self.ztm = self.layer.txn

    # Make sure the files exist. We do this in setup, because we
    # need to use the get_file_path method later in the setup and we
    # want to be sure it is working correctly.
    path = librariangc.get_file_path(self.f1_id)
    # failUnless is a deprecated alias of assertTrue.
    self.assertTrue(os.path.exists(path), "Librarian uploads failed")

    # Make sure that every file the database knows about exists on disk.
    # We manually remove them for tests that need to cope with missing
    # library items.
    self.ztm.begin()
    cur = cursor()
    cur.execute("SELECT id FROM LibraryFileContent")
    for content_id in (row[0] for row in cur.fetchall()):
        path = librariangc.get_file_path(content_id)
        if not os.path.exists(path):
            if not os.path.exists(os.path.dirname(path)):
                os.makedirs(os.path.dirname(path))
            # Use a context manager so the stub file's handle is
            # closed promptly instead of leaking until GC.
            with open(path, 'w') as stub:
                stub.write('whatever')
    self.ztm.abort()

    self.con = connect(
        user=config.librarian_gc.dbuser,
        isolation=ISOLATION_LEVEL_AUTOCOMMIT)
def test_404(self):
    # A valid URL serves the file; mangling either the alias id or the
    # filename component yields a 404.
    client = LibrarianClient()
    filename = 'sample.txt'
    alias_id = client.addFile(filename, 6, BytesIO(b'sample'),
                              'text/plain')
    self.commit()
    url = client.getURLForAlias(alias_id)
    response = requests.get(url)
    response.raise_for_status()
    self.assertEqual(b'sample', response.content)

    # Change the alias id and assert we get a 404.
    self.assertIn(str(alias_id), url)
    self.require404(
        uri_path_replace(url, str(alias_id), str(alias_id + 1)))

    # Change the filename and assert we get a 404.
    self.assertIn(filename, url)
    self.require404(uri_path_replace(url, filename, 'different.txt'))
def test_checkNoEncoding(self):
    # Other files should have no encoding.
    client = LibrarianClient()
    contents = b'Build log...'
    build_log = BytesIO(contents)
    alias_id = client.addFile(name="build_log.tgz", size=len(contents),
                              file=build_log,
                              contentType="application/x-tar")
    self.commit()
    url = client.getURLForAlias(alias_id)
    response = requests.get(url)
    response.raise_for_status()
    self.assertNotIn('content-encoding', response.headers)
    # assertEqual reports both values on failure, unlike the old
    # assertTrue(x == y, msg) form.
    self.assertEqual("application/x-tar",
                     response.headers['content-type'])
def test_headers(self): client = LibrarianClient() # Upload a file so we can retrieve it. sample_data = b'blah' file_alias_id = client.addFile('sample', len(sample_data), BytesIO(sample_data), contentType='text/plain') url = client.getURLForAlias(file_alias_id) # Change the date_created to a known value that doesn't match # the disk timestamp. The timestamp on disk cannot be trusted. file_alias = IMasterStore(LibraryFileAlias).get( LibraryFileAlias, file_alias_id) file_alias.date_created = datetime(2001, 1, 30, 13, 45, 59, tzinfo=pytz.utc) # Commit so the file is available from the Librarian. self.commit() # Fetch the file via HTTP, recording the interesting headers response = requests.get(url) response.raise_for_status() last_modified_header = response.headers['Last-Modified'] cache_control_header = response.headers['Cache-Control'] # URLs point to the same content for ever, so we have a hardcoded # 1 year max-age cache policy. self.assertEqual(cache_control_header, 'max-age=31536000, public') # And we should have a correct Last-Modified header too. self.assertEqual(last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
def test_oldurl(self):
    # 'old' urls are in the form of http://server:port/cid/aid/fname
    # which we want to continue supporting. The content id is simply
    # ignored.
    client = LibrarianClient()
    filename = 'sample.txt'
    aid = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
    self.commit()
    url = client.getURLForAlias(aid)
    response = requests.get(url)
    response.raise_for_status()
    self.assertEqual(response.content, b'sample')

    old_url = uri_path_replace(url, str(aid), '42/%d' % aid)
    # Fetch the old-style URL itself. (The original test mistakenly
    # re-fetched `url` here, so old_url was never exercised.)
    response = requests.get(old_url)
    response.raise_for_status()
    self.assertEqual(response.content, b'sample')

    # If the content and alias IDs are not integers, a 404 is raised
    old_url = uri_path_replace(url, str(aid), 'foo/%d' % aid)
    self.require404(old_url)
    old_url = uri_path_replace(url, str(aid), '%d/foo' % aid)
    self.require404(old_url)
def get_restricted_file_and_public_url(self, filename='sample'):
    # Upload a file, then mark it restricted, returning its alias id
    # and a public (non-restricted) URL for it.
    #
    # Use a regular LibrarianClient to ensure we speak to the
    # nonrestricted port on the librarian which is where secured
    # restricted files are served from.
    client = LibrarianClient()
    fileAlias = client.addFile(filename, 12, BytesIO(b'a' * 12),
                               contentType='text/plain')
    # Note: We're deliberately using the wrong url here: we should be
    # passing secure=True to getURLForAlias, but to use the returned URL
    # we would need a wildcard DNS facility patched into requests; instead
    # we use the *deliberate* choice of having the path of secure and
    # insecure urls be the same, so that we can test it: the server code
    # doesn't need to know about the fancy wildcard domains.
    url = client.getURLForAlias(fileAlias)
    # Now that we have a url which talks to the public librarian, make the
    # file restricted.
    IMasterStore(LibraryFileAlias).find(
        LibraryFileAlias,
        LibraryFileAlias.id == fileAlias).set(restricted=True)
    self.commit()
    return fileAlias, url
def testLibrarianWorking(self):
    # Check that the librarian is actually working. This means at
    # a minimum the Librarian service is running and is connected
    # to the Launchpad database.
    want_librarian_working = (
        self.want_librarian_running and self.want_launchpad_database
        and self.want_component_architecture)
    client = LibrarianClient()
    data = 'Whatever'
    try:
        client.addFile('foo.txt', len(data), StringIO(data), 'text/plain')
    except UploadFailed:
        # failIf is a deprecated alias of assertFalse.
        self.assertFalse(want_librarian_working,
                         'Librarian should be fully operational')
    # Since we use IMasterStore that doesn't throw either AttributeError
    # or ComponentLookupError.
    except TypeError:
        self.assertFalse(
            want_librarian_working,
            'Librarian not operational as component architecture '
            'not loaded')
    else:
        # failUnless is a deprecated alias of assertTrue.
        self.assertTrue(want_librarian_working,
                        'Librarian should not be operational')
def test_uploadThenDownload(self): client = LibrarianClient() # Do this 10 times, to try to make sure we get all the threads in the # thread pool involved more than once, in case handling the second # request is an issue... for count in range(10): # Upload a file. This should work without any exceptions being # thrown. sampleData = b'x' + (b'blah' * (count % 5)) fileAlias = client.addFile('sample', len(sampleData), BytesIO(sampleData), contentType='text/plain') # Make sure we can get its URL url = client.getURLForAlias(fileAlias) # However, we can't access it until we have committed, # because the server has no idea what mime-type to send it as # (NB. This could be worked around if necessary by having the # librarian allow access to files that don't exist in the DB # and spitting them out with an 'unknown' mime-type # -- StuartBishop) self.require404(url) self.commit() # Make sure we can download it using the API fileObj = client.getFileByAlias(fileAlias) self.assertEqual(sampleData, fileObj.read()) fileObj.close() # And make sure the URL works too response = requests.get(url) response.raise_for_status() self.assertEqual(sampleData, response.content)
def test_aliasNotFound(self):
    # Asking for the URL of a nonexistent alias raises DownloadFailed.
    client = LibrarianClient()
    missing_alias = 99
    self.assertRaises(
        DownloadFailed, client.getURLForAlias, missing_alias)