Example #1
    def setUp(self):
        super(TestLibrarianDBOutage, self).setUp()
        self.pgbouncer = PGBouncerLibrarianLayer.pgbouncer_fixture
        self.client = LibrarianClient()

        # Add a file to the Librarian so we can download it.
        self.url = self._makeLibraryFileUrl()
Example #2
    def test_uploadThenDownload(self):
        client = LibrarianClient()

        # Do this 10 times, to try to make sure we get all the threads in the
        # thread pool involved more than once, in case handling the second
        # request is an issue...
        for count in range(10):
            # Upload a file.  This should work without any exceptions being
            # thrown.
            sampleData = 'x' + ('blah' * (count % 5))
            fileAlias = client.addFile('sample', len(sampleData),
                                                 StringIO(sampleData),
                                                 contentType='text/plain')

            # Make sure we can get its URL
            url = client.getURLForAlias(fileAlias)

            # However, we can't access it until we have committed,
            # because the server has no idea what mime-type to send it as
            # (NB. This could be worked around if necessary by having the
            # librarian allow access to files that don't exist in the DB
            # and spitting them out with an 'unknown' mime-type
            # -- StuartBishop)
            self.require404(url)
            self.commit()

            # Make sure we can download it using the API
            fileObj = client.getFileByAlias(fileAlias)
            self.assertEqual(sampleData, fileObj.read())
            fileObj.close()

            # And make sure the URL works too
            fileObj = urlopen(url)
            self.assertEqual(sampleData, fileObj.read())
            fileObj.close()
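
Several of these examples call self.require404(), a helper the snippets never define. A minimal sketch of what it presumably looks like, assuming the requests library used in the later examples; the helper name and behaviour are inferred from the call sites, not confirmed:

    import requests

    def require404(self, url, **kwargs):
        # Hypothetical helper: assert that fetching `url` yields HTTP 404.
        response = requests.get(url, **kwargs)
        self.assertEqual(404, response.status_code)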
Example #3
    def test_checkGzipEncoding(self):
        # Files that end in ".txt.gz" are treated specially and are returned
        # with an encoding of "gzip" or "x-gzip" to accommodate requirements of
        # displaying Ubuntu build logs in the browser.  The mimetype should be
        # "text/plain" for these files.
        client = LibrarianClient()
        contents = u'Build log \N{SNOWMAN}...'.encode('UTF-8')
        build_log = BytesIO()
        with GzipFile(mode='wb', fileobj=build_log) as f:
            f.write(contents)
        build_log.seek(0)
        alias_id = client.addFile(name="build_log.txt.gz",
                                  size=len(build_log.getvalue()),
                                  file=build_log,
                                  contentType="text/plain")

        self.commit()

        url = client.getURLForAlias(alias_id)
        response = requests.get(url)
        response.raise_for_status()
        mimetype = response.headers['content-type']
        encoding = response.headers['content-encoding']
        self.assertTrue(mimetype == "text/plain; charset=utf-8",
                        "Wrong mimetype. %s != 'text/plain; charset=utf-8'."
                        % mimetype)
        self.assertTrue(encoding == "gzip",
                        "Wrong encoding. %s != 'gzip'." % encoding)
        self.assertEqual(contents.decode('UTF-8'), response.text)
Example #4
    def setUp(self):
        super(TestFeedSwift, self).setUp()
        self.swift_fixture = self.useFixture(SwiftFixture())
        self.useFixture(FeatureFixture({'librarian.swift.enabled': True}))
        transaction.commit()

        self.addCleanup(swift.connection_pool.clear)

        # Restart the Librarian so it picks up the OS_* environment
        # variables.
        LibrarianLayer.librarian_fixture.killTac()
        LibrarianLayer.librarian_fixture.setUp()

        # Add some files. These common sample files all have their
        # modification times set to the past so they will not be
        # considered potential in-progress uploads.
        the_past = time.time() - 25 * 60 * 60
        self.librarian_client = LibrarianClient()
        self.contents = [str(i) * i for i in range(1, 5)]
        self.lfa_ids = [
            self.add_file('file_{0}'.format(i), content, when=the_past)
            for i, content in enumerate(self.contents)
        ]
        self.lfas = [
            IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
            for lfa_id in self.lfa_ids
        ]
        self.lfcs = [lfa.content for lfa in self.lfas]
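
The add_file() helper called in this setUp is not shown. Based on its call signature, a hedged reconstruction: upload through the LibrarianClient, commit so the file reaches disk, then backdate the file's modification time so it is not treated as an in-progress upload. The use of librariangc.get_file_path() and the contentID attribute are assumptions borrowed from Examples #5 and #26, not the project's actual helper:

    def add_file(self, name, content, when=None):
        # Hypothetical sketch of the helper used above.
        lfa_id = self.librarian_client.addFile(
            name=name, size=len(content), file=BytesIO(content),
            contentType='text/plain')
        transaction.commit()
        if when is not None:
            lfa = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
            path = librariangc.get_file_path(lfa.contentID)
            os.utime(path, (when, when))  # backdate the mtime
        return lfa_id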
Example #5
    def test_missing_storage(self):
        # When a file exists in the DB but is missing from disk, a 404
        # is just confusing. It's an internal error, so 500 instead.
        client = LibrarianClient()

        # Upload a file so we can retrieve it.
        sample_data = b'blah'
        file_alias_id = client.addFile('sample',
                                       len(sample_data),
                                       BytesIO(sample_data),
                                       contentType='text/plain')
        url = client.getURLForAlias(file_alias_id)

        # Look up the alias record so we can locate its file on disk
        # later.
        file_alias = IMasterStore(LibraryFileAlias).get(
            LibraryFileAlias, file_alias_id)

        # Commit so the file is available from the Librarian.
        self.commit()

        # Fetch the file via HTTP.
        response = requests.get(url)
        response.raise_for_status()

        # Delete the on-disk file.
        storage = LibrarianStorage(config.librarian_server.root, None)
        os.remove(storage._fileLocation(file_alias.contentID))

        # The URL now 500s, since the DB says it should exist.
        response = requests.get(url)
        self.assertEqual(500, response.status_code)
        self.assertIn('Server', response.headers)
        self.assertNotIn('Last-Modified', response.headers)
        self.assertNotIn('Cache-Control', response.headers)
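
The call to storage._fileLocation() above leans on the Librarian's on-disk layout. A sketch of the commonly described scheme, in which the content id is rendered as eight hex digits split into four directory levels; this is an assumption about internals, not something shown in these examples:

    import os.path

    def file_location(root, content_id):
        # e.g. content id 0x12345678 -> <root>/12/34/56/78
        hex_id = '%08x' % content_id
        return os.path.join(
            root, hex_id[:2], hex_id[2:4], hex_id[4:6], hex_id[6:8])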
Example #6
    def test_headers(self):
        client = LibrarianClient()

        # Upload a file so we can retrieve it.
        sample_data = 'blah'
        file_alias_id = client.addFile(
            'sample', len(sample_data), StringIO(sample_data),
            contentType='text/plain')
        url = client.getURLForAlias(file_alias_id)

        # Change the date_created to a known value that doesn't match
        # the disk timestamp. The timestamp on disk cannot be trusted.
        file_alias = IMasterStore(LibraryFileAlias).get(
            LibraryFileAlias, file_alias_id)
        file_alias.date_created = datetime(
            2001, 01, 30, 13, 45, 59, tzinfo=pytz.utc)

        # Commit so the file is available from the Librarian.
        self.commit()

        # Fetch the file via HTTP, recording the interesting headers
        result = urlopen(url)
        last_modified_header = result.info()['Last-Modified']
        cache_control_header = result.info()['Cache-Control']

        # URLs point to the same content for ever, so we have a hardcoded
        # 1 year max-age cache policy.
        self.failUnlessEqual(cache_control_header, 'max-age=31536000, public')

        # And we should have a correct Last-Modified header too.
        self.failUnlessEqual(
            last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
Example #7
 def testLibrarianWorking(self):
     # Check that the librarian is actually working. This means at
     # a minimum the Librarian service is running and is connected
     # to the Launchpad database.
     want_librarian_working = (
             self.want_librarian_running and self.want_launchpad_database
             and self.want_component_architecture
             )
     client = LibrarianClient()
     data = 'Whatever'
     try:
         client.addFile(
                 'foo.txt', len(data), StringIO(data), 'text/plain'
                 )
     except UploadFailed:
         self.failIf(
                 want_librarian_working,
                 'Librarian should be fully operational'
                 )
     # Since we use IMasterStore, a missing component architecture
     # manifests as a TypeError rather than an AttributeError or
     # ComponentLookupError.
     except TypeError:
         self.failIf(
                 want_librarian_working,
                 'Librarian not operational as component architecture '
                 'not loaded'
                 )
     else:
         self.failUnless(
                 want_librarian_working,
                 'Librarian should not be operational'
                 )
Example #8
 def test__getURLForDownload(self):
     # This protected method is used by getFileByAlias. It is supposed to
     # use the internal host and port rather than the external, proxied
     # host and port. This is to provide relief for our own issues with the
     # problems reported in bug 317482.
     #
     # (Set up:)
     client = LibrarianClient()
     alias_id = client.addFile('sample.txt', 6, StringIO('sample'),
                               'text/plain')
     config.push(
         'test config',
         textwrap.dedent('''\
             [librarian]
             download_host: example.org
             download_port: 1234
             '''))
     try:
         # (Test:)
         # The LibrarianClient should use the download_host and
         # download_port.
         expected_host = 'http://example.org:1234/'
         download_url = client._getURLForDownload(alias_id)
         self.assertTrue(
             download_url.startswith(expected_host),
             'expected %s to start with %s' % (download_url, expected_host))
         # If the alias has been deleted, _getURLForDownload returns None.
         lfa = LibraryFileAlias.get(alias_id)
         lfa.content = None
         call = block_implicit_flushes(  # Prevent a ProgrammingError
             LibrarianClient._getURLForDownload)
         self.assertEqual(call(client, alias_id), None)
     finally:
         # (Tear down:)
         config.pop('test config')
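
The push()/try/finally/pop() pattern above is how these tests apply a temporary configuration override. A hedged convenience wrapper, not part of the example, that packages the same pattern as a context manager; it assumes only the push()/pop() semantics visible above:

    from contextlib import contextmanager

    @contextmanager
    def pushed_config(name, overrides):
        # Guarantee the override is popped even if the body raises.
        config.push(name, overrides)
        try:
            yield config
        finally:
            config.pop(name)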
Example #9
 def testUploadsSucceed(self):
     # This layer is able to be used on its own as it depends on
     # DatabaseLayer.
     # We can test this using remoteAddFile (it does not need the CA
     # loaded)
     client = LibrarianClient()
     data = 'This is a test'
     client.remoteAddFile(
         'foo.txt', len(data), StringIO(data), 'text/plain')
Example #10
 def testUploadsSucceed(self):
     # This layer is able to be used on its own as it depends on
     # DatabaseLayer.
     # We can test this using remoteAddFile (it does not need the CA
     # loaded)
     client = LibrarianClient()
     data = 'This is a test'
     client.remoteAddFile('foo.txt', len(data), StringIO(data),
                          'text/plain')
Example #11
    def test_duplicateuploads(self):
        client = LibrarianClient()
        filename = 'sample.txt'
        id1 = client.addFile(filename, 6, StringIO('sample'), 'text/plain')
        id2 = client.addFile(filename, 6, StringIO('sample'), 'text/plain')

        self.failIfEqual(id1, id2, 'Got allocated the same id!')

        self.commit()

        self.failUnlessEqual(client.getFileByAlias(id1).read(), 'sample')
        self.failUnlessEqual(client.getFileByAlias(id2).read(), 'sample')
Example #12
 def test_addFile_uses_master(self):
     # addFile is a write operation, so it should always use the
     # master store, even if the slave is the default. Close the
     # slave store and try to add a file, verifying that the master
     # is used.
     client = LibrarianClient()
     ISlaveStore(LibraryFileAlias).close()
     with SlaveDatabasePolicy():
         alias_id = client.addFile("sample.txt", 6, StringIO("sample"), "text/plain")
     transaction.commit()
     f = client.getFileByAlias(alias_id)
     self.assertEqual(f.read(), "sample")
Example #13
    def test_duplicateuploads(self):
        client = LibrarianClient()
        filename = 'sample.txt'
        id1 = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
        id2 = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')

        self.assertNotEqual(id1, id2, 'Got allocated the same id!')

        self.commit()

        self.assertEqual(client.getFileByAlias(id1).read(), b'sample')
        self.assertEqual(client.getFileByAlias(id2).read(), b'sample')
Example #14
 def test_addFile_uses_master(self):
     # addFile is a write operation, so it should always use the
     # master store, even if the slave is the default. Close the
     # slave store and try to add a file, verifying that the master
     # is used.
     client = LibrarianClient()
     ISlaveStore(LibraryFileAlias).close()
     with SlaveDatabasePolicy():
         alias_id = client.addFile('sample.txt', 6, StringIO('sample'),
                                   'text/plain')
     transaction.commit()
     f = client.getFileByAlias(alias_id)
     self.assertEqual(f.read(), 'sample')
Example #15
    def test_getURLForAliasObject(self):
        # getURLForAliasObject returns the same URL as getURLForAlias.
        client = LibrarianClient()
        content = "Test content"
        alias_id = client.addFile(
            'test.txt', len(content), StringIO(content),
            contentType='text/plain')
        self.commit()

        alias = getUtility(ILibraryFileAliasSet)[alias_id]
        self.assertEqual(
            client.getURLForAlias(alias_id),
            client.getURLForAliasObject(alias))
Example #16
 def test_clientWrongDatabase(self):
     # If the client is using the wrong database, the server should refuse
     # the upload, causing LibrarianClient to raise UploadFailed.
     client = LibrarianClient()
     # Force the client to mis-report its database
     client._getDatabaseName = lambda cur: "wrong_database"
     try:
         client.addFile("sample.txt", 6, StringIO("sample"), "text/plain")
     except UploadFailed as e:
         msg = e.args[0]
         self.failUnless(msg.startswith("Server said: 400 Wrong database"), "Unexpected UploadFailed error: " + msg)
     else:
         self.fail("UploadFailed not raised")
Example #17
    def test_getURLForAliasObject(self):
        # getURLForAliasObject returns the same URL as getURLForAlias.
        client = LibrarianClient()
        content = b"Test content"
        alias_id = client.addFile('test.txt',
                                  len(content),
                                  BytesIO(content),
                                  contentType='text/plain')
        self.commit()

        alias = getUtility(ILibraryFileAliasSet)[alias_id]
        self.assertEqual(client.getURLForAlias(alias_id),
                         client.getURLForAliasObject(alias))
Example #18
    def test_addFile_hashes(self):
        # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
        # LibraryFileContent record.
        data = "i am some data"
        md5 = hashlib.md5(data).hexdigest()
        sha1 = hashlib.sha1(data).hexdigest()
        sha256 = hashlib.sha256(data).hexdigest()

        client = LibrarianClient()
        lfa = LibraryFileAlias.get(client.addFile("file", len(data), StringIO(data), "text/plain"))

        self.assertEqual(md5, lfa.content.md5)
        self.assertEqual(sha1, lfa.content.sha1)
        self.assertEqual(sha256, lfa.content.sha256)
Example #19
 def test_clientWrongDatabase(self):
     # If the client is using the wrong database, the server should refuse
     # the upload, causing LibrarianClient to raise UploadFailed.
     client = LibrarianClient()
     # Force the client to mis-report its database
     client._getDatabaseName = lambda cur: 'wrong_database'
     try:
         client.addFile('sample.txt', 6, StringIO('sample'), 'text/plain')
     except UploadFailed as e:
         msg = e.args[0]
         self.assertTrue(msg.startswith('Server said: 400 Wrong database'),
                         'Unexpected UploadFailed error: ' + msg)
     else:
         self.fail("UploadFailed not raised")
Example #20
    def test_addFile_hashes(self):
        # addFile() sets the MD5, SHA-1 and SHA-256 hashes on the
        # LibraryFileContent record.
        data = 'i am some data'
        md5 = hashlib.md5(data).hexdigest()
        sha1 = hashlib.sha1(data).hexdigest()
        sha256 = hashlib.sha256(data).hexdigest()

        client = LibrarianClient()
        lfa = LibraryFileAlias.get(
            client.addFile('file', len(data), StringIO(data), 'text/plain'))

        self.assertEqual(md5, lfa.content.md5)
        self.assertEqual(sha1, lfa.content.sha1)
        self.assertEqual(sha256, lfa.content.sha256)
Example #21
    def __init__(self, log, config, diskpool, archive, allowed_suites=None,
                 library=None):
        """Initialize a publisher.

        Publishers need the pool root dir and a DiskPool object.

        Optionally we can pass a list of tuples, (distroseries.name, pocket),
        which will restrict the publisher actions, only suites listed in
        allowed_suites will be modified.
        """
        self.log = log
        self._config = config
        self.distro = archive.distribution
        self.archive = archive
        self.allowed_suites = allowed_suites

        self._diskpool = diskpool

        if library is None:
            self._library = LibrarianClient()
        else:
            self._library = library

        # Track which distroseries pockets have been dirtied by a
        # change, and therefore need domination/apt-ftparchive work.
        # This is a set of tuples in the form (distroseries.name, pocket)
        self.dirty_pockets = set()

        # Track which pockets need release files. This will contain more
        # than dirty_pockets in the case of a careful index run.
        # This is a set of tuples in the form (distroseries.name, pocket)
        self.release_files_needed = set()
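
Because the constructor falls back to a real LibrarianClient when library is None, tests can inject a stand-in through the library parameter. A hypothetical sketch; FakeLibrary and the argument values are illustrations, not names from this code:

    class FakeLibrary:
        # Stand-in exposing the one LibrarianClient method assumed here.
        def addFile(self, name, size, file, contentType=None):
            return 42  # pretend alias id

    publisher = Publisher(
        log, publisher_config, diskpool, archive,
        allowed_suites=[('focal', 'RELEASE')],  # (distroseries.name, pocket)
        library=FakeLibrary())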
Example #22
    def setUp(self):
        super(TestLibrarianDBOutage, self).setUp()
        self.pgbouncer = PGBouncerLibrarianLayer.pgbouncer_fixture
        self.client = LibrarianClient()

        # Add a file to the Librarian so we can download it.
        self.url = self._makeLibraryFileUrl()
Example #23
 def test_restricted_subdomain_must_match_file_alias(self):
     # IFF there is a .restricted. in the host, then the library file alias
     # in the subdomain must match that in the path.
     client = LibrarianClient()
     fileAlias = client.addFile('sample',
                                12,
                                BytesIO(b'a' * 12),
                                contentType='text/plain')
     fileAlias2 = client.addFile('sample',
                                 12,
                                 BytesIO(b'b' * 12),
                                 contentType='text/plain')
     self.commit()
     url = client.getURLForAlias(fileAlias)
     download_host = urlparse(config.librarian.download_url)[1]
     if ':' in download_host:
         download_host = download_host[:download_host.find(':')]
     template_host = 'i%%d.restricted.%s' % download_host
     path = get_libraryfilealias_download_path(fileAlias, 'sample')
     # The basic URL must work.
     response = requests.get(url)
     response.raise_for_status()
     # Use the network level protocol because DNS resolution won't work
     # here (no wildcard support)
     connection = httplib.HTTPConnection(config.librarian.download_host,
                                         config.librarian.download_port)
     # A valid subdomain based URL must work.
     good_host = template_host % fileAlias
     connection.request("GET", path, headers={'Host': good_host})
     response = connection.getresponse()
     response.read()
     self.assertEqual(200, response.status, response)
     # A subdomain based URL trying to put fileAlias into the restricted
     # domain of fileAlias2 must not work.
     hostile_host = template_host % fileAlias2
     connection.request("GET", path, headers={'Host': hostile_host})
     response = connection.getresponse()
     response.read()
     self.assertEqual(404, response.status)
     # A subdomain which matches the LFA but is nested under one that
     # doesn't is also treated as hostile.
     nested_host = 'i%d.restricted.i%d.restricted.%s' % (
         fileAlias, fileAlias2, download_host)
     connection.request("GET", path, headers={'Host': nested_host})
     response = connection.getresponse()
     response.read()
     self.assertEqual(404, response.status)
Example #24
 def test_librarian_is_reset(self):
     # Add a file. We use remoteAddFile because it does not need the CA
     # loaded to work.
     client = LibrarianClient()
     LibrarianTestCase.url = client.remoteAddFile(
         self.sample_data, len(self.sample_data),
         StringIO(self.sample_data), 'text/plain')
     self.failUnlessEqual(
         urlopen(LibrarianTestCase.url).read(), self.sample_data)
     # Perform the librarian specific between-test code:
     LibrarianLayer.testTearDown()
     LibrarianLayer.testSetUp()
     # Which should have nuked the old file.
     # XXX: StuartBishop 2006-06-30 Bug=51370:
     # We should get a DownloadFailed exception here.
     data = urlopen(LibrarianTestCase.url).read()
     self.failIfEqual(data, self.sample_data)
Example #25
    def test_404(self):
        client = LibrarianClient()
        filename = 'sample.txt'
        aid = client.addFile(filename, 6, StringIO('sample'), 'text/plain')
        self.commit()
        url = client.getURLForAlias(aid)
        self.assertEqual(urlopen(url).read(), 'sample')

        # Change the aliasid and assert we get a 404
        self.failUnless(str(aid) in url)
        bad_id_url = uri_path_replace(url, str(aid), str(aid + 1))
        self.require404(bad_id_url)

        # Change the filename and assert we get a 404
        self.failUnless(filename in url)
        bad_name_url = uri_path_replace(url, filename, 'different.txt')
        self.require404(bad_name_url)
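
uri_path_replace() is another undefined helper these URL tests depend on. A plausible sketch, assuming it substitutes text only within the path component so the scheme, host, and port are left alone:

    from urllib.parse import urlsplit, urlunsplit  # urlparse on Python 2

    def uri_path_replace(url, old, new):
        # Replace `old` with `new` in the URL path only.
        parts = list(urlsplit(url))
        parts[2] = parts[2].replace(old, new)
        return urlunsplit(parts)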
Example #26
    def setUp(self):
        super(TestLibrarianGarbageCollection, self).setUp()
        self.client = LibrarianClient()
        self.patch(librariangc, 'log', BufferLogger())

        # A value we use in a number of tests. This represents the
        # stay of execution hard coded into the garbage collector.
        # We don't destroy any data unless it has been waiting to be
        # destroyed for longer than this period. We pick a value
        # that is close enough to the stay of execution so that
        # forgetting timezone information will break things, but
        # far enough so that how long it takes the test to run
        # is not an issue. 'stay_of_execution - 1 hour' fits these
        # criteria.
        self.recent_past = utc_now() - timedelta(days=6, hours=23)
        # A time beyond the stay of execution.
        self.ancient_past = utc_now() - timedelta(days=30)

        self.f1_id, self.f2_id = self._makeDupes()

        switch_dbuser(config.librarian_gc.dbuser)
        self.ztm = self.layer.txn

        # Make sure the files exist. We do this in setup, because we
        # need to use the get_file_path method later in the setup and we
        # want to be sure it is working correctly.
        path = librariangc.get_file_path(self.f1_id)
        self.failUnless(os.path.exists(path), "Librarian uploads failed")

        # Make sure that every file the database knows about exists on disk.
        # We manually remove them for tests that need to cope with missing
        # library items.
        self.ztm.begin()
        cur = cursor()
        cur.execute("SELECT id FROM LibraryFileContent")
        for content_id in (row[0] for row in cur.fetchall()):
            path = librariangc.get_file_path(content_id)
            if not os.path.exists(path):
                if not os.path.exists(os.path.dirname(path)):
                    os.makedirs(os.path.dirname(path))
                open(path, 'w').write('whatever')
        self.ztm.abort()

        self.con = connect(
            user=config.librarian_gc.dbuser,
            isolation=ISOLATION_LEVEL_AUTOCOMMIT)
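
utc_now() is used here for the stay-of-execution arithmetic but never defined in the snippet. A plausible one-line definition, assuming pytz as imported by other examples:

    from datetime import datetime

    import pytz

    def utc_now():
        # Timezone-aware 'now'; naive datetimes would break the
        # stay-of-execution comparisons described above.
        return datetime.now(tz=pytz.UTC)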
Example #27
    def test_oldurl(self):
        # 'old' urls are in the form of http://server:port/cid/aid/fname
        # which we want to continue supporting. The content id is simply
        # ignored
        client = LibrarianClient()
        filename = 'sample.txt'
        aid = client.addFile(filename, 6, StringIO('sample'), 'text/plain')
        self.commit()
        url = client.getURLForAlias(aid)
        self.assertEqual(urlopen(url).read(), 'sample')

        old_url = uri_path_replace(url, str(aid), '42/%d' % aid)
        self.assertEqual(urlopen(old_url).read(), 'sample')

        # If the content and alias IDs are not integers, a 404 is raised
        old_url = uri_path_replace(url, str(aid), 'foo/%d' % aid)
        self.require404(old_url)
        old_url = uri_path_replace(url, str(aid), '%d/foo' % aid)
        self.require404(old_url)
Example #28
    def testHideLibrarian(self):
        # First perform a successful upload:
        client = LibrarianClient()
        data = 'foo'
        client.remoteAddFile(
            'foo', len(data), StringIO(data), 'text/plain')
        # The database was committed to, but not by this process, so we need
        # to ensure that it is fully torn down and recreated.
        DatabaseLayer.force_dirty_database()

        # Hide the librarian, and show that the upload fails:
        LibrarianLayer.hide()
        self.assertRaises(UploadFailed, client.remoteAddFile,
                          'foo', len(data), StringIO(data), 'text/plain')

        # Reveal the librarian again, allowing uploads:
        LibrarianLayer.reveal()
        client.remoteAddFile(
            'foo', len(data), StringIO(data), 'text/plain')
Example #29
 def test_librarian_is_reset(self):
     # Add a file. We use remoteAddFile because it does not need the CA
     # loaded to work.
     client = LibrarianClient()
     LibrarianTestCase.url = client.remoteAddFile(
             self.sample_data, len(self.sample_data),
             StringIO(self.sample_data), 'text/plain'
             )
     self.failUnlessEqual(
             urlopen(LibrarianTestCase.url).read(), self.sample_data
             )
     # Perform the librarian specific between-test code:
     LibrarianLayer.testTearDown()
     LibrarianLayer.testSetUp()
     # Which should have nuked the old file.
     # XXX: StuartBishop 2006-06-30 Bug=51370:
     # We should get a DownloadFailed exception here.
     data = urlopen(LibrarianTestCase.url).read()
     self.failIfEqual(data, self.sample_data)
Example #30
 def test_restricted_subdomain_must_match_file_alias(self):
     # IFF there is a .restricted. in the host, then the library file alias
     # in the subdomain must match that in the path.
     client = LibrarianClient()
      fileAlias = client.addFile('sample', 12, StringIO('a' * 12),
          contentType='text/plain')
      fileAlias2 = client.addFile('sample', 12, StringIO('b' * 12),
          contentType='text/plain')
     self.commit()
     url = client.getURLForAlias(fileAlias)
     download_host = urlparse(config.librarian.download_url)[1]
     if ':' in download_host:
         download_host = download_host[:download_host.find(':')]
     template_host = 'i%%d.restricted.%s' % download_host
     path = get_libraryfilealias_download_path(fileAlias, 'sample')
     # The basic URL must work.
     urlopen(url)
     # Use the network level protocol because DNS resolution won't work
     # here (no wildcard support)
     connection = httplib.HTTPConnection(
         config.librarian.download_host,
         config.librarian.download_port)
     # A valid subdomain based URL must work.
     good_host = template_host % fileAlias
     connection.request("GET", path, headers={'Host': good_host})
     response = connection.getresponse()
     response.read()
     self.assertEqual(200, response.status, response)
     # A subdomain based URL trying to put fileAlias into the restricted
     # domain of fileAlias2 must not work.
     hostile_host = template_host % fileAlias2
     connection.request("GET", path, headers={'Host': hostile_host})
     response = connection.getresponse()
     response.read()
     self.assertEqual(404, response.status)
     # A subdomain which matches the LFA but is nested under one that
     # doesn't is also treated as hostile.
     nested_host = 'i%d.restricted.i%d.restricted.%s' % (
         fileAlias, fileAlias2, download_host)
     connection.request("GET", path, headers={'Host': nested_host})
     response = connection.getresponse()
     response.read()
     self.assertEqual(404, response.status)
Example #31
    def test_404(self):
        client = LibrarianClient()
        filename = 'sample.txt'
        aid = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
        self.commit()
        url = client.getURLForAlias(aid)
        response = requests.get(url)
        response.raise_for_status()
        self.assertEqual(response.content, b'sample')

        # Change the aliasid and assert we get a 404
        self.assertIn(str(aid), url)
        bad_id_url = uri_path_replace(url, str(aid), str(aid + 1))
        self.require404(bad_id_url)

        # Change the filename and assert we get a 404
        self.assertIn(filename, url)
        bad_name_url = uri_path_replace(url, filename, 'different.txt')
        self.require404(bad_name_url)
Example #32
    def test_checkNoEncoding(self):
        # Other files should have no encoding.
        client = LibrarianClient()
        contents = 'Build log...'
        build_log = StringIO(contents)
        alias_id = client.addFile(name="build_log.tgz",
                                  size=len(contents),
                                  file=build_log,
                                  contentType="application/x-tar")

        self.commit()

        url = client.getURLForAlias(alias_id)
        fileObj = urlopen(url)
        mimetype = fileObj.headers['content-type']
        self.assertRaises(KeyError, fileObj.headers.__getitem__,
                          'content-encoding')
        self.failUnless(
            mimetype == "application/x-tar",
            "Wrong mimetype. %s != 'application/x-tar'." % mimetype)
Example #33
    def test_checkNoEncoding(self):
        # Other files should have no encoding.
        client = LibrarianClient()
        contents = b'Build log...'
        build_log = BytesIO(contents)
        alias_id = client.addFile(name="build_log.tgz",
                                  size=len(contents),
                                  file=build_log,
                                  contentType="application/x-tar")

        self.commit()

        url = client.getURLForAlias(alias_id)
        response = requests.get(url)
        response.raise_for_status()
        mimetype = response.headers['content-type']
        self.assertNotIn('content-encoding', response.headers)
        self.assertTrue(
            mimetype == "application/x-tar",
            "Wrong mimetype. %s != 'application/x-tar'." % mimetype)
Example #34
 def get_restricted_file_and_public_url(self):
     # Use a regular LibrarianClient to ensure we speak to the
     # nonrestricted port on the librarian which is where secured
     # restricted files are served from.
     client = LibrarianClient()
     fileAlias = client.addFile(
          'sample', 12, StringIO('a' * 12), contentType='text/plain')
     # Note: We're deliberately using the wrong url here: we should be
     # passing secure=True to getURLForAlias, but to use the returned URL
     # we would need a wildcard DNS facility patched into urlopen; instead
     # we use the *deliberate* choice of having the path of secure and
     # insecure urls be the same, so that we can test it: the server code
     # doesn't need to know about the fancy wildcard domains.
     url = client.getURLForAlias(fileAlias)
     # Now that we have a url which talks to the public librarian, make the
     # file restricted.
      IMasterStore(LibraryFileAlias).find(LibraryFileAlias,
          LibraryFileAlias.id == fileAlias).set(
          LibraryFileAlias.restricted == True)
     self.commit()
     return fileAlias, url
Example #35
    def test_headers(self):
        client = LibrarianClient()

        # Upload a file so we can retrieve it.
        sample_data = b'blah'
        file_alias_id = client.addFile('sample',
                                       len(sample_data),
                                       BytesIO(sample_data),
                                       contentType='text/plain')
        url = client.getURLForAlias(file_alias_id)

        # Change the date_created to a known value that doesn't match
        # the disk timestamp. The timestamp on disk cannot be trusted.
        file_alias = IMasterStore(LibraryFileAlias).get(
            LibraryFileAlias, file_alias_id)
        file_alias.date_created = datetime(2001,
                                           1,
                                           30,
                                           13,
                                           45,
                                           59,
                                           tzinfo=pytz.utc)

        # Commit so the file is available from the Librarian.
        self.commit()

        # Fetch the file via HTTP, recording the interesting headers
        response = requests.get(url)
        response.raise_for_status()
        last_modified_header = response.headers['Last-Modified']
        cache_control_header = response.headers['Cache-Control']

        # URLs point to the same content for ever, so we have a hardcoded
        # 1 year max-age cache policy.
        self.assertEqual(cache_control_header, 'max-age=31536000, public')

        # And we should have a correct Last-Modified header too.
        self.assertEqual(last_modified_header, 'Tue, 30 Jan 2001 13:45:59 GMT')
Example #36
    def test_checkGzipEncoding(self):
        # Files that end in ".txt.gz" are treated specially and are returned
        # with an encoding of "gzip" or "x-gzip" to accommodate requirements of
        # displaying Ubuntu build logs in the browser.  The mimetype should be
        # "text/plain" for these files.
        client = LibrarianClient()
        contents = 'Build log...'
        build_log = StringIO(contents)
        alias_id = client.addFile(name="build_log.txt.gz",
                                  size=len(contents),
                                  file=build_log,
                                  contentType="text/plain")

        self.commit()

        url = client.getURLForAlias(alias_id)
        fileObj = urlopen(url)
        mimetype = fileObj.headers['content-type']
        encoding = fileObj.headers['content-encoding']
        self.failUnless(mimetype == "text/plain",
                        "Wrong mimetype. %s != 'text/plain'." % mimetype)
        self.failUnless(encoding == "gzip",
                        "Wrong encoding. %s != 'gzip'." % encoding)
Example #37
    def test_oldurl(self):
        # 'old' urls are in the form of http://server:port/cid/aid/fname
        # which we want to continue supporting. The content id is simply
        # ignored
        client = LibrarianClient()
        filename = 'sample.txt'
        aid = client.addFile(filename, 6, BytesIO(b'sample'), 'text/plain')
        self.commit()
        url = client.getURLForAlias(aid)
        response = requests.get(url)
        response.raise_for_status()
        self.assertEqual(response.content, b'sample')

        old_url = uri_path_replace(url, str(aid), '42/%d' % aid)
        response = requests.get(old_url)
        response.raise_for_status()
        self.assertEqual(response.content, b'sample')

        # If the content and alias IDs are not integers, a 404 is raised
        old_url = uri_path_replace(url, str(aid), 'foo/%d' % aid)
        self.require404(old_url)
        old_url = uri_path_replace(url, str(aid), '%d/foo' % aid)
        self.require404(old_url)
Example #38
 def get_restricted_file_and_public_url(self, filename='sample'):
     # Use a regular LibrarianClient to ensure we speak to the
     # nonrestricted port on the librarian which is where secured
     # restricted files are served from.
     client = LibrarianClient()
     fileAlias = client.addFile(filename,
                                12,
                                BytesIO(b'a' * 12),
                                contentType='text/plain')
     # Note: We're deliberately using the wrong url here: we should be
     # passing secure=True to getURLForAlias, but to use the returned URL
     # we would need a wildcard DNS facility patched into requests; instead
     # we use the *deliberate* choice of having the path of secure and
     # insecure urls be the same, so that we can test it: the server code
     # doesn't need to know about the fancy wildcard domains.
     url = client.getURLForAlias(fileAlias)
     # Now that we have a url which talks to the public librarian, make the
     # file restricted.
     IMasterStore(LibraryFileAlias).find(
         LibraryFileAlias,
         LibraryFileAlias.id == fileAlias).set(restricted=True)
     self.commit()
     return fileAlias, url
Example #39
    def test_uploadThenDownload(self):
        client = LibrarianClient()

        # Do this 10 times, to try to make sure we get all the threads in the
        # thread pool involved more than once, in case handling the second
        # request is an issue...
        for count in range(10):
            # Upload a file.  This should work without any exceptions being
            # thrown.
            sampleData = b'x' + (b'blah' * (count % 5))
            fileAlias = client.addFile('sample',
                                       len(sampleData),
                                       BytesIO(sampleData),
                                       contentType='text/plain')

            # Make sure we can get its URL
            url = client.getURLForAlias(fileAlias)

            # However, we can't access it until we have committed,
            # because the server has no idea what mime-type to send it as
            # (NB. This could be worked around if necessary by having the
            # librarian allow access to files that don't exist in the DB
            # and spitting them out with an 'unknown' mime-type
            # -- StuartBishop)
            self.require404(url)
            self.commit()

            # Make sure we can download it using the API
            fileObj = client.getFileByAlias(fileAlias)
            self.assertEqual(sampleData, fileObj.read())
            fileObj.close()

            # And make sure the URL works too
            response = requests.get(url)
            response.raise_for_status()
            self.assertEqual(sampleData, response.content)
Example #40
 def test__getURLForDownload(self):
     # This protected method is used by getFileByAlias. It is supposed to
     # use the internal host and port rather than the external, proxied
     # host and port. This is to provide relief for our own issues with the
     # problems reported in bug 317482.
     #
     # (Set up:)
     client = LibrarianClient()
     alias_id = client.addFile("sample.txt", 6, StringIO("sample"), "text/plain")
     config.push(
         "test config",
         textwrap.dedent(
             """\
             [librarian]
             download_host: example.org
             download_port: 1234
             """
         ),
     )
     try:
         # (Test:)
         # The LibrarianClient should use the download_host and
         # download_port.
         expected_host = "http://example.org:1234/"
         download_url = client._getURLForDownload(alias_id)
         self.failUnless(
             download_url.startswith(expected_host), "expected %s to start with %s" % (download_url, expected_host)
         )
         # If the alias has been deleted, _getURLForDownload returns None.
         lfa = LibraryFileAlias.get(alias_id)
         lfa.content = None
         call = block_implicit_flushes(LibrarianClient._getURLForDownload)  # Prevent a ProgrammingError
         self.assertEqual(call(client, alias_id), None)
     finally:
         # (Tear down:)
         config.pop("test config")
Example #41
 def testLibrarianWorking(self):
     # Check that the librarian is actually working. This means at
     # a minimum the Librarian service is running and is connected
     # to the Launchpad database.
     want_librarian_working = (self.want_librarian_running
                               and self.want_launchpad_database
                               and self.want_component_architecture)
     client = LibrarianClient()
     data = 'Whatever'
     try:
         client.addFile('foo.txt', len(data), StringIO(data), 'text/plain')
     except UploadFailed:
         self.failIf(want_librarian_working,
                     'Librarian should be fully operational')
     # Since we use IMasterStore, a missing component architecture
     # manifests as a TypeError rather than an AttributeError or
     # ComponentLookupError.
     except TypeError:
         self.failIf(
             want_librarian_working,
             'Librarian not operational as component architecture '
             'not loaded')
     else:
         self.failUnless(want_librarian_working,
                         'Librarian should not be operational')
Example #42
class TestLibrarianDBOutage(TestCase):
    layer = PGBouncerLibrarianLayer

    def setUp(self):
        super(TestLibrarianDBOutage, self).setUp()
        self.pgbouncer = PGBouncerLibrarianLayer.pgbouncer_fixture
        self.client = LibrarianClient()

        # Add a file to the Librarian so we can download it.
        self.url = self._makeLibraryFileUrl()

    def _makeLibraryFileUrl(self):
        data = 'whatever'
        return self.client.remoteAddFile('foo.txt', len(data), StringIO(data),
                                         'text/plain')

    def getErrorCode(self):
        # We need to talk to every Librarian thread to ensure all the
        # Librarian database connections are in a known state.
        # XXX StuartBishop 2011-09-01 bug=840046: 20 might be overkill
        # for the test run, but we have no real way of knowing how many
        # connections are in use.
        num_librarian_threads = 20
        codes = set()
        for count in range(num_librarian_threads):
            try:
                urllib2.urlopen(self.url).read()
                codes.add(200)
            except urllib2.HTTPError as error:
                codes.add(error.code)
        self.assertTrue(len(codes) == 1, 'Mixed responses: %s' % str(codes))
        return codes.pop()

    def test_outage(self):
        # Everything should be working fine to start with.
        self.assertEqual(self.getErrorCode(), 200)

        # When the outage kicks in, we start getting 503 responses
        # instead of 200 and 404s.
        self.pgbouncer.stop()
        self.assertEqual(self.getErrorCode(), 503)

        # When the outage is over, things are back to normal.
        self.pgbouncer.start()
        self.assertEqual(self.getErrorCode(), 200)
Example #43
class TestLibrarianDBOutage(TestCase):
    layer = PGBouncerLibrarianLayer

    def setUp(self):
        super(TestLibrarianDBOutage, self).setUp()
        self.pgbouncer = PGBouncerLibrarianLayer.pgbouncer_fixture
        self.client = LibrarianClient()

        # Add a file to the Librarian so we can download it.
        self.url = self._makeLibraryFileUrl()

    def _makeLibraryFileUrl(self):
        data = "whatever"
        return self.client.remoteAddFile("foo.txt", len(data), StringIO(data), "text/plain")

    def getErrorCode(self):
        # We need to talk to every Librarian thread to ensure all the
        # Librarian database connections are in a known state.
        # XXX StuartBishop 2011-09-01 bug=840046: 20 might be overkill
        # for the test run, but we have no real way of knowing how many
        # connections are in use.
        num_librarian_threads = 20
        codes = set()
        for count in range(num_librarian_threads):
            try:
                urllib2.urlopen(self.url).read()
                codes.add(200)
            except urllib2.HTTPError as error:
                codes.add(error.code)
        self.assertTrue(len(codes) == 1, "Mixed responses: %s" % str(codes))
        return codes.pop()

    def test_outage(self):
        # Everything should be working fine to start with.
        self.assertEqual(self.getErrorCode(), 200)

        # When the outage kicks in, we start getting 503 responses
        # instead of 200 and 404s.
        self.pgbouncer.stop()
        self.assertEqual(self.getErrorCode(), 503)

        # When the outage is over, things are back to normal.
        self.pgbouncer.start()
        self.assertEqual(self.getErrorCode(), 200)
Example #44
    def testHideLibrarian(self):
        # First perform a successful upload:
        client = LibrarianClient()
        data = 'foo'
        client.remoteAddFile('foo', len(data), StringIO(data), 'text/plain')
        # The database was committed to, but not by this process, so we need
        # to ensure that it is fully torn down and recreated.
        DatabaseLayer.force_dirty_database()

        # Hide the librarian, and show that the upload fails:
        LibrarianLayer.hide()
        self.assertRaises(UploadFailed, client.remoteAddFile, 'foo', len(data),
                          StringIO(data), 'text/plain')

        # Reveal the librarian again, allowing uploads:
        LibrarianLayer.reveal()
        client.remoteAddFile('foo', len(data), StringIO(data), 'text/plain')
Example #45
class TestLibrarianGarbageCollection(TestCase):
    layer = LaunchpadZopelessLayer

    def setUp(self):
        super(TestLibrarianGarbageCollection, self).setUp()
        self.client = LibrarianClient()
        self.patch(librariangc, 'log', BufferLogger())

        # A value we use in a number of tests. This represents the
        # stay of execution hard coded into the garbage collector.
        # We don't destroy any data unless it has been waiting to be
        # destroyed for longer than this period. We pick a value
        # that is close enough to the stay of execution so that
        # forgetting timezone information will break things, but
        # far enough so that how long it takes the test to run
        # is not an issue. 'stay_of_execution - 1 hour' fits these
        # criteria.
        self.recent_past = utc_now() - timedelta(days=6, hours=23)
        # A time beyond the stay of execution.
        self.ancient_past = utc_now() - timedelta(days=30)

        self.f1_id, self.f2_id = self._makeDupes()

        switch_dbuser(config.librarian_gc.dbuser)
        self.ztm = self.layer.txn

        # Make sure the files exist. We do this in setup, because we
        # need to use the get_file_path method later in the setup and we
        # want to be sure it is working correctly.
        path = librariangc.get_file_path(self.f1_id)
        self.failUnless(os.path.exists(path), "Librarian uploads failed")

        # Make sure that every file the database knows about exists on disk.
        # We manually remove them for tests that need to cope with missing
        # library items.
        self.ztm.begin()
        cur = cursor()
        cur.execute("SELECT id FROM LibraryFileContent")
        for content_id in (row[0] for row in cur.fetchall()):
            path = librariangc.get_file_path(content_id)
            if not os.path.exists(path):
                if not os.path.exists(os.path.dirname(path)):
                    os.makedirs(os.path.dirname(path))
                open(path, 'w').write('whatever')
        self.ztm.abort()

        self.con = connect(
            user=config.librarian_gc.dbuser,
            isolation=ISOLATION_LEVEL_AUTOCOMMIT)

    def tearDown(self):
        self.con.rollback()
        self.con.close()
        del self.con
        super(TestLibrarianGarbageCollection, self).tearDown()

    def _makeDupes(self):
        """Create two duplicate LibraryFileContent entries with one
        LibraryFileAlias each. Return the two LibraryFileAlias ids as a
        tuple.
        """
        # Connect to the database as a user with file upload privileges,
        # in this case the PostgreSQL default user who happens to be an
        # administrator on launchpad development boxes.
        switch_dbuser('testadmin')
        ztm = self.layer.txn

        ztm.begin()
        # Add some duplicate files
        content = 'This is some content'
        f1_id = self.client.addFile(
                'foo.txt', len(content), StringIO(content), 'text/plain',
                )
        f1 = LibraryFileAlias.get(f1_id)
        f2_id = self.client.addFile(
                'foo.txt', len(content), StringIO(content), 'text/plain',
                )
        f2 = LibraryFileAlias.get(f2_id)

        # Make sure the duplicates really are distinct
        self.failIfEqual(f1_id, f2_id)
        self.failIfEqual(f1.contentID, f2.contentID)

        f1.date_created = self.ancient_past
        f2.date_created = self.ancient_past
        f1.content.datecreated = self.ancient_past
        f2.content.datecreated = self.ancient_past

        del f1, f2

        ztm.commit()

        return f1_id, f2_id

    def test_MergeDuplicates(self):
        # Merge the duplicates
        librariangc.merge_duplicates(self.con)

        # merge_duplicates should have committed
        self.ztm.begin()
        self.ztm.abort()

        # Confirm that the duplicates have been merged
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        self.failUnlessEqual(f1.contentID, f2.contentID)

    def test_DeleteUnreferencedAliases(self):
        self.ztm.begin()

        # Confirm that our sample files are there.
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        # Grab the content IDs related to these
        # unreferenced LibraryFileAliases
        c1_id = f1.contentID
        c2_id = f2.contentID
        del f1, f2
        self.ztm.abort()

        # Delete unreferenced aliases
        librariangc.delete_unreferenced_aliases(self.con)

        # This should have committed
        self.ztm.begin()

        # Confirm that the LibraryFileContents are still there.
        LibraryFileContent.get(c1_id)
        LibraryFileContent.get(c2_id)

        # But the LibraryFileAliases should be gone
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

    def test_DeleteUnreferencedAliases2(self):
        # Don't delete LibraryFileAliases accessed recently

        # Merge the duplicates. Both our aliases now point to the same
        # LibraryFileContent
        librariangc.merge_duplicates(self.con)

        # We now have two aliases sharing the same content.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assertEqual(f1.content, f2.content)

        # Flag one of our LibraryFileAliases as being recently created
        f1.date_created = self.recent_past

        del f1
        del f2
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should remove
        # the alias with the ID self.f2_id, but the other should stay,
        # as it was accessed recently.
        librariangc.delete_unreferenced_aliases(self.con)

        self.ztm.begin()
        LibraryFileAlias.get(self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

    def test_DeleteUnreferencedAndWellExpiredAliases(self):
        # LibraryFileAliases can be removed after they have expired

        # Merge the duplicates. Both our aliases now point to the same
        # LibraryFileContent
        librariangc.merge_duplicates(self.con)

        # Flag one of our LibraryFileAliases with an expiry date in the past
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.ancient_past
        del f1
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should remove our
        # example aliases, as one is unreferenced with a NULL expiry and
        # the other is unreferenced with an expiry in the past.
        librariangc.delete_unreferenced_aliases(self.con)

        # Make sure both our example files are gone
        self.ztm.begin()
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

    def test_DoneDeleteUnreferencedButNotExpiredAliases(self):
        # LibraryFileAliases can be removed only after they have expired.
        # If an explicit expiry is set and in the recent past (currently
        # up to one week ago), the files hang around.

        # Merge the duplicates. Both our aliases now point to the same
        # LibraryFileContent
        librariangc.merge_duplicates(self.con)

        # Flag one of our LibraryFileAliases with an expiry date in the
        # recent past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.recent_past
        del f1
        self.ztm.commit()

        # Delete unreferenced LibraryFileAliases. This should not remove our
        # example aliases, as one is unreferenced with a NULL expiry and
        # the other is unreferenced with an expiry in the recent past.
        librariangc.delete_unreferenced_aliases(self.con)

        # Make sure both our example files are still there
        self.ztm.begin()
        # Our recently expired LibraryFileAlias is still available.
        LibraryFileAlias.get(self.f1_id)

    def test_deleteWellExpiredAliases(self):
        # LibraryFileAlias records that are expired are unlinked from their
        # content.

        # Flag one of our LibraryFileAliases with an expiry date in the past
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.ancient_past
        del f1
        self.ztm.commit()

        # Unlink expired LibraryFileAliases.
        librariangc.expire_aliases(self.con)

        self.ztm.begin()
        # Make sure the well expired f1 is still there, but has no content.
        f1 = LibraryFileAlias.get(self.f1_id)
        self.assert_(f1.content is None)
        # f2 should still have content, as it isn't flagged for expiry.
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assert_(f2.content is not None)

    def test_ignoreRecentlyExpiredAliases(self):
        # LibraryFileAlias records that have expired recently are not
        # garbage collected.

        # Flag one of our LibraryFileAliases with an expiry date in the
        # recent past.
        self.ztm.begin()
        f1 = LibraryFileAlias.get(self.f1_id)
        f1.expires = self.recent_past  # Within stay of execution.
        del f1
        self.ztm.commit()

        # Unlink expired LibraryFileAliases.
        librariangc.expire_aliases(self.con)

        self.ztm.begin()
        # Make sure f1 is still there and has content. This ensures that
        # our stay of execution is still working.
        f1 = LibraryFileAlias.get(self.f1_id)
        self.assert_(f1.content is not None)
        # f2 should still have content, as it isn't flagged for expiry.
        f2 = LibraryFileAlias.get(self.f2_id)
        self.assert_(f2.content is not None)

    def test_DeleteUnreferencedContent(self):
        # Merge the duplicates. This creates an
        # unreferenced LibraryFileContent
        librariangc.merge_duplicates(self.con)

        self.ztm.begin()

        # Locate the unreferenced LibraryFileContent
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
                AND LibraryFileContent.id IN (%d, %d)
            """ % (self.f1_id, self.f2_id))
        results = cur.fetchall()
        self.failUnlessEqual(len(results), 1)
        unreferenced_id = results[0][0]

        self.ztm.abort()

        # Make sure the file exists on disk
        path = librariangc.get_file_path(unreferenced_id)
        self.failUnless(os.path.exists(path))

        # Delete unreferenced content
        librariangc.delete_unreferenced_content(self.con)

        # Make sure the file is gone
        self.failIf(os.path.exists(path))

        # delete_unreferenced_content should have committed
        self.ztm.begin()

        # Make sure the unreferenced entries have all gone
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
            """)
        results = list(cur.fetchall())
        self.assertEqual(
                len(results), 0, 'Too many results %r' % (results,)
                )

    def test_DeleteUnreferencedContent2(self):
        # Like test_DeleteUnreferencedContent, except that the file is
        # removed from disk before attempting to remove the unreferenced
        # LibraryFileContent.
        #
        # Because the garbage collector removes an unreferenced file from
        # disk before it commits the database changes, it is possible that
        # the db removal will fail (e.g. an exception raised on COMMIT),
        # leaving the rows untouched in the database but no file on disk.
        # This is fine, as the next gc run will attempt it again and
        # nothing can use unreferenced files anyway. This test ensures
        # that this all works.

        # Merge the duplicates. This creates an
        # unreferenced LibraryFileContent
        librariangc.merge_duplicates(self.con)

        self.ztm.begin()

        # Locate the unreferenced LibraryFileContent
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
                AND LibraryFileContent.id IN (%d, %d)
            """ % (self.f1_id, self.f2_id))
        results = cur.fetchall()
        self.assertEqual(len(results), 1)
        unreferenced_id = results[0][0]

        self.ztm.abort()

        # Make sure the file exists on disk
        path = librariangc.get_file_path(unreferenced_id)
        self.assertTrue(os.path.exists(path))

        # Remove the file from disk
        os.unlink(path)
        self.assertFalse(os.path.exists(path))

        # Delete unreferenced content
        librariangc.delete_unreferenced_content(self.con)

        # Make sure the file is gone
        self.assertFalse(os.path.exists(path))

        # delete_unreferenced_content should have committed
        self.ztm.begin()

        # Make sure the unreferenced entries have all gone
        cur = cursor()
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = LibraryFileAlias.content
            WHERE LibraryFileAlias.id IS NULL
            """)
        results = list(cur.fetchall())
        self.assertEqual(
                len(results), 0, 'Too many results %r' % (results,)
                )

    def test_deleteUnwantedFiles(self):
        self.ztm.begin()
        cur = cursor()

        # We may find files in the LibraryFileContent repository
        # that do not have a corresponding LibraryFileContent row.

        # Find a content_id we can easily delete and do so. This row is
        # removed from the database, leaving an orphaned file on the
        # filesystem that should be removed.
        cur.execute("""
            SELECT LibraryFileContent.id
            FROM LibraryFileContent
            LEFT OUTER JOIN LibraryFileAlias
                ON LibraryFileContent.id = content
            WHERE LibraryFileAlias.id IS NULL
            LIMIT 1
            """)
        content_id = cur.fetchone()[0]
        cur.execute("""
                DELETE FROM LibraryFileContent WHERE id=%s
                """, (content_id,))
        self.ztm.commit()

        path = librariangc.get_file_path(content_id)
        self.assertTrue(os.path.exists(path))

        # Ensure delete_unreferenced_files does not remove the file, because
        # it will have just been created (has a recent date_created). There
        # is a window between file creation and the garbage collector
        # bothering to remove the file to avoid the race condition where the
        # garbage collector is run whilst a file is being uploaded.
        librariangc.delete_unwanted_files(self.con)
        self.assertTrue(os.path.exists(path))

        # To test removal does occur when we want it to, we need to trick
        # the garbage collector into thinking it is tomorrow.
        org_time = librariangc.time

        def tomorrow_time():
            return org_time() + 24 * 60 * 60 + 1

        try:
            librariangc.time = tomorrow_time
            librariangc.delete_unwanted_files(self.con)
        finally:
            librariangc.time = org_time

        self.assertFalse(os.path.exists(path))

        # Make sure nothing else has been removed from disk
        self.ztm.begin()
        cur = cursor()
        cur.execute("""
                SELECT id FROM LibraryFileContent
                """)
        for content_id in (row[0] for row in cur.fetchall()):
            path = librariangc.get_file_path(content_id)
            self.assertTrue(os.path.exists(path))

    def test_deleteUnwantedFilesIgnoresNoise(self):
        # Directories with invalid names in the storage area are
        # ignored. They are reported as warnings though.

        # Not a hexadecimal number.
        noisedir1_path = os.path.join(config.librarian_server.root, 'zz')

        # Too long
        noisedir2_path = os.path.join(config.librarian_server.root, '111')

        # Long non-hexadecimal number
        noisedir3_path = os.path.join(config.librarian_server.root, '11.bak')

        try:
            os.mkdir(noisedir1_path)
            os.mkdir(noisedir2_path)
            os.mkdir(noisedir3_path)

            # Files in the noise directories.
            noisefile1_path = os.path.join(noisedir1_path, 'abc')
            noisefile2_path = os.path.join(noisedir2_path, 'def')
            noisefile3_path = os.path.join(noisedir2_path, 'ghi')
            with open(noisefile1_path, 'w') as f:
                f.write('hello')
            with open(noisefile2_path, 'w') as f:
                f.write('there')
            with open(noisefile3_path, 'w') as f:
                f.write('testsuite')

            # Pretend it is tomorrow to ensure the files don't count as
            # recently created, and run the delete_unwanted_files process.
            org_time = librariangc.time

            def tomorrow_time():
                return org_time() + 24 * 60 * 60 + 1

            try:
                librariangc.time = tomorrow_time
                librariangc.delete_unwanted_files(self.con)
            finally:
                librariangc.time = org_time

            # None of the rubbish we created has been touched.
            self.assertTrue(os.path.isdir(noisedir1_path))
            self.assertTrue(os.path.isdir(noisedir2_path))
            self.assertTrue(os.path.isdir(noisedir3_path))
            self.assertTrue(os.path.exists(noisefile1_path))
            self.assertTrue(os.path.exists(noisefile2_path))
            self.assertTrue(os.path.exists(noisefile3_path))
        finally:
            # We need to clean this up ourselves, as the standard librarian
            # cleanup only removes files it knows were valid, to avoid
            # accidents.
            shutil.rmtree(noisedir1_path)
            shutil.rmtree(noisedir2_path)
            shutil.rmtree(noisedir3_path)

        # We can't rely on the ordering, so just check that one of the
        # warnings is present.
        self.assertIn(
            "WARNING Ignoring invalid directory zz",
            librariangc.log.getLogBuffer())
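
        # A hedged sketch (an assumption, not librariangc's actual code):
        # valid storage directories appear to be exactly two hexadecimal
        # digits ('00' through 'ff'), so a check along these lines would
        # reject all three noise directories above:
        #
        #     import re
        #     def _is_storage_dir(name):
        #         return re.match(r'^[0-9a-f]{2}$', name) is not None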

    def test_delete_unwanted_files_bug437084(self):
        # There was a bug where delete_unwanted_files() would die
        # if the last file found on disk was unwanted.
        switch_dbuser('testadmin')
        content = 'foo'
        self.client.addFile(
            'foo.txt', len(content), StringIO(content), 'text/plain')
        # Roll back the database changes, leaving the file on disk.
        transaction.abort()

        switch_dbuser(config.librarian_gc.dbuser)

        # This should cope.
        librariangc.delete_unwanted_files(self.con)

    def test_delete_unwanted_files_follows_symlinks(self):
        # In production, our tree has symlinks in it now.  We need to be able
        # to cope.
        # First, let's make sure we have some trash.
        switch_dbuser('testadmin')
        content = 'foo'
        self.client.addFile(
            'foo.txt', len(content), StringIO(content), 'text/plain')
        # Roll back the database changes, leaving the file on disk.
        transaction.abort()

        switch_dbuser(config.librarian_gc.dbuser)

        # Now, we will move the directory containing the trash somewhere else
        # and make a symlink to it.
        original = os.path.join(config.librarian_server.root, '00', '00')
        newdir = tempfile.mkdtemp()
        alt = os.path.join(newdir, '00')
        shutil.move(original, alt)
        os.symlink(alt, original)

        # Now we will do our thing.  This is the actual test.  It used to
        # fail.
        librariangc.delete_unwanted_files(self.con)

        # Clean up.
        os.remove(original)
        shutil.move(alt, original)
        shutil.rmtree(newdir)

    def test_cronscript(self):
        script_path = os.path.join(
                config.root, 'cronscripts', 'librarian-gc.py'
                )
        cmd = [sys.executable, script_path, '-q']
        process = Popen(cmd, stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        (script_output, _empty) = process.communicate()
        self.assertEqual(
            process.returncode, 0, 'Error: %s' % script_output)
        self.assertEqual(script_output, '')

        # Make sure that our example files have been garbage collected
        self.ztm.begin()
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f1_id)
        self.assertRaises(SQLObjectNotFound, LibraryFileAlias.get, self.f2_id)

        # And make sure stuff that *is* referenced remains
        LibraryFileAlias.get(2)
        cur = cursor()
        cur.execute("SELECT count(*) FROM LibraryFileAlias")
        count = cur.fetchone()[0]
        self.assertNotEqual(count, 0)
        cur.execute("SELECT count(*) FROM LibraryFileContent")
        count = cur.fetchone()[0]
        self.assertNotEqual(count, 0)

    def test_confirm_no_clock_skew(self):
        # There should not be any clock skew when running the test suite.
        librariangc.confirm_no_clock_skew(self.con)

        # To test that this function raises an exception when it should,
        # we trick the garbage collector into thinking it is tomorrow.
        org_time = librariangc.time

        def tomorrow_time():
            return org_time() + 24 * 60 * 60 + 1

        try:
            librariangc.time = tomorrow_time
            self.assertRaises(
                Exception, librariangc.confirm_no_clock_skew, self.con
                )
        finally:
            librariangc.time = org_time
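
The tests above monkey-patch librariangc.time in three separate places to
fast-forward the clock past the garbage collector's grace period. A minimal
sketch of how that pattern could be factored into a reusable helper (a
hypothetical context manager, not part of the original suite):

from contextlib import contextmanager

@contextmanager
def pretend_it_is_tomorrow(module):
    # Temporarily replace the module's time() so everything it
    # timestamps appears to happen just over a day in the future.
    org_time = module.time
    module.time = lambda: org_time() + 24 * 60 * 60 + 1
    try:
        yield
    finally:
        module.time = org_time

# Usage (hypothetical):
#     with pretend_it_is_tomorrow(librariangc):
#         librariangc.delete_unwanted_files(self.con)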
Example #46
    def _sendHeader(self, name, value):
        if name == "Database-Name":
            self.sentDatabaseName = True
        return LibrarianClient._sendHeader(self, name, value)
Example #47
    def test_aliasNotFound(self):
        client = LibrarianClient()
        self.assertRaises(DownloadFailed, client.getURLForAlias, 99)
Example #48
    def _getURLForDownload(self, aliasID):
        self.called_getURLForDownload = True
        return LibrarianClient._getURLForDownload(self, aliasID)
Example #49
    def _sendHeader(self, name, value):
        if name == 'Database-Name':
            self.sentDatabaseName = True
        return LibrarianClient._sendHeader(self, name, value)
Example #51
class TestFeedSwift(TestCase):
    layer = LaunchpadZopelessLayer

    def setUp(self):
        super(TestFeedSwift, self).setUp()
        self.swift_fixture = self.useFixture(SwiftFixture())
        self.useFixture(FeatureFixture({'librarian.swift.enabled': True}))
        transaction.commit()

        self.addCleanup(swift.connection_pool.clear)

        # Restart the Librarian so it picks up the OS_* environment
        # variables.
        LibrarianLayer.librarian_fixture.killTac()
        LibrarianLayer.librarian_fixture.setUp()

        # Add some files. These common sample files all have their
        # modification times set to the past so they will not be
        # considered potential in-progress uploads.
        the_past = time.time() - 25 * 60 * 60
        self.librarian_client = LibrarianClient()
        self.contents = [str(i) * i for i in range(1, 5)]
        self.lfa_ids = [
            self.add_file('file_{0}'.format(i), content, when=the_past)
            for i, content in enumerate(self.contents)
        ]
        self.lfas = [
            IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
            for lfa_id in self.lfa_ids
        ]
        self.lfcs = [lfa.content for lfa in self.lfas]

    def tearDown(self):
        super(TestFeedSwift, self).tearDown()
        # Restart the Librarian so it picks up the feature flag change.
        self.attachLibrarianLog(LibrarianLayer.librarian_fixture)
        LibrarianLayer.librarian_fixture.killTac()
        LibrarianLayer.librarian_fixture.setUp()

    @write_transaction
    def add_file(self, name, content, when=None, content_type='text/plain'):
        lfa_id = self.librarian_client.addFile(name=name,
                                               size=len(content),
                                               file=StringIO(content),
                                               contentType=content_type)
        if when is None:
            when = 0  # Very very old
        lfa = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
        path = swift.filesystem_path(lfa.content.id)
        os.utime(path, (when, when))
        return lfa_id

    def test_copy_to_swift(self):
        log = BufferLogger()

        # Confirm that files exist on disk where we expect to find them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Copy all the files into Swift.
        swift.to_swift(log, remove_func=None)

        # Confirm that files exist on disk where we expect to find them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Confirm all the files are also in Swift.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            headers, obj = swift_client.get_object(container, name)
            self.assertEqual(contents, obj, 'Did not round trip')

        # Running again does nothing, in particular does not reupload
        # the files to Swift.
        con_patch = patch.object(swift.swiftclient.Connection,
                                 'put_object',
                                 side_effect=AssertionError('do not call'))
        with con_patch:
            swift.to_swift(log)  # remove_func == None

    def test_copy_to_swift_and_rename(self):
        log = BufferLogger()

        # Confirm that files exist on disk where we expect to find them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Copy all the files into Swift.
        swift.to_swift(log, remove_func=swift.rename)

        # Confirm that files exist on disk where we expect to find them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id) + '.migrated'
            self.assertTrue(os.path.exists(path))

        # Confirm all the files are also in Swift.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            headers, obj = swift_client.get_object(container, name)
            self.assertEqual(contents, obj, 'Did not round trip')

        # Running again does nothing, in particular does not reupload
        # the files to Swift.
        con_patch = patch.object(swift.swiftclient.Connection,
                                 'put_object',
                                 side_effect=AssertionError('do not call'))
        with con_patch:
            swift.to_swift(log, remove_func=swift.rename)  # Already migrated.

    def test_move_to_swift(self):
        log = BufferLogger()

        # Confirm that files exist on disk where we expect to find them.
        for lfc in self.lfcs:
            path = swift.filesystem_path(lfc.id)
            self.assertTrue(os.path.exists(path))

        # Migrate all the files into Swift.
        swift.to_swift(log, remove_func=os.unlink)

        # Confirm that all the files have gone from disk.
        for lfc in self.lfcs:
            self.assertFalse(os.path.exists(swift.filesystem_path(lfc.id)))

        # Confirm all the files are in Swift.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            headers, obj = swift_client.get_object(container, name)
            self.assertEqual(contents, obj, 'Did not round trip')

    def test_librarian_serves_from_swift(self):
        log = BufferLogger()

        # Move all the files into Swift and off the file system.
        swift.to_swift(log, remove_func=os.unlink)

        # Confirm we can still access the files from the Librarian.
        for lfa_id, content in zip(self.lfa_ids, self.contents):
            data = self.librarian_client.getFileByAlias(lfa_id).read()
            self.assertEqual(content, data)

    def test_librarian_serves_from_disk(self):
        # Ensure the Librarian falls back to serving files from disk
        # when they cannot be found in the Swift server. Note that other
        # Librarian tests do not have Swift active, so this test is not
        # redundant.
        for lfa_id, content in zip(self.lfa_ids, self.contents):
            data = self.librarian_client.getFileByAlias(lfa_id).read()
            self.assertEqual(content, data)

    def test_largish_binary_files_from_disk(self):
        # Generate a largish blob, including null bytes for kicks.
        # A largish file is large enough that the HTTP upload needs
        # to be done in multiple chunks, but small enough that it is
        # stored in Swift as a single object.
        size = 512 * 1024  # 512KB
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)

        # Data round trips when served from disk.
        lfa = self.librarian_client.getFileByAlias(lfa_id)
        self.assertEqual(expected_content, lfa.read())

    def test_largish_binary_files_from_swift(self):
        # Generate a large blob whose size is a multiple of the chunk
        # size, including null bytes for kicks.
        # A largish file is large enough that the HTTP upload needs
        # to be done in multiple chunks, but small enough that it is
        # stored in Swift as a single object.
        size = LibrarianStorage.CHUNK_SIZE * 50
        self.assertTrue(size > 1024 * 1024)
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)
        lfc = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id).content

        # This data size is a multiple of our chunk size.
        self.assertEqual(0,
                         len(expected_content) % LibrarianStorage.CHUNK_SIZE)

        # Data round trips when served from Swift.
        swift.to_swift(BufferLogger(), remove_func=os.unlink)
        self.assertFalse(os.path.exists(swift.filesystem_path(lfc.id)))
        lfa = self.librarian_client.getFileByAlias(lfa_id)
        self.assertEqual(expected_content, lfa.read())

    def test_largish_binary_files_from_swift_offset(self):
        # Generate a large blob whose size is NOT a multiple of the
        # chunk size, including null bytes for kicks.
        # A largish file is large enough that the HTTP upload needs
        # to be done in multiple chunks, but small enough that it is
        # stored in Swift as a single object.
        size = LibrarianStorage.CHUNK_SIZE * 50 + 1
        self.assertTrue(size > 1024 * 1024)
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)
        lfc = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id).content

        # This data size is NOT a multiple of our chunk size.
        self.assertNotEqual(
            0,
            len(expected_content) % LibrarianStorage.CHUNK_SIZE)

        # Data round trips when served from Swift.
        swift.to_swift(BufferLogger(), remove_func=os.unlink)
        lfa = self.librarian_client.getFileByAlias(lfa_id)
        self.assertFalse(os.path.exists(swift.filesystem_path(lfc.id)))
        self.assertEqual(expected_content, lfa.read())

    def test_large_file_to_swift(self):
        # Generate a blob large enough that Swift requires us to store
        # it as multiple objects plus a manifest.
        size = LibrarianStorage.CHUNK_SIZE * 50
        self.assertTrue(size > 1024 * 1024)
        expected_content = ''.join(chr(i % 256) for i in range(0, size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)
        lfa = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id)
        lfc = lfa.content

        # We don't really want to upload a file >5GB to our mock Swift,
        # so change the constant instead. Set it so we need 3 segments.
        def _reset_max(val):
            swift.MAX_SWIFT_OBJECT_SIZE = val

        self.addCleanup(_reset_max, swift.MAX_SWIFT_OBJECT_SIZE)
        swift.MAX_SWIFT_OBJECT_SIZE = int(size / 2) - 1
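        # With the cap just under half the file size, two segments
        # cannot hold the data, so three are required.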

        # Shove the file requiring multiple segments into Swift.
        swift.to_swift(BufferLogger(), remove_func=None)

        # As our mock Swift does not support multi-segment files, we
        # instead examine the stored objects directly as best we can.
        swift_client = self.swift_fixture.connect()

        # The manifest exists. Unfortunately, we can't test that the
        # magic manifest header is set correctly.
        container, name = swift.swift_location(lfc.id)
        headers, obj = swift_client.get_object(container, name)
        self.assertEqual(obj, '')

        # The segments we expect are all in their expected locations.
        _, obj1 = swift_client.get_object(container, '{0}/0000'.format(name))
        _, obj2 = swift_client.get_object(container, '{0}/0001'.format(name))
        _, obj3 = swift_client.get_object(container, '{0}/0002'.format(name))
        self.assertRaises(swiftclient.ClientException, swift.quiet_swiftclient,
                          swift_client.get_object, container,
                          '{0}/0003'.format(name))

        # Our object round tripped
        self.assertEqual(obj1 + obj2 + obj3, expected_content)
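
The segment layout checked above ({name}/0000, {name}/0001, ...) matches
Swift's dynamic large object convention: segments are uploaded as ordinary
objects under a common prefix, and a zero-byte manifest object carries an
X-Object-Manifest header naming that prefix. A minimal sketch of such an
upload with python-swiftclient (illustrative only, assuming an already
connected Connection object; this is not the librarian's actual code):

def upload_as_dlo(conn, container, name, data, segment_size):
    # Upload each segment as an ordinary object under a common prefix.
    for offset in range(0, len(data), segment_size):
        conn.put_object(
            container,
            '{0}/{1:04d}'.format(name, offset // segment_size),
            data[offset:offset + segment_size])
    # The zero-byte manifest ties the segments together; this is the
    # "magic" header the test above cannot easily verify.
    conn.put_object(
        container, name, '',
        headers={'X-Object-Manifest': '{0}/{1}/'.format(container, name)})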