示例#1
0
    def open(self, fileid):
        """Open the stored file for ``fileid``, preferring Swift over disk.

        This is a generator: it yields the Deferred produced by
        ``deferToThread`` and hands its result back with
        ``defer.returnValue`` (presumably it is wrapped by
        ``inlineCallbacks`` outside this view -- confirm).

        When the ``librarian.swift.enabled`` feature flag is set, the
        file is first requested from Swift and, on success, a
        ``TxSwiftStream`` wrapping the connection and the response chunks
        is returned. Any Swift failure falls through to the on-disk copy,
        which is returned as a file opened in binary mode if it exists.
        If neither source has the file, the generator simply finishes
        without returning a value.

        :param fileid: identifier passed to ``swift.swift_location`` and
            ``self._fileLocation``.
        """
        if getFeatureFlag('librarian.swift.enabled'):
            # Count the attempt, and report the running totals every
            # thousandth attempt.
            self.swift_download_attempts += 1
            if not self.swift_download_attempts % 1000:
                summary = '{} Swift download attempts, {} failures'
                log.msg(summary.format(
                    self.swift_download_attempts, self.swift_download_fails))

            # Try streaming from Swift before looking at the disk.
            container, name = swift.swift_location(fileid)
            swift_connection = swift.connection_pool.get()
            try:
                _headers, chunks = yield deferToThread(
                    swift.quiet_swiftclient, swift_connection.get_object,
                    container, name, resp_chunk_size=self.CHUNK_SIZE)
                defer.returnValue(TxSwiftStream(swift_connection, chunks))
            except swiftclient.ClientException as error:
                if error.http_status != 404:
                    # A real failure: record it. The connection is not
                    # handed back to the pool on this path.
                    self.swift_download_fails += 1
                    log.err(error)
                else:
                    # Merely missing from Swift; the connection goes
                    # back to the pool.
                    swift.connection_pool.put(swift_connection)
            except Exception as error:
                self.swift_download_fails += 1
                log.err(error)
            # Whatever went wrong with Swift, carry on and try the disk.
            # In particular, files cannot be found in Swift until
            # librarian-feed-swift.py has put them in there.

        disk_path = self._fileLocation(fileid)
        if os.path.exists(disk_path):
            defer.returnValue(open(disk_path, 'rb'))
示例#2
0
    def remove_content(self, content_id):
        """Delete ``content_id`` from disk and, if enabled, from Swift.

        A copy that is already missing (``ENOENT`` on disk, HTTP 404 in
        Swift) is not an error; any other ``OSError`` or
        ``swiftclient.ClientException`` propagates to the caller. The
        outcome is logged: either the places the file was removed from,
        or a note that it was already gone.

        :param content_id: identifier passed to ``get_file_path`` and
            ``swift.swift_location``.
        """
        path = get_file_path(content_id)
        removed = []

        # Drop the on-disk copy, tolerating one that is already gone.
        try:
            os.unlink(path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
        else:
            removed.append('filesystem')

        # Drop the Swift copy, tolerating one that is already gone.
        if self.swift_enabled:
            container, name = swift.swift_location(content_id)
            with swift.connection() as swift_connection:
                try:
                    swift.quiet_swiftclient(
                        swift_connection.delete_object, container, name)
                except swiftclient.ClientException as x:
                    if x.http_status != 404:
                        raise
                else:
                    removed.append('Swift')

        if removed:
            log.debug3("Deleted %s from %s", content_id, ' & '.join(removed))

        elif config.librarian_server.upstream_host is None:
            # With an upstream Librarian configured (e.g. on staging) it
            # is normal for the database to list files that are not on
            # disk, so only report the missing file when there is no
            # upstream host and the operator would actually care.
            log.info("%s already deleted", path)
示例#3
0
def swift_files(max_lfc_id):
    """Generate a (container, entry) pair for every file stored in Swift.

    Containers are visited in ascending numerical order, starting at
    container 0 and ending with the container that holds ``max_lfc_id``.
    Containers that do not exist (HTTP 404) are skipped. Within a
    container, entries are ordered by the integer value of each
    '/'-separated component of their name.

    Each ``entry`` is the listing item returned by ``get_container``
    (indexed by ``'name'``), not a bare name string.

    :param max_lfc_id: content id whose container, as computed by
        ``swift.swift_location``, is the last one listed.
    :raises swiftclient.ClientException: for any listing failure other
        than a 404.
    """
    final_container = swift.swift_location(max_lfc_id)[0]

    with swift.connection() as swift_connection:
        # We generate the container names, rather than query the
        # server, because the mock Swift implementation doesn't
        # support that operation.
        container_num = -1
        container = None
        while container != final_container:
            container_num += 1
            container = swift.SWIFT_CONTAINER_PREFIX + str(container_num)
            try:
                listing = swift.quiet_swiftclient(
                    swift_connection.get_container,
                    container,
                    full_listing=True)[1]
            except swiftclient.ClientException as x:
                if x.http_status == 404:
                    # No such container; move on to the next one.
                    continue
                raise
            # Build a real list for the sort key: on Python 3 map()
            # returns an iterator, and iterators cannot be ordered.
            entries = sorted(
                listing,
                key=lambda entry: [
                    int(segment) for segment in entry['name'].split('/')])
            for entry in entries:
                yield (container, entry)
示例#4
0
    def test_copy_to_swift(self):
        """Copying to Swift keeps the disk copies and is not repeated.

        With ``remove_func=None`` the files must remain on disk after
        ``swift.to_swift`` runs, their contents must be retrievable from
        Swift, and a second run must not call ``put_object`` again.
        """
        logger = BufferLogger()

        # Before the copy, every file is on disk where we expect it.
        for lfc in self.lfcs:
            self.assertTrue(os.path.exists(swift.filesystem_path(lfc.id)))

        # Copy everything into Swift without removing anything.
        swift.to_swift(logger, remove_func=None)

        # After the copy, every file is still on disk.
        for lfc in self.lfcs:
            self.assertTrue(os.path.exists(swift.filesystem_path(lfc.id)))

        # ... and every file is now in Swift as well.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            _headers, body = swift_client.get_object(container, name)
            self.assertEqual(contents, body, 'Did not round trip')

        # A second run is a no-op: any attempt to upload again trips
        # the patched put_object.
        with patch.object(swift.swiftclient.Connection,
                          'put_object',
                          side_effect=AssertionError('do not call')):
            swift.to_swift(logger)  # remove_func == None
示例#5
0
def file_exists(content_id):
    """True if the file exists either on disk or in Swift.

    Swift is only checked if the librarian.swift.enabled feature flag
    is set.

    The pooled Swift connection is handed back to the pool whenever the
    HEAD request finished cleanly, that is on success and on a 404. If
    the request fails in any other way the connection is not returned
    to the pool and the error propagates.

    :param content_id: identifier passed to ``swift.swift_location`` and
        ``get_file_path``.
    :raises swiftclient.ClientException: for any Swift failure other
        than a 404.
    """
    if getFeatureFlag('librarian.swift.enabled'):
        swift_connection = swift.connection_pool.get()
        container, name = swift.swift_location(content_id)
        try:
            swift.quiet_swiftclient(
                swift_connection.head_object, container, name)
        except swiftclient.ClientException as x:
            if x.http_status != 404:
                raise
            in_swift = False
        else:
            in_swift = True
        # Either outcome above completed cleanly, so the connection is
        # reusable. Previously it was only returned after a 404.
        swift.connection_pool.put(swift_connection)
        if in_swift:
            return True
    return os.path.exists(get_file_path(content_id))
示例#6
0
    def test_large_file_to_swift(self):
        """A file over the Swift object size limit is stored in segments.

        The limit is lowered for the duration of the test so that the
        generated file needs three segments. The test then checks the
        empty manifest object, the three segment objects, the absence of
        a fourth, and that the segments concatenate back to the original
        content.
        """
        # Build a blob big enough that Swift has to store it as several
        # objects plus a manifest.
        size = LibrarianStorage.CHUNK_SIZE * 50
        self.assertTrue(size > 1024 * 1024)
        expected_content = ''.join(chr(i % 256) for i in range(size))
        lfa_id = self.add_file('hello_bigboy.xls', expected_content)
        lfc = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id).content

        # Uploading a real >5GB file to the mock Swift is impractical,
        # so shrink the limit instead (restored on cleanup). The value
        # is chosen so that three segments are needed.
        self.addCleanup(
            setattr, swift, 'MAX_SWIFT_OBJECT_SIZE',
            swift.MAX_SWIFT_OBJECT_SIZE)
        swift.MAX_SWIFT_OBJECT_SIZE = int(size / 2) - 1

        # Push the multi-segment file into Swift.
        swift.to_swift(BufferLogger(), remove_func=None)

        # The mock Swift does not support multi-segment files, so
        # inspect the stored objects directly as best we can.
        swift_client = self.swift_fixture.connect()
        container, name = swift.swift_location(lfc.id)

        # The manifest exists. Unfortunately, we can't test that the
        # magic manifest header is set correctly.
        _headers, manifest = swift_client.get_object(container, name)
        self.assertEqual(manifest, '')

        # Exactly three segments sit at their expected locations.
        segment_names = [
            '{0}/0000'.format(name),
            '{0}/0001'.format(name),
            '{0}/0002'.format(name),
        ]
        obj1, obj2, obj3 = [
            swift_client.get_object(container, segment_name)[1]
            for segment_name in segment_names]
        self.assertRaises(
            swiftclient.ClientException, swift.quiet_swiftclient,
            swift_client.get_object, container, '{0}/0003'.format(name))

        # Reassembled, the segments give back the original content.
        self.assertEqual(obj1 + obj2 + obj3, expected_content)
示例#7
0
    def test_move_to_swift(self):
        """Migrating with ``remove_func=os.unlink`` moves files to Swift.

        The files start on disk, are gone from disk after
        ``swift.to_swift`` runs, and their contents are retrievable from
        Swift afterwards.
        """
        logger = BufferLogger()

        # Before the migration, every file is on disk where we expect it.
        for lfc in self.lfcs:
            self.assertTrue(os.path.exists(swift.filesystem_path(lfc.id)))

        # Migrate everything into Swift, unlinking the disk copies.
        swift.to_swift(logger, remove_func=os.unlink)

        # Nothing is left on disk.
        for lfc in self.lfcs:
            on_disk = os.path.exists(swift.filesystem_path(lfc.id))
            self.assertFalse(on_disk)

        # Everything is in Swift with its original contents.
        swift_client = self.swift_fixture.connect()
        for lfc, contents in zip(self.lfcs, self.contents):
            container, name = swift.swift_location(lfc.id)
            _headers, body = swift_client.get_object(container, name)
            self.assertEqual(contents, body, 'Did not round trip')
示例#8
0
def open_stream(content_id):
    """Return an open file for the given content_id.

    When the librarian.swift.enabled feature flag is set, Swift is tried
    first and a ``swift.SwiftStream`` wrapping the connection and the
    response chunks is returned on success. If Swift reports a 404, the
    connection is handed back to the pool and the on-disk copy is tried
    instead, returned as a file opened in binary mode.

    Returns None if the file cannot be found.

    :param content_id: identifier passed to ``swift.swift_location`` and
        ``get_file_path``.
    :raises swiftclient.ClientException: for any Swift failure other
        than a 404.
    """
    if getFeatureFlag('librarian.swift.enabled'):
        swift_connection = None
        try:
            swift_connection = swift.connection_pool.get()
            container, name = swift.swift_location(content_id)
            chunks = swift.quiet_swiftclient(
                swift_connection.get_object,
                container,
                name,
                resp_chunk_size=STREAM_CHUNK_SIZE)[1]
            return swift.SwiftStream(swift_connection, chunks)
        except swiftclient.ClientException as x:
            if x.http_status != 404:
                # The connection is not returned to the pool on this
                # path.
                raise
            # A 404 completed cleanly, so return the connection to the
            # pool, as the other Swift lookups in this code do. The
            # None guard covers a failure in the pool lookup itself.
            if swift_connection is not None:
                swift.connection_pool.put(swift_connection)
    path = get_file_path(content_id)
    if os.path.exists(path):
        return open(path, 'rb')

    return None  # File not found.