def open(self, fileid):
    """Open the stored file `fileid`, trying Swift first and then disk.

    This is a generator: it yields the Deferred from deferToThread() and
    hands its result back through defer.returnValue().
    NOTE(review): presumably wrapped by Twisted's inlineCallbacks; the
    decorator is not visible in this chunk -- confirm.

    Swift is consulted only when the 'librarian.swift.enabled' feature
    flag is set.  The result is a TxSwiftStream when Swift serves the
    object, or a file opened in 'rb' mode when the file is on disk.  If
    neither source has the file, no value is returned explicitly.
    """
    if getFeatureFlag('librarian.swift.enabled'):

        def note_failure(error):
            # Count and log a Swift download that failed for a reason
            # other than the object simply not being there.
            self.swift_download_fails += 1
            log.err(error)

        # Count this attempt, and report the running totals on every
        # thousandth one.
        self.swift_download_attempts += 1
        if self.swift_download_attempts % 1000 == 0:
            summary = '{} Swift download attempts, {} failures'.format(
                self.swift_download_attempts, self.swift_download_fails)
            log.msg(summary)

        # Try to stream the object out of Swift.  The blocking client
        # call is pushed to a thread.
        container, name = swift.swift_location(fileid)
        connection = swift.connection_pool.get()
        try:
            _, chunks = yield deferToThread(
                swift.quiet_swiftclient, connection.get_object,
                container, name, resp_chunk_size=self.CHUNK_SIZE)
            stream = TxSwiftStream(connection, chunks)
            defer.returnValue(stream)
        except swiftclient.ClientException as error:
            if error.http_status == 404:
                # A plain "not found": hand the connection back to the
                # pool.  It is not handed back on any other error.
                swift.connection_pool.put(connection)
            else:
                note_failure(error)
        except Exception as error:
            note_failure(error)
        # Whatever went wrong with Swift, carry on and try the disk.
        # In particular, files cannot be found in Swift until
        # librarian-feed-swift.py has put them in there.

    disk_path = self._fileLocation(fileid)
    if os.path.exists(disk_path):
        defer.returnValue(open(disk_path, 'rb'))
def remove_content(self, content_id):
    """Delete the stored data for `content_id` from disk and from Swift.

    A file already missing from disk (ENOENT) or from Swift (HTTP 404)
    is not an error; any other OSError or swiftclient.ClientException
    propagates.  Swift is only touched when self.swift_enabled is true.
    The outcome is logged.
    """
    deleted_from = []

    # Disk copy first.
    path = get_file_path(content_id)
    try:
        os.unlink(path)
    except OSError as e:
        # Already gone is fine; anything else is a real failure.
        if e.errno != errno.ENOENT:
            raise
    else:
        deleted_from.append('filesystem')

    # Then the Swift copy.
    if self.swift_enabled:
        container, name = swift.swift_location(content_id)
        with swift.connection() as swift_connection:
            try:
                swift.quiet_swiftclient(
                    swift_connection.delete_object, container, name)
            except swiftclient.ClientException as x:
                # Already gone is fine; anything else is a real failure.
                if x.http_status != 404:
                    raise
            else:
                deleted_from.append('Swift')

    if deleted_from:
        log.debug3(
            "Deleted %s from %s", content_id, ' & '.join(deleted_from))
        return

    # Nothing was removed anywhere.  With an upstream Librarian
    # configured (such as on staging) it is normal for the database to
    # list files that are not on disk, so only mention it when there is
    # no upstream.
    if config.librarian_server.upstream_host is None:
        log.info("%s already deleted", path)
def swift_files(max_lfc_id):
    """Generate a (container, entry) pair for every file stored in Swift.

    `entry` is the item from the container listing; its 'name' key holds
    the object name.  Results are yielded in numerical order: containers
    in ascending number, and within a container sorted by the integer
    value of each '/'-separated component of the name.

    :param max_lfc_id: id passed to swift.swift_location() to find the
        last container to scan.
    :raises swiftclient.ClientException: for any Swift error other than
        a 404 on a container, which is skipped.
    """
    final_container = swift.swift_location(max_lfc_id)[0]

    with swift.connection() as swift_connection:
        # We generate the container names, rather than query the
        # server, because the mock Swift implementation doesn't
        # support that operation.
        container_num = -1
        container = None
        while container != final_container:
            container_num += 1
            container = swift.SWIFT_CONTAINER_PREFIX + str(container_num)
            try:
                listing = swift.quiet_swiftclient(
                    swift_connection.get_container,
                    container, full_listing=True)[1]
                # Build a real list for the sort key.  map() returns an
                # iterator on Python 3, and iterators cannot be ordered,
                # so sorted() would raise TypeError there.
                names = sorted(
                    listing,
                    key=lambda x: [
                        int(part) for part in x['name'].split('/')])
                for name in names:
                    yield (container, name)
            except swiftclient.ClientException as x:
                # A missing container is skipped; move on to the next.
                if x.http_status == 404:
                    continue
                raise
def test_copy_to_swift(self):
    # Copying to Swift with remove_func=None must leave the files on
    # disk, put identical content in Swift, and not upload again on a
    # second run.
    logger = BufferLogger()

    def assert_all_on_disk():
        # Every fixture file is where we expect to find it on disk.
        for lfc in self.lfcs:
            self.assertTrue(os.path.exists(swift.filesystem_path(lfc.id)))

    # Before the copy, the files are on disk.
    assert_all_on_disk()

    # Copy all the files into Swift.
    swift.to_swift(logger, remove_func=None)

    # After the copy, the files are still on disk.
    assert_all_on_disk()

    # And every one of them is now in Swift as well.
    swift_client = self.swift_fixture.connect()
    for lfc, contents in zip(self.lfcs, self.contents):
        container, name = swift.swift_location(lfc.id)
        _, obj = swift_client.get_object(container, name)
        self.assertEqual(contents, obj, 'Did not round trip')

    # Running again does nothing; in particular it must not reupload
    # the files to Swift.  Any put_object call blows up.
    no_upload = patch.object(
        swift.swiftclient.Connection, 'put_object',
        side_effect=AssertionError('do not call'))
    with no_upload:
        swift.to_swift(logger)  # remove_func == None
def file_exists(content_id):
    """True if the file exists either on disk or in Swift.

    Swift is only checked if the librarian.swift.enabled feature flag
    is set.

    :param content_id: id passed to swift.swift_location() and
        get_file_path() to locate the file.
    :return: True if Swift has the object; otherwise whether the path
        from get_file_path() exists on disk.
    :raises swiftclient.ClientException: for any Swift error other than
        a 404.
    """
    swift_enabled = getFeatureFlag('librarian.swift.enabled') or False
    if swift_enabled:
        # Work out the location before taking a connection, so a failure
        # here cannot strand one.
        container, name = swift.swift_location(content_id)
        swift_connection = swift.connection_pool.get()
        try:
            swift.quiet_swiftclient(
                swift_connection.head_object, container, name)
        except swiftclient.ClientException as x:
            if x.http_status != 404:
                # NOTE(review): as before, the connection is not handed
                # back after an unexpected error -- presumably because
                # its state cannot be trusted; confirm against the pool.
                raise
            found_in_swift = False
        else:
            found_in_swift = True
        # Both a hit and a clean 404 are conclusive answers, so hand the
        # connection back in both cases.  Previously the early
        # `return True` skipped this on a hit.
        swift.connection_pool.put(swift_connection)
        if found_in_swift:
            return True
    return os.path.exists(get_file_path(content_id))
def test_large_file_to_swift(self):
    # A file bigger than the per-object limit must be stored in Swift
    # as a manifest plus numbered segments.

    # Generate a blob large enough that Swift requires us to store
    # it as multiple objects plus a manifest.
    size = LibrarianStorage.CHUNK_SIZE * 50
    self.assertTrue(size > 1024 * 1024)
    expected_content = ''.join(chr(i % 256) for i in range(size))
    lfa_id = self.add_file('hello_bigboy.xls', expected_content)
    lfc = IStore(LibraryFileAlias).get(LibraryFileAlias, lfa_id).content

    # We don't really want to upload a file >5GB to our mock Swift,
    # so shrink the limit instead, to a value that needs 3 segments.
    # The original value is restored on cleanup.
    self.addCleanup(
        setattr, swift, 'MAX_SWIFT_OBJECT_SIZE',
        swift.MAX_SWIFT_OBJECT_SIZE)
    swift.MAX_SWIFT_OBJECT_SIZE = int(size / 2) - 1

    # Shove the file requiring multiple segments into Swift.
    swift.to_swift(BufferLogger(), remove_func=None)

    # As our mock Swift does not support multi-segment files,
    # instead we examine it directly in Swift as best we can.
    swift_client = self.swift_fixture.connect()
    container, name = swift.swift_location(lfc.id)

    # The manifest exists.  Unfortunately, we can't test that the
    # magic manifest header is set correctly.
    _, manifest = swift_client.get_object(container, name)
    self.assertEqual(manifest, '')

    # The segments we expect are all in their expected locations...
    segments = []
    for template in ('{0}/0000', '{0}/0001', '{0}/0002'):
        _, segment = swift_client.get_object(
            container, template.format(name))
        segments.append(segment)

    # ...and there is no fourth one.
    self.assertRaises(
        swiftclient.ClientException, swift.quiet_swiftclient,
        swift_client.get_object, container, '{0}/0003'.format(name))

    # Our object round tripped.
    first, second, third = segments
    self.assertEqual(first + second + third, expected_content)
def test_move_to_swift(self):
    # Migrating with remove_func=os.unlink must remove the files from
    # disk and leave identical content in Swift.
    logger = BufferLogger()

    def on_disk(lfc):
        # Whether this fixture file is currently present on disk.
        return os.path.exists(swift.filesystem_path(lfc.id))

    # Before the move, every file is on disk where we expect it.
    for lfc in self.lfcs:
        self.assertTrue(on_disk(lfc))

    # Migrate all the files into Swift.
    swift.to_swift(logger, remove_func=os.unlink)

    # After the move, every file has gone from disk.
    for lfc in self.lfcs:
        self.assertFalse(on_disk(lfc))

    # And all of them are in Swift.
    swift_client = self.swift_fixture.connect()
    for lfc, contents in zip(self.lfcs, self.contents):
        container, name = swift.swift_location(lfc.id)
        _, obj = swift_client.get_object(container, name)
        self.assertEqual(contents, obj, 'Did not round trip')
def open_stream(content_id):
    """Return an open file for the given content_id.

    Swift is tried first when the librarian.swift.enabled feature flag
    is set; otherwise, or when Swift answers 404, the file is looked for
    on disk.

    :param content_id: id passed to swift.swift_location() and
        get_file_path() to locate the file.
    :return: a swift.SwiftStream when Swift serves the object, a file
        opened in 'rb' mode when the file is on disk, or None if the
        file cannot be found.
    :raises swiftclient.ClientException: for any Swift error other than
        a 404.
    """
    swift_enabled = getFeatureFlag('librarian.swift.enabled') or False
    if swift_enabled:
        # Take the connection outside the try block so the 404 handler
        # below always has one to hand back.
        container, name = swift.swift_location(content_id)
        swift_connection = swift.connection_pool.get()
        try:
            chunks = swift.quiet_swiftclient(
                swift_connection.get_object, container, name,
                resp_chunk_size=STREAM_CHUNK_SIZE)[1]
            # The stream is given the connection it reads from, so the
            # connection is not returned to the pool here.
            return swift.SwiftStream(swift_connection, chunks)
        except swiftclient.ClientException as x:
            if x.http_status != 404:
                # NOTE(review): the connection is not handed back after
                # an unexpected error -- presumably because its state
                # cannot be trusted; confirm against the pool.
                raise
            # A clean 404 is a conclusive answer, so return the
            # connection to the pool instead of dropping it.
            swift.connection_pool.put(swift_connection)

    # Not in Swift (or Swift is disabled): fall back to the disk copy.
    path = get_file_path(content_id)
    if os.path.exists(path):
        return open(path, 'rb')

    return None  # File not found.