def make_swift_request(op, account, container=None, obj=None): """ Makes a swift request via a local proxy (cost expensive) :param op: opertation (PUT, GET, DELETE, HEAD) :param account: swift account :param container: swift container :param obj: swift object :returns: swift.common.swob.Response instance """ iclient = InternalClient(LOCAL_PROXY, 'Zion', 1) path = iclient.make_path(account, container, obj) resp = iclient.make_request(op, path, {'PATH_INFO': path}, [200]) return resp
def make_swift_request(op, account, container=None, obj=None): """ Makes a swift request via a local proxy (cost expensive) :param op: opertation (PUT, GET, DELETE, HEAD) :param account: swift account :param container: swift container :param obj: swift object :returns: swift.common.swob.Response instance """ iclient = InternalClient(LOCAL_PROXY, 'SA', 1) path = iclient.make_path(account, container, obj) resp = iclient.make_request(op, path, {'PATH_INFO': path}, [200]) return resp
class ObjectExpirer(Daemon): """ Daemon that queries the internal hidden expiring_objects_account to discover objects that need to be deleted. :param conf: The daemon configuration. """ def __init__(self, conf): self.conf = conf self.logger = get_logger(conf, log_route='object-expirer') self.interval = int(conf.get('interval') or 300) self.expiring_objects_account = \ (conf.get('auto_create_account_prefix') or '.') + \ 'expiring_objects' conf_path = conf.get('__file__') or '/etc/swift/object-expirer.conf' request_tries = int(conf.get('request_tries') or 3) self.swift = InternalClient(conf_path, 'Swift Object Expirer', request_tries) self.report_interval = int(conf.get('report_interval') or 300) self.report_first_time = self.report_last_time = time() self.report_objects = 0 self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.rcache = join(self.recon_cache_path, 'object.recon') self.concurrency = int(conf.get('concurrency', 1)) if self.concurrency < 1: raise ValueError("concurrency must be set to at least 1") self.processes = int(self.conf.get('processes', 0)) self.process = int(self.conf.get('process', 0)) def report(self, final=False): """ Emits a log line report of the progress so far, or the final progress is final=True. :param final: Set to True for the last report once the expiration pass has completed. """ if final: elapsed = time() - self.report_first_time self.logger.info( _('Pass completed in %ds; %d objects expired') % (elapsed, self.report_objects)) dump_recon_cache( { 'object_expiration_pass': elapsed, 'expired_last_pass': self.report_objects }, self.rcache, self.logger) elif time() - self.report_last_time >= self.report_interval: elapsed = time() - self.report_first_time self.logger.info( _('Pass so far %ds; %d objects expired') % (elapsed, self.report_objects)) self.report_last_time = time() def run_once(self, *args, **kwargs): """ Executes a single pass, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon accepts processes and process keyword args. These will override the values from the config file if provided. """ processes, process = self.get_process_values(kwargs) pool = GreenPool(self.concurrency) containers_to_delete = [] self.report_first_time = self.report_last_time = time() self.report_objects = 0 try: self.logger.debug(_('Run begin')) containers, objects = \ self.swift.get_account_info(self.expiring_objects_account) self.logger.info( _('Pass beginning; %s possible containers; %s ' 'possible objects') % (containers, objects)) for c in self.swift.iter_containers(self.expiring_objects_account): container = c['name'] timestamp = int(container) if timestamp > int(time()): break containers_to_delete.append(container) for o in self.swift.iter_objects(self.expiring_objects_account, container): obj = o['name'].encode('utf8') if processes > 0: obj_process = int( hashlib.md5('%s/%s' % (container, obj)).hexdigest(), 16) if obj_process % processes != process: continue timestamp, actual_obj = obj.split('-', 1) timestamp = int(timestamp) if timestamp > int(time()): break pool.spawn_n(self.delete_object, actual_obj, timestamp, container, obj) pool.waitall() for container in containers_to_delete: try: self.swift.delete_container( self.expiring_objects_account, container, acceptable_statuses=(2, HTTP_NOT_FOUND, HTTP_CONFLICT)) except (Exception, Timeout) as err: self.logger.exception( _('Exception while deleting container %s %s') % (container, str(err))) self.logger.debug(_('Run end')) self.report(final=True) except (Exception, Timeout): self.logger.exception(_('Unhandled exception')) def run_forever(self, *args, **kwargs): """ Executes passes forever, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon has no additional keyword args. """ sleep(random() * self.interval) while True: begin = time() try: self.run_once(*args, **kwargs) except (Exception, Timeout): self.logger.exception(_('Unhandled exception')) elapsed = time() - begin if elapsed < self.interval: sleep(random() * (self.interval - elapsed)) def get_process_values(self, kwargs): """ Gets the processes, process from the kwargs if those values exist. Otherwise, return processes, process set in the config file. :param kwargs: Keyword args passed into the run_forever(), run_once() methods. They have values specified on the command line when the daemon is run. """ if kwargs.get('processes') is not None: processes = int(kwargs['processes']) else: processes = self.processes if kwargs.get('process') is not None: process = int(kwargs['process']) else: process = self.process if process < 0: raise ValueError( 'process must be an integer greater than or equal to 0') if processes < 0: raise ValueError( 'processes must be an integer greater than or equal to 0') if processes and process >= processes: raise ValueError('process must be less than or equal to processes') return processes, process def delete_object(self, actual_obj, timestamp, container, obj): start_time = time() try: self.delete_actual_object(actual_obj, timestamp) self.swift.delete_object(self.expiring_objects_account, container, obj) self.report_objects += 1 self.logger.increment('objects') except (Exception, Timeout) as err: self.logger.increment('errors') self.logger.exception( _('Exception while deleting object %s %s %s') % (container, obj, str(err))) self.logger.timing_since('timing', start_time) self.report() def delete_actual_object(self, actual_obj, timestamp): """ Deletes the end-user object indicated by the actual object name given '<account>/<container>/<object>' if and only if the X-Delete-At value of the object is exactly the timestamp given. :param actual_obj: The name of the end-user object to delete: '<account>/<container>/<object>' :param timestamp: The timestamp the X-Delete-At value must match to perform the actual delete. """ path = '/v1/' + urllib.quote(actual_obj.lstrip('/')) self.swift.make_request('DELETE', path, {'X-If-Delete-At': str(timestamp)}, (2, HTTP_NOT_FOUND, HTTP_PRECONDITION_FAILED))
class TestObjectExpirer(ReplProbeTest): def setUp(self): self.expirer = Manager(['object-expirer']) self.expirer.start() err = self.expirer.stop() if err: raise unittest.SkipTest('Unable to verify object-expirer service') conf_files = [] for server in self.expirer.servers: conf_files.extend(server.conf_files()) conf_file = conf_files[0] self.client = InternalClient(conf_file, 'probe-test', 3) super(TestObjectExpirer, self).setUp() self.container_name = 'container-%s' % uuid.uuid4() self.object_name = 'object-%s' % uuid.uuid4() self.brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name) def _check_obj_in_container_listing(self): for obj in self.client.iter_objects(self.account, self.container_name): if self.object_name == obj['name']: return True return False @unittest.skipIf(len(ENABLED_POLICIES) < 2, "Need more than one policy") def test_expirer_object_split_brain(self): old_policy = random.choice(ENABLED_POLICIES) wrong_policy = random.choice([p for p in ENABLED_POLICIES if p != old_policy]) # create an expiring object and a container with the wrong policy self.brain.stop_primary_half() self.brain.put_container(int(old_policy)) self.brain.put_object(headers={'X-Delete-After': 2}) # get the object timestamp metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) create_timestamp = Timestamp(metadata['x-timestamp']) self.brain.start_primary_half() # get the expiring object updates in their queue, while we have all # the servers up Manager(['object-updater']).once() self.brain.stop_handoff_half() self.brain.put_container(int(wrong_policy)) # don't start handoff servers, only wrong policy is available # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # this guy should no-op since it's unable to expire the object self.expirer.once() self.brain.start_handoff_half() self.get_to_final_state() # validate object is expired found_in_policy = None metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4,), headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) self.assertIn('x-backend-timestamp', metadata) self.assertEqual(Timestamp(metadata['x-backend-timestamp']), create_timestamp) # but it is still in the listing self.assertTrue(self._check_obj_in_container_listing(), msg='Did not find listing for %s' % self.object_name) # clear proxy cache client.post_container(self.url, self.token, self.container_name, {}) # run the expirer again after replication self.expirer.once() # object is not in the listing self.assertFalse(self._check_obj_in_container_listing(), msg='Found listing for %s' % self.object_name) # and validate object is tombstoned found_in_policy = None for policy in ENABLED_POLICIES: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4,), headers={'X-Backend-Storage-Policy-Index': int(policy)}) if 'x-backend-timestamp' in metadata: if found_in_policy: self.fail('found object in %s and also %s' % (found_in_policy, policy)) found_in_policy = policy self.assertIn('x-backend-timestamp', metadata) self.assertGreater(Timestamp(metadata['x-backend-timestamp']), create_timestamp) def test_expirer_doesnt_make_async_pendings(self): # The object expirer cleans up its own queue. The inner loop # basically looks like this: # # for obj in stuff_to_delete: # delete_the_object(obj) # remove_the_queue_entry(obj) # # By default, upon receipt of a DELETE request for an expiring # object, the object servers will create async_pending records to # clean the expirer queue. Since the expirer cleans its own queue, # this is unnecessary. The expirer can make requests in such a way # tha the object server does not write out any async pendings; this # test asserts that this is the case. # Make an expiring object in each policy for policy in ENABLED_POLICIES: container_name = "expirer-test-%d" % policy.idx container_headers = {'X-Storage-Policy': policy.name} client.put_container(self.url, self.token, container_name, headers=container_headers) now = time.time() delete_at = int(now + 2.0) client.put_object( self.url, self.token, container_name, "some-object", headers={'X-Delete-At': str(delete_at), 'X-Timestamp': Timestamp(now).normal}, contents='dontcare') time.sleep(2.0) # make sure auto-created expirer-queue containers get in the account # listing so the expirer can find them Manager(['container-updater']).once() # Make sure there's no async_pendings anywhere. Probe tests only run # on single-node installs anyway, so this set should be small enough # that an exhaustive check doesn't take too long. all_obj_nodes = self.get_all_object_nodes() pendings_before = self.gather_async_pendings(all_obj_nodes) # expire the objects Manager(['object-expirer']).once() pendings_after = self.gather_async_pendings(all_obj_nodes) self.assertEqual(pendings_after, pendings_before) def test_expirer_object_should_not_be_expired(self): # Current object-expirer checks the correctness via x-if-delete-at # header that it can be deleted by expirer. If there are objects # either which doesn't have x-delete-at header as metadata or which # has different x-delete-at value from x-if-delete-at value, # object-expirer's delete will fail as 412 PreconditionFailed. # However, if some of the objects are in handoff nodes, the expirer # can put the tombstone with the timestamp as same as x-delete-at and # the object consistency will be resolved as the newer timestamp will # be winner (in particular, overwritten case w/o x-delete-at). This # test asserts such a situation that, at least, the overwriten object # which have larger timestamp than the original expirered date should # be safe. def put_object(headers): # use internal client to PUT objects so that X-Timestamp in headers # is effective headers['Content-Length'] = '0' path = self.client.make_path( self.account, self.container_name, self.object_name) try: self.client.make_request('PUT', path, headers, (2,)) except UnexpectedResponse as e: self.fail( 'Expected 201 for PUT object but got %s' % e.resp.status) obj_brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'object', self.policy) # T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated) # < T(expirer_executed) # Recreated obj should be appeared in any split brain case obj_brain.put_container() # T(obj_deleted with x-delete-at) # object-server accepts req only if X-Delete-At is later than 'now' # so here, T(obj_created) < T(obj_deleted with x-delete-at) now = time.time() delete_at = int(now + 2.0) recreate_at = delete_at + 1.0 put_object(headers={'X-Delete-At': str(delete_at), 'X-Timestamp': Timestamp(now).normal}) # some object servers stopped to make a situation that the # object-expirer can put tombstone in the primary nodes. obj_brain.stop_primary_half() # increment the X-Timestamp explicitly # (will be T(obj_deleted with x-delete-at) < T(obj_recreated)) put_object(headers={'X-Object-Meta-Expired': 'False', 'X-Timestamp': Timestamp(recreate_at).normal}) # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # sanity, the newer object is still there try: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name) except UnexpectedResponse as e: self.fail( 'Expected 200 for HEAD object but got %s' % e.resp.status) self.assertIn('x-object-meta-expired', metadata) # some object servers recovered obj_brain.start_primary_half() # sleep until after recreated_at while time.time() <= recreate_at: time.sleep(0.1) # Now, expirer runs at the time after obj is recreated self.expirer.once() # verify that original object was deleted by expirer obj_brain.stop_handoff_half() try: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4,)) except UnexpectedResponse as e: self.fail( 'Expected 404 for HEAD object but got %s' % e.resp.status) obj_brain.start_handoff_half() # and inconsistent state of objects is recovered by replicator Manager(['object-replicator']).once() # check if you can get recreated object try: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name) except UnexpectedResponse as e: self.fail( 'Expected 200 for HEAD object but got %s' % e.resp.status) self.assertIn('x-object-meta-expired', metadata) def _test_expirer_delete_outdated_object_version(self, object_exists): # This test simulates a case where the expirer tries to delete # an outdated version of an object. # One case is where the expirer gets a 404, whereas the newest version # of the object is offline. # Another case is where the expirer gets a 412, since the old version # of the object mismatches the expiration time sent by the expirer. # In any of these cases, the expirer should retry deleting the object # later, for as long as a reclaim age has not passed. obj_brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'object', self.policy) obj_brain.put_container() if object_exists: obj_brain.put_object() # currently, the object either doesn't exist, or does not have # an expiration # stop primary servers and put a newer version of the object, this # time with an expiration. only the handoff servers will have # the new version obj_brain.stop_primary_half() now = time.time() delete_at = int(now + 2.0) obj_brain.put_object({'X-Delete-At': str(delete_at)}) # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # update object record in the container listing Manager(['container-replicator']).once() # take handoff servers down, and bring up the outdated primary servers obj_brain.start_primary_half() obj_brain.stop_handoff_half() # wait until object expiration time while time.time() <= delete_at: time.sleep(0.1) # run expirer against the outdated servers. it should fail since # the outdated version does not match the expiration time self.expirer.once() # bring all servers up, and run replicator to update servers obj_brain.start_handoff_half() Manager(['object-replicator']).once() # verify the deletion has failed by checking the container listing self.assertTrue(self._check_obj_in_container_listing(), msg='Did not find listing for %s' % self.object_name) # run expirer again, delete should now succeed self.expirer.once() # verify the deletion by checking the container listing self.assertFalse(self._check_obj_in_container_listing(), msg='Found listing for %s' % self.object_name) def test_expirer_delete_returns_outdated_404(self): self._test_expirer_delete_outdated_object_version(object_exists=False) def test_expirer_delete_returns_outdated_412(self): self._test_expirer_delete_outdated_object_version(object_exists=True)
class TestObjectExpirer(ReplProbeTest): def setUp(self): self.expirer = Manager(['object-expirer']) self.expirer.start() err = self.expirer.stop() if err: raise unittest.SkipTest('Unable to verify object-expirer service') conf_files = [] for server in self.expirer.servers: conf_files.extend(server.conf_files()) conf_file = conf_files[0] self.client = InternalClient(conf_file, 'probe-test', 3) super(TestObjectExpirer, self).setUp() self.container_name = 'container-%s' % uuid.uuid4() self.object_name = 'object-%s' % uuid.uuid4() self.brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name) def _check_obj_in_container_listing(self): for obj in self.client.iter_objects(self.account, self.container_name): if self.object_name == obj['name']: return True return False @unittest.skipIf(len(ENABLED_POLICIES) < 2, "Need more than one policy") def test_expirer_object_split_brain(self): old_policy = random.choice(ENABLED_POLICIES) wrong_policy = random.choice( [p for p in ENABLED_POLICIES if p != old_policy]) # create an expiring object and a container with the wrong policy self.brain.stop_primary_half() self.brain.put_container(int(old_policy)) self.brain.put_object(headers={'X-Delete-After': 2}) # get the object timestamp metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) create_timestamp = Timestamp(metadata['x-timestamp']) self.brain.start_primary_half() # get the expiring object updates in their queue, while we have all # the servers up Manager(['object-updater']).once() self.brain.stop_handoff_half() self.brain.put_container(int(wrong_policy)) # don't start handoff servers, only wrong policy is available # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # this guy should no-op since it's unable to expire the object self.expirer.once() self.brain.start_handoff_half() self.get_to_final_state() # validate object is expired found_in_policy = None metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4, ), headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) self.assertIn('x-backend-timestamp', metadata) self.assertEqual(Timestamp(metadata['x-backend-timestamp']), create_timestamp) # but it is still in the listing self.assertTrue(self._check_obj_in_container_listing(), msg='Did not find listing for %s' % self.object_name) # clear proxy cache client.post_container(self.url, self.token, self.container_name, {}) # run the expirier again after replication self.expirer.once() # object is not in the listing self.assertFalse(self._check_obj_in_container_listing(), msg='Found listing for %s' % self.object_name) # and validate object is tombstoned found_in_policy = None for policy in ENABLED_POLICIES: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4, ), headers={'X-Backend-Storage-Policy-Index': int(policy)}) if 'x-backend-timestamp' in metadata: if found_in_policy: self.fail('found object in %s and also %s' % (found_in_policy, policy)) found_in_policy = policy self.assertIn('x-backend-timestamp', metadata) self.assertGreater(Timestamp(metadata['x-backend-timestamp']), create_timestamp) def test_expirer_object_should_not_be_expired(self): # Current object-expirer checks the correctness via x-if-delete-at # header that it can be deleted by expirer. If there are objects # either which doesn't have x-delete-at header as metadata or which # has different x-delete-at value from x-if-delete-at value, # object-expirer's delete will fail as 412 PreconditionFailed. # However, if some of the objects are in handoff nodes, the expirer # can put the tombstone with the timestamp as same as x-delete-at and # the object consistency will be resolved as the newer timestamp will # be winner (in particular, overwritten case w/o x-delete-at). This # test asserts such a situation that, at least, the overwriten object # which have larger timestamp than the original expirered date should # be safe. def put_object(headers): # use internal client to PUT objects so that X-Timestamp in headers # is effective headers['Content-Length'] = '0' path = self.client.make_path(self.account, self.container_name, self.object_name) try: self.client.make_request('PUT', path, headers, (2, )) except UnexpectedResponse as e: self.fail('Expected 201 for PUT object but got %s' % e.resp.status) obj_brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'object', self.policy) # T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated) # < T(expirer_executed) # Recreated obj should be appeared in any split brain case obj_brain.put_container() # T(obj_deleted with x-delete-at) # object-server accepts req only if X-Delete-At is later than 'now' # so here, T(obj_created) < T(obj_deleted with x-delete-at) now = time.time() delete_at = int(now + 2.0) recreate_at = delete_at + 1.0 put_object(headers={ 'X-Delete-At': str(delete_at), 'X-Timestamp': Timestamp(now).normal }) # some object servers stopped to make a situation that the # object-expirer can put tombstone in the primary nodes. obj_brain.stop_primary_half() # increment the X-Timestamp explicitly # (will be T(obj_deleted with x-delete-at) < T(obj_recreated)) put_object( headers={ 'X-Object-Meta-Expired': 'False', 'X-Timestamp': Timestamp(recreate_at).normal }) # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # sanity, the newer object is still there try: metadata = self.client.get_object_metadata(self.account, self.container_name, self.object_name) except UnexpectedResponse as e: self.fail('Expected 200 for HEAD object but got %s' % e.resp.status) self.assertIn('x-object-meta-expired', metadata) # some object servers recovered obj_brain.start_primary_half() # sleep until after recreated_at while time.time() <= recreate_at: time.sleep(0.1) # Now, expirer runs at the time after obj is recreated self.expirer.once() # verify that original object was deleted by expirer obj_brain.stop_handoff_half() try: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4, )) except UnexpectedResponse as e: self.fail('Expected 404 for HEAD object but got %s' % e.resp.status) obj_brain.start_handoff_half() # and inconsistent state of objects is recovered by replicator Manager(['object-replicator']).once() # check if you can get recreated object try: metadata = self.client.get_object_metadata(self.account, self.container_name, self.object_name) except UnexpectedResponse as e: self.fail('Expected 200 for HEAD object but got %s' % e.resp.status) self.assertIn('x-object-meta-expired', metadata) def _test_expirer_delete_outdated_object_version(self, object_exists): # This test simulates a case where the expirer tries to delete # an outdated version of an object. # One case is where the expirer gets a 404, whereas the newest version # of the object is offline. # Another case is where the expirer gets a 412, since the old version # of the object mismatches the expiration time sent by the expirer. # In any of these cases, the expirer should retry deleting the object # later, for as long as a reclaim age has not passed. obj_brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'object', self.policy) obj_brain.put_container() if object_exists: obj_brain.put_object() # currently, the object either doesn't exist, or does not have # an expiration # stop primary servers and put a newer version of the object, this # time with an expiration. only the handoff servers will have # the new version obj_brain.stop_primary_half() now = time.time() delete_at = int(now + 2.0) obj_brain.put_object({'X-Delete-At': str(delete_at)}) # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # update object record in the container listing Manager(['container-replicator']).once() # take handoff servers down, and bring up the outdated primary servers obj_brain.start_primary_half() obj_brain.stop_handoff_half() # wait until object expiration time while time.time() <= delete_at: time.sleep(0.1) # run expirer against the outdated servers. it should fail since # the outdated version does not match the expiration time self.expirer.once() # bring all servers up, and run replicator to update servers obj_brain.start_handoff_half() Manager(['object-replicator']).once() # verify the deletion has failed by checking the container listing self.assertTrue(self._check_obj_in_container_listing(), msg='Did not find listing for %s' % self.object_name) # run expirer again, delete should now succeed self.expirer.once() # this is mainly to paper over lp bug #1652323 self.get_to_final_state() # verify the deletion by checking the container listing self.assertFalse(self._check_obj_in_container_listing(), msg='Found listing for %s' % self.object_name) def test_expirer_delete_returns_outdated_404(self): self._test_expirer_delete_outdated_object_version(object_exists=False) def test_expirer_delete_returns_outdated_412(self): self._test_expirer_delete_outdated_object_version(object_exists=True)
class ObjectExpirer(Daemon): """ Daemon that queries the internal hidden expiring_objects_account to discover objects that need to be deleted. :param conf: The daemon configuration. """ def __init__(self, conf): self.conf = conf self.logger = get_logger(conf, log_route='object-expirer') self.interval = int(conf.get('interval') or 300) self.expiring_objects_account = \ (conf.get('auto_create_account_prefix') or '.') + \ (conf.get('expiring_objects_account_name') or 'expiring_objects') conf_path = conf.get('__file__') or '/etc/swift/object-expirer.conf' request_tries = int(conf.get('request_tries') or 3) self.swift = InternalClient(conf_path, 'Swift Object Expirer', request_tries) self.report_interval = int(conf.get('report_interval') or 300) self.report_first_time = self.report_last_time = time() self.report_objects = 0 self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.rcache = join(self.recon_cache_path, 'object.recon') self.concurrency = int(conf.get('concurrency', 1)) if self.concurrency < 1: raise ValueError("concurrency must be set to at least 1") self.processes = int(self.conf.get('processes', 0)) self.process = int(self.conf.get('process', 0)) def report(self, final=False): """ Emits a log line report of the progress so far, or the final progress is final=True. :param final: Set to True for the last report once the expiration pass has completed. """ if final: elapsed = time() - self.report_first_time self.logger.info(_('Pass completed in %ds; %d objects expired') % (elapsed, self.report_objects)) dump_recon_cache({'object_expiration_pass': elapsed, 'expired_last_pass': self.report_objects}, self.rcache, self.logger) elif time() - self.report_last_time >= self.report_interval: elapsed = time() - self.report_first_time self.logger.info(_('Pass so far %ds; %d objects expired') % (elapsed, self.report_objects)) self.report_last_time = time() def run_once(self, *args, **kwargs): """ Executes a single pass, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon accepts processes and process keyword args. These will override the values from the config file if provided. """ processes, process = self.get_process_values(kwargs) pool = GreenPool(self.concurrency) containers_to_delete = [] self.report_first_time = self.report_last_time = time() self.report_objects = 0 try: self.logger.debug(_('Run begin')) containers, objects = \ self.swift.get_account_info(self.expiring_objects_account) self.logger.info(_('Pass beginning; %s possible containers; %s ' 'possible objects') % (containers, objects)) for c in self.swift.iter_containers(self.expiring_objects_account): container = c['name'] timestamp = int(container) if timestamp > int(time()): break containers_to_delete.append(container) for o in self.swift.iter_objects(self.expiring_objects_account, container): obj = o['name'].encode('utf8') if processes > 0: obj_process = int( hashlib.md5('%s/%s' % (container, obj)). hexdigest(), 16) if obj_process % processes != process: continue timestamp, actual_obj = obj.split('-', 1) timestamp = int(timestamp) if timestamp > int(time()): break pool.spawn_n( self.delete_object, actual_obj, timestamp, container, obj) pool.waitall() for container in containers_to_delete: try: self.swift.delete_container( self.expiring_objects_account, container, acceptable_statuses=(2, HTTP_NOT_FOUND, HTTP_CONFLICT)) except (Exception, Timeout) as err: self.logger.exception( _('Exception while deleting container %s %s') % (container, str(err))) self.logger.debug(_('Run end')) self.report(final=True) except (Exception, Timeout): self.logger.exception(_('Unhandled exception')) def run_forever(self, *args, **kwargs): """ Executes passes forever, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon has no additional keyword args. """ sleep(random() * self.interval) while True: begin = time() try: self.run_once(*args, **kwargs) except (Exception, Timeout): self.logger.exception(_('Unhandled exception')) elapsed = time() - begin if elapsed < self.interval: sleep(random() * (self.interval - elapsed)) def get_process_values(self, kwargs): """ Gets the processes, process from the kwargs if those values exist. Otherwise, return processes, process set in the config file. :param kwargs: Keyword args passed into the run_forever(), run_once() methods. They have values specified on the command line when the daemon is run. """ if kwargs.get('processes') is not None: processes = int(kwargs['processes']) else: processes = self.processes if kwargs.get('process') is not None: process = int(kwargs['process']) else: process = self.process if process < 0: raise ValueError( 'process must be an integer greater than or equal to 0') if processes < 0: raise ValueError( 'processes must be an integer greater than or equal to 0') if processes and process >= processes: raise ValueError( 'process must be less than or equal to processes') return processes, process def delete_object(self, actual_obj, timestamp, container, obj): start_time = time() try: self.delete_actual_object(actual_obj, timestamp) self.swift.delete_object(self.expiring_objects_account, container, obj) self.report_objects += 1 self.logger.increment('objects') except (Exception, Timeout) as err: self.logger.increment('errors') self.logger.exception( _('Exception while deleting object %s %s %s') % (container, obj, str(err))) self.logger.timing_since('timing', start_time) self.report() def delete_actual_object(self, actual_obj, timestamp): """ Deletes the end-user object indicated by the actual object name given '<account>/<container>/<object>' if and only if the X-Delete-At value of the object is exactly the timestamp given. :param actual_obj: The name of the end-user object to delete: '<account>/<container>/<object>' :param timestamp: The timestamp the X-Delete-At value must match to perform the actual delete. """ path = '/v1/' + urllib.quote(actual_obj.lstrip('/')) self.swift.make_request('DELETE', path, {'X-If-Delete-At': str(timestamp)}, (2, HTTP_NOT_FOUND, HTTP_PRECONDITION_FAILED))
class UtilizationAggregator(Daemon): def __init__(self, conf): self.conf = conf self.logger = get_logger(conf, log_route='utilization-aggregator') self.interval = int(conf.get('interval') or 60) self.aggregate_account = '.utilization' self.sample_account = '.transfer_record' conf_path = conf.get('__file__') or \ '/etc/swift/swift-utilization-aggregator.conf' request_tries = int(conf.get('request_tries') or 3) self.swift = InternalClient(conf_path, 'Swift Utilization Aggregator', request_tries) self.report_interval = int(conf.get('report_interval') or 60) self.report_first_time = self.report_last_time = time() self.report_containers = 0 self.report_objects = 0 self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.rcache = join(self.recon_cache_path, 'object.recon') self.concurrency = int(conf.get('concurrency', 1)) if self.concurrency < 1: raise ValueError("concurrency must be set to at least 1") self.processes = int(self.conf.get('processes', 0)) self.process = int(self.conf.get('process', 0)) self.container_ring = Ring('/etc/swift', ring_name='container') self.sample_rate = int(self.conf.get('sample_rate', 600)) self.last_chk = iso8601_to_timestamp(self.conf.get( 'service_start')) self.kinx_api_url = self.conf.get('kinx_api_url') def report(self, final=False): if final: elapsed = time() - self.report_first_time self.logger.info(_('Pass completed in %ds; %d containers,' ' %d objects aggregated') % (elapsed, self.report_containers, self.report_objects)) dump_recon_cache({'object_aggregation_pass': elapsed, 'aggregation_last_pass': self.report_containers}, self.rcache, self.logger) elif time() - self.report_last_time >= self.report_interval: elapsed = time() - self.report_first_time self.logger.info(_('Pass so far %ds; %d objects aggregated') % (elapsed, self.report_objects)) self.report_last_time = time() def run_once(self, *args, **kwargs): processes, process = self.get_process_values(kwargs) pool = GreenPool(self.concurrency) self.report_first_time = self.report_last_time = time() self.report_objects = 0 self.report_containers = 0 containers_to_delete = [] try: self.logger.debug(_('Run begin')) containers, objects = \ self.swift.get_account_info(self.sample_account) self.logger.info(_('Pass beginning; %s possible containers; %s ' 'possible objects') % (containers, objects)) for c in self.swift.iter_containers(self.sample_account): container = c['name'] try: timestamp, account = container.split('_', 1) timestamp = float(timestamp) except ValueError: self.logger.debug('ValueError: %s, ' 'need more than 1 value to unpack' % \ container) else: if processes > 0: obj_proc = int(hashlib.md5(container).hexdigest(), 16) if obj_proc % processes != process: continue n = (float(time()) // self.sample_rate) * self.sample_rate if timestamp <= n: containers_to_delete.append(container) pool.spawn_n(self.aggregate_container, container) pool.waitall() for container in containers_to_delete: try: self.logger.debug('delete container: %s' % container) self.swift.delete_container(self.sample_account, container, acceptable_statuses=( 2, HTTP_NOT_FOUND, HTTP_CONFLICT)) except (Exception, Timeout) as err: self.logger.exception( _('Exception while deleting container %s %s') % (container, str(err))) tenants_to_fillup = list() for c in self.swift.iter_containers(self.aggregate_account): tenant_id = c['name'] if processes > 0: c_proc = int(hashlib.md5(tenant_id).hexdigest(), 16) if c_proc % processes != process: continue tenants_to_fillup.append(tenant_id) # fillup lossed usage data self.fillup_lossed_usage_data(tenants_to_fillup) self.logger.debug(_('Run end')) self.report(final=True) except (Exception, Timeout): self.logger.exception(_('Unhandled exception')) def run_forever(self, *args, **kwargs): """ Executes passes forever, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon has no additional keyword args. """ sleep(random() * self.interval) while True: begin = time() try: self.run_once(*args, **kwargs) except (Exception, Timeout): self.logger.exception(_('Unhandled exception')) elapsed = time() - begin if elapsed < self.interval: sleep(random() * (self.interval - elapsed)) def get_process_values(self, kwargs): """ Gets the processes, process from the kwargs if those values exist. Otherwise, return processes, process set in the config file. :param kwargs: Keyword args passed into the run_forever(), run_once() methods. They have values specified on the command line when the daemon is run. """ if kwargs.get('processes') is not None: processes = int(kwargs['processes']) else: processes = self.processes if kwargs.get('process') is not None: process = int(kwargs['process']) else: process = self.process if process < 0: raise ValueError( 'process must be an integer greater than or equal to 0') if processes < 0: raise ValueError( 'processes must be an integer greater than or equal to 0') if processes and process >= processes: raise ValueError( 'process must be less than or equal to processes') return processes, process def aggregate_container(self, container): start_time = time() try: objs_to_delete = list() bytes_recvs = dict() bytes_sents = dict() ts, tenant_id, account = container.split('_', 2) ts = int(float(ts)) for o in self.swift.iter_objects(self.sample_account, container): name = o['name'] objs_to_delete.append(name) ts, bytes_rv, bytes_st, trans_id, client_ip = name.split('/') bill_type = self.get_billtype_by_client_ip(client_ip, ts) bytes_recvs[bill_type] = bytes_recvs.get(bill_type, 0) + int(bytes_rv) bytes_sents[bill_type] = bytes_sents.get(bill_type, 0) + int(bytes_st) self.report_objects += 1 for o in objs_to_delete: self.swift.delete_object(self.sample_account, container, o) for bill_type, bt_rv in bytes_recvs.items(): t_object = 'transfer/%d/%d/%d_%d_%d' % (ts, bill_type, bt_rv, bytes_sents[bill_type], self.report_objects) self._hidden_update(tenant_id, t_object) except (Exception, Timeout) as err: self.logger.increment('errors') self.logger.exception( _('Exception while aggregating sample %s %s') % (container, str(err))) self.logger.timing_since('timing', start_time) self.report() def account_info(self, tenant_id, timestamp): path = '/v1/%s/%s?prefix=usage/%d&limit=1' % (self.aggregate_account, tenant_id, timestamp) resp = self.swift.make_request('GET', path, {}, (2,)) if len(resp.body) == 0: return 0, 0, 0 usages = resp.body.split('/', 2)[2].rstrip() cont_cnt, obj_cnt, bt_used = usages.split('_') return int(cont_cnt), int(obj_cnt), int(bt_used) def _hidden_update(self, container, obj, method='PUT'): hidden_path = '/%s/%s/%s' % (self.aggregate_account, container, obj) part, nodes = self.container_ring.get_nodes(self.aggregate_account, container) for node in nodes: ip = node['ip'] port = node['port'] dev = node['device'] action_headers = dict() action_headers['user-agent'] = 'aggregator' action_headers['X-Timestamp'] = normalize_timestamp(time()) action_headers['referer'] = 'aggregator-daemon' action_headers['x-size'] = '0' action_headers['x-content-type'] = "text/plain" action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e' conn = http_connect(ip, port, dev, part, method, hidden_path, action_headers) response = conn.getresponse() response.read() def fillup_lossed_usage_data(self, tenants): now = (float(time()) // self.sample_rate) * self.sample_rate path = '/v1/%s/%s?prefix=usage/%d&limit=1' for t in tenants: last = self.last_chk cont_cnt = obj_cnt = bt_used = -1 while last <= now: p = path % (self.aggregate_account, t, last) resp = self.swift.make_request('GET', p, {}, (2,)) if len(resp.body) != 0: usages = resp.body.split('/', 2)[2].rstrip() c, o, bt = usages.split('_') cont_cnt = int(c) obj_cnt = int(o) bt_used = int(bt) else: before = last - self.sample_rate if cont_cnt == -1: cont_cnt, obj_cnt, bt_used = \ self.account_info(self.aggregate_account, before) obj = 'usage/%d/%d_%d_%d' % (last, cont_cnt, obj_cnt, bt_used) self._hidden_update(t, obj) last += self.sample_rate self.last_chk = now def get_billtype_by_client_ip(self, client_ip, timestamp): end_ts = timestamp_to_iso8601(timestamp + self.sample_rate - 1) start_ts = timestamp_to_iso8601(timestamp) params = {'start': start_ts, 'end': end_ts} path = self.kinx_api_url + '/?%s' % (urllib.urlencode(params)) data = json.loads(urllib.urlopen(path).read()) bill_type = -1 for r in data['ip_ranges']: bill_type = r['bill_type'] for cidr in r['ip_range']: if self.ip_in_cidr(client_ip, cidr): return bill_type return bill_type def ip_in_cidr(self, client_ip, cidr): bt_to_bits = lambda b: bin(int(b))[2:].rjust(8, '0') ip_to_bits = lambda ip: ''.join([bt_to_bits(b) for b in ip.split('.')]) client_ip_bits = ip_to_bits(client_ip) ip, snet = cidr.split('/') ip_bits = ip_to_bits(ip) if client_ip_bits[:int(snet)] == ip_bits[:int(snet)]: return True else: return False
class ObjectRestorer(Daemon): """ Daemon that queries the internal hidden expiring_objects_account to discover objects that need to be deleted. :param conf: The daemon configuration. """ def __init__(self, conf): self.conf = conf self.container_ring = Ring('/etc/swift', ring_name='container') self.logger = get_logger(conf, log_route='object-restorer') self.logger.set_statsd_prefix('s3-object-restorer') self.interval = int(conf.get('interval') or 300) self.restoring_object_account = '.s3_restoring_objects' self.expiring_restored_account = '.s3_expiring_restored_objects' self.glacier_account_prefix = '.glacier_' self.todo_container = 'todo' self.restoring_container = 'restoring' conf_path = '/etc/swift/s3-object-restorer.conf' request_tries = int(conf.get('request_tries') or 3) self.glacier = self._init_glacier() self.glacier_tmpdir = conf.get('temp_path', '/var/cache/s3/') self.swift = InternalClient(conf_path, 'Swift Object Restorer', request_tries) self.report_interval = int(conf.get('report_interval') or 300) self.report_first_time = self.report_last_time = time() self.report_objects = 0 self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.rcache = join(self.recon_cache_path, 'object.recon') self.concurrency = int(conf.get('concurrency', 1)) if self.concurrency < 1: raise ValueError("concurrency must be set to at least 1") self.processes = int(self.conf.get('processes', 0)) self.process = int(self.conf.get('process', 0)) self.client = Client(self.conf.get('sentry_sdn', '')) def _init_glacier(self): con = Layer2(region_name='ap-northeast-1') return con.get_vault('swift-s3-transition') def report(self, final=False): """ Emits a log line report of the progress so far, or the final progress is final=True. :param final: Set to True for the last report once the expiration pass has completed. """ if final: elapsed = time() - self.report_first_time self.logger.info(_('Pass completed in %ds; %d objects restored') % (elapsed, self.report_objects)) dump_recon_cache({'object_expiration_pass': elapsed, 'expired_last_pass': self.report_objects}, self.rcache, self.logger) elif time() - self.report_last_time >= self.report_interval: elapsed = time() - self.report_first_time self.logger.info(_('Pass so far %ds; %d objects restored') % (elapsed, self.report_objects)) self.report_last_time = time() def run_once(self, *args, **kwargs): """ Executes a single pass, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon accepts processes and process keyword args. These will override the values from the config file if provided. """ processes, process = self.get_process_values(kwargs) pool = GreenPool(self.concurrency) self.report_first_time = self.report_last_time = time() self.report_objects = 0 try: self.logger.debug(_('Run begin')) for o in self.swift.iter_objects(self.restoring_object_account, self.todo_container): obj = o['name'].encode('utf8') if processes > 0: obj_process = int( hashlib.md5('%s/%s' % (self.todo_container, obj)). hexdigest(), 16) if obj_process % processes != process: continue pool.spawn_n(self.start_object_restoring, obj) pool.waitall() for o in self.swift.iter_objects(self.restoring_object_account, self.restoring_container): obj = o['name'].encode('utf8') if processes > 0: obj_process = int( hashlib.md5('%s/%s' % (self.restoring_container, obj)). hexdigest(), 16) if obj_process % processes != process: continue pool.spawn_n(self.check_object_restored, obj) pool.waitall() self.logger.debug(_('Run end')) self.report(final=True) except (Exception, Timeout) as e: report_exception(self.logger, _('Unhandled exception'), self.client) def run_forever(self, *args, **kwargs): """ Executes passes forever, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon has no additional keyword args. """ sleep(random() * self.interval) while True: begin = time() try: self.run_once(*args, **kwargs) except (Exception, Timeout): report_exception(self.logger, _('Unhandled exception'), self.client) elapsed = time() - begin if elapsed < self.interval: sleep(random() * (self.interval - elapsed)) def get_process_values(self, kwargs): """ Gets the processes, process from the kwargs if those values exist. Otherwise, return processes, process set in the config file. :param kwargs: Keyword args passed into the run_forever(), run_once() methods. They have values specified on the command line when the daemon is run. """ if kwargs.get('processes') is not None: processes = int(kwargs['processes']) else: processes = self.processes if kwargs.get('process') is not None: process = int(kwargs['process']) else: process = self.process if process < 0: raise ValueError( 'process must be an integer greater than or equal to 0') if processes < 0: raise ValueError( 'processes must be an integer greater than or equal to 0') if processes and process >= processes: raise ValueError( 'process must be less than or equal to processes') return processes, process def start_object_restoring(self, obj): start_time = time() try: actual_obj = obj account, container, obj = actual_obj.split('/', 2) archiveId = self.get_archiveid(account, container, obj) if archiveId is None: self.swift.delete_object(self.restoring_object_account, self.todo_container, actual_obj) return jobId = self.glacier.retrieve_archive(archiveId).id restoring_obj = make_glacier_hidden_object_name(actual_obj, jobId) meta_prefix = 'X-Object-Meta' meta = self.swift.get_object_metadata(account, container, obj, metadata_prefix=meta_prefix) meta = {'X-Object-Meta' + key: value for key, value in meta.iteritems()} self.update_action_hidden(self.restoring_object_account, self.restoring_container, restoring_obj, metadata=meta) self.swift.delete_object(self.restoring_object_account, self.todo_container, actual_obj) self.report_objects += 1 self.logger.increment('start') except (Exception, Timeout) as err: self.logger.increment('errors') report_exception(self.logger.exception, _('Exception while restoring object %s. %s') % (obj, str(err)), self.client) self.logger.timing_since('timing', start_time) self.report() def get_archiveid(self, account, container, obj): glacier_account = '%s%s' % (self.glacier_account_prefix, account) glacier_obj = None for o in get_objects_by_prefix(glacier_account, container, obj, swift_client=self.swift): name = get_glacier_objname_from_hidden_object(o) if name == obj: glacier_obj = o break if glacier_obj is None: return None return get_glacier_key_from_hidden_object(glacier_obj) def check_object_restored(self, restoring_object): actual_obj = get_glacier_objname_from_hidden_object(restoring_object) jobId = get_glacier_key_from_hidden_object(restoring_object) try: path = '/v1/%s' % actual_obj resp = self.swift.make_request('GET', path, {}, (2, 4,)) if resp.status_int == 404: raise Exception('Object Not Found: %s' % actual_obj) job = self.glacier.get_job(job_id=jobId) if not job.completed: return self.complete_restore(actual_obj, job) except Exception as e: # Job ID가 만료될 경우 다시 restore 를 시도한다. if not e.message.startswith('Object Not Found:'): self.start_object_restoring(actual_obj) self.logger.info(e) self.swift.delete_object(self.restoring_object_account, self.restoring_container, restoring_object) def complete_restore(self, actual_obj, job): tmppath = tempfile.NamedTemporaryFile(bufsize=0, delete=False, dir=self.glacier_tmpdir).name try: job.download_to_file(filename=tmppath) prefix = 'X-Object-Meta' a, c, o = actual_obj.split('/', 2) metadata = self.swift.get_object_metadata(a, c, o, metadata_prefix=prefix) metadata = {'X-Object-Meta' + key: value for key, value in metadata .iteritems()} days = int(metadata['X-Object-Meta-s3-restore-expire-days']) exp_time = normalize_delete_at_timestamp(calc_nextDay(time()) + (days - 1) * 86400) # send restored object to proxy server path = '/v1/%s' % actual_obj metadata['X-Object-Meta-S3-Restored'] = True exp_date = strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime(float(exp_time))) metadata['X-Object-Meta-s3-restore'] = 'ongoing-request="false", ' \ 'expiry-date="%s"' % exp_date metadata['Content-Length'] = os.path.getsize(tmppath) del metadata['X-Object-Meta-s3-restore-expire-days'] obj_body = open(tmppath, 'r') self.swift.make_request('PUT', path, metadata, (2,), body_file=obj_body) # Add to .s3_expiring_restored_objects self.update_action_hidden(self.expiring_restored_account, exp_time, actual_obj) obj_body.close() self.logger.increment('done') except UnexpectedResponse as e: if e.resp.status_int == 404: self.logger.error('Restoring object not found - %s' % actual_obj) except Exception as e: self.logger.increment('errors') self.logger.debug(e) finally: os.remove(tmppath) def compute_obj_md5(self, obj): etag = hashlib.md5() etag.update(obj) etag = etag.hexdigest() return etag def update_action_hidden(self, account, container, obj, metadata=None): hidden_path = '/%s/%s/%s' % (account, container, obj) part, nodes = self.container_ring.get_nodes(account, container) for node in nodes: ip = node['ip'] port = node['port'] dev = node['device'] action_headers = dict() action_headers['user-agent'] = 'restore-daemon' action_headers['X-Timestamp'] = normalize_timestamp(time()) action_headers['referer'] = 'restore-daemon' action_headers['x-size'] = '0' action_headers['x-content-type'] = "text/plain" action_headers['x-etag'] = 'd41d8cd98f00b204e9800998ecf8427e' if metadata: action_headers.update(metadata) conn = http_connect(ip, port, dev, part, 'PUT', hidden_path, action_headers) response = conn.getresponse() response.read()
class ObjectExpirer(Daemon): def __init__(self, conf): super(ObjectExpirer, self).__init__(conf) self.conf = conf self.logger = get_logger(conf, log_route='s3-object-expirer') self.logger.set_statsd_prefix('s3-object-expirer') self.interval = int(conf.get('interval') or 300) self.s3_expiring_objects_account = \ (conf.get('auto_create_account_prefix') or '.') + \ (conf.get('expiring_objects_account_name') or 's3_expiring_objects') conf_path = conf.get('__file__') or '/etc/swift/s3-object-expirer.conf' request_tries = int(conf.get('request_tries') or 3) self.swift = InternalClient(conf_path, 'Swift Object Expirer', request_tries) self.glacier = self._init_glacier() self.glacier_account_prefix = '.glacier_' self.report_interval = int(conf.get('report_interval') or 300) self.report_first_time = self.report_last_time = time() self.report_objects = 0 self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.rcache = join(self.recon_cache_path, 'object.recon') self.concurrency = int(conf.get('concurrency', 1)) if self.concurrency < 1: raise ValueError("concurrency must be set to at least 1") self.processes = int(self.conf.get('processes', 0)) self.process = int(self.conf.get('process', 0)) self.client = Client(self.conf.get('sentry_sdn', '')) def _init_glacier(self): con = Layer2(region_name='ap-northeast-1') return con.get_vault('swift-s3-transition') def report(self, final=False): """ Emits a log line report of the progress so far, or the final progress is final=True. :param final: Set to True for the last report once the expiration pass has completed. """ if final: elapsed = time() - self.report_first_time self.logger.info(_('Pass completed in %ds; %d objects expired') % (elapsed, self.report_objects)) dump_recon_cache({'object_expiration_pass': elapsed, 'expired_last_pass': self.report_objects}, self.rcache, self.logger) elif time() - self.report_last_time >= self.report_interval: elapsed = time() - self.report_first_time self.logger.info(_('Pass so far %ds; %d objects expired') % (elapsed, self.report_objects)) self.report_last_time = time() def run_once(self, *args, **kwargs): """ Executes a single pass, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon accepts processes and process keyword args. These will override the values from the config file if provided. """ processes, process = self.get_process_values(kwargs) pool = GreenPool(self.concurrency) containers_to_delete = [] self.report_first_time = self.report_last_time = time() self.report_objects = 0 try: self.logger.debug(_('Run begin')) containers, objects = \ self.swift.get_account_info(self.s3_expiring_objects_account) self.logger.info(_('Pass beginning; %s possible containers; %s ' 'possible objects') % (containers, objects)) for c in self.swift.iter_containers(self. s3_expiring_objects_account): container = c['name'] timestamp = int(container) if timestamp > int(time()): break containers_to_delete.append(container) for o in self.swift.iter_objects(self .s3_expiring_objects_account, container): obj = o['name'].encode('utf8') if processes > 0: obj_process = int( hashlib.md5('%s/%s' % (container, obj)). hexdigest(), 16) if obj_process % processes != process: continue pool.spawn_n(self.delete_object, container, obj) pool.waitall() for container in containers_to_delete: try: self.swift.delete_container( self.s3_expiring_objects_account, container, acceptable_statuses=(2, HTTP_NOT_FOUND, HTTP_CONFLICT)) except (Exception, Timeout) as err: report_exception(self.logger, _('Exception while deleting container %s %s') % (container, str(err)), self.client.captureException()) self.logger.debug(_('Run end')) self.report(final=True) except (Exception, Timeout): report_exception(self.logger, _('Unhandled exception'), self.client) def run_forever(self, *args, **kwargs): """ Executes passes forever, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon has no additional keyword args. """ sleep(random() * self.interval) while True: begin = time() try: self.run_once(*args, **kwargs) except (Exception, Timeout): report_exception(self.logger, _('Unhandled exception'), self.client) elapsed = time() - begin if elapsed < self.interval: sleep(random() * (self.interval - elapsed)) def get_process_values(self, kwargs): """ Gets the processes, process from the kwargs if those values exist. Otherwise, return processes, process set in the config file. :param kwargs: Keyword args passed into the run_forever(), run_once() methods. They have values specified on the command line when the daemon is run. """ if kwargs.get('processes') is not None: processes = int(kwargs['processes']) else: processes = self.processes if kwargs.get('process') is not None: process = int(kwargs['process']) else: process = self.process if process < 0: raise ValueError( 'process must be an integer greater than or equal to 0') if processes < 0: raise ValueError( 'processes must be an integer greater than or equal to 0') if processes and process >= processes: raise ValueError( 'process must be less than or equal to processes') return processes, process def delete_object(self, hidden_container, obj): start_time = time() try: account, container, object = obj.split('/', 2) lifecycle = Lifecycle(account, container, object, swift_client=self.swift) object_header = lifecycle.object.headers object_rule = lifecycle.get_object_rule_by_action('Expiration') last_modified = gmt_to_timestamp(object_header['Last-Modified']) validation_flg = lifecycle.object_lifecycle_validation() if (validation_flg == LIFECYCLE_OK) or \ (validation_flg == DISABLED_TRANSITION): times = calc_when_actions_do(object_rule, last_modified) actual_expire_time = int(times['Expiration']) if actual_expire_time == int(hidden_container): self.delete_actual_object(obj) if lifecycle.get_s3_storage_class() == 'GLACIER': self.delete_glacier_object(obj) self.report_objects += 1 self.logger.increment('objects') self.swift.delete_object(self.s3_expiring_objects_account, hidden_container, obj) except (Exception, Timeout) as err: self.logger.increment('errors') report_exception(self.logger, _('Exception while deleting object %s %s %s') % (hidden_container, obj, str(err)), self.client) self.logger.timing_since('timing', start_time) self.report() def delete_glacier_object(self, obj): account, container, prefix = obj.split('/', 2) glacier_hidden_account = self.glacier_account_prefix + account objs = get_objects_by_prefix(glacier_hidden_account, container, prefix, swift_client=self.swift) glacier_obj = None for o in objs: name = get_glacier_objname_from_hidden_object(o) if name == prefix: glacier_obj = o break glacier_archive_id = get_glacier_key_from_hidden_object(glacier_obj) self.glacier.delete_archive(glacier_archive_id) self.swift.delete_object(glacier_hidden_account, container, glacier_obj) def delete_actual_object(self, obj): """ Deletes the end-user object indicated by the actual object name given '<account>/<container>/<object>' if and only if the X-Delete-At value of the object is exactly the timestamp given. :param obj: The name of the end-user object to delete: '<account>/<container>/<object>' """ path = '/v1/' + urllib.quote(obj.lstrip('/')) self.swift.make_request('DELETE', path, {}, (2, HTTP_NOT_FOUND))
class ObjectTransitor(Daemon): def __init__(self, conf): super(ObjectTransitor, self).__init__(conf) self.conf = conf self.logger = get_logger(conf, log_route='s3-object-transitor') self.logger.set_statsd_prefix('s3-object-transitor') self.interval = int(conf.get('interval') or 300) self.s3_tr_objects_account = \ (conf.get('auto_create_account_prefix') or '.') + \ (conf.get('expiring_objects_account_name') or 's3_transitioning_objects') conf_path = conf.get('__file__') or \ '/etc/swift/s3-object-transitor.conf' request_tries = int(conf.get('request_tries') or 3) self.swift = InternalClient(conf_path, 'Swift Object Transitor', request_tries) self.report_interval = int(conf.get('report_interval') or 300) self.report_first_time = self.report_last_time = time() self.report_objects = 0 self.recon_cache_path = conf.get('recon_cache_path', '/var/cache/swift') self.rcache = join(self.recon_cache_path, 'object.recon') self.concurrency = int(conf.get('concurrency', 1)) if self.concurrency < 1: raise ValueError("concurrency must be set to at least 1") self.processes = int(self.conf.get('processes', 0)) self.process = int(self.conf.get('process', 0)) self.client = Client(self.conf.get('sentry_sdn', '')) def report(self, final=False): """ Emits a log line report of the progress so far, or the final progress is final=True. :param final: Set to True for the last report once the expiration pass has completed. """ if final: elapsed = time() - self.report_first_time self.logger.info(_('Pass completed in %ds; %d objects ' 'transitioned') % (elapsed, self.report_objects)) dump_recon_cache({'object_transition_pass': elapsed, 'transitioned_last_pass': self.report_objects}, self.rcache, self.logger) elif time() - self.report_last_time >= self.report_interval: elapsed = time() - self.report_first_time self.logger.info(_('Pass so far %ds; %d objects transitioned') % (elapsed, self.report_objects)) self.report_last_time = time() def run_once(self, *args, **kwargs): """ Executes a single pass, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon accepts processes and process keyword args. These will override the values from the config file if provided. """ processes, process = self.get_process_values(kwargs) pool = GreenPool(self.concurrency) containers_to_delete = [] self.report_first_time = self.report_last_time = time() self.report_objects = 0 try: self.logger.debug(_('Run begin')) containers, objects = \ self.swift.get_account_info(self.s3_tr_objects_account) self.logger.info(_('Pass beginning; %s possible containers; %s ' 'possible objects') % (containers, objects)) for c in self.swift.iter_containers(self.s3_tr_objects_account): container = c['name'] timestamp = int(container) if timestamp > int(time()): break containers_to_delete.append(container) for o in self.swift.iter_objects(self.s3_tr_objects_account, container): obj = o['name'].encode('utf8') if processes > 0: obj_process = int( hashlib.md5('%s/%s' % (container, obj)). hexdigest(), 16) if obj_process % processes != process: continue pool.spawn_n(self.transition_object, container, obj) pool.waitall() for container in containers_to_delete: try: self.swift.delete_container(self.s3_tr_objects_account, container, (2, 4)) except (Exception, Timeout) as err: report_exception(self.logger, _('Exception while deleting container %s %s') % (container, str(err)), self.client) self.logger.debug(_('Run end')) self.report(final=True) except (Exception, Timeout): report_exception(self.logger, _('Unhandled exception'), self.client) def run_forever(self, *args, **kwargs): """ Executes passes forever, looking for objects to expire. :param args: Extra args to fulfill the Daemon interface; this daemon has no additional args. :param kwargs: Extra keyword args to fulfill the Daemon interface; this daemon has no additional keyword args. """ sleep(random() * self.interval) while True: begin = time() try: self.run_once(*args, **kwargs) except (Exception, Timeout): report_exception(self.logger, _('Unhandled exception'), self.client) elapsed = time() - begin if elapsed < self.interval: sleep(random() * (self.interval - elapsed)) def get_process_values(self, kwargs): """ Gets the processes, process from the kwargs if those values exist. Otherwise, return processes, process set in the config file. :param kwargs: Keyword args passed into the run_forever(), run_once() methods. They have values specified on the command line when the daemon is run. """ if kwargs.get('processes') is not None: processes = int(kwargs['processes']) else: processes = self.processes if kwargs.get('process') is not None: process = int(kwargs['process']) else: process = self.process if process < 0: raise ValueError( 'process must be an integer greater than or equal to 0') if processes < 0: raise ValueError( 'processes must be an integer greater than or equal to 0') if processes and process >= processes: raise ValueError( 'process must be less than or equal to processes') return processes, process def transition_object(self, container, obj): start_time = time() try: obj_account, obj_container, obj_object = obj.split('/', 2) lifecycle = Lifecycle(obj_account, obj_container, obj_object, swift_client=self.swift) if is_success(lifecycle.object.status): object_header = lifecycle.object.headers object_rule = lifecycle.get_object_rule_by_action( 'Transition') last_modified = object_header['Last-Modified'] last_modified = gmt_to_timestamp(last_modified) validation_flg = lifecycle.object_lifecycle_validation() if (validation_flg == LIFECYCLE_OK) or \ (validation_flg == DISABLED_EXPIRATION): times = calc_when_actions_do(object_rule, last_modified) actual_expire_time = int(times['Transition']) if actual_expire_time == int(container): self.request_transition(obj) self.swift.delete_object(self.s3_tr_objects_account, container, obj) except (Exception, Timeout) as err: self.logger.increment('errors') report_exception(self.logger, _('Exception while transitioning object %s %s %s') % (container, obj, str(err)), self.client) self.logger.timing_since('timing', start_time) self.report() def request_transition(self, actual_obj): path = '/v1/' + urllib.quote(actual_obj.lstrip('/')) headers = {GLACIER_FLAG_META: True, 'X-S3-Object-Transition': True} resp = self.swift.make_request('POST', path, headers, (2, 5)) if resp.status_int == 500: raise Exception(resp.body) self.report_objects += 1 self.logger.increment('objects')
class TestObjectExpirer(ReplProbeTest): def setUp(self): self.expirer = Manager(['object-expirer']) self.expirer.start() err = self.expirer.stop() if err: raise unittest.SkipTest('Unable to verify object-expirer service') conf_files = [] for server in self.expirer.servers: conf_files.extend(server.conf_files()) conf_file = conf_files[0] self.client = InternalClient(conf_file, 'probe-test', 3) super(TestObjectExpirer, self).setUp() self.container_name = 'container-%s' % uuid.uuid4() self.object_name = 'object-%s' % uuid.uuid4() self.brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name) def _check_obj_in_container_listing(self): for obj in self.client.iter_objects(self.account, self.container_name): if self.object_name == obj['name']: return True return False @unittest.skipIf(len(ENABLED_POLICIES) < 2, "Need more than one policy") def test_expirer_object_split_brain(self): old_policy = random.choice(ENABLED_POLICIES) wrong_policy = random.choice( [p for p in ENABLED_POLICIES if p != old_policy]) # create an expiring object and a container with the wrong policy self.brain.stop_primary_half() self.brain.put_container(int(old_policy)) self.brain.put_object(headers={'X-Delete-After': 2}) # get the object timestamp metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) create_timestamp = Timestamp(metadata['x-timestamp']) self.brain.start_primary_half() # get the expiring object updates in their queue, while we have all # the servers up Manager(['object-updater']).once() self.brain.stop_handoff_half() self.brain.put_container(int(wrong_policy)) # don't start handoff servers, only wrong policy is available # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # this guy should no-op since it's unable to expire the object self.expirer.once() self.brain.start_handoff_half() self.get_to_final_state() # validate object is expired found_in_policy = None metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4, ), headers={'X-Backend-Storage-Policy-Index': int(old_policy)}) self.assertIn('x-backend-timestamp', metadata) self.assertEqual(Timestamp(metadata['x-backend-timestamp']), create_timestamp) # but it is still in the listing self.assertTrue(self._check_obj_in_container_listing(), msg='Did not find listing for %s' % self.object_name) # clear proxy cache client.post_container(self.url, self.token, self.container_name, {}) # run the expirer again after replication self.expirer.once() # object is not in the listing self.assertFalse(self._check_obj_in_container_listing(), msg='Found listing for %s' % self.object_name) # and validate object is tombstoned found_in_policy = None for policy in ENABLED_POLICIES: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4, ), headers={'X-Backend-Storage-Policy-Index': int(policy)}) if 'x-backend-timestamp' in metadata: if found_in_policy: self.fail('found object in %s and also %s' % (found_in_policy, policy)) found_in_policy = policy self.assertIn('x-backend-timestamp', metadata) self.assertGreater(Timestamp(metadata['x-backend-timestamp']), create_timestamp) def test_expirer_doesnt_make_async_pendings(self): # The object expirer cleans up its own queue. The inner loop # basically looks like this: # # for obj in stuff_to_delete: # delete_the_object(obj) # remove_the_queue_entry(obj) # # By default, upon receipt of a DELETE request for an expiring # object, the object servers will create async_pending records to # clean the expirer queue. Since the expirer cleans its own queue, # this is unnecessary. The expirer can make requests in such a way # tha the object server does not write out any async pendings; this # test asserts that this is the case. # Make an expiring object in each policy for policy in ENABLED_POLICIES: container_name = "expirer-test-%d" % policy.idx container_headers = {'X-Storage-Policy': policy.name} client.put_container(self.url, self.token, container_name, headers=container_headers) now = time.time() delete_at = int(now + 2.0) client.put_object(self.url, self.token, container_name, "some-object", headers={ 'X-Delete-At': str(delete_at), 'X-Timestamp': Timestamp(now).normal }, contents='dontcare') time.sleep(2.0) # make sure auto-created expirer-queue containers get in the account # listing so the expirer can find them Manager(['container-updater']).once() # Make sure there's no async_pendings anywhere. Probe tests only run # on single-node installs anyway, so this set should be small enough # that an exhaustive check doesn't take too long. all_obj_nodes = self.get_all_object_nodes() pendings_before = self.gather_async_pendings(all_obj_nodes) # expire the objects Manager(['object-expirer']).once() pendings_after = self.gather_async_pendings(all_obj_nodes) self.assertEqual(pendings_after, pendings_before) def test_expirer_object_should_not_be_expired(self): # Current object-expirer checks the correctness via x-if-delete-at # header that it can be deleted by expirer. If there are objects # either which doesn't have x-delete-at header as metadata or which # has different x-delete-at value from x-if-delete-at value, # object-expirer's delete will fail as 412 PreconditionFailed. # However, if some of the objects are in handoff nodes, the expirer # can put the tombstone with the timestamp as same as x-delete-at and # the object consistency will be resolved as the newer timestamp will # be winner (in particular, overwritten case w/o x-delete-at). This # test asserts such a situation that, at least, the overwriten object # which have larger timestamp than the original expirered date should # be safe. def put_object(headers): # use internal client to PUT objects so that X-Timestamp in headers # is effective headers['Content-Length'] = '0' path = self.client.make_path(self.account, self.container_name, self.object_name) try: self.client.make_request('PUT', path, headers, (2, )) except UnexpectedResponse as e: self.fail('Expected 201 for PUT object but got %s' % e.resp.status) obj_brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'object', self.policy) # T(obj_created) < T(obj_deleted with x-delete-at) < T(obj_recreated) # < T(expirer_executed) # Recreated obj should be appeared in any split brain case obj_brain.put_container() # T(obj_deleted with x-delete-at) # object-server accepts req only if X-Delete-At is later than 'now' # so here, T(obj_created) < T(obj_deleted with x-delete-at) now = time.time() delete_at = int(now + 2.0) recreate_at = delete_at + 1.0 put_object(headers={ 'X-Delete-At': str(delete_at), 'X-Timestamp': Timestamp(now).normal }) # some object servers stopped to make a situation that the # object-expirer can put tombstone in the primary nodes. obj_brain.stop_primary_half() # increment the X-Timestamp explicitly # (will be T(obj_deleted with x-delete-at) < T(obj_recreated)) put_object( headers={ 'X-Object-Meta-Expired': 'False', 'X-Timestamp': Timestamp(recreate_at).normal }) # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # sanity, the newer object is still there try: metadata = self.client.get_object_metadata(self.account, self.container_name, self.object_name) except UnexpectedResponse as e: self.fail('Expected 200 for HEAD object but got %s' % e.resp.status) self.assertIn('x-object-meta-expired', metadata) # some object servers recovered obj_brain.start_primary_half() # sleep until after recreated_at while time.time() <= recreate_at: time.sleep(0.1) # Now, expirer runs at the time after obj is recreated self.expirer.once() # verify that original object was deleted by expirer obj_brain.stop_handoff_half() try: metadata = self.client.get_object_metadata( self.account, self.container_name, self.object_name, acceptable_statuses=(4, )) except UnexpectedResponse as e: self.fail('Expected 404 for HEAD object but got %s' % e.resp.status) obj_brain.start_handoff_half() # and inconsistent state of objects is recovered by replicator Manager(['object-replicator']).once() # check if you can get recreated object try: metadata = self.client.get_object_metadata(self.account, self.container_name, self.object_name) except UnexpectedResponse as e: self.fail('Expected 200 for HEAD object but got %s' % e.resp.status) self.assertIn('x-object-meta-expired', metadata) def _test_expirer_delete_outdated_object_version(self, object_exists): # This test simulates a case where the expirer tries to delete # an outdated version of an object. # One case is where the expirer gets a 404, whereas the newest version # of the object is offline. # Another case is where the expirer gets a 412, since the old version # of the object mismatches the expiration time sent by the expirer. # In any of these cases, the expirer should retry deleting the object # later, for as long as a reclaim age has not passed. obj_brain = BrainSplitter(self.url, self.token, self.container_name, self.object_name, 'object', self.policy) obj_brain.put_container() if object_exists: obj_brain.put_object() # currently, the object either doesn't exist, or does not have # an expiration # stop primary servers and put a newer version of the object, this # time with an expiration. only the handoff servers will have # the new version obj_brain.stop_primary_half() now = time.time() delete_at = int(now + 2.0) obj_brain.put_object({'X-Delete-At': str(delete_at)}) # make sure auto-created containers get in the account listing Manager(['container-updater']).once() # update object record in the container listing Manager(['container-replicator']).once() # take handoff servers down, and bring up the outdated primary servers obj_brain.start_primary_half() obj_brain.stop_handoff_half() # wait until object expiration time while time.time() <= delete_at: time.sleep(0.1) # run expirer against the outdated servers. it should fail since # the outdated version does not match the expiration time self.expirer.once() # bring all servers up, and run replicator to update servers obj_brain.start_handoff_half() Manager(['object-replicator']).once() # verify the deletion has failed by checking the container listing self.assertTrue(self._check_obj_in_container_listing(), msg='Did not find listing for %s' % self.object_name) # run expirer again, delete should now succeed self.expirer.once() # verify the deletion by checking the container listing self.assertFalse(self._check_obj_in_container_listing(), msg='Found listing for %s' % self.object_name) def test_expirer_delete_returns_outdated_404(self): self._test_expirer_delete_outdated_object_version(object_exists=False) def test_expirer_delete_returns_outdated_412(self): self._test_expirer_delete_outdated_object_version(object_exists=True) def test_slo_async_delete(self): if not self.cluster_info.get('slo', {}).get('allow_async_delete'): raise unittest.SkipTest('allow_async_delete not enabled') segment_container = self.container_name + '_segments' client.put_container(self.url, self.token, self.container_name, {}) client.put_container(self.url, self.token, segment_container, {}) client.put_object(self.url, self.token, segment_container, 'segment_1', b'1234') client.put_object(self.url, self.token, segment_container, 'segment_2', b'5678') client.put_object(self.url, self.token, self.container_name, 'slo', json.dumps([ { 'path': segment_container + '/segment_1' }, { 'data': 'Cg==' }, { 'path': segment_container + '/segment_2' }, ]), query_string='multipart-manifest=put') _, body = client.get_object(self.url, self.token, self.container_name, 'slo') self.assertEqual(body, b'1234\n5678') client.delete_object( self.url, self.token, self.container_name, 'slo', query_string='multipart-manifest=delete&async=true') # Object's deleted _, objects = client.get_container(self.url, self.token, self.container_name) self.assertEqual(objects, []) with self.assertRaises(client.ClientException) as caught: client.get_object(self.url, self.token, self.container_name, 'slo') self.assertEqual(404, caught.exception.http_status) # But segments are still around and accessible _, objects = client.get_container(self.url, self.token, segment_container) self.assertEqual([o['name'] for o in objects], ['segment_1', 'segment_2']) _, body = client.get_object(self.url, self.token, segment_container, 'segment_1') self.assertEqual(body, b'1234') _, body = client.get_object(self.url, self.token, segment_container, 'segment_2') self.assertEqual(body, b'5678') # make sure auto-created expirer-queue containers get in the account # listing so the expirer can find them Manager(['container-updater']).once() self.expirer.once() # Now the expirer has cleaned up the segments _, objects = client.get_container(self.url, self.token, segment_container) self.assertEqual(objects, []) with self.assertRaises(client.ClientException) as caught: client.get_object(self.url, self.token, segment_container, 'segment_1') self.assertEqual(404, caught.exception.http_status) with self.assertRaises(client.ClientException) as caught: client.get_object(self.url, self.token, segment_container, 'segment_2') self.assertEqual(404, caught.exception.http_status)