def get_hashes(partition_dir, recalculate=None, do_listdir=False,
               reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    The hashes file is read without holding the partition lock.  The lock is
    taken only to write the result back, and the write happens only when the
    file is being force-rewritten, does not exist, or still has the mtime
    observed when it was read; otherwise the whole call is retried.

    :param partition_dir: absolute path of partition to get hashes for
    :param recalculate: list of suffixes which should be recalculated when got
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones

    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """
    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    modified = False
    force_rewrite = False
    hashes = {}
    mtime = -1

    if recalculate is None:
        recalculate = []

    try:
        with open(hashes_file, 'rb') as fp:
            hashes = pickle.load(fp)
        mtime = getmtime(hashes_file)
    except Exception:
        # The cache could not be read: rebuild the suffix list from the
        # directory and write the result back without the mtime check.
        do_listdir = True
        force_rewrite = True
    if do_listdir:
        for suff in os.listdir(partition_dir):
            if len(suff) == 3:
                hashes.setdefault(suff, None)
        modified = True
    hashes.update((hash_, None) for hash_ in recalculate)
    # Iterate over a snapshot: entries can be deleted from ``hashes`` inside
    # the loop, and resizing a dict while iterating its live view raises
    # RuntimeError on Python 3.
    for suffix, hash_ in list(hashes.items()):
        if not hash_:
            suffix_dir = join(partition_dir, suffix)
            try:
                hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                hashed += 1
            except PathNotDir:
                del hashes[suffix]
            except OSError:
                logging.exception(_('Error hashing suffix'))
            modified = True
    if modified:
        with lock_path(partition_dir):
            if force_rewrite or not exists(hashes_file) or \
                    getmtime(hashes_file) == mtime:
                write_pickle(
                    hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
                return hashed, hashes
        # The hashes file changed since it was read; start over.
        return get_hashes(partition_dir, recalculate, do_listdir,
                          reclaim_age)
    else:
        return hashed, hashes
def process_object_update(self, update_path, device, policy):
    """
    Process the object information to be updated and update.

    The pickled update is loaded and sent concurrently (one spawned
    ``object_update`` call per node) to every container node not already
    listed in the update's ``successes``.  If all of them succeed the update
    file is unlinked; otherwise any newly successful node ids are written
    back into the update file.  A file that cannot be unpickled is renamed
    into ``<device>/quarantined/objects``.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy: storage policy of object update
    """
    try:
        # Close the file handle deterministically, even when unpickling
        # fails, instead of leaving it to the garbage collector.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                              update_path)
        self.logger.increment('quarantines')
        target_path = os.path.join(device, 'quarantined', 'objects',
                                   os.path.basename(update_path))
        renamer(update_path, target_path, fsync=False)
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    headers_out = HeaderKeyDict(update['headers'])
    headers_out['user-agent'] = 'object-updater %s' % os.getpid()
    # Only fills in the policy index when the stored headers lack one.
    headers_out.setdefault('X-Backend-Storage-Policy-Index',
                           str(int(policy)))
    events = [
        spawn(self.object_update, node, part, update['op'], obj, headers_out)
        for node in nodes if node['id'] not in successes
    ]
    success = True
    new_successes = False
    for event in events:
        event_success, node_id = event.wait()
        if event_success is True:
            successes.append(node_id)
            new_successes = True
        else:
            success = False
    if success:
        self.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s', {
            'obj': obj,
            'path': update_path
        })
        self.logger.increment("unlinks")
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s', {
            'obj': obj,
            'path': update_path
        })
        if new_successes:
            # Record partial progress so those nodes are skipped next time.
            update['successes'] = successes
            write_pickle(update, update_path,
                         os.path.join(device, get_tmp_dir(policy)))
def process_object_update(self, update_path, device):
    """
    Process the object information to be updated and update.

    The pickled update is loaded and sent to every container node not
    already listed in the update's ``successes``.  A 2xx or 404 status counts
    as a success for that node.  If every node succeeded the update file is
    unlinked; otherwise the update, with its accumulated ``successes``, is
    written back.  A file that cannot be unpickled is renamed into
    ``<device>/quarantined/objects``.

    :param update_path: path to pickled object update file
    :param device: path to device
    """
    try:
        # Close the file handle deterministically, even when unpickling
        # fails, instead of leaving it to the garbage collector.
        with open(update_path, "rb") as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(_("ERROR Pickle problem, quarantining %s"),
                              update_path)
        renamer(update_path,
                os.path.join(device, "quarantined", "objects",
                             os.path.basename(update_path)))
        return
    successes = update.get("successes", [])
    part, nodes = self.get_container_ring().get_nodes(update["account"],
                                                      update["container"])
    obj = "/%s/%s/%s" % (update["account"], update["container"],
                         update["obj"])
    success = True
    for node in nodes:
        if node["id"] not in successes:
            status = self.object_update(node, part, update["op"], obj,
                                        update["headers"])
            if not (200 <= status < 300) and status != 404:
                success = False
            else:
                successes.append(node["id"])
    if success:
        self.successes += 1
        self.logger.debug(_("Update sent for %(obj)s %(path)s"),
                          {"obj": obj, "path": update_path})
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.debug(_("Update failed for %(obj)s %(path)s"),
                          {"obj": obj, "path": update_path})
        # Persist the nodes that did succeed so they are skipped next time.
        update["successes"] = successes
        write_pickle(update, update_path, os.path.join(device, "tmp"))
def process_object_update(self, update_path, device, policy_idx):
    """
    Process the object information to be updated and update.

    The pickled update is loaded and sent to every container node not
    already listed in the update's ``successes``.  A successful status or
    HTTP_NOT_FOUND counts as a success for that node.  If every node
    succeeded the update file is unlinked; otherwise any newly successful
    node ids are written back into the update file.  A file that cannot be
    unpickled is renamed into ``<device>/quarantined/objects``.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy_idx: storage policy index of object update
    """
    try:
        # Close the file handle deterministically, even when unpickling
        # fails, instead of leaving it to the garbage collector.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                              update_path)
        self.logger.increment('quarantines')
        renamer(
            update_path,
            os.path.join(device, 'quarantined', 'objects',
                         os.path.basename(update_path)))
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    success = True
    new_successes = False
    for node in nodes:
        if node['id'] not in successes:
            # Work on a copy so the stored headers are not modified.
            headers = update['headers'].copy()
            headers.setdefault('X-Backend-Storage-Policy-Index',
                               str(policy_idx))
            status = self.object_update(node, part, update['op'], obj,
                                        headers)
            if not is_success(status) and status != HTTP_NOT_FOUND:
                success = False
            else:
                successes.append(node['id'])
                new_successes = True
    if success:
        self.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s', {
            'obj': obj,
            'path': update_path
        })
        self.logger.increment("unlinks")
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s', {
            'obj': obj,
            'path': update_path
        })
        if new_successes:
            # Record partial progress so those nodes are skipped next time.
            update['successes'] = successes
            write_pickle(update, update_path,
                         os.path.join(device, get_tmp_dir(policy_idx)))
def process_object_update(self, update_path, device, policy):
    """
    Process the object information to be updated and update.

    The pickled update is loaded and sent concurrently (one spawned
    ``object_update`` call per node) to every container node not already
    listed in the update's ``successes``.  If all of them succeed the update
    file is unlinked; otherwise any newly successful node ids are written
    back into the update file.  A file that cannot be unpickled is renamed
    into ``<device>/quarantined/objects``.  Each outcome is counted both on
    ``self.stats`` and through the logger.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy: storage policy of object update
    """
    try:
        # Close the file handle deterministically, even when unpickling
        # fails, instead of leaving it to the garbage collector.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(
            _('ERROR Pickle problem, quarantining %s'), update_path)
        self.stats.quarantines += 1
        self.logger.increment('quarantines')
        target_path = os.path.join(device, 'quarantined', 'objects',
                                   os.path.basename(update_path))
        renamer(update_path, target_path, fsync=False)
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    headers_out = HeaderKeyDict(update['headers'])
    headers_out['user-agent'] = 'object-updater %s' % os.getpid()
    # Only fills in the policy index when the stored headers lack one.
    headers_out.setdefault('X-Backend-Storage-Policy-Index',
                           str(int(policy)))
    events = [spawn(self.object_update,
                    node, part, update['op'], obj, headers_out)
              for node in nodes if node['id'] not in successes]
    success = True
    new_successes = False
    for event in events:
        event_success, node_id = event.wait()
        if event_success is True:
            successes.append(node_id)
            new_successes = True
        else:
            success = False
    if success:
        self.stats.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        self.stats.unlinks += 1
        self.logger.increment('unlinks')
        os.unlink(update_path)
    else:
        self.stats.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        if new_successes:
            # Record partial progress so those nodes are skipped next time.
            update['successes'] = successes
            write_pickle(update, update_path, os.path.join(
                device, get_tmp_dir(policy)))
def process_object_update(self, update_path, device, policy_idx):
    """
    Process the object information to be updated and update.

    Nodes already recorded in the update's ``successes`` are skipped.  For
    the rest, a successful status or HTTP_NOT_FOUND marks the node as done.
    When every node is done the update file is removed; when some failed,
    the file is rewritten only if at least one new node succeeded.  An
    update file that cannot be unpickled is moved to
    ``<device>/quarantined/objects``.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy_idx: storage policy index of object update
    """
    try:
        # ``with`` guarantees the handle is closed whether or not the
        # unpickling succeeds; the bare open() call left that to the GC.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(
            _('ERROR Pickle problem, quarantining %s'), update_path)
        self.logger.increment('quarantines')
        renamer(update_path, os.path.join(
            device, 'quarantined', 'objects',
            os.path.basename(update_path)))
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    success = True
    new_successes = False
    for node in nodes:
        if node['id'] not in successes:
            # Copy first so setdefault never alters the stored headers.
            headers = update['headers'].copy()
            headers.setdefault('X-Backend-Storage-Policy-Index',
                               str(policy_idx))
            status = self.object_update(node, part, update['op'], obj,
                                        headers)
            if not is_success(status) and status != HTTP_NOT_FOUND:
                success = False
            else:
                successes.append(node['id'])
                new_successes = True
    if success:
        self.successes += 1
        self.logger.increment('successes')
        self.logger.debug('Update sent for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        self.logger.increment("unlinks")
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.increment('failures')
        self.logger.debug('Update failed for %(obj)s %(path)s',
                          {'obj': obj, 'path': update_path})
        if new_successes:
            # Save which nodes are done so a retry only hits the others.
            update['successes'] = successes
            write_pickle(update, update_path, os.path.join(
                device, get_tmp_dir(policy_idx)))
def test_sweep_logs_multiple_policies(self):
    """
    Sweep async pendings stored under every mocked policy and check that a
    single "sweep complete" line reports the combined totals.
    """
    pending = (('abc', 123), ('def', 234), ('ghi', 345))
    for policy in _mocked_policies:
        prefix_dir = os.path.join(
            self.sda1, get_async_dir(policy.idx), 'abc')
        mkpath(prefix_dir)

        container_name = 'container%d' % policy.idx
        for obj_name, stamp in pending:
            obj_hash = hash_path('account', container_name, obj_name)
            file_name = obj_hash + '-' + normalize_timestamp(stamp)
            write_pickle({}, os.path.join(prefix_dir, file_name))

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Pretend every update succeeds: drop the file, bump the counters.
        def process_object_update(self, update_path, device, policy):
            os.unlink(update_path)
            self.stats.successes += 1
            self.stats.unlinks += 1

    logger = FakeLogger()
    conf = {
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'report_interval': '10.0',
        'node_timeout': '5',
    }
    ou = MockObjectUpdater(conf, logger=logger)

    clock = [time()]

    def mock_time():
        # Each reading of the clock moves it forward by 0.01s.
        current = clock[0]
        clock[0] = current + 0.01
        return current

    fake_time_module = mock.MagicMock(time=mock_time)
    with mock.patch('swift.obj.updater.time', fake_time_module):
        ou.object_sweep(self.sda1)

    completion_lines = []
    for line in logger.get_lines_for_level('info'):
        if "sweep complete" in line:
            completion_lines.append(line)

    self.assertEqual(len(completion_lines), 1)
    summary = completion_lines[0]
    self.assertIn("sweep complete", summary)
    self.assertIn(
        "6 successes, 0 failures, 0 quarantines, 6 unlinks, 0 errors, "
        "0 redirects",
        summary)
def get_hashes(partition_dir, recalculate=None, do_listdir=False,
               reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    The whole read/rehash/write cycle runs while holding the lock on
    partition_dir.

    :param partition_dir: absolute path of partition to get hashes for
    :param recalculate: list of suffixes which should be recalculated when
                        got; None (the default) means no suffixes
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones

    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """
    if recalculate is None:
        recalculate = []
    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        modified = False
        hashes = {}
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            # Cache could not be read: fall back to listing the partition.
            do_listdir = True
        if do_listdir:
            # Keep cached values only for three-character names that are
            # directories in the partition right now.
            hashes = dict(
                ((suff, hashes.get(suff, None))
                 for suff in os.listdir(partition_dir)
                 if len(suff) == 3 and isdir(join(partition_dir, suff))))
            modified = True
        for hash_ in recalculate:
            hashes[hash_] = None
        # Iterate over a snapshot: entries can be deleted from ``hashes``
        # inside the loop, and resizing a dict while iterating its live view
        # raises RuntimeError on Python 3.
        for suffix, hash_ in list(hashes.items()):
            if not hash_:
                suffix_dir = join(partition_dir, suffix)
                if os.path.exists(suffix_dir):
                    try:
                        hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                        hashed += 1
                    except OSError:
                        logging.exception(_('Error hashing suffix'))
                        hashes[suffix] = None
                else:
                    del hashes[suffix]
                modified = True
                sleep()
        if modified:
            write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
        return hashed, hashes
def async_update(self, op, account, container, obj, host, partition,
                 contdevice, headers_out, objdevice):
    """
    Sends or saves an async update.

    When host, partition and contdevice are all set, the update is sent to
    the container server and the method returns on a successful response.
    In every other case (missing target, non-success response, exception or
    timeout) the update is pickled under the object device's async
    directory.

    :param op: operation performed (ex: 'PUT', or 'DELETE')
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name
    :param host: host that the container is on
    :param partition: partition that the container is on
    :param contdevice: device name that the container is on
    :param headers_out: dictionary of headers to send in the container
                        request
    :param objdevice: device name that the object is in
    """
    headers_out['user-agent'] = 'obj-server %s' % os.getpid()
    full_path = '/%s/%s/%s' % (account, container, obj)
    if all([host, partition, contdevice]):
        # Pre-bind ip/port so the except handler can always format its
        # message.  Without this, a host lacking ':' makes the unpacking
        # below raise before they are assigned, the handler itself then
        # fails, and the update is never saved.
        ip, port = host, None
        try:
            with ConnectionTimeout(self.conn_timeout):
                ip, port = host.rsplit(':', 1)
                conn = http_connect(ip, port, contdevice, partition, op,
                                    full_path, headers_out)
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                if is_success(response.status):
                    return
                else:
                    self.logger.error(_(
                        'ERROR Container update failed '
                        '(saving for async update later): %(status)d '
                        'response from %(ip)s:%(port)s/%(dev)s'),
                        {'status': response.status, 'ip': ip, 'port': port,
                         'dev': contdevice})
        except (Exception, Timeout):
            self.logger.exception(_(
                'ERROR container update failed with '
                '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                {'ip': ip, 'port': port, 'dev': contdevice})
    async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
    ohash = hash_path(account, container, obj)
    self.logger.increment('async_pendings')
    write_pickle(
        {'op': op, 'account': account, 'container': container,
         'obj': obj, 'headers': headers_out},
        os.path.join(async_dir, ohash[-3:], ohash + '-' +
                     normalize_timestamp(headers_out['x-timestamp'])),
        os.path.join(self.devices, objdevice, 'tmp'))
def async_update(self, op, account, container, obj, host, partition,
                 contdevice, headers_out, objdevice):
    """
    Sends or saves an async update.

    A direct container update is attempted only when host, partition and
    contdevice are all truthy; a successful response ends the call.  Any
    other outcome falls through to pickling the update under the object
    device's async directory.

    :param op: operation performed (ex: 'PUT', or 'DELETE')
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name
    :param host: host that the container is on
    :param partition: partition that the container is on
    :param contdevice: device name that the container is on
    :param headers_out: dictionary of headers to send in the container
                        request
    :param objdevice: device name that the object is in
    """
    headers_out["user-agent"] = "obj-server %s" % os.getpid()
    full_path = "/%s/%s/%s" % (account, container, obj)
    if all([host, partition, contdevice]):
        # Give ip/port a value up front.  If host has no ':' the unpacking
        # below raises before assigning them, and the except handler would
        # otherwise fail on the unbound names instead of saving the update.
        ip, port = host, None
        try:
            with ConnectionTimeout(self.conn_timeout):
                ip, port = host.rsplit(":", 1)
                conn = http_connect(ip, port, contdevice, partition, op,
                                    full_path, headers_out)
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                if is_success(response.status):
                    return
                else:
                    self.logger.error(
                        _(
                            "ERROR Container update failed "
                            "(saving for async update later): %(status)d "
                            "response from %(ip)s:%(port)s/%(dev)s"
                        ),
                        {"status": response.status, "ip": ip, "port": port,
                         "dev": contdevice},
                    )
        except (Exception, Timeout):
            self.logger.exception(
                _("ERROR container update failed with "
                  "%(ip)s:%(port)s/%(dev)s (saving for async update later)"),
                {"ip": ip, "port": port, "dev": contdevice},
            )
    async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
    ohash = hash_path(account, container, obj)
    self.logger.increment("async_pendings")
    write_pickle(
        {"op": op, "account": account, "container": container, "obj": obj,
         "headers": headers_out},
        os.path.join(async_dir, ohash[-3:],
                     ohash + "-" +
                     normalize_timestamp(headers_out["x-timestamp"])),
        os.path.join(self.devices, objdevice, "tmp"),
    )
def async_update(self, op, account, container, obj, host, partition,
                 contdevice, headers_out, objdevice):
    """
    Sends or saves an async update.

    If host, partition and contdevice are all provided, the container
    server is contacted and a successful response returns immediately.
    Otherwise, or when the request fails, the update is pickled under the
    object device's async directory.

    :param op: operation performed (ex: 'PUT', or 'DELETE')
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name
    :param host: host that the container is on
    :param partition: partition that the container is on
    :param contdevice: device name that the container is on
    :param headers_out: dictionary of headers to send in the container
                        request
    :param objdevice: device name that the object is in
    """
    full_path = '/%s/%s/%s' % (account, container, obj)
    if all([host, partition, contdevice]):
        # ip/port must exist before the try block: when host contains no
        # ':' the unpacking raises first, and the handler would hit unbound
        # names and abort before the update is saved.
        ip, port = host, None
        try:
            with ConnectionTimeout(self.conn_timeout):
                ip, port = host.rsplit(':', 1)
                conn = http_connect(ip, port, contdevice, partition, op,
                                    full_path, headers_out)
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                if is_success(response.status):
                    return
                else:
                    self.logger.error(_(
                        'ERROR Container update failed '
                        '(saving for async update later): %(status)d '
                        'response from %(ip)s:%(port)s/%(dev)s'),
                        {'status': response.status, 'ip': ip, 'port': port,
                         'dev': contdevice})
        except (Exception, Timeout):
            self.logger.exception(_(
                'ERROR container update failed with '
                '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                {'ip': ip, 'port': port, 'dev': contdevice})
    async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
    ohash = hash_path(account, container, obj)
    self.logger.increment('async_pendings')
    write_pickle(
        {'op': op, 'account': account, 'container': container,
         'obj': obj, 'headers': headers_out},
        os.path.join(async_dir, ohash[-3:], ohash + '-' +
                     normalize_timestamp(headers_out['x-timestamp'])),
        os.path.join(self.devices, objdevice, 'tmp'))
def get_hashes(partition_dir, recalculate=None, do_listdir=False,
               reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    Reading the cache, rehashing and writing the cache back all happen
    under the lock on partition_dir.

    :param partition_dir: absolute path of partition to get hashes for
    :param recalculate: list of suffixes which should be recalculated when
                        got; defaults to None, meaning none
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones

    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """
    # None replaces the former mutable ``[]`` default.
    if recalculate is None:
        recalculate = []
    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        modified = False
        hashes = {}
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            # An unreadable cache forces a directory listing.
            do_listdir = True
        if do_listdir:
            # Rebuild the mapping from the directory contents, carrying
            # over any cached value for suffixes that still exist.
            hashes = dict(((suff, hashes.get(suff, None))
                           for suff in os.listdir(partition_dir)
                           if len(suff) == 3 and
                           isdir(join(partition_dir, suff))))
            modified = True
        for hash_ in recalculate:
            hashes[hash_] = None
        # Loop over a copy of the items: the body may delete keys, and
        # Python 3 raises RuntimeError if a dict changes size while its
        # live items() view is being iterated.
        for suffix, hash_ in list(hashes.items()):
            if not hash_:
                suffix_dir = join(partition_dir, suffix)
                if os.path.exists(suffix_dir):
                    try:
                        hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                        hashed += 1
                    except OSError:
                        logging.exception(_('Error hashing suffix'))
                        hashes[suffix] = None
                else:
                    del hashes[suffix]
                modified = True
                sleep()
        if modified:
            write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
        return hashed, hashes
def test_sweep_logs_multiple_policies(self):
    """
    Write three async pendings per mocked policy, run one sweep and verify
    the single completion log line carries the totals across policies.
    """
    fixtures = [('abc', 123), ('def', 234), ('ghi', 345)]
    for policy in _mocked_policies:
        async_root = os.path.join(self.sda1, get_async_dir(policy.idx))
        bucket = os.path.join(async_root, 'abc')
        mkpath(bucket)

        for name, ts in fixtures:
            digest = hash_path(
                'account', 'container%d' % policy.idx, name)
            target = os.path.join(
                bucket, digest + '-' + normalize_timestamp(ts))
            write_pickle({}, target)

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Stand-in that treats every pending update as sent and removed.
        def process_object_update(self, update_path, device, policy):
            os.unlink(update_path)
            self.stats.successes += 1
            self.stats.unlinks += 1

    logger = FakeLogger()
    settings = {
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'report_interval': '10.0',
        'node_timeout': '5',
    }
    ou = MockObjectUpdater(settings, logger=logger)

    ticks = [time()]

    def mock_time():
        # Return the current tick, then advance by a hundredth of a second.
        reading = ticks[0]
        ticks[0] = reading + 0.01
        return reading

    with mock.patch('swift.obj.updater.time',
                    mock.MagicMock(time=mock_time)):
        ou.object_sweep(self.sda1)

    info_lines = logger.get_lines_for_level('info')
    completion_lines = [
        entry for entry in info_lines if "sweep complete" in entry]

    self.assertEqual(len(completion_lines), 1)
    final_line = completion_lines[0]
    self.assertIn("sweep complete", final_line)
    self.assertIn(
        "6 successes, 0 failures, 0 quarantines, 6 unlinks, 0 errors, "
        "0 redirects",
        final_line)
def container_update(self, op, account, container, obj, headers_in,
                     headers_out, objdevice):
    """
    Update the container when objects are updated.

    The target is taken from the X-Container-Host, X-Container-Partition
    and X-Container-Device request headers; if any is missing nothing is
    done.  A 2xx response ends the call.  Any other response, exception or
    timeout causes the update to be pickled under the object device's
    async directory.

    :param op: operation performed (ex: 'PUT', or 'DELETE')
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name
    :param headers_in: dictionary of headers from the original request
    :param headers_out: dictionary of headers to send in the container
                        request
    :param objdevice: device name that the object is in
    """
    host = headers_in.get('X-Container-Host', None)
    partition = headers_in.get('X-Container-Partition', None)
    contdevice = headers_in.get('X-Container-Device', None)
    if not all([host, partition, contdevice]):
        return
    full_path = '/%s/%s/%s' % (account, container, obj)
    # Pre-bind ip/port so the except handler can always format its message;
    # otherwise a malformed host raises during unpacking, the handler fails
    # on the unbound names, and the update is never saved.
    ip, port = host, None
    try:
        with ConnectionTimeout(self.conn_timeout):
            # Split on the last ':' only, so hosts that contain more than
            # one colon still unpack into exactly two parts.
            ip, port = host.rsplit(':', 1)
            conn = http_connect(ip, port, contdevice, partition, op,
                                full_path, headers_out)
        with Timeout(self.node_timeout):
            response = conn.getresponse()
            response.read()
            if 200 <= response.status < 300:
                return
            else:
                self.logger.error(_('ERROR Container update failed '
                    '(saving for async update later): %(status)d '
                    'response from %(ip)s:%(port)s/%(dev)s'),
                    {'status': response.status, 'ip': ip, 'port': port,
                     'dev': contdevice})
    except (Exception, TimeoutError):
        self.logger.exception(_('ERROR container update failed with '
            '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
            {'ip': ip, 'port': port, 'dev': contdevice})
    async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
    ohash = hash_path(account, container, obj)
    write_pickle(
        {'op': op, 'account': account, 'container': container,
         'obj': obj, 'headers': headers_out},
        os.path.join(async_dir, ohash[-3:], ohash + '-' +
                     normalize_timestamp(headers_out['x-timestamp'])),
        os.path.join(self.devices, objdevice, 'tmp'))
def container_update(self, op, account, container, obj, headers_in,
                     headers_out, objdevice):
    """
    Update the container when objects are updated.

    Does nothing unless the request carried all of X-Container-Host,
    X-Container-Partition and X-Container-Device.  Returns as soon as the
    container server answers with a 2xx status; on any other response,
    exception or timeout the update is pickled under the object device's
    async directory.

    :param op: operation performed (ex: 'PUT', or 'DELETE')
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name
    :param headers_in: dictionary of headers from the original request
    :param headers_out: dictionary of headers to send in the container
                        request
    :param objdevice: device name that the object is in
    """
    host = headers_in.get('X-Container-Host', None)
    partition = headers_in.get('X-Container-Partition', None)
    contdevice = headers_in.get('X-Container-Device', None)
    if not all([host, partition, contdevice]):
        return
    full_path = '/%s/%s/%s' % (account, container, obj)
    # ip/port get a value before the try block: a host without ':' makes
    # the unpacking raise before assigning them, and the handler would then
    # fail on unbound names instead of falling through to the async save.
    ip, port = host, None
    try:
        with ConnectionTimeout(self.conn_timeout):
            ip, port = host.rsplit(':', 1)
            conn = http_connect(ip, port, contdevice, partition, op,
                                full_path, headers_out)
        with Timeout(self.node_timeout):
            response = conn.getresponse()
            response.read()
            if 200 <= response.status < 300:
                return
            else:
                self.logger.error(_('ERROR Container update failed '
                    '(saving for async update later): %(status)d '
                    'response from %(ip)s:%(port)s/%(dev)s'),
                    {'status': response.status, 'ip': ip, 'port': port,
                     'dev': contdevice})
    except (Exception, TimeoutError):
        self.logger.exception(_('ERROR container update failed with '
            '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
            {'ip': ip, 'port': port, 'dev': contdevice})
    async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
    ohash = hash_path(account, container, obj)
    write_pickle(
        {'op': op, 'account': account, 'container': container,
         'obj': obj, 'headers': headers_out},
        os.path.join(async_dir, ohash[-3:], ohash + '-' +
                     normalize_timestamp(headers_out['x-timestamp'])),
        os.path.join(self.devices, objdevice, 'tmp'))
def process_object_update(self, update_path, device):
    """
    Process the object information to be updated and update.

    Each container node not yet listed in the update's ``successes`` is
    sent the update; a 2xx or 404 status marks that node as done.  When all
    nodes are done the update file is unlinked, otherwise it is rewritten
    with the accumulated ``successes``.  An update file that cannot be
    unpickled is renamed into ``<device>/quarantined/objects``.

    :param update_path: path to pickled object update file
    :param device: path to device
    """
    try:
        # ``with`` closes the handle whether or not unpickling succeeds;
        # the bare open() call left closing to the garbage collector.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(_('ERROR Pickle problem, quarantining %s'),
                              update_path)
        renamer(
            update_path,
            os.path.join(device, 'quarantined', 'objects',
                         os.path.basename(update_path)))
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    success = True
    for node in nodes:
        if node['id'] not in successes:
            status = self.object_update(node, part, update['op'], obj,
                                        update['headers'])
            if not (200 <= status < 300) and status != 404:
                success = False
            else:
                successes.append(node['id'])
    if success:
        self.successes += 1
        self.logger.debug(_('Update sent for %(obj)s %(path)s'), {
            'obj': obj,
            'path': update_path
        })
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.debug(_('Update failed for %(obj)s %(path)s'), {
            'obj': obj,
            'path': update_path
        })
        # Save which nodes are done so a retry only contacts the others.
        update['successes'] = successes
        write_pickle(update, update_path, os.path.join(device, 'tmp'))
def test_object_sweep(self):
    """
    Run object_sweep over a prefix dir holding several timestamps per
    object and check that only the first timestamp listed for each object
    (the largest in this fixture) reaches process_object_update, that the
    prefix dir is gone afterwards, and that a stray non-directory entry is
    left alone.
    """
    prefix_dir = os.path.join(self.sda1, ASYNCDIR, 'abc')
    mkpath(prefix_dir)

    # A non-directory where directory is expected should just be skipped...
    not_a_dir_path = os.path.join(self.sda1, ASYNCDIR, 'not_a_dir')
    with open(not_a_dir_path, 'w'):
        pass

    objects = {
        'a': [1089.3, 18.37, 12.83, 1.3],
        'b': [49.4, 49.3, 49.2, 49.1],
        'c': [109984.123],
    }

    expected = set()
    # dict.items() works on both Python 2 and 3; iteritems() is 2-only.
    for o, timestamps in objects.items():
        ohash = hash_path('account', 'container', o)
        for t in timestamps:
            o_path = os.path.join(prefix_dir, ohash + '-' +
                                  normalize_timestamp(t))
            if t == timestamps[0]:
                expected.add(o_path)
            write_pickle({}, o_path)

    seen = set()

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Record which update files the sweep hands over, then remove them.
        def process_object_update(self, update_path, device):
            seen.add(update_path)
            os.unlink(update_path)

    cu = MockObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '5'
    })
    cu.object_sweep(self.sda1)
    # assertTrue/assertFalse replace the removed ``assert_`` alias.
    self.assertFalse(os.path.exists(prefix_dir))
    self.assertTrue(os.path.exists(not_a_dir_path))
    self.assertEqual(expected, seen)
def process_object_update(self, update_path, device):
    """
    Process the object information to be updated and update.

    The update is sent to every container node missing from its
    ``successes`` list; a successful status or HTTP_NOT_FOUND counts as
    done for that node.  If every node is done the update file is
    unlinked.  Otherwise the file is rewritten, but only when at least one
    additional node succeeded during this call.  An update file that cannot
    be unpickled is renamed into ``<device>/quarantined/objects``.

    :param update_path: path to pickled object update file
    :param device: path to device
    """
    try:
        # Close the file handle deterministically, even when unpickling
        # fails, instead of leaving it to the garbage collector.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(
            _('ERROR Pickle problem, quarantining %s'), update_path)
        renamer(update_path, os.path.join(device,
                'quarantined', 'objects', os.path.basename(update_path)))
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    success = True
    new_successes = False
    for node in nodes:
        if node['id'] not in successes:
            status = self.object_update(node, part, update['op'], obj,
                                        update['headers'])
            if not is_success(status) and status != HTTP_NOT_FOUND:
                success = False
            else:
                successes.append(node['id'])
                new_successes = True
    if success:
        self.successes += 1
        self.logger.debug(_('Update sent for %(obj)s %(path)s'),
                          {'obj': obj, 'path': update_path})
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.debug(_('Update failed for %(obj)s %(path)s'),
                          {'obj': obj, 'path': update_path})
        if new_successes:
            # Record partial progress so those nodes are skipped next time.
            update['successes'] = successes
            write_pickle(update, update_path, os.path.join(device, 'tmp'))
def test_object_sweep(self):
    """
    Populate one prefix dir with several timestamped pendings per object
    plus a plain file where a directory is expected, sweep the device, and
    verify that only the first-listed timestamp of each object was handed
    to process_object_update, that the prefix dir was removed, and that the
    plain file still exists.
    """
    prefix_dir = os.path.join(self.sda1, ASYNCDIR, 'abc')
    mkpath(prefix_dir)

    # A non-directory where directory is expected should just be skipped...
    not_a_dir_path = os.path.join(self.sda1, ASYNCDIR, 'not_a_dir')
    with open(not_a_dir_path, 'w'):
        pass

    objects = {
        'a': [1089.3, 18.37, 12.83, 1.3],
        'b': [49.4, 49.3, 49.2, 49.1],
        'c': [109984.123],
    }

    expected = set()
    # items() instead of the Python 2-only iteritems().
    for o, timestamps in objects.items():
        ohash = hash_path('account', 'container', o)
        for t in timestamps:
            o_path = os.path.join(prefix_dir, ohash + '-' +
                                  normalize_timestamp(t))
            if t == timestamps[0]:
                expected.add(o_path)
            write_pickle({}, o_path)

    seen = set()

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Collect the paths the sweep processes and delete each one.
        def process_object_update(self, update_path, device):
            seen.add(update_path)
            os.unlink(update_path)

    cu = MockObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '5'})
    cu.object_sweep(self.sda1)
    # ``assert_`` is a removed unittest alias; use the explicit forms.
    self.assertFalse(os.path.exists(prefix_dir))
    self.assertTrue(os.path.exists(not_a_dir_path))
    self.assertEqual(expected, seen)
def invalidate_hash(suffix_dir):
    """
    Invalidates the hash for a suffix_dir in the partition's hashes file.

    Nothing is written when the hashes file cannot be read, or when the
    suffix is already present with a false value.

    :param suffix_dir: absolute path to suffix dir whose hash needs
                       invalidating
    """
    partition_dir = dirname(suffix_dir)
    suffix = basename(suffix_dir)
    pickle_path = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        try:
            with open(pickle_path, 'rb') as handle:
                cached = pickle.load(handle)
            already_invalid = suffix in cached and not cached[suffix]
        except Exception:
            # Unreadable or malformed cache: leave it untouched.
            return
        if already_invalid:
            return
        cached[suffix] = None
        write_pickle(cached, pickle_path, partition_dir, PICKLE_PROTOCOL)
def test_sweep_logs(self):
    """
    Sweep five async pendings with a clock that jumps 5s per reading and a
    10s report interval, then check the start, progress and completion
    info lines and their counters.
    """
    prefix_dir = os.path.join(self.sda1, ASYNCDIR_BASE, 'abc')
    mkpath(prefix_dir)

    pending = (('abc', 123), ('def', 234), ('ghi', 345), ('jkl', 456),
               ('mno', 567))
    for obj_name, stamp in pending:
        obj_hash = hash_path('account', 'container', obj_name)
        file_name = obj_hash + '-' + normalize_timestamp(stamp)
        write_pickle({}, os.path.join(prefix_dir, file_name))

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Pretend every update succeeds: drop the file, bump the counters.
        def process_object_update(self, update_path, device, policy):
            os.unlink(update_path)
            self.stats.successes += 1
            self.stats.unlinks += 1

    logger = FakeLogger()
    conf = {
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'report_interval': '10.0',
        'node_timeout': '5',
    }
    ou = MockObjectUpdater(conf, logger=logger)

    clock = [time()]

    def mock_time_function():
        current = clock[0]
        clock[0] = current + 5
        return current

    # With 10s between updates, time() advancing 5s every time we look,
    # and 5 async_pendings on disk, we should get at least two progress
    # lines.
    fake_time_module = mock.MagicMock(time=mock_time_function)
    with mock.patch('swift.obj.updater.time', fake_time_module), \
            mock.patch.object(object_updater, 'ContextPool', MockPool):
        ou.object_sweep(self.sda1)

    info_lines = logger.get_lines_for_level('info')
    self.assertEqual(4, len(info_lines))
    starting, first_progress, second_progress, complete = info_lines[:4]

    self.assertIn("sweep starting", starting)
    self.assertIn(self.sda1, starting)

    self.assertIn("sweep progress", first_progress)
    # the space ensures it's a positive number
    self.assertIn(
        "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 errors, "
        "0 redirects",
        first_progress)
    self.assertIn(self.sda1, first_progress)

    self.assertIn("sweep progress", second_progress)
    self.assertIn(
        "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 errors, "
        "0 redirects",
        second_progress)
    self.assertIn(self.sda1, second_progress)

    self.assertIn("sweep complete", complete)
    self.assertIn(
        "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 errors, "
        "0 redirects",
        complete)
    self.assertIn(self.sda1, complete)
def check_with_idx(index, warn, should_skip):
    """
    Run one object sweep against an async dir for the given policy index.

    :param index: policy index as a string; a value greater than zero
                  selects the ``ASYNCDIR_BASE-<index>`` directory
    :param warn: expected number of ``logger.warning`` calls
    :param should_skip: when true, no update is expected to be processed
    """
    policy_index = int(index)
    if policy_index > 0:
        async_name = ASYNCDIR_BASE + "-" + index
    else:
        async_name = ASYNCDIR_BASE
    prefix_dir = os.path.join(self.sda1, async_name, 'abc')
    mkpath(prefix_dir)

    # A non-directory where directory is expected should just be
    # skipped, but should not stop processing of subsequent
    # directories.
    stray_names = (
        'not_a_dir',
        ASYNCDIR_BASE + '-' + 'twentington',
        ASYNCDIR_BASE + '-' + str(policy_index + 100),
    )
    not_dirs = tuple(os.path.join(self.sda1, name) for name in stray_names)
    for not_dir in not_dirs:
        open(not_dir, 'w').close()

    objects = {
        'a': [1089.3, 18.37, 12.83, 1.3],
        'b': [49.4, 49.3, 49.2, 49.1],
        'c': [109984.123],
    }
    expected = set()
    for obj_name, timestamps in objects.items():
        obj_hash = hash_path('account', 'container', obj_name)
        first_ts = timestamps[0]
        for ts in timestamps:
            pending_path = os.path.join(
                prefix_dir, obj_hash + '-' + normalize_timestamp(ts))
            # Only the first listed timestamp of each object is expected
            # to reach process_object_update.
            if ts == first_ts:
                expected.add((pending_path, policy_index))
            write_pickle({}, pending_path)

    seen = set()

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Record what the sweep hands over, then remove the file.
        def process_object_update(self, update_path, device, policy):
            seen.add((update_path, int(policy)))
            os.unlink(update_path)

    conf = {
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '5',
    }
    ou = MockObjectUpdater(conf)
    ou.logger = mock_logger = mock.MagicMock()
    ou.object_sweep(self.sda1)

    self.assertEqual(mock_logger.warning.call_count, warn)
    self.assertTrue(os.path.exists(not_dirs[0]))
    # if we were supposed to skip over the dir, we didn't process
    # anything at all
    self.assertEqual(set() if should_skip else expected, seen)

    # test cleanup: the tempdir gets cleaned up between runs, but this
    # way we can be called multiple times in a single test method
    for not_dir in not_dirs:
        os.unlink(not_dir)
def test_sweep_logs(self):
    """
    Check the info-level lines logged while sweeping five async pendings.

    The sweep must log one "sweep starting" line, two "sweep progress"
    lines and one "sweep complete" line, in that order. Each line mentions
    the device path; the progress and completion lines also include the
    cumulative counters.
    """
    prefix_dir = os.path.join(self.sda1, ASYNCDIR_BASE, 'abc')
    mkpath(prefix_dir)

    pendings = [('abc', 123), ('def', 234), ('ghi', 345), ('jkl', 456),
                ('mno', 567)]
    for obj_name, timestamp in pendings:
        obj_hash = hash_path('account', 'container', obj_name)
        pending_path = os.path.join(
            prefix_dir, obj_hash + '-' + normalize_timestamp(timestamp))
        write_pickle({}, pending_path)

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Stand-in that just removes the pending file and bumps the stats.
        def process_object_update(self, update_path, device, policy):
            os.unlink(update_path)
            self.stats.successes += 1
            self.stats.unlinks += 1

    logger = FakeLogger()
    conf = {
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'report_interval': '10.0',
        'node_timeout': '5',
    }
    ou = MockObjectUpdater(conf, logger=logger)

    clock = [time()]

    def fake_time():
        # Hand back the current reading, then move the clock on by 5s.
        current = clock[0]
        clock[0] = current + 5
        return current

    # With 10s between updates, time() advancing 5s every time we look,
    # and 5 async_pendings on disk, we should get at least two progress
    # lines.
    fake_time_module = mock.MagicMock(time=fake_time)
    with mock.patch('swift.obj.updater.time', fake_time_module), \
            mock.patch.object(object_updater, 'ContextPool', MockPool):
        ou.object_sweep(self.sda1)

    info_lines = logger.get_lines_for_level('info')
    self.assertEqual(4, len(info_lines))

    # (phase marker, expected counters or None) for each info line, in order
    expectations = [
        ("sweep starting", None),
        ("sweep progress",
         "2 successes, 0 failures, 0 quarantines, 2 unlinks, 0 errors, "
         "0 redirects"),
        ("sweep progress",
         "4 successes, 0 failures, 0 quarantines, 4 unlinks, 0 errors, "
         "0 redirects"),
        ("sweep complete",
         "5 successes, 0 failures, 0 quarantines, 5 unlinks, 0 errors, "
         "0 redirects"),
    ]
    for line, (phase, counters) in zip(info_lines, expectations):
        self.assertIn(phase, line)
        if counters is not None:
            self.assertIn(counters, line)
        self.assertIn(self.sda1, line)
def check_with_idx(index, warn, should_skip):
    """
    Run one object sweep against an async dir for the given policy index
    and verify what was processed.

    :param index: policy index as a string; a value greater than zero
                  selects the ``ASYNCDIR_BASE-<index>`` directory
    :param warn: expected number of ``logger.warn`` calls
    :param should_skip: when true, the prefix dir must survive the sweep
                        and no update may be processed; otherwise the
                        prefix dir must be gone and the expected updates
                        must have been seen
    """
    if int(index) > 0:
        asyncdir = os.path.join(self.sda1, ASYNCDIR_BASE + "-" + index)
    else:
        asyncdir = os.path.join(self.sda1, ASYNCDIR_BASE)

    prefix_dir = os.path.join(asyncdir, 'abc')
    mkpath(prefix_dir)

    # A non-directory where directory is expected should just be
    # skipped, but should not stop processing of subsequent
    # directories.
    not_dirs = (
        os.path.join(self.sda1, 'not_a_dir'),
        os.path.join(self.sda1, ASYNCDIR_BASE + '-' + 'twentington'),
        os.path.join(self.sda1,
                     ASYNCDIR_BASE + '-' + str(int(index) + 100)))

    for not_dir in not_dirs:
        with open(not_dir, 'w'):
            pass

    objects = {
        'a': [1089.3, 18.37, 12.83, 1.3],
        'b': [49.4, 49.3, 49.2, 49.1],
        'c': [109984.123],
    }

    expected = set()
    for o, timestamps in objects.items():
        ohash = hash_path('account', 'container', o)
        for t in timestamps:
            o_path = os.path.join(prefix_dir, ohash + '-' +
                                  normalize_timestamp(t))
            # Only the first listed timestamp of each object is expected
            # to reach process_object_update.
            if t == timestamps[0]:
                expected.add((o_path, int(index)))
            write_pickle({}, o_path)

    seen = set()

    class MockObjectUpdater(object_updater.ObjectUpdater):
        # Record what the sweep hands over, then remove the file.
        def process_object_update(self, update_path, device, policy):
            seen.add((update_path, int(policy)))
            os.unlink(update_path)

    ou = MockObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'swift_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '5'})
    ou.logger = mock_logger = mock.MagicMock()
    ou.object_sweep(self.sda1)

    # NOTE(review): this counts calls to logger.warn; confirm the updater
    # under test does not log through logger.warning instead.
    self.assertEqual(mock_logger.warn.call_count, warn)
    self.assertTrue(os.path.exists(os.path.join(self.sda1, 'not_a_dir')))
    if should_skip:
        # if we were supposed to skip over the dir, we didn't process
        # anything at all
        self.assertTrue(os.path.exists(prefix_dir))
        self.assertEqual(set(), seen)
    else:
        self.assertFalse(os.path.exists(prefix_dir))
        self.assertEqual(expected, seen)

    # test cleanup: the tempdir gets cleaned up between runs, but this
    # way we can be called multiple times in a single test method
    for not_dir in not_dirs:
        os.unlink(not_dir)
def process_object_update(self, update_path, device, policy):
    """
    Process the object information to be updated and update.

    The pickled update is loaded from ``update_path`` and sent to every
    container node that is not already listed in its ``successes``. If
    all sends succeed the update file is unlinked. If any node answers
    with a redirect, the update is re-targeted and retried once
    immediately. Otherwise the failure is counted and any newly
    successful nodes are recorded in the update file for the next pass.

    A file that has already disappeared (ENOENT) is silently ignored; any
    other load failure quarantines the file.

    :param update_path: path to pickled object update file
    :param device: path to device
    :param policy: storage policy of object update
    """
    try:
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        # NOTE(review): pickle is only safe on trusted data; this assumes
        # async pending files are written solely by this service.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception as e:
        if getattr(e, 'errno', None) == errno.ENOENT:
            return
        self.logger.exception(
            _('ERROR Pickle problem, quarantining %s'), update_path)
        self.stats.quarantines += 1
        self.logger.increment('quarantines')
        target_path = os.path.join(device, 'quarantined', 'objects',
                                   os.path.basename(update_path))
        renamer(update_path, target_path, fsync=False)
        try:
            # If this was the last async_pending in the directory,
            # then this will succeed. Otherwise, it'll fail, and
            # that's okay.
            os.rmdir(os.path.dirname(update_path))
        except OSError:
            pass
        return

    def do_update():
        """
        Send the update once to all not-yet-successful nodes.

        :returns: tuple of (whether the update pickle must be rewritten,
                  the redirect target chosen or last reported, if any)
        """
        successes = update.get('successes', [])
        headers_out = HeaderKeyDict(update['headers'].copy())
        headers_out['user-agent'] = 'object-updater %s' % os.getpid()
        headers_out.setdefault('X-Backend-Storage-Policy-Index',
                               str(int(policy)))
        headers_out.setdefault('X-Backend-Accept-Redirect', 'true')
        headers_out.setdefault('X-Backend-Accept-Quoted-Location', 'true')
        container_path = update.get('container_path')
        if container_path:
            # A previous redirect re-targeted this update.
            acct, cont = split_path('/' + container_path, minsegs=2)
        else:
            acct, cont = update['account'], update['container']
        part, nodes = self.get_container_ring().get_nodes(acct, cont)
        obj = '/%s/%s/%s' % (acct, cont, update['obj'])
        events = [spawn(self.object_update,
                        node, part, update['op'], obj, headers_out)
                  for node in nodes if node['id'] not in successes]
        success = True
        new_successes = rewrite_pickle = False
        redirect = None
        redirects = set()
        for event in events:
            event_success, node_id, redirect = event.wait()
            if event_success is True:
                successes.append(node_id)
                new_successes = True
            else:
                success = False
            if redirect:
                redirects.add(redirect)

        if success:
            self.stats.successes += 1
            self.logger.increment('successes')
            self.logger.debug('Update sent for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
            self.stats.unlinks += 1
            self.logger.increment('unlinks')
            os.unlink(update_path)
            try:
                # If this was the last async_pending in the directory,
                # then this will succeed. Otherwise, it'll fail, and
                # that's okay.
                os.rmdir(os.path.dirname(update_path))
            except OSError:
                pass
        elif redirects:
            # erase any previous successes
            update.pop('successes', None)
            # Pick the redirect whose last element is greatest and use its
            # first element as the target (presumably a
            # (container path, timestamp) pair -- confirm against
            # object_update).
            redirect = max(redirects, key=lambda x: x[-1])[0]
            redirect_history = update.setdefault('redirect_history', [])
            if redirect in redirect_history:
                # force next update to be sent to root, reset history
                update['container_path'] = None
                update['redirect_history'] = []
            else:
                update['container_path'] = redirect
                redirect_history.append(redirect)
            self.stats.redirects += 1
            self.logger.increment("redirects")
            self.logger.debug(
                'Update redirected for %(obj)s %(path)s to %(shard)s',
                {'obj': obj, 'path': update_path,
                 'shard': update['container_path']})
            rewrite_pickle = True
        else:
            self.stats.failures += 1
            self.logger.increment('failures')
            self.logger.debug('Update failed for %(obj)s %(path)s',
                              {'obj': obj, 'path': update_path})
            if new_successes:
                # Remember which nodes already have the update so the
                # next pass only retries the rest.
                update['successes'] = successes
                rewrite_pickle = True

        return rewrite_pickle, redirect

    rewrite_pickle, redirect = do_update()
    if redirect:
        # make one immediate retry to the redirect location
        rewrite_pickle, redirect = do_update()
    if rewrite_pickle:
        write_pickle(update, update_path, os.path.join(
            device, get_tmp_dir(policy)))