def get_hashes(partition_dir, recalculate=[], do_listdir=False,
               reclaim_age=ONE_WEEK):
    """
    Get a list of hashes for the suffix dir.  do_listdir causes it to mistrust
    the hash cache for suffix existence at the (unexpectedly high) cost of a
    listdir.  reclaim_age is just passed on to hash_suffix.

    :param partition_dir: absolute path of partition to get hashes for
    :param recalculate: list of suffixes which should be recalculated when got
    :param do_listdir: force existence check for all hashes in the partition
    :param reclaim_age: age at which to remove tombstones
    :returns: tuple of (number of suffix dirs hashed, dictionary of hashes)
    """
    hashed = 0
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        modified = False
        hashes = {}
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
        except Exception:
            do_listdir = True
        if do_listdir:
            hashes = dict(((suff, hashes.get(suff, None))
                           for suff in os.listdir(partition_dir)
                           if len(suff) == 3 and
                           isdir(join(partition_dir, suff))))
            modified = True
        for hash_ in recalculate:
            hashes[hash_] = None
        for suffix, hash_ in hashes.items():
            if not hash_:
                suffix_dir = join(partition_dir, suffix)
                if os.path.exists(suffix_dir):
                    try:
                        hashes[suffix] = hash_suffix(suffix_dir, reclaim_age)
                        hashed += 1
                    except OSError:
                        logging.exception(_('Error hashing suffix'))
                        hashes[suffix] = None
                else:
                    del hashes[suffix]
                modified = True
                sleep()
        if modified:
            write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
        return hashed, hashes
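
# Usage sketch (not part of the original module): a replication pass might
# consume get_hashes() by comparing local suffix hashes against a remote
# node's and syncing only the suffixes that differ. `remote_hashes` is a
# hypothetical dict as a remote REPLICATE-style query might return.
def example_suffixes_to_sync(partition_dir, remote_hashes):
    hashed, local_hashes = get_hashes(partition_dir)
    return [suffix for suffix, local_hash in local_hashes.items()
            if local_hash != remote_hashes.get(suffix)]
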
def async_update(self, op, account, container, obj, host, partition,
                 contdevice, headers_out, objdevice):
    """
    Sends or saves an async update.

    :param op: operation performed (ex: 'PUT', or 'DELETE')
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name
    :param host: host that the container is on
    :param partition: partition that the container is on
    :param contdevice: device name that the container is on
    :param headers_out: dictionary of headers to send in the container request
    :param objdevice: device name that the object is in
    """
    full_path = '/%s/%s/%s' % (account, container, obj)
    if all([host, partition, contdevice]):
        # Parse the host before entering the try block so the except clause
        # below can always reference ip and port when logging.
        ip, port = host.rsplit(':', 1)
        try:
            with ConnectionTimeout(self.conn_timeout):
                conn = http_connect(ip, port, contdevice, partition, op,
                                    full_path, headers_out)
            with Timeout(self.node_timeout):
                response = conn.getresponse()
                response.read()
                if 200 <= response.status < 300:
                    return
                else:
                    self.logger.error(_(
                        'ERROR Container update failed '
                        '(saving for async update later): %(status)d '
                        'response from %(ip)s:%(port)s/%(dev)s'),
                        {'status': response.status, 'ip': ip,
                         'port': port, 'dev': contdevice})
        except (Exception, Timeout):
            self.logger.exception(_(
                'ERROR container update failed with '
                '%(ip)s:%(port)s/%(dev)s (saving for async update later)'),
                {'ip': ip, 'port': port, 'dev': contdevice})
    async_dir = os.path.join(self.devices, objdevice, ASYNCDIR)
    ohash = hash_path(account, container, obj)
    write_pickle(
        {'op': op, 'account': account, 'container': container, 'obj': obj,
         'headers': headers_out},
        os.path.join(async_dir, ohash[-3:], ohash + '-' +
                     normalize_timestamp(headers_out['x-timestamp'])),
        os.path.join(self.devices, objdevice, 'tmp'))
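
# Usage sketch (not part of the original module): each saved update lands at
# <devices>/<objdevice>/<ASYNCDIR>/<ohash[-3:]>/<ohash>-<timestamp>, per the
# write_pickle() call above. This hypothetical walker loads every pending
# update for one device; the field names match the dict written above.
def example_iter_async_pending(devices, objdevice):
    async_dir = os.path.join(devices, objdevice, ASYNCDIR)
    for suffix in os.listdir(async_dir):
        suffix_dir = os.path.join(async_dir, suffix)
        for name in os.listdir(suffix_dir):
            with open(os.path.join(suffix_dir, name), 'rb') as fp:
                update = pickle.load(fp)
            yield (update['op'], update['account'],
                   update['container'], update['obj'])
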
def process_object_update(self, update_path, device):
    """
    Process the object information to be updated and update.

    :param update_path: path to pickled object update file
    :param device: path to device
    """
    try:
        # Use a context manager so the file handle is closed promptly.
        with open(update_path, 'rb') as fp:
            update = pickle.load(fp)
    except Exception:
        self.logger.exception(
            _('ERROR Pickle problem, quarantining %s'), update_path)
        renamer(update_path, os.path.join(device, 'quarantined', 'objects',
                                          os.path.basename(update_path)))
        return
    successes = update.get('successes', [])
    part, nodes = self.get_container_ring().get_nodes(
        update['account'], update['container'])
    obj = '/%s/%s/%s' % \
        (update['account'], update['container'], update['obj'])
    success = True
    for node in nodes:
        if node['id'] not in successes:
            status = self.object_update(node, part, update['op'], obj,
                                        update['headers'])
            if not (200 <= status < 300) and status != 404:
                success = False
            else:
                successes.append(node['id'])
    if success:
        self.successes += 1
        self.logger.debug(_('Update sent for %(obj)s %(path)s'),
                          {'obj': obj, 'path': update_path})
        os.unlink(update_path)
    else:
        self.failures += 1
        self.logger.debug(_('Update failed for %(obj)s %(path)s'),
                          {'obj': obj, 'path': update_path})
        update['successes'] = successes
        write_pickle(update, update_path, os.path.join(device, 'tmp'))
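
# Usage sketch (not part of the original module): process_object_update() is
# driven by a sweep over the async-pending tree. This is a simplified,
# assumed version of that loop; the real object_sweep also skips all but the
# newest update per object and cleans up emptied suffix directories, as the
# test_object_sweep test below exercises.
def example_object_sweep(self, device):
    async_dir = os.path.join(device, ASYNCDIR)
    for suffix in sorted(os.listdir(async_dir)):
        suffix_dir = os.path.join(async_dir, suffix)
        for update_file in sorted(os.listdir(suffix_dir)):
            self.process_object_update(
                os.path.join(suffix_dir, update_file), device)
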
def invalidate_hash(suffix_dir):
    """
    Invalidates the hash for a suffix_dir in the partition's hashes file.

    :param suffix_dir: absolute path to suffix dir whose hash needs
                       invalidating
    """
    suffix = os.path.basename(suffix_dir)
    partition_dir = os.path.dirname(suffix_dir)
    hashes_file = join(partition_dir, HASH_FILE)
    with lock_path(partition_dir):
        try:
            with open(hashes_file, 'rb') as fp:
                hashes = pickle.load(fp)
            if suffix in hashes and not hashes[suffix]:
                return
        except Exception:
            return
        hashes[suffix] = None
        write_pickle(hashes, hashes_file, partition_dir, PICKLE_PROTOCOL)
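
# Usage sketch (not part of the original module): a hypothetical write path
# showing how invalidate_hash() and get_hashes() cooperate. After a write
# changes a suffix directory, the cached hash is marked stale (set to None)
# so the next get_hashes() call recomputes just that suffix.
def example_after_object_write(partition_dir, suffix):
    invalidate_hash(join(partition_dir, suffix))
    hashed, hashes = get_hashes(partition_dir)
    return hashes[suffix]
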
def test_object_sweep(self):
    prefix_dir = os.path.join(self.sda1, ASYNCDIR, 'abc')
    mkpath(prefix_dir)

    objects = {
        'a': [1089.3, 18.37, 12.83, 1.3],
        'b': [49.4, 49.3, 49.2, 49.1],
        'c': [109984.123],
    }

    expected = set()
    for o, timestamps in objects.iteritems():
        ohash = hash_path('account', 'container', o)
        for t in timestamps:
            o_path = os.path.join(prefix_dir, ohash + '-' +
                                  normalize_timestamp(t))
            if t == timestamps[0]:
                expected.add(o_path)
            write_pickle({}, o_path)

    seen = set()

    class MockObjectUpdater(object_updater.ObjectUpdater):
        def process_object_update(self, update_path, device):
            seen.add(update_path)
            os.unlink(update_path)

    cu = MockObjectUpdater({
        'devices': self.devices_dir,
        'mount_check': 'false',
        'chase_dir': self.testdir,
        'interval': '1',
        'concurrency': '1',
        'node_timeout': '5'})
    cu.object_sweep(self.sda1)
    self.assert_(not os.path.exists(prefix_dir))
    self.assertEqual(expected, seen)