class Checker(object):
    """
    Check accounts, containers, objects and chunks against each other.

    Each ``check_*`` method first checks the parent level (a chunk check
    triggers the object check, which triggers the container check, and so
    on), keeps the listing it obtained in ``self.list_cache`` and, when
    asked to recurse, spawns checks of the children in ``self.pool``.

    ``self.running`` maps the key of a check in progress to an ``Event``,
    so that a concurrent check of the same target waits for the first one
    and then reuses the cached listing.
    """

    def __init__(self, namespace, concurrency=50, error_file=None,
                 rebuild_file=None, full=True):
        """
        :param namespace: value stored under the 'namespace' key of the
            configuration passed to the account and container clients.
        :param concurrency: size of the green thread pool.
        :param error_file: if set, path of a file where the erroneous
            targets are appended (space separated).
        :param rebuild_file: if set, path of a file where the chunks to
            rebuild are appended ('|' separated).
        :param full: if true, also compare the chunk extended attributes
            with the information found in the object listing.
        """
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.full = bool(full)
        # NOTE(review): the files opened below are never closed explicitly.
        if self.error_file:
            f = open(self.error_file, 'a')
            self.error_writer = csv.writer(f, delimiter=' ')
        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(fd, delimiter='|')

        conf = {'namespace': namespace}
        self.account_client = AccountClient(conf)
        self.container_client = ContainerClient(conf)
        self.blob_client = BlobClient()

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = {}
        self.running = {}

    def write_error(self, target):
        """Append the non-empty components of `target` to the error file."""
        error = [target.account]
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, obj_meta, ct_meta):
        """
        Append a (container ID, object ID, chunk) row to the rebuild file.

        The container ID is the part of 'sys.name' before the first dot,
        read from the 'system' section of `ct_meta` or, when this section
        is missing, from its 'properties' section.
        """
        try:
            cid = ct_meta['system']['sys.name'].split('.', 1)[0]
        except KeyError:
            cid = ct_meta['properties']['sys.name'].split('.', 1)[0]
        self.rebuild_writer.writerow((cid, obj_meta['id'], target.chunk))

    def write_chunk_error(self, target, obj_meta, chunk=None):
        """
        Record a chunk error in the error file and/or the rebuild file.

        :param chunk: if not None, the error is recorded for a copy of
            `target` whose chunk has been replaced by this value.
        """
        if chunk is not None:
            target = target.copy()
            target.chunk = chunk
        if self.error_file:
            self.write_error(target)
        if self.rebuild_file:
            # The container metadata is the second item of the cache entry
            # stored by check_container().
            ct_meta = self.list_cache[(target.account, target.container)][1]
            self.write_rebuilder_input(target, obj_meta, ct_meta)

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """
        Compare the size and hash of a chunk with its extended attributes.

        :returns: True if at least one of them differs.
        """
        error = False
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            print(
                " Chunk %s '%s' xattr (%s) "
                "differs from size in meta2 (%s)" %
                (target, attr_key, xattr_meta.get(attr_key),
                 obj_meta['size']))
            error = True

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            print(
                " Chunk %s '%s' xattr (%s) "
                "differs from hash in meta2 (%s)" %
                (target, attr_key, xattr_meta.get(attr_key),
                 obj_meta['hash']))
            error = True
        return error

    def check_chunk(self, target):
        """
        Check one chunk: it must appear in the listing of its object and
        answer to a HEAD request. In "full" mode, its extended attributes
        are compared with the object listing.
        """
        chunk = target.chunk
        obj_listing, obj_meta = self.check_obj(target)
        error = False
        if chunk not in obj_listing:
            print(' Chunk %s missing from object listing' % target)
            error = True
            db_meta = dict()
        else:
            db_meta = obj_listing[chunk]

        try:
            xattr_meta = self.blob_client.chunk_head(chunk, xattr=self.full)
        except exc.NotFound as e:
            self.chunk_not_found += 1
            error = True
            print(' Not found chunk "%s": %s' % (target, str(e)))
        except Exception as e:
            self.chunk_exceptions += 1
            error = True
            print(' Exception chunk "%s": %s' % (target, str(e)))
        else:
            # db_meta is empty when the chunk is unknown to the object,
            # in which case there is nothing to compare with.
            if db_meta and self.full:
                error = self._check_chunk_xattr(target, db_meta, xattr_meta)

        if error:
            self.write_chunk_error(target, obj_meta)
        self.chunks_checked += 1

    def check_obj_policy(self, target, obj_meta, chunks):
        """
        Check that the list of chunks of an object matches
        the object's storage policy.

        An error is recorded for each position holding fewer chunks than
        the storage method requires.
        """
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
        if stg_met.ec:
            required = stg_met.ec_nb_data + stg_met.ec_nb_parity
        else:
            required = stg_met.nb_copy
        # items() instead of iteritems(): works with Python 2 and 3.
        for pos, clist in chunks_by_pos.items():
            if len(clist) >= required:
                continue
            print(' Missing %d chunks at position %s of %s' %
                  (required - len(clist), pos, target))
            if stg_met.ec:
                subs = {x['num'] for x in clist}
                for sub in range(required):
                    if sub not in subs:
                        self.write_chunk_error(target, obj_meta,
                                               '%d.%d' % (pos, sub))
            else:
                self.write_chunk_error(target, obj_meta, str(pos))

    def check_obj(self, target, recurse=False):
        """
        Check one object: it must appear in the listing of its container
        and be locatable, and its chunks must match its storage policy.

        :param recurse: if true, spawn a check for each chunk.
        :returns: a tuple with a dictionary of chunks (indexed by URL)
            and the object metadata (empty if the object was not located).
        """
        account = target.account
        container = target.container
        obj = target.obj
        key = (account, container, obj)

        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking object "%s"' % target)
        container_listing, _ = self.check_container(target)
        error = False
        if obj not in container_listing:
            print(' Object %s missing from container listing' % target)
            error = True
        # TODO check that the checksum matches container_listing[obj]['hash']

        results = []
        meta = dict()
        try:
            meta, results = self.container_client.content_locate(
                account=account, reference=container, path=obj)
        except exc.NotFound as e:
            self.object_not_found += 1
            error = True
            print(' Not found object "%s": %s' % (target, str(e)))
        except Exception as e:
            self.object_exceptions += 1
            error = True
            print(' Exception object "%s": %s' % (target, str(e)))

        chunk_listing = {chunk['url']: chunk for chunk in results}

        # Skip the policy check if we could not locate the object:
        # without metadata it would raise a KeyError before the event
        # below is sent, and the waiters would never be woken up.
        if meta:
            self.check_obj_policy(target.copy(), meta, results)

        self.objects_checked += 1
        self.list_cache[key] = (chunk_listing, meta)
        self.running[key].send(True)
        del self.running[key]

        if recurse:
            for chunk in chunk_listing:
                t = target.copy()
                t.chunk = chunk
                self.pool.spawn_n(self.check_chunk, t)
        if error and self.error_file:
            self.write_error(target)
        return chunk_listing, meta

    def check_container(self, target, recurse=False):
        """
        Check one container: it must appear in the listing of its account
        and its objects must be listable.

        :param recurse: if true, spawn a recursive check for each object.
        :returns: a tuple with a dictionary of objects (indexed by name)
            and the container metadata (the last page of the listing,
            without its 'objects' entry).
        """
        account = target.account
        container = target.container
        key = (account, container)

        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking container "%s"' % target)
        account_listing = self.check_account(target)
        error = False
        if container not in account_listing:
            error = True
            print(' Container %s missing from account listing' % target)

        marker = None
        results = []
        ct_meta = dict()
        while True:
            try:
                _, resp = self.container_client.content_list(
                    account=account, reference=container, marker=marker)
            except exc.NotFound as e:
                self.container_not_found += 1
                error = True
                print(' Not found container "%s": %s' % (target, str(e)))
                break
            except Exception as e:
                self.container_exceptions += 1
                error = True
                print(' Exception container "%s": %s' % (target, str(e)))
                break

            if resp['objects']:
                marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
            else:
                # An empty page ends the listing, what remains of the
                # response is kept as the container metadata.
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = {obj['name']: obj for obj in results}

        self.containers_checked += 1
        self.list_cache[key] = container_listing, ct_meta
        self.running[key].send(True)
        del self.running[key]

        if recurse:
            for obj in container_listing:
                t = target.copy()
                t.obj = obj
                self.pool.spawn_n(self.check_obj, t, True)
        if error and self.error_file:
            self.write_error(target)
        return container_listing, ct_meta

    def check_account(self, target, recurse=False):
        """
        Check one account by listing its containers.

        :param recurse: if true, spawn a recursive check for each container.
        :returns: a dictionary indexed by the first field of each listing
            entry, whose values are the second and third fields.
        """
        account = target.account

        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        print('Checking account "%s"' % target)
        error = False
        marker = None
        results = []
        while True:
            try:
                resp = self.account_client.container_list(account,
                                                          marker=marker)
            except Exception as e:
                self.account_exceptions += 1
                error = True
                print(' Exception account "%s": %s' % (target, str(e)))
                break
            if not resp['listing']:
                break
            marker = resp['listing'][-1][0]
            results.extend(resp['listing'])

        containers = {entry[0]: (entry[1], entry[2]) for entry in results}

        self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]
        self.accounts_checked += 1

        if recurse:
            for container in containers:
                t = target.copy()
                t.container = container
                self.pool.spawn_n(self.check_container, t, True)
        if error and self.error_file:
            self.write_error(target)
        return containers

    def check(self, target):
        """Spawn the check matching the most specific part of `target`."""
        if target.chunk and target.obj and target.container:
            self.pool.spawn_n(self.check_chunk, target)
        elif target.obj and target.container:
            self.pool.spawn_n(self.check_obj, target, True)
        elif target.container:
            self.pool.spawn_n(self.check_container, target, True)
        else:
            self.pool.spawn_n(self.check_account, target, True)

    def wait(self):
        """Wait for all the spawned checks to finish."""
        self.pool.waitall()

    def report(self):
        """Print the number of items checked, missing or in exception."""
        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        sections = (
            ("Accounts checked", self.accounts_checked,
             "Missing accounts", self.account_not_found,
             self.account_exceptions),
            ("Containers checked", self.containers_checked,
             "Missing containers", self.container_not_found,
             self.container_exceptions),
            ("Objects checked", self.objects_checked,
             "Missing objects", self.object_not_found,
             self.object_exceptions),
            ("Chunks checked", self.chunks_checked,
             "Missing chunks", self.chunk_not_found,
             self.chunk_exceptions),
        )

        print()
        print('Report')
        for index, section in enumerate(sections):
            checked_label, checked, missing_label, missing, failures = section
            if index:
                # Blank line between two sections
                print()
            _report_stat(checked_label, checked)
            if missing:
                _report_stat(missing_label, missing)
            if failures:
                _report_stat("Exceptions", failures)
class Checker(object):
    """
    Check accounts, containers, objects and chunks against each other.

    Each ``check_*`` method first checks the parent level, keeps the
    listing it obtained in ``self.list_cache`` and, when asked to recurse,
    spawns checks of the children in ``self.pool``.

    ``self.running`` maps the key of a check in progress to an ``Event``,
    so that a concurrent check of the same target waits for the first one
    and then reuses the cached listing.
    """

    def __init__(self, namespace, concurrency=50, error_file=None):
        """
        :param namespace: value stored under the 'namespace' key of the
            configuration passed to the account and container clients.
        :param concurrency: size of the green thread pool.
        :param error_file: if set, path of a file where the erroneous
            targets are appended (space separated).
        """
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        # NOTE(review): the file opened below is never closed explicitly.
        if self.error_file:
            f = open(self.error_file, 'a')
            self.error_writer = csv.writer(f, delimiter=' ')

        conf = {'namespace': namespace}
        self.account_client = AccountClient(conf)
        self.container_client = ContainerClient(conf)
        self.blob_client = BlobClient()

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = {}
        self.running = {}

    def write_error(self, target):
        """Append the non-empty components of `target` to the error file."""
        error = [target.account]
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def check_chunk(self, target):
        """
        Check one chunk: it must appear in the listing of its object
        and answer to a HEAD request.
        """
        chunk = target.chunk
        obj_listing = self.check_obj(target)
        error = False
        if chunk not in obj_listing:
            print(' Chunk %s missing in object listing' % target)
            error = True
        # TODO check that the checksum matches obj_listing[chunk]['hash']

        try:
            self.blob_client.chunk_head(chunk)
        except exc.NotFound as e:
            self.chunk_not_found += 1
            error = True
            print(' Not found chunk "%s": %s' % (target, str(e)))
        except Exception as e:
            self.chunk_exceptions += 1
            error = True
            print(' Exception chunk "%s": %s' % (target, str(e)))

        if error and self.error_file:
            self.write_error(target)
        self.chunks_checked += 1

    def check_obj(self, target, recurse=False):
        """
        Check one object: it must appear in the listing of its container
        and its chunks must be listable.

        :param recurse: if true, spawn a check for each chunk.
        :returns: a dictionary of chunks, indexed by URL.
        """
        account = target.account
        container = target.container
        obj = target.obj
        key = (account, container, obj)

        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking object "%s"' % target)
        container_listing = self.check_container(target)
        error = False
        if obj not in container_listing:
            print(' Object %s missing in container listing' % target)
            error = True
        # TODO check that the checksum matches container_listing[obj]['hash']

        results = []
        try:
            _, resp = self.container_client.content_show(
                acct=account, ref=container, path=obj)
        except exc.NotFound as e:
            self.object_not_found += 1
            error = True
            print(' Not found object "%s": %s' % (target, str(e)))
        except Exception as e:
            self.object_exceptions += 1
            error = True
            print(' Exception object "%s": %s' % (target, str(e)))
        else:
            results = resp

        chunk_listing = {chunk['url']: chunk for chunk in results}

        self.objects_checked += 1
        self.list_cache[key] = chunk_listing
        self.running[key].send(True)
        del self.running[key]

        if recurse:
            for chunk in chunk_listing:
                t = target.copy()
                t.chunk = chunk
                self.pool.spawn_n(self.check_chunk, t)
        if error and self.error_file:
            self.write_error(target)
        return chunk_listing

    def check_container(self, target, recurse=False):
        """
        Check one container: it must appear in the listing of its account
        and its objects must be listable.

        :param recurse: if true, spawn a recursive check for each object.
        :returns: a dictionary of objects, indexed by name.
        """
        account = target.account
        container = target.container
        key = (account, container)

        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking container "%s"' % target)
        account_listing = self.check_account(target)
        error = False
        if container not in account_listing:
            error = True
            print(' Container %s missing in account listing' % target)

        marker = None
        results = []
        while True:
            try:
                resp = self.container_client.container_list(
                    acct=account, ref=container, marker=marker)
            except exc.NotFound as e:
                self.container_not_found += 1
                error = True
                print(' Not found container "%s": %s' % (target, str(e)))
                break
            except Exception as e:
                self.container_exceptions += 1
                error = True
                print(' Exception container "%s": %s' % (target, str(e)))
                break
            # An empty page ends the listing
            if not resp['objects']:
                break
            marker = resp['objects'][-1]['name']
            results.extend(resp['objects'])

        container_listing = {obj['name']: obj for obj in results}

        self.containers_checked += 1
        self.list_cache[key] = container_listing
        self.running[key].send(True)
        del self.running[key]

        if recurse:
            for obj in container_listing:
                t = target.copy()
                t.obj = obj
                self.pool.spawn_n(self.check_obj, t, True)
        if error and self.error_file:
            self.write_error(target)
        return container_listing

    def check_account(self, target, recurse=False):
        """
        Check one account by listing its containers.

        :param recurse: if true, spawn a recursive check for each container.
        :returns: a dictionary indexed by the first field of each listing
            entry, whose values are the second and third fields.
        """
        account = target.account

        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        print('Checking account "%s"' % target)
        error = False
        marker = None
        results = []
        while True:
            try:
                resp = self.account_client.containers_list(account,
                                                           marker=marker)
            except Exception as e:
                self.account_exceptions += 1
                error = True
                print(' Exception account "%s": %s' % (target, str(e)))
                break
            if not resp['listing']:
                break
            marker = resp['listing'][-1][0]
            results.extend(resp['listing'])

        containers = {entry[0]: (entry[1], entry[2]) for entry in results}

        self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]
        self.accounts_checked += 1

        if recurse:
            for container in containers:
                t = target.copy()
                t.container = container
                self.pool.spawn_n(self.check_container, t, True)
        if error and self.error_file:
            self.write_error(target)
        return containers

    def check(self, target):
        """Spawn the check matching the most specific part of `target`."""
        if target.chunk and target.obj and target.container:
            self.pool.spawn_n(self.check_chunk, target)
        elif target.obj and target.container:
            self.pool.spawn_n(self.check_obj, target, True)
        elif target.container:
            self.pool.spawn_n(self.check_container, target, True)
        else:
            self.pool.spawn_n(self.check_account, target, True)

    def wait(self):
        """Wait for all the spawned checks to finish."""
        self.pool.waitall()

    def report(self):
        """Print the number of items checked, missing or in exception."""
        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        sections = (
            ("Accounts checked", self.accounts_checked,
             "Missing accounts", self.account_not_found,
             self.account_exceptions),
            ("Containers checked", self.containers_checked,
             "Missing containers", self.container_not_found,
             self.container_exceptions),
            ("Objects checked", self.objects_checked,
             "Missing objects", self.object_not_found,
             self.object_exceptions),
            ("Chunks checked", self.chunks_checked,
             "Missing chunks", self.chunk_not_found,
             self.chunk_exceptions),
        )

        print()
        print('Report')
        for index, section in enumerate(sections):
            checked_label, checked, missing_label, missing, failures = section
            if index:
                # Blank line between two sections
                print()
            _report_stat(checked_label, checked)
            if missing:
                _report_stat(missing_label, missing)
            if failures:
                _report_stat("Exceptions", failures)
class RawxDecommissionTask(XcuteTask):
    """
    Task moving one chunk away from the rawx service designated by the
    'service_id' job parameter.
    """

    def __init__(self, conf, job_params, logger=None):
        """
        :param job_params: dictionary which must provide the 'service_id',
            'rawx_timeout', 'min_chunk_size', 'max_chunk_size' and
            'excluded_rawx' keys.
        """
        super(RawxDecommissionTask, self).__init__(
            conf, job_params, logger=logger)

        self.service_id = job_params['service_id']
        self.rawx_timeout = job_params['rawx_timeout']
        self.min_chunk_size = job_params['min_chunk_size']
        self.max_chunk_size = job_params['max_chunk_size']
        self.excluded_rawx = job_params['excluded_rawx']

        self.blob_client = BlobClient(self.conf, logger=self.logger)
        self.content_factory = ContentFactory(self.conf)
        self.conscience_client = ConscienceClient(
            self.conf, logger=self.logger)

        self.fake_excluded_chunks = self._generate_fake_excluded_chunks(
            self.excluded_rawx)

    def _generate_fake_excluded_chunks(self, excluded_rawx):
        """
        Build one placeholder chunk description per excluded rawx service.

        These descriptions are passed to `move_chunk` as
        `fake_excluded_chunks` by `process`.

        :param excluded_rawx: iterable of rawx service IDs.
        :returns: a list of chunk dictionaries.
        """
        fake_excluded_chunks = list()
        fake_chunk_id = '0' * 64
        for service_id in excluded_rawx:
            service_addr = self.conscience_client.resolve_service_id(
                'rawx', service_id)
            chunk = dict()
            chunk['hash'] = '0000000000000000000000000000000000'
            chunk['pos'] = '0'
            chunk['size'] = 1
            chunk['score'] = 1
            chunk['url'] = 'http://{}/{}'.format(service_id, fake_chunk_id)
            chunk['real_url'] = 'http://{}/{}'.format(service_addr,
                                                      fake_chunk_id)
            fake_excluded_chunks.append(chunk)
        return fake_excluded_chunks

    def process(self, task_id, task_payload, reqid=None):
        """
        Move one chunk, unless it no longer exists, is orphan,
        or does not match the size constraints.

        :param task_payload: dictionary providing the 'container_id',
            'content_id' and 'chunk_id' keys.
        :returns: a dictionary of counters describing what has been done.
        :raises ValueError: if the container ID or content ID found on the
            chunk differs from the one of the payload.
        """
        container_id = task_payload['container_id']
        content_id = task_payload['content_id']
        chunk_id = task_payload['chunk_id']

        chunk_url = 'http://{}/{}'.format(self.service_id, chunk_id)
        try:
            meta = self.blob_client.chunk_head(
                chunk_url, timeout=self.rawx_timeout, reqid=reqid)
        except NotFound:
            # The chunk is still present in the rdir,
            # but the chunk no longer exists in the rawx.
            # We ignore it because there is nothing to move.
            return {'skipped_chunks_no_longer_exist': 1}

        # The messages are formatted explicitly: exceptions, unlike
        # loggers, do not interpolate their extra arguments.
        if container_id != meta['container_id']:
            raise ValueError('Mismatch container ID: %s != %s'
                             % (container_id, meta['container_id']))
        if content_id != meta['content_id']:
            raise ValueError('Mismatch content ID: %s != %s'
                             % (content_id, meta['content_id']))
        chunk_size = int(meta['chunk_size'])

        # Maybe skip the chunk because it doesn't match the size constraint
        if chunk_size < self.min_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too small',
                              reqid, chunk_url)
            return {'skipped_chunks_too_small': 1}
        # A max_chunk_size of zero or less disables the upper bound
        if self.max_chunk_size > 0 and chunk_size > self.max_chunk_size:
            self.logger.debug('[reqid=%s] SKIP %s too big',
                              reqid, chunk_url)
            return {'skipped_chunks_too_big': 1}

        # Start moving the chunk
        try:
            content = self.content_factory.get(
                container_id, content_id, reqid=reqid)
            content.move_chunk(
                chunk_id, fake_excluded_chunks=self.fake_excluded_chunks,
                reqid=reqid)
        except (ContentNotFound, OrphanChunk):
            return {'orphan_chunks': 1}

        return {'moved_chunks': 1, 'moved_bytes': chunk_size}
class ContentRepairerWorker(ToolWorker):
    """
    Worker verifying the chunks of an object and asking for the rebuild
    of those which are missing or fail the verification.
    """

    def __init__(self, tool, queue_workers, queue_reply):
        super(ContentRepairerWorker, self).__init__(
            tool, queue_workers, queue_reply)

        self.chunk_operator = ChunkOperator(self.conf, logger=self.logger)
        self.blob_client = BlobClient(self.conf)
        self.container_client = ContainerClient(self.conf,
                                                logger=self.logger)

    def _safe_chunk_rebuild(self, item, content_id, chunk_id_or_pos,
                            **kwargs):
        """
        Rebuild a chunk, logging and returning the error instead of
        raising it.

        :returns: the exception which occurred, or None.
        """
        _namespace, account, container, _obj_name, _version = item
        try:
            self.chunk_operator.rebuild(
                cid_from_name(account, container), content_id,
                chunk_id_or_pos, **kwargs)
        except Exception as err:  # pylint: disable=broad-except
            self.logger.error(
                'ERROR when rebuilding chunk %s (%s): %s',
                self.tool.string_from_item(item), chunk_id_or_pos, err)
            return err
        return None

    def _repair_metachunk(self, item, content_id, stg_met, pos, chunks):
        """
        Check that a metachunk has the right number of chunks and that
        each known chunk passes a HEAD request, and ask for the rebuild
        of the chunks which do not.

        :returns: the list of the exceptions which occurred.
        """
        failures = []
        expected = stg_met.expected_chunks
        found = len(chunks)

        if found < expected:
            # First pass: work out what is missing
            if stg_met.ec:
                known_subs = {known['num'] for known in chunks}
                to_rebuild = ["%d.%d" % (pos, sub)
                              for sub in range(expected)
                              if sub not in known_subs]
            else:
                to_rebuild = [pos] * (expected - found)
            # Second pass: rebuild it
            for missing in to_rebuild:
                err = self._safe_chunk_rebuild(item, content_id, missing)
                if err:
                    failures.append(err)

        for chunk in chunks:
            try:
                self.blob_client.chunk_head(
                    chunk['url'], xattr=True, check_hash=True)
            except (NotFound, ClientPreconditionFailed) as head_err:
                # Only a chunk failing the precondition is still there
                # and may have to be deleted.
                try_delete = isinstance(head_err, ClientPreconditionFailed)
                err = self._safe_chunk_rebuild(
                    item, content_id, chunk['url'],
                    try_chunk_delete=try_delete)
                if err:
                    failures.append(err)
            except Exception as head_err:  # pylint: disable=broad-except
                self.logger.error(
                    'ERROR when checking chunk %s (%s): %s',
                    self.tool.string_from_item(item), chunk['url'],
                    head_err)
                failures.append(head_err)

        return failures

    def _process_item(self, item):
        """
        Repair all the metachunks of an object, then touch the object.

        :raises ValueError: if the namespace of the item is not the one
            of the tool.
        :raises Exception: wrapping the list of the errors which occurred
            (in which case the object is not touched).
        """
        namespace, account, container, obj_name, version = item
        expected_ns = self.tool.namespace
        if namespace != expected_ns:
            raise ValueError('Invalid namespace (actual=%s, expected=%s)'
                             % (namespace, expected_ns))

        obj_meta, all_chunks = self.container_client.content_locate(
            account=account, reference=container, path=obj_name,
            version=version, properties=False)
        content_id = obj_meta['id']

        failures = []
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        sorted_chunks = _sort_chunks(all_chunks, stg_met.ec)
        for pos, pos_chunks in iteritems(sorted_chunks):
            try:
                failures.extend(self._repair_metachunk(
                    item, content_id, stg_met, pos, pos_chunks))
            except Exception as err:  # pylint: disable=broad-except
                self.logger.error(
                    'ERROR when repair metachunk %s (%d): %s',
                    self.tool.string_from_item(item), pos, err)
                failures.append(err)

        if failures:
            raise Exception(failures)

        self.container_client.content_touch(
            account=account, reference=container, path=obj_name,
            version=version)
class Checker(object):
    """
    Check accounts, containers, objects and chunks against each other.

    Each ``check_*`` method first checks the parent level (a chunk check
    triggers the object check, which triggers the container check, and so
    on), may keep the listing it obtained in ``self.list_cache`` and,
    when asked to recurse, spawns checks of the children in ``self.pool``.

    ``self.running`` maps the key of a check in progress to an ``Event``,
    so that a concurrent check of the same target waits for the first one.
    """

    def __init__(self, namespace, concurrency=50, error_file=None,
                 rebuild_file=None, full=True, limit_listings=0,
                 request_attempts=1):
        """
        :param namespace: value stored under the 'namespace' key of the
            configuration passed to the clients.
        :param concurrency: size of the green thread pool.
        :param error_file: if set, path of a file where the erroneous
            targets are appended (space separated).
        :param rebuild_file: if set, path of a file where the chunks to
            rebuild are appended ('|' separated).
        :param full: if true, also compare the chunk extended attributes
            with the information found in the object listing.
        :param limit_listings: see the comment in the body.
        :param request_attempts: passed to the container client, and,
            minus one, as `max_retries` of the account and container
            clients.
        """
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        self.full = bool(full)
        # Optimisation for when we are only checking one object
        # or one container.
        # 0 -> do not limit
        # 1 -> limit account listings (list of containers)
        # 2 -> limit container listings (list of objects)
        self.limit_listings = limit_listings
        # NOTE(review): the files opened below are never closed explicitly.
        if self.error_file:
            f = open(self.error_file, 'a')
            self.error_writer = csv.writer(f, delimiter=' ')
        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(fd, delimiter='|')

        conf = {'namespace': namespace}
        self.account_client = AccountClient(
            conf, max_retries=request_attempts - 1)
        self.container_client = ContainerClient(
            conf, max_retries=request_attempts - 1,
            request_attempts=request_attempts)
        self.blob_client = BlobClient(conf=conf)

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0
        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0
        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        self.list_cache = {}
        self.running = {}

    def write_error(self, target, irreparable=False):
        """
        Append the non-empty components of `target` to the error file.

        :param irreparable: if true, the row starts with '#IRREPARABLE'.
        """
        error = list()
        if irreparable:
            error.append('#IRREPARABLE')
        error.append(target.account)
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, obj_meta, irreparable=False):
        """
        Append a (container ID, object ID, chunk) row to the rebuild file.

        The container ID is the part of 'sys.name' before the first dot,
        read from the 'system' section of the cached container metadata
        or, when this section is missing, from its 'properties' section.

        :param irreparable: if true, the row starts with '#IRREPARABLE'.
        """
        # NOTE(review): check_container() only fills this cache entry when
        # limit_listings <= 1, this lookup looks like it can raise a
        # KeyError when container listings are limited -- to be confirmed.
        ct_meta = self.list_cache[(target.account, target.container)][1]
        try:
            cid = ct_meta['system']['sys.name'].split('.', 1)[0]
        except KeyError:
            cid = ct_meta['properties']['sys.name'].split('.', 1)[0]
        error = list()
        if irreparable:
            error.append('#IRREPARABLE')
        error.append(cid)
        error.append(obj_meta['id'])
        error.append(target.chunk)
        self.rebuild_writer.writerow(error)

    def write_chunk_error(self, target, obj_meta,
                          chunk=None, irreparable=False):
        """
        Record a chunk error in the error file and/or the rebuild file.

        :param chunk: if not None, the error is recorded for a copy of
            `target` whose chunk has been replaced by this value.
        """
        if chunk is not None:
            target = target.copy()
            target.chunk = chunk
        if self.error_file:
            self.write_error(target, irreparable=irreparable)
        if self.rebuild_file:
            self.write_rebuilder_input(
                target, obj_meta, irreparable=irreparable)

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """
        Compare the size and hash of a chunk with its extended attributes.

        :returns: True if at least one of them differs.
        """
        error = False
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            print(
                " Chunk %s '%s' xattr (%s) "
                "differs from size in meta2 (%s)" %
                (target, attr_key, xattr_meta.get(attr_key),
                 obj_meta['size']))
            error = True

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            print(
                " Chunk %s '%s' xattr (%s) "
                "differs from hash in meta2 (%s)" %
                (target, attr_key, xattr_meta.get(attr_key),
                 obj_meta['hash']))
            error = True
        return error

    def _check_chunk(self, target):
        """
        Check one chunk: it must appear in the listing of its object and
        answer to a HEAD request. In "full" mode, its extended attributes
        are compared with the object listing.

        :returns: a tuple with a boolean telling if an error has been
            detected, and the metadata of the object.
        """
        chunk = target.chunk
        obj_listing, obj_meta = self.check_obj(target)
        error = False
        if chunk not in obj_listing:
            print(' Chunk %s missing from object listing' % target)
            error = True
            db_meta = dict()
        else:
            db_meta = obj_listing[chunk]

        try:
            xattr_meta = self.blob_client.chunk_head(chunk, xattr=self.full)
        except exc.NotFound as e:
            self.chunk_not_found += 1
            error = True
            print(' Not found chunk "%s": %s' % (target, str(e)))
        except Exception as e:
            self.chunk_exceptions += 1
            error = True
            print(' Exception chunk "%s": %s' % (target, str(e)))
        else:
            # db_meta is empty when the chunk is unknown to the object,
            # in which case there is nothing to compare with.
            if db_meta and self.full:
                error = self._check_chunk_xattr(target, db_meta, xattr_meta)

        self.chunks_checked += 1
        return error, obj_meta

    def check_chunk(self, target):
        """Check one chunk and record the error, if any."""
        error, obj_meta = self._check_chunk(target)
        if error:
            self.write_chunk_error(target, obj_meta)

    def _check_metachunk(self, target, obj_meta, stg_met, pos, chunks,
                         recurse=False):
        """
        Check that a metachunk has the right number of chunks and,
        if `recurse` is true, check each of its chunks.

        All the errors found are recorded, flagged as irreparable when
        the number of chunks without error is lower than
        `stg_met.min_chunks_to_read`.
        """
        required = stg_met.expected_chunks
        chunk_errors = list()

        if len(chunks) < required:
            missing_chunks = required - len(chunks)
            print(' Missing %d chunks at position %s of %s' %
                  (missing_chunks, pos, target))
            if stg_met.ec:
                subs = {x['num'] for x in chunks}
                for sub in range(required):
                    if sub not in subs:
                        chunk_errors.append(
                            (target, obj_meta, '%d.%d' % (pos, sub)))
            else:
                for _ in range(missing_chunks):
                    chunk_errors.append((target, obj_meta, str(pos)))

        if recurse:
            for chunk in chunks:
                t = target.copy()
                t.chunk = chunk['url']
                error, obj_meta = self._check_chunk(t)
                if error:
                    chunk_errors.append((t, obj_meta))

        irreparable = required - len(chunk_errors) < stg_met.min_chunks_to_read
        for chunk_error in chunk_errors:
            self.write_chunk_error(*chunk_error, irreparable=irreparable)

    def _check_obj_policy(self, target, obj_meta, chunks, recurse=False):
        """
        Check that the list of chunks of an object matches
        the object's storage policy.

        One check is spawned per position (metachunk).
        """
        stg_met = STORAGE_METHODS.load(obj_meta['chunk_method'])
        chunks_by_pos = _sort_chunks(chunks, stg_met.ec)
        # items() instead of iteritems(): works with Python 2 and 3.
        for pos, pos_chunks in chunks_by_pos.items():
            self.pool.spawn_n(self._check_metachunk, target.copy(),
                              obj_meta, stg_met, pos, pos_chunks,
                              recurse=recurse)

    def check_obj(self, target, recurse=False):
        """
        Check one object: it must appear in the listing of its container
        and be locatable, and its chunks must match its storage policy.

        :param recurse: if true, each chunk is checked too.
        :returns: a tuple with a dictionary of chunks (indexed by URL)
            and the object metadata (empty if the object was not located).
        """
        account = target.account
        container = target.container
        obj = target.obj
        key = (account, container, obj)

        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking object "%s"' % target)
        container_listing, _ = self.check_container(target)
        error = False
        if obj not in container_listing:
            print(' Object %s missing from container listing' % target)
            error = True
        # TODO check that the checksum matches container_listing[obj]['hash']

        results = []
        meta = dict()
        try:
            meta, results = self.container_client.content_locate(
                account=account, reference=container, path=obj,
                properties=False)
        except exc.NotFound as e:
            self.object_not_found += 1
            error = True
            print(' Not found object "%s": %s' % (target, str(e)))
        except Exception as e:
            self.object_exceptions += 1
            error = True
            print(' Exception object "%s": %s' % (target, str(e)))

        chunk_listing = {chunk['url']: chunk for chunk in results}

        # Only cache the result if the object has been located
        if meta:
            self.list_cache[key] = (chunk_listing, meta)
        self.objects_checked += 1
        self.running[key].send(True)
        del self.running[key]

        # Skip the check if we could not locate the object
        if meta:
            self._check_obj_policy(target, meta, results, recurse=recurse)

        if error and self.error_file:
            self.write_error(target)
        return chunk_listing, meta

    def check_container(self, target, recurse=False):
        """
        Check one container: it must appear in the listing of its account
        and its objects must be listable.

        :param recurse: if true, spawn a recursive check for each object.
        :returns: a tuple with a dictionary of objects (indexed by name)
            and the container metadata (empty unless the listing has been
            read up to an empty page).
        """
        account = target.account
        container = target.container
        key = (account, container)

        if key in self.running:
            self.running[key].wait()
        if key in self.list_cache:
            return self.list_cache[key]
        self.running[key] = Event()
        print('Checking container "%s"' % target)
        account_listing = self.check_account(target)
        error = False
        if container not in account_listing:
            error = True
            print(' Container %s missing from account listing' % target)

        marker = None
        results = []
        ct_meta = dict()
        extra_args = dict()
        if self.limit_listings > 1 and target.obj:
            # When we are explicitly checking one object, start the listing
            # where this object is supposed to be, and list only one object.
            extra_args['prefix'] = target.obj
            extra_args['limit'] = 1
        while True:
            try:
                _, resp = self.container_client.content_list(
                    account=account, reference=container, marker=marker,
                    **extra_args)
            except exc.NotFound as e:
                self.container_not_found += 1
                error = True
                print(' Not found container "%s": %s' % (target, str(e)))
                break
            except Exception as e:
                self.container_exceptions += 1
                error = True
                print(' Exception container "%s": %s' % (target, str(e)))
                break

            if resp['objects']:
                marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
                if self.limit_listings > 1:
                    break
            else:
                # An empty page ends the listing, what remains of the
                # response is kept as the container metadata.
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = {obj['name']: obj for obj in results}

        if self.limit_listings <= 1:
            # We just listed the whole container, keep the result in a cache
            self.containers_checked += 1
            self.list_cache[key] = container_listing, ct_meta
        self.running[key].send(True)
        del self.running[key]

        if recurse:
            for obj in container_listing:
                t = target.copy()
                t.obj = obj
                self.pool.spawn_n(self.check_obj, t, True)
        if error and self.error_file:
            self.write_error(target)
        return container_listing, ct_meta

    def check_account(self, target, recurse=False):
        """
        Check one account by listing its containers.

        :param recurse: if true, spawn a recursive check for each container.
        :returns: a dictionary indexed by the first field of each listing
            entry, whose values are the second and third fields.
        """
        account = target.account

        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        print('Checking account "%s"' % target)
        error = False
        marker = None
        results = []
        extra_args = dict()
        if self.limit_listings > 0 and target.container:
            # When we are explicitly checking one container, start the
            # listing where this container is supposed to be, and list
            # only one container.
            extra_args['prefix'] = target.container
            extra_args['limit'] = 1
        while True:
            try:
                resp = self.account_client.container_list(
                    account, marker=marker, **extra_args)
            except Exception as e:
                self.account_exceptions += 1
                error = True
                print(' Exception account "%s": %s' % (target, str(e)))
                break
            if not resp['listing']:
                break
            marker = resp['listing'][-1][0]
            results.extend(resp['listing'])
            if self.limit_listings > 0:
                break

        containers = {entry[0]: (entry[1], entry[2]) for entry in results}

        if self.limit_listings <= 0:
            # We just listed the whole account, keep the result in a cache
            self.accounts_checked += 1
            self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]

        if recurse:
            for container in containers:
                t = target.copy()
                t.container = container
                self.pool.spawn_n(self.check_container, t, True)
        if error and self.error_file:
            self.write_error(target)
        return containers

    def check(self, target):
        """Spawn the check matching the most specific part of `target`."""
        if target.chunk and target.obj and target.container:
            self.pool.spawn_n(self.check_chunk, target)
        elif target.obj and target.container:
            self.pool.spawn_n(self.check_obj, target, True)
        elif target.container:
            self.pool.spawn_n(self.check_container, target, True)
        else:
            self.pool.spawn_n(self.check_account, target, True)

    def wait(self):
        """Wait for all the spawned checks to finish."""
        self.pool.waitall()

    def report(self):
        """
        Print the number of items checked, missing or in exception.

        :returns: False if at least one item was missing or caused
            an exception, True otherwise.
        """
        success = True

        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        sections = (
            ("Accounts checked", self.accounts_checked,
             "Missing accounts", self.account_not_found,
             self.account_exceptions),
            ("Containers checked", self.containers_checked,
             "Missing containers", self.container_not_found,
             self.container_exceptions),
            ("Objects checked", self.objects_checked,
             "Missing objects", self.object_not_found,
             self.object_exceptions),
            ("Chunks checked", self.chunks_checked,
             "Missing chunks", self.chunk_not_found,
             self.chunk_exceptions),
        )

        print()
        print('Report')
        for index, section in enumerate(sections):
            checked_label, checked, missing_label, missing, failures = section
            if index:
                # Blank line between two sections
                print()
            _report_stat(checked_label, checked)
            if missing:
                success = False
                _report_stat(missing_label, missing)
            if failures:
                success = False
                _report_stat("Exceptions", failures)
        return success