class TestAccountClient(BaseTestCase):
    """Functional test of the account client's container listing.

    ``setUp`` creates a uniquely named account holding two containers
    ("container1" and "container2"); the test then checks the plain listing
    and the ``limit`` / ``marker`` pagination parameters.
    """

    def setUp(self):
        """Create a fresh account with two containers for each test."""
        super(TestAccountClient, self).setUp()
        # The timestamp suffix keeps account names distinct between runs.
        self.account_id = "test_account_%f" % time.time()
        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        self.account_client.account_create(self.account_id)
        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(0.5)  # ensure container events have been processed

    def test_containers_list(self):
        """Check the full listing, then ``limit`` and ``marker`` paging."""
        # ``assertEqual`` is used throughout: the ``assertEquals`` alias is
        # deprecated and no longer exists in Python 3.12+.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0],
                                           ["container2", 0, 0, 0]])

        # ``limit`` truncates the listing but not the container count.
        resp = self.account_client.containers_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # ``marker`` is exclusive: the listing starts after "container1".
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is listed after the last container.
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
class TestAccountClient(BaseTestCase):
    """Functional test of the account client's container listing.

    ``setUp`` creates a uniquely named account (retrying on
    ``ClientException``) holding two containers, "container1" and
    "container2"; the test then checks the plain listing and the
    ``limit`` / ``marker`` pagination parameters.
    """

    def setUp(self):
        """Create a fresh account with two containers for each test."""
        super(TestAccountClient, self).setUp()
        # The timestamp suffix keeps account names distinct between runs.
        self.account_id = "test_account_%f" % time.time()
        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        # Account creation is attempted up to ``retry + 1`` times, sleeping
        # two seconds between attempts; the last failure is re-raised.
        retry = 3
        for i in range(retry+1):
            try:
                self.account_client.account_create(self.account_id)
                break
            except ClientException:
                if i < retry:
                    time.sleep(2)
                else:
                    raise

        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(.5)  # ensure container events have been processed

    def test_containers_list(self):
        """Check the full listing, then ``limit`` and ``marker`` paging."""
        # ``assertEqual`` is used throughout: the ``assertEquals`` alias is
        # deprecated and no longer exists in Python 3.12+.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container1", 0, 0, 0],
            ["container2", 0, 0, 0]
        ])

        # ``limit`` truncates the listing but not the container count.
        resp = self.account_client.containers_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container1", 0, 0, 0]
        ])

        # ``marker`` is exclusive: the listing starts after "container1".
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [
            ["container2", 0, 0, 0]
        ])

        # Nothing is listed after the last container.
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
class TestAccountClient(BaseTestCase):
    """Functional test of the account client's container listing.

    ``setUp`` creates a uniquely named account (retrying on
    ``ClientException``) holding two containers, "container1" and
    "container2"; the test then checks the plain listing and the
    ``limit`` / ``marker`` pagination parameters.
    """

    def setUp(self):
        """Create a fresh account with two containers for each test."""
        super(TestAccountClient, self).setUp()
        # The timestamp suffix keeps account names distinct between runs.
        self.account_id = "test_account_%f" % time.time()
        self.account_client = AccountClient(self.conf)
        self.container_client = ContainerClient(self.conf)

        # Account creation is attempted up to ``retry + 1`` times, sleeping
        # two seconds between attempts; the last failure is re-raised.
        retry = 3
        for i in range(retry + 1):
            try:
                self.account_client.account_create(self.account_id)
                break
            except ClientException:
                if i < retry:
                    time.sleep(2)
                else:
                    raise

        self.container_client.container_create(acct=self.account_id,
                                               ref="container1")
        self.container_client.container_create(acct=self.account_id,
                                               ref="container2")
        time.sleep(.5)  # ensure container events have been processed

    def test_containers_list(self):
        """Check the full listing, then ``limit`` and ``marker`` paging."""
        # ``assertEqual`` is used throughout: the ``assertEquals`` alias is
        # deprecated and no longer exists in Python 3.12+.
        resp = self.account_client.containers_list(self.account_id)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0],
                                           ["container2", 0, 0, 0]])

        # ``limit`` truncates the listing but not the container count.
        resp = self.account_client.containers_list(self.account_id, limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container1", 0, 0, 0]])

        # ``marker`` is exclusive: the listing starts after "container1".
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container1",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [["container2", 0, 0, 0]])

        # Nothing is listed after the last container.
        resp = self.account_client.containers_list(self.account_id,
                                                   marker="container2",
                                                   limit=1)
        self.assertEqual(resp["containers"], 2)
        self.assertEqual(resp["listing"], [])
class Checker(object):
    """Cross-check accounts, containers, objects and chunks.

    Each ``check_*`` method verifies one level of the hierarchy against the
    listing of its parent level, prints any inconsistency, updates the
    per-level counters and, when ``recurse`` is true, schedules the checks of
    the level below on the green pool.

    Listings are memoised in ``self.list_cache``; ``self.running`` holds one
    ``Event`` per listing currently being computed so that concurrent
    checkers of the same entity wait for the first one instead of repeating
    the requests.
    """

    def __init__(self, namespace, concurrency=50, error_file=None):
        """Set up clients, counters and the optional error log.

        :param namespace: namespace passed to the account and container
            clients through their configuration dict.
        :param concurrency: size of the ``GreenPool`` running the checks.
        :param error_file: optional path; when set, every entity found in
            error is appended to it as a space-delimited CSV row.
        """
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        if self.error_file:
            # Append mode: successive runs accumulate in the same file.
            f = open(self.error_file, 'a')
            self.error_writer = csv.writer(f, delimiter=' ')

        conf = {'namespace': namespace}
        self.account_client = AccountClient(conf)
        self.container_client = ContainerClient(conf)
        self.blob_client = BlobClient()

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0

        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0

        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        # Keys are the account, (account, container) or
        # (account, container, obj) depending on the level.
        self.list_cache = {}
        self.running = {}

    def write_error(self, target):
        """Append ``target`` to the error file.

        The row holds the account followed by whichever of container,
        object and chunk are set on ``target``.
        """
        error = [target.account]
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def check_chunk(self, target):
        """Check that ``target.chunk`` is listed by its object and exists.

        The chunk is in error when it is absent from the object's chunk
        listing or when ``blob_client.chunk_head`` fails.
        """
        chunk = target.chunk

        obj_listing = self.check_obj(target)
        error = False
        if chunk not in obj_listing:
            print(' Chunk %s missing in object listing' % target)
            error = True
        # TODO check checksum match against obj_listing[chunk]['hash']

        try:
            self.blob_client.chunk_head(chunk)
        except exc.NotFound as e:
            self.chunk_not_found += 1
            error = True
            print(' Not found chunk "%s": %s' % (target, str(e)))
        except Exception as e:
            self.chunk_exceptions += 1
            error = True
            print(' Exception chunk "%s": %s' % (target, str(e)))

        if error and self.error_file:
            self.write_error(target)
        self.chunks_checked += 1

    def check_obj(self, target, recurse=False):
        """Check ``target.obj`` and return its chunk listing.

        :param recurse: when true, schedule ``check_chunk`` for every chunk.
        :returns: dict mapping each chunk URL to the chunk description
            returned by ``container_client.content_show`` (empty on failure).
        """
        account = target.account
        container = target.container
        obj = target.obj

        # Wait for a concurrent check of the same object, then reuse its
        # cached result if there is one.
        if (account, container, obj) in self.running:
            self.running[(account, container, obj)].wait()
        if (account, container, obj) in self.list_cache:
            return self.list_cache[(account, container, obj)]
        self.running[(account, container, obj)] = Event()
        print('Checking object "%s"' % target)
        container_listing = self.check_container(target)
        error = False
        if obj not in container_listing:
            print(' Object %s missing in container listing' % target)
            error = True
        # TODO check checksum match against container_listing[obj]['hash']

        results = []
        try:
            _, resp = self.container_client.content_show(acct=account,
                                                         ref=container,
                                                         path=obj)
        except exc.NotFound as e:
            self.object_not_found += 1
            error = True
            print(' Not found object "%s": %s' % (target, str(e)))
        except Exception as e:
            self.object_exceptions += 1
            error = True
            print(' Exception object "%s": %s' % (target, str(e)))
        else:
            results = resp

        chunk_listing = dict()
        for chunk in results:
            chunk_listing[chunk['url']] = chunk

        self.objects_checked += 1
        # Publish the result before waking up the waiters.
        self.list_cache[(account, container, obj)] = chunk_listing
        self.running[(account, container, obj)].send(True)
        del self.running[(account, container, obj)]

        if recurse:
            for chunk in chunk_listing:
                t = target.copy()
                t.chunk = chunk
                self.pool.spawn_n(self.check_chunk, t)
        if error and self.error_file:
            self.write_error(target)
        return chunk_listing

    def check_container(self, target, recurse=False):
        """Check ``target.container`` and return its object listing.

        :param recurse: when true, schedule a recursive ``check_obj`` for
            every listed object.
        :returns: dict mapping each object name to its listing entry (only
            the pages fetched before a failure, if any).
        """
        account = target.account
        container = target.container

        # Wait for a concurrent check of the same container, then reuse its
        # cached result if there is one.
        if (account, container) in self.running:
            self.running[(account, container)].wait()
        if (account, container) in self.list_cache:
            return self.list_cache[(account, container)]
        self.running[(account, container)] = Event()
        print('Checking container "%s"' % target)
        account_listing = self.check_account(target)
        error = False
        if container not in account_listing:
            error = True
            print(' Container %s missing in account listing' % target)

        marker = None
        results = []
        # Page through the container, resuming after the last object name,
        # until an empty page is returned.
        while True:
            try:
                resp = self.container_client.container_list(acct=account,
                                                            ref=container,
                                                            marker=marker)
            except exc.NotFound as e:
                self.container_not_found += 1
                error = True
                print(' Not found container "%s": %s' % (target, str(e)))
                break
            except Exception as e:
                self.container_exceptions += 1
                error = True
                print(' Exception container "%s": %s' % (target, str(e)))
                break

            if resp['objects']:
                marker = resp['objects'][-1]['name']
            else:
                break
            results.extend(resp['objects'])

        container_listing = dict()
        for obj in results:
            container_listing[obj['name']] = obj

        self.containers_checked += 1
        # Publish the result before waking up the waiters.
        self.list_cache[(account, container)] = container_listing
        self.running[(account, container)].send(True)
        del self.running[(account, container)]

        if recurse:
            for obj in container_listing:
                t = target.copy()
                t.obj = obj
                self.pool.spawn_n(self.check_obj, t, True)
        if error and self.error_file:
            self.write_error(target)
        return container_listing

    def check_account(self, target, recurse=False):
        """Check ``target.account`` and return its container listing.

        :param recurse: when true, schedule a recursive ``check_container``
            for every listed container.
        :returns: dict mapping each container name to a pair made of the
            second and third fields of its account listing entry.
        """
        account = target.account

        # Wait for a concurrent check of the same account, then reuse its
        # cached result if there is one.
        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        print('Checking account "%s"' % target)
        error = False
        marker = None
        results = []
        # Page through the account, resuming after the last container name,
        # until an empty page is returned.
        while True:
            try:
                resp = self.account_client.containers_list(account,
                                                           marker=marker)
            except Exception as e:
                self.account_exceptions += 1
                error = True
                print(' Exception account "%s": %s' % (target, str(e)))
                break
            if resp['listing']:
                marker = resp['listing'][-1][0]
            else:
                break
            results.extend(resp['listing'])

        containers = dict()
        for e in results:
            containers[e[0]] = (e[1], e[2])

        # Publish the result before waking up the waiters.
        self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]
        self.accounts_checked += 1

        if recurse:
            for container in containers:
                t = target.copy()
                t.container = container
                self.pool.spawn_n(self.check_container, t, True)

        if error and self.error_file:
            self.write_error(target)
        return containers

    def check(self, target):
        """Schedule the check matching the most specific field of ``target``.

        Everything except a chunk check is run recursively.
        """
        if target.chunk and target.obj and target.container:
            self.pool.spawn_n(self.check_chunk, target)
        elif target.obj and target.container:
            self.pool.spawn_n(self.check_obj, target, True)
        elif target.container:
            self.pool.spawn_n(self.check_container, target, True)
        else:
            self.pool.spawn_n(self.check_account, target, True)

    def wait(self):
        """Block until every scheduled check has finished."""
        self.pool.waitall()

    def report(self):
        """Print the counters of each level; zero error counters are omitted."""
        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            # Report the exception counter itself (the not-found counter
            # used to be printed here by mistake).
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            _report_stat("Exceptions", self.chunk_exceptions)
class Checker(object):
    """Cross-check accounts, containers, objects and chunks.

    Each ``check_*`` method verifies one level of the hierarchy against the
    listing of its parent level, prints any inconsistency, updates the
    per-level counters and, when ``recurse`` is true, schedules the checks of
    the level below on the green pool.

    Listings are memoised in ``self.list_cache``; ``self.running`` holds one
    ``Event`` per listing currently being computed so that concurrent
    checkers of the same entity wait for the first one instead of repeating
    the requests.
    """

    def __init__(self, namespace, concurrency=50, error_file=None,
                 rebuild_file=None):
        """Set up clients, counters and the optional output files.

        :param namespace: namespace passed to the account and container
            clients through their configuration dict.
        :param concurrency: size of the ``GreenPool`` running the checks.
        :param error_file: optional path; when set, every entity found in
            error is appended to it as a space-delimited CSV row.
        :param rebuild_file: optional path; when set, every chunk found in
            error is appended to it as a ``|``-delimited CSV row.
        """
        self.pool = GreenPool(concurrency)
        self.error_file = error_file
        if self.error_file:
            # Append mode: successive runs accumulate in the same file.
            f = open(self.error_file, 'a')
            self.error_writer = csv.writer(f, delimiter=' ')

        self.rebuild_file = rebuild_file
        if self.rebuild_file:
            fd = open(self.rebuild_file, 'a')
            self.rebuild_writer = csv.writer(fd, delimiter='|')

        conf = {'namespace': namespace}
        self.account_client = AccountClient(conf)
        self.container_client = ContainerClient(conf)
        self.blob_client = BlobClient()

        self.accounts_checked = 0
        self.containers_checked = 0
        self.objects_checked = 0
        self.chunks_checked = 0

        self.account_not_found = 0
        self.container_not_found = 0
        self.object_not_found = 0
        self.chunk_not_found = 0

        self.account_exceptions = 0
        self.container_exceptions = 0
        self.object_exceptions = 0
        self.chunk_exceptions = 0

        # Keys are the account, (account, container) or
        # (account, container, obj) depending on the level.
        self.list_cache = {}
        self.running = {}

    def write_error(self, target):
        """Append ``target`` to the error file.

        The row holds the account followed by whichever of container,
        object and chunk are set on ``target``.
        """
        error = [target.account]
        if target.container:
            error.append(target.container)
        if target.obj:
            error.append(target.obj)
        if target.chunk:
            error.append(target.chunk)
        self.error_writer.writerow(error)

    def write_rebuilder_input(self, target, obj_meta, ct_meta):
        """Append a ``(container id, object id, chunk)`` row to the rebuild file.

        The container id is the part of the container's ``sys.name``
        property preceding the first dot. When either metadata dict lacks
        the required keys (they are empty when the container or the object
        could not be fetched) nothing is written and a message is printed
        instead of raising ``KeyError``.
        """
        try:
            cid = ct_meta['properties']['sys.name'].split('.', 1)[0]
            content_id = obj_meta['id']
        except KeyError:
            print(' Cannot write rebuilder input for chunk %s: '
                  'missing container or object metadata' % target)
            return
        self.rebuild_writer.writerow((cid, content_id, target.chunk))

    def _check_chunk_xattr(self, target, obj_meta, xattr_meta):
        """Compare a chunk's listing entry with the metadata of the chunk.

        :param obj_meta: entry of the chunk in the object's chunk listing.
        :param xattr_meta: metadata returned by ``blob_client.chunk_head``.
        :returns: ``True`` when the size or the hash differs.
        """
        error = False
        # Composed position -> erasure coding
        attr_prefix = 'meta' if '.' in obj_meta['pos'] else ''

        attr_key = attr_prefix + 'chunk_size'
        if str(obj_meta['size']) != xattr_meta.get(attr_key):
            print(" Chunk %s '%s' xattr (%s) "
                  "differs from size in meta2 (%s)" %
                  (target, attr_key, xattr_meta.get(attr_key),
                   obj_meta['size']))
            error = True

        attr_key = attr_prefix + 'chunk_hash'
        if obj_meta['hash'] != xattr_meta.get(attr_key):
            print(" Chunk %s '%s' xattr (%s) "
                  "differs from hash in meta2 (%s)" %
                  (target, attr_key, xattr_meta.get(attr_key),
                   obj_meta['hash']))
            error = True
        return error

    def check_chunk(self, target):
        """Check that ``target.chunk`` is listed, exists and is consistent.

        The chunk is in error when it is absent from the object's chunk
        listing, when ``blob_client.chunk_head`` fails, or when the size or
        hash it reports differs from the listing entry. Chunks in error are
        written to the error and rebuild files when those are configured.
        """
        chunk = target.chunk

        obj_listing, obj_meta = self.check_obj(target)
        error = False
        if chunk not in obj_listing:
            print(' Chunk %s missing from object listing' % target)
            error = True
            db_meta = dict()
        else:
            db_meta = obj_listing[chunk]

        try:
            xattr_meta = self.blob_client.chunk_head(chunk)
        except exc.NotFound as e:
            self.chunk_not_found += 1
            error = True
            print(' Not found chunk "%s": %s' % (target, str(e)))
        except Exception as e:
            self.chunk_exceptions += 1
            error = True
            print(' Exception chunk "%s": %s' % (target, str(e)))
        else:
            # Only comparable when the chunk was found in the listing.
            if db_meta:
                error = self._check_chunk_xattr(target, db_meta, xattr_meta)

        if error:
            if self.error_file:
                self.write_error(target)
            if self.rebuild_file:
                # The container metadata is the second item of the entry
                # cached by check_container().
                self.write_rebuilder_input(
                    target, obj_meta,
                    self.list_cache[(target.account, target.container)][1])
        self.chunks_checked += 1

    def check_obj(self, target, recurse=False):
        """Check ``target.obj`` and return its chunk listing and metadata.

        :param recurse: when true, schedule ``check_chunk`` for every chunk.
        :returns: ``(chunk_listing, meta)`` where ``chunk_listing`` maps each
            chunk URL to its description and ``meta`` is the first item
            returned by ``container_client.content_show``; both are empty
            when the request failed.
        """
        account = target.account
        container = target.container
        obj = target.obj

        # Wait for a concurrent check of the same object, then reuse its
        # cached result if there is one.
        if (account, container, obj) in self.running:
            self.running[(account, container, obj)].wait()
        if (account, container, obj) in self.list_cache:
            return self.list_cache[(account, container, obj)]
        self.running[(account, container, obj)] = Event()
        print('Checking object "%s"' % target)
        container_listing, ct_meta = self.check_container(target)
        error = False
        if obj not in container_listing:
            print(' Object %s missing from container listing' % target)
            error = True
        # TODO check checksum match against container_listing[obj]['hash']

        results = []
        meta = dict()
        try:
            meta, results = self.container_client.content_show(
                acct=account, ref=container, path=obj)
        except exc.NotFound as e:
            self.object_not_found += 1
            error = True
            print(' Not found object "%s": %s' % (target, str(e)))
        except Exception as e:
            self.object_exceptions += 1
            error = True
            print(' Exception object "%s": %s' % (target, str(e)))

        chunk_listing = dict()
        for chunk in results:
            chunk_listing[chunk['url']] = chunk

        self.objects_checked += 1
        # Publish the result before waking up the waiters.
        self.list_cache[(account, container, obj)] = (chunk_listing, meta)
        self.running[(account, container, obj)].send(True)
        del self.running[(account, container, obj)]

        if recurse:
            for chunk in chunk_listing:
                t = target.copy()
                t.chunk = chunk
                self.pool.spawn_n(self.check_chunk, t)
        if error and self.error_file:
            self.write_error(target)
        return chunk_listing, meta

    def check_container(self, target, recurse=False):
        """Check ``target.container`` and return its listing and metadata.

        :param recurse: when true, schedule a recursive ``check_obj`` for
            every listed object.
        :returns: ``(container_listing, ct_meta)`` where
            ``container_listing`` maps each object name to its listing entry
            and ``ct_meta`` is the final (empty) page of the listing minus
            its ``objects`` key, or an empty dict when a request failed.
        """
        account = target.account
        container = target.container

        # Wait for a concurrent check of the same container, then reuse its
        # cached result if there is one.
        if (account, container) in self.running:
            self.running[(account, container)].wait()
        if (account, container) in self.list_cache:
            return self.list_cache[(account, container)]
        self.running[(account, container)] = Event()
        print('Checking container "%s"' % target)
        account_listing = self.check_account(target)
        error = False
        if container not in account_listing:
            error = True
            print(' Container %s missing from account listing' % target)

        marker = None
        results = []
        ct_meta = dict()
        # Page through the container, resuming after the last object name,
        # until an empty page is returned.
        while True:
            try:
                resp = self.container_client.container_list(
                    acct=account, ref=container, marker=marker)
            except exc.NotFound as e:
                self.container_not_found += 1
                error = True
                print(' Not found container "%s": %s' % (target, str(e)))
                break
            except Exception as e:
                self.container_exceptions += 1
                error = True
                print(' Exception container "%s": %s' % (target, str(e)))
                break

            if resp['objects']:
                marker = resp['objects'][-1]['name']
                results.extend(resp['objects'])
            else:
                # What remains of the last page is kept as the container
                # metadata.
                ct_meta = resp
                ct_meta.pop('objects')
                break

        container_listing = dict()
        for obj in results:
            container_listing[obj['name']] = obj

        self.containers_checked += 1
        # Publish the result before waking up the waiters.
        self.list_cache[(account, container)] = container_listing, ct_meta
        self.running[(account, container)].send(True)
        del self.running[(account, container)]

        if recurse:
            for obj in container_listing:
                t = target.copy()
                t.obj = obj
                self.pool.spawn_n(self.check_obj, t, True)
        if error and self.error_file:
            self.write_error(target)
        return container_listing, ct_meta

    def check_account(self, target, recurse=False):
        """Check ``target.account`` and return its container listing.

        :param recurse: when true, schedule a recursive ``check_container``
            for every listed container.
        :returns: dict mapping each container name to a pair made of the
            second and third fields of its account listing entry.
        """
        account = target.account

        # Wait for a concurrent check of the same account, then reuse its
        # cached result if there is one.
        if account in self.running:
            self.running[account].wait()
        if account in self.list_cache:
            return self.list_cache[account]
        self.running[account] = Event()
        print('Checking account "%s"' % target)
        error = False
        marker = None
        results = []
        # Page through the account, resuming after the last container name,
        # until an empty page is returned.
        while True:
            try:
                resp = self.account_client.containers_list(
                    account, marker=marker)
            except Exception as e:
                self.account_exceptions += 1
                error = True
                print(' Exception account "%s": %s' % (target, str(e)))
                break
            if resp['listing']:
                marker = resp['listing'][-1][0]
            else:
                break
            results.extend(resp['listing'])

        containers = dict()
        for e in results:
            containers[e[0]] = (e[1], e[2])

        # Publish the result before waking up the waiters.
        self.list_cache[account] = containers
        self.running[account].send(True)
        del self.running[account]
        self.accounts_checked += 1

        if recurse:
            for container in containers:
                t = target.copy()
                t.container = container
                self.pool.spawn_n(self.check_container, t, True)

        if error and self.error_file:
            self.write_error(target)
        return containers

    def check(self, target):
        """Schedule the check matching the most specific field of ``target``.

        Everything except a chunk check is run recursively.
        """
        if target.chunk and target.obj and target.container:
            self.pool.spawn_n(self.check_chunk, target)
        elif target.obj and target.container:
            self.pool.spawn_n(self.check_obj, target, True)
        elif target.container:
            self.pool.spawn_n(self.check_container, target, True)
        else:
            self.pool.spawn_n(self.check_account, target, True)

    def wait(self):
        """Block until every scheduled check has finished."""
        self.pool.waitall()

    def report(self):
        """Print the counters of each level; zero error counters are omitted."""
        def _report_stat(name, stat):
            print("{0:18}: {1}".format(name, stat))

        print()
        print('Report')
        _report_stat("Accounts checked", self.accounts_checked)
        if self.account_not_found:
            _report_stat("Missing accounts", self.account_not_found)
        if self.account_exceptions:
            # Report the exception counter itself (the not-found counter
            # used to be printed here by mistake).
            _report_stat("Exceptions", self.account_exceptions)
        print()
        _report_stat("Containers checked", self.containers_checked)
        if self.container_not_found:
            _report_stat("Missing containers", self.container_not_found)
        if self.container_exceptions:
            _report_stat("Exceptions", self.container_exceptions)
        print()
        _report_stat("Objects checked", self.objects_checked)
        if self.object_not_found:
            _report_stat("Missing objects", self.object_not_found)
        if self.object_exceptions:
            _report_stat("Exceptions", self.object_exceptions)
        print()
        _report_stat("Chunks checked", self.chunks_checked)
        if self.chunk_not_found:
            _report_stat("Missing chunks", self.chunk_not_found)
        if self.chunk_exceptions:
            _report_stat("Exceptions", self.chunk_exceptions)
class StorageTiererWorker(object):
    """Move the outdated contents of one account to a new storage policy.

    The worker lists every container of the configured account, selects the
    contents that are older than the outdated threshold and not yet stored
    with the new policy, and changes their policy at a limited rate.
    """

    def __init__(self, conf, logger):
        """Read the settings from ``conf`` and build the clients.

        :param conf: configuration mapping; must hold ``CONF_ACCOUNT``.
        :param logger: logger used for progress reports and errors.
        """
        self.conf = conf
        self.logger = logger
        self.account = conf[CONF_ACCOUNT]
        self.container_client = ContainerClient(self.conf)
        self.account_client = AccountClient(self.conf)
        self.content_factory = ContentFactory(self.conf)
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.contents_run_time = 0
        self.total_contents_processed = 0
        self.report_interval = int_value(conf.get('report_interval'), 3600)
        self.max_contents_per_second = int_value(
            conf.get('contents_per_second'), 30)
        self.container_fetch_limit = int_value(
            conf.get('container_fetch_limit'), 100)
        self.content_fetch_limit = int_value(
            conf.get('content_fetch_limit'), 100)
        self.outdated_threshold = int_value(
            conf.get(CONF_OUTDATED_THRESHOLD), 9999999999)
        self.new_policy = conf.get(CONF_NEW_POLICY)

    def _list_containers(self):
        """Yield the name of every container of the account, page by page."""
        container = None
        while True:
            # The last yielded name is the marker of the next page.
            resp = self.account_client.containers_list(
                self.account, marker=container,
                limit=self.container_fetch_limit)
            if not resp["listing"]:
                break
            for container, _, _, _ in resp["listing"]:
                yield container

    def _list_contents(self):
        """Yield ``(container_id, content_id)`` for each content to migrate.

        Contents modified within the last ``outdated_threshold`` seconds and
        contents already using the new policy are skipped. A container that
        is listed by the account but cannot be found is logged and skipped.
        """
        for container in self._list_containers():
            marker = None
            while True:
                try:
                    resp = self.container_client.container_list(
                        acct=self.account, ref=container,
                        limit=self.content_fetch_limit, marker=marker)
                except NotFound:
                    self.logger.warning("Container %s in account "
                                        "but not found" % container)
                    break
                if not resp["objects"]:
                    break
                for obj in resp["objects"]:
                    # Advance the marker even when the object is skipped.
                    marker = obj["name"]
                    if obj["mtime"] > time.time() - self.outdated_threshold:
                        continue
                    if obj["policy"] == self.new_policy:
                        continue
                    container_id = cid_from_name(self.account, container)
                    yield (container_id, obj["content"])

    def run(self):
        """Migrate every selected content, logging progress periodically.

        A progress line is logged whenever ``report_interval`` seconds have
        elapsed since the previous one, and a summary line at the end.
        """
        start_time = report_time = time.time()
        total_errors = 0
        for (container_id, content_id) in self._list_contents():
            self.safe_change_policy(container_id, content_id)
            self.contents_run_time = ratelimit(self.contents_run_time,
                                               self.max_contents_per_second)
            self.total_contents_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                # Guard against a null interval on coarse clocks, as done
                # for ``elapsed`` below.
                interval = (now - report_time) or 0.000001
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / interval,
                        'total': (now - start_time)
                    })
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(content_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'content_rate': self.total_contents_processed / elapsed
            })

    def safe_change_policy(self, container_id, content_id):
        """Change the policy of one content, counting failures.

        Any exception is logged with its traceback and counted in
        ``self.errors``; ``self.passes`` is incremented in all cases.
        """
        try:
            self.change_policy(container_id, content_id)
        except Exception:
            self.errors += 1
            # One logging argument per placeholder: a single tuple would
            # make the message impossible to format.
            self.logger.exception(
                "ERROR while changing policy for content %s/%s",
                container_id, content_id)
        self.passes += 1

    def change_policy(self, container_id, content_id):
        """Ask the content factory to store the content with the new policy."""
        self.logger.info("Changing policy for content %s/%s"
                         % (container_id, content_id))
        self.content_factory.change_policy(container_id, content_id,
                                           self.new_policy)
class StorageTiererWorker(object):
    """Move the outdated contents of one account to a new storage policy.

    The worker lists every container of the configured account, selects the
    contents that are older than the outdated threshold and not yet stored
    with the new policy, and changes their policy at a limited rate.
    """

    def __init__(self, conf, logger):
        """Read the settings from ``conf`` and build the clients.

        :param conf: configuration mapping; must hold ``CONF_ACCOUNT``.
        :param logger: logger used for progress reports and errors.
        """
        self.conf = conf
        self.logger = logger
        self.account = conf[CONF_ACCOUNT]
        self.container_client = ContainerClient(self.conf)
        self.account_client = AccountClient(self.conf)
        self.content_factory = ContentFactory(self.conf)
        self.passes = 0
        self.errors = 0
        self.last_reported = 0
        self.contents_run_time = 0
        self.total_contents_processed = 0
        self.report_interval = int_value(
            conf.get('report_interval'), 3600)
        self.max_contents_per_second = int_value(
            conf.get('contents_per_second'), 30)
        self.container_fetch_limit = int_value(
            conf.get('container_fetch_limit'), 100)
        self.content_fetch_limit = int_value(
            conf.get('content_fetch_limit'), 100)
        self.outdated_threshold = int_value(
            conf.get(CONF_OUTDATED_THRESHOLD), 9999999999)
        self.new_policy = conf.get(CONF_NEW_POLICY)

    def _list_containers(self):
        """Yield the name of every container of the account, page by page."""
        container = None
        while True:
            # The last yielded name is the marker of the next page.
            resp = self.account_client.containers_list(
                self.account, marker=container,
                limit=self.container_fetch_limit)
            if not resp["listing"]:
                break
            for container, _, _, _ in resp["listing"]:
                yield container

    def _list_contents(self):
        """Yield ``(container_id, content_id)`` for each content to migrate.

        Contents modified within the last ``outdated_threshold`` seconds and
        contents already using the new policy are skipped. A container that
        is listed by the account but cannot be found is logged and skipped.
        """
        for container in self._list_containers():
            marker = None
            while True:
                try:
                    resp = self.container_client.container_list(
                        acct=self.account, ref=container,
                        limit=self.content_fetch_limit, marker=marker)
                except NotFound:
                    self.logger.warning("Container %s in account "
                                        "but not found" % container)
                    break
                if not resp["objects"]:
                    break
                for obj in resp["objects"]:
                    # Advance the marker even when the object is skipped.
                    marker = obj["name"]
                    if obj["mtime"] > time.time() - self.outdated_threshold:
                        continue
                    if obj["policy"] == self.new_policy:
                        continue
                    container_id = cid_from_name(self.account, container)
                    yield (container_id, obj["content"])

    def run(self):
        """Migrate every selected content, logging progress periodically.

        A progress line is logged whenever ``report_interval`` seconds have
        elapsed since the previous one, and a summary line at the end.
        """
        start_time = report_time = time.time()
        total_errors = 0
        for (container_id, content_id) in self._list_contents():
            self.safe_change_policy(container_id, content_id)
            self.contents_run_time = ratelimit(
                self.contents_run_time,
                self.max_contents_per_second
            )
            self.total_contents_processed += 1
            now = time.time()
            if now - self.last_reported >= self.report_interval:
                # Guard against a null interval on coarse clocks, as done
                # for ``elapsed`` below.
                interval = (now - report_time) or 0.000001
                self.logger.info(
                    '%(start_time)s '
                    '%(passes)d '
                    '%(errors)d '
                    '%(c_rate).2f '
                    '%(total).2f ' % {
                        'start_time': time.ctime(report_time),
                        'passes': self.passes,
                        'errors': self.errors,
                        'c_rate': self.passes / interval,
                        'total': (now - start_time)
                    }
                )
                report_time = now
                total_errors += self.errors
                self.passes = 0
                self.errors = 0
                self.last_reported = now
        elapsed = (time.time() - start_time) or 0.000001
        self.logger.info(
            '%(elapsed).02f '
            '%(errors)d '
            '%(content_rate).2f ' % {
                'elapsed': elapsed,
                'errors': total_errors + self.errors,
                'content_rate': self.total_contents_processed / elapsed
            }
        )

    def safe_change_policy(self, container_id, content_id):
        """Change the policy of one content, counting failures.

        Any exception is logged with its traceback and counted in
        ``self.errors``; ``self.passes`` is incremented in all cases.
        """
        try:
            self.change_policy(container_id, content_id)
        except Exception:
            self.errors += 1
            # One logging argument per placeholder: a single tuple would
            # make the message impossible to format.
            self.logger.exception("ERROR while changing policy for content "
                                  "%s/%s", container_id, content_id)
        self.passes += 1

    def change_policy(self, container_id, content_id):
        """Ask the content factory to store the content with the new policy."""
        self.logger.info("Changing policy for content %s/%s"
                         % (container_id, content_id))
        self.content_factory.change_policy(
            container_id, content_id, self.new_policy)