class Meta2DecommissionJob(XcuteRdirJob):
    """Xcute job that moves meta2 databases away from a decommissioned
    service, one task per container indexed in rdir."""

    JOB_TYPE = 'meta2-decommission'
    TASK_CLASS = Meta2DecommissionTask

    @classmethod
    def sanitize_params(cls, job_params):
        """Validate job parameters.

        Requires a non-empty 'service_id'; 'dst' is passed through as-is.
        Returns the sanitized parameters and the job lock name.
        """
        sanitized_job_params, _ = super(
            Meta2DecommissionJob, cls).sanitize_params(job_params)

        src = job_params.get('service_id')
        if not src:
            raise ValueError('Missing service ID')
        sanitized_job_params['service_id'] = src
        sanitized_job_params['dst'] = job_params.get('dst')

        return sanitized_job_params, 'meta2/%s' % src

    def __init__(self, conf, logger=None):
        super(Meta2DecommissionJob, self).__init__(conf, logger=logger)
        self.rdir_client = RdirClient(conf, logger=logger)

    def get_tasks(self, job_params, marker=None):
        """Yield one (marker, task payload) pair per indexed container."""
        for task_marker, container_id in self._containers_from_rdir(
                job_params, marker):
            yield task_marker, dict(container_id=container_id)

    def get_total_tasks(self, job_params, marker=None):
        """Count containers, yielding (marker, count) in chunks of 1000
        plus a final partial chunk (nothing is yielded when empty)."""
        seen = 0
        last_marker = None
        for seen, (last_marker, _) in enumerate(
                self._containers_from_rdir(job_params, marker), 1):
            if seen % 1000 == 0:
                yield last_marker, 1000
        leftover = seen % 1000
        if leftover != 0:
            yield last_marker, leftover

    def _containers_from_rdir(self, job_params, marker):
        """Iterate over (container_url, container_id) entries indexed
        in rdir for the decommissioned service."""
        src = job_params['service_id']
        entries = self.rdir_client.meta2_index_fetch_all(
            src, marker=marker,
            timeout=job_params['rdir_timeout'],
            limit=job_params['rdir_fetch_limit'])
        for entry in entries:
            yield entry['container_url'], entry['container_id']
class Meta2Rebuilder(Tool):
    """
    Rebuild meta2 databases.
    """

    DEFAULT_RDIR_FETCH_LIMIT = 100

    def __init__(self, conf, input_file=None, service_id=None, **kwargs):
        super(Meta2Rebuilder, self).__init__(conf, **kwargs)

        # Two possible input sources: an explicit file of container IDs,
        # or the rdir index of a meta2 service.
        self.input_file = input_file
        self.meta2_id = service_id

        # rawx/rdir
        self.rdir_client = RdirClient(self.conf, logger=self.logger)
        self.rdir_fetch_limit = int_value(
            self.conf.get('rdir_fetch_limit'), self.DEFAULT_RDIR_FETCH_LIMIT)

    @staticmethod
    def string_from_item(item):
        """Render a (namespace, container_id) item as 'namespace|cid'."""
        namespace, container_id = item
        return '%s|%s' % (namespace, container_id)

    def _fetch_items_from_input_file(self):
        """Yield (namespace, container_id) pairs read from the input file,
        skipping blank lines and '#' comments."""
        with open(self.input_file, 'r') as source:
            for raw_line in source:
                cid = raw_line.strip()
                if not cid or cid.startswith('#'):
                    continue
                yield self.namespace, cid

    def _fetch_items_from_meta2_id(self):
        """Yield (namespace, container_id) pairs from the rdir index
        of the configured meta2 service."""
        for entry in self.rdir_client.meta2_index_fetch_all(self.meta2_id):
            yield self.namespace, entry['container_id']

    def _fetch_items(self):
        # Prefer the input file when both sources are configured.
        if self.input_file:
            return self._fetch_items_from_input_file()
        if self.meta2_id:
            return self._fetch_items_from_meta2_id()

        def _nothing():
            # Empty generator: keeps the return type iterable when no
            # input source was configured.
            return
            yield  # pylint: disable=unreachable
        return _nothing()

    def _get_report(self, status, end_time, counters):
        """Build a one-line progress report covering both the interval
        since the last report and the totals since start."""
        (references_processed, total_references_processed,
         errors, total_errors) = counters
        # Tiny epsilon avoids division by zero on instantaneous reports.
        elapsed = (end_time - self.last_report) or 0.00001
        total_elapsed = (end_time - self.start_time) or 0.00001
        report = (
            '%(status)s '
            'last_report=%(last_report)s %(time_since_last_report).2fs '
            'references=%(references)d %(references_rate).2f/s '
            'errors=%(errors)d %(errors_rate).2f%% '
            'start_time=%(start_time)s %(total_time).2fs '
            'total_references='
            '%(total_references)d %(total_references_rate).2f/s '
            'total_errors=%(total_errors)d %(total_errors_rate).2f%%' % {
                'status': status,
                'last_report': datetime.fromtimestamp(
                    int(self.last_report)).isoformat(),
                'time_since_last_report': elapsed,
                'references': references_processed,
                'references_rate': references_processed / elapsed,
                'errors': errors,
                'errors_rate':
                    100 * errors / float(references_processed or 1),
                'start_time': datetime.fromtimestamp(
                    int(self.start_time)).isoformat(),
                'total_time': total_elapsed,
                'total_references': total_references_processed,
                'total_references_rate':
                    total_references_processed / total_elapsed,
                'total_errors': total_errors,
                'total_errors_rate':
                    100 * total_errors
                    / float(total_references_processed or 1)
            })
        if self.total_expected_items is not None:
            progress = 100 * total_references_processed / \
                float(self.total_expected_items or 1)
            report += ' progress=%d/%d %.2f%%' % (
                total_references_processed, self.total_expected_items,
                progress)
        return report

    def create_worker(self, queue_workers, queue_reply):
        # NOTE(review): this returns ContentRepairerWorker rather than a
        # meta2-specific worker -- looks like a possible copy-paste;
        # confirm this is intentional.
        return ContentRepairerWorker(self, queue_workers, queue_reply)

    def _load_total_expected_items(self):
        pass
class TestMeta2Indexing(BaseTestCase):

    def setUp(self):
        super(TestMeta2Indexing, self).setUp()
        self.rdir_client = RdirClient(self.conf)
        self.directory_client = DirectoryClient(self.conf)
        self.container_client = ContainerClient(self.conf)
        # Random number (1..10) of randomly named containers per run.
        self.containers = [random_str(14) for _ in range(randint(1, 10))]
        self.containers_svcs = {}
        self.event_agent_name = 'event-agent-1'

    def tearDown(self):
        super(TestMeta2Indexing, self).tearDown()
        self._containers_cleanup()
        # Restart the event agent stopped by the test.
        self._service(self.event_agent_name, 'start', wait=3)

    def _containers_cleanup(self):
        """Delete the test containers and their rdir index records."""
        for container in self.containers:
            self.container_client.container_delete(self.account, container)
            for svc in self.containers_svcs[container]:
                self.rdir_client.meta2_index_delete(
                    volume_id=svc['host'],
                    container_path="{0}/{1}/{2}".format(
                        self.ns, self.account, container),
                    container_id=cid_from_name(self.account, container))

    def _filter_by_managing_svc(self, all_containers, svc_of_interest):
        """
        Filter the containers, returning (sorted) only those that have
        svc_of_interest among their managing services.
        """
        matching = [
            name for name, services in all_containers.items()
            if svc_of_interest in (s['host'] for s in services)
        ]
        return sorted(matching)

    def test_volume_indexing_worker(self):
        """
        Test steps:
        - Generate a list of container names and create them
        - Collect their respective meta2 servers
        - For each meta2 server:
            - Run a meta2 indexing worker
            - List all rdir index records and match them with the
              services we're expecting.
        :return:
        """
        self._service(self.event_agent_name, "stop", wait=3)
        for container in self.containers:
            self.container_client.container_create(account=self.account,
                                                   reference=container)

        for container in self.containers:
            services = self.directory_client.list(
                account=self.account, reference=container)['srv']
            self.containers_svcs[container] = [
                svc for svc in services if svc['type'] == 'meta2']

        # Map each meta2 service (by service_id, falling back to addr)
        # to its on-disk data path.
        meta2_data_paths = {}
        for svc in self.conf['services']['meta2']:
            host = svc.get('service_id', svc['addr'])
            meta2_data_paths[host] = svc['path']

        distinct_meta2_servers = {
            svc['host']
            for svc_list in self.containers_svcs.values()
            for svc in svc_list}

        for server in distinct_meta2_servers:
            expected_containers = self._filter_by_managing_svc(
                self.containers_svcs, server)
            worker = Meta2IndexingWorker(meta2_data_paths[server], self.conf)
            worker.crawl_volume()
            indexed_containers = sorted(
                rec['container_url'].split('/')[-1]
                for rec in self.rdir_client.meta2_index_fetch_all(
                    volume_id=server))
            for cont in expected_containers:
                self.assertIn(cont, indexed_containers)