def run_forever(self, *args, **kwargs):
    """Run the container audit until stopped."""
    reported = time.time()
    time.sleep(random() * self.interval)
    while True:
        self.logger.info(_('Begin container audit pass.'))
        begin = time.time()
        try:
            all_locs = audit_location_generator(
                self.devices, container_server.DATADIR,
                mount_check=self.mount_check, logger=self.logger)
            for path, device, partition in all_locs:
                self.container_audit(path)
                if time.time() - reported >= 3600:  # once an hour
                    self.logger.info(
                        _('Since %(time)s: Container audits: %(pass)s '
                          'passed audit, %(fail)s failed audit'),
                        {'time': time.ctime(reported),
                         'pass': self.container_passes,
                         'fail': self.container_failures})
                    reported = time.time()
                    self.container_passes = 0
                    self.container_failures = 0
        except (Exception, Timeout):
            self.logger.exception(_('ERROR auditing'))
        elapsed = time.time() - begin
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
        self.logger.info(
            _('Container audit pass completed: %.02fs'), elapsed)
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices,
                                        account_server.DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(_('Since %(time)s: Account audits: '
                               '%(passed)s passed audit, '
                               '%(failed)s failed audit'),
                             {'time': time.ctime(reported),
                              'passed': self.account_passes,
                              'failed': self.account_failures})
            dump_recon_cache({'account_audits_since': reported,
                              'account_audits_passed': self.account_passes,
                              'account_audits_failed':
                              self.account_failures},
                             self.rcache, self.logger)
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
        self.accounts_running_time = ratelimit_sleep(
            self.accounts_running_time, self.max_accounts_per_second)
    return reported
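# A minimal sketch (an assumption, not Swift's implementation) of the pacing
# contract that ratelimit_sleep() is used for above: the caller threads a
# bookkeeping value ("running_time", in seconds) through successive calls,
# and the function sleeps as needed so that no more than max_rate calls
# complete per second, returning the updated bookkeeping value.
import time

def ratelimit_sleep_sketch(running_time, max_rate):
    if max_rate <= 0:
        return running_time  # rate limiting disabled
    interval = 1.0 / max_rate           # minimum spacing between calls
    now = time.time()
    if running_time < now:
        running_time = now              # behind schedule; no sleep needed
    else:
        time.sleep(running_time - now)  # ahead of schedule; wait our turn
    return running_time + interval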
def run_forever(self):  # pragma: no cover
    """Run the account audit until stopped."""
    reported = time.time()
    time.sleep(random() * self.interval)
    while True:
        begin = time.time()
        all_locs = audit_location_generator(self.devices,
                                            account_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.account_audit(path)
            if time.time() - reported >= 3600:  # once an hour
                self.logger.info(
                    _('Since %(time)s: Account audits: '
                      '%(passed)s passed audit, %(failed)s failed audit'),
                    {'time': time.ctime(reported),
                     'passed': self.account_passes,
                     'failed': self.account_failures})
                reported = time.time()
                self.account_passes = 0
                self.account_failures = 0
        elapsed = time.time() - begin
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
def run_forever(self):  # pragma: no cover
    """Run the container audit until stopped."""
    reported = time.time()
    time.sleep(random() * self.interval)
    while True:
        begin = time.time()
        all_locs = audit_location_generator(self.devices,
                                            container_server.DATADIR,
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.container_audit(path)
            if time.time() - reported >= 3600:  # once an hour
                self.logger.info(
                    _('Since %(time)s: Container audits: %(pass)s passed '
                      'audit, %(fail)s failed audit'),
                    {'time': time.ctime(reported),
                     'pass': self.container_passes,
                     'fail': self.container_failures})
                reported = time.time()
                self.container_passes = 0
                self.container_failures = 0
        elapsed = time.time() - begin
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
def relink(swift_dir='/etc/swift', devices='/srv/node',
           skip_mount_check=False, logger=logging.getLogger(), device=None):
    mount_check = not skip_mount_check
    run = False
    relinked = errors = 0
    for policy in POLICIES:
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(swift_dir)
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power == part_power:
            continue
        logger.info('Relinking files for policy %s under %s',
                    policy.name, devices)
        run = True
        datadir = diskfile.get_data_dir(policy)

        locks = [None]
        states = {}
        relink_devices_filter = partial(devices_filter, device)
        relink_hook_pre_device = partial(hook_pre_device, locks, states,
                                         datadir)
        relink_hook_post_device = partial(hook_post_device, locks)
        relink_partition_filter = partial(partitions_filter,
                                          states, STEP_RELINK,
                                          part_power, next_part_power)
        relink_hook_post_partition = partial(hook_post_partition, states,
                                             STEP_RELINK)
        relink_hashes_filter = partial(hashes_filter, next_part_power)

        locations = audit_location_generator(
            devices,
            datadir,
            mount_check=mount_check,
            devices_filter=relink_devices_filter,
            hook_pre_device=relink_hook_pre_device,
            hook_post_device=relink_hook_post_device,
            partitions_filter=relink_partition_filter,
            hook_post_partition=relink_hook_post_partition,
            hashes_filter=relink_hashes_filter)
        for fname, _, _ in locations:
            newfname = replace_partition_in_path(fname, next_part_power)
            try:
                diskfile.relink_paths(fname, newfname, check_existing=True)
                relinked += 1
            except OSError as exc:
                errors += 1
                logger.warning("Relinking %s to %s failed: %s",
                               fname, newfname, exc)

    if not run:
        logger.warning("No policy found to increase the partition power.")
        return 2

    logger.info('Relinked %d diskfiles (%d errors)', relinked, errors)
    if errors > 0:
        return 1
    return 0
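# An illustrative sketch of the filter/hook style used above: callables are
# bound to shared state with functools.partial and handed to
# audit_location_generator, which invokes them while walking the tree.
# devices_filter_sketch() is a hypothetical stand-in for the real
# devices_filter; it shows the shape of such a filter, not its source.
from functools import partial

def devices_filter_sketch(wanted_device, devices_dir, devices):
    # Keep only the requested device name, or all devices if none given.
    if wanted_device:
        return [d for d in devices if d == wanted_device]
    return list(devices)

# partial() pre-binds the device argument, leaving a callable with the
# (devices_dir, devices) signature the generator expects.
only_sdb1 = partial(devices_filter_sketch, 'sdb1')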
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(
        self.devices, account_server.DATADIR,
        mount_check=self.mount_check, logger=self.logger
    )
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _("Since %(time)s: Account audits: "
                  "%(passed)s passed audit, "
                  "%(failed)s failed audit"),
                {"time": time.ctime(reported),
                 "passed": self.account_passes,
                 "failed": self.account_failures},
            )
            dump_recon_cache(
                {
                    "account_audits_since": reported,
                    "account_audits_passed": self.account_passes,
                    "account_audits_failed": self.account_failures,
                },
                self.rcache,
                self.logger,
            )
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
    return reported
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices,
                                        container_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.container_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _('Since %(time)s: Container audits: %(pass)s passed '
                  'audit, %(fail)s failed audit'),
                {'time': time.ctime(reported),
                 'pass': self.container_passes,
                 'fail': self.container_failures})
            dump_recon_cache(
                {'container_audits_since': reported,
                 'container_audits_passed': self.container_passes,
                 'container_audits_failed': self.container_failures},
                self.rcache, self.logger)
            reported = time.time()
            self.container_passes = 0
            self.container_failures = 0
    return reported
def run_forever(self, *args, **kwargs):
    """Run the container audit until stopped."""
    reported = time.time()
    time.sleep(random() * self.interval)
    while True:
        self.logger.info(_('Begin container audit pass.'))
        begin = time.time()
        try:
            all_locs = audit_location_generator(self.devices,
                                                container_server.DATADIR,
                                                mount_check=self.mount_check,
                                                logger=self.logger)
            for path, device, partition in all_locs:
                self.container_audit(path)
                if time.time() - reported >= 3600:  # once an hour
                    self.logger.info(
                        _('Since %(time)s: Container audits: %(pass)s '
                          'passed audit, %(fail)s failed audit'),
                        {'time': time.ctime(reported),
                         'pass': self.container_passes,
                         'fail': self.container_failures})
                    reported = time.time()
                    self.container_passes = 0
                    self.container_failures = 0
        except (Exception, Timeout):
            self.logger.exception(_('ERROR auditing'))
        elapsed = time.time() - begin
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
        self.logger.info(
            _('Container audit pass completed: %.02fs'), elapsed)
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices, self.datadir, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.audit(path)
        if time.time() - reported >= self.logging_interval:
            self.logger.info(
                _('Since %(time)s: %(server_type)s audits: %(pass)s '
                  'passed audit, %(fail)s failed audit'),
                {'time': time.ctime(reported),
                 'pass': self.passes,
                 'fail': self.failures,
                 'server_type': self.server_type})
            dump_recon_cache(
                {'{}_audits_since'.format(self.server_type): reported,
                 '{}_audits_passed'.format(self.server_type): self.passes,
                 '{}_audits_failed'.format(self.server_type):
                 self.failures},
                self.rcache, self.logger)
            reported = time.time()
            self.passes = 0
            self.failures = 0
        self.running_time = ratelimit_sleep(self.running_time,
                                            self.max_dbs_per_second)
    return reported
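# A minimal sketch (not taken from the original source) of how a
# _one_audit_pass() like the one above is typically driven from a
# run_forever() loop, following the interval/reporting pattern of the
# auditors in this section. Attributes such as self.interval and
# self.logger are assumed to be set in the auditor's __init__.
def run_forever(self, *args, **kwargs):
    """Run the audit until stopped (illustrative sketch)."""
    reported = time.time()
    time.sleep(random() * self.interval)  # stagger start across daemons
    while True:
        begin = time.time()
        try:
            reported = self._one_audit_pass(reported)
        except (Exception, Timeout):
            self.logger.exception(_('ERROR auditing'))
        elapsed = time.time() - begin
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)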
def audit_all_objects(self, mode='once'):
    self.logger.info(_('Begin object audit "%s" mode (%s)' %
                       (mode, self.auditor_type)))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        loop_time = time.time()
        self.object_audit(path, device, partition)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - reported >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors '
                'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                'Total time: %(total).2f, Auditing time: %(audit).2f, '
                'Rate: %(audit_rate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
        'Total errors: %(errors)d, Total files/sec: %(frate).2f , '
        'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
        'Rate: %(audit_rate).2f') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines,
            'errors': total_errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
def _one_crawler_pass(self):
    all_locs = audit_location_generator(self.devices,
                                        account_server.DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    meta_list = []
    for path, device, partition in all_locs:
        meta_dict = self.account_crawl(path)
        if meta_dict:
            meta_list.append(format_metadata(meta_dict))
    account_sender = Sender(self.conf)
    account_sender.sendData(meta_list, 'account_crawler',
                            self.ip, self.port)
def process_policy(self, policy):
    self.logger.info(
        'Processing files for policy %s under %s (cleanup=%s)',
        policy.name, self.root, self.do_cleanup)
    self.part_power = policy.object_ring.part_power
    self.next_part_power = policy.object_ring.next_part_power
    self.diskfile_mgr = self.diskfile_router[policy]
    self.datadir = diskfile.get_data_dir(policy)
    self.states = {
        "part_power": self.part_power,
        "next_part_power": self.next_part_power,
        "state": {},
    }
    audit_stats = {}

    locations = audit_location_generator(
        self.conf['devices'],
        self.datadir,
        mount_check=self.conf['mount_check'],
        devices_filter=self.devices_filter,
        hook_pre_device=self.hook_pre_device,
        hook_post_device=self.hook_post_device,
        partitions_filter=self.partitions_filter,
        hook_pre_partition=self.hook_pre_partition,
        hook_post_partition=self.hook_post_partition,
        hashes_filter=self.hashes_filter,
        logger=self.logger,
        error_counter=audit_stats,
        yield_hash_dirs=True)
    if self.conf['files_per_second'] > 0:
        locations = RateLimitedIterator(
            locations, self.conf['files_per_second'])
    for hash_path, device, partition in locations:
        # note, in cleanup step next_part_power == part_power
        new_hash_path = replace_partition_in_path(
            self.conf['devices'], hash_path, self.next_part_power)
        if new_hash_path == hash_path:
            continue
        self.process_location(hash_path, new_hash_path)

    # Unmounted devices don't trigger the pre_device hook, so we deal
    # with them here.
    for dev in audit_stats.get('unmounted', []):
        self.place_policy_stat(dev, policy, 'unmounted', 1)

    # Similarly, unlistable partitions don't trigger the post_device
    # hook, so we also need to deal with them here.
    for datadir in audit_stats.get('unlistable_partitions', []):
        device_path, _ = os.path.split(datadir)
        device = os.path.basename(device_path)
        self.place_policy_stat(device, policy, 'unlistable_partitions', 1)
def synced_containers_generator(self):
    """
    Iterates over the list of synced containers
    yielding the path of the container db
    """
    all_locs = audit_location_generator(
        self.devices, SYNC_DATADIR, ".db",
        mount_check=self.mount_check, logger=self.logger
    )
    for path, device, partition in all_locs:
        # What we want to yield is the real path as it's being used for
        # initiating a container broker. The broker would break if not
        # given the db real path, as it e.g. assumes the existence of
        # .pending in the same path
        yield self._synced_container_to_container_path(path)
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(
        self.devices, container_server.DATADIR,
        mount_check=self.mount_check, logger=self.logger
    )
    for path, device, partition in all_locs:
        self.container_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _("Since %(time)s: Container audits: %(pass)s passed "
                  "audit, %(fail)s failed audit"),
                {"time": time.ctime(reported),
                 "pass": self.container_passes,
                 "fail": self.container_failures},
            )
            reported = time.time()
            self.container_passes = 0
            self.container_failures = 0
    return reported
def audit_all_objects(self, mode='once'):
    self.logger.info(
        _('Begin object audit "%s" mode (%s)' % (mode, self.auditor_type)))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.object_audit(path, device, partition)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        if time.time() - reported >= self.log_time:
            self.logger.info(
                _('Object audit (%(type)s). '
                  'Since %(start_time)s: Locally: %(passes)d passed, '
                  '%(quars)d quarantined, %(errors)d errors '
                  'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (time.time() - reported),
                    'brate': self.bytes_processed /
                        (time.time() - reported)})
            reported = time.time()
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
    elapsed = time.time() - begin
    self.logger.info(
        _('Object audit (%(type)s) "%(mode)s" mode '
          'completed: %(elapsed).02fs. '
          'Total files/sec: %(frate).2f , '
          'Total bytes/sec: %(brate).2f ') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed})
def synced_containers_generator(self):
    """
    Iterates over the list of synced containers
    yielding the path of the container db
    """
    all_locs = audit_location_generator(self.devices, SYNC_DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        # What we want to yield is the real path as it's being used for
        # initiating a container broker. The broker would break if not
        # given the db real path, as it e.g. assumes the existence of
        # .pending in the same path
        yield self._synced_container_to_container_path(path)
def run_once(self, *args, **kwargs):
    """
    Runs a single container sync scan.
    """
    self.logger.info(_('Begin container sync "once" mode'))
    begin = time()
    all_locs = audit_location_generator(
        self.devices, container_server.DATADIR, ".db",
        mount_check=self.mount_check, logger=self.logger
    )
    for path, device, partition in all_locs:
        self.container_sync(path)
        if time() - self.reported >= 3600:  # once an hour
            self.report()
    self.report()
    elapsed = time() - begin
    self.logger.info(
        _('Container sync "once" mode completed: %.02fs'), elapsed)
def run_once(self, *args, **kwargs):
    """
    Runs a single container sync scan.
    """
    self.logger.info(_('Begin container sync "once" mode'))
    begin = time()
    all_locs = audit_location_generator(self.devices, DATADIR, '.db',
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.container_sync(path)
        if time() - self.reported >= 3600:  # once an hour
            self.report()
    self.report()
    elapsed = time() - begin
    self.logger.info(
        _('Container sync "once" mode completed: %.02fs'), elapsed)
def audit_all_objects(self, mode='once'):
    self.logger.info(_('Begin object audit "%s" mode (%s)' %
                       (mode, self.auditor_type)))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.object_audit(path, device, partition)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        if time.time() - reported >= self.log_time:
            self.logger.info(_(
                'Object audit (%(type)s). '
                'Since %(start_time)s: Locally: %(passes)d passed, '
                '%(quars)d quarantined, %(errors)d errors '
                'files/sec: %(frate).2f , bytes/sec: %(brate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (time.time() - reported),
                    'brate': self.bytes_processed /
                        (time.time() - reported)})
            reported = time.time()
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
    elapsed = time.time() - begin
    self.logger.info(_(
        'Object audit (%(type)s) "%(mode)s" mode '
        'completed: %(elapsed).02fs. '
        'Total files/sec: %(frate).2f , '
        'Total bytes/sec: %(brate).2f ') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed})
def run_forever(self, *args, **kwargs):
    """
    Runs container sync scans until stopped.
    """
    sleep(random() * self.interval)
    while True:
        begin = time()
        all_locs = audit_location_generator(self.devices, DATADIR, '.db',
                                            mount_check=self.mount_check,
                                            logger=self.logger)
        for path, device, partition in all_locs:
            self.container_sync(path)
            if time() - self.reported >= 3600:  # once an hour
                self.report()
        elapsed = time() - begin
        if elapsed < self.interval:
            sleep(self.interval - elapsed)
def run_once(self, *args, **kwargs):
    """Run the account audit once."""
    self.logger.info('Begin account audit "once" mode')
    begin = reported = time.time()
    all_locs = audit_location_generator(
        self.devices, account_server.DATADIR,
        mount_check=self.mount_check, logger=self.logger
    )
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _("Since %(time)s: Account audits: "
                  "%(passed)s passed audit, %(failed)s failed audit"),
                {"time": time.ctime(reported),
                 "passed": self.account_passes,
                 "failed": self.account_failures},
            )
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
    elapsed = time.time() - begin
    self.logger.info('Account audit "once" mode completed: %.02fs', elapsed)
def run_once(self, *args, **kwargs):
    """Run the account audit once."""
    self.logger.info('Begin account audit "once" mode')
    begin = reported = time.time()
    all_locs = audit_location_generator(self.devices,
                                        account_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.account_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(_('Since %(time)s: Account audits: '
                               '%(passed)s passed audit, '
                               '%(failed)s failed audit'),
                             {'time': time.ctime(reported),
                              'passed': self.account_passes,
                              'failed': self.account_failures})
            reported = time.time()
            self.account_passes = 0
            self.account_failures = 0
    elapsed = time.time() - begin
    self.logger.info(
        'Account audit "once" mode completed: %.02fs', elapsed)
def run_forever(self, *args, **kwargs):
    """Run the account audit until stopped."""
    reported = time.time()
    time.sleep(random() * self.interval)
    while True:
        begin = time.time()
        all_locs = audit_location_generator(
            self.devices, account_server.DATADIR,
            mount_check=self.mount_check, logger=self.logger
        )
        for path, device, partition in all_locs:
            self.account_audit(path)
            if time.time() - reported >= 3600:  # once an hour
                self.logger.info(
                    _("Since %(time)s: Account audits: "
                      "%(passed)s passed audit, %(failed)s failed audit"),
                    {"time": time.ctime(reported),
                     "passed": self.account_passes,
                     "failed": self.account_failures},
                )
                reported = time.time()
                self.account_passes = 0
                self.account_failures = 0
        elapsed = time.time() - begin
        if elapsed < self.interval:
            time.sleep(self.interval - elapsed)
def relink(swift_dir='/etc/swift', devices='/srv/node',
           skip_mount_check=False, logger=logging.getLogger()):
    mount_check = not skip_mount_check
    run = False
    relinked = errors = 0
    for policy in POLICIES:
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(swift_dir)
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power == part_power:
            continue
        logger.info('Relinking files for policy %s under %s',
                    policy.name, devices)
        run = True
        locations = audit_location_generator(
            devices,
            diskfile.get_data_dir(policy),
            mount_check=mount_check)
        for fname, _, _ in locations:
            newfname = replace_partition_in_path(fname, next_part_power)
            try:
                diskfile.relink_paths(fname, newfname, check_existing=True)
                relinked += 1
            except OSError as exc:
                errors += 1
                logger.warning("Relinking %s to %s failed: %s",
                               fname, newfname, exc)

    if not run:
        logger.warning("No policy found to increase the partition power.")
        return 2

    logger.info('Relinked %d diskfiles (%d errors)', relinked, errors)
    if errors > 0:
        return 1
    return 0
def process_policy(self, policy):
    self.logger.info(
        'Processing files for policy %s under %s (cleanup=%s)',
        policy.name, self.root, self.do_cleanup)
    self.part_power = policy.object_ring.part_power
    self.next_part_power = policy.object_ring.next_part_power
    self.diskfile_mgr = self.diskfile_router[policy]
    self.datadir = diskfile.get_data_dir(policy)
    self.states = {
        "part_power": self.part_power,
        "next_part_power": self.next_part_power,
        "state": {},
    }

    locations = audit_location_generator(
        self.conf['devices'],
        self.datadir,
        mount_check=self.conf['mount_check'],
        devices_filter=self.devices_filter,
        hook_pre_device=self.hook_pre_device,
        hook_post_device=self.hook_post_device,
        partitions_filter=self.partitions_filter,
        hook_post_partition=self.hook_post_partition,
        hashes_filter=self.hashes_filter,
        logger=self.logger,
        error_counter=self.stats,
        yield_hash_dirs=True
    )
    if self.conf['files_per_second'] > 0:
        locations = RateLimitedIterator(
            locations, self.conf['files_per_second'])
    for hash_path, device, partition in locations:
        # note, in cleanup step next_part_power == part_power
        new_hash_path = replace_partition_in_path(
            self.conf['devices'], hash_path, self.next_part_power)
        if new_hash_path == hash_path:
            continue
        self.process_location(hash_path, new_hash_path)
def _one_audit_pass(self, reported):
    all_locs = audit_location_generator(self.devices,
                                        container_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.container_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _('Since %(time)s: Container audits: %(pass)s passed '
                  'audit, %(fail)s failed audit'),
                {'time': time.ctime(reported),
                 'pass': self.container_passes,
                 'fail': self.container_failures})
            dump_recon_cache(
                {'container_audits_since': reported,
                 'container_audits_passed': self.container_passes,
                 'container_audits_failed': self.container_failures},
                self.rcache, self.logger)
            reported = time.time()
            self.container_passes = 0
            self.container_failures = 0
    return reported
def run_once(self, *args, **kwargs):
    """Run the container audit once."""
    self.logger.info(_('Begin container audit "once" mode'))
    begin = reported = time.time()
    all_locs = audit_location_generator(self.devices,
                                        container_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.container_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _('Since %(time)s: Container audits: %(pass)s passed '
                  'audit, %(fail)s failed audit'),
                {'time': time.ctime(reported),
                 'pass': self.container_passes,
                 'fail': self.container_failures})
            reported = time.time()
            self.container_passes = 0
            self.container_failures = 0
    elapsed = time.time() - begin
    self.logger.info(
        _('Container audit "once" mode completed: %.02fs'), elapsed)
def run_once(self):
    """Run the container audit once."""
    self.logger.info(_('Begin container audit "once" mode'))
    begin = reported = time.time()
    all_locs = audit_location_generator(self.devices,
                                        container_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        self.container_audit(path)
        if time.time() - reported >= 3600:  # once an hour
            self.logger.info(
                _('Since %(time)s: Container audits: %(pass)s passed '
                  'audit, %(fail)s failed audit'),
                {'time': time.ctime(reported),
                 'pass': self.container_passes,
                 'fail': self.container_failures})
            reported = time.time()
            self.container_passes = 0
            self.container_failures = 0
    elapsed = time.time() - begin
    self.logger.info(
        _('Container audit "once" mode completed: %.02fs'), elapsed)
def audit_all_objects(self, mode="once"): self.logger.info(_('Begin object audit "%s" mode (%s)' % (mode, self.auditor_type))) begin = reported = time.time() self.total_bytes_processed = 0 self.total_files_processed = 0 total_quarantines = 0 total_errors = 0 files_running_time = 0 time_auditing = 0 all_locs = audit_location_generator( self.devices, object_server.DATADIR, mount_check=self.mount_check, logger=self.logger ) for path, device, partition in all_locs: loop_time = time.time() self.object_audit(path, device, partition) self.files_running_time = ratelimit_sleep(self.files_running_time, self.max_files_per_second) self.total_files_processed += 1 now = time.time() if now - reported >= self.log_time: self.logger.info( _( "Object audit (%(type)s). " "Since %(start_time)s: Locally: %(passes)d passed, " "%(quars)d quarantined, %(errors)d errors " "files/sec: %(frate).2f , bytes/sec: %(brate).2f, " "Total time: %(total).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": self.auditor_type, "start_time": time.ctime(reported), "passes": self.passes, "quars": self.quarantines, "errors": self.errors, "frate": self.passes / (now - reported), "brate": self.bytes_processed / (now - reported), "total": (now - begin), "audit": time_auditing, "audit_rate": time_auditing / (now - begin), } ) reported = now total_quarantines += self.quarantines total_errors += self.errors self.passes = 0 self.quarantines = 0 self.errors = 0 self.bytes_processed = 0 time_auditing += now - loop_time elapsed = time.time() - begin self.logger.info( _( 'Object audit (%(type)s) "%(mode)s" mode ' "completed: %(elapsed).02fs. Total quarantined: %(quars)d, " "Total errors: %(errors)d, Total files/sec: %(frate).2f , " "Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, " "Rate: %(audit_rate).2f" ) % { "type": self.auditor_type, "mode": mode, "elapsed": elapsed, "quars": total_quarantines, "errors": total_errors, "frate": self.total_files_processed / elapsed, "brate": self.total_bytes_processed / elapsed, "audit": time_auditing, "audit_rate": time_auditing / elapsed, } )
def audit_all_objects(self, mode='once'):
    self.logger.info(
        _('Begin object audit "%s" mode (%s)' % (mode, self.auditor_type)))
    begin = reported = time.time()
    self.total_bytes_processed = 0
    self.total_files_processed = 0
    total_quarantines = 0
    total_errors = 0
    time_auditing = 0
    all_locs = audit_location_generator(self.devices,
                                        object_server.DATADIR,
                                        mount_check=self.mount_check,
                                        logger=self.logger)
    for path, device, partition in all_locs:
        loop_time = time.time()
        self.object_audit(path, device, partition)
        self.logger.timing_since('timing', loop_time)
        self.files_running_time = ratelimit_sleep(
            self.files_running_time, self.max_files_per_second)
        self.total_files_processed += 1
        now = time.time()
        if now - reported >= self.log_time:
            self.logger.info(
                _('Object audit (%(type)s). '
                  'Since %(start_time)s: Locally: %(passes)d passed, '
                  '%(quars)d quarantined, %(errors)d errors '
                  'files/sec: %(frate).2f , bytes/sec: %(brate).2f, '
                  'Total time: %(total).2f, Auditing time: %(audit).2f, '
                  'Rate: %(audit_rate).2f') % {
                    'type': self.auditor_type,
                    'start_time': time.ctime(reported),
                    'passes': self.passes,
                    'quars': self.quarantines,
                    'errors': self.errors,
                    'frate': self.passes / (now - reported),
                    'brate': self.bytes_processed / (now - reported),
                    'total': (now - begin),
                    'audit': time_auditing,
                    'audit_rate': time_auditing / (now - begin)})
            dump_recon_cache(
                {'object_auditor_stats_%s' % self.auditor_type: {
                    'errors': self.errors,
                    'passes': self.passes,
                    'quarantined': self.quarantines,
                    'bytes_processed': self.bytes_processed,
                    'start_time': reported,
                    'audit_time': time_auditing}},
                self.rcache, self.logger)
            reported = now
            total_quarantines += self.quarantines
            total_errors += self.errors
            self.passes = 0
            self.quarantines = 0
            self.errors = 0
            self.bytes_processed = 0
        time_auditing += (now - loop_time)
    # Avoid divide by zero during very short runs
    elapsed = (time.time() - begin) or 0.000001
    self.logger.info(
        _('Object audit (%(type)s) "%(mode)s" mode '
          'completed: %(elapsed).02fs. Total quarantined: %(quars)d, '
          'Total errors: %(errors)d, Total files/sec: %(frate).2f , '
          'Total bytes/sec: %(brate).2f, Auditing time: %(audit).2f, '
          'Rate: %(audit_rate).2f') % {
            'type': self.auditor_type,
            'mode': mode,
            'elapsed': elapsed,
            'quars': total_quarantines,
            'errors': total_errors,
            'frate': self.total_files_processed / elapsed,
            'brate': self.total_bytes_processed / elapsed,
            'audit': time_auditing,
            'audit_rate': time_auditing / elapsed})
def relink(conf, logger, device):
    diskfile_router = diskfile.DiskFileRouter(conf, logger)
    found_policy = False
    relinked = errors = 0
    error_counter = {}
    for policy in POLICIES:
        diskfile_mgr = diskfile_router[policy]
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(conf['swift_dir'])
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power == part_power:
            continue
        logger.info('Relinking files for policy %s under %s',
                    policy.name, conf['devices'])
        found_policy = True
        datadir = diskfile.get_data_dir(policy)

        locks = [None]
        states = {
            "part_power": part_power,
            "next_part_power": next_part_power,
            "state": {},
        }
        relink_devices_filter = partial(devices_filter, device)
        relink_hook_pre_device = partial(hook_pre_device, locks, states,
                                         datadir)
        relink_hook_post_device = partial(hook_post_device, locks)
        relink_partition_filter = partial(partitions_filter,
                                          states, part_power,
                                          next_part_power)
        relink_hook_post_partition = partial(hook_post_partition, states,
                                             STEP_RELINK, policy,
                                             diskfile_mgr)
        relink_hashes_filter = partial(hashes_filter, next_part_power)

        locations = audit_location_generator(
            conf['devices'],
            datadir,
            mount_check=conf['mount_check'],
            devices_filter=relink_devices_filter,
            hook_pre_device=relink_hook_pre_device,
            hook_post_device=relink_hook_post_device,
            partitions_filter=relink_partition_filter,
            hook_post_partition=relink_hook_post_partition,
            hashes_filter=relink_hashes_filter,
            logger=logger,
            error_counter=error_counter)
        if conf['files_per_second'] > 0:
            locations = RateLimitedIterator(
                locations, conf['files_per_second'])
        for fname, _, _ in locations:
            newfname = replace_partition_in_path(fname, next_part_power)
            try:
                diskfile.relink_paths(fname, newfname, check_existing=True)
                relinked += 1
                suffix_dir = os.path.dirname(os.path.dirname(newfname))
                diskfile.invalidate_hash(suffix_dir)
            except OSError as exc:
                errors += 1
                logger.warning("Relinking %s to %s failed: %s",
                               fname, newfname, exc)

    return determine_exit_code(
        logger=logger,
        found_policy=found_policy,
        processed=relinked,
        action='relinked',
        action_errors=errors,
        error_counter=error_counter,
    )
def cleanup(conf, logger, device):
    diskfile_router = diskfile.DiskFileRouter(conf, logger)
    errors = cleaned_up = 0
    error_counter = {}
    found_policy = False
    for policy in POLICIES:
        diskfile_mgr = diskfile_router[policy]
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(conf['swift_dir'])
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power != part_power:
            continue
        logger.info('Cleaning up files for policy %s under %s',
                    policy.name, conf['devices'])
        found_policy = True
        datadir = diskfile.get_data_dir(policy)

        locks = [None]
        states = {
            "part_power": part_power,
            "next_part_power": next_part_power,
            "state": {},
        }
        cleanup_devices_filter = partial(devices_filter, device)
        cleanup_hook_pre_device = partial(hook_pre_device, locks, states,
                                          datadir)
        cleanup_hook_post_device = partial(hook_post_device, locks)
        cleanup_partition_filter = partial(partitions_filter,
                                           states, part_power,
                                           next_part_power)
        cleanup_hook_post_partition = partial(hook_post_partition, states,
                                              STEP_CLEANUP, policy,
                                              diskfile_mgr)
        cleanup_hashes_filter = partial(hashes_filter, next_part_power)

        locations = audit_location_generator(
            conf['devices'],
            datadir,
            mount_check=conf['mount_check'],
            devices_filter=cleanup_devices_filter,
            hook_pre_device=cleanup_hook_pre_device,
            hook_post_device=cleanup_hook_post_device,
            partitions_filter=cleanup_partition_filter,
            hook_post_partition=cleanup_hook_post_partition,
            hashes_filter=cleanup_hashes_filter,
            logger=logger,
            error_counter=error_counter)
        if conf['files_per_second'] > 0:
            locations = RateLimitedIterator(
                locations, conf['files_per_second'])
        for fname, device, partition in locations:
            expected_fname = replace_partition_in_path(fname, part_power)
            if fname == expected_fname:
                continue
            # Make sure there is a valid object file in the expected new
            # location. Note that this could be newer than the original one
            # (which happens if there is another PUT after partition power
            # has been increased, but cleanup did not yet run)
            loc = diskfile.AuditLocation(
                os.path.dirname(expected_fname), device, partition, policy)
            df = diskfile_mgr.get_diskfile_from_audit_location(loc)
            try:
                with df.open():
                    pass
            except DiskFileQuarantined as exc:
                logger.warning('ERROR Object %(obj)s failed audit and was'
                               ' quarantined: %(err)r',
                               {'obj': loc, 'err': exc})
                errors += 1
                continue
            except DiskFileDeleted:
                pass
            except DiskFileNotExist as exc:
                err = False
                if policy.policy_type == 'erasure_coding':
                    # Might be a non-durable fragment - check that there is
                    # a fragment in the new path. Will be fixed by the
                    # reconstructor then
                    if not os.path.isfile(expected_fname):
                        err = True
                else:
                    err = True
                if err:
                    logger.warning('Error cleaning up %s: %r', fname, exc)
                    errors += 1
                    continue
            try:
                os.remove(fname)
                cleaned_up += 1
                logger.debug("Removed %s", fname)
                suffix_dir = os.path.dirname(os.path.dirname(fname))
                diskfile.invalidate_hash(suffix_dir)
            except OSError as exc:
                logger.warning('Error cleaning up %s: %r', fname, exc)
                errors += 1

    return determine_exit_code(
        logger=logger,
        found_policy=found_policy,
        processed=cleaned_up,
        action='cleaned up',
        action_errors=errors,
        error_counter=error_counter,
    )
def cleanup(swift_dir='/etc/swift', devices='/srv/node',
            skip_mount_check=False, logger=logging.getLogger()):
    mount_check = not skip_mount_check
    conf = {'devices': devices, 'mount_check': mount_check}
    diskfile_router = diskfile.DiskFileRouter(conf, get_logger(conf))
    errors = cleaned_up = 0
    run = False
    for policy in POLICIES:
        policy.object_ring = None  # Ensure it will be reloaded
        policy.load_ring(swift_dir)
        part_power = policy.object_ring.part_power
        next_part_power = policy.object_ring.next_part_power
        if not next_part_power or next_part_power != part_power:
            continue
        logger.info('Cleaning up files for policy %s under %s',
                    policy.name, devices)
        run = True
        locations = audit_location_generator(
            devices,
            diskfile.get_data_dir(policy),
            mount_check=mount_check)
        for fname, device, partition in locations:
            expected_fname = replace_partition_in_path(fname, part_power)
            if fname == expected_fname:
                continue
            # Make sure there is a valid object file in the expected new
            # location. Note that this could be newer than the original one
            # (which happens if there is another PUT after partition power
            # has been increased, but cleanup did not yet run)
            loc = diskfile.AuditLocation(
                os.path.dirname(expected_fname), device, partition, policy)
            diskfile_mgr = diskfile_router[policy]
            df = diskfile_mgr.get_diskfile_from_audit_location(loc)
            try:
                with df.open():
                    pass
            except DiskFileQuarantined as exc:
                logger.warning('ERROR Object %(obj)s failed audit and was'
                               ' quarantined: %(err)r',
                               {'obj': loc, 'err': exc})
                errors += 1
                continue
            except DiskFileDeleted:
                pass
            except DiskFileNotExist as exc:
                err = False
                if policy.policy_type == 'erasure_coding':
                    # Might be a non-durable fragment - check that there is
                    # a fragment in the new path. Will be fixed by the
                    # reconstructor then
                    if not os.path.isfile(expected_fname):
                        err = True
                else:
                    err = True
                if err:
                    logger.warning('Error cleaning up %s: %r', fname, exc)
                    errors += 1
                    continue
            try:
                os.remove(fname)
                cleaned_up += 1
                logger.debug("Removed %s", fname)
            except OSError as exc:
                logger.warning('Error cleaning up %s: %r', fname, exc)
                errors += 1

    if not run:
        logger.warning("No policy found to increase the partition power.")
        return 2

    logger.info('Cleaned up %d diskfiles (%d errors)', cleaned_up, errors)
    if errors > 0:
        return 1
    return 0
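# A minimal, self-contained usage sketch of the audit_location_generator()
# call shared by all of the functions above: it walks
# <devices>/<device>/<datadir>/<partition>/... and yields
# (path, device, partition) tuples, optionally restricted to files with a
# given suffix. count_dbs() is a hypothetical helper for illustration only.
from swift.common.utils import audit_location_generator

def count_dbs(devices, datadir, mount_check=True, logger=None):
    """Count .db files under a given datadir (illustrative only)."""
    total = 0
    for path, device, partition in audit_location_generator(
            devices, datadir, '.db',
            mount_check=mount_check, logger=logger):
        total += 1  # each yielded path is one database file
    return total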