def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas """
    if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
        vo = {'vo': get_vo()}
    else:
        vo = {}

    rse_id = get_rse_id(rse='MOCK', **vo)

    real_replicas, dark_replicas = list_quarantined_replicas(rse_id=rse_id, limit=10000)
    quarantined_replicas = len(real_replicas) + len(dark_replicas)

    nbreplicas = 5
    replicas = [{'path': '/path/' + generate_uuid()} for _ in range(nbreplicas)]

    add_quarantined_replicas(rse_id=rse_id, replicas=replicas)
    real_replicas, dark_replicas = list_quarantined_replicas(rse_id=rse_id, limit=10000)
    assert quarantined_replicas + nbreplicas == len(dark_replicas) + len(real_replicas)

    delete_quarantined_replicas(rse_id=rse_id, replicas=replicas)
    real_replicas, dark_replicas = list_quarantined_replicas(rse_id=rse_id, limit=10000)
    assert quarantined_replicas == len(dark_replicas) + len(real_replicas)
def process_output(output, sanity_check=True, compress=True):
    """Perform post-consistency-check actions.

    DARK files are put in the quarantined-replica table so that they
    may be deleted by the Dark Reaper. LOST files are currently
    ignored.

    ``output`` should be an ``str`` with the absolute path to the file
    produced by ``consistency()``. It must maintain its naming
    convention.

    If ``sanity_check`` is ``True`` (default) and the number of entries
    in the output file is deemed excessive, the actions are aborted.

    If ``compress`` is ``True`` (default), the file is compressed with
    bzip2 after the actions are successfully performed.
    """
    logger = logging.getLogger('auditor-worker')
    dark_replicas = []
    try:
        with open(output) as f:
            for line in f:
                label, path = line.rstrip().split(',', 1)
                if label == 'DARK':
                    scope, name = guess_replica_info(path)
                    dark_replicas.append({'path': path,
                                          'scope': scope,
                                          'name': name})
                elif label == 'LOST':
                    # TODO: Declare LOST files as suspicious.
                    pass
                else:
                    raise ValueError('unexpected label')
    # Since the file is read immediately after its creation, any error
    # exposes a bug in the Auditor.
    except Exception as error:
        logger.critical('Error processing "%s"', output, exc_info=True)
        raise error

    rse = os.path.basename(output[:output.rfind('_')])
    usage = get_rse_usage(rse, source='rucio')[0]
    threshold = config.config_get('auditor', 'threshold', False, 0.2)

    # Perform a basic sanity check by comparing the number of entries
    # with the total number of files on the RSE. If the percentage is
    # significant, there is most likely an issue with the site dump.
    if sanity_check and len(dark_replicas) > threshold * usage['files']:
        raise AssertionError('number of DARK files is exceeding threshold')

    add_quarantined_replicas(rse, dark_replicas)
    logger.debug('Processed %d DARK files from "%s"', len(dark_replicas),
                 output)

    if compress:
        destination = bz2_compress_file(output)
        logger.debug('Compressed "%s"', destination)
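# A minimal usage sketch for process_output(), not part of the auditor itself.
# It assumes a working Rucio server environment with an RSE named 'MOCK'
# registered; the file name 'MOCK_20200101' is hypothetical but follows the
# '<RSE>_<suffix>' convention the function relies on to recover the RSE name.
import os
import tempfile

def _make_consistency_output(tmpdir):
    # consistency() emits one '<LABEL>,<path>' pair per line; only the
    # DARK and LOST labels are accepted by process_output().
    dump = os.path.join(tmpdir, 'MOCK_20200101')
    with open(dump, 'w') as f:
        f.write('DARK,/rucio/user/jdoe/aa/bb/file1\n')
        f.write('LOST,/rucio/user/jdoe/cc/dd/file2\n')
    return dump

output = _make_consistency_output(tempfile.mkdtemp())
# sanity_check is disabled here because the toy RSE has no meaningful usage
# record to compare against; compress=False keeps the dump readable afterwards.
process_output(output, sanity_check=False, compress=False)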
def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas """
    quarantined_replicas = len(list_quarantined_replicas(rse='MOCK', limit=10000))

    nbreplicas = 5
    replicas = [{'path': '/path/' + generate_uuid()} for _ in range(nbreplicas)]

    add_quarantined_replicas(rse='MOCK', replicas=replicas)
    assert_equal(quarantined_replicas + nbreplicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))

    delete_quarantined_replicas(rse='MOCK', replicas=replicas)
    assert_equal(quarantined_replicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))
def process_output(output, sanity_check=True, compress=True):
    """Perform post-consistency-check actions.

    DARK files are put in the quarantined-replica table so that they
    may be deleted by the Dark Reaper. LOST files are reported as
    suspicious so that they may be further checked by the cloud squads.

    ``output`` should be an ``str`` with the absolute path to the file
    produced by ``consistency()``. It must maintain its naming
    convention.

    If ``sanity_check`` is ``True`` (default) and the number of entries
    in the output file is deemed excessive, the actions are aborted.

    If ``compress`` is ``True`` (default), the file is compressed with
    bzip2 after the actions are successfully performed.
    """
    logger = logging.getLogger('auditor-worker')
    dark_replicas = []
    lost_replicas = []
    try:
        with open(output) as f:
            for line in f:
                label, path = line.rstrip().split(',', 1)
                scope, name = guess_replica_info(path)
                if label == 'DARK':
                    dark_replicas.append({'path': path,
                                          'scope': InternalScope(scope),
                                          'name': name})
                elif label == 'LOST':
                    lost_replicas.append({'scope': InternalScope(scope),
                                          'name': name})
                else:
                    raise ValueError('unexpected label')
    # Since the file is read immediately after its creation, any error
    # exposes a bug in the Auditor.
    except Exception as error:
        logger.critical('Error processing "%s"', output, exc_info=True)
        raise error

    rse = os.path.basename(output[:output.rfind('_')])
    rse_id = get_rse_id(rse=rse)
    usage = get_rse_usage(rse_id=rse_id, source='rucio')[0]
    threshold = config.config_get('auditor', 'threshold', False, 0.2)

    # Perform a basic sanity check by comparing the number of entries
    # with the total number of files on the RSE. If the percentage is
    # significant, there is most likely an issue with the site dump.
    found_error = False
    if len(dark_replicas) > threshold * usage['files']:
        logger.warning('Number of DARK files is exceeding threshold: "%s"',
                       output)
        found_error = True
    if len(lost_replicas) > threshold * usage['files']:
        logger.warning('Number of LOST files is exceeding threshold: "%s"',
                       output)
        found_error = True
    if found_error and sanity_check:
        raise AssertionError('sanity check failed')

    # While converting LOST replicas to PFNs, entries that do not
    # correspond to a replica registered in Rucio are silently dropped.
    lost_pfns = [r['rses'][rse_id][0] for r in list_replicas(lost_replicas)
                 if rse_id in r['rses']]

    add_quarantined_replicas(rse_id=rse_id, replicas=dark_replicas)
    logger.debug('Processed %d DARK files from "%s"', len(dark_replicas),
                 output)
    declare_bad_file_replicas(lost_pfns, reason='Reported by Auditor',
                              issuer=InternalAccount('root'),
                              status=BadFilesStatus.SUSPICIOUS)
    logger.debug('Processed %d LOST files from "%s"', len(lost_replicas),
                 output)

    if compress:
        destination = bz2_compress_file(output)
        logger.debug('Compressed "%s"', destination)
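# guess_replica_info() is defined elsewhere in the auditor; the helper below
# is only an illustrative reconstruction of the path convention it relies on
# (an assumption for demonstration, not the actual Rucio implementation).
# For ATLAS-style paths, 'user' and 'group' scopes span two path components
# joined by a dot, while any other scope is the first component; the replica
# name is the last component.
def _guess_scope_name_sketch(path):
    components = [c for c in path.split('/') if c]
    if len(components) > 2 and components[0] in ('user', 'group'):
        return '.'.join(components[0:2]), components[-1]
    return components[0], components[-1]

assert _guess_scope_name_sketch('user/jdoe/aa/bb/file1') == ('user.jdoe', 'file1')
assert _guess_scope_name_sketch('data15_13TeV/aa/bb/file2') == ('data15_13TeV', 'file2')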
def process_dark_files(path, scope, rse, latest_run, max_dark_fraction,
                       max_files_at_site, old_enough_run, force_proceed):
    """
    Process the Dark Files.
    """

    prefix = 'storage-consistency-actions (process_dark_files())'
    logger = formatted_logger(logging.log, prefix + '%s')

    # Create a cc_dark section in the stats file
    t0 = time.time()
    stats_key = "cc_dark"
    stats = Stats(latest_run)
    cc_stats = {
        "start_time": t0,
        "end_time": None,
        "initial_dark_files": 0,
        "confirmed_dark_files": 0,
        "x-check_run": old_enough_run,
        "status": "started"
    }
    stats[stats_key] = cc_stats

    # Compare the two lists, and take only the dark files that are in both
    latest_dark = re.sub('_stats.json$', '_D.list', latest_run)
    old_enough_dark = re.sub('_stats.json$', '_D.list', old_enough_run)
    logger(logging.INFO, 'latest_dark = %s' % latest_dark)
    logger(logging.INFO, 'old_enough_dark = %s' % old_enough_dark)
    confirmed_dark = re.sub('_stats.json$', '_DeletionList.csv', latest_run)
    cmp2dark(new_list=latest_dark, old_list=old_enough_dark,
             comm_list=confirmed_dark, stats_file=latest_run)

    ###
    #   SAFEGUARD
    #   If a large fraction (larger than 'max_dark_fraction') of the files
    #   at a site are reported as 'dark', do NOT proceed with the deletion.
    #   Instead, put a warning in the _stats.json file, so that an operator
    #   can have a look.
    ###

    # Get the number of files recorded by the scanner
    dark_files = sum(1 for line in open(latest_dark))
    confirmed_dark_files = sum(1 for line in open(confirmed_dark))
    logger(logging.INFO, 'dark_files %d' % dark_files)
    logger(logging.INFO, 'confirmed_dark_files %d' % confirmed_dark_files)
    logger(logging.INFO, 'confirmed_dark_files/max_files_at_site = %f'
           % (confirmed_dark_files / max_files_at_site))
    logger(logging.INFO, 'max_dark_fraction configured for this RSE: %f'
           % max_dark_fraction)

    # Labels for the Prometheus counters/gauges
    labels = {'rse': rse}
    record_gauge('storage.consistency.actions_dark_files_found',
                 dark_files, labels=labels)
    record_gauge('storage.consistency.actions_dark_files_confirmed',
                 confirmed_dark_files, labels=labels)

    deleted_files = 0
    if confirmed_dark_files / max_files_at_site < max_dark_fraction \
            or force_proceed is True:
        logger(logging.INFO, 'Can proceed with dark files deletion')

        # Then, do the real deletion (code from DeleteReplicas.py)
        issuer = InternalAccount('root')
        with open(confirmed_dark, 'r') as csvfile:
            reader = csv.reader(csvfile)
            for name, in reader:
                logger(logging.INFO,
                       'Processing a dark file:\n RSE %s Scope: %s Name: %s'
                       % (rse, scope, name))
                rse_id = get_rse_id(rse=rse)
                Intscope = InternalScope(scope=scope, vo=issuer.vo)
                lfns = [{'scope': scope, 'name': name}]
                attributes = get_rse_info(rse=rse)
                pfns = lfns2pfns(rse_settings=attributes, lfns=lfns,
                                 operation='delete')
                pfn_key = scope + ':' + name
                url = pfns[pfn_key]
                urls = [url]
                paths = parse_pfns(attributes, urls, operation='delete')
                replicas = [{'scope': Intscope, 'rse_id': rse_id, 'name': name,
                             'path': paths[url]['path'] + paths[url]['name']}]
                add_quarantined_replicas(rse_id, replicas, session=None)
                deleted_files += 1
                labels = {'rse': rse}
                record_counter(
                    'storage.consistency.actions_dark_files_deleted_counter',
                    delta=1, labels=labels)

        # Update the stats
        t1 = time.time()
        cc_stats.update({
            "end_time": t1,
            "initial_dark_files": dark_files,
            "confirmed_dark_files": deleted_files,
            "status": "done"
        })
        stats[stats_key] = cc_stats
        record_gauge('storage.consistency.actions_dark_files_deleted',
                     deleted_files, labels=labels)
    else:
        darkperc = 100. * confirmed_dark_files / max_files_at_site
        logger(logging.WARNING,
               '\n ATTENTION: Too many DARK files! (%3.2f%%)\n'
               ' Stopping and asking for operators help.' % darkperc)

        # Update the stats
        t1 = time.time()
        cc_stats.update({
            "end_time": t1,
            "initial_dark_files": dark_files,
            "confirmed_dark_files": 0,
            "status": "ABORTED",
            "aborted_reason": "%3.2f%% dark" % darkperc,
        })
        stats[stats_key] = cc_stats
        record_gauge('storage.consistency.actions_dark_files_deleted', 0,
                     labels=labels)
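# A small, self-contained sketch of the run-file naming convention and the
# dark-fraction safeguard used above. The paths and numbers are hypothetical;
# only the '_stats.json' -> '_D.list' / '_DeletionList.csv' substitutions and
# the threshold comparison mirror the logic in process_dark_files().
import re

latest_run = '/var/cache/consistency/MOCK_2024_01_01_stats.json'
latest_dark = re.sub('_stats.json$', '_D.list', latest_run)
confirmed_dark = re.sub('_stats.json$', '_DeletionList.csv', latest_run)

confirmed_dark_files = 120      # entries in the confirmed deletion list
max_files_at_site = 100000      # scanner's file count for the RSE
max_dark_fraction = 0.01        # per-RSE cap, e.g. 1% of all files

# Deletion proceeds only below the cap (or when force_proceed is set).
can_proceed = confirmed_dark_files / max_files_at_site < max_dark_fraction
print(latest_dark, confirmed_dark, can_proceed)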