Example #1
def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas """
    if config_get_bool('common',
                       'multi_vo',
                       raise_exception=False,
                       default=False):
        vo = {'vo': get_vo()}
    else:
        vo = {}

    rse_id = get_rse_id(rse='MOCK', **vo)

    real_replicas, dark_replicas = list_quarantined_replicas(rse_id=rse_id,
                                                             limit=10000)
    quarantined_replicas = len(real_replicas) + len(dark_replicas)

    nbreplicas = 5

    replicas = [{
        'path': '/path/' + generate_uuid()
    } for _ in range(nbreplicas)]

    add_quarantined_replicas(rse_id=rse_id, replicas=replicas)

    real_replicas, dark_replicas = list_quarantined_replicas(rse_id=rse_id,
                                                             limit=10000)
    assert quarantined_replicas + nbreplicas == len(dark_replicas) + len(
        real_replicas)

    delete_quarantined_replicas(rse_id=rse_id, replicas=replicas)

    real_replicas, dark_replicas = list_quarantined_replicas(rse_id=rse_id,
                                                             limit=10000)
    assert quarantined_replicas == len(dark_replicas) + len(real_replicas)
Example #2
def process_output(output, sanity_check=True, compress=True):
    """Perform post-consistency-check actions.

    DARK files are put in the quarantined-replica table so that they
    may be deleted by the Dark Reaper.  LOST files are currently
    ignored.

    ``output`` should be an ``str`` with the absolute path to the file
    produced by ``consistency()``.  It must maintain its naming
    convention.

    If ``sanity_check`` is ``True`` (default) and the number of entries
    in the output file is deemed excessive, the actions are aborted.

    If ``compress`` is ``True`` (default), the file is compressed with
    bzip2 after the actions are successfully performed.
    """
    logger = logging.getLogger('auditor-worker')
    dark_replicas = []
    try:
        with open(output) as f:
            for line in f:
                label, path = line.rstrip().split(',', 1)
                if label == 'DARK':
                    scope, name = guess_replica_info(path)
                    dark_replicas.append({
                        'path': path,
                        'scope': scope,
                        'name': name
                    })
                elif label == 'LOST':
                    # TODO: Declare LOST files as suspicious.
                    pass
                else:
                    raise ValueError('unexpected label')
    # Since the file is read immediately after its creation, any error
    # exposes a bug in the Auditor.
    except Exception as error:
        logger.critical('Error processing "%s"', output, exc_info=True)
        raise error

    rse = os.path.basename(output[:output.rfind('_')])
    usage = get_rse_usage(rse, source='rucio')[0]
    threshold = config.config_get('auditor', 'threshold', False, 0.2)

    # Perform a basic sanity check by comparing the number of entries
    # with the total number of files on the RSE.  If the percentage is
    # significant, there is most likely an issue with the site dump.
    if sanity_check and len(dark_replicas) > threshold * usage['files']:
        raise AssertionError('number of DARK files is exceeding threshold')

    add_quarantined_replicas(rse, dark_replicas)
    logger.debug('Processed %d DARK files from "%s"', len(dark_replicas),
                 output)

    if compress:
        destination = bz2_compress_file(output)
        logger.debug('Compressed "%s"', destination)
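

# A minimal usage sketch (not part of the original function). The dump name
# '/tmp/MOCK_20150101' and its entries are made up for illustration; the only
# constraints taken from process_output() above are that every line reads
# '<LABEL>,<path>' with label DARK or LOST and that the RSE name is recovered
# from the basename up to the last '_'. A running Rucio instance with a test
# RSE called MOCK is assumed.
def _example_run():
    dump = '/tmp/MOCK_20150101'
    with open(dump, 'w') as f:
        f.write('DARK,/prefix/mock/aa/bb/some_dark_file\n')
        f.write('LOST,/prefix/mock/cc/dd/some_lost_file\n')
    # sanity_check is switched off to skip the threshold comparison, and compress
    # is switched off so the dump file is left in place.
    process_output(dump, sanity_check=False, compress=False)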
Example #3
def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas """

    quarantined_replicas = len(
        list_quarantined_replicas(rse='MOCK', limit=10000))

    nbreplicas = 5

    replicas = [{
        'path': '/path/' + generate_uuid()
    } for _ in xrange(nbreplicas)]

    add_quarantined_replicas(rse='MOCK', replicas=replicas)

    assert_equal(quarantined_replicas + nbreplicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))

    delete_quarantined_replicas(rse='MOCK', replicas=replicas)

    assert_equal(quarantined_replicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))
Example #4
def process_output(output, sanity_check=True, compress=True):
    """Perform post-consistency-check actions.

    DARK files are put in the quarantined-replica table so that they
    may be deleted by the Dark Reaper.  LOST files are reported as
    suspicious so that they may be further checked by the cloud squads.

    ``output`` should be an ``str`` with the absolute path to the file
    produced by ``consistency()``.  It must maintain its naming
    convention.

    If ``sanity_check`` is ``True`` (default) and the number of entries
    in the output file is deemed excessive, the actions are aborted.

    If ``compress`` is ``True`` (default), the file is compressed with
    bzip2 after the actions are successfully performed.
    """
    logger = logging.getLogger('auditor-worker')
    dark_replicas = []
    lost_replicas = []
    try:
        with open(output) as f:
            for line in f:
                label, path = line.rstrip().split(',', 1)
                scope, name = guess_replica_info(path)
                if label == 'DARK':
                    dark_replicas.append({'path': path,
                                          'scope': InternalScope(scope),
                                          'name': name})
                elif label == 'LOST':
                    lost_replicas.append({'scope': InternalScope(scope),
                                          'name': name})
                else:
                    raise ValueError('unexpected label')
    # Since the file is read immediately after its creation, any error
    # exposes a bug in the Auditor.
    except Exception as error:
        logger.critical('Error processing "%s"', output, exc_info=True)
        raise error

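    # The dump file follows the '<RSE>_<suffix>' naming convention mentioned in the
    # docstring, so everything after the last '_' is dropped to recover the RSE name.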
    rse = os.path.basename(output[:output.rfind('_')])
    rse_id = get_rse_id(rse=rse)
    usage = get_rse_usage(rse_id=rse_id, source='rucio')[0]
    threshold = config.config_get('auditor', 'threshold', False, 0.2)

    # Perform a basic sanity check by comparing the number of entries
    # with the total number of files on the RSE.  If the percentage is
    # significant, there is most likely an issue with the site dump.
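    # For scale: with the default threshold of 0.2 and an RSE holding 1,000,000
    # files, more than 200,000 DARK (or LOST) entries trips the checks below.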
    found_error = False
    if len(dark_replicas) > threshold * usage['files']:
        logger.warning('Number of DARK files is exceeding threshold: "%s"',
                       output)
        found_error = True
    if len(lost_replicas) > threshold * usage['files']:
        logger.warning('Number of LOST files is exceeding threshold: "%s"',
                       output)
        found_error = True
    if found_error and sanity_check:
        raise AssertionError('sanity check failed')

    # While converting LOST replicas to PFNs, entries that do not
    # correspond to a replica registered in Rucio are silently dropped.
    lost_pfns = [r['rses'][rse_id][0] for r in list_replicas(lost_replicas)
                 if rse_id in r['rses']]

    add_quarantined_replicas(rse_id=rse_id, replicas=dark_replicas)
    logger.debug('Processed %d DARK files from "%s"', len(dark_replicas),
                 output)
    declare_bad_file_replicas(lost_pfns, reason='Reported by Auditor',
                              issuer=InternalAccount('root'), status=BadFilesStatus.SUSPICIOUS)
    logger.debug('Processed %d LOST files from "%s"', len(lost_replicas),
                 output)

    if compress:
        destination = bz2_compress_file(output)
        logger.debug('Compressed "%s"', destination)
Example #5
def process_dark_files(path, scope, rse, latest_run, max_dark_fraction,
                       max_files_at_site, old_enough_run, force_proceed):
    """
    Process the Dark Files.
    """

    prefix = 'storage-consistency-actions (process_dark_files())'
    logger = formatted_logger(logging.log, prefix + '%s')

    # Create a cc_dark section in the stats file

    t0 = time.time()
    stats_key = "cc_dark"
    stats = Stats(latest_run)
    cc_stats = {
        "start_time": t0,
        "end_time": None,
        "initial_dark_files": 0,
        "confirmed_dark_files": 0,
        "x-check_run": old_enough_run,
        "status": "started"
    }
    stats[stats_key] = cc_stats

    # Compare the two lists, and take only the dark files that are in both
    latest_dark = re.sub('_stats.json$', '_D.list', latest_run)
    old_enough_dark = re.sub('_stats.json$', '_D.list', old_enough_run)
    logger(logging.INFO, 'latest_dark = %s' % latest_dark)
    logger(logging.INFO, 'old_enough_dark = %s' % old_enough_dark)
    confirmed_dark = re.sub('_stats.json$', '_DeletionList.csv', latest_run)
    cmp2dark(new_list=latest_dark,
             old_list=old_enough_dark,
             comm_list=confirmed_dark,
             stats_file=latest_run)

    ###
    #   SAFEGUARD
    #   If a large fraction (larger than 'max_dark_fraction') of the files at a site
    #   are reported as 'dark', do NOT proceed with the deletion.
    #   Instead, put a warning in the _stats.json file, so that an operator can have a look.
    ###
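    # For example, with max_files_at_site = 1,000,000 and max_dark_fraction = 0.01,
    # the deletion below only proceeds while confirmed_dark_files stays under 10,000
    # (unless force_proceed is set).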

    # Get the number of files recorded by the scanner
    with open(latest_dark) as f:
        dark_files = sum(1 for _ in f)
    with open(confirmed_dark) as f:
        confirmed_dark_files = sum(1 for _ in f)
    logger(logging.INFO, 'dark_files %d' % dark_files)
    logger(logging.INFO, 'confirmed_dark_files %d' % confirmed_dark_files)
    logger(
        logging.INFO, 'confirmed_dark_files/max_files_at_site = %f' %
        (confirmed_dark_files / max_files_at_site))
    logger(logging.INFO,
           'max_dark_fraction configured for this RSE: %f' % max_dark_fraction)

    # Labels for the Prometheus counters/gauges
    labels = {'rse': rse}

    record_gauge('storage.consistency.actions_dark_files_found',
                 confirmed_dark_files,
                 labels=labels)
    record_gauge('storage.consistency.actions_dark_files_confirmed',
                 confirmed_dark_files,
                 labels=labels)

    deleted_files = 0
    if confirmed_dark_files / max_files_at_site < max_dark_fraction or force_proceed is True:
        logger(logging.INFO, 'Can proceed with dark files deletion')

        # Then, do the real deletion (code from DeleteReplicas.py)
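        # (Nothing is removed from storage here directly: adding the replicas to
        #  the quarantined-replica table hands them to the Dark Reaper for deletion.)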
        issuer = InternalAccount('root')
        with open(confirmed_dark, 'r') as csvfile:
            reader = csv.reader(csvfile)
            for name, in reader:
                logger(
                    logging.INFO,
                    'Processing a dark file:\n RSE %s  Scope: %s  Name: %s' %
                    (rse, scope, name))
                rse_id = get_rse_id(rse=rse)
                Intscope = InternalScope(scope=scope, vo=issuer.vo)
                lfns = [{'scope': scope, 'name': name}]

                attributes = get_rse_info(rse=rse)
                pfns = lfns2pfns(rse_settings=attributes,
                                 lfns=lfns,
                                 operation='delete')
                pfn_key = scope + ':' + name
                url = pfns[pfn_key]
                urls = [url]
                paths = parse_pfns(attributes, urls, operation='delete')
                replicas = [{
                    'scope': Intscope,
                    'rse_id': rse_id,
                    'name': name,
                    'path': paths[url]['path'] + paths[url]['name']
                }]
                add_quarantined_replicas(rse_id, replicas, session=None)
                deleted_files += 1
                labels = {'rse': rse}
                record_counter(
                    'storage.consistency.actions_dark_files_deleted_counter',
                    delta=1,
                    labels=labels)


        # Update the stats
        t1 = time.time()

        cc_stats.update({
            "end_time": t1,
            "initial_dark_files": dark_files,
            "confirmed_dark_files": deleted_files,
            "status": "done"
        })
        stats[stats_key] = cc_stats
        record_gauge('storage.consistency.actions_dark_files_deleted',
                     deleted_files,
                     labels=labels)

    else:
        darkperc = 100. * confirmed_dark_files / max_files_at_site
        logger(
            logging.WARNING, '\n ATTENTION: Too many DARK files! (%3.2f%%) \n\
               Stopping and asking for operators help.' % darkperc)

        # Update the stats
        t1 = time.time()

        cc_stats.update({
            "end_time": t1,
            "initial_dark_files": dark_files,
            "confirmed_dark_files": 0,
            "status": "ABORTED",
            "aborted_reason": "%3.2f%% dark" % darkperc,
        })
        stats[stats_key] = cc_stats
        record_gauge('storage.consistency.actions_dark_files_deleted',
                     0,
                     labels=labels)
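

# A minimal invocation sketch (not part of the original function); every value
# below is made up for illustration. The only constraint taken from the code
# above is that latest_run and old_enough_run end in '_stats.json' and sit next
# to the matching '_D.list' dark-file lists produced by the scanner; a running
# Rucio instance with the named RSE is assumed.
process_dark_files(path='/var/cache/consistency-dump/MOCK',
                   scope='mock',
                   rse='MOCK',
                   latest_run='/var/cache/consistency-dump/MOCK/MOCK_2023_10_01_stats.json',
                   max_dark_fraction=0.01,
                   max_files_at_site=1000000,
                   old_enough_run='/var/cache/consistency-dump/MOCK/MOCK_2023_09_25_stats.json',
                   force_proceed=False)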