Пример #1
0
def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas

    Records how many quarantined replicas (real + dark) the MOCK RSE holds,
    adds a batch of new paths, checks that the total grew by the batch size,
    then deletes the batch and checks that the total is back to the baseline.
    """
    # In a multi-VO setup the RSE lookup needs the VO as an extra keyword.
    multi_vo = config_get_bool('common',
                               'multi_vo',
                               raise_exception=False,
                               default=False)
    vo_kwargs = {'vo': get_vo()} if multi_vo else {}
    mock_rse_id = get_rse_id(rse='MOCK', **vo_kwargs)

    def count_quarantined():
        # The listing comes back as a (real, dark) pair; the test only cares
        # about the combined size.
        real, dark = list_quarantined_replicas(rse_id=mock_rse_id,
                                               limit=10000)
        return len(real) + len(dark)

    baseline = count_quarantined()

    batch_size = 5
    batch = [{'path': '/path/' + generate_uuid()} for _ in range(batch_size)]

    add_quarantined_replicas(rse_id=mock_rse_id, replicas=batch)
    assert baseline + batch_size == count_quarantined()

    delete_quarantined_replicas(rse_id=mock_rse_id, replicas=batch)
    assert baseline == count_quarantined()
Пример #2
0
def test_quarantined_replicas():
    """ QUARANTINED REPLICA (CORE): Add, List and Delete quarantined replicas

    Records the number of quarantined replicas listed for the 'MOCK' RSE,
    adds a batch of new paths, checks that the listing grew by the batch
    size, then deletes the batch and checks that the listing is back to its
    initial size.
    """

    quarantined_replicas = len(
        list_quarantined_replicas(rse='MOCK', limit=10000))

    nbreplicas = 5

    # range() rather than the Python 2-only xrange(): identical result for
    # this small count, and it does not raise NameError on Python 3.
    replicas = [{
        'path': '/path/' + generate_uuid()
    } for _ in range(nbreplicas)]

    add_quarantined_replicas(rse='MOCK', replicas=replicas)

    assert_equal(quarantined_replicas + nbreplicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))

    delete_quarantined_replicas(rse='MOCK', replicas=replicas)

    assert_equal(quarantined_replicas,
                 len(list_quarantined_replicas(rse='MOCK', limit=10000)))
Пример #3
0
def reaper(rses=None,
           worker_number=1,
           total_workers=1,
           chunk_size=100,
           once=False,
           scheme=None):
    """
    Main loop to select and delete files.

    On every pass the daemon sends a heartbeat and then walks the RSEs in
    random order. For each RSE it lists a chunk of quarantined replicas,
    deletes each of them through the RSE's 'delete' protocol and finally
    hands the replicas that were deleted (or were already missing on
    storage) to delete_quarantined_replicas.

    :param rses: List of RSE ids the reaper should work against. With an empty list (or None) no RSE is processed.
    :param worker_number: The worker number.
    :param total_workers:  The total number of workers.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, stops after the first RSE of the first pass of the main loop.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    """
    # Work on a private copy: the list is shuffled in place on every pass,
    # which must neither reorder the caller's list nor touch a shared default.
    rses = list(rses) if rses else []

    logging.info('Starting Dark Reaper %s-%s: Will work on RSEs: %s',
                 worker_number, total_workers, str(rses))

    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)
    # hashlib only accepts bytes on Python 3, hence the explicit encode().
    hash_executable = hashlib.sha256(
        (sys.argv[0] + ''.join(rses)).encode()).hexdigest()
    sanity_check(executable=None, hostname=hostname)

    while not GRACEFUL_STOP.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable,
                             hostname=hostname,
                             pid=pid,
                             thread=thread,
                             hash_executable=hash_executable)
            logging.info('Dark Reaper(%s/%s): Live gives %s', worker_number,
                         total_workers, heartbeat)
            nothing_to_do = True

            random.shuffle(rses)
            for rse_id in rses:
                rse = rse_core.get_rse_name(rse_id=rse_id)
                replicas = list_quarantined_replicas(
                    rse_id=rse_id,
                    limit=chunk_size,
                    worker_number=worker_number,
                    total_workers=total_workers)

                rse_info = rsemgr.get_rse_info(rse_id=rse_id)
                prot = rsemgr.create_protocol(rse_info,
                                              'delete',
                                              scheme=scheme)
                # Replicas that can be dropped from the quarantine list:
                # deleted successfully or already absent on storage.
                deleted_replicas = []
                try:
                    prot.connect()
                    for replica in replicas:
                        nothing_to_do = False
                        # Reset per replica so that a failure while resolving
                        # the PFN is never reported with the previous
                        # replica's PFN (or hits an unbound name on the
                        # first one).
                        pfn = None
                        scope = ''
                        try:
                            # A quarantined entry may carry no scope; use an
                            # empty external scope instead of failing on it.
                            if replica['scope']:
                                scope = replica['scope'].external
                            # list(...) because dict views are not
                            # indexable on Python 3.
                            pfn = str(
                                list(
                                    rsemgr.lfns2pfns(
                                        rse_settings=rse_info,
                                        lfns=[{
                                            'scope': scope,
                                            'name': replica['name'],
                                            'path': replica['path']
                                        }],
                                        operation='delete',
                                        scheme=scheme).values())[0])
                            logging.info(
                                'Dark Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s',
                                worker_number, total_workers, replica['scope'],
                                replica['name'], pfn, rse)
                            start = time.time()
                            prot.delete(pfn)
                            duration = time.time() - start
                            logging.info(
                                'Dark Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s in %s seconds',
                                worker_number, total_workers, replica['scope'],
                                replica['name'], pfn, rse, duration)
                            add_message(
                                'deletion-done', {
                                    'scope': scope,
                                    'name': replica['name'],
                                    'rse': rse,
                                    'rse_id': rse_id,
                                    'file-size': replica.get('bytes') or 0,
                                    'bytes': replica.get('bytes') or 0,
                                    'url': pfn,
                                    'duration': duration,
                                    'protocol': prot.attributes['scheme']
                                })
                            deleted_replicas.append(replica)
                        except SourceNotFound:
                            # Already gone from storage: nothing left to
                            # delete, so the entry can leave the quarantine.
                            err_msg = 'Dark Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s' % (
                                worker_number, total_workers, replica['scope'],
                                replica['name'], pfn, rse)
                            logging.warning(err_msg)
                            deleted_replicas.append(replica)
                        except (ServiceUnavailable, RSEAccessDenied,
                                ResourceTemporaryUnavailable) as error:
                            # Storage not reachable: keep the entry in the
                            # quarantine so that it is retried later.
                            err_msg = 'Dark Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (
                                worker_number, total_workers, replica['scope'],
                                replica['name'], pfn, rse, str(error))
                            logging.warning(err_msg)
                            add_message(
                                'deletion-failed', {
                                    'scope': scope,
                                    'name': replica['name'],
                                    'rse': rse,
                                    'rse_id': rse_id,
                                    # .get(), as in the success payload: the
                                    # size may be missing for a dark replica.
                                    'file-size': replica.get('bytes') or 0,
                                    'bytes': replica.get('bytes') or 0,
                                    'url': pfn,
                                    'reason': str(error),
                                    'protocol': prot.attributes['scheme']
                                })

                        except Exception:
                            # Best effort: log and move on to the next
                            # replica. SystemExit/KeyboardInterrupt are
                            # deliberately not swallowed.
                            logging.critical(traceback.format_exc())
                finally:
                    prot.close()

                delete_quarantined_replicas(rse_id=rse_id,
                                            replicas=deleted_replicas)

                if once:
                    break

            if once:
                break

            if nothing_to_do:
                logging.info(
                    'Dark Reaper %s-%s: Nothing to do. I will sleep for 60s',
                    worker_number, total_workers)
                time.sleep(60)

        except DatabaseException as error:
            logging.warning('Reaper:  %s', str(error))
        except Exception:
            logging.critical(traceback.format_exc())

    die(executable=executable,
        hostname=hostname,
        pid=pid,
        thread=thread,
        hash_executable=hash_executable)
    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
    return
Пример #4
0
def reaper(rses, chunk_size=100, once=False, scheme=None, sleep_time=300):
    """
    Main loop to select and delete files.

    On every pass the daemon refreshes its heartbeat, restricts the given
    RSEs to those returned by list_rses_with_quarantined_replicas and walks
    them in random order. For each RSE it lists a chunk of quarantined
    replicas, deletes the dark ones through the RSE's 'delete' protocol and
    hands the real replicas plus the dark ones that were deleted (or were
    already missing on storage) to delete_quarantined_replicas.

    :param rses: List of RSE ids the reaper should work against.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, stops after the first RSE of the first pass of the main loop.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param sleep_time: Sleep time passed to daemon_sleep when a pass found nothing to do.
    """

    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)
    hash_executable = hashlib.sha256(
        (sys.argv[0] + ''.join(rses)).encode()).hexdigest()
    sanity_check(executable=None, hostname=hostname)

    def refresh_heartbeat():
        # Send a heartbeat and build a logger whose prefix carries the
        # worker slot reported by that heartbeat.
        beat = live(executable=executable,
                    hostname=hostname,
                    pid=pid,
                    thread=thread,
                    hash_executable=hash_executable)
        prefix = 'dark-reaper [%i/%i] : ' % (beat['assign_thread'],
                                             beat['nr_threads'])
        return beat, formatted_logger(logging.log, prefix + '%s')

    # heartbeat
    heartbeat, logger = refresh_heartbeat()
    logger(logging.INFO, 'Starting Dark Reaper on RSEs: %s', ', '.join(rses))

    if not once:
        logger(logging.INFO, 'Waiting for heartbeat synchonization')
        # To prevent running on the same partition if all the reapers
        # restart at the same time
        GRACEFUL_STOP.wait(10)

    while not GRACEFUL_STOP.is_set():
        try:
            heartbeat, logger = refresh_heartbeat()
            logger(logging.INFO, 'Live gives {0}'.format(heartbeat))
            nothing_to_do = True
            start_time = time.time()

            rses_to_process = list(
                set(rses) & set(list_rses_with_quarantined_replicas()))
            random.shuffle(rses_to_process)
            for rse_id in rses_to_process:
                # The following query returns the list of real replicas (deleted_replicas) and list of dark replicas (dark_replicas)
                # Real replicas can be directly removed from the quarantine table
                deleted_replicas, dark_replicas = list_quarantined_replicas(
                    rse_id=rse_id,
                    limit=chunk_size,
                    worker_number=heartbeat['assign_thread'],
                    total_workers=heartbeat['nr_threads'])

                rse_info = rsemgr.get_rse_info(rse_id=rse_id)
                rse = rse_info['rse']
                prot = rsemgr.create_protocol(rse_info,
                                              'delete',
                                              scheme=scheme)

                heartbeat, logger = refresh_heartbeat()
                try:
                    prot.connect()
                    for replica in dark_replicas:
                        nothing_to_do = False
                        # A quarantined entry may carry no scope at all.
                        internal_scope = replica['scope']
                        scope = ''
                        if internal_scope:
                            scope = internal_scope.external
                        # Reset per replica so that a failure while resolving
                        # the PFN is never reported with the previous
                        # replica's PFN (or hits an unbound name on the
                        # first one).
                        pfn = None
                        try:
                            pfn = str(
                                list(
                                    rsemgr.lfns2pfns(
                                        rse_settings=rse_info,
                                        lfns=[{
                                            'scope': scope,
                                            'name': replica['name'],
                                            'path': replica['path']
                                        }],
                                        operation='delete',
                                        scheme=scheme).values())[0])
                            logger(logging.INFO,
                                   'Deletion ATTEMPT of %s:%s as %s on %s',
                                   scope, replica['name'], pfn, rse)
                            start = time.time()
                            prot.delete(pfn)
                            duration = time.time() - start
                            logger(
                                logging.INFO,
                                'Deletion SUCCESS of %s:%s as %s on %s in %s seconds',
                                scope, replica['name'], pfn, rse, duration)
                            payload = {
                                'scope': scope,
                                'name': replica['name'],
                                'rse': rse,
                                'rse_id': rse_id,
                                'file-size': replica.get('bytes') or 0,
                                'bytes': replica.get('bytes') or 0,
                                'url': pfn,
                                'duration': duration,
                                'protocol': prot.attributes['scheme']
                            }
                            # Only dereference .vo when there is a scope;
                            # otherwise a scope-less replica would be deleted
                            # from storage but never recorded as deleted.
                            if internal_scope and internal_scope.vo != 'def':
                                payload['vo'] = internal_scope.vo
                            add_message('deletion-done', payload)
                            deleted_replicas.append(replica)
                        except SourceNotFound:
                            # Already gone from storage: nothing left to
                            # delete, so the entry can leave the quarantine.
                            err_msg = (
                                'Deletion NOTFOUND of %s:%s as %s on %s' %
                                (scope, replica['name'], pfn, rse))
                            logger(logging.WARNING, err_msg)
                            deleted_replicas.append(replica)
                        except (ServiceUnavailable, RSEAccessDenied,
                                ResourceTemporaryUnavailable) as error:
                            # Storage not reachable: keep the entry in the
                            # quarantine so that it is retried later.
                            err_msg = (
                                'Deletion NOACCESS of %s:%s as %s on %s: %s' %
                                (scope, replica['name'], pfn, rse, str(error)))
                            logger(logging.WARNING, err_msg)
                            payload = {
                                'scope': scope,
                                'name': replica['name'],
                                'rse': rse,
                                'rse_id': rse_id,
                                # .get(), as in the success payload: the
                                # size may be missing for a dark replica.
                                'file-size': replica.get('bytes') or 0,
                                'bytes': replica.get('bytes') or 0,
                                'url': pfn,
                                'reason': str(error),
                                'protocol': prot.attributes['scheme']
                            }
                            # An exception raised inside this handler would
                            # escape the replica loop and skip the quarantine
                            # clean-up for the whole RSE, so guard the scope.
                            if internal_scope and internal_scope.vo != 'def':
                                payload['vo'] = internal_scope.vo
                            add_message('deletion-failed', payload)

                        except Exception:
                            # Best effort: log and move on to the next replica.
                            logging.critical(traceback.format_exc())
                finally:
                    prot.close()

                delete_quarantined_replicas(rse_id=rse_id,
                                            replicas=deleted_replicas)

                if once:
                    break

            if once:
                break

            if nothing_to_do:
                logger(logging.INFO, 'Nothing to do')
                daemon_sleep(start_time=start_time,
                             sleep_time=sleep_time,
                             graceful_stop=GRACEFUL_STOP)

        except DatabaseException as error:
            logging.warning('Reaper:  %s', str(error))
        except Exception:
            logging.critical(traceback.format_exc())

    die(executable=executable,
        hostname=hostname,
        pid=pid,
        thread=thread,
        hash_executable=hash_executable)
    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
    return