Exemplo n.º 1
0
def __update_bulk_replicas(replicas, req_type, rule_id, session=None):
    """
    Used by finisher to handle available and unavailable replicas blongs to same rule in bulk way.

    :param replicas:              List of replicas.
    :param req_type:              Request type: STAGEIN, STAGEOUT, TRANSFER.
    :param rule_id:               RULE id.
    :param session:               The database session to use.
    :returns commit_or_rollback:  Boolean.
    """
    try:
        replica_core.update_replicas_states(replicas,
                                            nowait=True,
                                            session=session)
    except ReplicaNotFound as error:
        logging.warn(
            'Failed to bulk update replicas, will do it one by one: %s' %
            str(error))
        raise ReplicaNotFound(error)

    for replica in replicas:
        if not replica['archived']:
            request_core.archive_request(replica['request_id'],
                                         session=session)
        logging.info("HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s" %
                     (replica['request_id'], replica['scope'], replica['name'],
                      replica['rse_id'], str(replica['state'])))
    return True
Exemplo n.º 2
0
def __update_bulk_replicas(replicas, session=None, logger=logging.log):
    """
    Used by finisher to handle available and unavailable replicas blongs to same rule in bulk way.

    :param replicas:              List of replicas.
    :param session:               The database session to use.
    :returns commit_or_rollback:  Boolean.
    """
    try:
        replica_core.update_replicas_states(replicas,
                                            nowait=True,
                                            session=session)
    except ReplicaNotFound as error:
        logger(logging.WARNING,
               'Failed to bulk update replicas, will do it one by one: %s',
               str(error))
        raise ReplicaNotFound(error)

    for replica in replicas:
        if not replica['archived']:
            request_core.archive_request(replica['request_id'],
                                         session=session)
        logger(logging.INFO, "HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s",
               replica['request_id'], replica['scope'], replica['name'],
               replica['rse_id'], str(replica['state']))
    return True
Exemplo n.º 3
0
def __update_replica(replica, session=None):
    """
    Used by finisher to update a replica to a finished state.

    :param replica:               Replica as a dictionary.
    :param rule_id:               RULE id.
    :param session:               The database session to use.
    :returns commit_or_rollback:  Boolean.
    """

    try:
        replica_core.update_replicas_states([replica],
                                            nowait=True,
                                            add_tombstone=True,
                                            session=session)
        if not replica['archived']:
            request_core.archive_request(replica['request_id'],
                                         session=session)
        logging.info("HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s",
                     replica['request_id'], replica['scope'], replica['name'],
                     replica['rse_id'], str(replica['state']))
    except (UnsupportedOperation, ReplicaNotFound) as error:
        logging.warn(
            "ERROR WHEN HANDLING REQUEST %s DID %s:%s AT RSE %s STATE %s: %s",
            replica['request_id'], replica['scope'], replica['name'],
            replica['rse_id'], str(replica['state']), str(error))
        # replica cannot be found. register it and schedule it for deletion
        try:
            if replica['state'] == ReplicaState.AVAILABLE and replica[
                    'request_type'] != RequestType.STAGEIN:
                logging.info(
                    "Replica cannot be found. Adding a replica %s:%s AT RSE %s with tombstone=utcnow",
                    replica['scope'], replica['name'], replica['rse_id'])
                replica_core.add_replica(
                    replica['rse_id'],
                    replica['scope'],
                    replica['name'],
                    replica['bytes'],
                    pfn=replica['pfn'] if 'pfn' in replica else None,
                    account=InternalAccount(
                        'root', vo=replica['scope'].vo
                    ),  # it will deleted immediately, do we need to get the accurate account from rule?
                    adler32=replica['adler32'],
                    tombstone=datetime.datetime.utcnow(),
                    session=session)
            if not replica['archived']:
                request_core.archive_request(replica['request_id'],
                                             session=session)
            logging.info("HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s",
                         replica['request_id'], replica['scope'],
                         replica['name'], replica['rse_id'],
                         str(replica['state']))
        except Exception as error:
            logging.error(
                'Cannot register replica for DID %s:%s at RSE %s - potential dark data - %s',
                replica['scope'], replica['name'], replica['rse_id'],
                str(error))
            raise

    return True
Exemplo n.º 4
0
def __update_bulk_replicas(replicas, session=None):
    """
    Used by finisher to handle available and unavailable replicas blongs to same rule in bulk way.

    :param replicas:              List of replicas.
    :param session:               The database session to use.
    :returns commit_or_rollback:  Boolean.
    """
    try:
        replica_core.update_replicas_states(
            replicas, nowait=True, add_tombstone=False, session=session
        )  # TODO: add_tombstone needs to be changed to True once set_tombstone is fixed
    except ReplicaNotFound as error:
        logging.warn(
            'Failed to bulk update replicas, will do it one by one: %s',
            str(error))
        raise ReplicaNotFound(error)

    for replica in replicas:
        if not replica['archived']:
            request_core.archive_request(replica['request_id'],
                                         session=session)
        logging.info("HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s",
                     replica['request_id'], replica['scope'], replica['name'],
                     replica['rse_id'], str(replica['state']))
    return True
Exemplo n.º 5
0
def update_bad_request(req, dest_rse, new_state, detail, session=None):
    if new_state == RequestState.FAILED:
        request.set_request_state(req['request_id'], new_state, session=session)

        activity = 'default'
        if req['attributes']:
            if type(req['attributes']) is dict:
                req_attributes = json.loads(json.dumps(req['attributes']))
            else:
                req_attributes = json.loads(str(req['attributes']))
            activity = req_attributes['activity'] if req_attributes['activity'] else 'default'

        tss = time.time()
        add_message('transfer-failed', {'activity': activity,
                                        'request-id': req['request_id'],
                                        'checksum-adler': None,
                                        'checksum-md5': None,
                                        'dst-rse': dest_rse,
                                        'dst-url': None,
                                        'name': req['name'],
                                        'guid': None,
                                        'file-size': None,
                                        'previous-request-id': req['request_id'],
                                        'protocol': None,
                                        'reason': detail,
                                        'transfer-link': None,
                                        'scope': req['scope'],
                                        'src-rse': None,
                                        'src-url': None,
                                        'tool-id': 'rucio-conveyor',
                                        'transfer-endpoint': config_get('conveyor', 'ftshosts'),
                                        'transfer-id': None},
                    session=session)

        request.archive_request(req['request_id'], session=session)
        logging.error('BAD DID %s:%s REQUEST %s details: %s' % (req['scope'],
                                                                req['name'],
                                                                req['request_id'],
                                                                detail))
        try:
            replica.update_replicas_states([{'rse': dest_rse,
                                             'scope': req['scope'],
                                             'name': req['name'],
                                             'state': ReplicaState.UNAVAILABLE}],
                                           session=session)
        except:
            logging.critical("Could not update replica state for failed transfer %s:%s at %s (%s)" % (req['scope'],
                                                                                                      req['name'],
                                                                                                      dest_rse,
                                                                                                      traceback.format_exc()))
            raise

        tss = time.time()
        try:
            lock.failed_transfer(req['scope'],
                                 req['name'],
                                 req['dest_rse_id'],
                                 session=session)
        except:
            logging.warn('Could not update lock for failed transfer %s:%s at %s (%s)' % (req['scope'],
                                                                                         req['name'],
                                                                                         dest_rse,
                                                                                         traceback.format_exc()))
            raise
        record_timer('daemons.conveyor.common.update_request_state.lock-failed_transfer', (time.time()-tss)*1000)
Exemplo n.º 6
0
                                                                                                                        rse_name))
                            raise

                except:
                    # could not update successful lock
                    record_timer('daemons.conveyor.common.update_request_state.replica-update_replicas_states', (time.time()-tss)*1000)
                    logging.warn("Could not update replica state for successful transfer %s:%s at %s: %s" % (response['scope'],
                                                                                                             response['name'],
                                                                                                             rse_name,
                                                                                                             traceback.format_exc()))
                    raise

                record_timer('daemons.conveyor.common.update_request_state.replica-update_replicas_states', (time.time()-tss)*1000)

                tss = time.time()
                request.archive_request(response['request_id'], session=session)
                record_timer('daemons.conveyor.common.update_request_state.request-archive_request', (time.time()-tss)*1000)

                add_monitor_message(response, session=session)
            except exception.UnsupportedOperation, e:
                # The replica doesn't exist
                request.archive_request(response['request_id'], session=session)
                logging.warning(str(e).replace('\n', ''))
                return True
            except:
                logging.critical("Something unexpected happened when updating replica state for successful transfer %s:%s at %s (%s)" % (response['scope'],
                                                                                                                                         response['name'],
                                                                                                                                         rse_name,
                                                                                                                                         traceback.format_exc()))
                raise
Exemplo n.º 7
0
def submitter(once=False, rses=[], process=0, total_processes=1, thread=0, total_threads=1, mock=False, bulk=100, activities=None):
    """
    Main loop to submit a new transfer primitive to a transfertool.
    """

    logging.info('submitter starting - process (%i/%i) thread (%i/%i)' % (process,
                                                                          total_processes,
                                                                          thread,
                                                                          total_threads))

    try:
        scheme = config_get('conveyor', 'scheme')
    except NoOptionError:
        scheme = 'srm'

    logging.info('submitter started - process (%i/%i) thread (%i/%i)' % (process,
                                                                         total_processes,
                                                                         thread,
                                                                         total_threads))

    while not graceful_stop.is_set():

        try:

            if activities is None:
                activities = [None]
            for activity in activities:
                if rses is None:
                    rses = [None]

                for rse in rses:
                    if rse:
                        # run in rse list mode
                        rse_info = rsemgr.get_rse_info(rse['rse'])
                        logging.info("Working on RSE: %s" % rse['rse'])
                        ts = time.time()
                        reqs = get_requests(rse_id=rse['id'],
                                            process=process,
                                            total_processes=total_processes,
                                            thread=thread,
                                            total_threads=total_threads,
                                            mock=mock,
                                            bulk=bulk,
                                            activity=activity)
                        record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000)
                    else:
                        # no rse list, run FIFO mode
                        rse_info = None
                        ts = time.time()
                        reqs = get_requests(process=process,
                                            total_processes=total_processes,
                                            thread=thread,
                                            total_threads=total_threads,
                                            mock=mock,
                                            bulk=bulk,
                                            activity=activity)
                        record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000)

                    if reqs:
                        logging.debug('%i:%i - submitting %i requests' % (process, thread, len(reqs)))

                    if not reqs or reqs == []:
                        time.sleep(1)
                        continue

                    for req in reqs:
                        try:
                            if not rse:
                                # no rse list, in FIFO mode
                                dest_rse = rse_core.get_rse(rse=None, rse_id=req['dest_rse_id'])
                                rse_info = rsemgr.get_rse_info(dest_rse['rse'])

                            ts = time.time()
                            transfer = get_transfer(rse_info, req, scheme, mock)
                            record_timer('daemons.conveyor.submitter.get_transfer', (time.time() - ts) * 1000)
                            logging.debug('Transfer for request %s: %s' % (req['request_id'], transfer))

                            if transfer is None:
                                logging.warn("Request %s DID %s:%s RSE %s failed to get transfer" % (req['request_id'],
                                                                                                     req['scope'],
                                                                                                     req['name'],
                                                                                                     rse_info['rse']))
                                # TODO: Merge these two calls
                                request.set_request_state(req['request_id'],
                                                          RequestState.LOST)  # if the DID does not exist anymore
                                request.archive_request(req['request_id'])
                                continue

                            ts = time.time()
                            tmp_metadata = transfer['file_metadata']
                            eids = request.submit_transfers(transfers=[transfer, ],
                                                            transfertool='fts3',
                                                            job_metadata=tmp_metadata)

                            record_timer('daemons.conveyor.submitter.submit_transfer', (time.time() - ts) * 1000)

                            ts = time.time()
                            if req['previous_attempt_id']:
                                logging.info('COPYING RETRY %s REQUEST %s PREVIOUS %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['retry_count'],
                                                                                                                                        req['request_id'],
                                                                                                                                        req['previous_attempt_id'],
                                                                                                                                        req['scope'],
                                                                                                                                        req['name'],
                                                                                                                                        transfer['src_urls'],
                                                                                                                                        transfer['dest_urls'],
                                                                                                                                        eids[req['request_id']]['external_host'],
                                                                                                                                        eids[req['request_id']]['external_id']))
                            else:
                                logging.info('COPYING REQUEST %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['request_id'],
                                                                                                                   req['scope'],
                                                                                                                   req['name'],
                                                                                                                   transfer['src_urls'],
                                                                                                                   transfer['dest_urls'],
                                                                                                                   eids[req['request_id']]['external_host'],
                                                                                                                   eids[req['request_id']]['external_id']))
                            record_counter('daemons.conveyor.submitter.submit_request')
                        except UnsupportedOperation, e:
                            # The replica doesn't exist, need to cancel the request
                            logging.warning(e)
                            logging.info('Cancelling transfer request %s' % req['request_id'])
                            try:
                                # TODO: for now, there is only ever one destination
                                request.cancel_request_did(req['scope'], req['name'], transfer['dest_urls'][0])
                            except Exception, e:
                                logging.warning('Cannot cancel request: %s' % str(e))