Example #1
def submit_transfer(transfertool_obj, transfers, job_params, submitter='submitter', timeout=None, logger=logging.log):
    """
    Submit a transfer or staging request

    :param transfertool_obj:      The transfertool object to be used for submission
    :param transfers:             Transfer objects to be submitted
    :param job_params:            Parameters to be used for all transfers in the given job.
    :param submitter:             Name of the submitting entity.
    :param timeout:               Timeout
    :param logger:                Optional decorated logger that can be passed from the calling daemons or servers.
    """

    for transfer in transfers:
        try:
            transfer_core.mark_submitting_and_prepare_sources_for_transfer(transfer, external_host=transfertool_obj.external_host, logger=logger)
        except RequestNotFound as error:
            logger(logging.ERROR, str(error))
            return
        except Exception:
            logger(logging.ERROR, 'Failed to prepare requests %s for the SUBMITTING state. Marking the current request SUBMISSION_FAILED and aborting submission.' % [str(t.rws) for t in transfers], exc_info=True)
            set_request_state(request_id=transfer.rws.request_id, new_state=RequestState.SUBMISSION_FAILED)
            return

    try:
        _submit_transfers(transfertool_obj, transfers, job_params, submitter, timeout, logger)
    except DuplicateFileTransferSubmission as error:
        logger(logging.WARNING, 'Failed to bulk submit a job because of duplicate file : %s', str(error))
        logger(logging.INFO, 'Submitting files one by one')
        for transfer in transfers:
            _submit_transfers(transfertool_obj, [transfer], job_params, submitter, timeout, logger)
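
A minimal usage sketch for submit_transfer. The transfertool wrapper, prepared transfer list and shared job parameters below are placeholders for objects the calling daemon would already have built; they are assumptions of this sketch, not part of the example above.

def submit_prepared_batch(transfertool_obj, transfers, job_params):
    # Hypothetical wiring: transfertool_obj is an already-constructed transfertool
    # wrapper exposing at least an .external_host attribute, and transfers is the
    # list of transfer objects prepared for one bulk job.
    submit_transfer(transfertool_obj=transfertool_obj,
                    transfers=transfers,
                    job_params=job_params,
                    submitter='conveyor-submitter',  # illustrative submitter name
                    timeout=120,                     # illustrative timeout in seconds
                    logger=logging.log)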
Example #2
File: common.py  Project: zzaiin/Rucio
def update_request_state(response, session=None):
    """
    Used by poller and consumer to update the internal state of requests,
    after the response by the external transfertool.

    :param response: The transfertool response dictionary, retrieved via request.query_request().
    :param session: The database session to use.
    :returns commit_or_rollback: Boolean.
    """

    try:
        if not response['new_state']:
            request_core.touch_request(response['request_id'], session=session)
            return False
        else:
            request = request_core.get_request(response['request_id'], session=session)
            if request and request['external_id'] == response['transfer_id'] and request['state'] != response['new_state']:
                response['submitted_at'] = request.get('submitted_at', None)
                response['external_host'] = request['external_host']
                transfer_id = response['transfer_id'] if 'transfer_id' in response else None
                logging.info('UPDATING REQUEST %s FOR TRANSFER %s STATE %s' % (str(response['request_id']), transfer_id, str(response['new_state'])))

                job_m_replica = response.get('job_m_replica', None)
                src_url = response.get('src_url', None)
                src_rse = response.get('src_rse', None)
                src_rse_id = response.get('src_rse_id', None)
                started_at = response.get('started_at', None)
                transferred_at = response.get('transferred_at', None)
                scope = response.get('scope', None)
                name = response.get('name', None)
                if job_m_replica and str(job_m_replica).lower() == 'true' and src_url:
                    try:
                        src_rse_name, src_rse_id = get_source_rse(response['request_id'], scope, name, src_url, session=session)
                    except Exception:
                        logging.warning('Cannot get correct RSE for source url: %s (%s)' % (src_url, traceback.format_exc()))
                        src_rse_name = None
                    if src_rse_name and src_rse_name != src_rse:
                        response['src_rse'] = src_rse_name
                        response['src_rse_id'] = src_rse_id
                        logging.debug('Correct RSE: %s for source surl: %s' % (src_rse_name, src_url))
                err_msg = get_transfer_error(response['new_state'], response['reason'] if 'reason' in response else None)

                request_core.set_request_state(response['request_id'],
                                               response['new_state'],
                                               transfer_id=transfer_id,
                                               started_at=started_at,
                                               transferred_at=transferred_at,
                                               src_rse_id=src_rse_id,
                                               err_msg=err_msg,
                                               session=session)

                add_monitor_message(request, response, session=session)
                return True
            elif not request:
                logging.debug("Request %s doesn't exist, will not update" % (response['request_id']))
                return False
            elif request['external_id'] != response['transfer_id']:
                logging.warning("Reponse %s with transfer id %s is different from the request transfer id %s, will not update" % (response['request_id'], response['transfer_id'], request['external_id']))
                return False
            else:
                logging.debug("Request %s is already in %s state, will not update" % (response['request_id'], response['new_state']))
                return False
    except UnsupportedOperation as error:
        logging.warning("Request %s doesn't exist - Error: %s" % (response['request_id'], str(error).replace('\n', '')))
        return False
    except Exception:
        logging.critical(traceback.format_exc())
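
A hedged sketch of how a poller might call update_request_state, assuming an open database session. The response dictionary below carries only the keys the function reads; a real response from request.query_request() would contain more fields, and the identifiers are placeholders.

response = {'request_id': '00000000000000000000000000000000',  # placeholder request id
            'transfer_id': 'fts-job-0000',                     # placeholder external transfer id
            'new_state': RequestState.DONE,
            'reason': None,
            'job_m_replica': None,
            'src_url': None,
            'src_rse': None,
            'src_rse_id': None,
            'started_at': None,
            'transferred_at': None,
            'scope': 'mock',
            'name': 'file.example'}
updated = update_request_state(response, session=session)  # True only if the request row was changed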
Example #3
File: common.py  Project: zzaiin/Rucio
def set_transfer_state(external_host, transfer_id, state, session=None):
    """
    Used by poller to update the internal state of transfer,
    after the response by the external transfertool.

    :param external_host: Name of the external host.
    :param transfer_id: external transfer job id as a string.
    :param state: request state as a string.
    :param session: The database session to use.
    :returns commit_or_rollback: Boolean.
    """

    try:
        if state == RequestState.LOST:
            reqs = request_core.get_requests_by_transfer(external_host, transfer_id, session=session)
            for req in reqs:
                logging.info('REQUEST %s OF TRANSFER %s ON %s STATE %s' % (str(req['request_id']), external_host, transfer_id, str(state)))
                src_rse_id = req.get('source_rse_id', None)
                dst_rse_id = req.get('dest_rse_id', None)
                src_rse = None
                dst_rse = None
                if src_rse_id:
                    src_rse = rse_core.get_rse_name(src_rse_id, session=session)
                if dst_rse_id:
                    dst_rse = rse_core.get_rse_name(dst_rse_id, session=session)
                response = {'new_state': state,
                            'transfer_id': transfer_id,
                            'job_state': state,
                            'src_url': None,
                            'dst_url': req['dest_url'],
                            'duration': 0,
                            'reason': "The FTS job lost",
                            'scope': req.get('scope', None),
                            'name': req.get('name', None),
                            'src_rse': src_rse,
                            'dst_rse': dst_rse,
                            'request_id': req.get('request_id', None),
                            'activity': req.get('activity', None),
                            'src_rse_id': req.get('source_rse_id', None),
                            'dst_rse_id': req.get('dest_rse_id', None),
                            'previous_attempt_id': req.get('previous_attempt_id', None),
                            'adler32': req.get('adler32', None),
                            'md5': req.get('md5', None),
                            'filesize': req.get('filesize', None),
                            'external_host': external_host,
                            'job_m_replica': None,
                            'created_at': req.get('created_at', None),
                            'submitted_at': req.get('submitted_at', None),
                            'details': None,
                            'account': req.get('account', None)}

                err_msg = get_transfer_error(response['new_state'], response['reason'] if 'reason' in response else None)
                request_core.set_request_state(req['request_id'],
                                               response['new_state'],
                                               transfer_id=transfer_id,
                                               src_rse_id=src_rse_id,
                                               err_msg=err_msg,
                                               session=session)

                add_monitor_message(req, response, session=session)
        else:
            request_core.set_transfer_state(external_host, transfer_id, state, session=session)
        return True
    except UnsupportedOperation as error:
        logging.warning("Transfer %s on %s doesn't exist - Error: %s" % (transfer_id, external_host, str(error).replace('\n', '')))
        return False
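
A hedged example of the poller using set_transfer_state to mark every request attached to one FTS job as lost; the endpoint, job id and session are illustrative assumptions.

lost_handled = set_transfer_state(external_host='https://fts3.example.org:8446',  # placeholder FTS host
                                  transfer_id='fts-job-0000',                     # placeholder job id
                                  state=RequestState.LOST,
                                  session=session)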
Example #4
def update_bad_request(req, dest_rse, new_state, detail, session=None):
    """
    Fail a bad request: set its state, emit a transfer-failed message, archive the
    request, mark the destination replica UNAVAILABLE and release the failed lock.

    :param req: The request dictionary.
    :param dest_rse: Name of the destination RSE.
    :param new_state: The new request state (only RequestState.FAILED is handled).
    :param detail: Human-readable error details.
    :param session: The database session to use.
    """
    if new_state == RequestState.FAILED:
        request.set_request_state(req['request_id'], new_state, session=session)

        activity = 'default'
        if req['attributes']:
            if type(req['attributes']) is dict:
                req_attributes = json.loads(json.dumps(req['attributes']))
            else:
                req_attributes = json.loads(str(req['attributes']))
            activity = req_attributes['activity'] if req_attributes['activity'] else 'default'

        tss = time.time()
        add_message('transfer-failed', {'activity': activity,
                                        'request-id': req['request_id'],
                                        'checksum-adler': None,
                                        'checksum-md5': None,
                                        'dst-rse': dest_rse,
                                        'dst-url': None,
                                        'name': req['name'],
                                        'guid': None,
                                        'file-size': None,
                                        'previous-request-id': req['request_id'],
                                        'protocol': None,
                                        'reason': detail,
                                        'transfer-link': None,
                                        'scope': req['scope'],
                                        'src-rse': None,
                                        'src-url': None,
                                        'tool-id': 'rucio-conveyor',
                                        'transfer-endpoint': config_get('conveyor', 'ftshosts'),
                                        'transfer-id': None},
                    session=session)

        request.archive_request(req['request_id'], session=session)
        logging.error('BAD DID %s:%s REQUEST %s details: %s' % (req['scope'],
                                                                req['name'],
                                                                req['request_id'],
                                                                detail))
        try:
            replica.update_replicas_states([{'rse': dest_rse,
                                             'scope': req['scope'],
                                             'name': req['name'],
                                             'state': ReplicaState.UNAVAILABLE}],
                                           session=session)
        except Exception:
            logging.critical("Could not update replica state for failed transfer %s:%s at %s (%s)" % (req['scope'],
                                                                                                      req['name'],
                                                                                                      dest_rse,
                                                                                                      traceback.format_exc()))
            raise

        tss = time.time()
        try:
            lock.failed_transfer(req['scope'],
                                 req['name'],
                                 req['dest_rse_id'],
                                 session=session)
        except Exception:
            logging.warn('Could not update lock for failed transfer %s:%s at %s (%s)' % (req['scope'],
                                                                                         req['name'],
                                                                                         dest_rse,
                                                                                         traceback.format_exc()))
            raise
        record_timer('daemons.conveyor.common.update_request_state.lock-failed_transfer', (time.time()-tss)*1000)
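
A sketch of failing one request with update_bad_request. The request dictionary contains only the keys the function reads above; in practice it would be a full request row, and every identifier here is a placeholder.

req = {'request_id': '00000000000000000000000000000000',   # placeholder request id
       'scope': 'mock',
       'name': 'file.example',
       'attributes': {'activity': 'User Subscriptions'},
       'dest_rse_id': '00000000000000000000000000000000'}  # placeholder destination RSE id
update_bad_request(req,
                   dest_rse='MOCK_DISK',
                   new_state=RequestState.FAILED,
                   detail='transfer failed: checksum mismatch',
                   session=session)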
Example #5
        logging.warning("Request %s doesn't exist anymore, should not be updated again. Error: %s" % (response['request_id'], str(e).replace('\n', '')))
        return False

    if not response['new_state']:
        return False
    elif response['new_state'] == RequestState.LOST:
        logging.debug('UPDATING REQUEST %s FOR STATE %s' % (str(response['request_id']), str(response['new_state'])))
    elif response['new_state'] and 'job_state' in response and response['job_state']:
        logging.debug('UPDATING REQUEST %s FOR TRANSFER(%s) STATE %s' % (str(response['request_id']), str(response['transfer_id']), str(response['job_state'])))
    else:
        return False

    if response['new_state']:

        request.set_request_state(response['request_id'],
                                  response['new_state'],
                                  session=session)

        if response['new_state'] == RequestState.DONE:
            rse_name = response['dst_rse']
            rse_update_name = rse_name

            req = request.get_request(response['request_id'], session=session)
            if req['request_type'] == RequestType.STAGEIN:
                rse_update_name = rse_core.list_rse_attributes(response['dst_rse'], session=session)['staging_buffer']
                logging.debug('OVERRIDE REPLICA DID %s:%s RSE %s TO %s' % (response['scope'], response['name'], response['dst_rse'], rse_update_name))

            try:
                tss = time.time()
                logging.debug('UPDATE REPLICA STATE DID %s:%s RSE %s' % (response['scope'], response['name'], rse_update_name))
Example #6
def submitter(once=False, rses=[], process=0, total_processes=1, thread=0, total_threads=1, mock=False, bulk=100, activities=None):
    """
    Main loop to submit a new transfer primitive to a transfertool.
    """

    logging.info('submitter starting - process (%i/%i) thread (%i/%i)' % (process,
                                                                          total_processes,
                                                                          thread,
                                                                          total_threads))

    try:
        scheme = config_get('conveyor', 'scheme')
    except NoOptionError:
        scheme = 'srm'

    logging.info('submitter started - process (%i/%i) thread (%i/%i)' % (process,
                                                                         total_processes,
                                                                         thread,
                                                                         total_threads))

    while not graceful_stop.is_set():

        try:

            if activities is None:
                activities = [None]
            for activity in activities:
                if rses is None:
                    rses = [None]

                for rse in rses:
                    if rse:
                        # run in rse list mode
                        rse_info = rsemgr.get_rse_info(rse['rse'])
                        logging.info("Working on RSE: %s" % rse['rse'])
                        ts = time.time()
                        reqs = get_requests(rse_id=rse['id'],
                                            process=process,
                                            total_processes=total_processes,
                                            thread=thread,
                                            total_threads=total_threads,
                                            mock=mock,
                                            bulk=bulk,
                                            activity=activity)
                        record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000)
                    else:
                        # no rse list, run FIFO mode
                        rse_info = None
                        ts = time.time()
                        reqs = get_requests(process=process,
                                            total_processes=total_processes,
                                            thread=thread,
                                            total_threads=total_threads,
                                            mock=mock,
                                            bulk=bulk,
                                            activity=activity)
                        record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000)

                    if reqs:
                        logging.debug('%i:%i - submitting %i requests' % (process, thread, len(reqs)))

                    if not reqs or reqs == []:
                        time.sleep(1)
                        continue

                    for req in reqs:
                        try:
                            if not rse:
                                # no rse list, in FIFO mode
                                dest_rse = rse_core.get_rse(rse=None, rse_id=req['dest_rse_id'])
                                rse_info = rsemgr.get_rse_info(dest_rse['rse'])

                            ts = time.time()
                            transfer = get_transfer(rse_info, req, scheme, mock)
                            record_timer('daemons.conveyor.submitter.get_transfer', (time.time() - ts) * 1000)
                            logging.debug('Transfer for request %s: %s' % (req['request_id'], transfer))

                            if transfer is None:
                                logging.warn("Request %s DID %s:%s RSE %s failed to get transfer" % (req['request_id'],
                                                                                                     req['scope'],
                                                                                                     req['name'],
                                                                                                     rse_info['rse']))
                                # TODO: Merge these two calls
                                request.set_request_state(req['request_id'],
                                                          RequestState.LOST)  # if the DID does not exist anymore
                                request.archive_request(req['request_id'])
                                continue

                            ts = time.time()
                            tmp_metadata = transfer['file_metadata']
                            eids = request.submit_transfers(transfers=[transfer, ],
                                                            transfertool='fts3',
                                                            job_metadata=tmp_metadata)

                            record_timer('daemons.conveyor.submitter.submit_transfer', (time.time() - ts) * 1000)

                            ts = time.time()
                            if req['previous_attempt_id']:
                                logging.info('COPYING RETRY %s REQUEST %s PREVIOUS %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['retry_count'],
                                                                                                                                        req['request_id'],
                                                                                                                                        req['previous_attempt_id'],
                                                                                                                                        req['scope'],
                                                                                                                                        req['name'],
                                                                                                                                        transfer['src_urls'],
                                                                                                                                        transfer['dest_urls'],
                                                                                                                                        eids[req['request_id']]['external_host'],
                                                                                                                                        eids[req['request_id']]['external_id']))
                            else:
                                logging.info('COPYING REQUEST %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['request_id'],
                                                                                                                   req['scope'],
                                                                                                                   req['name'],
                                                                                                                   transfer['src_urls'],
                                                                                                                   transfer['dest_urls'],
                                                                                                                   eids[req['request_id']]['external_host'],
                                                                                                                   eids[req['request_id']]['external_id']))
                            record_counter('daemons.conveyor.submitter.submit_request')
                        except UnsupportedOperation as e:
                            # The replica doesn't exist, need to cancel the request
                            logging.warning(e)
                            logging.info('Cancelling transfer request %s' % req['request_id'])
                            try:
                                # TODO: for now, there is only ever one destination
                                request.cancel_request_did(req['scope'], req['name'], transfer['dest_urls'][0])
                            except Exception as e:
                                logging.warning('Cannot cancel request: %s' % str(e))
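
A hedged single-pass invocation of the submitter loop for one activity, e.g. from a test harness. Note that rses must be None (not the default empty list) to reach the FIFO branch above, and whether once=True actually stops the loop depends on code beyond this excerpt.

submitter(once=True,
          rses=None,                            # None (not []) selects the FIFO branch
          process=0, total_processes=1,
          thread=0, total_threads=1,
          mock=False,
          bulk=100,
          activities=['User Subscriptions'])    # illustrative activity name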
Example #7
File: common.py  Project: rak108/rucio
def __create_missing_replicas_and_requests(
        transfer_path: "List[DirectTransferDefinition]",
        default_tombstone_delay: int,
        logger: "Callable",
        session: "Optional[Session]" = None
) -> "Tuple[bool, bool]":
    """
    Create replicas and requests in the database for the intermediate hops
    """
    initial_request_id = transfer_path[-1].rws.request_id
    creation_successful = True
    must_skip_submission = False
    created_requests = []
    # Iterate the path in reverse order. The last hop is the initial request, so
    # next_hop.rws.request_id will always be initialized when handling the current hop.
    for i in reversed(range(len(transfer_path))):
        hop = transfer_path[i]
        rws = hop.rws
        if rws.request_id:
            continue

        tombstone_delay = rws.dest_rse.attributes.get('multihop_tombstone_delay', default_tombstone_delay)
        try:
            tombstone = tombstone_from_delay(tombstone_delay)
        except ValueError:
            logger(logging.ERROR, "%s: Cannot parse multihop tombstone delay %s", initial_request_id, tombstone_delay)
            creation_successful = False
            break

        files = [{'scope': rws.scope,
                  'name': rws.name,
                  'bytes': rws.byte_count,
                  'adler32': rws.adler32,
                  'md5': rws.md5,
                  'tombstone': tombstone,
                  'state': 'C'}]
        try:
            add_replicas(rse_id=rws.dest_rse.id,
                         files=files,
                         account=rws.account,
                         ignore_availability=False,
                         dataset_meta=None,
                         session=session)
            # Set replica state to Copying in case replica already existed in another state.
            # Can happen when a multihop transfer failed previously, and we are re-scheduling it now.
            update_replica_state(rse_id=rws.dest_rse.id, scope=rws.scope, name=rws.name, state=ReplicaState.COPYING, session=session)
        except Exception as error:
            logger(logging.ERROR, '%s: Problem adding replicas on %s : %s', initial_request_id, rws.dest_rse, str(error))

        rws.attributes['is_intermediate_hop'] = True
        rws.attributes['source_replica_expression'] = hop.src.rse.name
        new_req = queue_requests(requests=[{'dest_rse_id': rws.dest_rse.id,
                                            'scope': rws.scope,
                                            'name': rws.name,
                                            'rule_id': '00000000000000000000000000000000',  # Dummy Rule ID used for multihop. TODO: Replace with actual rule_id once we can flag intermediate requests
                                            'attributes': rws.attributes,
                                            'request_type': rws.request_type,
                                            'retry_count': rws.retry_count,
                                            'account': rws.account,
                                            'requested_at': datetime.datetime.now()}], session=session)
        # If a request already exists, new_req will be an empty list.
        if new_req:
            db_req = new_req[0]
        else:
            db_req = request_core.get_request_by_did(rws.scope, rws.name, rws.dest_rse.id, session=session)
            # A transfer already exists for part of the path. Just construct the remaining
            # path, but don't submit the transfer. We must wait for the existing transfer to be
            # completed before continuing.
            must_skip_submission = True

        models.TransferHop(request_id=db_req['id'],
                           next_hop_request_id=transfer_path[i + 1].rws.request_id,
                           initial_request_id=initial_request_id,
                           ).save(session=session, flush=False)
        rws.request_id = db_req['id']
        rws.requested_at = db_req['requested_at']
        logger(logging.DEBUG, '%s: New request created for the transfer between %s and %s : %s', initial_request_id, transfer_path[0].src, transfer_path[-1].dst, rws.request_id)
        set_request_state(rws.request_id, RequestState.QUEUED, session=session, logger=logger)
        created_requests.append(rws.request_id)

    return creation_successful, must_skip_submission
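
A hedged caller-side sketch of consuming the two returned flags; multihop_path stands in for a list of DirectTransferDefinition hops prepared by the path-finding code and is an assumption of this sketch.

created, skip_submission = __create_missing_replicas_and_requests(
    transfer_path=multihop_path,   # assumed: hops built elsewhere, the last one holding the initial request
    default_tombstone_delay=3600,  # illustrative delay in seconds
    logger=logger,
    session=session)
if not created:
    # An intermediate request could not be created: drop the whole multihop path.
    pass
elif skip_submission:
    # Part of the path is already being transferred: keep the new requests but do not submit.
    pass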