Exemplo n.º 1
0
def resolve_surl(fspec, protocol, ddmconf, **kwargs):
    """
        Get final destination SURL for file to be transferred to Objectstore
        Can be customized at the level of specific copytool

        :param fspec: file spec data
        :param protocol: suggested protocol
        :param ddmconf: full ddm storage data
        :return: dictionary {'surl': surl}
        :raise: PilotException if the ddmendpoint cannot be resolved or is unsupported
    """
    ddm = ddmconf.get(fspec.ddmendpoint)
    if not ddm:
        raise PilotException('Failed to resolve ddmendpoint by name=%s' %
                             fspec.ddmendpoint)

    if ddm.is_deterministic:
        surl = protocol.get('endpoint', '') + os.path.join(
            protocol.get('path', ''), get_rucio_path(fspec.scope, fspec.lfn))
    elif ddm.type in ['OS_ES', 'OS_LOGS']:
        surl = protocol.get('endpoint', '') + os.path.join(
            protocol.get('path', ''), fspec.lfn)
        # remember which protocol was used to build the objectstore SURL
        fspec.protocol_id = protocol.get('id')
    else:
        # bug fix: the ddmendpoint was previously passed as a second positional
        # argument (interpreted as the error code by PilotException), leaving
        # the %s placeholder in the message unformatted
        raise PilotException(
            'resolve_surl(): Failed to construct SURL for non deterministic ddm=%s: NOT IMPLEMENTED'
            % fspec.ddmendpoint)

    return {'surl': surl}
Exemplo n.º 2
0
def copy_out(files, **kwargs):
    """
    Upload given files to S3 storage.

    :param files: list of `FileSpec` objects
    :raise: PilotException in case of controlled error
    """

    workdir = kwargs.pop('workdir')

    for fspec in files:

        source = os.path.join(workdir, fspec.lfn)

        # guard clause: a missing local file is a controlled stage-out failure
        if not os.path.exists(source):
            diagnostics = 'local output file does not exist: %s' % source
            logger.warning(diagnostics)
            fspec.status = 'failed'
            fspec.status_code = errors.STAGEOUTFAILED
            raise PilotException(diagnostics, code=fspec.status_code, state=fspec.status)

        bucket = 'bucket'  # UPDATE ME
        logger.info('uploading %s to bucket=%s using object name=%s', source, bucket, fspec.lfn)
        uploaded, diagnostics = upload_file(source, bucket, object_name=fspec.lfn)

        if not uploaded:  ## an error occurred
            # create new error code(s) in ErrorCodes.py and set it/them in resolve_common_transfer_errors()
            error = resolve_common_transfer_errors(diagnostics, is_stagein=False)
            fspec.status = 'failed'
            fspec.status_code = error.get('rcode')
            raise PilotException(error.get('error'), code=error.get('rcode'), state=error.get('state'))

        fspec.status = 'transferred'
        fspec.status_code = 0

    return files
Exemplo n.º 3
0
Arquivo: data.py Projeto: ptrlv/pilot2
    def resolve_surl(self, fspec, protocol, ddmconf, **kwargs):
        """
            Get final destination SURL for a file to be transferred.
            Can be customized at the level of a specific copytool.

            :param fspec: file spec data
            :param protocol: suggested protocol
            :param ddmconf: full ddmconf data
            :return: dict with key 'surl'
            :raise: PilotException for an unknown or non-deterministic ddmendpoint
        """

        storage = ddmconf.get(fspec.ddmendpoint)
        if not storage:
            raise PilotException('Failed to resolve ddmendpoint by name=%s' %
                                 fspec.ddmendpoint)

        # consider only deterministic sites (output destination)
        # path = protocol.get('path', '').rstrip('/')
        # if not (ddm.is_deterministic or (path and path.endswith('/rucio'))):
        if not storage.is_deterministic:
            raise PilotException(
                'resolve_surl(): Failed to construct SURL for non deterministic ddm=%s: '
                'NOT IMPLEMENTED' % fspec.ddmendpoint,
                code=ErrorCodes.NONDETERMINISTICDDM)

        base = protocol.get('endpoint', '')
        relative = os.path.join(protocol.get('path', ''),
                                self.get_path(fspec.scope, fspec.lfn))
        return {'surl': base + relative}
Exemplo n.º 4
0
def copy_in(files, **kwargs):
    """
        Download given files using xrdcp command.

        :param files: list of `FileSpec` objects
        :raise: PilotException in case of controlled error
    """

    #allow_direct_access = kwargs.get('allow_direct_access') or False
    # resolve optional setup script and checksum option for the xrdcp command
    setup = kwargs.pop('copytools', {}).get('xrdcp', {}).get('setup')
    coption = _resolve_checksum_option(setup, **kwargs)
    trace_report = kwargs.get('trace_report')  # assumed initialized by the caller — TODO confirm

    localsite = os.environ.get('RUCIO_LOCAL_SITE_ID', None)
    for fspec in files:
        # update the trace report
        # fall back to the file's ddmendpoint when no local site id is configured
        localsite = localsite if localsite else fspec.ddmendpoint
        trace_report.update(localSite=localsite, remoteSite=fspec.ddmendpoint, filesize=fspec.filesize)
        trace_report.update(filename=fspec.lfn, guid=fspec.guid.replace('-', ''))
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset)

        # continue loop for files that are to be accessed directly  ## TOBE DEPRECATED (anisyonk)
        #if fspec.is_directaccess(ensure_replica=False) and allow_direct_access and fspec.accessmode == 'direct':
        #    fspec.status_code = 0
        #    fspec.status = 'remote_io'
        #    trace_report.update(url=fspec.turl, clientState='FOUND_ROOT', stateReason='direct_access')
        #    trace_report.send()
        #    continue

        trace_report.update(catStart=time())

        # destination directory: per-file workdir, then the common workdir, then cwd
        dst = fspec.workdir or kwargs.get('workdir') or '.'
        destination = os.path.join(dst, fspec.lfn)
        try:
            filesize_cmd, checksum_cmd, checksum_type = _stagefile(coption, fspec.turl, destination, fspec.filesize,
                                                                   is_stagein=True, setup=setup, **kwargs)
            fspec.status_code = 0
            fspec.status = 'transferred'
        except PilotException as error:
            # controlled transfer failure: record in the trace report and re-raise
            fspec.status = 'failed'
            fspec.status_code = error.get_error_code()
            diagnostics = error.get_detail()
            state = 'STAGEIN_ATTEMPT_FAILED'
            trace_report.update(clientState=state, stateReason=diagnostics, timeEnd=time())
            trace_report.send()
            raise PilotException(diagnostics, code=fspec.status_code, state=state)
        else:
            # compare checksums
            # NOTE(review): checksum_type may be None when coption is "" — confirm callers always pass a checksum option
            fspec.checksum[checksum_type] = checksum_cmd  # remote checksum
            state, diagnostics = verify_catalog_checksum(fspec, destination)
            if diagnostics != "":
                trace_report.update(clientState=state or 'STAGEIN_ATTEMPT_FAILED', stateReason=diagnostics,
                                    timeEnd=time())
                trace_report.send()
                raise PilotException(diagnostics, code=fspec.status_code, state=state)

        trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time())
        trace_report.send()

    return files
Exemplo n.º 5
0
def resolve_surl(fspec, protocol, ddmconf, **kwargs):
    """
        Get final destination SURL for file to be transferred to Objectstore
        Can be customized at the level of specific copytool

        :param fspec: file spec data
        :param protocol: suggested protocol
        :param ddmconf: full ddm storage data
        :return: dictionary {'surl': surl}
        :raise: PilotException if the ddmendpoint cannot be resolved or is unsupported
    """
    ddm = ddmconf.get(fspec.ddmendpoint)
    if not ddm:
        raise PilotException('failed to resolve ddmendpoint by name=%s' % fspec.ddmendpoint)

    if ddm.is_deterministic:
        surl = protocol.get('endpoint', '') + os.path.join(protocol.get('path', ''), get_rucio_path(fspec.scope, fspec.lfn))
    elif ddm.type in ['OS_ES', 'OS_LOGS']:
        surl = protocol.get('endpoint', '') + os.path.join(protocol.get('path', ''), fspec.lfn)
        fspec.protocol_id = protocol.get('id')
    else:
        # bug fix: the ddmendpoint was previously passed as a second positional
        # argument (interpreted as the error code by PilotException), leaving
        # the %s placeholder in the message unformatted
        raise PilotException('resolve_surl(): Failed to construct SURL for non deterministic ddm=%s: NOT IMPLEMENTED' % fspec.ddmendpoint)

    # example:
    #   protocol = {u'path': u'/atlas-eventservice', u'endpoint': u's3://s3.cern.ch:443/', u'flavour': u'AWS-S3-SSL', u'id': 175}
    #   surl = 's3://s3.cern.ch:443//atlas-eventservice/EventService_premerge_24706191-5013009653-24039149400-322-5.tar'
    return {'surl': surl}
Exemplo n.º 6
0
def _stagefile(coption,
               source,
               destination,
               filesize,
               is_stagein,
               setup=None,
               **kwargs):
    """
        Stage the file (stage-in or stage-out) with the configured copy command.

        :param coption: checksum option string passed to the copy command
        :param source: source URL/path
        :param destination: destination URL/path
        :param filesize: expected file size
        :param is_stagein: True for stage-in, False for stage-out
        :param setup: optional setup script to be sourced before the transfer
        :return: destination file details (filesize, checksum, checksum_type) in case of success
        :raise: PilotException in case of controlled error
    """

    cmd = '%s -np -f %s %s %s' % (copy_command, coption, source, destination)
    if setup:
        cmd = "source %s; %s" % (setup, cmd)

    rcode, stdout, stderr = execute(cmd, **kwargs)
    logger.info('rcode=%d, stdout=%s, stderr=%s' % (rcode, stdout, stderr))

    if rcode:  ## error occurred
        error = resolve_common_transfer_errors(stdout + stderr, is_stagein=is_stagein)
        raise PilotException(error.get('error'),
                             code=error.get('rcode'),
                             state=error.get('state'))

    filesize_cmd, checksum_cmd, checksum_type = None, None, None
    # extract filesize and checksum values from output (only when a checksum option was used)
    if coption != "":
        filesize_cmd, checksum_cmd, checksum_type = get_file_info_from_output(stdout + stderr)

    ## verify transfer by returned checksum or call remote checksum calculation
    ## to be moved at the base level
    is_verified = True  ## TO BE IMPLEMENTED LATER

    if not is_verified:
        mismatch_code = ErrorCodes.GETADMISMATCH if is_stagein else ErrorCodes.PUTADMISMATCH
        raise PilotException("Copy command failed",
                             code=mismatch_code,
                             state='AD_MISMATCH')

    return filesize_cmd, checksum_cmd, checksum_type
Exemplo n.º 7
0
    def prepare_destinations(self, files, activities):
        """
            Resolve destination RSE (filespec.ddmendpoint) for each entry from `files` according to requested `activities`
            Apply Pilot-side logic to choose proper destination

            :param files: list of FileSpec objects to be processed
            :param activities: ordered list of activities to be used to resolve astorages
            :return: updated fspec entries
            :raise: PilotException if no activity or no associated storage can be resolved
        """

        if not self.infosys.queuedata:  ## infosys is not initialized: not able to fix destination if need, nothing to do
            return files

        # normalize a single activity name to a list (handles py2 and py3 string types)
        try:
            if isinstance(activities, (str, unicode)):  # Python 2
                activities = [activities]
        except Exception:  # NameError on Python 3: `unicode` does not exist
            if isinstance(activities, str):  # Python 3
                activities = [activities]

        if not activities:
            raise PilotException("Failed to resolve destination: passed empty activity list. Internal error.",
                                 code=ErrorCodes.INTERNALPILOTPROBLEM, state='INTERNAL_ERROR')

        astorages = self.infosys.queuedata.astorages or {}

        # use the storages of the first activity that has any associated
        storages = None
        activity = activities[0]
        for a in activities:
            storages = astorages.get(a, {})
            if storages:
                break

        if not storages:
            raise PilotException("Failed to resolve destination: no associated storages defined for activity=%s (%s)"
                                 % (activity, ','.join(activities)), code=ErrorCodes.NOSTORAGE, state='NO_ASTORAGES_DEFINED')

        # take the first choice for now, extend the logic later if need
        ddm = storages[0]

        self.logger.info("[prepare_destinations][%s]: allowed (local) destinations: %s" % (activity, storages))
        self.logger.info("[prepare_destinations][%s]: resolved default destination ddm=%s" % (activity, ddm))

        for e in files:
            if not e.ddmendpoint:  ## no preferences => use default destination
                self.logger.info("[prepare_destinations][%s]: fspec.ddmendpoint is not set for lfn=%s"
                                 " .. will use default ddm=%s as (local) destination" % (activity, e.lfn, ddm))
                e.ddmendpoint = ddm
            elif e.ddmendpoint not in storages:  ## fspec.ddmendpoint is not in associated storages => assume it as final (non local) alternative destination
                self.logger.info("[prepare_destinations][%s]: Requested fspec.ddmendpoint=%s is not in the list of allowed (local) destinations"
                                 " .. will consider default ddm=%s for transfer and tag %s as alt. location" % (activity, e.ddmendpoint, ddm, e.ddmendpoint))
                # bug fix: record the originally requested endpoint as the alternative
                # BEFORE overwriting it with the default ddm (the previous order set
                # ddmendpoint_alt to the new value, losing the requested endpoint)
                e.ddmendpoint_alt = e.ddmendpoint  ###  consider me later
                e.ddmendpoint = ddm

        return files
Exemplo n.º 8
0
def copy_out(files, **kwargs):
    """
    Upload given files to GS storage.

    :param files: list of `FileSpec` objects
    :raise: PilotException in case of controlled error
    """

    import re  # hoisted out of the per-file loop

    workdir = kwargs.pop('workdir')

    for fspec in files:
        logger.info('Going to process fspec.turl=%s', fspec.turl)

        # fspec.turl is expected to look like gs://<bucket>/<remote path>
        # bucket = re.sub(r'gs://(.*?)/.*', r'\1', fspec.turl)
        reobj = re.match(r'gs://([^/]*)/(.*)', fspec.turl)
        if not reobj:
            # bug fix: a malformed turl previously caused an uncontrolled
            # AttributeError on reobj.groups(); raise a controlled error instead
            diagnostics = 'cannot resolve bucket/path from turl: %s' % fspec.turl
            logger.warning(diagnostics)
            fspec.status = 'failed'
            fspec.status_code = errors.STAGEOUTFAILED
            raise PilotException(diagnostics, code=fspec.status_code, state=fspec.status)
        (bucket, remote_path) = reobj.groups()

        # upload every non-gzipped file found in the work directory
        # ["pilotlog.txt", "payload.stdout", "payload.stderr"]:
        for logfile in os.listdir(workdir):
            if logfile.endswith("gz"):
                continue
            path = os.path.join(workdir, logfile)
            if os.path.exists(path):
                object_name = os.path.join(remote_path, logfile)
                logger.info('uploading %s to bucket=%s using object name=%s',
                            path, bucket, object_name)
                status, diagnostics = upload_file(path,
                                                  bucket,
                                                  object_name=object_name)

                if not status:  ## an error occurred
                    # create new error code(s) in ErrorCodes.py and set it/them in resolve_common_transfer_errors()
                    error = resolve_common_transfer_errors(diagnostics,
                                                           is_stagein=False)
                    fspec.status = 'failed'
                    fspec.status_code = error.get('rcode')
                    raise PilotException(error.get('error'),
                                         code=error.get('rcode'),
                                         state=error.get('state'))
            else:
                diagnostics = 'local output file does not exist: %s' % path
                logger.warning(diagnostics)
                fspec.status = 'failed'
                fspec.status_code = errors.STAGEOUTFAILED
                raise PilotException(diagnostics,
                                     code=fspec.status_code,
                                     state=fspec.status)

            fspec.status = 'transferred'
            fspec.status_code = 0

    return files
Exemplo n.º 9
0
def resolve_protocol(fspec, activity, ddm):
    """
        Resolve the protocol to be used to transfer the file for the corresponding activity

        :param fspec: file spec data
        :param activity: activity name as string
        :param ddm: ddm storage data
        :return: protocol as dictionary
        :raise: PilotException if no allowed protocol can be resolved
    """

    logger.info(
        "Resolving protocol for file(lfn: %s, ddmendpoint: %s) with activity(%s)"
        % (fspec.lfn, fspec.ddmendpoint, activity))

    activity = get_ddm_activity(activity)
    # bug fix: arprotocols.get() may return None for an unknown activity,
    # which previously caused a TypeError in the loop below
    protocols = ddm.arprotocols.get(activity) or []
    protocols_allow = []
    for schema in allowed_schemas:
        for protocol in protocols:
            if schema is None or protocol.get('endpoint', '').startswith(
                    "%s://" % schema):
                protocols_allow.append(protocol)
    if not protocols_allow:
        err = "No available allowed protocols for file(lfn: %s, ddmendpoint: %s) with activity(%s)" % (
            fspec.lfn, fspec.ddmendpoint, activity)
        logger.error(err)
        raise PilotException(err)
    # take the first allowed protocol (order of allowed_schemas defines priority)
    protocol = protocols_allow[0]
    logger.info(
        "Resolved protocol for file(lfn: %s, ddmendpoint: %s) with activity(%s): %s"
        % (fspec.lfn, fspec.ddmendpoint, activity, protocol))
    return protocol
Exemplo n.º 10
0
    def init(self, pandaqueue, confinfo=None, extinfo=None, jobinfo=None):
        """
            Initialize the InfoService: set up the info providers, resolve the
            queuedata for the given PanDA queue and prefetch all storage data.

            :param pandaqueue: name of the PanDA queue to resolve
            :param confinfo: optional config info provider (PilotConfigProvider by default)
            :param extinfo: optional external info provider (ExtInfoProvider by default)
            :param jobinfo: optional job info provider
            :raise: PilotException if the queue name is missing, QueuedataFailure if queuedata cannot be resolved
        """

        self.confinfo = confinfo or PilotConfigProvider()
        self.jobinfo = jobinfo  # or JobInfoProvider()
        self.extinfo = extinfo or ExtInfoProvider(cache_time=self.cache_time)
        self.pandaqueue = pandaqueue

        if not pandaqueue:
            raise PilotException(
                'Failed to initialize InfoService: panda queue name is not set'
            )

        # reset cached data
        self.queues_info = {}
        self.storages_info = {}
        #self.sites_info = {}     ##  reset cache data

        self.queuedata = self.resolve_queuedata(pandaqueue)
        if not (self.queuedata and self.queuedata.name):
            raise QueuedataFailure(
                "Failed to resolve queuedata for queue=%s, wrong PandaQueue name?"
                % pandaqueue)

        self.resolve_storage_data()  ## prefetch details for all storages
Exemplo n.º 11
0
    def resolve_storage_data(self, ddmendpoints=None):  ## high level API
        """
            Resolve storage data for the requested ddmendpoints (all known ones if not specified).

            :param ddmendpoints: ddmendpoint name or list of names (None/empty means all)
            :return: dict of DDMEndpoint settings by DDMEndpoint name as a key
            :raise: PilotException if some requested ddmendpoints cannot be resolved
        """

        # bug fix: avoid a mutable default argument ([]) shared across calls
        if ddmendpoints is None:
            ddmendpoints = []
        if isinstance(ddmendpoints, basestring):
            ddmendpoints = [ddmendpoints]

        cache = self.storages_info

        miss_objs = set(ddmendpoints) - set(cache)
        if not ddmendpoints or miss_objs:  # not found in cache: do load and initialize data
            # the order of providers makes the priority
            r = self._resolve_data(self.whoami(),
                                   providers=(self.confinfo, self.jobinfo,
                                              self.extinfo),
                                   args=[miss_objs],
                                   merge=True)
            if ddmendpoints:
                not_resolved = set(ddmendpoints) - set(r)
                if not_resolved:
                    raise PilotException(
                        "internal error: Failed to load storage details for ddms=%s"
                        % sorted(not_resolved))
            for ddm in r:
                cache[ddm] = StorageData(r[ddm])

        return cache
Exemplo n.º 12
0
    def resolve_protocols(self, files):
        """
            Populates filespec.protocols for each entry from `files` according to preferred `fspec.ddm_activity` value
            :param files: list of `FileSpec` objects
            fdat.protocols = [dict(endpoint, path, flavour), ..]
            :return: `files`
            :raise: PilotException if a ddmendpoint cannot be resolved
        """

        ddmconf = self.infosys.resolve_storage_data()

        for fdat in files:
            ddm = ddmconf.get(fdat.ddmendpoint)
            if not ddm:
                error = 'Failed to resolve output ddmendpoint by name=%s (from PanDA), please check configuration.' % fdat.ddmendpoint
                self.logger.error("resolve_protocols: %s, fspec=%s" % (error, fdat))
                raise PilotException(error, code=ErrorCodes.NOSTORAGE)

            protocols = []
            for aname in fdat.ddm_activity:
                # bug fix: arprotocols.get() may return None for an unknown activity,
                # which previously could leave fdat.protocols set to None instead of []
                protocols = ddm.arprotocols.get(aname) or []
                if protocols:
                    break

            fdat.protocols = protocols

        return files
Exemplo n.º 13
0
def create_output_list(files, init_dir, ddmconf):
    """
    Add files to the output list which tells ARC CE which files to upload

    :param files: list of `FileSpec` objects
    :param init_dir: directory in which output.list is written
    :param ddmconf: storage settings by ddmendpoint name
    :raise: PilotException if ddmconf is not provided
    """

    if not ddmconf:
        raise PilotException("copy_out() failed to resolve ddmconf from function arguments",
                             code=ErrorCodes.STAGEOUTFAILED,
                             state='COPY_ERROR')

    output_list = os.path.join(init_dir, 'output.list')

    for fspec in files:
        arcturl = fspec.turl

        if arcturl.startswith('s3://'):
            # route the upload through the Rucio proxy to the objectstore;
            # failureallowed ensures a failed upload does not fail the job
            arcturl = re.sub(r'^s3', 's3+rucio', arcturl)
            rucio = 'rucio://rucio-lb-prod.cern.ch;failureallowed=yes/objectstores'
            arcturl = '/'.join([rucio, arcturl, fspec.ddmendpoint, 'write'])
        else:
            # attach ARC transfer options (space token and checksum) to the TURL
            checksumtype, checksum = list(fspec.checksum.items())[0]  # Python 2/3
            # resolve space token value from fspec.ddmendpoint
            token = ddmconf.get(fspec.ddmendpoint).token
            if token:
                arcturl = re.sub(r'((:\d+)/)', r'\2;autodir=no;spacetoken=%s/' % token, arcturl)
            else:
                logger.info('No space token info for %s', fspec.ddmendpoint)
            arcturl += ':checksumtype=%s:checksumvalue=%s' % (checksumtype, checksum)

        logger.info('Adding to output.list: %s %s', fspec.lfn, arcturl)
        # append the entry to output.list
        with open(output_list, 'a') as f:
            f.write('%s %s\n' % (fspec.lfn, arcturl))
Exemplo n.º 14
0
def main():
    """
    Main function of PanDA Pilot 2.
    Prepare for and execute the requested workflow.

    :return: exit code (int).
    """

    # get the logger
    logger = logging.getLogger(__name__)

    # print the pilot version
    pilot_version_banner()

    # define threading events
    # (args is a module-level namespace populated by the argument parser)
    args.graceful_stop = threading.Event()
    args.abort_job = threading.Event()
    args.job_aborted = threading.Event()

    # define useful variables
    args.retrieve_next_job = True  # go ahead and download a new job
    args.signal = None  # to store any incoming signals
    args.signal_counter = 0  # keep track of number of received kill signal (suicide counter)
    args.kill_time = 0  # keep track of when first kill signal arrived

    # read and parse config file
    config.read(args.config)

    # perform https setup
    https_setup(args, get_pilot_version())

    # initialize InfoService
    try:
        infosys.init(args.queue)
        # check if queue is ACTIVE
        if infosys.queuedata.state != 'ACTIVE':
            logger.critical('specified queue is NOT ACTIVE: %s -- aborting' %
                            infosys.queuedata.name)
            raise PilotException("Panda Queue is NOT ACTIVE")
    except PilotException as error:
        logger.fatal(error)
        return error.get_error_code()

    # set the site name for rucio  ## is it really used?
    environ['PILOT_RUCIO_SITENAME'] = infosys.queuedata.site

    # set requested workflow
    logger.info('pilot arguments: %s' % str(args))
    logger.info('selected workflow: %s' % args.workflow)
    # NOTE(review): level=-1 is Python 2 only (implicit relative+absolute search);
    # under Python 3 this raises ValueError — confirm the intended interpreter
    workflow = __import__('pilot.workflow.%s' % args.workflow, globals(),
                          locals(), [args.workflow], -1)

    # execute workflow
    try:
        exit_code = workflow.run(args)
    except Exception as e:
        # any uncontrolled workflow failure yields a None exit code
        logger.fatal('main pilot function caught exception: %s' % e)
        exit_code = None

    return exit_code
Exemplo n.º 15
0
def copy_in(files, **kwargs):
    """
    Download given files from an S3 bucket.

    :param files: list of `FileSpec` objects
    :raise: PilotException in case of controlled error
    """

    for fspec in files:

        # destination directory: per-file workdir, then the common workdir, then cwd
        destdir = fspec.workdir or kwargs.get('workdir') or '.'
        bucket = 'bucket'  # UPDATE ME
        destination = os.path.join(destdir, fspec.lfn)

        logger.info('downloading object %s from bucket=%s to local file %s', fspec.lfn, bucket, destination)
        downloaded, diagnostics = download_file(destination, bucket, object_name=fspec.lfn)

        if not downloaded:  ## an error occurred
            error = resolve_common_transfer_errors(diagnostics, is_stagein=True)
            fspec.status = 'failed'
            fspec.status_code = error.get('rcode')
            raise PilotException(error.get('error'), code=error.get('rcode'), state=error.get('state'))

        fspec.status_code = 0
        fspec.status = 'transferred'

    return files
Exemplo n.º 16
0
def copy_in(files, **kwargs):
    """
        Download given files using rucio copytool.

        :param files: list of `FileSpec` objects
        :raise: PilotException in case of controlled error
    """

    # don't spoil the output, we depend on stderr parsing
    os.environ[
        'RUCIO_LOGGING_FORMAT'] = '%(asctime)s %(levelname)s [%(message)s]'

    ddmconf = kwargs.pop('ddmconf', {})
    activity = kwargs.pop('activity', None)
    # trace_report = kwargs.get('trace_report')

    for fspec in files:

        logger.info("To transfer file: %s" % fspec)

        cmd = []
        ddm = ddmconf.get(fspec.ddmendpoint)
        if ddm:
            # resolve the SURL (and any storage-specific setup) for this endpoint
            protocol = resolve_protocol(fspec, activity, ddm)
            surls = resolve_surl(fspec, protocol, ddmconf)
            if 'surl' in surls:
                fspec.surl = surls['surl']
            special_setup = ddm.get_special_setup(protocol.get('id', None))
            if special_setup:
                cmd.append(special_setup)

        dst = fspec.workdir or kwargs.get('workdir') or '.'
        cmd.extend(['/usr/bin/env', 'rucio', '-v', 'download', '--no-subdir', '--dir', dst])
        if require_replicas:
            cmd.extend(['--rse', fspec.replicas[0][0]])
        if fspec.surl:
            if fspec.ddmendpoint:
                cmd.extend(['--rse', fspec.ddmendpoint])
            cmd.extend(['--pfn', fspec.surl])
        cmd.append('%s:%s' % (fspec.scope, fspec.lfn))

        rcode, stdout, stderr = execute(" ".join(cmd), **kwargs)

        if rcode:  ## error occurred
            error = resolve_common_transfer_errors(stderr, is_stagein=True)
            fspec.status = 'failed'
            fspec.status_code = error.get('rcode')
            raise PilotException(error.get('error'),
                                 code=error.get('rcode'),
                                 state=error.get('state'))

        fspec.status_code = 0
        fspec.status = 'transferred'

    return files
Exemplo n.º 17
0
Arquivo: data.py Projeto: ptrlv/pilot2
    def __init__(self,
                 infosys_instance=None,
                 acopytools=None,
                 logger=None,
                 default_copytools='rucio',
                 trace_report=None):
        """
            If `acopytools` is not specified then it will be automatically resolved via infosys. In this case `infosys` requires initialization.
            :param acopytools: dict of copytool names per activity to be used for transfers. Accepts also list of names or string value without activity passed.
            :param logger: logging.Logger object to use for logging (None means no logging)
            :param default_copytools: copytool name(s) to be used in case of unknown activity passed. Accepts either list of names or single string value.
            :param trace_report: initialized TraceReport instance (a default one is created if not passed)
            :raise: PilotException if no copytool settings can be resolved
        """

        super(StagingClient, self).__init__()

        if not logger:
            logger = logging.getLogger('%s.%s' % (__name__, 'null'))
            logger.disabled = True

        self.logger = logger
        self.infosys = infosys_instance or infosys

        # bug fix: initialize the trace report BEFORE it can be used in the
        # BAD_COPYTOOL error path below (previously self.trace_report was only
        # assigned at the very end, so that path raised AttributeError instead
        # of the intended PilotException)
        # get an initialized trace report (has to be updated for get/put if not defined before)
        self.trace_report = trace_report if trace_report else TraceReport(
            pq=os.environ.get('PILOT_SITENAME', ''))

        if isinstance(acopytools, basestring):
            acopytools = {'default': [acopytools]} if acopytools else {}
        if isinstance(acopytools, (list, tuple)):
            acopytools = {'default': acopytools} if acopytools else {}

        self.acopytools = acopytools or {}

        if self.infosys.queuedata:
            if not self.acopytools:  ## resolve from queuedata.acopytools using infosys
                self.acopytools = (self.infosys.queuedata.acopytools
                                   or {}).copy()
            if not self.acopytools:  ## resolve from queuedata.copytools using infosys
                self.acopytools = dict(
                    default=(self.infosys.queuedata.copytools or {}).keys())

        if not self.acopytools.get('default'):
            if isinstance(default_copytools, basestring):
                default_copytools = [default_copytools
                                     ] if default_copytools else []
            self.acopytools['default'] = default_copytools

        if not self.acopytools:
            msg = 'failed to initilize StagingClient: no acopytools options found, acopytools=%s' % self.acopytools
            logger.error(msg)
            self.trace_report.update(clientState='BAD_COPYTOOL',
                                     stateReason=msg)
            self.trace_report.send()
            raise PilotException("failed to resolve acopytools settings")
        logger.info('configured copytools per activity: acopytools=%s' %
                    self.acopytools)
Exemplo n.º 18
0
def copy_out(files, **kwargs):
    """
        Upload given files using xrdcp command.

        :param files: list of `FileSpec` objects
        :raise: PilotException in case of controlled error
    """

    # resolve optional setup script and checksum option for the xrdcp command
    setup = kwargs.pop('copytools', {}).get('xrdcp', {}).get('setup')
    coption = _resolve_checksum_option(setup, **kwargs)
    trace_report = kwargs.get('trace_report')

    for fspec in files:
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset, url=fspec.surl, filesize=fspec.filesize)
        trace_report.update(catStart=time(), filename=fspec.lfn, guid=fspec.guid.replace('-', ''))

        try:
            filesize_cmd, checksum_cmd, checksum_type = _stagefile(coption, fspec.surl, fspec.turl, fspec.filesize,
                                                                   is_stagein=False, setup=setup, **kwargs)
            fspec.status_code = 0
            fspec.status = 'transferred'
            trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time())
            trace_report.send()
        except PilotException as error:
            # controlled transfer failure: record in the trace report and re-raise
            fspec.status = 'failed'
            fspec.status_code = error.get_error_code()
            state = 'STAGEOUT_ATTEMPT_FAILED'
            diagnostics = error.get_detail()
            trace_report.update(clientState=state, stateReason=diagnostics, timeEnd=time())
            trace_report.send()
            raise PilotException(diagnostics, code=fspec.status_code, state=state)
        else:
            # compare checksums
            fspec.checksum[checksum_type] = checksum_cmd  # remote checksum
            state, diagnostics = verify_catalog_checksum(fspec, fspec.surl)
            if diagnostics != "":
                # bug fix: the fallback state previously said 'STAGEIN_ATTEMPT_FAILED'
                # in this stage-out path (copy/paste from copy_in)
                trace_report.update(clientState=state or 'STAGEOUT_ATTEMPT_FAILED', stateReason=diagnostics,
                                    timeEnd=time())
                trace_report.send()
                raise PilotException(diagnostics, code=fspec.status_code, state=state)

    return files
Exemplo n.º 19
0
def copy_out(files, **kwargs):
    """
    Upload given files using the gfal-copy command.

    :param files: list of `FileSpec` objects to upload
    :raises: StageOutFailure if no GFAL2 tools are available
    :raises: PilotException in case of controlled transfer errors
    :return: the input list of `FileSpec` objects (updated in place)
    """

    if not check_for_gfal():
        raise StageOutFailure("No GFAL2 tools found")

    trace_report = kwargs.get('trace_report')

    for fspec in files:
        # update the trace report for this file
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset, url=fspec.surl, filesize=fspec.filesize)
        trace_report.update(catStart=time(), filename=fspec.lfn, guid=fspec.guid.replace('-', ''))

        workdir = fspec.workdir or kwargs.get('workdir') or '.'
        transfer_timeout = get_timeout(fspec.filesize)

        # local source file and remote destination TURL
        local_path = fspec.surl or os.path.join(workdir, fspec.lfn)
        src_url = "file://%s" % os.path.abspath(local_path)
        dst_url = fspec.turl

        args = ['gfal-copy --verbose -f', ' -t %s' % transfer_timeout]
        if fspec.checksum:
            # pass the catalog checksum so gfal can verify it after the copy
            args += ['-K', '%s:%s' % list(fspec.checksum.items())[0]]  # Python 2/3
        args += [src_url, dst_url]

        rcode, stdout, stderr = execute(" ".join(args), **kwargs)

        if not rcode:
            # transfer succeeded
            fspec.status_code = 0
            fspec.status = 'transferred'
            trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time())
            trace_report.send()
            continue

        # an error occurred: classify it, send the trace and abort
        if rcode in [errno.ETIMEDOUT, errno.ETIME]:
            error = {'rcode': ErrorCodes.STAGEOUTTIMEOUT,
                     'state': 'CP_TIMEOUT',
                     'error': 'Copy command timed out: %s' % stderr}
        else:
            error = resolve_common_transfer_errors(stdout + stderr, is_stagein=False)
        fspec.status = 'failed'
        fspec.status_code = error.get('rcode')
        trace_report.update(clientState=error.get('state', None) or 'STAGEOUT_ATTEMPT_FAILED',
                            stateReason=error.get('error', 'unknown error'),
                            timeEnd=time())
        trace_report.send()
        raise PilotException(error.get('error'), code=error.get('rcode'), state=error.get('state'))

    return files
Exemplo n.º 20
0
def set_info(args):   ## should be DEPRECATED: use `infosys.init(queuename)`
    """
    Set up all necessary site information for a given PandaQueue name.

    Resolve everything from the specified queue name (passed via `args.queue`)
    and fill an extra lookup structure (populate `args.info`).

    :param args: input (shared) arguments
    :raise PilotException: if the specified queue is not ACTIVE
    :return: None
    """

    # initialize the info service for the requested queue
    infosys.init(args.queue)

    args.info = collections.namedtuple('info', ['queue', 'infoservice',
                                                'site', 'storages',
                                                'storages_info'])
    args.info.queue = args.queue
    args.info.infoservice = infosys  # for tests only and redundant -- pilot.info.infosys should be used

    # check if queue is ACTIVE
    if infosys.queuedata.state != 'ACTIVE':
        logger.critical('specified queue is NOT ACTIVE: %s -- aborting' % infosys.queuedata.name)
        raise PilotException("Panda Queue is NOT ACTIVE")

    # find all enabled storages at the site
    # bug fix: dict.iteritems() does not exist in Python 3 -- items() works in both Python 2 and 3
    args.info.storages = [ddm for ddm, dat in infosys.storages_info.items() if dat.site == infosys.queuedata.site]

    logger.info('queue: %s' % args.info.queue)
Exemplo n.º 21
0
Arquivo: data.py Projeto: ptrlv/pilot2
    def resolve_replica(self,
                        fspec,
                        primary_schemas=None,
                        allowed_schemas=None):
        """
            Resolve an input replica, first according to `primary_schemas`;
            if not found there, then look it up within `allowed_schemas`.
            :param fspec: input `FileSpec` object (its `replicas` attribute is consulted)
            :param primary_schemas: list of preferred schemas to try first, or None
            :param allowed_schemas: list of allowed schemas, or any if None
            :return: dict(surl, ddmendpoint, pfn), or None if fspec has no replicas
            :raise PilotException: if replica not found (code=REPLICANOTFOUND)
        """

        # nothing to resolve: silently return None (callers must handle this)
        if not fspec.replicas:
            self.logger.warning(
                'resolve_replicas() recevied no fspec.replicas')
            return

        # [None] acts as a wildcard entry meaning "any schema"
        allowed_schemas = allowed_schemas or [None]
        replica = None

        # fspec.replicas is iterated as (ddmendpoint, replicas) pairs
        for ddmendpoint, replicas in fspec.replicas:
            if not replicas:  # ignore ddms with no replicas
                continue
            if primary_schemas:  ## look up primary schemas if requested
                replica = self.get_preferred_replica(replicas, primary_schemas)
            if not replica:
                # fall back to the generally allowed schemas
                replica = self.get_preferred_replica(replicas, allowed_schemas)
            if replica:
                surl = self.get_preferred_replica(
                    replicas, ['srm']) or replicas[
                        0]  # prefer SRM protocol for surl -- to be verified
                self.logger.info(
                    "[stage-in] surl (srm replica) from Rucio: pfn=%s, ddmendpoint=%s"
                    % (surl, ddmendpoint))
                break

        if not replica:  # replica not found
            # allowed_schemas[0] is None for the wildcard case set above
            schemas = 'any' if not allowed_schemas[0] else ','.join(
                allowed_schemas)
            error = 'Failed to find replica for input file=%s, allowed_schemas=%s, fspec=%s' % (
                fspec.lfn, schemas, fspec)
            self.logger.error("resolve_replica: %s" % error)
            raise PilotException(error, code=ErrorCodes.REPLICANOTFOUND)

        # surl/ddmendpoint are bound by the loop iteration that found the replica
        return {'surl': surl, 'ddmendpoint': ddmendpoint, 'pfn': replica}
Exemplo n.º 22
0
def resolve_surl(fspec, protocol, ddmconf, **kwargs):
    """
    Get the final destination SURL for a file to be transferred to Objectstore.
    Can be customized at the level of a specific copytool.

    :param fspec: file spec data
    :param protocol: suggested protocol (dict with 'endpoint', 'path' and 'id' keys)
    :param ddmconf: full ddm storage data
    :raise PilotException: if fspec.ddmendpoint cannot be resolved
    :return: dictionary {'surl': surl}
    """

    ddm = ddmconf.get(fspec.ddmendpoint)
    if not ddm:
        raise PilotException('failed to resolve ddmendpoint by name=%s' %
                             fspec.ddmendpoint)

    # expand the #{pandaid} placeholder in the dataset name, if any
    # bug fix: os.environ['PANDAID'] raised KeyError when the variable was unset
    dataset = fspec.dataset or ""
    dataset = dataset.replace("#{pandaid}", os.environ.get('PANDAID', ''))

    remote_path = os.path.join(protocol.get('path', ''), dataset)

    surl = protocol.get('endpoint', '') + remote_path
    logger.info('For GCS bucket, set surl=%s', surl)

    # example:
    #   protocol = {u'path': u'/atlas-eventservice', u'endpoint': u's3://s3.cern.ch:443/', u'flavour': u'AWS-S3-SSL', u'id': 175}
    #   surl = 's3://s3.cern.ch:443//atlas-eventservice/EventService_premerge_24706191-5013009653-24039149400-322-5.tar'
    return {'surl': surl}
Exemplo n.º 23
0
    def require_protocols(self, files, copytool, activity):
        """
            Populate fspec.protocols and fspec.turl for each entry in `files`
            according to the preferred fspec.ddm_activity.

            :param files: list of `FileSpec` objects
            :param copytool: copytool module (may provide `allowed_schemas` and a custom `resolve_surl`)
            :param activity: str or ordered list of transfer activity names to resolve acopytools related data
            :raise PilotException: if no protocol can be resolved for a file (code=NOSTORAGEPROTOCOL)
            :return: None
        """

        allowed_schemas = getattr(copytool, 'allowed_schemas', None)

        # queuedata settings take precedence over the copytool's own schema list
        if self.infosys and self.infosys.queuedata:
            copytool_name = copytool.__name__.rsplit('.', 1)[-1]
            allowed_schemas = self.infosys.queuedata.resolve_allowed_schemas(activity, copytool_name) or allowed_schemas

        files = self.resolve_protocols(files)
        ddmconf = self.infosys.resolve_storage_data()

        for fspec in files:

            protocols = self.resolve_protocol(fspec, allowed_schemas)
            if not protocols:  # no protocols found
                error = 'Failed to resolve protocol for file=%s, allowed_schemas=%s, fspec=%s' % (fspec.lfn, allowed_schemas, fspec)
                self.logger.error("resolve_protocol: %s" % error)
                raise PilotException(error, code=ErrorCodes.NOSTORAGEPROTOCOL)

            # take first available protocol for copytool: FIX ME LATER if need (do iterate over all allowed protocols?)
            protocol = protocols[0]

            # bug fix: the format arguments were swapped (protocol data was logged as the lfn)
            self.logger.info("Resolved protocol to be used for transfer lfn=%s: data=%s" % (fspec.lfn, protocol))

            # copytools may implement their own SURL resolution; fall back to the default
            resolve_surl = getattr(copytool, 'resolve_surl', None)
            if not callable(resolve_surl):
                resolve_surl = self.resolve_surl

            r = resolve_surl(fspec, protocol, ddmconf)  ## pass ddmconf for possible custom look up at the level of copytool
            if r.get('surl'):
                fspec.turl = r['surl']
            if r.get('ddmendpoint'):
                fspec.ddmendpoint = r['ddmendpoint']
Exemplo n.º 24
0
def resolve_surl(fspec, protocol, ddmconf, **kwargs):
    """
    Get the final destination SURL for a file to be transferred to Objectstore.
    Can be customized at the level of a specific copytool.

    :param fspec: file spec data
    :param protocol: suggested protocol (dict with 'endpoint', 'path' and 'id' keys)
    :param ddmconf: full ddm storage data
    :raise PilotException: if fspec.ddmendpoint cannot be resolved
    :return: dictionary {'surl': surl}
    """

    # the panda queue name becomes part of the remote path; fall back to '' if unavailable
    try:
        pandaqueue = infosys.pandaqueue
    except Exception:
        pandaqueue = ""
    if pandaqueue is None:
        pandaqueue = ""

    ddm = ddmconf.get(fspec.ddmendpoint)
    if not ddm:
        raise PilotException('failed to resolve ddmendpoint by name=%s' %
                             fspec.ddmendpoint)

    # expand the #{pandaid} placeholder in the dataset name, if any
    # bug fix: os.environ['PANDAID'] raised KeyError when the variable was unset
    dataset = fspec.dataset or ""
    dataset = dataset.replace("#{pandaid}", os.environ.get('PANDAID', ''))

    remote_path = os.path.join(protocol.get('path', ''), pandaqueue, dataset)
    surl = protocol.get('endpoint', '') + remote_path
    logger.info('For GCS bucket, set surl=%s', surl)

    # example:
    #   protocol = {u'path': u'/atlas-eventservice', u'endpoint': u's3://s3.cern.ch:443/', u'flavour': u'AWS-S3-SSL', u'id': 175}
    #   surl = 's3://s3.cern.ch:443//atlas-eventservice/EventService_premerge_24706191-5013009653-24039149400-322-5.tar'
    return {'surl': surl}
Exemplo n.º 25
0
    def transfer(self, files, activity=['pw'], **kwargs):  # noqa: C901
        """
            Automatically stage passed files using copy tools related to given `activity`.
            Iterates over the prioritized activities, and for each activity over its
            configured copytools, until one transfer attempt succeeds.
            :param files: list of `FileSpec` objects
            :param activity: list of activity names used to determine appropriate copytool (prioritized list)
            :param kwargs: extra kwargs to be passed to copytool transfer handler
            :raise: PilotException in case of controlled error
            :return: output of copytool trasfers (to be clarified)
        """
        # NOTE(review): mutable default argument `activity=['pw']` -- harmless here since
        # it is only read, but consider `activity=None` with a default inside the body.
        # NOTE(review): this method uses Python-2-only constructs (`basestring`,
        # `except PilotException, e`, `__import__(..., -1)`); it will not parse on Python 3.

        # accept a single activity name as well as a list
        if isinstance(activity, basestring):
            activity = [activity]

        result, errors = None, []
        avail_activity = False
        for act in activity:
            copytools = self.acopytools.get(act)
            storages = self.astorages.get(act)
            if not copytools:
                logger.warn("No available copytools for activity %s" % act)
                continue
            if act in ['pw', 'pls', 'es_events', 'es_failover'] and not storages:
                # for write activity, if corresponding storages are not defined, should use different activity
                logger.warn("Failed to find corresponding astorages for writing activity(%s), will try next activity" % act)
                continue

            # only the first configured storage is used for the attempt
            storage = storages[0] if storages else None
            avail_activity = True
            for name in copytools:
                try:
                    if name not in self.copytool_modules:
                        raise PilotException('passed unknown copytool with name=%s .. skipped' % name)
                    module = self.copytool_modules[name]['module_name']
                    logger.info('Trying to use copytool=%s for activity=%s' % (name, act))
                    # dynamic import of the copytool module (level=-1: Python 2 relative-then-absolute)
                    copytool = __import__('pilot.copytool.%s' % module, globals(), locals(), [module], -1)
                except PilotException as e:
                    errors.append(e)
                    logger.debug('Error: %s' % e)
                    continue
                except Exception as e:
                    logger.warning('Failed to import copytool module=%s, error=%s' % (module, e))
                    logger.debug('Error: %s' % e)
                    continue

                try:
                    result = self.transfer_files(copytool, files, act, storage, **kwargs)
                except PilotException, e:
                    errors.append(e)
                    logger.debug('Error: %s' % e)
                except Exception as e:
                    logger.warning('Failed to transfer files using copytool=%s .. skipped; error=%s' % (copytool, e))
                    logger.error(traceback.format_exc())
                    errors.append(e)

                # a missing output file is fatal: do not try other copytools/activities
                if errors and isinstance(errors[-1], PilotException) and errors[-1].get_error_code() == ErrorCodes.MISSINGOUTPUTFILE:
                    raise errors[-1]

                if result:
                    break
                else:
                    logger.warn("Failed to transfer files using activity(%s) copytool(%s) with error=%s" % (act, name, errors))
Exemplo n.º 26
0
                    errors.append(e)

                if errors and isinstance(errors[-1], PilotException) and errors[-1].get_error_code() == ErrorCodes.MISSINGOUTPUTFILE:
                    raise errors[-1]

                if result:
                    break
                else:
                    logger.warn("Failed to transfer files using activity(%s) copytool(%s) with error=%s" % (act, name, errors))
            if result:
                break
            else:
                logger.warn("Failed to transfer files using activity(%s) with copytools(%s)" % (act, copytools))

        if not avail_activity:
            raise PilotException('Not available activity with both acopytools and astorages defined')
        if not result:
            raise PilotException('Failed to transfer files with activities %s' % (activity))

        return result


class StageInESClient(StagingESClient, StageInClient):

    def process_storage_id(self, files):
        """
        If storage_id is specified, replace ddmendpoint by parsing storage_id
        """
        for fspec in files:
            if fspec.storage_token:
                storage_id, path_convention = fspec.get_storage_id_and_path_convention()
Exemplo n.º 27
0
def get_command(job,
                xdata,
                queue,
                script,
                eventtype,
                localsite,
                remotesite,
                external_dir,
                label='stage-in',
                container_type='container'):
    """
    Get the middleware container execution command.

    Note: this function is tailor made for stage-in/out.

    :param job: job object.
    :param xdata: list of FileSpec objects.
    :param queue: queue name (string).
    :param script: name of stage-in/out script (string).
    :param eventtype: event type passed on to the script (string).
    :param localsite: local site name passed on to the script (string).
    :param remotesite: remote site name passed on to the script (string).
    :param external_dir: input or output files directory (string).
    :param label: optional 'stage-[in|out]' (string).
    :param container_type: optional 'container/bash' (string).
    :return: stage-in/out command (string).
    :raises PilotException: for stage-in/out related failures
    """

    if label == 'stage-out':
        filedata_dictionary = get_filedata_strings(xdata)
    else:
        filedata_dictionary = get_filedata(xdata)

        # write file data to file (stage-in scripts read the replica dictionary from disk)
        try:
            status = write_json(
                path.join(job.workdir,
                          config.Container.stagein_replica_dictionary),
                filedata_dictionary)
        except Exception as exc:
            diagnostics = 'exception caught in get_command(): %s' % exc
            logger.warning(diagnostics)
            raise PilotException(diagnostics)
        else:
            if not status:
                diagnostics = 'failed to write replica dictionary to file'
                logger.warning(diagnostics)
                raise PilotException(diagnostics)

    # copy pilot source into container directory, unless it is already there
    diagnostics = copy_pilot_source(job.workdir)
    if diagnostics:
        raise PilotException(diagnostics)

    final_script_path = path.join(job.workdir, script)
    # bug fix: environ.get('PYTHONPATH') returns None when PYTHONPATH is unset,
    # which made the string concatenation raise TypeError
    environ['PYTHONPATH'] = environ.get('PYTHONPATH', '') + ':' + job.workdir
    script_path = path.join('pilot/scripts', script)
    full_script_path = path.join(job.workdir, script_path)  # removed redundant nested path.join()
    copy(full_script_path, final_script_path)

    if container_type == 'container':
        # correct the path when containers have been used
        final_script_path = path.join('.', script)
        workdir = '/srv'
    else:
        # for container_type=bash we need to add the rucio setup
        pilot_user = environ.get('PILOT_USER', 'generic').lower()
        user = __import__('pilot.user.%s.container' % pilot_user, globals(),
                          locals(), [pilot_user], 0)  # Python 2/3
        try:
            final_script_path = user.get_middleware_container_script(
                '', final_script_path, asetup=True)
        except PilotException:
            # fall back to running the script directly with python
            final_script_path = 'python %s' % final_script_path
        workdir = job.workdir

    cmd = "%s -d -w %s -q %s --eventtype=%s --localsite=%s --remotesite=%s --produserid=\"%s\" --jobid=%s" % \
          (final_script_path, workdir, queue, eventtype, localsite, remotesite, job.produserid.replace(' ', '%20'), job.jobid)

    if label == 'stage-in':
        cmd += " --eventservicemerge=%s --usepcache=%s --usevp=%s --replicadictionary=%s" % \
               (job.is_eventservicemerge, job.infosys.queuedata.use_pcache, job.use_vp, config.Container.stagein_replica_dictionary)
        if external_dir:
            cmd += ' --inputdir=%s' % external_dir
    else:  # stage-out
        cmd += ' --lfns=%s --scopes=%s --datasets=%s --ddmendpoints=%s --guids=%s' % \
               (filedata_dictionary['lfns'], filedata_dictionary['scopes'], filedata_dictionary['datasets'],
                filedata_dictionary['ddmendpoints'], filedata_dictionary['guids'])
        if external_dir:
            cmd += ' --outputdir=%s' % external_dir

    cmd += ' --taskid=%s' % job.taskid
    cmd += ' --jobdefinitionid=%s' % job.jobdefinitionid
    cmd += ' --catchall=%s' % job.infosys.queuedata.catchall

    if container_type == 'bash':
        # make the wrapper propagate the script's exit status
        cmd += '\nexit $?'

    return cmd
Exemplo n.º 28
0
def copy_in(files, **kwargs):
    """
    Download given files using the lsm-get command.

    Each file is moved from its TURL to the local work directory, the catalog
    checksum is verified, and a trace report is sent per file.

    :param files: list of `FileSpec` objects.
    :raise: PilotException in case of controlled error.
    :return: files `FileSpec` object.
    """

    exit_code = 0
    stdout = ""
    stderr = ""

    copytools = kwargs.get('copytools') or []
    copysetup = get_copysetup(copytools, 'lsm')
    trace_report = kwargs.get('trace_report')
    #allow_direct_access = kwargs.get('allow_direct_access')

    # note, env vars might be unknown inside middleware contrainers, if so get the value already in the trace report
    localsite = os.environ.get('RUCIO_LOCAL_SITE_ID',
                               trace_report.get_value('localSite'))

    for fspec in files:
        # update the trace report
        # note: once localsite resolves (possibly from the first file's ddmendpoint),
        # the same value is reused for all remaining files
        localsite = localsite if localsite else fspec.ddmendpoint
        trace_report.update(localSite=localsite,
                            remoteSite=fspec.ddmendpoint,
                            filesize=fspec.filesize)
        trace_report.update(filename=fspec.lfn,
                            guid=fspec.guid.replace('-', ''))
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset)

        # continue loop for files that are to be accessed directly  ## TO BE DEPRECATED (anisyonk)
        #if fspec.is_directaccess(ensure_replica=False) and allow_direct_access and fspec.accessmode == 'direct':
        #    fspec.status_code = 0
        #    fspec.status = 'remote_io'
        #    trace_report.update(url=fspec.turl, clientState='FOUND_ROOT', stateReason='direct_access')
        #    trace_report.send()
        #    continue

        trace_report.update(catStart=time())

        dst = fspec.workdir or kwargs.get('workdir') or '.'
        #timeout = get_timeout(fspec.filesize)
        source = fspec.turl
        destination = os.path.join(dst, fspec.lfn)

        logger.info("transferring file %s from %s to %s", fspec.lfn, source,
                    destination)

        # perform the actual lsm transfer
        exit_code, stdout, stderr = move(source,
                                         destination,
                                         dst_in=True,
                                         copysetup=copysetup)

        if exit_code != 0:
            logger.warning(
                "transfer failed: exit code = %d, stdout = %s, stderr = %s",
                exit_code, stdout, stderr)

            # map the tool output onto a pilot error code/state, report and abort
            error = resolve_common_transfer_errors(stderr, is_stagein=True)
            fspec.status = 'failed'
            fspec.status_code = error.get('rcode')
            trace_report.update(clientState=error.get('state')
                                or 'STAGEIN_ATTEMPT_FAILED',
                                stateReason=error.get('error'),
                                timeEnd=time())
            trace_report.send()
            raise PilotException(error.get('error'),
                                 code=error.get('rcode'),
                                 state=error.get('state'))

        # verify checksum; compare local checksum with catalog value (fspec.checksum), use same checksum type
        state, diagnostics = verify_catalog_checksum(fspec, destination)
        if diagnostics != "":
            trace_report.update(clientState=state or 'STAGEIN_ATTEMPT_FAILED',
                                stateReason=diagnostics,
                                timeEnd=time())
            trace_report.send()
            raise PilotException(diagnostics,
                                 code=fspec.status_code,
                                 state=state)

        fspec.status_code = 0
        fspec.status = 'transferred'
        trace_report.update(clientState='DONE',
                            stateReason='OK',
                            timeEnd=time())
        trace_report.send()

    # for testing kill signals
    #import signal
    #os.kill(os.getpid(), signal.SIGSEGV)

    return files
Exemplo n.º 29
0
def copy_out(files, **kwargs):
    """
    Upload given files using lsm copytool.

    :param files: list of `FileSpec` objects.
    :raise: PilotException in case of controlled error.
    :return: list of `FileSpec` objects (updated in place).
    """

    copytools = kwargs.get('copytools') or []
    copysetup = get_copysetup(copytools, 'lsm')
    trace_report = kwargs.get('trace_report')
    ddmconf = kwargs.get('ddmconf', None)
    if not ddmconf:
        raise PilotException(
            "copy_out() failed to resolve ddmconf from function arguments",
            code=ErrorCodes.STAGEOUTFAILED,
            state='COPY_ERROR')

    for fspec in files:
        trace_report.update(scope=fspec.scope,
                            dataset=fspec.dataset,
                            url=fspec.surl,
                            filesize=fspec.filesize)
        trace_report.update(catStart=time(),
                            filename=fspec.lfn,
                            guid=fspec.guid.replace('-', ''))

        # resolve token value from fspec.ddmendpoint
        ddm = ddmconf.get(fspec.ddmendpoint)
        token = ddm.token
        if not token:
            diagnostics = "copy_out() failed to resolve token value for ddmendpoint=%s" % (
                fspec.ddmendpoint)
            trace_report.update(clientState='STAGEOUT_ATTEMPT_FAILED',
                                stateReason=diagnostics,
                                timeEnd=time())
            trace_report.send()
            raise PilotException(diagnostics,
                                 code=ErrorCodes.STAGEOUTFAILED,
                                 state='COPY_ERROR')

        src = fspec.workdir or kwargs.get('workdir') or '.'
        source = os.path.join(src, fspec.lfn)
        destination = fspec.turl

        # checksum has been calculated in the previous step - transfer_files() in api/data
        # note: pilot is handing over checksum to the command - which will/should verify it after the transfer
        # NOTE(review): if 'adler32' is missing from fspec.checksum this becomes "adler32:None" -- verify upstream guarantees
        checksum = "adler32:%s" % fspec.checksum.get('adler32')

        # define the command options
        opts = {
            '--size': fspec.filesize,
            '-t': token,
            '--checksum': checksum,
            '--guid': fspec.guid
        }
        try:
            opts = " ".join(["%s %s" % (k, v)
                             for (k, v) in opts.iteritems()])  # Python 2
        except Exception:
            opts = " ".join([
                "%s %s" % (k, v) for (k, v) in list(opts.items())
            ])  # Python 3

        logger.info("transferring file %s from %s to %s", fspec.lfn, source,
                    destination)

        nretries = 1  # input parameter to function?
        for retry in range(nretries):
            exit_code, stdout, stderr = move(source,
                                             destination,
                                             dst_in=False,
                                             copysetup=copysetup,
                                             options=opts)

            if exit_code != 0:
                if stderr == "":
                    stderr = stdout
                error = resolve_common_transfer_errors(stderr,
                                                       is_stagein=False)
                fspec.status = 'failed'
                # bug fix: resolve_common_transfer_errors() returns the error code under
                # the 'rcode' key, not 'exit_code', so status_code/code were always None
                fspec.status_code = error.get('rcode')
                trace_report.update(clientState=error.get('state', None)
                                    or 'STAGEOUT_ATTEMPT_FAILED',
                                    stateReason=error.get(
                                        'error', 'unknown error'),
                                    timeEnd=time())
                trace_report.send()
                raise PilotException(error.get('error'),
                                     code=error.get('rcode'),
                                     state=error.get('state'))
            else:  # all successful
                logger.info('all successful')
                break

        fspec.status_code = 0
        fspec.status = 'transferred'
        trace_report.update(clientState='DONE',
                            stateReason='OK',
                            timeEnd=time())
        trace_report.send()

    return files
Exemplo n.º 30
0
def copy_in(files, **kwargs):
    """
    Download given files using the gfal-copy command.

    :param files: list of `FileSpec` objects
    :raises: StageInFailure if no GFAL2 tools are available
    :raises: PilotException in case of controlled transfer errors
    :return: the input list of `FileSpec` objects (updated in place)
    """

    #allow_direct_access = kwargs.get('allow_direct_access') or False
    trace_report = kwargs.get('trace_report')

    if not check_for_gfal():
        raise StageInFailure("No GFAL2 tools found")

    # local site name for the trace report; env vars may be unset inside containers
    localsite = os.environ.get('RUCIO_LOCAL_SITE_ID',
                               os.environ.get('DQ2_LOCAL_SITE_ID', None))
    for fspec in files:
        # once resolved (possibly from the first file's ddmendpoint),
        # the same localsite value is reused for all remaining files
        localsite = localsite or fspec.ddmendpoint
        trace_report.update(localSite=localsite,
                            remoteSite=fspec.ddmendpoint,
                            filesize=fspec.filesize)
        trace_report.update(filename=fspec.lfn,
                            guid=fspec.guid.replace('-', ''))
        trace_report.update(scope=fspec.scope, dataset=fspec.dataset)
        trace_report.update(catStart=time())

        workdir = fspec.workdir or kwargs.get('workdir') or '.'
        transfer_timeout = get_timeout(fspec.filesize)

        # remote source TURL and local destination path
        src_url = fspec.turl
        dst_url = "file://%s" % os.path.abspath(
            os.path.join(workdir, fspec.lfn))

        args = ['gfal-copy --verbose -f', ' -t %s' % transfer_timeout]
        if fspec.checksum:
            # pass the catalog checksum so gfal can verify it after the copy
            args += ['-K',
                     '%s:%s' % list(fspec.checksum.items())[0]]  # Python 2/3
        args += [src_url, dst_url]

        rcode, stdout, stderr = execute(" ".join(args), **kwargs)

        if rcode:  # an error occurred: classify it, send the trace and abort
            if rcode in [errno.ETIMEDOUT, errno.ETIME]:
                error = {
                    'rcode': ErrorCodes.STAGEINTIMEOUT,
                    'state': 'CP_TIMEOUT',
                    'error': 'Copy command timed out: %s' % stderr
                }
            else:
                error = resolve_common_transfer_errors(stdout + stderr,
                                                       is_stagein=True)
            fspec.status = 'failed'
            fspec.status_code = error.get('rcode')
            trace_report.update(clientState=error.get('state')
                                or 'STAGEIN_ATTEMPT_FAILED',
                                stateReason=error.get('error'),
                                timeEnd=time())
            trace_report.send()

            raise PilotException(error.get('error'),
                                 code=error.get('rcode'),
                                 state=error.get('state'))

        # transfer succeeded
        fspec.status_code = 0
        fspec.status = 'transferred'
        trace_report.update(clientState='DONE',
                            stateReason='OK',
                            timeEnd=time())
        trace_report.send()

    return files