Exemplo n.º 1
0
def testS3upload(s3tester, archiveName, hashkey, logger):
    cachename = "%s.tgz" % hashkey
    try:
        t1 = time.time()
        timestamp = time.strftime('%y%m%d_%H%M%S', time.gmtime())
        msecs = int((t1 - int(t1)) * 1000)
        timestamp += '.%03d' % msecs
        with tempSetLogLevel(logger=logger, level=1000):  # disable all logging for this call
            uploadToS3(crabserver=s3tester, objecttype='sandbox', filepath=archiveName,
                       tarballname=cachename, logger=logger)
        status = 'OK'
        logger.debug('Successfully uploaded tarball to S3 as well')
    except Exception as e:
        logger.debug('Tarball upload to S3 failed:\n%s', str(e))
        status = 'FAIL'
        reason = str(e)
    t2 = time.time()
    s3report = {'status':status}
    if status == 'FAIL':
        s3report['reason'] = reason
    thisSite = socket.gethostname()
    thisIP = socket.gethostbyname(thisSite)
    tarballKB = os.stat(archiveName).st_size // 1024
    s3report['timestamp'] = timestamp
    s3report['clienthost'] = thisSite
    s3report['clientip'] = thisIP
    s3report['KBytes'] = tarballKB
    s3report['seconds'] = int(t2-t1)
    return s3report
Exemplo n.º 2
0
 def execute(self, *args, **kw):
     """
     Main execute
     """
     self.task = kw['task']
     # Do not check it for HC
     # ActivitiesToRunEverywhere is used mainly for HC and there is no need to check for it.
     if hasattr(self.config.TaskWorker, 'ActivitiesToRunEverywhere') and \
                self.task['tm_activity'] in self.config.TaskWorker.ActivitiesToRunEverywhere:
         self.logger.info(
             "Will not check possibility to write to destination site because activity: %s is in ActivitiesToRunEverywhere",
             self.task['tm_activity'])
         return
     # If user specified no output and no logs transfer, there is also no need to check it.
     if self.task['tm_save_logs'] == 'F' and self.task[
             'tm_transfer_outputs'] == 'F':
         self.logger.info(
             "Will not check possibility to write to destination site because user specified not transfer any output/log files."
         )
         return
     # Do not need to check if it is dryrun.
     if self.task['tm_dry_run'] == 'T':
         self.logger.info(
             "Will not check possibility to write to destination site. User specified dryrun option."
         )
         return
     self.workflow = self.task['tm_taskname']
     self.proxy = self.task['user_proxy']
     cpCmd, rmCmd, append = getCheckWriteCommand(self.proxy, self.logger)
     if not cpCmd:
         self.logger.info(
             "CRAB3 is not configured to check write permissions. There is no GFAL2 or LCG commands installed. Continuing"
         )
         return
     filename = re.sub("[:-_]", "",
                       self.task['tm_taskname']) + '_crab3check.tmp'
     try:
         with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
             pfn = getPFN(self.proxy, self.task['tm_output_lfn'], filename,
                          self.task['tm_asyncdest'], self.logger)
         cpCmd += append + os.path.abspath(filename) + " " + pfn
         rmCmd += " " + pfn
         createDummyFile(filename, self.logger)
         self.logger.info("Executing cp command: %s ", cpCmd)
         res = self.checkPermissions(cpCmd)
         if res == 0:
             self.logger.info("Executing rm command: %s ", rmCmd)
             self.checkPermissions(rmCmd)
     except IOError as er:
         self.logger.info(
             'IOError %s. CRAB3 backend disk is full. Please report to experts. Task will not be submitted',
             er)
         raise
     except HTTPException as er:
         self.logger.warning(
             "CRAB3 is not able to get pfn from PhEDEx. Error %s", er)
     finally:
         removeDummyFile(filename, self.logger)
     return
Exemplo n.º 3
0
    def tryProxyLogon(self, proxycfg=None):
        """
        Utility function to allow trying with diffenent myproxy configurations.
        It tries to retrieve a valid proxy from myproxy using the configuration
        passed as argument. See WMCore.Credential.Proxy for configuration details.
        If successful returns the proxy filename and list of VOMS groups
        for later addition via voms-proxy-init. If not rises a TW exception.
        Note that logonRenewMyProxy() does not rise exceptions.
        """

        # WMCore proxy methods are awfully verbose, reduce logging level when using them
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            proxy = Proxy(proxycfg)
            userproxy = proxy.getProxyFilename(
                serverRenewer=True)  # this only returns a filename
            proxy.logonRenewMyProxy(
            )  # this tries to create the proxy, but if it fails it does not rise
            usergroups = set(proxy.getAllUserGroups(
                userproxy))  # get VOMS groups from created proxy (if any)
            timeleft = proxy.getTimeLeft(
                userproxy
            )  # this is the way to tell if proxy creation succeeded

        errmsg = ''
        if timeleft is None or timeleft <= 0:
            errmsg = "Impossible to retrieve proxy from %s for %s." % (
                proxycfg['myProxySvr'], proxycfg['userDN'])
        if timeleft < (5 * 24 * 3600):
            errmsg = "Could not get a proxy valid for at least 5-days from %s for %s." % (
                proxycfg['myProxySvr'], proxycfg['userDN'])
        if errmsg:
            self.logger.error(errmsg)
            self.logger.error("Will try again in verbose mode")
            self.logger.error(
                "===========PROXY ERROR START ==========================")
            with tempSetLogLevel(logger=self.logger, level=logging.DEBUG):
                proxy.logonRenewMyProxy()
            self.logger.error(
                "===========PROXY ERROR END   ==========================")
            raise TaskWorkerException(errmsg)

        hoursleft = timeleft / 3600
        minutesleft = (timeleft % 3600) / 60
        self.logger.info('retrieved proxy lifetime in h:m: %d:%d', hoursleft,
                         minutesleft)
        return (userproxy, usergroups)
Exemplo n.º 4
0
 def execute(self, *args, **kwargs):
     result = None
     proxycfg = {'vo': kwargs['task']['tm_user_vo'],
                 'logger': self.logger,
                 'myProxySvr': self.config.Services.MyProxy,
                 'proxyValidity' : '144:0',
                 'min_time_left' : 36000, ## do we need this ? or should we use self.myproxylen? 
                 'userDN' : kwargs['task']['tm_user_dn'],
                 'group' : kwargs['task']['tm_user_group'] if kwargs['task']['tm_user_group'] else '',
                 'role' : kwargs['task']['tm_user_role'] if kwargs['task']['tm_user_role'] else '',
                 'server_key': self.config.MyProxy.serverhostkey,
                 'server_cert': self.config.MyProxy.serverhostcert,
                 'serverDN': self.config.MyProxy.serverdn,
                 'uisource': getattr(self.config.MyProxy, 'uisource', ''),
                 'credServerPath': self.config.MyProxy.credpath,
                 'myproxyAccount' : self.server['host'],
                 'cleanEnvironment' : getattr(self.config.MyProxy, 'cleanEnvironment', False)
                }
     # WMCore proxy methods are awfully verbode, reduce logging level when using them
     with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
         proxy = Proxy(proxycfg)
         userproxy = proxy.getProxyFilename(serverRenewer=True)
         proxy.logonRenewMyProxy()
         timeleft = proxy.getTimeLeft(userproxy)
         usergroups = set(proxy.getAllUserGroups(userproxy))
     if timeleft is None or timeleft <= 0:
         msg = "Impossible to retrieve proxy from %s for %s." % (proxycfg['myProxySvr'], proxycfg['userDN'])
         self.logger.error(msg)
         self.logger.error("\n Will try again in verbose mode")
         self.logger.error("===========PROXY ERROR START ==========================")
         with tempSetLogLevel(logger=self.logger, level=logging.DEBUG):
             userproxy = proxy.getProxyFilename(serverRenewer=True)
             proxy.logonRenewMyProxy()
             timeleft = proxy.getTimeLeft(userproxy)
             usergroups = set(proxy.getAllUserGroups(userproxy))
         self.logger.error("===========PROXY ERROR END   ==========================")
         raise TaskWorkerException(msg)
     else:
         kwargs['task']['user_proxy'] = userproxy
         kwargs['task']['user_groups'] = usergroups
         self.logger.debug("Valid proxy for %s now in %s", proxycfg['userDN'], userproxy)
         result = Result(task=kwargs['task'], result='OK')
     return result
Exemplo n.º 5
0
    def executeInternal(self, *args, **kwargs):

        self.logger.info(
            "Data discovery with DBS")  ## to be changed into debug

        dbsurl = self.config.Services.DBSUrl
        if kwargs['task']['tm_dbs_url']:
            dbsurl = kwargs['task']['tm_dbs_url']
        self.dbs = DBSReader(dbsurl)
        self.dbsInstance = self.dbs.dbs.serverinfo()["dbs_instance"]
        isUserDataset = self.dbsInstance.split('/')[1] != 'global'
        # where to look locations in pre-Rucio world
        PhEDExOrDBS = 'PhEDEx' if not isUserDataset else 'DBS origin site'

        taskName = kwargs['task']['tm_taskname']
        userProxy = kwargs['task']['user_proxy']
        self.logger.debug("Data discovery through %s for %s", self.dbs,
                          taskName)

        inputDataset = kwargs['task']['tm_input_dataset']
        secondaryDataset = kwargs['task'].get('tm_secondary_input_dataset',
                                              None)

        self.checkDatasetStatus(inputDataset, kwargs)
        if secondaryDataset:
            self.checkDatasetStatus(secondaryDataset, kwargs)

        try:
            # Get the list of blocks for the locations.
            # The WMCore DBS3 implementation makes one call to DBS for each block
            # when using locations=True so we are using locations=False and looking up location later
            blocks = [
                x['Name'] for x in self.dbs.getFileBlocksInfo(inputDataset,
                                                              locations=False)
            ]
            if secondaryDataset:
                secondaryBlocks = [
                    x['Name']
                    for x in self.dbs.getFileBlocksInfo(secondaryDataset,
                                                        locations=False)
                ]
        except DBSReaderError as dbsexc:
            # dataset not found in DBS is a known use case
            if str(dbsexc).find('No matching data'):
                raise TaskWorkerException(
                    "CRAB could not find dataset %s in this DBS instance: %s" %
                    inputDataset, dbsurl)
            raise
        ## Create a map for block's locations: for each block get the list of locations.
        ## Note: listFileBlockLocation() gets first the locations from PhEDEx, and if no
        ## locations are found it gets the original locations from DBS. So it should
        ## never be the case at this point that some blocks have no locations.
        ## locationsMap is a dictionary, key=blockName, value=list of PhedexNodes, example:
        ## {'/JetHT/Run2016B-PromptReco-v2/AOD#b10179dc-3723-11e6-9aa5-001e67abf228': [u'T1_IT_CNAF_Buffer', u'T2_US_Wisconsin', u'T1_IT_CNAF_MSS', u'T2_BE_UCL'],
        ## '/JetHT/Run2016B-PromptReco-v2/AOD#89b03ca6-1dc9-11e6-b567-001e67ac06a0': [u'T1_IT_CNAF_Buffer', u'T2_US_Wisconsin', u'T1_IT_CNAF_MSS', u'T2_BE_UCL']}

        # For now apply Rucio data location only to NANOAOD*
        # in time useRucioForLocations may become a more rich expression
        isNano = blocks[0].split("#")[0].split("/")[-1] in [
            "NANOAOD", "NANOAODSIM"
        ]
        if isNano:
            self.logger.info(
                "NANOAOD* datset. Will use Rucio for data location")
        useRucioForLocations = isNano
        locationsFoundWithRucio = False

        if not useRucioForLocations:
            self.logger.info("Will not use Rucio for this dataset")
        # if locations should be in Rucio, try it first and fall back to old ways if Rucio calls fail
        # of if they return no locations (possible Rucio teething pain). If Rucio returns a list, trust it.
        if useRucioForLocations:
            locationsMap = {}
            scope = "cms"
            # If the dataset is a USER one, use the Rucio user scope to find it
            # TODO: we need a way to enable users to indicate others user scopes as source
            if isUserDataset:
                scope = "user.%s" % kwargs['task']['tm_username']
            rucio_config_dict = {
                "phedexCompatible": True,
                "auth_type": "x509",
                "ca_cert": self.config.Services.Rucio_caPath,
                "logger": self.logger,
                "creds": {
                    "client_cert": self.config.TaskWorker.cmscert,
                    "client_key": self.config.TaskWorker.cmskey
                }
            }
            try:
                self.logger.info("Initializing Rucio client")
                # WMCore is awfully verbose
                with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
                    rucioClient = Rucio(
                        self.config.Services.Rucio_account,
                        hostUrl=self.config.Services.Rucio_host,
                        authUrl=self.config.Services.Rucio_authUrl,
                        configDict=rucio_config_dict)
                rucioClient.whoAmI()
                self.logger.info(
                    "Looking up data location with Rucio in %s scope.", scope)
                with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
                    locations = rucioClient.getReplicaInfoForBlocks(
                        scope=scope, block=list(blocks))
            except Exception as exc:
                msg = "Rucio lookup failed with\n%s" % str(exc)
                # TODO when removing fall-back to PhEDEx, this should be a fatal error
                # raise TaskWorkerException(msg)
                self.logger.warn(msg)
                locations = None

            # TODO when removing fall-back to PhEDEx, above code will raise if it fails, therefore
            # the following "if" must be removed and the code shifted left
            if locations:
                located_blocks = locations['phedex']['block']
                for element in located_blocks:
                    if element[
                            'replica']:  # only fill map for blocks which have at least one location
                        locationsMap.update({
                            element['name']:
                            [x['node'] for x in element['replica']]
                        })
                if locationsMap:
                    locationsFoundWithRucio = True
                else:
                    msg = "No locations found with Rucio for this dataset"
                    # since NANO* are not in PhEDEx, this should be a fatal error
                    if isNano:
                        raise TaskWorkerException(msg)
                    else:
                        # note it down and try with PhEDEx
                        self.logger.warn(msg)

        if not locationsFoundWithRucio:  # fall back to pre-Rucio methods
            try:
                self.logger.info("Looking up data locations using %s",
                                 PhEDExOrDBS)
                locationsMap = self.dbs.listFileBlockLocation(
                    list(blocks), dbsOnly=isUserDataset)
            except Exception as ex:
                raise TaskWorkerException(
                    "The CRAB3 server backend could not get the location of the files from dbs nor phedex nor rucio.\n"+\
                    "This is could be a temporary phedex/rucio/dbs glitch, please try to submit a new task (resubmit will not work)"+\
                    " and contact the experts if the error persists.\nError reason: %s" % str(ex)
                    )
            # only fill map for blocks which have at least one location
            locationsMap = {
                key: value
                for key, value in locationsMap.iteritems() if value
            }

        if secondaryDataset:
            secondaryLocationsMap = {}
            # see https://github.com/dmwm/CRABServer/issues/6075#issuecomment-641569446
            self.logger.info(
                "Trying data location of secondary blocks with Rucio")
            try:
                locations = rucioClient.getReplicaInfoForBlocks(
                    scope=scope, block=list(secondaryBlocks))
            except Exception as exc:
                locations = None
                secondaryLocationsMap = {}
                self.logger.warn("Rucio lookup failed with. %s", exc)
            if locations:
                located_blocks = locations['phedex']['block']
                for element in located_blocks:
                    if element[
                            'replica']:  # only fill map for blocks which have at least one location
                        secondaryLocationsMap.update({
                            element['name']:
                            [x['node'] for x in element['replica']]
                        })
            if not secondaryLocationsMap:
                msg = "No locations found with Rucio for secondaryDataset."
                # TODO when removing fall-back to PhEDEx, this should be a fatal error
                # raise TaskWorkerException(msg)
                self.logger.warn(msg)
                self.logger.info(
                    "Trying data location of secondary blocks with PhEDEx")
                try:
                    secondaryLocationsMap = self.dbs.listFileBlockLocation(
                        list(secondaryBlocks), dbsOnly=isUserDataset)
                except Exception as ex:
                    raise TaskWorkerException(
                        "The CRAB3 server backend could not get the location of the secondary dataset files from dbs or phedex or rucio.\n" + \
                        "This is could be a temporary phedex/rucio/dbs glitch, please try to submit a new task (resubmit will not work)" + \
                        " and contact the experts if the error persists.\nError reason: %s" % str(ex)
                    )
                # only fill map for blocks which have at least one location
                secondaryLocationsMap = {
                    key: value
                    for key, value in secondaryLocationsMap.iteritems()
                    if value
                }

        # From now on code is not dependent from having used Rucio or PhEDEx

        blocksWithLocation = locationsMap.keys()
        if secondaryDataset:
            secondaryBlocksWithLocation = secondaryLocationsMap.keys()

        self.keepOnlyDisks(locationsMap)
        if not locationsMap:
            msg = "Task could not be submitted because there is no DISK replica for dataset %s" % inputDataset
            if self.tapeLocations:
                msg += "\nN.B.: the input dataset is stored at %s, but those are TAPE locations." % ', '.join(
                    sorted(self.tapeLocations))
                # submit request to DDM
                ddmRequest = None
                ddmServer = self.config.TaskWorker.DDMServer
                try:
                    ddmRequest = blocksRequest(blocksWithLocation,
                                               ddmServer,
                                               self.config.TaskWorker.cmscert,
                                               self.config.TaskWorker.cmskey,
                                               verbose=False)
                except HTTPException as hte:
                    self.logger.exception(hte)
                    msg += "\nThe automatic stage-out failed, please try again later. If the error persists contact the experts and provide this error message:"
                    msg += "\nHTTP Error while contacting the DDM server %s:\n%s" % (
                        ddmServer, str(hte))
                    msg += "\nHTTP Headers are: %s" % hte.headers
                    msg += "\nYou might want to contact your physics group if you need a disk replica."
                    raise TaskWorkerException(msg, retry=True)

                self.logger.info("Contacted %s using %s and %s, got:\n%s",
                                 self.config.TaskWorker.DDMServer,
                                 self.config.TaskWorker.cmscert,
                                 self.config.TaskWorker.cmskey, ddmRequest)
                # The query above returns a JSON with a format {"result": "OK", "message": "Copy requested", "data": [{"request_id": 18, "site": <site>, "item": [<list of blocks>], "group": "AnalysisOps", "n": 1, "status": "new", "first_request": "2018-02-26 23:57:37", "last_request": "2018-02-26 23:57:37", "request_count": 1}]}
                if ddmRequest["result"] == "OK":
                    # set status to TAPERECALL
                    tapeRecallStatus = 'TAPERECALL'
                    ddmReqId = ddmRequest["data"][0]["request_id"]
                    configreq = {
                        'workflow': taskName,
                        'taskstatus': tapeRecallStatus,
                        'ddmreqid': ddmReqId,
                        'subresource': 'addddmreqid',
                    }
                    try:
                        tapeRecallStatusSet = self.server.post(
                            self.restURInoAPI + '/task',
                            data=urllib.urlencode(configreq))
                    except HTTPException as hte:
                        self.logger.exception(hte)
                        msg = "HTTP Error while contacting the REST Interface %s:\n%s" % (
                            self.config.TaskWorker.restHost, str(hte))
                        msg += "\nSetting %s status and DDM request ID (%d) failed for task %s" % (
                            tapeRecallStatus, ddmReqId, taskName)
                        msg += "\nHTTP Headers are: %s" % hte.headers
                        raise TaskWorkerException(msg, retry=True)

                    msg += "\nA disk replica has been requested on %s to CMS DDM (request ID: %d)" % (
                        ddmRequest["data"][0]["first_request"], ddmReqId)
                    if tapeRecallStatusSet[2] == "OK":
                        self.logger.info("Status for task %s set to '%s'",
                                         taskName, tapeRecallStatus)
                        msg += "\nThis task will be automatically submitted as soon as the stage-out is completed."
                        self.uploadWarning(msg, userProxy, taskName)

                        raise TapeDatasetException(msg)
                    else:
                        msg += ", please try again in two days."

                else:
                    msg += "\nThe disk replica request failed with this error:\n %s" % ddmRequest[
                        "message"]

            msg += "\nPlease, check DAS (https://cmsweb.cern.ch/das) and make sure the dataset is accessible on DISK."
            raise TaskWorkerException(msg)

        # will not need lumi info if user has asked for split by file with no run/lumi mask
        splitAlgo = kwargs['task']['tm_split_algo']
        lumiMask = kwargs['task']['tm_split_args']['lumis']
        runRange = kwargs['task']['tm_split_args']['runs']

        needLumiInfo = splitAlgo != 'FileBased' or lumiMask != [] or runRange != []
        # secondary dataset access relies on run/lumi info
        if secondaryDataset:
            needLumiInfo = True
        if needLumiInfo:
            self.checkBlocksSize(
                blocksWithLocation
            )  # Interested only in blocks with locations, 'blocks' may contain invalid ones and trigger an Exception
            if secondaryDataset:
                self.checkBlocksSize(secondaryBlocksWithLocation)
        try:
            filedetails = self.dbs.listDatasetFileDetails(
                inputDataset,
                getParents=True,
                getLumis=needLumiInfo,
                validFileOnly=0)
            if secondaryDataset:
                moredetails = self.dbs.listDatasetFileDetails(
                    secondaryDataset,
                    getParents=False,
                    getLumis=needLumiInfo,
                    validFileOnly=0)

                for secfilename, secinfos in moredetails.items():
                    secinfos['lumiobj'] = LumiList(
                        runsAndLumis=secinfos['Lumis'])

                self.logger.info(
                    "Beginning to match files from secondary dataset")
                for dummyFilename, infos in filedetails.items():
                    infos['Parents'] = []
                    lumis = LumiList(runsAndLumis=infos['Lumis'])
                    for secfilename, secinfos in moredetails.items():
                        if lumis & secinfos['lumiobj']:
                            infos['Parents'].append(secfilename)
                self.logger.info("Done matching files from secondary dataset")
                kwargs['task']['tm_use_parent'] = 1
        except Exception as ex:  #TODO should we catch HttpException instead?
            self.logger.exception(ex)
            raise TaskWorkerException("The CRAB3 server backend could not contact DBS to get the files details (Lumis, events, etc).\n"+\
                                "This is could be a temporary DBS glitch. Please try to submit a new task (resubmit will not work)"+\
                                " and contact the experts if the error persists.\nError reason: %s" % str(ex))
            #TODO addo the nodes phedex so the user can check themselves
        if not filedetails:
            raise TaskWorkerException(("Cannot find any file inside the dataset. Please, check your dataset in DAS, %s.\n" +\
                                "Aborting submission. Resubmitting your task will not help.") %\
                                ("https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s") %\
                                (self.dbsInstance, inputDataset))

        ## Format the output creating the data structures required by WMCore. Filters out invalid files,
        ## files whose block has no location, and figures out the PSN
        result = self.formatOutput(task=kwargs['task'],
                                   requestname=taskName,
                                   datasetfiles=filedetails,
                                   locations=locationsMap,
                                   tempDir=kwargs['tempDir'])

        if not result.result:
            raise TaskWorkerException((
                "Cannot find any valid file inside the dataset. Please, check your dataset in DAS, %s.\n"
                + "Aborting submission. Resubmitting your task will not help."
            ) % (
                "https://cmsweb.cern.ch/das/request?instance=%s&input=dataset=%s"
            ) % (self.dbsInstance, inputDataset))

        self.logger.debug("Got %s files", len(result.result.getFiles()))

        return result
Exemplo n.º 6
0
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        ## Loop over the sorted list of files.
        configDict = {
            "cacheduration": 1,
            "pycurl": True
        }  # cache duration is in hours
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't affort one message from CRIC per file, unless critical !
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if not infos['BlockName'] in locations or not locations[
                        infos['BlockName']]:
                    self.logger.warning(
                        "Skipping %s because its block (%s) has no locations",
                        lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue
                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    self.logger.warning(
                        "Skipping %s because it has no parents")
                    continue
                ## Create a WMCore File object.
                size = infos['FileSize']
                checksums = {
                    'Checksum': infos['Checksum'],
                    'Adler32': infos['Adler32'],
                    'Md5': infos['Md5']
                }
                wmfile = File(lfn=lfn,
                              events=infos['NumberOfEvents'],
                              size=size,
                              checksums=checksums,
                              parents=infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                        locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through CMS Resource Catalog",
                        locations[wmfile['block']])
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "%d blocks will be skipped because are not completely replicated on DISK: %s" % (
                len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d',
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
Exemplo n.º 7
0
    def formatOutput(self, task, requestname, datasetfiles, locations,
                     tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        ## Loop over the sorted list of files.
        configDict = {
            "cacheduration": 1,
            "pycurl": True
        }  # cache duration is in hours
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't affort one message from CRIC per file, unless critical !
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if not infos['BlockName'] in locations or not locations[
                        infos['BlockName']]:
                    self.logger.warning(
                        "Skipping %s because its block (%s) has no locations",
                        lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue

                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    raise TaskWorkerException(
                        "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n"
                        +
                        "because you specified useParents=True but some your files have no"
                        + "parents.\nExample: " + lfn)
                ## Create a WMCore File object.
                try:
                    size = infos['FileSize']
                    checksums = {
                        'Checksum': infos['Checksum'],
                        'Adler32': infos['Adler32'],
                        'Md5': infos['Md5']
                    }
                except:
                    #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                    # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                    #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                    size = infos['Size']
                    checksums = infos['Checksums']
                wmfile = File(lfn=lfn,
                              events=infos['NumberOfEvents'],
                              size=size,
                              checksums=checksums,
                              parents=infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(
                        locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error(
                        "Impossible translating %s to a CMS name through CMS Resource Catalog",
                        locations[wmfile['block']])
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "%d blocks will be skipped because are not completely replicated on DISK: %s" % (
                len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d',
                          (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug(
            "Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList(
        )
        self.logger.debug(
            "Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"),
                  "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"),
                  "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task=task,
                      result=Fileset(name='FilesToSplit', files=set(wmfiles)))
    def get_proxy_from_MyProxy(self, ad):
        vo = 'cms'
        group = ''
        role = ''
        if 'CRAB_UserVO' in ad and ad['CRAB_UserVO']:
            vo = ad['CRAB_UserVO']
        if 'CRAB_UserGroup' in ad and ad['CRAB_UserGroup'] and ad[
                'CRAB_UserGroup'] != classad.Value.Undefined:
            group = ad['CRAB_UserGroup']
        if 'CRAB_UserRole' in ad and ad['CRAB_UserRole'] and ad[
                'CRAB_UserRole'] != classad.Value.Undefined:
            role = ad['CRAB_UserRole']
        username = ad['CRAB_UserHN']
        proxycfg = {
            'vo':
            vo,
            'logger':
            self.logger,
            'myProxySvr':
            self.config.Services.MyProxy,
            'proxyValidity':
            '144:0',
            'min_time_left':
            MINPROXYLENGTH,  ## do we need this ? or should we use self.myproxylen?
            'userDN':
            ad['CRAB_UserDN'],
            'userName':
            username + '_CRAB',
            'group':
            group,
            'role':
            role,
            'server_key':
            self.config.MyProxy.serverhostkey,
            'server_cert':
            self.config.MyProxy.serverhostcert,
            'serverDN':
            'dummy',  # this is only used inside WMCore/Proxy.py functions not used by CRAB
            'uisource':
            getattr(self.config.MyProxy, 'uisource', ''),
            'credServerPath':
            self.config.MyProxy.credpath,
            'cleanEnvironment':
            getattr(self.config.MyProxy, 'cleanEnvironment', False)
        }

        proxy = Proxy(proxycfg)
        userproxy = proxy.getProxyFilename(serverRenewer=True)
        # try first with new username_CRAB
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            proxy.logonRenewMyProxy()
            timeleft = proxy.getTimeLeft(userproxy)
        if not timeleft or timeleft <= 0:
            # if that fails, try with old fashioned DN hash
            del proxycfg['userName']
            proxy = Proxy(proxycfg)
            with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
                proxy.logonRenewMyProxy()
                timeleft = proxy.getTimeLeft(userproxy)
        if timeleft is None or timeleft <= 0:
            self.logger.error("Impossible to retrieve proxy from %s for %s.",
                              proxycfg['myProxySvr'], proxycfg['userDN'])
            self.logger.error("repeat the command in verbose mode")
            proxycfg['userName'] = username + '_CRAB'
            proxy = Proxy(proxycfg)
            proxy.logonRenewMyProxy()
            raise Exception("Failed to retrieve proxy.")
        return userproxy
Exemplo n.º 9
0
    def __init__(self, confFile=None, quiet=False, debug=True, testMode=False):
        """
        Initialise class members

        :arg WMCore.Configuration config: input Publisher configuration
        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :arg bool testMode: it tells if to run in test (no subprocesses) mode.
        """
        def createLogdir(dirname):
            """ Create the directory dirname ignoring erors in case it exists. Exit if
                the directory cannot be created.
            """
            try:
                os.makedirs(dirname)
            except OSError as ose:
                if ose.errno != 17:  #ignore the "Directory already exists error"
                    print(str(ose))
                    print(
                        "The Publisher Worker needs to access the '%s' directory"
                        % dirname)
                    sys.exit(1)

        def setRootLogger(logsDir, quiet=False, debug=True, console=False):
            """Sets the root logger with the desired verbosity level
               The root logger logs to logs/log.txt and every single
               logging instruction is propagated to it (not really nice
               to read)

            :arg bool quiet: it tells if a quiet logger is needed
            :arg bool debug: it tells if needs a verbose logger
            :arg bool console: it tells if to direct all printoput to console rather then files, useful for debug
            :return logger: a logger with the appropriate logger level."""

            createLogdir(logsDir)
            createLogdir(os.path.join(logsDir, 'processes'))
            createLogdir(os.path.join(logsDir, 'tasks'))

            if console:
                # if we are testing log to the console is easier
                logging.getLogger().addHandler(logging.StreamHandler())
            else:
                logHandler = MultiProcessingLog(os.path.join(
                    logsDir, 'log.txt'),
                                                when='midnight')
                logFormatter = logging.Formatter(
                    "%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s"
                )
                logHandler.setFormatter(logFormatter)
                logging.getLogger().addHandler(logHandler)
            loglevel = logging.INFO
            if quiet:
                loglevel = logging.WARNING
            if debug:
                loglevel = logging.DEBUG
            logging.getLogger().setLevel(loglevel)
            logger = setMasterLogger()
            logger.debug("PID %s.", os.getpid())
            logger.debug("Logging level initialized to %s.", loglevel)
            return logger

        def logVersionAndConfig(config=None, logger=None):
            """
            log version number and major config. parameters
            args: config : a configuration object loaded from file
            args: logger : the logger instance to use
            """
            pubstartDict = {}
            pubstartDict['version'] = __version__
            pubstartDict['asoworker'] = config.General.asoworker
            pubstartDict['instance'] = config.General.instance
            if config.General.instance == 'other':
                pubstartDict['restHost'] = config.General.restHost
                pubstartDict['dbInstance'] = config.General.dbInstance
            pubstartDict['max_slaves'] = config.General.max_slaves
            pubstartDict['DBShost'] = config.TaskPublisher.DBShost
            pubstartDict['dryRun'] = config.TaskPublisher.dryRun
            # one line for automatic parsing
            logger.info('PUBSTART: %s', json.dumps(pubstartDict))
            # multiple lines for humans to read
            for k, v in pubstartDict.items():
                logger.info('%s: %s', k, v)
            return

        self.configurationFile = confFile  # remember this, will have to pass it to TaskPublish
        config = loadConfigurationFile(confFile)
        self.config = config.General
        self.TPconfig = config.TaskPublisher

        # these are used for talking to DBS
        os.putenv('X509_USER_CERT', self.config.serviceCert)
        os.putenv('X509_USER_KEY', self.config.serviceKey)
        self.block_publication_timeout = self.config.block_closure_timeout
        self.lfn_map = {}
        self.force_publication = False
        self.force_failure = False
        self.TestMode = testMode
        self.taskFilesDir = self.config.taskFilesDir
        createLogdir(self.taskFilesDir)
        createLogdir(os.path.join(self.taskFilesDir, 'FailedBlocks'))

        self.logger = setRootLogger(self.config.logsDir,
                                    quiet=quiet,
                                    debug=debug,
                                    console=self.TestMode)

        logVersionAndConfig(config, self.logger)

        from WMCore.Credential.Proxy import Proxy
        proxy = Proxy({'logger': self.logger})
        from ServerUtilities import tempSetLogLevel
        with tempSetLogLevel(self.logger, logging.ERROR):
            self.myDN = proxy.getSubjectFromCert(
                certFile=self.config.serviceCert)

        try:
            instance = self.config.instance
        except:
            msg = "No instance provided: need to specify config.General.instance in the configuration"
            raise ConfigException(msg)

        if instance in SERVICE_INSTANCES:
            self.logger.info('Will connect to CRAB service: %s', instance)
            restHost = SERVICE_INSTANCES[instance]['restHost']
            dbInstance = SERVICE_INSTANCES[instance]['dbInstance']
        else:
            msg = "Invalid instance value '%s'" % instance
            raise ConfigException(msg)
        if instance == 'other':
            self.logger.info(
                'Will use restHost and dbInstance from config file')
            try:
                restHost = self.config.restHost
                dbInstance = self.config.dbInstance
            except:
                msg = "Need to specify config.General.restHost and dbInstance in the configuration"
                raise ConfigException(msg)

        self.logger.info(
            'Will connect to CRAB Data Base %s instance via URL: https://%s',
            dbInstance, restHost)

        # CRAB REST API's
        self.max_files_per_block = self.config.max_files_per_block
        self.crabServer = CRABRest(hostname=restHost,
                                   localcert=self.config.serviceCert,
                                   localkey=self.config.serviceKey,
                                   retry=3,
                                   userAgent='CRABPublisher')
        self.crabServer.setDbInstance(dbInstance=dbInstance)
        self.startTime = time.time()
Exemplo n.º 10
0
    def formatOutput(self, task, requestname, datasetfiles, locations, tempDir):
        """
        Receives as input the result of the data location
        discovery operations and fill up the WMCore objects.
        """
        self.logger.debug(" Formatting data discovery output ")

        wmfiles = []
        event_counter = 0
        lumi_counter = 0
        uniquelumis = set()
        datasetLumis = {}
        blocksWithNoLocations = set()
        ## Loop over the sorted list of files.
        configDict = {"cacheduration": 1, "pycurl": True} # cache duration is in hours
        resourceCatalog = CRIC(logger=self.logger, configDict=configDict)
        # can't affort one message from CRIC per file, unless critical !
        with tempSetLogLevel(logger=self.logger, level=logging.ERROR):
            for lfn, infos in datasetfiles.iteritems():
                ## Skip the file if it is not in VALID state.
                if not infos.get('ValidFile', True):
                    self.logger.warning("Skipping invalid file %s", lfn)
                    continue
                ## Skip the file if the block has not been found or has no locations.
                if not infos['BlockName'] in locations or not locations[infos['BlockName']]:
                    self.logger.warning("Skipping %s because its block (%s) has no locations", lfn, infos['BlockName'])
                    blocksWithNoLocations.add(infos['BlockName'])
                    continue

                if task['tm_use_parent'] == 1 and len(infos['Parents']) == 0:
                    raise TaskWorkerException(
                            "The CRAB3 server backend refuses to submit jobs to the Grid scheduler\n" +
                            "because you specified useParents=True but some your files have no" +
                            "parents.\nExample: " + lfn)
                ## Create a WMCore File object.
                try:
                    size = infos['FileSize']
                    checksums = {'Checksum': infos['Checksum'], 'Adler32': infos['Adler32'], 'Md5': infos['Md5']}
                except:
                    #This is so that the task worker does not crash if an old version of WMCore is used (the interface of an API suddenly changed).
                    # We may want to remove the try/except and the following two lines eventually, but keeping them for the moment so other devels won't be affected
                    #See this WMCore commit: https://github.com/dmwm/WMCore/commit/2afc01ae571390f5fa009dd258be757adac89c28#diff-374b7a6640288184175057234e393e1cL204
                    size = infos['Size']
                    checksums = infos['Checksums']
                wmfile = File(lfn = lfn, events = infos['NumberOfEvents'], size = size, checksums = checksums, parents = infos['Parents'])
                wmfile['block'] = infos['BlockName']
                try:
                    wmfile['locations'] = resourceCatalog.PNNstoPSNs(locations[wmfile['block']])
                except Exception as ex:
                    self.logger.error("Impossible translating %s to a CMS name through CMS Resource Catalog", locations[wmfile['block']] )
                    self.logger.error("got this exception:\n %s", ex)
                    raise
                wmfile['workflow'] = requestname
                event_counter += infos['NumberOfEvents']
                for run, lumis in infos['Lumis'].iteritems():
                    datasetLumis.setdefault(run, []).extend(lumis)
                    wmfile.addRun(Run(run, *lumis))
                    for lumi in lumis:
                        uniquelumis.add((run, lumi))
                    lumi_counter += len(lumis)
                wmfiles.append(wmfile)

        if blocksWithNoLocations:
            msg = "The locations of some blocks (%d) have not been found: %s" % (len(blocksWithNoLocations), list(blocksWithNoLocations))
            self.logger.warning(msg)
            self.uploadWarning(msg, task['user_proxy'], task['tm_taskname'])

        uniquelumis = len(uniquelumis)
        self.logger.debug('Tot events found: %d', event_counter)
        self.logger.debug('Tot lumis found: %d', uniquelumis)
        self.logger.debug('Duplicate lumis found: %d', (lumi_counter - uniquelumis))
        self.logger.debug('Tot files found: %d', len(wmfiles))

        self.logger.debug("Starting to create compact lumilists for input dataset")
        datasetLumiList = LumiList(runsAndLumis=datasetLumis)
        datasetLumis = datasetLumiList.getCompactList()
        datasetDuplicateLumis = datasetLumiList.getDuplicates().getCompactList()
        self.logger.debug("Finished to create compact lumilists for input dataset")
        with open(os.path.join(tempDir, "input_dataset_lumis.json"), "w") as fd:
            json.dump(datasetLumis, fd)
        with open(os.path.join(tempDir, "input_dataset_duplicate_lumis.json"), "w") as fd:
            json.dump(datasetDuplicateLumis, fd)

        return Result(task = task, result = Fileset(name = 'FilesToSplit', files = set(wmfiles)))
Exemplo n.º 11
0
    def __init__(self, confFile=None, quiet=False, debug=True, testMode=False):
        """
        Initialise class members

        :arg WMCore.Configuration config: input Publisher configuration
        :arg bool quiet: it tells if a quiet logger is needed
        :arg bool debug: it tells if needs a verbose logger
        :arg bool testMode: it tells if to run in test (no subprocesses) mode.
        """
        def createLogdir(dirname):
            """ Create the directory dirname ignoring erors in case it exists. Exit if
                the directory cannot be created.
            """
            try:
                os.makedirs(dirname)
            except OSError as ose:
                if ose.errno != 17:  #ignore the "Directory already exists error"
                    print(str(ose))
                    print(
                        "The Publisher Worker needs to access the '%s' directory"
                        % dirname)
                    sys.exit(1)

        self.configurationFile = confFile  # remember this, will have to pass it to TaskPublish
        config = loadConfigurationFile(confFile)
        self.config = config.General
        self.TPconfig = config.TaskPublisher

        # these are used for talking to DBS
        os.putenv('X509_USER_CERT', self.config.serviceCert)
        os.putenv('X509_USER_KEY', self.config.serviceKey)
        self.block_publication_timeout = self.config.block_closure_timeout
        self.lfn_map = {}
        self.force_publication = False
        self.force_failure = False
        self.TestMode = testMode
        self.taskFilesDir = self.config.taskFilesDir
        createLogdir(self.taskFilesDir)

        def setRootLogger(logsDir, quiet=False, debug=True, console=False):
            """Sets the root logger with the desired verbosity level
               The root logger logs to logs/twlog.txt and every single
               logging instruction is propagated to it (not really nice
               to read)

            :arg bool quiet: it tells if a quiet logger is needed
            :arg bool debug: it tells if needs a verbose logger
            :arg bool console: it tells if to direct all printoput to console rather then files, useful for debug
            :return logger: a logger with the appropriate logger level."""

            createLogdir(logsDir)
            createLogdir(logsDir + '/processes')
            createLogdir(logsDir + '/tasks')

            if console:
                # if we are testing log to the console is easier
                logging.getLogger().addHandler(logging.StreamHandler())
            else:
                logHandler = MultiProcessingLog(logsDir + '/log.txt',
                                                when='midnight')
                logFormatter = logging.Formatter(
                    "%(asctime)s:%(levelname)s:%(module)s,%(lineno)d:%(message)s"
                )
                logHandler.setFormatter(logFormatter)
                logging.getLogger().addHandler(logHandler)
            loglevel = logging.INFO
            if quiet:
                loglevel = logging.WARNING
            if debug:
                loglevel = logging.DEBUG
            logging.getLogger().setLevel(loglevel)
            logger = setMasterLogger()
            logger.debug("PID %s.", os.getpid())
            logger.debug("Logging level initialized to %s.", loglevel)
            return logger

        self.logger = setRootLogger(self.config.logsDir,
                                    quiet=quiet,
                                    debug=debug,
                                    console=self.TestMode)

        from WMCore.Credential.Proxy import Proxy
        proxy = Proxy({'logger': self.logger})
        from ServerUtilities import tempSetLogLevel
        with tempSetLogLevel(self.logger, logging.ERROR):
            self.myDN = proxy.getSubjectFromCert(
                certFile=self.config.serviceCert)

        # CRABServer REST API's (see CRABInterface)
        try:
            instance = self.config.instance
        except:
            msg = "No instance provided: need to specify config.General.instance in the configuration"
            raise ConfigException(msg)

        if instance in SERVICE_INSTANCES:
            self.logger.info('Will connect to CRAB service: %s', instance)
            restHost = SERVICE_INSTANCES[instance]['restHost']
            dbInstance = SERVICE_INSTANCES[instance]['dbInstance']
        else:
            msg = "Invalid instance value '%s'" % instance
            raise ConfigException(msg)
        if instance == 'other':
            self.logger.info(
                'Will use restHost and dbInstance from config file')
            try:
                restHost = self.config.restHost
                dbInstance = self.config.dbInstance
            except:
                msg = "Need to specify config.General.restHost and dbInstance in the configuration"
                raise ConfigException(msg)

        restURInoAPI = '/crabserver/' + dbInstance
        self.logger.info(
            'Will connect to CRAB Data Base via URL: https://%s/%s', restHost,
            restURInoAPI)

        # CRAB REST API's
        self.REST_filetransfers = restURInoAPI + '/filetransfers'
        #self.REST_usertransfers = restURInoAPI +  '/fileusertransfers'
        self.REST_filemetadata = restURInoAPI + '/filemetadata'
        self.REST_workflow = restURInoAPI + '/workflow'
        self.REST_task = restURInoAPI + '/task'
        self.max_files_per_block = self.config.max_files_per_block
        self.crabServer = HTTPRequests(url=restHost,
                                       localcert=self.config.serviceCert,
                                       localkey=self.config.serviceKey,
                                       retry=3)