Example #1
 def convertToJobFileSpec(self,
                          datasetSpec,
                          setType=None,
                          useEventService=False):
     """Convert this file spec into a JobFileSpec, copying per-file
     attributes from self and deriving dataset-level fields from datasetSpec."""
     jobFileSpec = JobFileSpec()
     jobFileSpec.fileID = self.fileID
     jobFileSpec.datasetID = datasetSpec.datasetID
     jobFileSpec.jediTaskID = datasetSpec.jediTaskID
     jobFileSpec.lfn = self.lfn
     jobFileSpec.GUID = self.GUID
     if setType is None:
         jobFileSpec.type = self.type
     else:
         jobFileSpec.type = setType
     jobFileSpec.scope = self.scope
     jobFileSpec.fsize = self.fsize
     jobFileSpec.checksum = self.checksum
     jobFileSpec.attemptNr = self.attemptNr
     # dataset attribute
     if datasetSpec is not None:
         # dataset
         if datasetSpec.containerName not in [None, '']:
             jobFileSpec.dataset = datasetSpec.containerName
         else:
             jobFileSpec.dataset = datasetSpec.datasetName
         if (self.type in datasetSpec.getInputTypes()
                 or setType in datasetSpec.getInputTypes()):
             # prodDBlock
             jobFileSpec.prodDBlock = datasetSpec.datasetName
             # storage token
             if datasetSpec.storageToken not in ['', None]:
                 jobFileSpec.dispatchDBlockToken = datasetSpec.storageToken
         else:
             # destinationDBlock
             jobFileSpec.destinationDBlock = datasetSpec.datasetName
             # storage token
             if datasetSpec.storageToken not in ['', None]:
                 jobFileSpec.destinationDBlockToken = (
                     datasetSpec.storageToken.split('/')[0])
             # destination
             if datasetSpec.destination not in ['', None]:
                 jobFileSpec.destinationSE = datasetSpec.destination
             # set prodDBlockToken for Event Service
             if useEventService and datasetSpec.getObjectStore() is not None:
                 jobFileSpec.prodDBlockToken = 'objectstore^{0}'.format(
                     datasetSpec.getObjectStore())
             # allow no output
             if datasetSpec.isAllowedNoOutput():
                 jobFileSpec.allowNoOutput()
     # return
     return jobFileSpec
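
A minimal usage sketch for the routine above, assuming hypothetical
taskBuffer accessors and IDs; only convertToJobFileSpec and its arguments
come from the code itself:

    # Hypothetical: fetch a JEDI file spec and its parent dataset spec.
    fileSpec = taskBuffer.getFileSpec(jediTaskID=123, fileID=456)
    datasetSpec = taskBuffer.getDatasetSpec(fileSpec.datasetID)

    # Copy per-file attributes into a JobFileSpec, forcing its type to
    # 'input' and requesting an object-store token for Event Service jobs.
    jobFile = fileSpec.convertToJobFileSpec(datasetSpec,
                                            setType='input',
                                            useEventService=True)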
Example #2
 def convertToJobFileSpec(self,datasetSpec,setType=None,useEventService=False):
     '''Convert this file spec into a JobFileSpec.'''
     jobFileSpec = JobFileSpec()
     jobFileSpec.fileID     = self.fileID
     jobFileSpec.datasetID  = datasetSpec.datasetID
     jobFileSpec.jediTaskID = datasetSpec.jediTaskID
     jobFileSpec.lfn        = self.lfn
     jobFileSpec.GUID       = self.GUID
     if setType is None:
         jobFileSpec.type   = self.type
     else:
         jobFileSpec.type   = setType
     jobFileSpec.scope      = self.scope
     jobFileSpec.fsize      = self.fsize
     jobFileSpec.checksum   = self.checksum
     jobFileSpec.attemptNr  = self.attemptNr
     # dataset attribute
     if datasetSpec is not None:
         # dataset
         if datasetSpec.containerName not in [None,'']:
             jobFileSpec.dataset = datasetSpec.containerName
         else:
             jobFileSpec.dataset = datasetSpec.datasetName
         if self.type in datasetSpec.getInputTypes() or setType in datasetSpec.getInputTypes():
             # prodDBlock
             jobFileSpec.prodDBlock = datasetSpec.datasetName
             # storage token
             if datasetSpec.storageToken not in ['',None]:
                 jobFileSpec.dispatchDBlockToken = datasetSpec.storageToken
         else:
             # destinationDBlock
             jobFileSpec.destinationDBlock = datasetSpec.datasetName
             # storage token
             if datasetSpec.storageToken not in ['',None]:
                 jobFileSpec.destinationDBlockToken = datasetSpec.storageToken.split('/')[0]
             # destination
             if datasetSpec.destination not in ['',None]:
                 jobFileSpec.destinationSE = datasetSpec.destination
             # set prodDBlockToken for Event Service
             if useEventService and datasetSpec.getObjectStore() is not None:
                 jobFileSpec.prodDBlockToken = 'objectstore^{0}'.format(datasetSpec.getObjectStore())
             # allow no output
             if datasetSpec.isAllowedNoOutput():
                 jobFileSpec.allowNoOutput()
     # return
     return jobFileSpec
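
In both versions above, the destinationDBlockToken branch keeps only the
part of the storage token before the first '/'; a tiny illustration with a
made-up token value:

    storageToken = 'ATLASDATADISK/dest:SOMESITE'   # illustrative value only
    print(storageToken.split('/')[0])              # prints 'ATLASDATADISK'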
Example #3
    def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.'))
        
        if not self.outsite:
            logger.error("outsite not set. Aborting")
            raise Exception("outsite not set")
        
        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud
        

        # now just filling the job from AthenaMC data
        
        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')  
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        
        if app.transform_archive:
            jspec.homepackage       = 'AnalysisTransforms'+app.transform_archive
        elif app.prod_release:
            jspec.homepackage       = 'AnalysisTransforms-AtlasProduction_'+str(app.prod_release)
        jspec.transformation    = '%s/runAthena-00-00-11' % Client.baseURLSUB
            
        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock    = 'NULL'
        if job.inputdata and len(app.inputfiles)>0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock    = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset=""
        for type in ["EVNT","RDO","HITS","AOD","ESD","NTUP"]:
            if type in app.outputpaths.keys():
                outdset=string.replace(app.outputpaths[type],"/",".")
                outdset=outdset[1:-1]
                break
        if not outdset:
            if not app.outputpaths:
                logger.error("app.outputpaths is empty: check your output datasets")
                raise Exception("app.outputpaths is empty")
            type=app.outputpaths.keys()[0]
            outdset=string.replace(app.outputpaths[type],"/",".")
            outdset=outdset[1:-1]
            
        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel   = 'user'
        jspec.assignedPriority  = 1000
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite     = self.outsite
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
#       library (source files)
        flib = FileSpec()
        flib.lfn            = self.library
#        flib.GUID           = 
        flib.type           = 'input'
#        flib.status         = 
        flib.dataset        = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid=app.turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid=app.dbturls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid=app.minbias_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid=app.cavern_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
            

        # output files (this includes the logfiles)
        jidtag=""
        job = app._getParent() # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id       
        outfiles=app.subjobsOutfiles[job.id]
        pandaOutfiles={}
        for type in outfiles.keys():
            pandaOutfiles[type]=outfiles[type]+"."+str(jidtag)
            if type=="LOG":
                pandaOutfiles[type]+=".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset=string.replace(app.outputpaths[outtype],"/",".")
            dset=dset[1:-1]
            fout.dataset=dset
            fout.lfn=pandaOutfiles[outtype]
            fout.type              = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE    = jspec.destinationSE
            if outtype=='LOG':
                fout.type='log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE     = job.backend.site
            jspec.addFile(fout)


        #       job parameters
        param =  '-l %s ' % self.library # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '
        
        # transform parameters
        # need to update arglist with final output file name...
        newArgs=[]
        if app.mode == "evgen":
            app.args[3]=app.args[3]+" -t "
            if app.verbosity:
                app.args[3]=app.args[3]+" -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type])>-1:
                    arg=arg.replace(outfiles[type],pandaOutfiles[type])

            newArgs.append(arg)
        arglist=string.join(newArgs," ")
#        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles=app.inputfiles+app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles)>0:
            param+= ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles)>0:
            param+= ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        del pandaOutfiles["LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore...
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL        
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLSSL)
        if matchURL is not None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"


        jspec.jobParameters = param
        jspec.metadata="--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return
        
        return jspec
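
The argument-rewriting loop in prepare can be exercised on its own; a short
sketch with made-up transform arguments and output file names (urllib.quote
is the Python 2 call used by the handler above):

    import urllib

    outfiles = {'EVNT': 'evgen.pool.root'}           # illustrative names
    pandaOutfiles = {'EVNT': 'evgen.pool.root.42'}   # renamed per subjob id

    newArgs = []
    for arg in ['--maxEvents=10', '--outputFile=evgen.pool.root']:
        for ftype in outfiles.keys():
            if arg.find(outfiles[ftype]) > -1:
                arg = arg.replace(outfiles[ftype], pandaOutfiles[ftype])
        newArgs.append(arg)

    arglist = " ".join(newArgs)
    print(urllib.quote(arglist))   # the -j payload handed to the transform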
Example #4
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        '''prepare the subjob specific configuration'''

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        if not self.outsite:
            logger.error("outsite not set. Aborting")
            raise Exception("outsite not set")

        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

        # now just filling the job from AthenaMC data

        jspec = JobSpec()
        jspec.jobDefinitionID = job._getRoot().id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel

        if app.transform_archive:
            jspec.homepackage = 'AnalysisTransforms' + app.transform_archive
        elif app.prod_release:
            jspec.homepackage = 'AnalysisTransforms-AtlasProduction_' + str(
                app.prod_release)
        jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB

        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock = 'NULL'
        if (job.inputdata and len(app.inputfiles) > 0
                and app.inputfiles[0] in app.dsetmap):
            jspec.prodDBlock = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset = ""
        for type in ["EVNT", "RDO", "HITS", "AOD", "ESD", "NTUP"]:
            if type in app.outputpaths.keys():
                outdset = string.replace(app.outputpaths[type], "/", ".")
                outdset = outdset[1:-1]
                break
        if not outdset:
            if not app.outputpaths:
                logger.error(
                    "app.outputpaths is empty: check your output datasets")
                raise Exception("app.outputpaths is empty")
            type = app.outputpaths.keys()[0]
            outdset = string.replace(app.outputpaths[type], "/", ".")
            outdset = outdset[1:-1]

        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel = 'user'
        jspec.assignedPriority = 1000
        jspec.cloud = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite = self.outsite
        jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
        #       library (source files)
        flib = FileSpec()
        flib.lfn = self.library
        #        flib.GUID           =
        flib.type = 'input'
        #        flib.status         =
        flib.dataset = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid = app.turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid = app.dbturls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid = app.minbias_turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid = app.cavern_turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)


        # output files (this includes the logfiles)
        jidtag = ""
        job = app._getParent()  # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id
        outfiles = app.subjobsOutfiles[job.id]
        pandaOutfiles = {}
        for type in outfiles.keys():
            pandaOutfiles[type] = outfiles[type] + "." + str(jidtag)
            if type == "LOG":
                pandaOutfiles[type] += ".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset = string.replace(app.outputpaths[outtype], "/", ".")
            dset = dset[1:-1]
            fout.dataset = dset
            fout.lfn = pandaOutfiles[outtype]
            fout.type = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE = jspec.destinationSE
            if outtype == 'LOG':
                fout.type = 'log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE = job.backend.site
            jspec.addFile(fout)

        #       job parameters
        param = '-l %s ' % self.library  # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '

        # transform parameters
        # need to update arglist with final output file name...
        newArgs = []
        if app.mode == "evgen":
            app.args[3] = app.args[3] + " -t "
            if app.verbosity:
                app.args[3] = app.args[3] + " -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type]) > -1:
                    arg = arg.replace(outfiles[type], pandaOutfiles[type])

            newArgs.append(arg)
        arglist = string.join(newArgs, " ")
        #        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles = app.inputfiles + app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles) > 0:
            param += ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles) > 0:
            param += ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        # logfiles do not appear in IROOT block, and this one is not needed anymore
        del pandaOutfiles["LOG"]
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLSSL)
        if matchURL is not None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"

        jspec.jobParameters = param
        jspec.metadata = "--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return

        return jspec
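
The -o job parameter packs the renamed output files into an IROOT mapping;
a small sketch of the string that prepare builds (file names are made up,
and the LOG entry has already been removed as in the code above):

    pandaOutfiles = {'EVNT': 'evgen.pool.root.42'}
    param = ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())
    print(param)   # -> -o "{'IROOT':[('EVNT', 'evgen.pool.root.42')] }"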