def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    '''prepare the subjob specific configuration'''

    from pandatools import Client
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec

    job = app._getParent()
    logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.'))

    # in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter
    if job.inputdata and not job._getRoot().subjobs:
        if not job.inputdata.names:
            contents = job.inputdata.get_contents(overlap=False, size=True)
            for ds in contents.keys():
                for f in contents[ds]:
                    job.inputdata.guids.append(f[0])
                    job.inputdata.names.append(f[1][0])
                    job.inputdata.sizes.append(f[1][1])
                    job.inputdata.checksums.append(f[1][2])
                    job.inputdata.scopes.append(f[1][3])

    site = job._getRoot().backend.site
    job.backend.site = site
    job.backend.actualCE = site
    cloud = job._getRoot().backend.requirements.cloud
    job.backend.requirements.cloud = cloud

    # if no outputdata are given
    if not job.outputdata:
        job.outputdata = DQ2OutputDataset()
        job.outputdata.datasetname = job._getRoot().outputdata.datasetname
    #if not job.outputdata.datasetname:
    else:
        job.outputdata.datasetname = job._getRoot().outputdata.datasetname

    if not job.outputdata.datasetname:
        raise ApplicationConfigurationError('DQ2OutputDataset has no datasetname')

    jspec = JobSpec()
    jspec.jobDefinitionID = job._getRoot().id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.transformation = '%s/runGen-00-00-02' % Client.baseURLSUB

    if job.inputdata:
        jspec.prodDBlock = job.inputdata.dataset[0]
    else:
        jspec.prodDBlock = 'NULL'

    jspec.destinationDBlock = job.outputdata.datasetname
    if job.outputdata.location:
        if not job._getRoot().subjobs or job.id == 0:
            logger.warning('You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.')
        jspec.destinationSE = job.outputdata.location
    else:
        jspec.destinationSE = site

    jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
    jspec.processingType = configPanda['processingType']
    jspec.assignedPriority = configPanda['assignedPriorityRun']
    jspec.cloud = cloud

    # memory
    if job.backend.requirements.memory != -1:
        jspec.minRamCount = job.backend.requirements.memory
    # cputime
    if job.backend.requirements.cputime != -1:
        jspec.maxCpuCount = job.backend.requirements.cputime

    jspec.computingSite = site

    # library (source files)
    if job.backend.libds:
        flib = FileSpec()
        flib.lfn = self.fileBO.lfn
        flib.GUID = self.fileBO.GUID
        flib.type = 'input'
        flib.status = self.fileBO.status
        flib.dataset = self.fileBO.destinationDBlock
        flib.dispatchDBlock = self.fileBO.destinationDBlock
        jspec.addFile(flib)
    elif job.backend.bexec:
        flib = FileSpec()
        flib.lfn = self.library
        flib.type = 'input'
        flib.dataset = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

    # input files FIXME: many more input types
    if job.inputdata:
        for guid, lfn, size, checksum, scope in zip(job.inputdata.guids, job.inputdata.names,
                                                    job.inputdata.sizes, job.inputdata.checksums,
                                                    job.inputdata.scopes):
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = guid
            finp.scope = scope
            # finp.fsize =
            # finp.md5sum =
            finp.dataset = job.inputdata.dataset[0]
            finp.prodDBlock = job.inputdata.dataset[0]
            finp.dispatchDBlock = job.inputdata.dataset[0]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)

    # output files
    # outMap = {}
    #FIXME: if options.outMeta != []:
    self.rundirectory = "."

    # log files
    flog = FileSpec()
    flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
    flog.type = 'log'
    flog.dataset = job.outputdata.datasetname
    flog.destinationDBlock = job.outputdata.datasetname
    flog.destinationSE = job.backend.site
    jspec.addFile(flog)

    # job parameters
    param = ''

    # source URL
    matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL)
    srcURL = ""
    if matchURL != None:
        srcURL = matchURL.group(1)
        param += " --sourceURL %s " % srcURL

    param += '-r "%s" ' % self.rundirectory

    exe_name = job.application.exe
    if job.backend.bexec == '':
        if hasattr(job.application.exe, "name"):
            exe_name = os.path.basename(job.application.exe.name)

        # set jobO parameter
        if job.application.args:
            param += ' -j "" -p "%s %s" ' % (exe_name, urllib.quote(" ".join(job.application.args)))
        else:
            param += ' -j "" -p "%s" ' % exe_name
        if self.inputsandbox:
            param += ' -a %s ' % self.inputsandbox
    else:
        param += '-l %s ' % self.library
        param += '-j "" -p "%s %s" ' % (exe_name, urllib.quote(" ".join(job.application.args)))

    if job.inputdata:
        param += '-i "%s" ' % job.inputdata.names

    # fill outfiles
    outfiles = {}
    for f in self.extOutFile:
        tarnum = 1
        if f.find('*') != -1:
            # archive *
            outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))
            tarnum += 1
        else:
            outfiles[f] = "%s.%s.%s" % (f, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))

        fout = FileSpec()
        fout.lfn = outfiles[f]
        fout.type = 'output'
        fout.dataset = job.outputdata.datasetname
        fout.destinationDBlock = job.outputdata.datasetname
        fout.destinationSE = job.backend.site
        jspec.addFile(fout)

    param += '-o "%s" ' % (outfiles)  # must be double quotes, because python prints strings in 'single quotes'

    for file in jspec.Files:
        if file.type in ['output', 'log'] and configPanda['chirpconfig']:
            file.dispatchDBlockToken = configPanda['chirpconfig']
            logger.debug('chirp file %s', file)

    jspec.jobParameters = param

    return jspec
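# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the handler above): it mimics how
# prepare() assembles the runGen job-parameter string from the executable
# name, its arguments, the input LFN list and the timestamped output-file
# mapping.  The helper name build_rungen_params() and the sample values are
# hypothetical; the option letters (-r, -j, -p, -i, -o) and the
# urllib.quote() call follow the code above.
import time
import urllib


def build_rungen_params(exe_name, args, input_lfns, ext_out_files, fqid):
    """Return a runGen-style parameter string for the given job pieces."""
    timestamp = time.strftime("%Y%m%d%H%M%S")
    param = '-r "." '
    if args:
        param += ' -j "" -p "%s %s" ' % (exe_name, urllib.quote(" ".join(args)))
    else:
        param += ' -j "" -p "%s" ' % exe_name
    if input_lfns:
        param += '-i "%s" ' % input_lfns
    # map requested output names to unique, timestamped LFNs as prepare() does
    outfiles = {}
    for f in ext_out_files:
        outfiles[f] = "%s.%s.%s" % (f, fqid, timestamp)
    param += '-o "%s" ' % (outfiles)
    return param


# Example call (hypothetical values):
#   build_rungen_params('my_exe', ['--seed', '42'], ['file1.root'],
#                       ['hist.root'], '123.0')
# ---------------------------------------------------------------------------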
def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
    '''prepare the subjob specific configuration'''

    # PandaTools
    from pandatools import Client
    from pandatools import AthenaUtils
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec

    job = app._getParent()
    logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.'))

    try:
        assert self.outsite
    except:
        logger.error("outsite not set. Aborting")
        raise Exception()

    job.backend.site = self.outsite
    job.backend.actualCE = self.outsite
    cloud = job._getRoot().backend.requirements.cloud
    job.backend.requirements.cloud = cloud

    # now just filling the job from AthenaMC data
    jspec = JobSpec()
    jspec.jobDefinitionID = job._getRoot().id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel

    if app.transform_archive:
        jspec.homepackage = 'AnalysisTransforms' + app.transform_archive
    elif app.prod_release:
        jspec.homepackage = 'AnalysisTransforms-AtlasProduction_' + str(app.prod_release)
    jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB

    #---->???? prodDBlock and destinationDBlock when facing several input / output datasets?
    jspec.prodDBlock = 'NULL'
    if job.inputdata and len(app.inputfiles) > 0 and app.inputfiles[0] in app.dsetmap:
        jspec.prodDBlock = app.dsetmap[app.inputfiles[0]]

    # How to specify jspec.destinationDBlock when more than one type of output is available?
    # Panda prod jobs seem to specify only the last output dataset
    outdset = ""
    for type in ["EVNT", "RDO", "HITS", "AOD", "ESD", "NTUP"]:
        if type in app.outputpaths.keys():
            outdset = string.replace(app.outputpaths[type], "/", ".")
            outdset = outdset[1:-1]
            break
    if not outdset:
        try:
            assert len(app.outputpaths.keys()) > 0
        except:
            logger.error("app.outputpaths is empty: check your output datasets")
            raise
        type = app.outputpaths.keys()[0]
        outdset = string.replace(app.outputpaths[type], "/", ".")
        outdset = outdset[1:-1]

    jspec.destinationDBlock = outdset
    jspec.destinationSE = self.outsite
    jspec.prodSourceLabel = 'user'
    jspec.assignedPriority = 1000
    jspec.cloud = cloud

    # memory
    if job.backend.requirements.memory != -1:
        jspec.minRamCount = job.backend.requirements.memory
    jspec.computingSite = self.outsite
    jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)

    # library (source files)
    flib = FileSpec()
    flib.lfn = self.library
    # flib.GUID =
    flib.type = 'input'
    # flib.status =
    flib.dataset = self.libDataset
    flib.dispatchDBlock = self.libDataset
    jspec.addFile(flib)

    # input files FIXME: many more input types
    for lfn in app.inputfiles:
        useguid = app.turls[lfn].replace("guid:", "")
        finp = FileSpec()
        finp.lfn = lfn
        finp.GUID = useguid
        finp.dataset = app.dsetmap[lfn]
        finp.prodDBlock = app.dsetmap[lfn]
        finp.prodDBlockToken = 'local'
        finp.dispatchDBlock = app.dsetmap[lfn]
        finp.type = 'input'
        finp.status = 'ready'
        jspec.addFile(finp)

    # add dbfiles if any:
    for lfn in app.dbfiles:
        useguid = app.dbturls[lfn].replace("guid:", "")
        finp = FileSpec()
        finp.lfn = lfn
        finp.GUID = useguid
        finp.dataset = app.dsetmap[lfn]
        finp.prodDBlock = app.dsetmap[lfn]
        finp.prodDBlockToken = 'local'
        finp.dispatchDBlock = app.dsetmap[lfn]
        finp.type = 'input'
        finp.status = 'ready'
        jspec.addFile(finp)

    # then minbias files
    for lfn in app.mbfiles:
        useguid = app.minbias_turls[lfn].replace("guid:", "")
        finp = FileSpec()
        finp.lfn = lfn
        finp.GUID = useguid
        finp.dataset = app.dsetmap[lfn]
        finp.prodDBlock = app.dsetmap[lfn]
        finp.prodDBlockToken = 'local'
        finp.dispatchDBlock = app.dsetmap[lfn]
        finp.type = 'input'
        finp.status = 'ready'
        jspec.addFile(finp)

    # then cavern files
    for lfn in app.cavernfiles:
        useguid = app.cavern_turls[lfn].replace("guid:", "")
        finp = FileSpec()
        finp.lfn = lfn
        finp.GUID = useguid
        finp.dataset = app.dsetmap[lfn]
        finp.prodDBlock = app.dsetmap[lfn]
        finp.prodDBlockToken = 'local'
        finp.dispatchDBlock = app.dsetmap[lfn]
        finp.type = 'input'
        finp.status = 'ready'
        jspec.addFile(finp)

    # output files (this includes the logfiles)
    jidtag = ""
    job = app._getParent()  # Returns job or subjob object
    if job._getRoot().subjobs:
        jidtag = job._getRoot().id
    else:
        jidtag = "%d" % job.id

    outfiles = app.subjobsOutfiles[job.id]
    pandaOutfiles = {}
    for type in outfiles.keys():
        pandaOutfiles[type] = outfiles[type] + "." + str(jidtag)
        if type == "LOG":
            pandaOutfiles[type] += ".tgz"
    #print pandaOutfiles

    for outtype in pandaOutfiles.keys():
        fout = FileSpec()
        dset = string.replace(app.outputpaths[outtype], "/", ".")
        dset = dset[1:-1]
        fout.dataset = dset
        fout.lfn = pandaOutfiles[outtype]
        fout.type = 'output'
        # fout.destinationDBlock = jspec.destinationDBlock
        fout.destinationDBlock = fout.dataset
        fout.destinationSE = jspec.destinationSE
        if outtype == 'LOG':
            fout.type = 'log'
            fout.destinationDBlock = fout.dataset
            fout.destinationSE = job.backend.site
        jspec.addFile(fout)

    # job parameters
    param = '-l %s ' % self.library  # user tarball.
    # use corruption checker
    if job.backend.requirements.corCheck:
        param += '--corCheck '
    # disable to skip missing files
    if job.backend.requirements.notSkipMissing:
        param += '--notSkipMissing '

    # transform parameters
    # need to update arglist with final output file name...
    newArgs = []
    if app.mode == "evgen":
        app.args[3] = app.args[3] + " -t "
        if app.verbosity:
            app.args[3] = app.args[3] + " -l %s " % app.verbosity

    for arg in app.args[3:]:
        for type in outfiles.keys():
            if arg.find(outfiles[type]) > -1:
                arg = arg.replace(outfiles[type], pandaOutfiles[type])
        newArgs.append(arg)
    arglist = string.join(newArgs, " ")
    # print "Arglist:",arglist

    param += ' -r ./ '
    param += ' -j "%s"' % urllib.quote(arglist)

    allinfiles = app.inputfiles + app.dbfiles
    # Input files.
    param += ' -i "%s" ' % allinfiles
    if len(app.mbfiles) > 0:
        param += ' -m "%s" ' % app.mbfiles
    if len(app.cavernfiles) > 0:
        param += ' -n "%s" ' % app.cavernfiles
    # param += '-m "[]" ' #%minList FIXME
    # param += '-n "[]" ' #%cavList FIXME

    del pandaOutfiles["LOG"]  # logfiles do not appear in IROOT block, and this one is not needed anymore...
    param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

    # source URL
    matchURL = re.search("(http.*://[^/]+)/", Client.baseURLSSL)
    if matchURL != None:
        param += " --sourceURL %s " % matchURL.group(1)
    param += " --trf"

    jspec.jobParameters = param
    jspec.metadata = "--trf \"%s\"" % arglist

    #print "SUBJOB DETAILS:",jspec.values()
    if app.dryrun:
        print "job.application.dryrun activated, printing out job parameters"
        print jspec.values()
        return

    return jspec
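# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the handler above): it shows how the
# AthenaMC handler derives a Panda dataset name from an output path (slashes
# become dots, the leading and trailing dot are stripped) and how per-subjob
# output LFNs are suffixed with the job id tag, with ".tgz" appended for the
# log.  The helper names and sample values are hypothetical; the
# transformations mirror the string.replace()/slicing logic in the code above.
import string


def outputpath_to_dataset(outputpath):
    """Convert a '/user.me.dataset/' style path into 'user.me.dataset'."""
    dset = string.replace(outputpath, "/", ".")
    return dset[1:-1]


def panda_output_lfns(outfiles, jidtag):
    """Append the job id tag to every output LFN; the LOG file also gets .tgz."""
    panda_outfiles = {}
    for ftype in outfiles.keys():
        panda_outfiles[ftype] = outfiles[ftype] + "." + str(jidtag)
        if ftype == "LOG":
            panda_outfiles[ftype] += ".tgz"
    return panda_outfiles


# Example calls (hypothetical values):
#   outputpath_to_dataset('/user.me.test.EVNT/')   -> 'user.me.test.EVNT'
#   panda_output_lfns({'EVNT': 'evgen.pool.root', 'LOG': 'evgen.log'}, 42)
# ---------------------------------------------------------------------------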
def run(self):
    try:
        # get job
        tmpJobs = self.taskBuffer.getFullJobStatus([self.rPandaID])
        if tmpJobs == [] or tmpJobs[0] == None:
            _logger.debug("cannot find job for PandaID=%s" % self.rPandaID)
            return
        self.job = tmpJobs[0]
        _logger.debug("%s start %s:%s:%s" % (self.token, self.job.jobDefinitionID,
                                             self.job.prodUserName, self.job.computingSite))
        # using output container
        if not self.job.destinationDBlock.endswith('/'):
            _logger.debug("%s output dataset container is required" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # FIXME : don't touch group jobs for now
        if self.job.destinationDBlock.startswith('group') and (not self.userRequest):
            _logger.debug("%s skip group jobs" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check processingType
        typesForRebro = ['pathena', 'prun', 'ganga', 'ganga-rbtest']
        if not self.job.processingType in typesForRebro:
            _logger.debug("%s skip processingType=%s not in %s" % \
                          (self.token, self.job.processingType, str(typesForRebro)))
            _logger.debug("%s end" % self.token)
            return
        # check jobsetID
        if self.job.jobsetID in [0, 'NULL', None]:
            _logger.debug("%s jobsetID is undefined" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check metadata
        if self.job.metadata in [None, 'NULL']:
            _logger.debug("%s metadata is unavailable" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --disableRebrokerage
        match = re.search("--disableRebrokerage", self.job.metadata)
        if match != None and (not self.simulation) and (not self.forceOpt) \
               and (not self.userRequest):
            _logger.debug("%s disabled rebrokerage" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --site
        match = re.search("--site", self.job.metadata)
        if match != None and (not self.simulation) and (not self.forceOpt) \
               and (not self.userRequest):
            _logger.debug("%s --site is used" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --libDS
        match = re.search("--libDS", self.job.metadata)
        if match != None:
            _logger.debug("%s --libDS is used" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check --workingGroup since it is site-specific
        match = re.search("--workingGroup", self.job.metadata)
        if match != None:
            _logger.debug("%s workingGroup is specified" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # avoid too many rebrokerage
        if not self.checkRev():
            _logger.debug("%s avoid too many rebrokerage" % self.token)
            _logger.debug("%s end" % self.token)
            return
        # check if multiple JobIDs use the same libDS
        if self.bPandaID != None and self.buildStatus not in ['finished', 'failed']:
            if self.minPandaIDlibDS == None or self.maxPandaIDlibDS == None:
                _logger.debug("%s max/min PandaIDs are unavailable for the libDS" % self.token)
                _logger.debug("%s end" % self.token)
                return
            tmpPandaIDsForLibDS = self.taskBuffer.getFullJobStatus([self.minPandaIDlibDS, self.maxPandaIDlibDS])
            if len(tmpPandaIDsForLibDS) != 2 or tmpPandaIDsForLibDS[0] == None or tmpPandaIDsForLibDS[1] == None:
                _logger.debug("%s failed to get max/min PandaIDs for the libDS" % self.token)
                _logger.debug("%s end" % self.token)
                return
            # check
            if tmpPandaIDsForLibDS[0].jobDefinitionID != tmpPandaIDsForLibDS[1].jobDefinitionID:
                _logger.debug("%s multiple JobIDs use the libDS %s:%s %s:%s" % (self.token,
                              tmpPandaIDsForLibDS[0].jobDefinitionID, self.minPandaIDlibDS,
                              tmpPandaIDsForLibDS[1].jobDefinitionID, self.maxPandaIDlibDS))
                _logger.debug("%s end" % self.token)
                return
        # check excludedSite
        if self.excludedSite == None:
            self.excludedSite = []
            match = re.search("--excludedSite( +|=)\s*(\'|\")*([^ \"\';$]+)", self.job.metadata)
            if match != None:
                self.excludedSite = match.group(3).split(',')
            # remove empty
            try:
                self.excludedSite.remove('')
            except:
                pass
        _logger.debug("%s excludedSite=%s" % (self.token, str(self.excludedSite)))
        # check cloud
        if self.cloud == None:
            match = re.search("--cloud( +|=)\s*(\'|\")*([^ \"\';$]+)", self.job.metadata)
            if match != None:
                self.cloud = match.group(3)
        _logger.debug("%s cloud=%s" % (self.token, self.cloud))
        # get inDS/LFNs
        status, tmpMapInDS, maxFileSize = self.taskBuffer.getInDatasetsForReBrokerage(self.jobID, self.userName)
        if not status:
            # failed
            _logger.error("%s failed to get inDS/LFN from DB" % self.token)
            return
        status, inputDS = self.getListDatasetsUsedByJob(tmpMapInDS)
        if not status:
            # failed
            _logger.error("%s failed" % self.token)
            return
        # get replicas
        replicaMap = {}
        unknownSites = {}
        for tmpDS in inputDS:
            if tmpDS.endswith('/'):
                # container
                status, tmpRepMaps = self.getListDatasetReplicasInContainer(tmpDS)
            else:
                # normal dataset
                status, tmpRepMap = self.getListDatasetReplicas(tmpDS)
                tmpRepMaps = {tmpDS: tmpRepMap}
            if not status:
                # failed
                _logger.debug("%s failed" % self.token)
                return
            # make map per site
            for tmpDS, tmpRepMap in tmpRepMaps.iteritems():
                for tmpSite, tmpStat in tmpRepMap.iteritems():
                    # ignore special sites
                    if tmpSite in ['CERN-PROD_TZERO', 'CERN-PROD_DAQ', 'CERN-PROD_TMPDISK']:
                        continue
                    # ignore tape sites
                    if tmpSite.endswith('TAPE'):
                        continue
                    # keep sites with unknown replica info
                    if tmpStat[-1]['found'] == None:
                        if not unknownSites.has_key(tmpDS):
                            unknownSites[tmpDS] = []
                        unknownSites[tmpDS].append(tmpSite)
                    # ignore ToBeDeleted
                    if tmpStat[-1]['archived'] in ['ToBeDeleted', ]:
                        continue
                    # change EOS
                    if tmpSite.startswith('CERN-PROD_EOS'):
                        tmpSite = 'CERN-PROD_EOS'
                    # change EOS TMP
                    if tmpSite.startswith('CERN-PROD_TMP'):
                        tmpSite = 'CERN-PROD_TMP'
                    # change DISK to SCRATCHDISK
                    tmpSite = re.sub('_[^_-]+DISK$', '', tmpSite)
                    # change PERF-XYZ to SCRATCHDISK
                    tmpSite = re.sub('_PERF-[^_-]+$', '', tmpSite)
                    # change PHYS-XYZ to SCRATCHDISK
                    tmpSite = re.sub('_PHYS-[^_-]+$', '', tmpSite)
                    # patch for BNLPANDA
                    if tmpSite in ['BNLPANDA']:
                        tmpSite = 'BNL-OSG2'
                    # add to map
                    if not replicaMap.has_key(tmpSite):
                        replicaMap[tmpSite] = {}
                    replicaMap[tmpSite][tmpDS] = tmpStat[-1]
        _logger.debug("%s replica map -> %s" % (self.token, str(replicaMap)))
        # refresh replica info if needed
        self.refreshReplicaInfo(unknownSites)
        # instantiate SiteMapper
        siteMapper = SiteMapper(self.taskBuffer)
        # get original DDM
        origSiteDDM = self.getAggName(siteMapper.getSite(self.job.computingSite).ddm)
        # check all datasets
        maxDQ2Sites = []
        if inputDS != []:
            # loop over all sites
            for tmpSite, tmpDsVal in replicaMap.iteritems():
                # loop over all datasets
                appendFlag = True
                for tmpOrigDS in inputDS:
                    # check completeness
                    if tmpDsVal.has_key(tmpOrigDS) and tmpDsVal[tmpOrigDS]['found'] != None and \
                           tmpDsVal[tmpOrigDS]['total'] == tmpDsVal[tmpOrigDS]['found']:
                        pass
                    else:
                        appendFlag = False
                # append
                if appendFlag:
                    if not tmpSite in maxDQ2Sites:
                        maxDQ2Sites.append(tmpSite)
        _logger.debug("%s candidate DQ2s -> %s" % (self.token, str(maxDQ2Sites)))
        if inputDS != [] and maxDQ2Sites == []:
            _logger.debug("%s no DQ2 candidate" % self.token)
        else:
            maxPandaSites = []
            # original maxinputsize
            origMaxInputSize = siteMapper.getSite(self.job.computingSite).maxinputsize
            # look for Panda siteIDs
            for tmpSiteID, tmpSiteSpec in siteMapper.siteSpecList.iteritems():
                # use ANALY_ only
                if not tmpSiteID.startswith('ANALY_'):
                    continue
                # remove test and local
                if re.search('_test', tmpSiteID, re.I) != None:
                    continue
                if re.search('_local', tmpSiteID, re.I) != None:
                    continue
                # avoid same site
                if self.avoidSameSite and self.getAggName(tmpSiteSpec.ddm) == origSiteDDM:
                    continue
                # check DQ2 ID
                if self.cloud in [None, tmpSiteSpec.cloud] \
                       and (self.getAggName(tmpSiteSpec.ddm) in maxDQ2Sites or inputDS == []):
                    # excluded sites
                    excludedFlag = False
                    for tmpExcSite in self.excludedSite:
                        if re.search(tmpExcSite, tmpSiteID) != None:
                            excludedFlag = True
                            break
                    if excludedFlag:
                        _logger.debug("%s skip %s since excluded" % (self.token, tmpSiteID))
                        continue
                    # use online only
                    if tmpSiteSpec.status != 'online':
                        _logger.debug("%s skip %s status=%s" % (self.token, tmpSiteID, tmpSiteSpec.status))
                        continue
                    # check maxinputsize
                    if (maxFileSize == None and origMaxInputSize > siteMapper.getSite(tmpSiteID).maxinputsize) or \
                           maxFileSize > siteMapper.getSite(tmpSiteID).maxinputsize:
                        _logger.debug("%s skip %s due to maxinputsize" % (self.token, tmpSiteID))
                        continue
                    # append
                    if not tmpSiteID in maxPandaSites:
                        maxPandaSites.append(tmpSiteID)
            # choose at most 20 sites randomly to avoid too many lookups
            random.shuffle(maxPandaSites)
            maxPandaSites = maxPandaSites[:20]
            _logger.debug("%s candidate PandaSites -> %s" % (self.token, str(maxPandaSites)))
            # no Panda siteIDs
            if maxPandaSites == []:
                _logger.debug("%s no Panda site candidate" % self.token)
            else:
                # set AtlasRelease and cmtConfig to dummy job
                tmpJobForBrokerage = JobSpec()
                if self.job.AtlasRelease in ['NULL', None]:
                    tmpJobForBrokerage.AtlasRelease = ''
                else:
                    tmpJobForBrokerage.AtlasRelease = self.job.AtlasRelease
                # use nightlies
                matchNight = re.search('^AnalysisTransforms-.*_(rel_\d+)$', self.job.homepackage)
                if matchNight != None:
                    tmpJobForBrokerage.AtlasRelease += ':%s' % matchNight.group(1)
                # use cache
                else:
                    matchCache = re.search('^AnalysisTransforms-([^/]+)', self.job.homepackage)
                    if matchCache != None:
                        tmpJobForBrokerage.AtlasRelease = matchCache.group(1).replace('_', '-')
                if not self.job.cmtConfig in ['NULL', None]:
                    tmpJobForBrokerage.cmtConfig = self.job.cmtConfig
                # memory size
                if not self.job.minRamCount in ['NULL', None, 0]:
                    tmpJobForBrokerage.minRamCount = self.job.minRamCount
                # CPU count
                if not self.job.maxCpuCount in ['NULL', None, 0]:
                    tmpJobForBrokerage.maxCpuCount = self.job.maxCpuCount
                # run brokerage
                brokerage.broker.schedule([tmpJobForBrokerage], self.taskBuffer, siteMapper, forAnalysis=True,
                                          setScanSiteList=maxPandaSites, trustIS=True, reportLog=True)
                newSiteID = tmpJobForBrokerage.computingSite
                self.brokerageInfo += tmpJobForBrokerage.brokerageErrorDiag
                _logger.debug("%s runBrokerage -> %s" % (self.token, newSiteID))
                # unknown site
                if not siteMapper.checkSite(newSiteID):
                    _logger.error("%s unknown site" % self.token)
                    _logger.debug("%s failed" % self.token)
                    return
                # get new site spec
                newSiteSpec = siteMapper.getSite(newSiteID)
                # avoid repetition
                if self.getAggName(newSiteSpec.ddm) == origSiteDDM:
                    _logger.debug("%s assigned to the same site %s " % (self.token, newSiteID))
                    _logger.debug("%s end" % self.token)
                    return
                # simulation mode
                if self.simulation:
                    _logger.debug("%s end simulation" % self.token)
                    return
                # prepare jobs
                status = self.prepareJob(newSiteID, newSiteSpec)
                if status:
                    # run SetUpper
                    statusSetUp = self.runSetUpper()
                    if not statusSetUp:
                        _logger.debug("%s runSetUpper failed" % self.token)
                    else:
                        _logger.debug("%s successfully assigned to %s" % (self.token, newSiteID))
        _logger.debug("%s end" % self.token)
    except:
        errType, errValue, errTraceBack = sys.exc_info()
        _logger.error("%s run() : %s %s" % (self.token, errType, errValue))
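# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the rebrokerage code above): it shows
# how options such as --excludedSite and --cloud can be pulled out of the
# stored job metadata string with the same regular expressions used in run().
# The helper name parse_rebrokerage_options() and the sample metadata string
# are hypothetical.
import re


def parse_rebrokerage_options(metadata):
    """Extract the excluded-site list and cloud name from job metadata."""
    excluded_sites = []
    match = re.search("--excludedSite( +|=)\s*(\'|\")*([^ \"\';$]+)", metadata)
    if match != None:
        excluded_sites = [s for s in match.group(3).split(',') if s != '']
    cloud = None
    match = re.search("--cloud( +|=)\s*(\'|\")*([^ \"\';$]+)", metadata)
    if match != None:
        cloud = match.group(3)
    return excluded_sites, cloud


# Example call (hypothetical metadata):
#   parse_rebrokerage_options("--excludedSite ANALY_FOO,ANALY_BAR --cloud US")
#   -> (['ANALY_FOO', 'ANALY_BAR'], 'US')
# ---------------------------------------------------------------------------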