Python JobSpec.assignedPriority示例

    def master_prepare(self,app,appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.')) 

        # set chirp variables
        if configPanda['chirpconfig'] or configPanda['chirpserver']:
            setChirpVariables()

#       Pack inputsandbox
        inputsandbox = 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null') 
        inpw = job.getInputWorkspace()
        # add user script to inputsandbox
        if hasattr(job.application.exe, "name"):
            if not job.application.exe in job.inputsandbox:
                job.inputsandbox.append(job.application.exe)

        for fname in [f.name for f in job.inputsandbox]:
            fname.rstrip(os.sep)
            path = fname[:fname.rfind(os.sep)]
            f = fname[fname.rfind(os.sep)+1:]
            rc, output = commands.getstatusoutput('tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
        if len(job.inputsandbox) > 0:
            rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox)))
            if rc:
                logger.error('Packing inputsandbox failed with status %d',rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
            inputsandbox += ".gz"
        else:
            inputsandbox = None

#       Upload Inputsandbox
        if inputsandbox:
            logger.debug('Uploading source tarball ...')
            uploadSources(inpw.getPath(),os.path.basename(inputsandbox))
            self.inputsandbox = inputsandbox
        else:
            self.inputsandbox = None

#       input dataset
        if job.inputdata:
            if job.inputdata._name != 'DQ2Dataset':
                raise ApplicationConfigurationError('PANDA application supports only DQ2Datasets')

        # run brokerage here if not splitting
        if not job.splitter:
            from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
            runPandaBrokerage(job)
        elif job.splitter._name not in ['DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask']:
            raise ApplicationConfigurationError('Panda splitter must be DQ2JobSplitter or ArgSplitter')
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError('site is still AUTO after brokerage!')

#       output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        job.outputdata.datasetname,outlfn = dq2outputdatasetname(job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)

        self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

        try:
            Client.addDataset(job.outputdata.datasetname,False,location=self.outDsLocation)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,self.outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))

        # handle the libds
        if job.backend.libds:
            self.libDataset = job.backend.libds
            self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
            self.library = self.fileBO.lfn
        elif job.backend.bexec:
            self.libDataset = job.outputdata.datasetname+'.lib'
            self.library = '%s.tgz' % self.libDataset
            try:
                Client.addDataset(self.libDataset,False,location=self.outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, location=self.outDsLocation)
                logger.info('Lib dataset %s registered at %s'%(self.libDataset,self.outDsLocation))
            except exceptions.SystemExit:
                raise BackendError('Panda','Exception in Client.addDataset %s: %s %s'%(self.libDataset,sys.exc_info()[0],sys.exc_info()[1]))

        # collect extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.outputsandbox:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # create build job
        if job.backend.bexec != '':
            jspec = JobSpec()
            jspec.jobDefinitionID   = job.id
            jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
            jspec.transformation    = '%s/buildGen-00-00-01' % Client.baseURLSUB
            if Client.isDQ2free(job.backend.site):
                jspec.destinationDBlock = '%s/%s' % (job.outputdata.datasetname,self.libDataset)
                jspec.destinationSE     = 'local'
            else:
                jspec.destinationDBlock = self.libDataset
                jspec.destinationSE     = job.backend.site
            jspec.prodSourceLabel   = configPanda['prodSourceLabelBuild']
            jspec.processingType    = configPanda['processingType']
            jspec.assignedPriority  = configPanda['assignedPriorityBuild']
            jspec.computingSite     = job.backend.site
            jspec.cloud             = job.backend.requirements.cloud
            jspec.jobParameters     = '-o %s' % (self.library)
            if self.inputsandbox:
                jspec.jobParameters     += ' -i %s' % (self.inputsandbox)
            else:
                raise ApplicationConfigurationError('Executable on Panda with build job defined, but inputsandbox is emtpy !')
            matchURL = re.search('(http.*://[^/]+)/',Client.baseURLCSRVSSL)
            if matchURL:
                jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1)
            if job.backend.bexec != '':
                jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec)
                jspec.jobParameters += ' -r %s ' % '.'
                

            fout = FileSpec()
            fout.lfn  = self.library
            fout.type = 'output'
            fout.dataset = self.libDataset
            fout.destinationDBlock = self.libDataset
            jspec.addFile(fout)

            flog = FileSpec()
            flog.lfn = '%s.log.tgz' % self.libDataset
            flog.type = 'log'
            flog.dataset = self.libDataset
            flog.destinationDBlock = self.libDataset
            jspec.addFile(flog)
            return jspec
        else:
            return None

示例#2

显示文件

文件： AtlasProdTaskBroker.py 项目： ruslan33/panda-jedi

 def doBrokerage(self,inputList,vo,prodSourceLabel,workQueue):
     '''Run task brokerage for a list of tasks.

     Tasks that do not use the WORLD cloud are converted to JobSpec objects
     and passed in bunches to PandaClient.runTaskAssignment. Tasks that use
     the WORLD cloud are handed to AtlasProdTaskBrokerThread workers.

     Parameters:
         inputList       -- iterable of (jediTaskID, taskInputList) pairs;
                            each taskInputList yields
                            (taskSpec, cloudName, inputChunk) tuples
         vo              -- passed to the RW calculations and used to get
                            the DDM interface
         prodSourceLabel -- passed to the RW calculations
         workQueue       -- work queue; queue_name is logged and the object
                            is passed to the per-priority RW calculations

     Returns:
         self.SC_SUCCEEDED on completion, or self.SC_FAILED as soon as one
         of the RW calculations returns None.
     '''
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug('vo={0} label={1} queue={2} nTasks={3}'.format(vo,prodSourceLabel,
                                                                 workQueue.queue_name,
                                                                 len(inputList)))
     # loop over all tasks
     allRwMap    = {}
     prioMap     = {}
     tt2Map      = {}
     expRWs      = {}
     jobSpecList = []
     for tmpJediTaskID,tmpInputList in inputList:
         for taskSpec,cloudName,inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec,inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             # (only non-WORLD tasks reach this point)
             jobSpec = JobSpec()
             jobSpec.taskID     = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel  = 'managed'
             jobSpec.processingType   = taskSpec.processingType
             jobSpec.workingGroup     = taskSpec.workingGroup
             jobSpec.metadata         = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority  = taskSpec.currentPriority
             jobSpec.maxDiskCount     = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) / 1024 / 1024
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 # remember the last dataset seen as a fallback
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID]  = jobSpec.processingType
             # get RW for a priority
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                    jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error('failed to calculate RW for jediTaskID={0}'.format(jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (jobSpec.metadata,
                                                       str(rwValues),str(expRWs),
                                                       str(prioMap),str(fullRWs),
                                                       str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(len(jobSpecList)))
         nBunchTask = 0
         while nBunchTask < len(jobSpecList):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask+maxBunchTask]
             strIDs = 'jediTaskID=' + ','.join('{0}'.format(tmpJobSpec.taskID)
                                               for tmpJobSpec in jobsBunch)
             tmpLog.debug(strIDs)
             # increment index
             nBunchTask += maxBunchTask
             # run task brokerage; the outcome is only logged
             stS,outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS,str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,None,None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority
         # NOTE(review): allRwMap may already hold non-WORLD RW values for
         # the same priority from the loop above - confirm this sharing is
         # intended.
         for taskSpec,inputChunk in inputListWorld:
             if taskSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(vo,prodSourceLabel,workQueue,
                                                                         taskSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error('failed to calculate RW with prio={0}'.format(taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for iWorker in range(4):
             thr = AtlasProdTaskBrokerThread(inputListWorld,threadPool,
                                             self.taskBufferIF,ddmIF,
                                             fullRWs,liveCounter)
             thr.start()
         threadPool.join(60*10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED

示例#3

显示文件

文件： ExecutablePandaRTHandler.py 项目： Erni1619/ganga

    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.'))

#       in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter
        if job.inputdata and not job._getRoot().subjobs:

            if not job.inputdata.names:
                
                contents = job.inputdata.get_contents(overlap=False, size=True)

                for ds in contents.keys():

                    for f in contents[ds]:
                        job.inputdata.guids.append( f[0] )
                        job.inputdata.names.append( f[1][0] )
                        job.inputdata.sizes.append( f[1][1] )
                        job.inputdata.checksums.append( f[1][2] )
                        job.inputdata.scopes.append( f[1][3] )


        site = job._getRoot().backend.site
        job.backend.site = site
        job.backend.actualCE = site
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

#       if no outputdata are given
        if not job.outputdata:
            job.outputdata = DQ2OutputDataset()
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname
        #if not job.outputdata.datasetname:
        else:
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname

        if not job.outputdata.datasetname:
            raise ApplicationConfigurationError('DQ2OutputDataset has no datasetname')

        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
        jspec.transformation    = '%s/runGen-00-00-02' % Client.baseURLSUB
        if job.inputdata:
            jspec.prodDBlock    = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock    = 'NULL'
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            if not job._getRoot().subjobs or job.id == 0:
                logger.warning('You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.')
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = site
        jspec.prodSourceLabel   = configPanda['prodSourceLabelRun']
        jspec.processingType    = configPanda['processingType']
        jspec.assignedPriority  = configPanda['assignedPriorityRun']
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        # cputime     
        if job.backend.requirements.cputime != -1:
            jspec.maxCpuCount = job.backend.requirements.cputime
        jspec.computingSite     = site

#       library (source files)
        if job.backend.libds:
            flib = FileSpec()
            flib.lfn            = self.fileBO.lfn
            flib.GUID           = self.fileBO.GUID
            flib.type           = 'input'
            flib.status         = self.fileBO.status
            flib.dataset        = self.fileBO.destinationDBlock
            flib.dispatchDBlock = self.fileBO.destinationDBlock
            jspec.addFile(flib)
        elif job.backend.bexec:
            flib = FileSpec()
            flib.lfn            = self.library
            flib.type           = 'input'
            flib.dataset        = self.libDataset
            flib.dispatchDBlock = self.libDataset
            jspec.addFile(flib)

#       input files FIXME: many more input types
        if job.inputdata:            
            for guid, lfn, size, checksum, scope in zip(job.inputdata.guids,job.inputdata.names,job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
                finp = FileSpec()
                finp.lfn            = lfn
                finp.GUID           = guid
                finp.scope          = scope
                
#            finp.fsize =
#            finp.md5sum =
                finp.dataset        = job.inputdata.dataset[0]
                finp.prodDBlock     = job.inputdata.dataset[0]
                finp.dispatchDBlock = job.inputdata.dataset[0]
                finp.type           = 'input'
                finp.status         = 'ready'
                jspec.addFile(finp)

#       output files
#        outMap = {}
        
        #FIXME: if options.outMeta != []:
        self.rundirectory = "."

#       log files

        flog = FileSpec()
        flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
        flog.type = 'log'
        flog.dataset           = job.outputdata.datasetname
        flog.destinationDBlock = job.outputdata.datasetname
        flog.destinationSE     = job.backend.site
        jspec.addFile(flog)

#       job parameters
        param = ''

        # source URL
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL)
        srcURL = ""
        if matchURL != None:
            srcURL = matchURL.group(1)
            param += " --sourceURL %s " % srcURL

        param += '-r "%s" ' % self.rundirectory

        exe_name = job.application.exe
        if job.backend.bexec == '':
            if hasattr(job.application.exe, "name"):
                exe_name = os.path.basename(job.application.exe.name)

            # set jobO parameter
            if job.application.args:
                param += ' -j "" -p "%s %s" '%(exe_name,urllib.quote(" ".join(job.application.args)))
            else:
                param += ' -j "" -p "%s" '%exe_name
            if self.inputsandbox:
                param += ' -a %s '%self.inputsandbox

        else:
            param += '-l %s ' % self.library
            param += '-j "" -p "%s %s" ' % ( exe_name,urllib.quote(" ".join(job.application.args)))

        if job.inputdata:
            param += '-i "%s" ' % job.inputdata.names

        # fill outfiles
        outfiles = {}
        for f in self.extOutFile:
            tarnum = 1
            if f.find('*') != -1:
            # archive *
                outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S") )
                tarnum += 1
            else:
                outfiles[f] = "%s.%s.%s" %(f, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))

            fout = FileSpec()
            fout.lfn = outfiles[f]
            fout.type = 'output'
            fout.dataset           = job.outputdata.datasetname
            fout.destinationDBlock = job.outputdata.datasetname
            fout.destinationSE     = job.backend.site
            jspec.addFile(fout)

        param += '-o "%s" ' % (outfiles) # must be double quotes, because python prints strings in 'single quotes' 

        for file in jspec.Files:
            if file.type in [ 'output', 'log'] and configPanda['chirpconfig']:
                file.dispatchDBlockToken = configPanda['chirpconfig']
                logger.debug('chirp file %s',file)

        jspec.jobParameters = param
        
        return jspec

示例#4

显示文件

文件： AthenaMCPandaRTHandler.py 项目： MannyMoo/ganga

    def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.'))
        
        try:
            assert self.outsite
        except:
            logger.error("outsite not set. Aborting")
            raise Exception()
        
        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud
        

        # now just filling the job from AthenaMC data
        
        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')  
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        
        if app.transform_archive:
            jspec.homepackage       = 'AnalysisTransforms'+app.transform_archive
        elif app.prod_release:
            jspec.homepackage       = 'AnalysisTransforms-AtlasProduction_'+str(app.prod_release)
        jspec.transformation    = '%s/runAthena-00-00-11' % Client.baseURLSUB
            
        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock    = 'NULL'
        if job.inputdata and len(app.inputfiles)>0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock    = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset=""
        for type in ["EVNT","RDO","HITS","AOD","ESD","NTUP"]:
            if type in app.outputpaths.keys():
                outdset=string.replace(app.outputpaths[type],"/",".")
                outdset=outdset[1:-1]
                break
        if not outdset:
            try:
                assert len(app.outputpaths.keys())>0
            except:
                logger.error("app.outputpaths is empty: check your output datasets")
                raise
            type=app.outputpaths.keys()[0]
            outdset=string.replace(app.outputpaths[type],"/",".")
            outdset=outdset[1:-1]
            
        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel   = 'user'
        jspec.assignedPriority  = 1000
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite     = self.outsite
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
#       library (source files)
        flib = FileSpec()
        flib.lfn            = self.library
#        flib.GUID           = 
        flib.type           = 'input'
#        flib.status         = 
        flib.dataset        = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid=app.turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid=app.dbturls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid=app.minbias_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid=app.cavern_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
            

#       output files( this includes the logfiles)
        # Output files
        jidtag=""
        job = app._getParent() # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id       
        outfiles=app.subjobsOutfiles[job.id]
        pandaOutfiles={}
        for type in outfiles.keys():
            pandaOutfiles[type]=outfiles[type]+"."+str(jidtag)
            if type=="LOG":
                pandaOutfiles[type]+=".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset=string.replace(app.outputpaths[outtype],"/",".")
            dset=dset[1:-1]
            fout.dataset=dset
            fout.lfn=pandaOutfiles[outtype]
            fout.type              = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE    = jspec.destinationSE
            if outtype=='LOG':
                fout.type='log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE     = job.backend.site
            jspec.addFile(fout)


        #       job parameters
        param =  '-l %s ' % self.library # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '
        
        # transform parameters
        # need to update arglist with final output file name...
        newArgs=[]
        if app.mode == "evgen":
            app.args[3]=app.args[3]+" -t "
            if app.verbosity:
                app.args[3]=app.args[3]+" -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type])>-1:
                    arg=arg.replace(outfiles[type],pandaOutfiles[type])

            newArgs.append(arg)
        arglist=string.join(newArgs," ")
#        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles=app.inputfiles+app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles)>0:
            param+= ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles)>0:
            param+= ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        del pandaOutfiles["LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore...
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL        
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLSSL)
        if matchURL != None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"


        jspec.jobParameters = param
        jspec.metadata="--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return
        
        return jspec

示例#5

显示文件

文件： AthenaMCPandaRTHandler.py 项目： MannyMoo/ganga

    def master_prepare(self,app,appmasterconfig):

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler master_prepare called for %s', job.getFQID('.'))
        usertag = configDQ2['usertag']
        #usertag='user09'
        nickname = getNickname(allowMissingNickname=True)
        self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (usertag,nickname,commands.getoutput('hostname').split('.')[0],int(time.time()),job.id)
#        self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity())
        sources = 'sources.%s.tar.gz' % commands.getoutput('uuidgen 2> /dev/null') 
        self.library = '%s.lib.tgz' % self.libDataset

        # check DBRelease
        # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1:
         #   raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'")

#       unpack library
        logger.debug('Creating source tarball ...')        
        tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null')
        os.mkdir(tmpdir)

        inputbox=[]
        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [ File(app.transform_archive) ]
        if app.evgen_job_option:
            self.evgen_job_option=app.evgen_job_option
            if os.path.exists(app.evgen_job_option):
                # locally modified job option file to add to the input sand box
                inputbox += [ File(app.evgen_job_option) ]
                self.evgen_job_option=app.evgen_job_option.split("/")[-1]

         
#       add input sandbox files
        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [ file ]
#        add option files
        for extFile in job.backend.extOutFile:
            try:
                shutil.copy(extFile,tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(extFile,tmpdir)
#       fill the archive
        for opt_file in inputbox:
            try:
                shutil.copy(opt_file.name,tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(opt_file.name,tmpdir)
#       now tar it up again

        inpw = job.getInputWorkspace()
        rc, output = commands.getstatusoutput('tar czf %s -C %s .' % (inpw.getPath(sources),tmpdir))
        if rc:
            logger.error('Packing sources failed with status %d',rc)
            logger.error(output)
            raise ApplicationConfigurationError(None,'Packing sources failed.')

        shutil.rmtree(tmpdir)

#       upload sources

        logger.debug('Uploading source tarball ...')
        try:
            cwd = os.getcwd()
            os.chdir(inpw.getPath())
            rc, output = Client.putFile(sources)
            if output != 'True':
                logger.error('Uploading sources %s failed. Status = %d', sources, rc)
                logger.error(output)
                raise ApplicationConfigurationError(None,'Uploading archive failed')
        finally:
            os.chdir(cwd)      


        # Use Panda's brokerage
##         if job.inputdata and len(app.sites)>0:
##             # update cloud, use inputdata's
##             from dq2.info.TiersOfATLAS import whichCloud,ToACache
##             inclouds=[]
##             for site in app.sites:
##                 cloudSite=whichCloud(app.sites[0])
##                 if cloudSite not in inclouds:
##                     inclouds.append(cloudSite)
##             # now converting inclouds content into proper brokering stuff.
##             outclouds=[]
##             for cloudSite in inclouds:
##                 for cloudID, eachCloud in ToACache.dbcloud.iteritems():
##                     if cloudSite==eachCloud:
##                         cloud=cloudID
##                         outclouds.append(cloud)
##                         break
                    
##             print outclouds
##             # finally, matching with user's wishes
##             if len(outclouds)>0:
##                 if not job.backend.requirements.cloud: # no user wish, update
##                     job.backend.requirements.cloud=outclouds[0]
##                 else:
##                     try:
##                         assert job.backend.requirements.cloud in outclouds
##                     except:
##                         raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))
                                                            
        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
        
        runPandaBrokerage(job)
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(None,'site is still AUTO after brokerage!')

        # output dataset preparation and registration
        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        except:
            raise ApplicationConfigurationError(None,"Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site)
        if not app.dryrun:
            for outtype in app.outputpaths.keys():
                dset=string.replace(app.outputpaths[outtype],"/",".")
                dset=dset[1:]
                # dataset registration must be done only once.
                print "registering output dataset %s at %s" % (dset,outDsLocation)
                try:
                    Client.addDataset(dset,False,location=outDsLocation)
                    dq2_set_dataset_lifetime(dset, location=outDsLocation)
                except:
                    raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % dset)
            # extend registration to build job lib dataset:
            print "registering output dataset %s at %s" % (self.libDataset,outDsLocation)

            try:
                Client.addDataset(self.libDataset,False,location=outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
            except:
                raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % self.libDataset)


        ###
        cacheVer = "-AtlasProduction_" + str(app.prod_release)
            
        logger.debug("master job submit?")
        self.outsite=job.backend.site
        if app.se_name and app.se_name != "none" and not self.outsite:
            self.outsite=app.se_name

       
        #       create build job
        jspec = JobSpec()
        jspec.jobDefinitionID   = job.id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        jspec.homepackage       = 'AnalysisTransforms'+cacheVer#+nightVer
        jspec.transformation    = '%s/buildJob-00-00-03' % Client.baseURLSUB # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare()
        jspec.destinationDBlock = self.libDataset
        jspec.destinationSE     = job.backend.site
        jspec.prodSourceLabel   = 'panda'
        jspec.assignedPriority  = 2000
        jspec.computingSite     = job.backend.site
        jspec.cloud             = job.backend.requirements.cloud
#        jspec.jobParameters     = self.args not known yet
        jspec.jobParameters     = '-o %s' % (self.library)
        if app.userarea:
            print app.userarea
            jspec.jobParameters     += ' -i %s' % (os.path.basename(app.userarea))
        else:
            jspec.jobParameters     += ' -i %s' % (sources)
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
        
        matchURL = re.search('(http.*://[^/]+)/',Client.baseURLSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

        fout = FileSpec()
        fout.lfn  = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)
        #print "MASTER JOB DETAILS:",jspec.jobParameters

        return jspec

示例#6

显示文件

文件： AtlasProdTaskBroker.py 项目： tertychnyy/panda-jedi

 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue):
     """Build one JobSpec per task chunk and submit them to the task assigner.

     Arguments:
       inputList       -- iterable of (jediTaskID, tmpInputList) pairs where
                          tmpInputList yields (taskSpec, cloudName, inputChunk)
       vo              -- passed through to the RW calculations
       prodSourceLabel -- passed through to the RW calculations
       workQueue       -- work queue; queue_name is logged and the object is
                          passed to the per-priority RW calculation

     Returns self.SC_SUCCEEDED, or self.SC_FAILED as soon as one of the RW
     calculations returns None.
     """
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug('vo={0} label={1} queue={2}'.format(
         vo, prodSourceLabel, workQueue.queue_name))
     # loop over all tasks
     allRwMap = {}   # currentPriority -> RW for that priority
     prioMap = {}    # taskID -> currentPriority
     tt2Map = {}     # taskID -> processingType
     expRWs = {}     # taskID -> expected RW of the task
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = 'managed'
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             # explicit floor division: same result as '/' on Python 2
             # integers and still an integer on Python 3
             jobSpec.maxDiskCount = (
                 taskSpec.getOutDiskSize() +
                 taskSpec.getWorkDiskSize()) // 1024 // 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 # remember the last dataset seen as fallback prodDBlock
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(
                         datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority (computed once per distinct priority)
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(
                 jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error(
                     'failed to calculate RW for jediTaskID={0}'.format(
                         jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # get fullRWs
     fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(
         vo, prodSourceLabel, None, None)
     if fullRWs is None:
         tmpLog.error('failed to calculate full RW')
         return retTmpError
     # set metadata: processingType;RW of own priority;expRWs;prioMap;fullRWs;tt2Map
     for jobSpec in jobSpecList:
         rwValues = allRwMap[jobSpec.currentPriority]
         jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
             jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap),
             str(fullRWs), str(tt2Map))
     tmpLog.debug('run task assigner for {0} tasks'.format(
         len(jobSpecList)))
     # submit in bunches of at most maxBunchTask job specs
     for nBunchTask in range(0, len(jobSpecList), maxBunchTask):
         # get a bunch
         jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask]
         strIDs = 'jediTaskID=' + ','.join(
             '{0}'.format(tmpJobSpec.taskID) for tmpJobSpec in jobsBunch)
         tmpLog.debug(strIDs)
         # run task brokerage; the outcome is only logged
         stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
         tmpLog.debug('{0}:{1}'.format(stS, str(outSs)))
     # return
     tmpLog.debug('done')
     return self.SC_SUCCEEDED

示例#7

显示文件

文件： AtlasProdTaskBroker.py 项目： tertychnyy/panda-jedi

 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue):
     """Create a JobSpec for every task chunk and run the task assigner on them.

     Arguments:
       inputList       -- iterable of (jediTaskID, tmpInputList) pairs where
                          tmpInputList yields (taskSpec, cloudName, inputChunk)
       vo              -- forwarded to the RW calculations
       prodSourceLabel -- forwarded to the RW calculations
       workQueue       -- work queue; its queue_name is logged and the object
                          is forwarded to the per-priority RW calculation

     Returns self.SC_SUCCEEDED on completion and self.SC_FAILED when any RW
     calculation yields None.
     """
     # variables for submission
     maxBunchTask = 100
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug("start doBrokerage")
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug("vo={0} label={1} queue={2}".format(vo, prodSourceLabel, workQueue.queue_name))
     # bookkeeping maps filled while looping over all tasks
     allRwMap = {}  # currentPriority -> RW for that priority
     prioMap = {}  # taskID -> currentPriority
     tt2Map = {}  # taskID -> processingType
     expRWs = {}  # taskID -> expected RW of the task
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # make JobSpec to be submitted for TaskAssigner
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = "managed"
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             # floor division keeps the value integral on Python 2 and 3 alike
             jobSpec.maxDiskCount = (taskSpec.getOutDiskSize() + taskSpec.getWorkDiskSize()) // 1024 // 1024
             if taskSpec.useWorldCloud():
                 # use destinationSE to trigger task brokerage in WORLD cloud
                 jobSpec.destinationSE = taskSpec.cloud
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 # the last dataset seen serves as fallback prodDBlock
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority, only once per distinct priority
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue, jobSpec.currentPriority
                 )
                 if tmpRW is None:
                     tmpLog.error("failed to calculate RW with prio={0}".format(jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error("failed to calculate RW for jediTaskID={0}".format(jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # get fullRWs
     fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(vo, prodSourceLabel, None, None)
     if fullRWs is None:
         tmpLog.error("failed to calculate full RW")
         return retTmpError
     # set metadata: processingType;RW of own priority;expRWs;prioMap;fullRWs;tt2Map
     for jobSpec in jobSpecList:
         rwValues = allRwMap[jobSpec.currentPriority]
         jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
             jobSpec.metadata,
             str(rwValues),
             str(expRWs),
             str(prioMap),
             str(fullRWs),
             str(tt2Map),
         )
     tmpLog.debug("run task assigner for {0} tasks".format(len(jobSpecList)))
     # hand the job specs over in bunches of at most maxBunchTask
     for nBunchTask in range(0, len(jobSpecList), maxBunchTask):
         # get a bunch
         jobsBunch = jobSpecList[nBunchTask : nBunchTask + maxBunchTask]
         strIDs = "jediTaskID=" + ",".join("{0}".format(tmpJobSpec.taskID) for tmpJobSpec in jobsBunch)
         tmpLog.debug(strIDs)
         # run task brokerage; status and output are only logged
         stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
         tmpLog.debug("{0}:{1}".format(stS, str(outSs)))
     # return
     tmpLog.debug("done")
     return self.SC_SUCCEEDED

示例#8

显示文件

    def master_prepare(self, app, appconfig):
        '''Prepare the master job'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('ExecutablePandaRTHandler master_prepare called for %s',
                     job.getFQID('.'))

        # set chirp variables
        if configPanda['chirpconfig'] or configPanda['chirpserver']:
            setChirpVariables()

#       Pack inputsandbox
        inputsandbox = 'sources.%s.tar' % commands.getoutput(
            'uuidgen 2> /dev/null')
        inpw = job.getInputWorkspace()
        # add user script to inputsandbox
        if hasattr(job.application.exe, "name"):
            if not job.application.exe in job.inputsandbox:
                job.inputsandbox.append(job.application.exe)

        for fname in [f.name for f in job.inputsandbox]:
            fname.rstrip(os.sep)
            path = fname[:fname.rfind(os.sep)]
            f = fname[fname.rfind(os.sep) + 1:]
            rc, output = commands.getstatusoutput(
                'tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
        if len(job.inputsandbox) > 0:
            rc, output = commands.getstatusoutput('gzip %s' %
                                                  (inpw.getPath(inputsandbox)))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Packing inputsandbox failed.')
            inputsandbox += ".gz"
        else:
            inputsandbox = None

#       Upload Inputsandbox
        if inputsandbox:
            logger.debug('Uploading source tarball ...')
            uploadSources(inpw.getPath(), os.path.basename(inputsandbox))
            self.inputsandbox = inputsandbox
        else:
            self.inputsandbox = None

#       input dataset
        if job.inputdata:
            if job.inputdata._name != 'DQ2Dataset':
                raise ApplicationConfigurationError(
                    None, 'PANDA application supports only DQ2Datasets')

        # run brokerage here if not splitting
        if not job.splitter:
            from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
            runPandaBrokerage(job)
        elif job.splitter._name not in [
                'DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask'
        ]:
            raise ApplicationConfigurationError(
                None, 'Panda splitter must be DQ2JobSplitter or ArgSplitter')

        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(
                None, 'site is still AUTO after brokerage!')

#       output dataset
        if job.outputdata:
            if job.outputdata._name != 'DQ2OutputDataset':
                raise ApplicationConfigurationError(
                    None, 'Panda backend supports only DQ2OutputDataset')
        else:
            logger.info('Adding missing DQ2OutputDataset')
            job.outputdata = DQ2OutputDataset()

        job.outputdata.datasetname, outlfn = dq2outputdatasetname(
            job.outputdata.datasetname, job.id, job.outputdata.isGroupDS,
            job.outputdata.groupname)

        self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

        try:
            Client.addDataset(job.outputdata.datasetname,
                              False,
                              location=self.outDsLocation)
            logger.info('Output dataset %s registered at %s' %
                        (job.outputdata.datasetname, self.outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname,
                                     location=self.outDsLocation)
        except exceptions.SystemExit:
            raise BackendError(
                'Panda', 'Exception in Client.addDataset %s: %s %s' %
                (job.outputdata.datasetname, sys.exc_info()[0],
                 sys.exc_info()[1]))

        # handle the libds
        if job.backend.libds:
            self.libDataset = job.backend.libds
            self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
            self.library = self.fileBO.lfn
        elif job.backend.bexec:
            self.libDataset = job.outputdata.datasetname + '.lib'
            self.library = '%s.tgz' % self.libDataset
            try:
                Client.addDataset(self.libDataset,
                                  False,
                                  location=self.outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset,
                                         location=self.outDsLocation)
                logger.info('Lib dataset %s registered at %s' %
                            (self.libDataset, self.outDsLocation))
            except exceptions.SystemExit:
                raise BackendError(
                    'Panda', 'Exception in Client.addDataset %s: %s %s' %
                    (self.libDataset, sys.exc_info()[0], sys.exc_info()[1]))

        # collect extOutFiles
        self.extOutFile = []
        for tmpName in job.outputdata.outputdata:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.outputsandbox:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        for tmpName in job.backend.extOutFile:
            if tmpName != '':
                self.extOutFile.append(tmpName)

        # create build job
        if job.backend.bexec != '':
            jspec = JobSpec()
            jspec.jobDefinitionID = job.id
            jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
            jspec.transformation = '%s/buildGen-00-00-01' % Client.baseURLSUB
            if Client.isDQ2free(job.backend.site):
                jspec.destinationDBlock = '%s/%s' % (
                    job.outputdata.datasetname, self.libDataset)
                jspec.destinationSE = 'local'
            else:
                jspec.destinationDBlock = self.libDataset
                jspec.destinationSE = job.backend.site
            jspec.prodSourceLabel = configPanda['prodSourceLabelBuild']
            jspec.processingType = configPanda['processingType']
            jspec.assignedPriority = configPanda['assignedPriorityBuild']
            jspec.computingSite = job.backend.site
            jspec.cloud = job.backend.requirements.cloud
            jspec.jobParameters = '-o %s' % (self.library)
            if self.inputsandbox:
                jspec.jobParameters += ' -i %s' % (self.inputsandbox)
            else:
                raise ApplicationConfigurationError(
                    None,
                    'Executable on Panda with build job defined, but inputsandbox is emtpy !'
                )
            matchURL = re.search('(http.*://[^/]+)/', Client.baseURLCSRVSSL)
            if matchURL:
                jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1)
            if job.backend.bexec != '':
                jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(
                    job.backend.bexec)
                jspec.jobParameters += ' -r %s ' % '.'

            fout = FileSpec()
            fout.lfn = self.library
            fout.type = 'output'
            fout.dataset = self.libDataset
            fout.destinationDBlock = self.libDataset
            jspec.addFile(fout)

            flog = FileSpec()
            flog.lfn = '%s.log.tgz' % self.libDataset
            flog.type = 'log'
            flog.dataset = self.libDataset
            flog.destinationDBlock = self.libDataset
            jspec.addFile(flog)
            return jspec
        else:
            return None

示例#9

显示文件

文件： directSubmit.py 项目： ruslan33/panda-server-ornl

    # Excerpt: fills in a job spec (`job`) from values parsed earlier.
    # When the first version component is 11 or lower the home package is
    # 'JobTransforms' followed by every component zero-padded to two digits;
    # otherwise it is 'AtlasProduction/<trfVer>'.
    # NOTE(review): `vers` presumably holds the split release version --
    # confirm against the code preceding this excerpt.
    if int(vers[0]) <= 11:
        job.homepackage = 'JobTransforms'
        for ver in vers:
            job.homepackage += "-%02d" % int(ver)
    else:
        job.homepackage = 'AtlasProduction/%s' % trfVer
    # trf
    job.transformation = trf
    # the first output dataset is used as destination block
    job.destinationDBlock = oDatasets[0]
    # prod DBlock
    job.prodDBlock = iDataset
    # source label
    job.prodSeriesLabel = 'pandatest'
    job.prodSourceLabel = 'managed'
    # priority: assigned and current priority start out identical
    job.assignedPriority = priority
    job.currentPriority = priority
    # CPU, memory,disk ### FIXME

    # attempt number   ### FIXME

    # input files
    if iDataset != 'NULL':
        # remove _tidXXX
        pat = re.sub('_tid\d+$', '', iDataset)
        # search `line` for the dataset name followed by non-blank characters;
        # the whole match becomes the LFN of the input file
        # NOTE(review): `pat` is used as a regular expression without
        # re.escape, so dots in the dataset name match any character.
        m = re.search('(' + pat + '\S+)', line)
        if m != None:
            file = FileSpec()
            file.lfn = m.group(1)
            file.type = 'input'

示例#10

显示文件

 def doBrokerage(self, inputList, vo, prodSourceLabel, workQueue,
                 resource_name):
     """Broker tasks: old clouds via the task assigner, WORLD via threads.

     Tasks whose taskSpec.useWorldCloud() is true are collected and
     processed by AtlasProdTaskBrokerThread workers; for all other tasks a
     JobSpec is built and submitted in bunches to
     PandaClient.runTaskAssignment.

     Arguments:
       inputList       -- sequence of (jediTaskID, tmpInputList) pairs where
                          tmpInputList yields (taskSpec, cloudName, inputChunk)
       vo              -- forwarded to the RW calculations and used to pick
                          the DDM interface
       prodSourceLabel -- forwarded to the RW calculations
       workQueue       -- work queue; queue_name is logged and the object is
                          forwarded to RW calculations and worker threads
       resource_name   -- only logged by this method

     Returns self.SC_SUCCEEDED, or self.SC_FAILED as soon as one of the RW
     calculations returns None.
     """
     # list with a lock
     inputListWorld = ListWithLock([])
     # variables for submission
     maxBunchTask = 100
     nWorkers = 4
     # make logger
     tmpLog = MsgWrapper(logger)
     tmpLog.debug('start doBrokerage')
     # return for failure
     retTmpError = self.SC_FAILED
     tmpLog.debug(
         'vo={0} label={1} queue={2} resource_name={3} nTasks={4}'.format(
             vo, prodSourceLabel, workQueue.queue_name, resource_name,
             len(inputList)))
     # loop over all tasks
     allRwMap = {}   # currentPriority -> RW for that priority
     prioMap = {}    # taskID -> currentPriority
     tt2Map = {}     # taskID -> processingType
     expRWs = {}     # taskID -> expected RW of the task
     jobSpecList = []
     for tmpJediTaskID, tmpInputList in inputList:
         for taskSpec, cloudName, inputChunk in tmpInputList:
             # collect tasks for WORLD
             if taskSpec.useWorldCloud():
                 inputListWorld.append((taskSpec, inputChunk))
                 continue
             # make JobSpec to be submitted for TaskAssigner
             # (only non-WORLD tasks get here, so no destinationSE is set)
             jobSpec = JobSpec()
             jobSpec.taskID = taskSpec.jediTaskID
             jobSpec.jediTaskID = taskSpec.jediTaskID
             # set managed to trigger TA
             jobSpec.prodSourceLabel = 'managed'
             jobSpec.processingType = taskSpec.processingType
             jobSpec.workingGroup = taskSpec.workingGroup
             jobSpec.metadata = taskSpec.processingType
             jobSpec.assignedPriority = taskSpec.taskPriority
             jobSpec.currentPriority = taskSpec.currentPriority
             jobSpec.maxDiskCount = (
                 taskSpec.getOutDiskSize() +
                 taskSpec.getWorkDiskSize()) // 1024 // 1024
             prodDBlock = None
             setProdDBlock = False
             for datasetSpec in inputChunk.getDatasets():
                 # remember the last dataset seen as fallback prodDBlock
                 prodDBlock = datasetSpec.datasetName
                 if datasetSpec.isMaster():
                     jobSpec.prodDBlock = datasetSpec.datasetName
                     setProdDBlock = True
                 for fileSpec in datasetSpec.Files:
                     tmpInFileSpec = fileSpec.convertToJobFileSpec(
                         datasetSpec)
                     jobSpec.addFile(tmpInFileSpec)
             # use secondary dataset name as prodDBlock
             if not setProdDBlock and prodDBlock is not None:
                 jobSpec.prodDBlock = prodDBlock
             # append
             jobSpecList.append(jobSpec)
             prioMap[jobSpec.taskID] = jobSpec.currentPriority
             tt2Map[jobSpec.taskID] = jobSpec.processingType
             # get RW for a priority (computed once per distinct priority)
             if jobSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     jobSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             jobSpec.currentPriority))
                     return retTmpError
                 allRwMap[jobSpec.currentPriority] = tmpRW
             # get expected RW
             expRW = self.taskBufferIF.calculateTaskRW_JEDI(
                 jobSpec.jediTaskID)
             if expRW is None:
                 tmpLog.error(
                     'failed to calculate RW for jediTaskID={0}'.format(
                         jobSpec.jediTaskID))
                 return retTmpError
             expRWs[jobSpec.taskID] = expRW
     # for old clouds
     if jobSpecList:
         # get fullRWs
         fullRWs = self.taskBufferIF.calculateRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full RW')
             return retTmpError
         # set metadata: processingType;RW of own priority;expRWs;prioMap;fullRWs;tt2Map
         for jobSpec in jobSpecList:
             rwValues = allRwMap[jobSpec.currentPriority]
             jobSpec.metadata = "%s;%s;%s;%s;%s;%s" % (
                 jobSpec.metadata, str(rwValues), str(expRWs), str(prioMap),
                 str(fullRWs), str(tt2Map))
         tmpLog.debug('run task assigner for {0} tasks'.format(
             len(jobSpecList)))
         # submit in bunches of at most maxBunchTask job specs
         for nBunchTask in range(0, len(jobSpecList), maxBunchTask):
             # get a bunch
             jobsBunch = jobSpecList[nBunchTask:nBunchTask + maxBunchTask]
             strIDs = 'jediTaskID=' + ','.join(
                 '{0}'.format(tmpJobSpec.taskID) for tmpJobSpec in jobsBunch)
             tmpLog.debug(strIDs)
             # run task brokerage; the outcome is only logged
             stS, outSs = PandaClient.runTaskAssignment(jobsBunch)
             tmpLog.debug('{0}:{1}'.format(stS, str(outSs)))
     # for WORLD
     if len(inputListWorld) > 0:
         # thread pool
         threadPool = ThreadPool()
         # get full RW for WORLD
         fullRWs = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
             vo, prodSourceLabel, None, None)
         if fullRWs is None:
             tmpLog.error('failed to calculate full WORLD RW')
             return retTmpError
         # get RW per priority
         # NOTE(review): allRwMap is shared with the old-cloud loop above, so
         # a priority already filled there is not recalculated with the WORLD
         # method -- confirm this is intended.
         for taskSpec, inputChunk in inputListWorld:
             if taskSpec.currentPriority not in allRwMap:
                 tmpRW = self.taskBufferIF.calculateWorldRWwithPrio_JEDI(
                     vo, prodSourceLabel, workQueue,
                     taskSpec.currentPriority)
                 if tmpRW is None:
                     tmpLog.error(
                         'failed to calculate RW with prio={0}'.format(
                             taskSpec.currentPriority))
                     return retTmpError
                 allRwMap[taskSpec.currentPriority] = tmpRW
         # live counter for RWs
         liveCounter = MapWithLock(allRwMap)
         # make workers
         ddmIF = self.ddmIF.getInterface(vo)
         for _ in range(nWorkers):
             thr = AtlasProdTaskBrokerThread(inputListWorld, threadPool,
                                             self.taskBufferIF, ddmIF,
                                             fullRWs, liveCounter,
                                             workQueue)
             thr.start()
         # wait for the workers, passing 10 minutes as join argument
         threadPool.join(60 * 10)
     # return
     tmpLog.debug('doBrokerage done')
     return self.SC_SUCCEEDED

示例#11

显示文件

    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        '''Prepare the subjob specific configuration.

        Builds the Panda JobSpec for one (sub)job: copies site/cloud from the
        root job, fills in the input file list when the job is not split,
        registers library, input, log and output FileSpecs and assembles the
        runGen command-line parameters.

        Reads self.fileBO, self.library, self.libDataset, self.inputsandbox
        and self.extOutFile; sets self.rundirectory.

        Returns the populated JobSpec.
        Raises ApplicationConfigurationError if no output dataset name is set.
        '''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        # In case of a simple (unsplit) job get the dataset content here;
        # otherwise the subjobs have already been filled by the splitter.
        if job.inputdata and not job._getRoot().subjobs:
            if not job.inputdata.names:
                contents = job.inputdata.get_contents(overlap=False, size=True)
                for ds in contents.keys():
                    for entry in contents[ds]:
                        # entry is (guid, (name, size, checksum, scope))
                        job.inputdata.guids.append(entry[0])
                        job.inputdata.names.append(entry[1][0])
                        job.inputdata.sizes.append(entry[1][1])
                        job.inputdata.checksums.append(entry[1][2])
                        job.inputdata.scopes.append(entry[1][3])

        # Site and cloud are inherited from the root job.
        site = job._getRoot().backend.site
        job.backend.site = site
        job.backend.actualCE = site
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

        # Create an output dataset object if none was given; in every case
        # the dataset name is taken from the root job.
        if not job.outputdata:
            job.outputdata = DQ2OutputDataset()
        job.outputdata.datasetname = job._getRoot().outputdata.datasetname

        if not job.outputdata.datasetname:
            raise ApplicationConfigurationError(
                None, 'DQ2OutputDataset has no datasetname')

        jspec = JobSpec()
        jspec.jobDefinitionID = job._getRoot().id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.transformation = '%s/runGen-00-00-02' % Client.baseURLSUB
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            # Warn only once: for an unsplit job or for the first subjob.
            if not job._getRoot().subjobs or job.id == 0:
                logger.warning(
                    'You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.'
                )
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = site
        jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.assignedPriority = configPanda['assignedPriorityRun']
        jspec.cloud = cloud
        # memory (-1 means "not requested")
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        # cputime (-1 means "not requested")
        if job.backend.requirements.cputime != -1:
            jspec.maxCpuCount = job.backend.requirements.cputime
        jspec.computingSite = site

        # library (source files)
        if job.backend.libds:
            flib = FileSpec()
            flib.lfn = self.fileBO.lfn
            flib.GUID = self.fileBO.GUID
            flib.type = 'input'
            flib.status = self.fileBO.status
            flib.dataset = self.fileBO.destinationDBlock
            flib.dispatchDBlock = self.fileBO.destinationDBlock
            jspec.addFile(flib)
        elif job.backend.bexec:
            flib = FileSpec()
            flib.lfn = self.library
            flib.type = 'input'
            flib.dataset = self.libDataset
            flib.dispatchDBlock = self.libDataset
            jspec.addFile(flib)

        # input files FIXME: many more input types
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(
                    job.inputdata.guids, job.inputdata.names,
                    job.inputdata.sizes, job.inputdata.checksums,
                    job.inputdata.scopes):
                finp = FileSpec()
                finp.lfn = lfn
                finp.GUID = guid
                finp.scope = scope
                # size and checksum are currently not propagated
                finp.dataset = job.inputdata.dataset[0]
                finp.prodDBlock = job.inputdata.dataset[0]
                finp.dispatchDBlock = job.inputdata.dataset[0]
                finp.type = 'input'
                finp.status = 'ready'
                jspec.addFile(finp)

        self.rundirectory = "."

        # log files
        flog = FileSpec()
        flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
        flog.type = 'log'
        flog.dataset = job.outputdata.datasetname
        flog.destinationDBlock = job.outputdata.datasetname
        flog.destinationSE = job.backend.site
        jspec.addFile(flog)

        # job parameters
        param = ''

        # source URL: scheme and host part of the server URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL)
        if matchURL is not None:
            param += " --sourceURL %s " % matchURL.group(1)

        param += '-r "%s" ' % self.rundirectory

        exe_name = job.application.exe
        if job.backend.bexec == '':
            # exe may be a File object; only its base name is passed on
            if hasattr(job.application.exe, "name"):
                exe_name = os.path.basename(job.application.exe.name)

            # set jobO parameter
            if job.application.args:
                param += ' -j "" -p "%s %s" ' % (
                    exe_name, urllib.quote(" ".join(job.application.args)))
            else:
                param += ' -j "" -p "%s" ' % exe_name
            if self.inputsandbox:
                param += ' -a %s ' % self.inputsandbox

        else:
            param += '-l %s ' % self.library
            param += '-j "" -p "%s %s" ' % (
                exe_name, urllib.quote(" ".join(job.application.args)))

        if job.inputdata:
            param += '-i "%s" ' % job.inputdata.names

        # fill outfiles
        outfiles = {}
        # The archive counter must live outside the loop: resetting it on each
        # iteration would give every wildcard pattern the same "outputbox1"
        # name and therefore colliding LFNs.
        tarnum = 1
        for f in self.extOutFile:
            if f.find('*') != -1:
                # wildcard pattern: its matches are shipped as one archive
                outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (
                    tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))
                tarnum += 1
            else:
                outfiles[f] = "%s.%s.%s" % (f, job.getFQID('.'),
                                            time.strftime("%Y%m%d%H%M%S"))

            fout = FileSpec()
            fout.lfn = outfiles[f]
            fout.type = 'output'
            fout.dataset = job.outputdata.datasetname
            fout.destinationDBlock = job.outputdata.datasetname
            fout.destinationSE = job.backend.site
            jspec.addFile(fout)

        # must be double quotes, because python prints strings in 'single quotes'
        param += '-o "%s" ' % (outfiles)

        # Tag output and log files with the chirp configuration, if any.
        for fspec in jspec.Files:
            if fspec.type in ['output', 'log'] and configPanda['chirpconfig']:
                fspec.dispatchDBlockToken = configPanda['chirpconfig']
                logger.debug('chirp file %s', fspec)

        jspec.jobParameters = param

        return jspec

示例#12

显示文件

文件： XrdTest.py 项目： EntityOfPlague/panda-server

# Job definition ID shared by every job built below: the current Unix time
# reduced modulo 10000.
jobDefinitionID = int(time.time()) % 10000

# NOTE(review): nothing in this excerpt appends to jobList; presumably the
# remainder of the loop (not shown) does -- confirm.
jobList = []

# Build two JobSpecs that differ only in the index appended to their names.
# datasetName, destName and site are defined outside this excerpt.
for i in range(2):
    job = JobSpec()
    job.jobDefinitionID   = jobDefinitionID
    # unique job name: a fresh UUID plus the loop index
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),i)
    job.AtlasRelease      = 'Atlas-12.0.6'
    job.homepackage       = 'AnalysisTransforms'
    job.transformation    = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthenaXrd'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.currentPriority   = 3000
    job.assignedPriority  = 3000    
    job.prodSourceLabel   = 'user'
    job.computingSite     = site
    
    # output file, written to the job's destination dataset and SE
    file = FileSpec()
    file.lfn = "%s.AANT._%05d.root" % (job.jobName,i)
    file.destinationDBlock = job.destinationDBlock
    file.destinationSE     = job.destinationSE
    file.dataset           = job.destinationDBlock
    file.type = 'output'
    job.addFile(file)
    
    # file spec for the job log tarball; the excerpt ends before its type is
    # set and before it is attached to the job
    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE     = job.destinationSE

示例#13

显示文件

文件： directSubmit.py 项目： EntityOfPlague/panda-server

    # Choose the home package from the release version: major versions up to
    # 11 use 'JobTransforms' followed by each version component zero-padded
    # to two digits; later versions use 'AtlasProduction/<trfVer>'.
    if int(vers[0]) <= 11:
        job.homepackage = 'JobTransforms'
        for ver in vers:
            job.homepackage += "-%02d" % int(ver)
    else:
        job.homepackage = 'AtlasProduction/%s' % trfVer
    # trf
    job.transformation    = trf
    # the first output dataset is used as the destination block
    job.destinationDBlock = oDatasets[0]
    # prod DBlock
    job.prodDBlock        = iDataset
    # source label
    job.prodSeriesLabel   = 'pandatest'
    job.prodSourceLabel   = 'managed'
    # priority (assigned and current start out equal)
    job.assignedPriority  = priority
    job.currentPriority   = priority
    # CPU, memory,disk ### FIXME

    # attempt number   ### FIXME

    # input files
    if iDataset != 'NULL':
        # remove _tidXXX
        pat = re.sub('_tid\d+$','',iDataset)
        # search `line` for a token that starts with the stripped dataset name
        m = re.search('('+pat+'\S+)',line)
        if m != None:
            file = FileSpec()
            file.lfn  = m.group(1)
            file.type = 'input'

示例#14

显示文件

文件： XrdTest.py 项目： ruslan33/panda-server-ornl

# Job definition ID shared by every job built below: the current Unix time
# reduced modulo 10000.
jobDefinitionID = int(time.time()) % 10000

# NOTE(review): nothing in this excerpt appends to jobList; presumably the
# remainder of the loop (not shown) does -- confirm.
jobList = []

# Build two JobSpecs that differ only in the index appended to their names.
# datasetName, destName and site are defined outside this excerpt.
for i in range(2):
    job = JobSpec()
    job.jobDefinitionID = jobDefinitionID
    # unique job name: a fresh UUID plus the loop index
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    job.AtlasRelease = 'Atlas-12.0.6'
    job.homepackage = 'AnalysisTransforms'
    job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthenaXrd'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 3000
    job.assignedPriority = 3000
    job.prodSourceLabel = 'user'
    job.computingSite = site

    # output file, written to the job's destination dataset and SE
    file = FileSpec()
    file.lfn = "%s.AANT._%05d.root" % (job.jobName, i)
    file.destinationDBlock = job.destinationDBlock
    file.destinationSE = job.destinationSE
    file.dataset = job.destinationDBlock
    file.type = 'output'
    job.addFile(file)

    # file spec for the job log tarball; the excerpt ends before its type is
    # set and before it is attached to the job
    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE

示例#15

显示文件

文件： AthenaMCPandaRTHandler.py 项目： slangrock/ganga

    def master_prepare(self, app, appmasterconfig):
        '''Prepare the master job: stage sources and define the build job.

        Packs the transform archive, job option file, input sandbox and
        extOutFile entries into a source tarball, uploads it, runs the Panda
        brokerage, registers the output and library datasets (unless
        app.dryrun is set) and returns the JobSpec of the build job.

        Sets self.libDataset, self.library, self.outsite and, when
        app.evgen_job_option is given, self.evgen_job_option.

        Raises ApplicationConfigurationError when packing, uploading,
        brokerage or dataset registration fails.
        '''

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler master_prepare called for %s',
                     job.getFQID('.'))
        usertag = configDQ2['usertag']
        nickname = getNickname(allowMissingNickname=True)
        self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (
            usertag, nickname, commands.getoutput('hostname').split('.')[0],
            int(time.time()), job.id)
        sources = 'sources.%s.tar.gz' % commands.getoutput(
            'uuidgen 2> /dev/null')
        self.library = '%s.lib.tgz' % self.libDataset

        # Stage everything into a scratch directory, then tar it up.
        logger.debug('Creating source tarball ...')
        tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null')
        os.mkdir(tmpdir)

        def _stage(path):
            # Copy path into tmpdir. If the copy fails, recreate tmpdir only
            # when it is actually missing, then retry once so that a genuine
            # copy error is not masked by a "directory exists" error.
            try:
                shutil.copy(path, tmpdir)
            except IOError:
                if not os.path.isdir(tmpdir):
                    os.makedirs(tmpdir)
                shutil.copy(path, tmpdir)

        try:
            inputbox = []
            if os.path.exists(app.transform_archive):
                # must add a condition on size.
                inputbox.append(File(app.transform_archive))
            if app.evgen_job_option:
                self.evgen_job_option = app.evgen_job_option
                if os.path.exists(app.evgen_job_option):
                    # locally modified job option file: ship it in the
                    # sandbox and refer to it by base name from now on
                    inputbox.append(File(app.evgen_job_option))
                    self.evgen_job_option = app.evgen_job_option.split(
                        "/")[-1]

            # add input sandbox files
            if job.inputsandbox:
                for sandboxFile in job.inputsandbox:
                    inputbox.append(sandboxFile)
            # add option files
            for extFile in job.backend.extOutFile:
                _stage(extFile)
            # fill the archive
            for opt_file in inputbox:
                _stage(opt_file.name)

            # now tar it up again
            inpw = job.getInputWorkspace()
            rc, output = commands.getstatusoutput(
                'tar czf %s -C %s .' % (inpw.getPath(sources), tmpdir))
        finally:
            # always remove the scratch directory, also when staging or tar
            # failed
            shutil.rmtree(tmpdir, ignore_errors=True)

        if rc:
            logger.error('Packing sources failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError(None,
                                                'Packing sources failed.')

        # upload sources
        logger.debug('Uploading source tarball ...')
        # remember the working directory before entering the try block so
        # that the finally clause can always restore it
        cwd = os.getcwd()
        try:
            os.chdir(inpw.getPath())
            rc, output = Client.putFile(sources)
            if output != 'True':
                logger.error('Uploading sources %s failed. Status = %d',
                             sources, rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Uploading archive failed')
        finally:
            os.chdir(cwd)

        # Use Panda's brokerage
        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage

        runPandaBrokerage(job)

        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(
                None, 'site is still AUTO after brokerage!')

        # output dataset preparation and registration
        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        except Exception:
            raise ApplicationConfigurationError(
                None,
                "Could not extract output dataset location from job.backend.site value: %s. Aborting"
                % job.backend.site)
        if not app.dryrun:
            for outtype in app.outputpaths.keys():
                # turn the output path into a dataset name: '/' -> '.' and
                # drop the leading character
                dset = app.outputpaths[outtype].replace("/", ".")
                dset = dset[1:]
                # dataset registration must be done only once.
                print("registering output dataset %s at %s" % (dset,
                                                               outDsLocation))
                try:
                    Client.addDataset(dset, False, location=outDsLocation)
                    dq2_set_dataset_lifetime(dset, location=outDsLocation)
                except Exception:
                    raise ApplicationConfigurationError(
                        None,
                        "Fail to create output dataset %s. Aborting" % dset)
            # extend registration to build job lib dataset:
            print("registering output dataset %s at %s" % (self.libDataset,
                                                           outDsLocation))

            try:
                Client.addDataset(self.libDataset,
                                  False,
                                  location=outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
            except Exception:
                raise ApplicationConfigurationError(
                    None, "Fail to create output dataset %s. Aborting" %
                    self.libDataset)

        cacheVer = "-AtlasProduction_" + str(app.prod_release)

        logger.debug("master job submit?")
        self.outsite = job.backend.site
        if app.se_name and app.se_name != "none" and not self.outsite:
            self.outsite = app.se_name

        # create build job
        jspec = JobSpec()
        jspec.jobDefinitionID = job.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel
        jspec.homepackage = 'AnalysisTransforms' + cacheVer
        # common base to Athena and AthenaMC jobs: buildJob is a pilot job
        # which takes care of all inputs for the real jobs (in prepare())
        jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB
        jspec.destinationDBlock = self.libDataset
        jspec.destinationSE = job.backend.site
        jspec.prodSourceLabel = 'panda'
        jspec.assignedPriority = 2000
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.jobParameters = '-o %s' % (self.library)
        if app.userarea:
            print(app.userarea)
            jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea))
        else:
            jspec.jobParameters += ' -i %s' % (sources)
        jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)

        # source URL: scheme and host part of the server URL
        matchURL = re.search('(http.*://[^/]+)/', Client.baseURLSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

        # the library tarball produced by the build job
        fout = FileSpec()
        fout.lfn = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        # the build job's log
        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)

        return jspec

示例#16

显示文件

文件： AthenaMCPandaRTHandler.py 项目： slangrock/ganga

    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        '''Prepare the subjob specific configuration.

        Fills a Panda JobSpec from the AthenaMC application data: the library
        tarball, all input files (regular, DB, minbias, cavern), the output
        and log files, and the runAthena parameter string.

        Reads self.outsite, self.library and self.libDataset (set by
        master_prepare).

        Returns the populated JobSpec, or None when app.dryrun is set (the
        job values are printed instead).
        Raises Exception if self.outsite is not set and AssertionError if
        app.outputpaths is empty.
        '''

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        # Explicit check instead of assert, so it is not stripped under -O.
        # A missing attribute is treated like an empty one.
        if not getattr(self, 'outsite', None):
            logger.error("outsite not set. Aborting")
            raise Exception("outsite not set. Aborting")

        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

        # now just filling the job from AthenaMC data

        jspec = JobSpec()
        jspec.jobDefinitionID = job._getRoot().id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel

        if app.transform_archive:
            jspec.homepackage = 'AnalysisTransforms' + app.transform_archive
        elif app.prod_release:
            jspec.homepackage = 'AnalysisTransforms-AtlasProduction_' + str(
                app.prod_release)
        jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB

        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock = 'NULL'
        if job.inputdata and len(
                app.inputfiles) > 0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock when more than one type of
        # output is available? Panda prod jobs seem to specify only the last
        # output dataset. Here the first type present, in the order below,
        # wins.
        outdset = ""
        for outtype in ["EVNT", "RDO", "HITS", "AOD", "ESD", "NTUP"]:
            if outtype in app.outputpaths:
                # '/' -> '.', then drop the first and last character
                outdset = app.outputpaths[outtype].replace("/", ".")[1:-1]
                break
        if not outdset:
            availableTypes = list(app.outputpaths.keys())
            if not availableTypes:
                logger.error(
                    "app.outputpaths is empty: check your output datasets")
                # same exception type the former assert raised
                raise AssertionError(
                    "app.outputpaths is empty: check your output datasets")
            outdset = app.outputpaths[availableTypes[0]].replace(
                "/", ".")[1:-1]

        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel = 'user'
        jspec.assignedPriority = 1000
        jspec.cloud = cloud
        # memory (-1 means "not requested")
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite = self.outsite
        jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)

        # library (source files)
        flib = FileSpec()
        flib.lfn = self.library
        flib.type = 'input'
        flib.dataset = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        # input files FIXME: many more input types
        # Each entry names the app attribute holding the LFN list and the one
        # holding the matching lfn -> "guid:..." map. Attributes are looked up
        # lazily, so a map is only touched when its LFN list is non-empty.
        inputSources = (
            ('inputfiles', 'turls'),           # regular input files
            ('dbfiles', 'dbturls'),            # dbfiles, if any
            ('mbfiles', 'minbias_turls'),      # minbias files
            ('cavernfiles', 'cavern_turls'),   # cavern files
        )
        for filesAttr, turlsAttr in inputSources:
            for lfn in getattr(app, filesAttr):
                finp = FileSpec()
                finp.lfn = lfn
                finp.GUID = getattr(app, turlsAttr)[lfn].replace("guid:", "")
                finp.dataset = app.dsetmap[lfn]
                finp.prodDBlock = app.dsetmap[lfn]
                finp.prodDBlockToken = 'local'
                finp.dispatchDBlock = app.dsetmap[lfn]
                finp.type = 'input'
                finp.status = 'ready'
                jspec.addFile(finp)

        # output files (this includes the logfiles)
        # Suffix: the root job id for split jobs, the job's own id otherwise.
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id
        outfiles = app.subjobsOutfiles[job.id]
        pandaOutfiles = {}
        for outtype in outfiles.keys():
            pandaOutfiles[outtype] = outfiles[outtype] + "." + str(jidtag)
            if outtype == "LOG":
                pandaOutfiles[outtype] += ".tgz"

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            fout.dataset = app.outputpaths[outtype].replace("/", ".")[1:-1]
            fout.lfn = pandaOutfiles[outtype]
            fout.type = 'output'
            fout.destinationDBlock = fout.dataset
            fout.destinationSE = jspec.destinationSE
            if outtype == 'LOG':
                fout.type = 'log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE = job.backend.site
            jspec.addFile(fout)

        # job parameters
        param = '-l %s ' % self.library  # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '

        # transform parameters
        # need to update arglist with final output file name...
        # NOTE(review): app.args[3] is modified in place, so calling prepare
        # twice on the same application object appends the flags twice --
        # confirm whether callers rely on this side effect before changing it.
        if app.mode == "evgen":
            app.args[3] = app.args[3] + " -t "
            if app.verbosity:
                app.args[3] = app.args[3] + " -l %s " % app.verbosity

        newArgs = []
        for arg in app.args[3:]:
            # substitute every local output name by its Panda output name
            for outtype in outfiles.keys():
                if arg.find(outfiles[outtype]) > -1:
                    arg = arg.replace(outfiles[outtype],
                                      pandaOutfiles[outtype])
            newArgs.append(arg)
        arglist = " ".join(newArgs)

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        # Input files.
        allinfiles = app.inputfiles + app.dbfiles
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles) > 0:
            param += ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles) > 0:
            param += ' -n "%s" ' % app.cavernfiles

        # logfiles do not appear in IROOT block, and this entry is not needed
        # anymore; pop() tolerates jobs without a LOG output
        pandaOutfiles.pop("LOG", None)
        # list(...) keeps the "[('TYPE', 'name'), ...]" rendering even where
        # items() returns a view rather than a list
        param += ' -o "{\'IROOT\':%s }"' % str(list(pandaOutfiles.items()))

        # source URL: scheme and host part of the server URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLSSL)
        if matchURL is not None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"

        jspec.jobParameters = param
        jspec.metadata = "--trf \"%s\"" % arglist

        if app.dryrun:
            print("job.application.dryrun activated, printing out job parameters")
            print(jspec.values())
            return

        return jspec