示例#1
0
 def convertToJobFileSpec(self,
                          datasetSpec,
                          setType=None,
                          useEventService=False):
     jobFileSpec = JobFileSpec()
     jobFileSpec.fileID = self.fileID
     jobFileSpec.datasetID = datasetSpec.datasetID
     jobFileSpec.jediTaskID = datasetSpec.jediTaskID
     jobFileSpec.lfn = self.lfn
     jobFileSpec.GUID = self.GUID
     if setType is None:
         jobFileSpec.type = self.type
     else:
         jobFileSpec.type = setType
     jobFileSpec.scope = self.scope
     jobFileSpec.fsize = self.fsize
     jobFileSpec.checksum = self.checksum
     jobFileSpec.attemptNr = self.attemptNr
     # dataset attribute
     if datasetSpec is not None:
         # dataset
         if datasetSpec.containerName not in [None, '']:
             jobFileSpec.dataset = datasetSpec.containerName
         else:
             jobFileSpec.dataset = datasetSpec.datasetName
         if self.type in datasetSpec.getInputTypes(
         ) or setType in datasetSpec.getInputTypes():
             # prodDBlock
             jobFileSpec.prodDBlock = datasetSpec.datasetName
             # storage token
             if datasetSpec.storageToken not in ['', None]:
                 jobFileSpec.dispatchDBlockToken = datasetSpec.storageToken
         else:
             # destinationDBlock
             jobFileSpec.destinationDBlock = datasetSpec.datasetName
             # storage token
             if datasetSpec.storageToken not in ['', None]:
                 jobFileSpec.destinationDBlockToken = datasetSpec.storageToken.split(
                     '/')[0]
             # destination
             if datasetSpec.destination not in ['', None]:
                 jobFileSpec.destinationSE = datasetSpec.destination
             # set prodDBlockToken for Event Service
             if useEventService and datasetSpec.getObjectStore(
             ) is not None:
                 jobFileSpec.prodDBlockToken = 'objectstore^{0}'.format(
                     datasetSpec.getObjectStore())
             # allow no output
             if datasetSpec.isAllowedNoOutput():
                 jobFileSpec.allowNoOutput()
     # return
     return jobFileSpec
示例#2
0
 def convertToJobFileSpec(self,datasetSpec,setType=None,useEventService=False):
     jobFileSpec = JobFileSpec()
     jobFileSpec.fileID     = self.fileID
     jobFileSpec.datasetID  = datasetSpec.datasetID
     jobFileSpec.jediTaskID = datasetSpec.jediTaskID
     jobFileSpec.lfn        = self.lfn
     jobFileSpec.GUID       = self.GUID
     if setType == None:
         jobFileSpec.type   = self.type
     else:
         jobFileSpec.type   = setType
     jobFileSpec.scope      = self.scope
     jobFileSpec.fsize      = self.fsize
     jobFileSpec.checksum   = self.checksum
     jobFileSpec.attemptNr  = self.attemptNr
     # dataset attribute
     if datasetSpec != None:
         # dataset
         if not datasetSpec.containerName in [None,'']:
             jobFileSpec.dataset = datasetSpec.containerName
         else:
             jobFileSpec.dataset = datasetSpec.datasetName
         if self.type in datasetSpec.getInputTypes() or setType in datasetSpec.getInputTypes():
             # prodDBlock
             jobFileSpec.prodDBlock = datasetSpec.datasetName
             # storage token    
             if not datasetSpec.storageToken in ['',None]:
                 jobFileSpec.dispatchDBlockToken = datasetSpec.storageToken 
         else:
             # destinationDBlock
             jobFileSpec.destinationDBlock = datasetSpec.datasetName
             # storage token    
             if not datasetSpec.storageToken in ['',None]:
                 jobFileSpec.destinationDBlockToken = datasetSpec.storageToken.split('/')[0] 
             # destination
             if not datasetSpec.destination in ['',None]:
                 jobFileSpec.destinationSE = datasetSpec.destination
             # set prodDBlockToken for Event Service
             if useEventService and datasetSpec.getObjectStore() != None:
                 jobFileSpec.prodDBlockToken = 'objectstore^{0}'.format(datasetSpec.getObjectStore())
             # allow no output
             if datasetSpec.isAllowedNoOutput():
                 jobFileSpec.allowNoOutput()
     # return
     return jobFileSpec
    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.'))

#       in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter
        if job.inputdata and not job._getRoot().subjobs:

            if not job.inputdata.names:
                
                contents = job.inputdata.get_contents(overlap=False, size=True)

                for ds in contents.keys():

                    for f in contents[ds]:
                        job.inputdata.guids.append( f[0] )
                        job.inputdata.names.append( f[1][0] )
                        job.inputdata.sizes.append( f[1][1] )
                        job.inputdata.checksums.append( f[1][2] )
                        job.inputdata.scopes.append( f[1][3] )


        site = job._getRoot().backend.site
        job.backend.site = site
        job.backend.actualCE = site
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

#       if no outputdata are given
        if not job.outputdata:
            job.outputdata = DQ2OutputDataset()
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname
        #if not job.outputdata.datasetname:
        else:
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname

        if not job.outputdata.datasetname:
            raise ApplicationConfigurationError('DQ2OutputDataset has no datasetname')

        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
        jspec.transformation    = '%s/runGen-00-00-02' % Client.baseURLSUB
        if job.inputdata:
            jspec.prodDBlock    = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock    = 'NULL'
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            if not job._getRoot().subjobs or job.id == 0:
                logger.warning('You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.')
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = site
        jspec.prodSourceLabel   = configPanda['prodSourceLabelRun']
        jspec.processingType    = configPanda['processingType']
        jspec.assignedPriority  = configPanda['assignedPriorityRun']
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        # cputime     
        if job.backend.requirements.cputime != -1:
            jspec.maxCpuCount = job.backend.requirements.cputime
        jspec.computingSite     = site

#       library (source files)
        if job.backend.libds:
            flib = FileSpec()
            flib.lfn            = self.fileBO.lfn
            flib.GUID           = self.fileBO.GUID
            flib.type           = 'input'
            flib.status         = self.fileBO.status
            flib.dataset        = self.fileBO.destinationDBlock
            flib.dispatchDBlock = self.fileBO.destinationDBlock
            jspec.addFile(flib)
        elif job.backend.bexec:
            flib = FileSpec()
            flib.lfn            = self.library
            flib.type           = 'input'
            flib.dataset        = self.libDataset
            flib.dispatchDBlock = self.libDataset
            jspec.addFile(flib)

#       input files FIXME: many more input types
        if job.inputdata:            
            for guid, lfn, size, checksum, scope in zip(job.inputdata.guids,job.inputdata.names,job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
                finp = FileSpec()
                finp.lfn            = lfn
                finp.GUID           = guid
                finp.scope          = scope
                
#            finp.fsize =
#            finp.md5sum =
                finp.dataset        = job.inputdata.dataset[0]
                finp.prodDBlock     = job.inputdata.dataset[0]
                finp.dispatchDBlock = job.inputdata.dataset[0]
                finp.type           = 'input'
                finp.status         = 'ready'
                jspec.addFile(finp)

#       output files
#        outMap = {}
        
        #FIXME: if options.outMeta != []:
        self.rundirectory = "."

#       log files

        flog = FileSpec()
        flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
        flog.type = 'log'
        flog.dataset           = job.outputdata.datasetname
        flog.destinationDBlock = job.outputdata.datasetname
        flog.destinationSE     = job.backend.site
        jspec.addFile(flog)

#       job parameters
        param = ''

        # source URL
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL)
        srcURL = ""
        if matchURL != None:
            srcURL = matchURL.group(1)
            param += " --sourceURL %s " % srcURL

        param += '-r "%s" ' % self.rundirectory

        exe_name = job.application.exe
        if job.backend.bexec == '':
            if hasattr(job.application.exe, "name"):
                exe_name = os.path.basename(job.application.exe.name)

            # set jobO parameter
            if job.application.args:
                param += ' -j "" -p "%s %s" '%(exe_name,urllib.quote(" ".join(job.application.args)))
            else:
                param += ' -j "" -p "%s" '%exe_name
            if self.inputsandbox:
                param += ' -a %s '%self.inputsandbox

        else:
            param += '-l %s ' % self.library
            param += '-j "" -p "%s %s" ' % ( exe_name,urllib.quote(" ".join(job.application.args)))

        if job.inputdata:
            param += '-i "%s" ' % job.inputdata.names

        # fill outfiles
        outfiles = {}
        for f in self.extOutFile:
            tarnum = 1
            if f.find('*') != -1:
            # archive *
                outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S") )
                tarnum += 1
            else:
                outfiles[f] = "%s.%s.%s" %(f, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))

            fout = FileSpec()
            fout.lfn = outfiles[f]
            fout.type = 'output'
            fout.dataset           = job.outputdata.datasetname
            fout.destinationDBlock = job.outputdata.datasetname
            fout.destinationSE     = job.backend.site
            jspec.addFile(fout)

        param += '-o "%s" ' % (outfiles) # must be double quotes, because python prints strings in 'single quotes' 

        for file in jspec.Files:
            if file.type in [ 'output', 'log'] and configPanda['chirpconfig']:
                file.dispatchDBlockToken = configPanda['chirpconfig']
                logger.debug('chirp file %s',file)

        jspec.jobParameters = param
        
        return jspec
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the specific aspec of each subjob.
           Returns: subjobconfig list of objects understood by backends."""

        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
        from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs
        
        # make sure we have the correct siteType
        refreshPandaSpecs()

        job = app._getParent()
        masterjob = job._getRoot()

        logger.debug('ProdTransPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        job.backend.actualCE = job.backend.site
        job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud']

        # check that the site is in a submit-able status
        if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
            allowed_sites = job.backend.list_ddm_sites()

        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
            tmpDsExist = False
            if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')):
                #if Client.getDatasets(job.outputdata.datasetname):
                if getDatasets(job.outputdata.datasetname):
                    tmpDsExist = True
                    logger.info('Re-using output dataset %s'%job.outputdata.datasetname)
            if not configPanda['specialHandling']=='ddm:rucio' and not  configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'):
                Client.addDataset(job.outputdata.datasetname,False,location=outDsLocation,allowProdDisk=True,dsExist=tmpDsExist)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in adding dataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))
        
        # JobSpec.
        jspec = JobSpec()
        jspec.currentPriority = app.priority
        jspec.jobDefinitionID = masterjob.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.coreCount = app.core_count
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
        jspec.homepackage = app.home_package
        jspec.transformation = app.transformation
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = job.backend.site
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        if app.prod_source_label:
            jspec.prodSourceLabel = app.prod_source_label
        else:
            jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.specialHandling = configPanda['specialHandling']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.cmtConfig = app.atlas_cmtconfig
        if app.dbrelease == 'LATEST':
            try:
                latest_dbrelease = getLatestDBReleaseCaching()
            except:
                from pandatools import Client
                latest_dbrelease = Client.getLatestDBRelease()
            m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
            if m:
                self.dbrelease_dataset = m.group(1)
                self.dbrelease = m.group(2)
            else:
                raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually.")
        else:
            self.dbrelease_dataset = app.dbrelease_dataset
            self.dbrelease = app.dbrelease
        jspec.jobParameters = app.job_parameters

        if self.dbrelease:
            if self.dbrelease == 'current':
                jspec.jobParameters += ' --DBRelease=current' 
            else:
                if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                    jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                else:
                    jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                dbspec = FileSpec()
                dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
                dbspec.dataset = self.dbrelease_dataset
                dbspec.prodDBlock = jspec.prodDBlock
                dbspec.type = 'input'
                jspec.addFile(dbspec)

        if job.inputdata:
            m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)',
                          job.inputdata.dataset[0])
            if not m:
                logger.error("Error retrieving run number from dataset name")
                #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
                runnumber = 105200
            else:
                runnumber = int(m.group(2))
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --runNumber %d' % runnumber
            else:
                jspec.jobParameters += ' RunNumber=%d' % runnumber
        
        # Output files.
        randomized_lfns = []
        ilfn = 0
        for lfn, lfntype in zip(app.output_files,app.output_type):
            ofspec = FileSpec()
            if app.randomize_lfns:
                randomized_lfn = lfn + ('.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4] ) )
            else:
                randomized_lfn = lfn
            ofspec.lfn = randomized_lfn
            randomized_lfns.append(randomized_lfn)
            ofspec.destinationDBlock = jspec.destinationDBlock
            ofspec.destinationSE = jspec.destinationSE
            ofspec.dataset = jspec.destinationDBlock
            ofspec.type = 'output'
            jspec.addFile(ofspec)
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --output%sFile %s' % (lfntype, randomized_lfns[ilfn])
            else:
                jspec.jobParameters += ' output%sFile=%s' % (lfntype, randomized_lfns[ilfn])
            ilfn=ilfn+1

        # Input files.
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
                ifspec = FileSpec()
                ifspec.lfn = lfn
                ifspec.GUID = guid
                ifspec.fsize = size
                ifspec.md5sum = checksum
                ifspec.scope = scope
                ifspec.dataset = jspec.prodDBlock
                ifspec.prodDBlock = jspec.prodDBlock
                ifspec.type = 'input'
                jspec.addFile(ifspec)
            if app.input_type:
                itype = app.input_type
            else:
                itype = m.group(5)
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(job.inputdata.names))
            else:
                jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(job.inputdata.names))

        # Log files.
        lfspec = FileSpec()
        lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
        lfspec.destinationDBlock = jspec.destinationDBlock
        lfspec.destinationSE  = jspec.destinationSE
        lfspec.dataset = jspec.destinationDBlock
        lfspec.type = 'log'
        jspec.addFile(lfspec)
        
        return jspec
示例#5
0
    def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.'))
        
        try:
            assert self.outsite
        except:
            logger.error("outsite not set. Aborting")
            raise Exception()
        
        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud
        

        # now just filling the job from AthenaMC data
        
        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')  
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        
        if app.transform_archive:
            jspec.homepackage       = 'AnalysisTransforms'+app.transform_archive
        elif app.prod_release:
            jspec.homepackage       = 'AnalysisTransforms-AtlasProduction_'+str(app.prod_release)
        jspec.transformation    = '%s/runAthena-00-00-11' % Client.baseURLSUB
            
        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock    = 'NULL'
        if job.inputdata and len(app.inputfiles)>0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock    = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset=""
        for type in ["EVNT","RDO","HITS","AOD","ESD","NTUP"]:
            if type in app.outputpaths.keys():
                outdset=string.replace(app.outputpaths[type],"/",".")
                outdset=outdset[1:-1]
                break
        if not outdset:
            try:
                assert len(app.outputpaths.keys())>0
            except:
                logger.error("app.outputpaths is empty: check your output datasets")
                raise
            type=app.outputpaths.keys()[0]
            outdset=string.replace(app.outputpaths[type],"/",".")
            outdset=outdset[1:-1]
            
        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel   = 'user'
        jspec.assignedPriority  = 1000
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite     = self.outsite
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
#       library (source files)
        flib = FileSpec()
        flib.lfn            = self.library
#        flib.GUID           = 
        flib.type           = 'input'
#        flib.status         = 
        flib.dataset        = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid=app.turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid=app.dbturls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid=app.minbias_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid=app.cavern_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
            

#       output files( this includes the logfiles)
        # Output files
        jidtag=""
        job = app._getParent() # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id       
        outfiles=app.subjobsOutfiles[job.id]
        pandaOutfiles={}
        for type in outfiles.keys():
            pandaOutfiles[type]=outfiles[type]+"."+str(jidtag)
            if type=="LOG":
                pandaOutfiles[type]+=".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset=string.replace(app.outputpaths[outtype],"/",".")
            dset=dset[1:-1]
            fout.dataset=dset
            fout.lfn=pandaOutfiles[outtype]
            fout.type              = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE    = jspec.destinationSE
            if outtype=='LOG':
                fout.type='log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE     = job.backend.site
            jspec.addFile(fout)


        #       job parameters
        param =  '-l %s ' % self.library # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '
        
        # transform parameters
        # need to update arglist with final output file name...
        newArgs=[]
        if app.mode == "evgen":
            app.args[3]=app.args[3]+" -t "
            if app.verbosity:
                app.args[3]=app.args[3]+" -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type])>-1:
                    arg=arg.replace(outfiles[type],pandaOutfiles[type])

            newArgs.append(arg)
        arglist=string.join(newArgs," ")
#        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles=app.inputfiles+app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles)>0:
            param+= ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles)>0:
            param+= ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        del pandaOutfiles["LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore...
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL        
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLSSL)
        if matchURL != None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"


        jspec.jobParameters = param
        jspec.metadata="--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return
        
        return jspec
示例#6
0
job = JobSpec()
job.jobDefinitionID   = int(time.time()) % 10000
job.jobName           = commands.getoutput('/usr/bin/uuidgen') 
job.AtlasRelease      = 'Atlas-9.0.4'
job.prodDBlock        = 'pandatest.000003.dd.input'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('/usr/bin/uuidgen')
job.destinationSE     = 'BNL_SE'

ids = {'pandatest.000003.dd.input._00028.junk':'6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27',
       'pandatest.000003.dd.input._00033.junk':'98f79ba1-1793-4253-aac7-bdf90a51d1ee',
       'pandatest.000003.dd.input._00039.junk':'33660dd5-7cef-422a-a7fc-6c24cb10deb1'}
for lfn in ids.keys():
    file = FileSpec()
    file.lfn = lfn
    file.GUID = ids[file.lfn]
    file.dataset = 'pandatest.000003.dd.input'
    file.type = 'input'
    job.addFile(file)

s,o = Client.submitJobs([job])
print "---------------------"
print s
print o
print "---------------------"
s,o = Client.getJobStatus([4934, 4766, 4767, 4768, 4769])
print s
if s == 0:
    for job in o:
        if job == None:
            continue
示例#7
0
    def run(self, data):
        datasetName = 'panda:panda.destDB.%s' % commands.getoutput('uuidgen')
        destName = 'ANALY_RRC-KI-HPC'
        site = 'ANALY_RRC-KI-HPC'
        scope = config['DEFAULT_SCOPE']

        distributive = data['distributive']
        release = data['release']
        parameters = data['parameters']
        input_type = data['input_type']
        input_params = data['input_params']
        input_files = data['input_files']
        output_type = data['output_type']
        output_params = data['output_params']
        output_files = data['output_files']

        jobid = data['jobid']
        _logger.debug('Jobid: ' + str(jobid))

        job = JobSpec()
        job.jobDefinitionID = int(time.time()) % 10000
        job.jobName = commands.getoutput('uuidgen')
        job.transformation = config['DEFAULT_TRF']
        job.destinationDBlock = datasetName
        job.destinationSE = destName
        job.currentPriority = 1000
        job.prodSourceLabel = 'user'
        job.computingSite = site
        job.cloud = 'RU'
        job.prodDBlock = "%s:%s.%s" % (scope, scope, job.jobName)

        job.jobParameters = '%s %s "%s"' % (release, distributive, parameters)

        params = {}
        _logger.debug('MoveData')
        ec = 0
        ec, uploaded_input_files = movedata(
            params=params,
            fileList=input_files,
            fromType=input_type,
            fromParams=input_params,
            toType='hpc',
            toParams={'dest': '/' + re.sub(':', '/', job.prodDBlock)})
        if ec != 0:
            _logger.error('Move data error: ' + ec[1])
            return

        for file in uploaded_input_files:
            fileIT = FileSpec()
            fileIT.lfn = file
            fileIT.dataset = job.prodDBlock
            fileIT.prodDBlock = job.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = scope
            fileIT.status = 'ready'
            fileIT.GUID = commands.getoutput('uuidgen')
            job.addFile(fileIT)

        for file in output_files:
            fileOT = FileSpec()
            fileOT.lfn = file
            fileOT.destinationDBlock = job.prodDBlock
            fileOT.destinationSE = job.destinationSE
            fileOT.dataset = job.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = scope
            fileOT.GUID = commands.getoutput('uuidgen')
            job.addFile(fileOT)

        fileOL = FileSpec()
        fileOL.lfn = "%s.log.tgz" % job.jobName
        fileOL.destinationDBlock = job.destinationDBlock
        fileOL.destinationSE = job.destinationSE
        fileOL.dataset = job.destinationDBlock
        fileOL.type = 'log'
        fileOL.scope = 'panda'
        job.addFile(fileOL)

        self.jobList.append(job)

        #submitJob
        o = self.submitJobs(self.jobList)
        x = o[0]

        #update PandaID
        conn = MySQLdb.connect(
            host=self.dbhost,
            db=self.dbname,
            #                                        port=self.dbport, connect_timeout=self.dbtimeout,
            user=self.dbuser,
            passwd=self.dbpasswd)
        cur = conn.cursor()
        try:
            varDict = {}
            PandaID = int(x[0])
            varDict['id'] = jobid
            varDict['pandaId'] = PandaID

            sql = "UPDATE %s SET %s.pandaId=%s WHERE %s.id=%s" % (
                self.table_jobs, self.table_jobs, varDict['pandaId'],
                self.table_jobs, varDict['id'])
            cur.execute(sql, varDict)

        except:
            _logger.error('SENDJOB: Incorrect server response')
        try:
            conn.commit()
            return True
        except:
            _logger.error("commit error")
            return False
示例#8
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        '''prepare the subjob specific configuration'''

        from pandatools import Client
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        #       in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter
        if job.inputdata and not job._getRoot().subjobs:

            if not job.inputdata.names:

                contents = job.inputdata.get_contents(overlap=False, size=True)

                for ds in contents.keys():

                    for f in contents[ds]:
                        job.inputdata.guids.append(f[0])
                        job.inputdata.names.append(f[1][0])
                        job.inputdata.sizes.append(f[1][1])
                        job.inputdata.checksums.append(f[1][2])
                        job.inputdata.scopes.append(f[1][3])

        site = job._getRoot().backend.site
        job.backend.site = site
        job.backend.actualCE = site
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

        #       if no outputdata are given
        if not job.outputdata:
            job.outputdata = DQ2OutputDataset()
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname
        #if not job.outputdata.datasetname:
        else:
            job.outputdata.datasetname = job._getRoot().outputdata.datasetname

        if not job.outputdata.datasetname:
            raise ApplicationConfigurationError(
                None, 'DQ2OutputDataset has no datasetname')

        jspec = JobSpec()
        jspec.jobDefinitionID = job._getRoot().id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.transformation = '%s/runGen-00-00-02' % Client.baseURLSUB
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            if not job._getRoot().subjobs or job.id == 0:
                logger.warning(
                    'You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.'
                )
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = site
        jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.assignedPriority = configPanda['assignedPriorityRun']
        jspec.cloud = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        # cputime
        if job.backend.requirements.cputime != -1:
            jspec.maxCpuCount = job.backend.requirements.cputime
        jspec.computingSite = site

        #       library (source files)
        if job.backend.libds:
            flib = FileSpec()
            flib.lfn = self.fileBO.lfn
            flib.GUID = self.fileBO.GUID
            flib.type = 'input'
            flib.status = self.fileBO.status
            flib.dataset = self.fileBO.destinationDBlock
            flib.dispatchDBlock = self.fileBO.destinationDBlock
            jspec.addFile(flib)
        elif job.backend.bexec:
            flib = FileSpec()
            flib.lfn = self.library
            flib.type = 'input'
            flib.dataset = self.libDataset
            flib.dispatchDBlock = self.libDataset
            jspec.addFile(flib)

#       input files FIXME: many more input types
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(
                    job.inputdata.guids, job.inputdata.names,
                    job.inputdata.sizes, job.inputdata.checksums,
                    job.inputdata.scopes):
                finp = FileSpec()
                finp.lfn = lfn
                finp.GUID = guid
                finp.scope = scope

                #            finp.fsize =
                #            finp.md5sum =
                finp.dataset = job.inputdata.dataset[0]
                finp.prodDBlock = job.inputdata.dataset[0]
                finp.dispatchDBlock = job.inputdata.dataset[0]
                finp.type = 'input'
                finp.status = 'ready'
                jspec.addFile(finp)


#       output files
#        outMap = {}

#FIXME: if options.outMeta != []:
        self.rundirectory = "."

        #       log files

        flog = FileSpec()
        flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
        flog.type = 'log'
        flog.dataset = job.outputdata.datasetname
        flog.destinationDBlock = job.outputdata.datasetname
        flog.destinationSE = job.backend.site
        jspec.addFile(flog)

        #       job parameters
        param = ''

        # source URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL)
        srcURL = ""
        if matchURL != None:
            srcURL = matchURL.group(1)
            param += " --sourceURL %s " % srcURL

        param += '-r "%s" ' % self.rundirectory

        exe_name = job.application.exe
        if job.backend.bexec == '':
            if hasattr(job.application.exe, "name"):
                exe_name = os.path.basename(job.application.exe.name)

            # set jobO parameter
            if job.application.args:
                param += ' -j "" -p "%s %s" ' % (
                    exe_name, urllib.quote(" ".join(job.application.args)))
            else:
                param += ' -j "" -p "%s" ' % exe_name
            if self.inputsandbox:
                param += ' -a %s ' % self.inputsandbox

        else:
            param += '-l %s ' % self.library
            param += '-j "" -p "%s %s" ' % (
                exe_name, urllib.quote(" ".join(job.application.args)))

        if job.inputdata:
            param += '-i "%s" ' % job.inputdata.names

        # fill outfiles
        outfiles = {}
        for f in self.extOutFile:
            tarnum = 1
            if f.find('*') != -1:
                # archive *
                outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (
                    tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))
                tarnum += 1
            else:
                outfiles[f] = "%s.%s.%s" % (f, job.getFQID('.'),
                                            time.strftime("%Y%m%d%H%M%S"))

            fout = FileSpec()
            fout.lfn = outfiles[f]
            fout.type = 'output'
            fout.dataset = job.outputdata.datasetname
            fout.destinationDBlock = job.outputdata.datasetname
            fout.destinationSE = job.backend.site
            jspec.addFile(fout)

        param += '-o "%s" ' % (
            outfiles
        )  # must be double quotes, because python prints strings in 'single quotes'

        for file in jspec.Files:
            if file.type in ['output', 'log'] and configPanda['chirpconfig']:
                file.dispatchDBlockToken = configPanda['chirpconfig']
                logger.debug('chirp file %s', file)

        jspec.jobParameters = param

        return jspec
示例#9
0
def send_job(jobid, siteid):
    _logger.debug('Jobid: ' + str(jobid))

    site = sites_.get(siteid)

    job = jobs_.get(int(jobid))
    cont = job.container
    files_catalog = cont.files

    fscope = getScope(job.owner.username)
    datasetName = '{}:{}'.format(fscope, cont.guid)

    distributive = job.distr.name
    release = job.distr.release

    # Prepare runScript
    parameters = job.distr.command
    parameters = parameters.replace("$COMMAND$", job.params)
    parameters = parameters.replace("$USERNAME$", job.owner.username)
    parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group)

    # Prepare metadata
    metadata = dict(user=job.owner.username)

    # Prepare PanDA Object
    pandajob = JobSpec()
    pandajob.jobDefinitionID = int(time.time()) % 10000
    pandajob.jobName = cont.guid
    pandajob.transformation = client_config.DEFAULT_TRF
    pandajob.destinationDBlock = datasetName
    pandajob.destinationSE = site.se
    pandajob.currentPriority = 1000
    pandajob.prodSourceLabel = 'user'
    pandajob.computingSite = site.ce
    pandajob.cloud = 'RU'
    pandajob.VO = 'atlas'
    pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName)
    pandajob.coreCount = job.corecount
    pandajob.metadata = json.dumps(metadata)
    #pandajob.workingGroup = job.owner.working_group

    if site.encode_commands:
        # It requires script wrapper on cluster side
        pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release,
                                                    distributive, parameters)
    else:
        pandajob.jobParameters = parameters

    has_input = False
    for fcc in files_catalog:
        if fcc.type == 'input':
            f = fcc.file
            guid = f.guid
            fileIT = FileSpec()
            fileIT.lfn = f.lfn
            fileIT.dataset = pandajob.prodDBlock
            fileIT.prodDBlock = pandajob.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = fscope
            fileIT.status = 'ready'
            fileIT.GUID = guid
            pandajob.addFile(fileIT)

            has_input = True
        if fcc.type == 'output':
            f = fcc.file
            fileOT = FileSpec()
            fileOT.lfn = f.lfn
            fileOT.destinationDBlock = pandajob.prodDBlock
            fileOT.destinationSE = pandajob.destinationSE
            fileOT.dataset = pandajob.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = fscope
            fileOT.GUID = f.guid
            pandajob.addFile(fileOT)

            # Save replica meta
            fc.new_replica(f, site)

    if not has_input:
        # Add fake input
        fileIT = FileSpec()
        fileIT.lfn = "fake.input"
        fileIT.dataset = pandajob.prodDBlock
        fileIT.prodDBlock = pandajob.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = fscope
        fileIT.status = 'ready'
        fileIT.GUID = "fake.guid"
        pandajob.addFile(fileIT)

    # Prepare lof file
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % pandajob.jobName
    fileOL.destinationDBlock = pandajob.destinationDBlock
    fileOL.destinationSE = pandajob.destinationSE
    fileOL.dataset = '{}:logs'.format(fscope)
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    pandajob.addFile(fileOL)

    # Save log meta
    log = File()
    log.scope = fscope
    log.lfn = fileOL.lfn
    log.guid = getGUID(log.scope, log.lfn)
    log.type = 'log'
    log.status = 'defined'
    files_.save(log)

    # Save replica meta
    fc.new_replica(log, site)

    # Register file in container
    fc.reg_file_in_cont(log, cont, 'log')

    # Submit job
    o = submitJobs([pandajob])
    x = o[0]

    try:
        #update PandaID
        PandaID = int(x[0])
        job.pandaid = PandaID
        job.ce = site.ce
    except:
        job.status = 'submit_error'
    jobs_.save(job)

    return 0
示例#10
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the specific aspec of each subjob.
           Returns: subjobconfig list of objects understood by backends."""

        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
        from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs

        # make sure we have the correct siteType
        refreshPandaSpecs()

        job = app._getParent()
        masterjob = job._getRoot()

        logger.debug('ProdTransPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        job.backend.actualCE = job.backend.site
        job.backend.requirements.cloud = Client.PandaSites[
            job.backend.site]['cloud']

        # check that the site is in a submit-able status
        if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
            allowed_sites = job.backend.list_ddm_sites()

        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
            tmpDsExist = False
            if (configPanda['processingType'].startswith('gangarobot') or
                    configPanda['processingType'].startswith('hammercloud')):
                #if Client.getDatasets(job.outputdata.datasetname):
                if getDatasets(job.outputdata.datasetname):
                    tmpDsExist = True
                    logger.info('Re-using output dataset %s' %
                                job.outputdata.datasetname)
            if not configPanda[
                    'specialHandling'] == 'ddm:rucio' and not configPanda[
                        'processingType'].startswith(
                            'gangarobot'
                        ) and not configPanda['processingType'].startswith(
                            'hammercloud') and not configPanda[
                                'processingType'].startswith('rucio_test'):
                Client.addDataset(job.outputdata.datasetname,
                                  False,
                                  location=outDsLocation,
                                  allowProdDisk=True,
                                  dsExist=tmpDsExist)
            logger.info('Output dataset %s registered at %s' %
                        (job.outputdata.datasetname, outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
        except exceptions.SystemExit:
            raise BackendError(
                'Panda', 'Exception in adding dataset %s: %s %s' %
                (job.outputdata.datasetname, sys.exc_info()[0],
                 sys.exc_info()[1]))

        # JobSpec.
        jspec = JobSpec()
        jspec.currentPriority = app.priority
        jspec.jobDefinitionID = masterjob.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.coreCount = app.core_count
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
        jspec.homepackage = app.home_package
        jspec.transformation = app.transformation
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = job.backend.site
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        if app.prod_source_label:
            jspec.prodSourceLabel = app.prod_source_label
        else:
            jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.specialHandling = configPanda['specialHandling']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.cmtConfig = app.atlas_cmtconfig
        if app.dbrelease == 'LATEST':
            try:
                latest_dbrelease = getLatestDBReleaseCaching()
            except:
                from pandatools import Client
                latest_dbrelease = Client.getLatestDBRelease()
            m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
            if m:
                self.dbrelease_dataset = m.group(1)
                self.dbrelease = m.group(2)
            else:
                raise ApplicationConfigurationError(
                    None,
                    "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually."
                )
        else:
            self.dbrelease_dataset = app.dbrelease_dataset
            self.dbrelease = app.dbrelease
        jspec.jobParameters = app.job_parameters

        if self.dbrelease:
            if self.dbrelease == 'current':
                jspec.jobParameters += ' --DBRelease=current'
            else:
                if jspec.transformation.endswith(
                        "_tf.py") or jspec.transformation.endswith("_tf"):
                    jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (
                        self.dbrelease, )
                else:
                    jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (
                        self.dbrelease, )
                dbspec = FileSpec()
                dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
                dbspec.dataset = self.dbrelease_dataset
                dbspec.prodDBlock = jspec.prodDBlock
                dbspec.type = 'input'
                jspec.addFile(dbspec)

        if job.inputdata:
            m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)',
                          job.inputdata.dataset[0])
            if not m:
                logger.error("Error retrieving run number from dataset name")
                #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
                runnumber = 105200
            else:
                runnumber = int(m.group(2))
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --runNumber %d' % runnumber
            else:
                jspec.jobParameters += ' RunNumber=%d' % runnumber

        # Output files.
        randomized_lfns = []
        ilfn = 0
        for lfn, lfntype in zip(app.output_files, app.output_type):
            ofspec = FileSpec()
            if app.randomize_lfns:
                randomized_lfn = lfn + (
                    '.%s.%d.%s' %
                    (job.backend.site, int(time.time()),
                     commands.getoutput('uuidgen 2> /dev/null')[:4]))
            else:
                randomized_lfn = lfn
            ofspec.lfn = randomized_lfn
            randomized_lfns.append(randomized_lfn)
            ofspec.destinationDBlock = jspec.destinationDBlock
            ofspec.destinationSE = jspec.destinationSE
            ofspec.dataset = jspec.destinationDBlock
            ofspec.type = 'output'
            jspec.addFile(ofspec)
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --output%sFile %s' % (
                    lfntype, randomized_lfns[ilfn])
            else:
                jspec.jobParameters += ' output%sFile=%s' % (
                    lfntype, randomized_lfns[ilfn])
            ilfn = ilfn + 1

        # Input files.
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(
                    job.inputdata.guids, job.inputdata.names,
                    job.inputdata.sizes, job.inputdata.checksums,
                    job.inputdata.scopes):
                ifspec = FileSpec()
                ifspec.lfn = lfn
                ifspec.GUID = guid
                ifspec.fsize = size
                ifspec.md5sum = checksum
                ifspec.scope = scope
                ifspec.dataset = jspec.prodDBlock
                ifspec.prodDBlock = jspec.prodDBlock
                ifspec.type = 'input'
                jspec.addFile(ifspec)
            if app.input_type:
                itype = app.input_type
            else:
                itype = m.group(5)
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(
                    job.inputdata.names))
            else:
                jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(
                    job.inputdata.names))

        # Log files.
        lfspec = FileSpec()
        lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
        lfspec.destinationDBlock = jspec.destinationDBlock
        lfspec.destinationSE = jspec.destinationSE
        lfspec.dataset = jspec.destinationDBlock
        lfspec.type = 'log'
        jspec.addFile(lfspec)

        return jspec
示例#11
0
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput(
    '/usr/bin/uuidgen')
job.destinationSE = 'BNL_SE'

ids = {
    'pandatest.000003.dd.input._00028.junk':
    '6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27',
    'pandatest.000003.dd.input._00033.junk':
    '98f79ba1-1793-4253-aac7-bdf90a51d1ee',
    'pandatest.000003.dd.input._00039.junk':
    '33660dd5-7cef-422a-a7fc-6c24cb10deb1'
}
for lfn in ids.keys():
    file = FileSpec()
    file.lfn = lfn
    file.GUID = ids[file.lfn]
    file.dataset = 'pandatest.000003.dd.input'
    file.type = 'input'
    job.addFile(file)

s, o = Client.submitJobs([job])
print "---------------------"
print s
print o
print "---------------------"
s, o = Client.getJobStatus([4934, 4766, 4767, 4768, 4769])
print s
if s == 0:
    for job in o:
        if job == None:
            continue
示例#12
0
def main():
    logger.info('Getting tasks with status send and running')
    #    tasks_list = Task.objects.all().filter(Q(status='send') | Q(status='running'))
    tasks_list = Task.objects.all().filter(name='dvcs2016P09t2r13v1_mu+')
    logger.info('Got list of %s tasks' % len(tasks_list))

    for t in tasks_list:
        logger.info('Getting jobs in status defined or failed for task %s' % t)
        jobs_list_count = Job.objects.all().filter(task=t).count()
        if jobs_list_count > 50:
            jobs_list = Job.objects.all().filter(
                task=t).order_by('id')[:max_send_amount]
        else:
            jobs_list = Job.objects.all().filter(
                task=t).order_by('id')[:jobs_list_count]
        logger.info('Got list of %s jobs' % len(jobs_list))

        i = 0
        for j in jobs_list:
            if i >= max_send_amount:
                break

            logger.info('Going to send job %s of %s task' %
                        (j.file, j.task.name))

            umark = commands.getoutput('uuidgen')
            datasetName = 'panda.destDB.%s' % umark
            destName = 'COMPASSPRODDISK'  # PanDA will not try to move output data, data will be placed by pilot (based on schedconfig)
            TMPRAWFILE = j.file[j.file.rfind('/') + 1:]
            logger.info(TMPRAWFILE)
            TMPMDSTFILE = 'mDST-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.root' % {
                'input_file': j.file,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            logger.info(TMPMDSTFILE)
            TMPHISTFILE = '%(runNumber)s-%(runChunk)s-%(prodSlt)s.root' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(TMPHISTFILE)
            TMPRICHFILE = 'gfile_%(runNumber)s-%(runChunk)s.gfile' % {
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(TMPRICHFILE)
            EVTDUMPFILE = 'evtdump%(prodSlt)s-%(runChunk)s-%(runNumber)s.raw' % {
                'prodSlt': j.task.prodslt,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number
            }
            logger.info(EVTDUMPFILE)
            STDOUTFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stdout' % {
                'prodNameOnly': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDOUTFILE)
            STDERRFILE = '%(prodNameOnly)s.%(runNumber)s-%(runChunk)s-%(prodSlt)s.stderr' % {
                'prodNameOnly': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt
            }
            logger.info(STDERRFILE)
            try:
                file_year = j.file.split('/')[5]
                logger.info(file_year)
            except:
                logger.error('Error while splitting file to get year')
                sys.exit(1)

            ProdPathAndName = j.task.home + j.task.path + j.task.soft

            job = JobSpec()
            job.taskID = j.task.id
            job.jobDefinitionID = 0
            job.jobName = '%(prodName)s-%(fileYear)s--%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s' % {
                'prodName': j.task.soft,
                'fileYear': file_year,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            job.transformation = j.task.type  # payload (can be URL as well)
            job.destinationDBlock = datasetName
            job.destinationSE = destName
            job.currentPriority = 2000
            job.prodSourceLabel = 'prod_test'
            job.computingSite = site
            job.attemptNr = j.attempt + 1
            job.maxAttempt = j.task.max_attempts
            if j.status == 'failed':
                job.parentID = j.panda_id
            head, tail = os.path.split(j.file)
            #            job.transferType = 'direct'
            job.sourceSite = 'CERN_COMPASS_PROD'

            # logs, and all files generated during execution will be placed in log (except output file)
            #job.jobParameters='source /afs/cern.ch/project/eos/installation/compass/etc/setup.sh;export EOS_MGM_URL=root://eoscompass.cern.ch;export PATH=/afs/cern.ch/project/eos/installation/compass/bin:$PATH;ppwd=$(pwd);echo $ppwd;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;coralpath=%(ProdPathAndName)s/coral;echo $coralpath;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";echo $coralpathsetup;source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/template.opt;xrdcp -np $ppwd/%(TMPMDSTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/mDST/%(TMPMDSTFILE)s;xrdcp -np $ppwd/%(TMPHISTFILE)s xroot://eoscompass.cern.ch//eos/compass/%(prodName)s/histos/%(TMPHISTFILE)s;metadataxml=$(ls metadata-*);echo $metadataxml;cp $metadataxml $metadataxml.PAYLOAD;' % {'TMPMDSTFILE': TMPMDSTFILE, 'TMPHISTFILE': TMPHISTFILE, 'TMPRICHFILE': TMPRICHFILE, 'input_file': input_file, 'ProdPathAndName': ProdPathAndName, 'prodName': prodName}
            job.jobParameters = 'export EOS_MGM_URL=root://eoscompass.cern.ch;ppwd=$(pwd);export COMPASS_SW_PREFIX=/eos/experiment/compass/;export COMPASS_SW_PATH=%(prodPath)s;export COMPASS_PROD_NAME=%(prodName)s;export TMPRAWFILE=%(TMPRAWFILE)s;export TMPMDSTFILE=%(TMPMDSTFILE)s;export TMPHISTFILE=%(TMPHISTFILE)s;export TMPRICHFILE=%(TMPRICHFILE)s;export prodSlt=%(prodSlt)s;export EVTDUMPFILE=%(EVTDUMPFILE)s;xrdcp -N -f root://castorpublic.cern.ch/%(input_file)s\?svcClass=compasscdr .;coralpath=%(ProdPathAndName)s/coral;cd -P $coralpath;export coralpathsetup=$coralpath"/setup.sh";source $coralpathsetup;cd $ppwd;$CORAL/../phast/coral/coral.exe %(ProdPathAndName)s/%(template)s;if [ ! -s testevtdump.raw ]; then echo "PanDA message: the file is empty">testevtdump.raw; fi;cp payload_stderr.txt payload_stderr.out;cp payload_stdout.txt payload_stdout.out;rm %(tail)s' % {
                'TMPRAWFILE': TMPRAWFILE,
                'TMPMDSTFILE': TMPMDSTFILE,
                'TMPHISTFILE': TMPHISTFILE,
                'TMPRICHFILE': TMPRICHFILE,
                'input_file': j.file,
                'ProdPathAndName': ProdPathAndName,
                'prodPath': j.task.path,
                'prodName': j.task.soft,
                'template': j.task.template,
                'tail': tail,
                'prodSlt': j.task.prodslt,
                'EVTDUMPFILE': EVTDUMPFILE,
                'STDOUTFILE': STDOUTFILE,
                'STDERRFILE': STDERRFILE
            }

            fileIRaw = FileSpec()
            fileIRaw.lfn = "%s" % (j.file)
            fileIRaw.GUID = '5874a461-61d3-4543-8f34-6fd7a4624e78'
            fileIRaw.fsize = 1073753368
            fileIRaw.checksum = '671608be'
            fileIRaw.destinationDBlock = job.destinationDBlock
            fileIRaw.destinationSE = job.destinationSE
            fileIRaw.dataset = job.destinationDBlock
            fileIRaw.type = 'input'
            job.addFile(fileIRaw)

            fileOstdout = FileSpec()
            fileOstdout.lfn = "payload_stdout.txt"
            fileOstdout.destinationDBlock = job.destinationDBlock
            fileOstdout.destinationSE = job.destinationSE
            fileOstdout.dataset = job.destinationDBlock
            fileOstdout.type = 'output'
            job.addFile(fileOstdout)

            fileOstderr = FileSpec()
            fileOstderr.lfn = "payload_stderr.txt"
            fileOstderr.destinationDBlock = job.destinationDBlock
            fileOstderr.destinationSE = job.destinationSE
            fileOstderr.dataset = job.destinationDBlock
            fileOstderr.type = 'output'
            job.addFile(fileOstderr)

            fileOLog = FileSpec()
            fileOLog.lfn = "%(prodName)s-%(runNumber)s-%(runChunk)s-%(prodSlt)s-%(phastVer)s.job.log.tgz" % {
                'prodName': j.task.soft,
                'runNumber': j.run_number,
                'runChunk': j.chunk_number,
                'prodSlt': j.task.prodslt,
                'phastVer': j.task.phastver
            }
            fileOLog.destinationDBlock = job.destinationDBlock
            fileOLog.destinationSE = job.destinationSE
            fileOLog.dataset = job.destinationDBlock
            fileOLog.type = 'log'
            job.addFile(fileOLog)

            fileOmDST = FileSpec()
            fileOmDST.lfn = "%s" % (TMPMDSTFILE)
            fileOmDST.destinationDBlock = job.destinationDBlock
            fileOmDST.destinationSE = job.destinationSE
            fileOmDST.dataset = job.destinationDBlock
            fileOmDST.type = 'output'
            job.addFile(fileOmDST)

            fileOTrafdic = FileSpec()
            fileOTrafdic.lfn = "%s" % (TMPHISTFILE)
            fileOTrafdic.destinationDBlock = job.destinationDBlock
            fileOTrafdic.destinationSE = job.destinationSE
            fileOTrafdic.dataset = job.destinationDBlock
            fileOTrafdic.type = 'output'
            job.addFile(fileOTrafdic)

            fileOtestevtdump = FileSpec()
            fileOtestevtdump.lfn = "testevtdump.raw"
            fileOtestevtdump.destinationDBlock = job.destinationDBlock
            fileOtestevtdump.destinationSE = job.destinationSE
            fileOtestevtdump.dataset = job.destinationDBlock
            fileOtestevtdump.type = 'output'
            job.addFile(fileOtestevtdump)

            s, o = Client.submitJobs([job], srvID=aSrvID)
            logger.info(s)
            logger.info(o)
            #             for x in o:
            #                 logger.info("PandaID=%s" % x[0])
            #                 today = datetime.datetime.today()
            #
            #                 if x[0] != 0 and x[0] != 'NULL':
            #                     j_update = Job.objects.get(id=j.id)
            #                     j_update.panda_id = x[0]
            #                     j_update.status = 'sent'
            #                     j_update.attempt = j_update.attempt + 1
            #                     j_update.date_updated = today
            #
            #                     try:
            #                         j_update.save()
            #                         logger.info('Job %s with PandaID %s updated at %s' % (j.id, x[0], today))
            #                     except IntegrityError as e:
            #                         logger.exception('Unique together catched, was not saved')
            #                     except DatabaseError as e:
            #                         logger.exception('Something went wrong while saving: %s' % e.message)
            #                 else:
            #                     logger.info('Job %s was not added to PanDA' % j.id)
            i += 1

    logger.info('done')
示例#13
0
job.transformation = 'Reco_tf.py'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('uuidgen')
job.destinationSE = 'AGLT2_TEST'
job.prodDBlock = 'user.mlassnig:user.mlassnig.pilot.test.single.hits'
job.currentPriority = 1000
#job.prodSourceLabel   = 'ptest'
job.prodSourceLabel = 'user'
job.computingSite = site
job.cloud = cloud
job.cmtConfig = 'x86_64-slc6-gcc48-opt'
job.specialHandling = 'ddm:rucio'
#job.transferType      = 'direct'

ifile = 'HITS.06828093._000096.pool.root.1'
fileI = FileSpec()
fileI.GUID = 'AC5B3759-B606-BA42-8681-4BD86455AE02'
fileI.checksum = 'ad:5d000974'
fileI.dataset = 'user.mlassnig:user.mlassnig.pilot.test.single.hits'
fileI.fsize = 94834717
fileI.lfn = ifile
fileI.prodDBlock = job.prodDBlock
fileI.scope = 'mc15_13TeV'
fileI.type = 'input'
job.addFile(fileI)

ofile = 'RDO_%s.root' % commands.getoutput('uuidgen')
fileO = FileSpec()
fileO.dataset = job.destinationDBlock
fileO.destinationDBlock = job.destinationDBlock
fileO.destinationSE = job.destinationSE
fileO.lfn = ofile
示例#14
0
def send_job(jobid, siteid):
    _logger.debug('Jobid: ' + str(jobid))

    site = sites_.get(siteid)

    job = jobs_.get(int(jobid))
    cont = job.container
    files_catalog = cont.files

    fscope = getScope(job.owner.username)
    datasetName = '{}:{}'.format(fscope, cont.guid)

    distributive = job.distr.name
    release = job.distr.release

    # Prepare runScript
    parameters = job.distr.command
    parameters = parameters.replace("$COMMAND$", job.params)
    parameters = parameters.replace("$USERNAME$", job.owner.username)
    parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group)

    # Prepare metadata
    metadata = dict(user=job.owner.username)

    # Prepare PanDA Object
    pandajob = JobSpec()
    pandajob.jobDefinitionID = int(time.time()) % 10000
    pandajob.jobName = cont.guid
    pandajob.transformation = client_config.DEFAULT_TRF
    pandajob.destinationDBlock = datasetName
    pandajob.destinationSE = site.se
    pandajob.currentPriority = 1000
    pandajob.prodSourceLabel = 'user'
    pandajob.computingSite = site.ce
    pandajob.cloud = 'RU'
    pandajob.VO = 'atlas'
    pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName)
    pandajob.coreCount = job.corecount
    pandajob.metadata = json.dumps(metadata)
    #pandajob.workingGroup = job.owner.working_group

    if site.encode_commands:
        # It requires script wrapper on cluster side
        pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release, distributive, parameters)
    else:
        pandajob.jobParameters = parameters


    has_input = False
    for fcc in files_catalog:
        if fcc.type == 'input':
            f = fcc.file
            guid = f.guid
            fileIT = FileSpec()
            fileIT.lfn = f.lfn
            fileIT.dataset = pandajob.prodDBlock
            fileIT.prodDBlock = pandajob.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = fscope
            fileIT.status = 'ready'
            fileIT.GUID = guid
            pandajob.addFile(fileIT)

            has_input = True
        if fcc.type == 'output':
            f = fcc.file
            fileOT = FileSpec()
            fileOT.lfn = f.lfn
            fileOT.destinationDBlock = pandajob.prodDBlock
            fileOT.destinationSE = pandajob.destinationSE
            fileOT.dataset = pandajob.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = fscope
            fileOT.GUID = f.guid
            pandajob.addFile(fileOT)

            # Save replica meta
            fc.new_replica(f, site)

    if not has_input:
        # Add fake input
        fileIT = FileSpec()
        fileIT.lfn = "fake.input"
        fileIT.dataset = pandajob.prodDBlock
        fileIT.prodDBlock = pandajob.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = fscope
        fileIT.status = 'ready'
        fileIT.GUID = "fake.guid"
        pandajob.addFile(fileIT)

    # Prepare lof file
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % pandajob.jobName
    fileOL.destinationDBlock = pandajob.destinationDBlock
    fileOL.destinationSE = pandajob.destinationSE
    fileOL.dataset = '{}:logs'.format(fscope)
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    pandajob.addFile(fileOL)

    # Save log meta
    log = File()
    log.scope = fscope
    log.lfn = fileOL.lfn
    log.guid = getGUID(log.scope, log.lfn)
    log.type = 'log'
    log.status = 'defined'
    files_.save(log)

    # Save replica meta
    fc.new_replica(log, site)

    # Register file in container
    fc.reg_file_in_cont(log, cont, 'log')

    # Submit job
    o = submitJobs([pandajob])
    x = o[0]

    try:
        #update PandaID
        PandaID = int(x[0])
        job.pandaid = PandaID
        job.ce = site.ce
    except:
        job.status = 'submit_error'
    jobs_.save(job)

    return 0
示例#15
0
job.transformation    = 'Reco_tf.py'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('uuidgen')
job.destinationSE     = 'AGLT2_TEST'
job.prodDBlock        = 'user.mlassnig:user.mlassnig.pilot.test.single.hits'
job.currentPriority   = 1000
#job.prodSourceLabel   = 'ptest'
job.prodSourceLabel   = 'user'
job.computingSite     = site
job.cloud             = cloud
job.cmtConfig         = 'x86_64-slc6-gcc48-opt'
job.specialHandling   = 'ddm:rucio'
#job.transferType      = 'direct'

ifile = 'HITS.06828093._000096.pool.root.1'
fileI = FileSpec()
fileI.GUID = 'AC5B3759-B606-BA42-8681-4BD86455AE02'
fileI.checksum = 'ad:5d000974'
fileI.dataset = 'user.mlassnig:user.mlassnig.pilot.test.single.hits'
fileI.fsize = 94834717
fileI.lfn = ifile
fileI.prodDBlock = job.prodDBlock
fileI.scope = 'mc15_13TeV'
fileI.type = 'input'
job.addFile(fileI)

ofile = 'RDO_%s.root' % commands.getoutput('uuidgen')
fileO = FileSpec()
fileO.dataset = job.destinationDBlock
fileO.destinationDBlock = job.destinationDBlock
fileO.destinationSE = job.destinationSE
fileO.lfn = ofile
示例#16
0
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        '''prepare the subjob specific configuration'''

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        try:
            assert self.outsite
        except:
            logger.error("outsite not set. Aborting")
            raise Exception()

        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

        # now just filling the job from AthenaMC data

        jspec = JobSpec()
        jspec.jobDefinitionID = job._getRoot().id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel

        if app.transform_archive:
            jspec.homepackage = 'AnalysisTransforms' + app.transform_archive
        elif app.prod_release:
            jspec.homepackage = 'AnalysisTransforms-AtlasProduction_' + str(
                app.prod_release)
        jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB

        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock = 'NULL'
        if job.inputdata and len(
                app.inputfiles) > 0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset = ""
        for type in ["EVNT", "RDO", "HITS", "AOD", "ESD", "NTUP"]:
            if type in app.outputpaths.keys():
                outdset = string.replace(app.outputpaths[type], "/", ".")
                outdset = outdset[1:-1]
                break
        if not outdset:
            try:
                assert len(app.outputpaths.keys()) > 0
            except:
                logger.error(
                    "app.outputpaths is empty: check your output datasets")
                raise
            type = app.outputpaths.keys()[0]
            outdset = string.replace(app.outputpaths[type], "/", ".")
            outdset = outdset[1:-1]

        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel = 'user'
        jspec.assignedPriority = 1000
        jspec.cloud = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite = self.outsite
        jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
        #       library (source files)
        flib = FileSpec()
        flib.lfn = self.library
        #        flib.GUID           =
        flib.type = 'input'
        #        flib.status         =
        flib.dataset = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid = app.turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid = app.dbturls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid = app.minbias_turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid = app.cavern_turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)


#       output files( this includes the logfiles)
# Output files
        jidtag = ""
        job = app._getParent()  # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id
        outfiles = app.subjobsOutfiles[job.id]
        pandaOutfiles = {}
        for type in outfiles.keys():
            pandaOutfiles[type] = outfiles[type] + "." + str(jidtag)
            if type == "LOG":
                pandaOutfiles[type] += ".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset = string.replace(app.outputpaths[outtype], "/", ".")
            dset = dset[1:-1]
            fout.dataset = dset
            fout.lfn = pandaOutfiles[outtype]
            fout.type = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE = jspec.destinationSE
            if outtype == 'LOG':
                fout.type = 'log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE = job.backend.site
            jspec.addFile(fout)

        #       job parameters
        param = '-l %s ' % self.library  # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '

        # transform parameters
        # need to update arglist with final output file name...
        newArgs = []
        if app.mode == "evgen":
            app.args[3] = app.args[3] + " -t "
            if app.verbosity:
                app.args[3] = app.args[3] + " -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type]) > -1:
                    arg = arg.replace(outfiles[type], pandaOutfiles[type])

            newArgs.append(arg)
        arglist = string.join(newArgs, " ")
        #        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles = app.inputfiles + app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles) > 0:
            param += ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles) > 0:
            param += ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        del pandaOutfiles[
            "LOG"]  # logfiles do not appear in IROOT block, and this one is not needed anymore...
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLSSL)
        if matchURL != None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"

        jspec.jobParameters = param
        jspec.metadata = "--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return

        return jspec