def defineEvgen16Job(self, i):
        """Define an Evgen16 job based on predefined values and randomly generated names
        """

        job = JobSpec()
        job.computingSite = self.__site
        job.cloud = self.__cloud

        job.jobDefinitionID = int(time.time()) % 10000
        job.jobName = "%s_%d" % (uuid.uuid1(), i)
        job.AtlasRelease = 'Atlas-16.6.2'
        job.homepackage = 'AtlasProduction/16.6.2.1'
        job.transformation = 'Evgen_trf.py'
        job.destinationDBlock = self.__datasetName
        job.destinationSE = self.__destName
        job.currentPriority = 10000
        job.prodSourceLabel = 'test'
        job.cmtConfig = 'i686-slc5-gcc43-opt'

        #Output file
        fileO = FileSpec()
        fileO.lfn = "%s.evgen.pool.root" % job.jobName
        fileO.destinationDBlock = job.destinationDBlock
        fileO.destinationSE = job.destinationSE
        fileO.dataset = job.destinationDBlock
        fileO.destinationDBlockToken = 'ATLASDATADISK'
        fileO.type = 'output'
        job.addFile(fileO)

        #Log file
        fileL = FileSpec()
        fileL.lfn = "%s.job.log.tgz" % job.jobName
        fileL.destinationDBlock = job.destinationDBlock
        fileL.destinationSE = job.destinationSE
        fileL.dataset = job.destinationDBlock
        fileL.destinationDBlockToken = 'ATLASDATADISK'
        fileL.type = 'log'
        job.addFile(fileL)

        job.jobParameters = "2760 105048 19901 101 200 MC10.105048.PythiaB_ccmu3mu1X.py %s NONE NONE NONE MC10JobOpts-latest-test.tar.gz" % fileO.lfn
        return job
    def defineEvgen16Job(self, i):
        """Define an Evgen16 job based on predefined values and randomly generated names
        """

        job = JobSpec()
        job.computingSite = self.__site
        job.cloud = self.__cloud

        job.jobDefinitionID = int(time.time()) % 10000
        job.jobName = "%s_%d" % (uuid.uuid1(), i)
        job.AtlasRelease = 'Atlas-16.6.2'
        job.homepackage = 'AtlasProduction/16.6.2.1'
        job.transformation = 'Evgen_trf.py'
        job.destinationDBlock = self.__datasetName
        job.destinationSE = self.__destName
        job.currentPriority = 10000
        job.prodSourceLabel = 'test'
        job.cmtConfig = 'i686-slc5-gcc43-opt'

        #Output file
        fileO = FileSpec()
        fileO.lfn = "%s.evgen.pool.root" % job.jobName
        fileO.destinationDBlock = job.destinationDBlock
        fileO.destinationSE = job.destinationSE
        fileO.dataset = job.destinationDBlock
        fileO.destinationDBlockToken = 'ATLASDATADISK'
        fileO.type = 'output'
        job.addFile(fileO)

        #Log file
        fileL = FileSpec()
        fileL.lfn = "%s.job.log.tgz" % job.jobName
        fileL.destinationDBlock = job.destinationDBlock
        fileL.destinationSE = job.destinationSE
        fileL.dataset = job.destinationDBlock
        fileL.destinationDBlockToken = 'ATLASDATADISK'
        fileL.type = 'log'
        job.addFile(fileL)

        job.jobParameters = "2760 105048 19901 101 200 MC10.105048.PythiaB_ccmu3mu1X.py %s NONE NONE NONE MC10JobOpts-latest-test.tar.gz" % fileO.lfn
        return job
Пример #3
0
    def run(self):
        for i in range(1):
            prodDBlock        = 'rome.004201.evgen.ZeeJimmy'
            destinationDBlock = 'pandatest.000123.test.simul'
            destinationSE = 'BNL_SE'
            jobs = []
            #for i in range(self.interval):
            for i in range(2):
                job = JobSpec()
                job.jobDefinitionID=self.jobDefinitionID
                job.AtlasRelease='Atlas-11.0.1'
                job.prodDBlock=prodDBlock
                job.destinationDBlock=destinationDBlock
                job.destinationSE=destinationSE
                job.currentPriority=i 

                lfnI = 'rome.004201.evgen.ZeeJimmy._00001.pool.root'
                file = FileSpec()
                file.lfn = lfnI
                file.dataset = 'rome.004201.evgen.ZeeJimmy'
                file.type = 'input'
                file.prodDBlock = prodDBlock
                file.dataset = prodDBlock
                job.addFile(file)

                lfnO ='%s.pool.root.1' % commands.getoutput('uuidgen')
                file = FileSpec()
                file.lfn = lfnO
                file.type = 'output'
                file.destinationDBlock = destinationDBlock
                file.dataset = destinationDBlock
                file.destinationSE     = destinationSE
                job.addFile(file)

                job.homepackage='JobTransforms-11-00-01-01'
                job.transformation='share/rome.g4sim.standard.trf'
                job.jobParameters='%s %s 1 2 14268' % (lfnI,lfnO)
                jobs.append(job)
            self.taskbuffer.storeJobs(jobs,None)
            time.sleep(self.interval)
Пример #4
0
    def master_prepare(self,app,appmasterconfig):

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler master_prepare called for %s', job.getFQID('.'))
        usertag = configDQ2['usertag']
        #usertag='user09'
        nickname = getNickname(allowMissingNickname=True)
        self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (usertag,nickname,commands.getoutput('hostname').split('.')[0],int(time.time()),job.id)
#        self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity())
        sources = 'sources.%s.tar.gz' % commands.getoutput('uuidgen 2> /dev/null') 
        self.library = '%s.lib.tgz' % self.libDataset

        # check DBRelease
        # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1:
         #   raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'")

#       unpack library
        logger.debug('Creating source tarball ...')        
        tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null')
        os.mkdir(tmpdir)

        inputbox=[]
        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [ File(app.transform_archive) ]
        if app.evgen_job_option:
            self.evgen_job_option=app.evgen_job_option
            if os.path.exists(app.evgen_job_option):
                # locally modified job option file to add to the input sand box
                inputbox += [ File(app.evgen_job_option) ]
                self.evgen_job_option=app.evgen_job_option.split("/")[-1]

         
#       add input sandbox files
        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [ file ]
#        add option files
        for extFile in job.backend.extOutFile:
            try:
                shutil.copy(extFile,tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(extFile,tmpdir)
#       fill the archive
        for opt_file in inputbox:
            try:
                shutil.copy(opt_file.name,tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(opt_file.name,tmpdir)
#       now tar it up again

        inpw = job.getInputWorkspace()
        rc, output = commands.getstatusoutput('tar czf %s -C %s .' % (inpw.getPath(sources),tmpdir))
        if rc:
            logger.error('Packing sources failed with status %d',rc)
            logger.error(output)
            raise ApplicationConfigurationError(None,'Packing sources failed.')

        shutil.rmtree(tmpdir)

#       upload sources

        logger.debug('Uploading source tarball ...')
        try:
            cwd = os.getcwd()
            os.chdir(inpw.getPath())
            rc, output = Client.putFile(sources)
            if output != 'True':
                logger.error('Uploading sources %s failed. Status = %d', sources, rc)
                logger.error(output)
                raise ApplicationConfigurationError(None,'Uploading archive failed')
        finally:
            os.chdir(cwd)      


        # Use Panda's brokerage
##         if job.inputdata and len(app.sites)>0:
##             # update cloud, use inputdata's
##             from dq2.info.TiersOfATLAS import whichCloud,ToACache
##             inclouds=[]
##             for site in app.sites:
##                 cloudSite=whichCloud(app.sites[0])
##                 if cloudSite not in inclouds:
##                     inclouds.append(cloudSite)
##             # now converting inclouds content into proper brokering stuff.
##             outclouds=[]
##             for cloudSite in inclouds:
##                 for cloudID, eachCloud in ToACache.dbcloud.iteritems():
##                     if cloudSite==eachCloud:
##                         cloud=cloudID
##                         outclouds.append(cloud)
##                         break
                    
##             print outclouds
##             # finally, matching with user's wishes
##             if len(outclouds)>0:
##                 if not job.backend.requirements.cloud: # no user wish, update
##                     job.backend.requirements.cloud=outclouds[0]
##                 else:
##                     try:
##                         assert job.backend.requirements.cloud in outclouds
##                     except:
##                         raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))
                                                            
        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
        
        runPandaBrokerage(job)
        
        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(None,'site is still AUTO after brokerage!')

        # output dataset preparation and registration
        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        except:
            raise ApplicationConfigurationError(None,"Could not extract output dataset location from job.backend.site value: %s. Aborting" % job.backend.site)
        if not app.dryrun:
            for outtype in app.outputpaths.keys():
                dset=string.replace(app.outputpaths[outtype],"/",".")
                dset=dset[1:]
                # dataset registration must be done only once.
                print "registering output dataset %s at %s" % (dset,outDsLocation)
                try:
                    Client.addDataset(dset,False,location=outDsLocation)
                    dq2_set_dataset_lifetime(dset, location=outDsLocation)
                except:
                    raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % dset)
            # extend registration to build job lib dataset:
            print "registering output dataset %s at %s" % (self.libDataset,outDsLocation)

            try:
                Client.addDataset(self.libDataset,False,location=outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
            except:
                raise ApplicationConfigurationError(None,"Fail to create output dataset %s. Aborting" % self.libDataset)


        ###
        cacheVer = "-AtlasProduction_" + str(app.prod_release)
            
        logger.debug("master job submit?")
        self.outsite=job.backend.site
        if app.se_name and app.se_name != "none" and not self.outsite:
            self.outsite=app.se_name

       
        #       create build job
        jspec = JobSpec()
        jspec.jobDefinitionID   = job.id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        jspec.homepackage       = 'AnalysisTransforms'+cacheVer#+nightVer
        jspec.transformation    = '%s/buildJob-00-00-03' % Client.baseURLSUB # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare()
        jspec.destinationDBlock = self.libDataset
        jspec.destinationSE     = job.backend.site
        jspec.prodSourceLabel   = 'panda'
        jspec.assignedPriority  = 2000
        jspec.computingSite     = job.backend.site
        jspec.cloud             = job.backend.requirements.cloud
#        jspec.jobParameters     = self.args not known yet
        jspec.jobParameters     = '-o %s' % (self.library)
        if app.userarea:
            print app.userarea
            jspec.jobParameters     += ' -i %s' % (os.path.basename(app.userarea))
        else:
            jspec.jobParameters     += ' -i %s' % (sources)
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
        
        matchURL = re.search('(http.*://[^/]+)/',Client.baseURLSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

        fout = FileSpec()
        fout.lfn  = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)
        #print "MASTER JOB DETAILS:",jspec.jobParameters

        return jspec
Пример #5
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'BNL_ATLAS_2'

files = {
    'EVNT.019128._00011.pool.root.1': None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-13.0.40'
    job.homepackage = 'AtlasProduction/13.0.40.3'
    job.transformation = 'csc_simul_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.computingSite = site
    job.prodDBlock = 'valid1.005001.pythia_minbias.evgen.EVNT.e306_tid019128'

    job.prodSourceLabel = 'test'
    job.currentPriority = 10000
    job.cloud = 'IT'

    fileI = FileSpec()
    fileI.dataset = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
    fileI.lfn = lfn
Пример #6
0
import userinterface.Client as Client

from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv)>2:
    site = sys.argv[1]
    cloud = sys.argv[2]
else:
    site = "UTA_PAUL_TEST"
    cloud = "US"

job = JobSpec()
job.jobDefinitionID   = int(time.time()) % 10000
job.jobName           = "%s_1" % commands.getoutput('uuidgen')
job.AtlasRelease      = 'Atlas-20.1.4'
job.homepackage       = 'AtlasProduction/20.1.4.14'
#job.AtlasRelease      = 'Atlas-20.20.8'
#job.homepackage       = 'AtlasProduction/20.20.8.4'
job.transformation    = 'Reco_tf.py'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('uuidgen')
job.destinationSE     = 'AGLT2_TEST'
job.prodDBlock        = 'user.mlassnig:user.mlassnig.pilot.test.single.hits'
job.currentPriority   = 1000
#job.prodSourceLabel   = 'ptest'
job.prodSourceLabel   = 'user'
job.computingSite     = site
job.cloud             = cloud
job.cmtConfig         = 'x86_64-slc6-gcc48-opt'
job.specialHandling   = 'ddm:rucio'
#job.transferType      = 'direct'
Пример #7
0
 def run(self):
     try:
         self.putLog('start %s' % self.evpFileName)
         # lock evp file
         self.evpFile = open(self.evpFileName)
         try:
             fcntl.flock(self.evpFile.fileno(),
                         fcntl.LOCK_EX | fcntl.LOCK_NB)
         except:
             # relase
             self.putLog("cannot lock %s" % self.evpFileName)
             self.evpFile.close()
             return True
         # options
         runEvtList = []
         eventPickDataType = ''
         eventPickStreamName = ''
         eventPickDS = []
         eventPickAmiTag = ''
         eventPickNumSites = 1
         inputFileList = []
         tagDsList = []
         tagQuery = ''
         tagStreamRef = ''
         skipDaTRI = False
         runEvtGuidMap = {}
         # read evp file
         for tmpLine in self.evpFile:
             tmpMatch = re.search('^([^=]+)=(.+)$', tmpLine)
             # check format
             if tmpMatch == None:
                 continue
             tmpItems = tmpMatch.groups()
             if tmpItems[0] == 'runEvent':
                 # get run and event number
                 tmpRunEvt = tmpItems[1].split(',')
                 if len(tmpRunEvt) == 2:
                     runEvtList.append(tmpRunEvt)
             elif tmpItems[0] == 'eventPickDataType':
                 # data type
                 eventPickDataType = tmpItems[1]
             elif tmpItems[0] == 'eventPickStreamName':
                 # stream name
                 eventPickStreamName = tmpItems[1]
             elif tmpItems[0] == 'eventPickDS':
                 # dataset pattern
                 eventPickDS = tmpItems[1].split(',')
             elif tmpItems[0] == 'eventPickAmiTag':
                 # AMI tag
                 eventPickAmiTag = tmpItems[1]
             elif tmpItems[0] == 'eventPickNumSites':
                 # the number of sites where datasets are distributed
                 try:
                     eventPickNumSites = int(tmpItems[1])
                 except:
                     pass
             elif tmpItems[0] == 'userName':
                 # user name
                 self.userDN = tmpItems[1]
                 self.putLog("user=%s" % self.userDN)
             elif tmpItems[0] == 'userTaskName':
                 # user task name
                 self.userTaskName = tmpItems[1]
             elif tmpItems[0] == 'userDatasetName':
                 # user dataset name
                 self.userDatasetName = tmpItems[1]
             elif tmpItems[0] == 'lockedBy':
                 # client name
                 self.lockedBy = tmpItems[1]
             elif tmpItems[0] == 'creationTime':
                 # creation time
                 self.creationTime = tmpItems[1]
             elif tmpItems[0] == 'params':
                 # parameters
                 self.params = tmpItems[1]
             elif tmpItems[0] == 'inputFileList':
                 # input file list
                 inputFileList = tmpItems[1].split(',')
                 try:
                     inputFileList.remove('')
                 except:
                     pass
             elif tmpItems[0] == 'tagDS':
                 # TAG dataset
                 tagDsList = tmpItems[1].split(',')
             elif tmpItems[0] == 'tagQuery':
                 # query for TAG
                 tagQuery = tmpItems[1]
             elif tmpItems[0] == 'tagStreamRef':
                 # StreamRef for TAG
                 tagStreamRef = tmpItems[1]
                 if not tagStreamRef.endswith('_ref'):
                     tagStreamRef += '_ref'
             elif tmpItems[0] == 'runEvtGuidMap':
                 # GUIDs
                 try:
                     exec "runEvtGuidMap=" + tmpItems[1]
                 except:
                     pass
         # extract task name
         if self.userTaskName == '' and self.params != '':
             try:
                 tmpMatch = re.search('--outDS(=| ) *([^ ]+)', self.params)
                 if tmpMatch != None:
                     self.userTaskName = tmpMatch.group(2)
                     if not self.userTaskName.endswith('/'):
                         self.userTaskName += '/'
             except:
                 pass
         # suppress DaTRI
         if self.params != '':
             if '--eventPickSkipDaTRI' in self.params:
                 skipDaTRI = True
         # get compact user name
         compactDN = self.taskBuffer.cleanUserID(self.userDN)
         # get jediTaskID
         self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI(
             compactDN, self.userTaskName)
         # convert
         if tagDsList == [] or tagQuery == '':
             # convert run/event list to dataset/file list
             tmpRet, locationMap, allFiles = self.pd2p.convertEvtRunToDatasets(
                 runEvtList, eventPickDataType, eventPickStreamName,
                 eventPickDS, eventPickAmiTag, self.userDN, runEvtGuidMap)
             if not tmpRet:
                 if 'isFatal' in locationMap and locationMap[
                         'isFatal'] == True:
                     self.ignoreError = False
                 self.endWithError(
                     'Failed to convert the run/event list to a dataset/file list'
                 )
                 return False
         else:
             # get parent dataset/files with TAG
             tmpRet, locationMap, allFiles = self.pd2p.getTagParentInfoUsingTagQuery(
                 tagDsList, tagQuery, tagStreamRef)
             if not tmpRet:
                 self.endWithError(
                     'Failed to get parent dataset/file list with TAG')
                 return False
         # use only files in the list
         if inputFileList != []:
             tmpAllFiles = []
             for tmpFile in allFiles:
                 if tmpFile['lfn'] in inputFileList:
                     tmpAllFiles.append(tmpFile)
             allFiles = tmpAllFiles
         # remove redundant CN from DN
         tmpDN = self.userDN
         tmpDN = re.sub('/CN=limited proxy', '', tmpDN)
         tmpDN = re.sub('(/CN=proxy)+$', '', tmpDN)
         # make dataset container
         tmpRet = self.pd2p.registerDatasetContainerWithDatasets(
             self.userDatasetName,
             allFiles,
             locationMap,
             nSites=eventPickNumSites,
             owner=tmpDN)
         if not tmpRet:
             self.endWithError('Failed to make a dataset container %s' %
                               self.userDatasetName)
             return False
         # skip DaTRI
         if skipDaTRI:
             # successfully terminated
             self.putLog("skip DaTRI")
             # update task
             self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID)
         else:
             # get candidates
             tmpRet, candidateMaps = self.pd2p.getCandidates(
                 self.userDatasetName, checkUsedFile=False, useHidden=True)
             if not tmpRet:
                 self.endWithError(
                     'Failed to find candidate for destination')
                 return False
             # collect all candidates
             allCandidates = []
             for tmpDS, tmpDsVal in candidateMaps.iteritems():
                 for tmpCloud, tmpCloudVal in tmpDsVal.iteritems():
                     for tmpSiteName in tmpCloudVal[0]:
                         if not tmpSiteName in allCandidates:
                             allCandidates.append(tmpSiteName)
             if allCandidates == []:
                 self.endWithError('No candidate for destination')
                 return False
             # get list of dataset (container) names
             if eventPickNumSites > 1:
                 # decompose container to transfer datasets separately
                 tmpRet, tmpOut = self.pd2p.getListDatasetReplicasInContainer(
                     self.userDatasetName)
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' %
                                       self.userDatasetName)
                     return False
                 userDatasetNameList = tmpOut.keys()
             else:
                 # transfer container at once
                 userDatasetNameList = [self.userDatasetName]
             # loop over all datasets
             sitesUsed = []
             for tmpUserDatasetName in userDatasetNameList:
                 # get size of dataset container
                 tmpRet, totalInputSize = rucioAPI.getDatasetSize(
                     tmpUserDatasetName)
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' %
                                       tmpUserDatasetName)
                     return False
                 # run brokerage
                 tmpJob = JobSpec()
                 tmpJob.AtlasRelease = ''
                 self.putLog("run brokerage for %s" % tmpDS)
                 brokerage.broker.schedule([tmpJob],
                                           self.taskBuffer,
                                           self.siteMapper,
                                           True,
                                           allCandidates,
                                           True,
                                           datasetSize=totalInputSize)
                 if tmpJob.computingSite.startswith('ERROR'):
                     self.endWithError('brokerage failed with %s' %
                                       tmpJob.computingSite)
                     return False
                 self.putLog("site -> %s" % tmpJob.computingSite)
                 # send transfer request
                 try:
                     tmpDN = rucioAPI.parse_dn(tmpDN)
                     tmpStatus, userInfo = rucioAPI.finger(tmpDN)
                     if not tmpStatus:
                         raise RuntimeError, 'user info not found for {0} with {1}'.format(
                             tmpDN, userInfo)
                     tmpDN = userInfo['nickname']
                     tmpDQ2ID = self.siteMapper.getSite(
                         tmpJob.computingSite).ddm
                     tmpMsg = "%s ds=%s site=%s id=%s" % (
                         'registerDatasetLocation for DaTRI ',
                         tmpUserDatasetName, tmpDQ2ID, tmpDN)
                     self.putLog(tmpMsg)
                     rucioAPI.registerDatasetLocation(
                         tmpDS, [tmpDQ2ID],
                         lifetime=14,
                         owner=tmpDN,
                         activity="User Subscriptions")
                     self.putLog('OK')
                 except:
                     errType, errValue = sys.exc_info()[:2]
                     tmpStr = 'Failed to send transfer request : %s %s' % (
                         errType, errValue)
                     tmpStr.strip()
                     tmpStr += traceback.format_exc()
                     self.endWithError(tmpStr)
                     return False
                 # list of sites already used
                 sitesUsed.append(tmpJob.computingSite)
                 self.putLog("used %s sites" % len(sitesUsed))
                 # set candidates
                 if len(sitesUsed) >= eventPickNumSites:
                     # reset candidates to limit the number of sites
                     allCandidates = sitesUsed
                     sitesUsed = []
                 else:
                     # remove site
                     allCandidates.remove(tmpJob.computingSite)
             # send email notification for success
             tmpMsg = 'A transfer request was successfully sent to Rucio.\n'
             tmpMsg += 'Your task will get started once transfer is completed.'
             self.sendEmail(True, tmpMsg)
         try:
             # unlock and delete evp file
             fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_UN)
             self.evpFile.close()
             os.remove(self.evpFileName)
         except:
             pass
         # successfully terminated
         self.putLog("end %s" % self.evpFileName)
         return True
     except:
         errType, errValue = sys.exc_info()[:2]
         self.endWithError('Got exception %s:%s %s' %
                           (errType, errValue, traceback.format_exc()))
         return False
Пример #8
0
if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'BNL_ATLAS_2'

jobList = []
for i in range(20):

    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = commands.getoutput('uuidgen')
    job.AtlasRelease = 'Atlas-11.0.41'
    #job.AtlasRelease      = 'Atlas-11.0.3'
    job.homepackage = 'AnalysisTransforms'
    job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 100
    job.prodSourceLabel = 'user'
    job.computingSite = site
    #job.prodDBlock        = "pandatest.b1599dfa-cd36-4fc5-92f6-495781a94c66"
    job.prodDBlock = "pandatest.f228b051-077b-4f81-90bf-496340644379"

    fileI = FileSpec()
    fileI.dataset = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
    fileI.lfn = "lib.f228b051-077b-4f81-90bf-496340644379.tgz"
Пример #9
0
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

job = JobSpec()
job.jobDefinitionID   = int(time.time()) % 10000
job.jobName           = commands.getoutput('/usr/bin/uuidgen') 
job.AtlasRelease      = 'Atlas-9.0.4'
job.prodDBlock        = 'pandatest.000003.dd.input'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('/usr/bin/uuidgen')
job.destinationSE     = 'BNL_SE'

ids = {'pandatest.000003.dd.input._00028.junk':'6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27',
       'pandatest.000003.dd.input._00033.junk':'98f79ba1-1793-4253-aac7-bdf90a51d1ee',
       'pandatest.000003.dd.input._00039.junk':'33660dd5-7cef-422a-a7fc-6c24cb10deb1'}
for lfn in ids.keys():
    file = FileSpec()
    file.lfn = lfn
    file.GUID = ids[file.lfn]
    file.dataset = 'pandatest.000003.dd.input'
    file.type = 'input'
    job.addFile(file)

s,o = Client.submitJobs([job])
print "---------------------"
print s
print o
print "---------------------"
s,o = Client.getJobStatus([4934, 4766, 4767, 4768, 4769])
Пример #10
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile:None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = (time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-15.3.1'
    job.homepackage       = 'AtlasProduction/15.3.1.5'
    job.transformation    = 'csc_atlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite     = site
    job.prodDBlock        = prodDBlock
    
    job.prodSourceLabel   = 'test'
    job.processingType    = 'test'
    job.currentPriority   = 10000
    job.cloud             = cloud
    job.cmtConfig         = 'i686-slc4-gcc34-opt'

    fileI = FileSpec()
    fileI.dataset    = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
Пример #11
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile:None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = (time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-17.0.5'
    job.homepackage       = 'AtlasProduction/17.0.5.6'
    job.transformation    = 'AtlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite     = site
    job.prodDBlock        = prodDBlock
    
    job.prodSourceLabel   = 'test'
    job.processingType    = 'test'
    job.currentPriority   = 10000
    job.cloud             = cloud
    job.cmtConfig         = 'i686-slc5-gcc43-opt'

    fileI = FileSpec()
    fileI.dataset    = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
Пример #12
0
destName = 'BNL_ATLAS_2'

files = {
    'EVNT.023986._00001.pool.root.1': None,
    #'EVNT.023989._00001.pool.root.1':None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = (time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-14.2.20'
    job.homepackage = 'AtlasProduction/14.2.20.1'
    job.transformation = 'csc_simul_reco_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.computingSite = site
    job.prodDBlock = 'mc08.105031.Jimmy_jetsJ2.evgen.EVNT.e347_tid023986'
    #job.prodDBlock        = 'mc08.105034.Jimmy_jetsJ5.evgen.EVNT.e347_tid023989'

    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud

    fileI = FileSpec()
    fileI.dataset = job.prodDBlock
Пример #13
0

# instantiate JobSpecs
iJob = 0
jobList = []
for line in taskFile:
    iJob += 1
    job = JobSpec()
    # job ID  ###### FIXME
    job.jobDefinitionID   = int(time.time()) % 10000
    # job name
    job.jobName           = "%s_%05d.job" % (taskName,iJob)
    # AtlasRelease
    if len(re.findall('\.',trfVer)) > 2:
        match = re.search('^(\d+\.\d+\.\d+)',trfVer)
        job.AtlasRelease  = 'Atlas-%s' % match.group(1)
    else:
        job.AtlasRelease  = 'Atlas-%s' % trfVer
    # homepackage
    vers = trfVer.split('.')
    if int(vers[0]) <= 11:
        job.homepackage = 'JobTransforms'
        for ver in vers:
            job.homepackage += "-%02d" % int(ver)
    else:
        job.homepackage = 'AtlasProduction/%s' % trfVer
    # trf
    job.transformation    = trf
    job.destinationDBlock = oDatasets[0]
    # prod DBlock
    job.prodDBlock        = iDataset
Пример #14
0
import userinterface.Client as Client

from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 2:
    site = sys.argv[1]
    cloud = sys.argv[2]
else:
    site = "UTA_PAUL_TEST"
    cloud = "US"

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = "%s_1" % commands.getoutput('uuidgen')
job.AtlasRelease = 'Atlas-20.1.4'
job.homepackage = 'AtlasProduction/20.1.4.14'
#job.AtlasRelease      = 'Atlas-20.20.8'
#job.homepackage       = 'AtlasProduction/20.20.8.4'
job.transformation = 'Reco_tf.py'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput('uuidgen')
job.destinationSE = 'AGLT2_TEST'
job.prodDBlock = 'user.mlassnig:user.mlassnig.pilot.test.single.hits'
job.currentPriority = 1000
#job.prodSourceLabel   = 'ptest'
job.prodSourceLabel = 'user'
job.computingSite = site
job.cloud = cloud
job.cmtConfig = 'x86_64-slc6-gcc48-opt'
job.specialHandling = 'ddm:rucio'
#job.transferType      = 'direct'
Пример #15
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName    = 'BNL_ATLAS_2'

files = {
    'EVNT.019128._00011.pool.root.1':None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-13.0.40'
    job.homepackage       = 'AtlasProduction/13.0.40.3'
    job.transformation    = 'csc_simul_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.computingSite     = site
    job.prodDBlock        = 'valid1.005001.pythia_minbias.evgen.EVNT.e306_tid019128'
    
    job.prodSourceLabel   = 'test'    
    job.currentPriority   = 10000
    job.cloud             = 'IT'

    fileI = FileSpec()
    fileI.dataset    = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
    fileI.lfn = lfn
Пример #16
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile:None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = (time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-16.6.2'
    job.homepackage       = 'AtlasProduction/16.6.2.1'
    job.transformation    = 'AtlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite     = site
    job.prodDBlock        = prodDBlock
    
    job.prodSourceLabel   = 'test'
    job.processingType    = 'test'
    job.currentPriority   = 10000
    job.cloud             = cloud
    job.cmtConfig         = 'i686-slc5-gcc43-opt'

    fileI = FileSpec()
    fileI.dataset    = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
Пример #17
0
    def master_prepare(self, app, appmasterconfig):

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler master_prepare called for %s',
                     job.getFQID('.'))
        usertag = configDQ2['usertag']
        #usertag='user09'
        nickname = getNickname(allowMissingNickname=True)
        self.libDataset = '%s.%s.ganga.%s_%d.lib._%06d' % (
            usertag, nickname, commands.getoutput('hostname').split('.')[0],
            int(time.time()), job.id)
        #        self.userprefix='%s.%s.ganga' % (usertag,gridProxy.identity())
        sources = 'sources.%s.tar.gz' % commands.getoutput(
            'uuidgen 2> /dev/null')
        self.library = '%s.lib.tgz' % self.libDataset

        # check DBRelease
        # if job.backend.dbRelease != '' and job.backend.dbRelease.find(':') == -1:
        #   raise ApplicationConfigurationError(None,"ERROR : invalid argument for backend.dbRelease. Must be 'DatasetName:FileName'")

        #       unpack library
        logger.debug('Creating source tarball ...')
        tmpdir = '/tmp/%s' % commands.getoutput('uuidgen 2> /dev/null')
        os.mkdir(tmpdir)

        inputbox = []
        if os.path.exists(app.transform_archive):
            # must add a condition on size.
            inputbox += [File(app.transform_archive)]
        if app.evgen_job_option:
            self.evgen_job_option = app.evgen_job_option
            if os.path.exists(app.evgen_job_option):
                # locally modified job option file to add to the input sand box
                inputbox += [File(app.evgen_job_option)]
                self.evgen_job_option = app.evgen_job_option.split("/")[-1]

#       add input sandbox files
        if (job.inputsandbox):
            for file in job.inputsandbox:
                inputbox += [file]
#        add option files
        for extFile in job.backend.extOutFile:
            try:
                shutil.copy(extFile, tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(extFile, tmpdir)
#       fill the archive
        for opt_file in inputbox:
            try:
                shutil.copy(opt_file.name, tmpdir)
            except IOError:
                os.makedirs(tmpdir)
                shutil.copy(opt_file.name, tmpdir)
#       now tar it up again

        inpw = job.getInputWorkspace()
        rc, output = commands.getstatusoutput('tar czf %s -C %s .' %
                                              (inpw.getPath(sources), tmpdir))
        if rc:
            logger.error('Packing sources failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError(None,
                                                'Packing sources failed.')

        shutil.rmtree(tmpdir)

        #       upload sources

        logger.debug('Uploading source tarball ...')
        try:
            cwd = os.getcwd()
            os.chdir(inpw.getPath())
            rc, output = Client.putFile(sources)
            if output != 'True':
                logger.error('Uploading sources %s failed. Status = %d',
                             sources, rc)
                logger.error(output)
                raise ApplicationConfigurationError(
                    None, 'Uploading archive failed')
        finally:
            os.chdir(cwd)

        # Use Panda's brokerage
##         if job.inputdata and len(app.sites)>0:
##             # update cloud, use inputdata's
##             from dq2.info.TiersOfATLAS import whichCloud,ToACache
##             inclouds=[]
##             for site in app.sites:
##                 cloudSite=whichCloud(app.sites[0])
##                 if cloudSite not in inclouds:
##                     inclouds.append(cloudSite)
##             # now converting inclouds content into proper brokering stuff.
##             outclouds=[]
##             for cloudSite in inclouds:
##                 for cloudID, eachCloud in ToACache.dbcloud.iteritems():
##                     if cloudSite==eachCloud:
##                         cloud=cloudID
##                         outclouds.append(cloud)
##                         break

##             print outclouds
##             # finally, matching with user's wishes
##             if len(outclouds)>0:
##                 if not job.backend.requirements.cloud: # no user wish, update
##                     job.backend.requirements.cloud=outclouds[0]
##                 else:
##                     try:
##                         assert job.backend.requirements.cloud in outclouds
##                     except:
##                         raise ApplicationConfigurationError(None,'Input dataset not available in target cloud %s. Please try any of the following %s' % (job.backend.requirements.cloud, str(outclouds)))

        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage

        runPandaBrokerage(job)

        if job.backend.site == 'AUTO':
            raise ApplicationConfigurationError(
                None, 'site is still AUTO after brokerage!')

        # output dataset preparation and registration
        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        except:
            raise ApplicationConfigurationError(
                None,
                "Could not extract output dataset location from job.backend.site value: %s. Aborting"
                % job.backend.site)
        if not app.dryrun:
            for outtype in app.outputpaths.keys():
                dset = string.replace(app.outputpaths[outtype], "/", ".")
                dset = dset[1:]
                # dataset registration must be done only once.
                print "registering output dataset %s at %s" % (dset,
                                                               outDsLocation)
                try:
                    Client.addDataset(dset, False, location=outDsLocation)
                    dq2_set_dataset_lifetime(dset, location=outDsLocation)
                except:
                    raise ApplicationConfigurationError(
                        None,
                        "Fail to create output dataset %s. Aborting" % dset)
            # extend registration to build job lib dataset:
            print "registering output dataset %s at %s" % (self.libDataset,
                                                           outDsLocation)

            try:
                Client.addDataset(self.libDataset,
                                  False,
                                  location=outDsLocation)
                dq2_set_dataset_lifetime(self.libDataset, outDsLocation)
            except:
                raise ApplicationConfigurationError(
                    None, "Fail to create output dataset %s. Aborting" %
                    self.libDataset)

        ###
        cacheVer = "-AtlasProduction_" + str(app.prod_release)

        logger.debug("master job submit?")
        self.outsite = job.backend.site
        if app.se_name and app.se_name != "none" and not self.outsite:
            self.outsite = app.se_name

        #       create build job
        jspec = JobSpec()
        jspec.jobDefinitionID = job.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel
        jspec.homepackage = 'AnalysisTransforms' + cacheVer  #+nightVer
        jspec.transformation = '%s/buildJob-00-00-03' % Client.baseURLSUB  # common base to Athena and AthenaMC jobs: buildJob is a pilot job which takes care of all inputs for the real jobs (in prepare()
        jspec.destinationDBlock = self.libDataset
        jspec.destinationSE = job.backend.site
        jspec.prodSourceLabel = 'panda'
        jspec.assignedPriority = 2000
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        #        jspec.jobParameters     = self.args not known yet
        jspec.jobParameters = '-o %s' % (self.library)
        if app.userarea:
            print app.userarea
            jspec.jobParameters += ' -i %s' % (os.path.basename(app.userarea))
        else:
            jspec.jobParameters += ' -i %s' % (sources)
        jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)

        matchURL = re.search('(http.*://[^/]+)/', Client.baseURLSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s' % matchURL.group(1)

        fout = FileSpec()
        fout.lfn = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)
        #print "MASTER JOB DETAILS:",jspec.jobParameters

        return jspec
Пример #18
0
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

site = sys.argv[1]
cloud = sys.argv[2]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = None

jobList = []

for i in range(1):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    job.AtlasRelease = 'Atlas-17.0.5'
    job.homepackage = 'AtlasProduction/17.0.5.6'
    job.transformation = 'Evgen_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 10000
    job.prodSourceLabel = 'test'
    job.computingSite = site
    job.cloud = cloud
    job.cmtConfig = 'i686-slc5-gcc43-opt'

    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % job.jobName
    file.destinationDBlock = job.destinationDBlock
    file.destinationSE = job.destinationSE
    file.dataset = job.destinationDBlock
Пример #19
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile: None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = (time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-15.3.1'
    job.homepackage = 'AtlasProduction/15.3.1.5'
    job.transformation = 'csc_atlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite = site
    job.prodDBlock = prodDBlock

    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc4-gcc34-opt'

    fileI = FileSpec()
    fileI.dataset = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
Пример #20
0
 def run(self):
     try:
         # get job
         tmpJobs = self.taskBuffer.getFullJobStatus([self.rPandaID])
         if tmpJobs == [] or tmpJobs[0] == None:
             _logger.debug("cannot find job for PandaID=%s" % self.rPandaID)
             return
         self.job = tmpJobs[0]
         _logger.debug("%s start %s:%s:%s" % (self.token,self.job.jobDefinitionID,self.job.prodUserName,self.job.computingSite))
         # using output container
         if not self.job.destinationDBlock.endswith('/'):
             _logger.debug("%s ouput dataset container is required" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # FIXEME : dont' touch group jobs for now
         if self.job.destinationDBlock.startswith('group') and (not self.userRequest):
             _logger.debug("%s skip group jobs" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check processingType
         typesForRebro = ['pathena','prun','ganga','ganga-rbtest']
         if not self.job.processingType in typesForRebro:
             _logger.debug("%s skip processingType=%s not in %s" % \
                           (self.token,self.job.processingType,str(typesForRebro)))
             _logger.debug("%s end" % self.token)
             return
         # check jobsetID
         if self.job.jobsetID in [0,'NULL',None]:
             _logger.debug("%s jobsetID is undefined" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check metadata 
         if self.job.metadata in [None,'NULL']:
             _logger.debug("%s metadata is unavailable" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --disableRebrokerage
         match = re.search("--disableRebrokerage",self.job.metadata)
         if match != None and (not self.simulation) and (not self.forceOpt) \
                and (not self.userRequest):
             _logger.debug("%s diabled rebrokerage" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --site
         match = re.search("--site",self.job.metadata)
         if match != None and (not self.simulation) and (not self.forceOpt) \
                and (not self.userRequest):
             _logger.debug("%s --site is used" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --libDS
         match = re.search("--libDS",self.job.metadata)
         if match != None:
             _logger.debug("%s --libDS is used" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check --workingGroup since it is site-specific 
         match = re.search("--workingGroup",self.job.metadata)
         if match != None:
             _logger.debug("%s workingGroup is specified" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # avoid too many rebrokerage
         if not self.checkRev():
             _logger.debug("%s avoid too many rebrokerage" % self.token)
             _logger.debug("%s end" % self.token)
             return
         # check if multiple JobIDs use the same libDS
         if self.bPandaID != None and self.buildStatus not in ['finished','failed']:
             if self.minPandaIDlibDS == None or self.maxPandaIDlibDS == None:
                 _logger.debug("%s max/min PandaIDs are unavailable for the libDS" % self.token)
                 _logger.debug("%s end" % self.token)
                 return
             tmpPandaIDsForLibDS = self.taskBuffer.getFullJobStatus([self.minPandaIDlibDS,self.maxPandaIDlibDS])
             if len(tmpPandaIDsForLibDS) != 2 or tmpPandaIDsForLibDS[0] == None or tmpPandaIDsForLibDS[1] == None:
                 _logger.debug("%s failed to get max/min PandaIDs for the libDS" % self.token)
                 _logger.debug("%s end" % self.token)
                 return
             # check
             if tmpPandaIDsForLibDS[0].jobDefinitionID != tmpPandaIDsForLibDS[1].jobDefinitionID:
                 _logger.debug("%s multiple JobIDs use the libDS %s:%s %s:%s" % (self.token,tmpPandaIDsForLibDS[0].jobDefinitionID,
                                                                                 self.minPandaIDlibDS,tmpPandaIDsForLibDS[1].jobDefinitionID,
                                                                                 self.maxPandaIDlibDS))
                 _logger.debug("%s end" % self.token)
                 return
         # check excludedSite
         if self.excludedSite == None:
             self.excludedSite = []
             match = re.search("--excludedSite( +|=)\s*(\'|\")*([^ \"\';$]+)",self.job.metadata)
             if match != None:
                 self.excludedSite = match.group(3).split(',')
         # remove empty
         try:
             self.excludedSite.remove('')
         except:
             pass
         _logger.debug("%s excludedSite=%s" % (self.token,str(self.excludedSite)))
         # check cloud
         if self.cloud == None:
             match = re.search("--cloud( +|=)\s*(\'|\")*([^ \"\';$]+)",self.job.metadata)
             if match != None:
                 self.cloud = match.group(3)
         _logger.debug("%s cloud=%s" % (self.token,self.cloud))
         # get inDS/LFNs
         status,tmpMapInDS,maxFileSize = self.taskBuffer.getInDatasetsForReBrokerage(self.jobID,self.userName)
         if not status:
             # failed
             _logger.error("%s failed to get inDS/LFN from DB" % self.token)
             return
         status,inputDS = self.getListDatasetsUsedByJob(tmpMapInDS)
         if not status:
             # failed
             _logger.error("%s failed" % self.token)
             return 
         # get relicas
         replicaMap = {}
         unknownSites = {} 
         for tmpDS in inputDS:
             if tmpDS.endswith('/'):
                 # container
                 status,tmpRepMaps = self.getListDatasetReplicasInContainer(tmpDS)
             else:
                 # normal dataset
                 status,tmpRepMap = self.getListDatasetReplicas(tmpDS)
                 tmpRepMaps = {tmpDS:tmpRepMap}
             if not status:
                 # failed
                 _logger.debug("%s failed" % self.token)
                 return 
             # make map per site
             for tmpDS,tmpRepMap in tmpRepMaps.iteritems():
                 for tmpSite,tmpStat in tmpRepMap.iteritems():
                     # ignore special sites
                     if tmpSite in ['CERN-PROD_TZERO','CERN-PROD_DAQ','CERN-PROD_TMPDISK']:
                         continue
                     # ignore tape sites
                     if tmpSite.endswith('TAPE'):
                         continue
                     # keep sites with unknown replica info 
                     if tmpStat[-1]['found'] == None:
                         if not unknownSites.has_key(tmpDS):
                             unknownSites[tmpDS] = []
                         unknownSites[tmpDS].append(tmpSite)
                     # ignore ToBeDeleted
                     if tmpStat[-1]['archived'] in ['ToBeDeleted',]:
                         continue
                     # change EOS
                     if tmpSite.startswith('CERN-PROD_EOS'):
                         tmpSite = 'CERN-PROD_EOS'
                     # change EOS TMP
                     if tmpSite.startswith('CERN-PROD_TMP'):
                         tmpSite = 'CERN-PROD_TMP'
                     # change DISK to SCRATCHDISK
                     tmpSite = re.sub('_[^_-]+DISK$','',tmpSite)
                     # change PERF-XYZ to SCRATCHDISK
                     tmpSite = re.sub('_PERF-[^_-]+$','',tmpSite)
                     # change PHYS-XYZ to SCRATCHDISK
                     tmpSite = re.sub('_PHYS-[^_-]+$','',tmpSite)
                     # patch for BNLPANDA
                     if tmpSite in ['BNLPANDA']:
                         tmpSite = 'BNL-OSG2'
                     # add to map    
                     if not replicaMap.has_key(tmpSite):
                         replicaMap[tmpSite] = {}
                     replicaMap[tmpSite][tmpDS] = tmpStat[-1]
         _logger.debug("%s replica map -> %s" % (self.token,str(replicaMap)))
         # refresh replica info in needed
         self.refreshReplicaInfo(unknownSites)
         # instantiate SiteMapper
         siteMapper = SiteMapper(self.taskBuffer)
         # get original DDM
         origSiteDDM = self.getAggName(siteMapper.getSite(self.job.computingSite).ddm)
         # check all datasets
         maxDQ2Sites = []
         if inputDS != []:
             # loop over all sites
             for tmpSite,tmpDsVal in replicaMap.iteritems():
                 # loop over all datasets
                 appendFlag = True
                 for tmpOrigDS in inputDS:
                     # check completeness
                     if tmpDsVal.has_key(tmpOrigDS) and tmpDsVal[tmpOrigDS]['found'] != None and \
                            tmpDsVal[tmpOrigDS]['total'] == tmpDsVal[tmpOrigDS]['found']:
                         pass
                     else:
                         appendFlag = False
                 # append
                 if appendFlag:
                     if not tmpSite in maxDQ2Sites:
                         maxDQ2Sites.append(tmpSite)
         _logger.debug("%s candidate DQ2s -> %s" % (self.token,str(maxDQ2Sites)))
         if inputDS != [] and maxDQ2Sites == []:
             _logger.debug("%s no DQ2 candidate" % self.token)
         else:
             maxPandaSites = []
             # original maxinputsize
             origMaxInputSize = siteMapper.getSite(self.job.computingSite).maxinputsize
             # look for Panda siteIDs
             for tmpSiteID,tmpSiteSpec in siteMapper.siteSpecList.iteritems():
                 # use ANALY_ only
                 if not tmpSiteID.startswith('ANALY_'):
                     continue
                 # remove test and local
                 if re.search('_test',tmpSiteID,re.I) != None:
                     continue
                 if re.search('_local',tmpSiteID,re.I) != None:
                     continue
                 # avoid same site
                 if self.avoidSameSite and self.getAggName(tmpSiteSpec.ddm) == origSiteDDM:
                     continue
                 # check DQ2 ID
                 if self.cloud in [None,tmpSiteSpec.cloud] \
                        and (self.getAggName(tmpSiteSpec.ddm) in maxDQ2Sites or inputDS == []):
                     # excluded sites
                     excludedFlag = False
                     for tmpExcSite in self.excludedSite:
                         if re.search(tmpExcSite,tmpSiteID) != None:
                             excludedFlag = True
                             break
                     if excludedFlag:
                         _logger.debug("%s skip %s since excluded" % (self.token,tmpSiteID))
                         continue
                     # use online only
                     if tmpSiteSpec.status != 'online':
                         _logger.debug("%s skip %s status=%s" % (self.token,tmpSiteID,tmpSiteSpec.status))
                         continue
                     # check maxinputsize
                     if (maxFileSize == None and origMaxInputSize > siteMapper.getSite(tmpSiteID).maxinputsize) or \
                            maxFileSize > siteMapper.getSite(tmpSiteID).maxinputsize:
                         _logger.debug("%s skip %s due to maxinputsize" % (self.token,tmpSiteID))
                         continue
                     # append
                     if not tmpSiteID in maxPandaSites:
                         maxPandaSites.append(tmpSiteID)
             # choose at most 20 sites randomly to avoid too many lookup            
             random.shuffle(maxPandaSites)
             maxPandaSites = maxPandaSites[:20]
             _logger.debug("%s candidate PandaSites -> %s" % (self.token,str(maxPandaSites)))
             # no Panda siteIDs            
             if maxPandaSites == []:            
                 _logger.debug("%s no Panda site candidate" % self.token)
             else:
                 # set AtlasRelease and cmtConfig to dummy job
                 tmpJobForBrokerage = JobSpec()
                 if self.job.AtlasRelease in ['NULL',None]:
                     tmpJobForBrokerage.AtlasRelease = ''
                 else:
                     tmpJobForBrokerage.AtlasRelease = self.job.AtlasRelease
                 # use nightlies
                 matchNight = re.search('^AnalysisTransforms-.*_(rel_\d+)$',self.job.homepackage)
                 if matchNight != None:
                     tmpJobForBrokerage.AtlasRelease += ':%s' % matchNight.group(1)
                 # use cache
                 else:
                     matchCache = re.search('^AnalysisTransforms-([^/]+)',self.job.homepackage)
                     if matchCache != None:
                         tmpJobForBrokerage.AtlasRelease = matchCache.group(1).replace('_','-')
                 if not self.job.cmtConfig in ['NULL',None]:    
                     tmpJobForBrokerage.cmtConfig = self.job.cmtConfig
                 # memory size
                 if not self.job.minRamCount in ['NULL',None,0]:
                     tmpJobForBrokerage.minRamCount = self.job.minRamCount
                 # CPU count
                 if not self.job.maxCpuCount in ['NULL',None,0]:
                     tmpJobForBrokerage.maxCpuCount = self.job.maxCpuCount
                 # run brokerage
                 brokerage.broker.schedule([tmpJobForBrokerage],self.taskBuffer,siteMapper,forAnalysis=True,
                                           setScanSiteList=maxPandaSites,trustIS=True,reportLog=True)
                 newSiteID = tmpJobForBrokerage.computingSite
                 self.brokerageInfo += tmpJobForBrokerage.brokerageErrorDiag
                 _logger.debug("%s runBrokerage - > %s" % (self.token,newSiteID))
                 # unknown site
                 if not siteMapper.checkSite(newSiteID):
                     _logger.error("%s unknown site" % self.token)
                     _logger.debug("%s failed" % self.token)
                     return 
                 # get new site spec
                 newSiteSpec = siteMapper.getSite(newSiteID)
                 # avoid repetition
                 if self.getAggName(newSiteSpec.ddm) == origSiteDDM:
                     _logger.debug("%s assigned to the same site %s " % (self.token,newSiteID))
                     _logger.debug("%s end" % self.token)                        
                     return
                 # simulation mode
                 if self.simulation:
                     _logger.debug("%s end simulation" % self.token)                        
                     return
                 # prepare jobs
                 status = self.prepareJob(newSiteID,newSiteSpec)
                 if status:
                     # run SetUpper
                     statusSetUp = self.runSetUpper()
                     if not statusSetUp:
                         _logger.debug("%s runSetUpper failed" % self.token)
                     else:
                         _logger.debug("%s successfully assigned to %s" % (self.token,newSiteID))
         _logger.debug("%s end" % self.token)
     except:
         errType,errValue,errTraceBack = sys.exc_info()
         _logger.error("%s run() : %s %s" % (self.token,errType,errValue))
Пример #21
0
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

site = sys.argv[1]
cloud = sys.argv[2]

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = None

jobList = []

for i in range(1):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), i)
    job.AtlasRelease = 'Atlas-15.6.10'
    job.homepackage = 'AtlasProduction/15.6.10.1'
    job.transformation = 'Evgen_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 10000
    job.prodSourceLabel = 'test'
    job.computingSite = site
    job.cloud = cloud
    job.cmtConfig = 'i686-slc5-gcc43-opt'

    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % job.jobName
    file.destinationDBlock = job.destinationDBlock
    file.destinationSE = job.destinationSE
    file.dataset = job.destinationDBlock
Пример #22
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = None

files = {
    'daq.ATLAS.0092045.physics.RPCwBeam.LB0016.SFO-2._0009.data': None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-14.4.0'
    job.homepackage = 'AtlasTier0/14.4.0.2'
    job.transformation = 'Reco_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.computingSite = site
    job.prodDBlock = 'data08_cos.00092045.physics_RPCwBeam.daq.RAW.o4_T1224560091'

    job.prodSourceLabel = 'test'
    job.processingType = 'reprocessing'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc4-gcc34-opt'

    origParams = """inputBSFile=daq.ATLAS.0092045.physics.RPCwBeam.LB0016.SFO-2._0009.data maxEvents=5 skipEvents=0 autoConfiguration=FieldAndGeo preInclude=RecExCommission/RecExCommission.py,RecExCommission/MinimalCommissioningSetup.py,RecJobTransforms/UseOracle.py preExec="jetFlags.Enabled.set_Value_and_Lock(False)" DBRelease=DBRelease-6.2.1.5.tar.gz conditionsTag=COMCOND-ES1C-000-00 RunNumber=92045 beamType=cosmics AMITag=r595 projectName=data08_cos trigStream=physics_RPCwBeam outputTypes=DPDCOMM outputESDFile=ESD.029868._01110.pool.root outputTAGComm=TAG_COMM.029868._01110.pool.root outputAODFile=AOD.029868._01110.pool.root outputMergedDQMonitorFile=DQM_MERGED.029868._01110.root DPD_PIXELCOMM=DPD_PIXELCOMM.029868._01110.pool.root DPD_SCTCOMM=DPD_SCTCOMM.029868._01110.pool.root DPD_IDCOMM=DPD_IDCOMM.029868._01110.pool.root DPD_IDPROJCOMM=DPD_IDPROJCOMM.029868._01110.pool.root DPD_CALOCOMM=DPD_CALOCOMM.029868._01110.pool.root DPD_TILECOMM=DPD_TILECOMM.029868._01110.pool.root DPD_EMCLUSTCOMM=DPD_EMCLUSTCOMM.029868._01110.pool.root DPD_EGAMMACOMM=DPD_EGAMMACOMM.029868._01110.pool.root DPD_RPCCOMM=DPD_RPCCOMM.029868._01110.pool.root DPD_TGCCOMM=DPD_TGCCOMM.029868._01110.pool.root --ignoreunknown"""
Пример #23
0
 def run(self):
     try:
         self.putLog('start %s' % self.evpFileName)            
         # lock evp file
         self.evpFile = open(self.evpFileName)
         try:
             fcntl.flock(self.evpFile.fileno(),fcntl.LOCK_EX|fcntl.LOCK_NB)
         except:
             # relase
             self.putLog("cannot lock %s" % self.evpFileName)
             self.evpFile.close()
             return True
         # options
         runEvtList          = []
         eventPickDataType   = ''
         eventPickStreamName = ''
         eventPickDS         = []
         eventPickAmiTag     = ''
         eventPickNumSites   = 1
         inputFileList       = []
         tagDsList           = []
         tagQuery            = ''
         tagStreamRef        = ''
         skipDaTRI           = False
         runEvtGuidMap       = {}
         ei_api              = ''
         # read evp file
         for tmpLine in self.evpFile:
             tmpMatch = re.search('^([^=]+)=(.+)$',tmpLine)
             # check format
             if tmpMatch == None:
                 continue
             tmpItems = tmpMatch.groups()
             if tmpItems[0] == 'runEvent':
                 # get run and event number
                 tmpRunEvt = tmpItems[1].split(',')
                 if len(tmpRunEvt) == 2:
                     runEvtList.append(tmpRunEvt)
             elif tmpItems[0] == 'eventPickDataType':
                 # data type
                 eventPickDataType = tmpItems[1]
             elif tmpItems[0] == 'eventPickStreamName':
                 # stream name
                 eventPickStreamName = tmpItems[1]
             elif tmpItems[0] == 'eventPickDS':
                 # dataset pattern
                 eventPickDS = tmpItems[1].split(',')
             elif tmpItems[0] == 'eventPickAmiTag':
                 # AMI tag
                 eventPickAmiTag = tmpItems[1]
             elif tmpItems[0] == 'eventPickNumSites':
                 # the number of sites where datasets are distributed
                 try:
                     eventPickNumSites = int(tmpItems[1])
                 except:
                     pass
             elif tmpItems[0] == 'userName':
                 # user name
                 self.userDN = tmpItems[1]
                 self.putLog("user=%s" % self.userDN)
             elif tmpItems[0] == 'userTaskName':
                 # user task name
                 self.userTaskName = tmpItems[1]
             elif tmpItems[0] == 'userDatasetName':
                 # user dataset name
                 self.userDatasetName = tmpItems[1]
             elif tmpItems[0] == 'lockedBy':
                 # client name
                 self.lockedBy = tmpItems[1]
             elif tmpItems[0] == 'creationTime':
                 # creation time
                 self.creationTime = tmpItems[1]
             elif tmpItems[0] == 'params':
                 # parameters
                 self.params = tmpItems[1]
             elif tmpItems[0] == 'ei_api':
                 # ei api parameter for MC
                 ei_api = tmpItems[1]
             elif tmpItems[0] == 'inputFileList':
                 # input file list
                 inputFileList = tmpItems[1].split(',')
                 try:
                     inputFileList.remove('')
                 except:
                     pass
             elif tmpItems[0] == 'tagDS':
                 # TAG dataset
                 tagDsList = tmpItems[1].split(',')
             elif tmpItems[0] == 'tagQuery':
                 # query for TAG
                 tagQuery = tmpItems[1]
             elif tmpItems[0] == 'tagStreamRef':
                 # StreamRef for TAG
                 tagStreamRef = tmpItems[1]
                 if not tagStreamRef.endswith('_ref'):
                     tagStreamRef += '_ref'
             elif tmpItems[0] == 'runEvtGuidMap':
                 # GUIDs
                 try:
                     exec "runEvtGuidMap="+tmpItems[1]
                 except:
                     pass
         # extract task name
         if self.userTaskName == '' and self.params != '':
             try:
                 tmpMatch = re.search('--outDS(=| ) *([^ ]+)',self.params)
                 if tmpMatch != None:
                     self.userTaskName = tmpMatch.group(2)
                     if not self.userTaskName.endswith('/'):
                         self.userTaskName += '/'
             except:
                 pass
         # suppress DaTRI
         if self.params != '':
             if '--eventPickSkipDaTRI' in self.params:
                 skipDaTRI = True
         # get compact user name
         compactDN = self.taskBuffer.cleanUserID(self.userDN)
         # get jediTaskID
         self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI(compactDN,self.userTaskName)
         # convert 
         if tagDsList == [] or tagQuery == '':
             # convert run/event list to dataset/file list
             tmpRet,locationMap,allFiles = self.pd2p.convertEvtRunToDatasets(runEvtList,
                                                                             eventPickDataType,
                                                                             eventPickStreamName,
                                                                             eventPickDS,
                                                                             eventPickAmiTag,
                                                                             self.userDN,
                                                                             runEvtGuidMap,
                                                                             ei_api
                                                                             )
             if not tmpRet:
                 if 'isFatal' in locationMap and locationMap['isFatal'] == True:
                     self.ignoreError = False
                 self.endWithError('Failed to convert the run/event list to a dataset/file list')
                 return False
         else:
             # get parent dataset/files with TAG
             tmpRet,locationMap,allFiles = self.pd2p.getTagParentInfoUsingTagQuery(tagDsList,tagQuery,tagStreamRef) 
             if not tmpRet:
                 self.endWithError('Failed to get parent dataset/file list with TAG')
                 return False
         # use only files in the list
         if inputFileList != []:
             tmpAllFiles = []
             for tmpFile in allFiles:
                 if tmpFile['lfn'] in inputFileList:
                     tmpAllFiles.append(tmpFile)
             allFiles = tmpAllFiles        
         # remove redundant CN from DN
         tmpDN = self.userDN
         tmpDN = re.sub('/CN=limited proxy','',tmpDN)
         tmpDN = re.sub('(/CN=proxy)+$','',tmpDN)
         # make dataset container
         tmpRet = self.pd2p.registerDatasetContainerWithDatasets(self.userDatasetName,allFiles,
                                                                 locationMap,
                                                                 nSites=eventPickNumSites,
                                                                 owner=tmpDN)
         if not tmpRet:
             self.endWithError('Failed to make a dataset container %s' % self.userDatasetName)
             return False
         # skip DaTRI
         if skipDaTRI:
             # successfully terminated
             self.putLog("skip DaTRI")
             # update task
             self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID)
         else:
             # get candidates
             tmpRet,candidateMaps = self.pd2p.getCandidates(self.userDatasetName,checkUsedFile=False,
                                                            useHidden=True)
             if not tmpRet:
                 self.endWithError('Failed to find candidate for destination')
                 return False
             # collect all candidates
             allCandidates = [] 
             for tmpDS,tmpDsVal in candidateMaps.iteritems():
                 for tmpCloud,tmpCloudVal in tmpDsVal.iteritems():
                     for tmpSiteName in tmpCloudVal[0]:
                         if not tmpSiteName in allCandidates:
                             allCandidates.append(tmpSiteName)
             if allCandidates == []:
                 self.endWithError('No candidate for destination')
                 return False
             # get list of dataset (container) names
             if eventPickNumSites > 1:
                 # decompose container to transfer datasets separately
                 tmpRet,tmpOut = self.pd2p.getListDatasetReplicasInContainer(self.userDatasetName) 
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' % self.userDatasetName)
                     return False
                 userDatasetNameList = tmpOut.keys()
             else:
                 # transfer container at once
                 userDatasetNameList = [self.userDatasetName]
             # loop over all datasets
             sitesUsed = []
             for tmpUserDatasetName in userDatasetNameList:
                 # get size of dataset container
                 tmpRet,totalInputSize = rucioAPI.getDatasetSize(tmpUserDatasetName)
                 if not tmpRet:
                     self.endWithError('Failed to get the size of %s' % tmpUserDatasetName)
                     return False
                 # run brokerage
                 tmpJob = JobSpec()
                 tmpJob.AtlasRelease = ''
                 self.putLog("run brokerage for %s" % tmpDS)
                 brokerage.broker.schedule([tmpJob],self.taskBuffer,self.siteMapper,True,allCandidates,
                                           True,datasetSize=totalInputSize)
                 if tmpJob.computingSite.startswith('ERROR'):
                     self.endWithError('brokerage failed with %s' % tmpJob.computingSite)
                     return False
                 self.putLog("site -> %s" % tmpJob.computingSite)
                 # send transfer request
                 try:
                     tmpDN = rucioAPI.parse_dn(tmpDN)
                     tmpStatus,userInfo = rucioAPI.finger(tmpDN)
                     if not tmpStatus:
                         raise RuntimeError,'user info not found for {0} with {1}'.format(tmpDN,userInfo)
                     tmpDN = userInfo['nickname']
                     tmpDQ2ID = self.siteMapper.getSite(tmpJob.computingSite).ddm_input
                     tmpMsg = "%s ds=%s site=%s id=%s" % ('registerDatasetLocation for DaTRI ',
                                                          tmpUserDatasetName,
                                                          tmpDQ2ID,
                                                          tmpDN)
                     self.putLog(tmpMsg)
                     rucioAPI.registerDatasetLocation(tmpDS,[tmpDQ2ID],lifetime=14,owner=tmpDN,
                                                      activity="User Subscriptions")
                     self.putLog('OK')
                 except:
                     errType,errValue = sys.exc_info()[:2]
                     tmpStr = 'Failed to send transfer request : %s %s' % (errType,errValue)
                     tmpStr.strip()
                     tmpStr += traceback.format_exc()
                     self.endWithError(tmpStr)
                     return False
                 # list of sites already used
                 sitesUsed.append(tmpJob.computingSite)
                 self.putLog("used %s sites" % len(sitesUsed))
                 # set candidates
                 if len(sitesUsed) >= eventPickNumSites:
                     # reset candidates to limit the number of sites
                     allCandidates = sitesUsed
                     sitesUsed = []
                 else:
                     # remove site
                     allCandidates.remove(tmpJob.computingSite)
             # send email notification for success
             tmpMsg =  'A transfer request was successfully sent to Rucio.\n'
             tmpMsg += 'Your task will get started once transfer is completed.'
             self.sendEmail(True,tmpMsg)
         try:
             # unlock and delete evp file
             fcntl.flock(self.evpFile.fileno(),fcntl.LOCK_UN)
             self.evpFile.close()
             os.remove(self.evpFileName)
         except:
             pass
         # successfully terminated
         self.putLog("end %s" % self.evpFileName)
         return True
     except:
         errType,errValue = sys.exc_info()[:2]
         self.endWithError('Got exception %s:%s %s' % (errType,errValue,traceback.format_exc()))
         return False
Пример #24
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName    = None

files = {
    'daq.ATLAS.0092045.physics.RPCwBeam.LB0016.SFO-2._0009.data':None,
    }

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),index)
    job.AtlasRelease      = 'Atlas-14.4.0'
    job.homepackage       = 'AtlasTier0/14.4.0.2'
    job.transformation    = 'Reco_trf.py'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.computingSite     = site
    job.prodDBlock        = 'data08_cos.00092045.physics_RPCwBeam.daq.RAW.o4_T1224560091' 
    
    job.prodSourceLabel   = 'test'
    job.processingType    = 'reprocessing'        
    job.currentPriority   = 10000
    job.cloud = cloud
    job.cmtConfig         = 'i686-slc4-gcc34-opt'

    origParams = """inputBSFile=daq.ATLAS.0092045.physics.RPCwBeam.LB0016.SFO-2._0009.data maxEvents=5 skipEvents=0 autoConfiguration=FieldAndGeo preInclude=RecExCommission/RecExCommission.py,RecExCommission/MinimalCommissioningSetup.py,RecJobTransforms/UseOracle.py preExec="jetFlags.Enabled.set_Value_and_Lock(False)" DBRelease=DBRelease-6.2.1.5.tar.gz conditionsTag=COMCOND-ES1C-000-00 RunNumber=92045 beamType=cosmics AMITag=r595 projectName=data08_cos trigStream=physics_RPCwBeam outputTypes=DPDCOMM outputESDFile=ESD.029868._01110.pool.root outputTAGComm=TAG_COMM.029868._01110.pool.root outputAODFile=AOD.029868._01110.pool.root outputMergedDQMonitorFile=DQM_MERGED.029868._01110.root DPD_PIXELCOMM=DPD_PIXELCOMM.029868._01110.pool.root DPD_SCTCOMM=DPD_SCTCOMM.029868._01110.pool.root DPD_IDCOMM=DPD_IDCOMM.029868._01110.pool.root DPD_IDPROJCOMM=DPD_IDPROJCOMM.029868._01110.pool.root DPD_CALOCOMM=DPD_CALOCOMM.029868._01110.pool.root DPD_TILECOMM=DPD_TILECOMM.029868._01110.pool.root DPD_EMCLUSTCOMM=DPD_EMCLUSTCOMM.029868._01110.pool.root DPD_EGAMMACOMM=DPD_EGAMMACOMM.029868._01110.pool.root DPD_RPCCOMM=DPD_RPCCOMM.029868._01110.pool.root DPD_TGCCOMM=DPD_TGCCOMM.029868._01110.pool.root --ignoreunknown"""
Пример #25
0
                                    int(taskID))

# instantiate JobSpecs
iJob = 0
jobList = []
for line in taskFile:
    iJob += 1
    job = JobSpec()
    # job ID  ###### FIXME
    job.jobDefinitionID = int(time.time()) % 10000
    # job name
    job.jobName = "%s_%05d.job" % (taskName, iJob)
    # AtlasRelease
    if len(re.findall('\.', trfVer)) > 2:
        match = re.search('^(\d+\.\d+\.\d+)', trfVer)
        job.AtlasRelease = 'Atlas-%s' % match.group(1)
    else:
        job.AtlasRelease = 'Atlas-%s' % trfVer
    # homepackage
    vers = trfVer.split('.')
    if int(vers[0]) <= 11:
        job.homepackage = 'JobTransforms'
        for ver in vers:
            job.homepackage += "-%02d" % int(ver)
    else:
        job.homepackage = 'AtlasProduction/%s' % trfVer
    # trf
    job.transformation = trf
    job.destinationDBlock = oDatasets[0]
    # prod DBlock
    job.prodDBlock = iDataset
Пример #26
0
import time
import commands
import userinterface.Client as Client
from taskbuffer.JobSpec import JobSpec
from taskbuffer.FileSpec import FileSpec

job = JobSpec()
job.jobDefinitionID = int(time.time()) % 10000
job.jobName = commands.getoutput('/usr/bin/uuidgen')
job.AtlasRelease = 'Atlas-9.0.4'
job.prodDBlock = 'pandatest.000003.dd.input'
job.destinationDBlock = 'panda.destDB.%s' % commands.getoutput(
    '/usr/bin/uuidgen')
job.destinationSE = 'BNL_SE'

ids = {
    'pandatest.000003.dd.input._00028.junk':
    '6c19e1fc-ee8c-4bae-bd4c-c9e5c73aca27',
    'pandatest.000003.dd.input._00033.junk':
    '98f79ba1-1793-4253-aac7-bdf90a51d1ee',
    'pandatest.000003.dd.input._00039.junk':
    '33660dd5-7cef-422a-a7fc-6c24cb10deb1'
}
for lfn in ids.keys():
    file = FileSpec()
    file.lfn = lfn
    file.GUID = ids[file.lfn]
    file.dataset = 'pandatest.000003.dd.input'
    file.type = 'input'
    job.addFile(file)
Пример #27
0
if len(sys.argv)>1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName    = 'BNL_ATLAS_2'

jobList = []
for i in range(20):
    
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = commands.getoutput('uuidgen') 
    job.AtlasRelease      = 'Atlas-11.0.41'
    #job.AtlasRelease      = 'Atlas-11.0.3'
    job.homepackage       = 'AnalysisTransforms'
    job.transformation    = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena'
    job.destinationDBlock = datasetName
    job.destinationSE     = destName
    job.currentPriority   = 100
    job.prodSourceLabel   = 'user'
    job.computingSite     = site
    #job.prodDBlock        = "pandatest.b1599dfa-cd36-4fc5-92f6-495781a94c66"
    job.prodDBlock        = "pandatest.f228b051-077b-4f81-90bf-496340644379"
    
    fileI = FileSpec()
    fileI.dataset    = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
    fileI.lfn = "lib.f228b051-077b-4f81-90bf-496340644379.tgz"
Пример #28
0
datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')

files = {
    inputFile: None,
}

jobList = []

index = 0
for lfn in files.keys():
    index += 1
    job = JobSpec()
    job.jobDefinitionID = (time.time()) % 10000
    job.jobName = "%s_%d" % (commands.getoutput('uuidgen'), index)
    job.AtlasRelease = 'Atlas-16.6.2'
    job.homepackage = 'AtlasProduction/16.6.2.1'
    job.transformation = 'AtlasG4_trf.py'
    job.destinationDBlock = datasetName
    job.computingSite = site
    job.prodDBlock = prodDBlock

    job.prodSourceLabel = 'test'
    job.processingType = 'test'
    job.currentPriority = 10000
    job.cloud = cloud
    job.cmtConfig = 'i686-slc5-gcc43-opt'

    fileI = FileSpec()
    fileI.dataset = job.prodDBlock
    fileI.prodDBlock = job.prodDBlock
Пример #29
0
from taskbuffer.FileSpec import FileSpec

if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

jobList = []
for i in range(2):
    datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
    destName = 'ANALY_BNL_ATLAS_1'

    job = JobSpec()
    job.jobDefinitionID = 1
    job.jobName = commands.getoutput('uuidgen')
    job.AtlasRelease = 'Atlas-12.0.2'
    job.homepackage = 'AnalysisTransforms'
    job.transformation = 'https://gridui01.usatlas.bnl.gov:24443/dav/test/runAthena2'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 3000
    job.prodSourceLabel = 'user'
    job.computingSite = site
    job.prodDBlock = 'testIdeal_06.005001.pythia_minbias.recon.AOD.v12000103'

    fileOL = FileSpec()
    fileOL.lfn = "%s.job.log.tgz" % commands.getoutput('uuidgen')
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'
Пример #30
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the specific aspec of each subjob.
           Returns: subjobconfig list of objects understood by backends."""

        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
        from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs

        # make sure we have the correct siteType
        refreshPandaSpecs()

        job = app._getParent()
        masterjob = job._getRoot()

        logger.debug('ProdTransPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        job.backend.actualCE = job.backend.site
        job.backend.requirements.cloud = Client.PandaSites[
            job.backend.site]['cloud']

        # check that the site is in a submit-able status
        if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
            allowed_sites = job.backend.list_ddm_sites()

        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
            tmpDsExist = False
            if (configPanda['processingType'].startswith('gangarobot') or
                    configPanda['processingType'].startswith('hammercloud')):
                #if Client.getDatasets(job.outputdata.datasetname):
                if getDatasets(job.outputdata.datasetname):
                    tmpDsExist = True
                    logger.info('Re-using output dataset %s' %
                                job.outputdata.datasetname)
            if not configPanda[
                    'specialHandling'] == 'ddm:rucio' and not configPanda[
                        'processingType'].startswith(
                            'gangarobot'
                        ) and not configPanda['processingType'].startswith(
                            'hammercloud') and not configPanda[
                                'processingType'].startswith('rucio_test'):
                Client.addDataset(job.outputdata.datasetname,
                                  False,
                                  location=outDsLocation,
                                  allowProdDisk=True,
                                  dsExist=tmpDsExist)
            logger.info('Output dataset %s registered at %s' %
                        (job.outputdata.datasetname, outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
        except exceptions.SystemExit:
            raise BackendError(
                'Panda', 'Exception in adding dataset %s: %s %s' %
                (job.outputdata.datasetname, sys.exc_info()[0],
                 sys.exc_info()[1]))

        # JobSpec.
        jspec = JobSpec()
        jspec.currentPriority = app.priority
        jspec.jobDefinitionID = masterjob.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.coreCount = app.core_count
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
        jspec.homepackage = app.home_package
        jspec.transformation = app.transformation
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = job.backend.site
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        if app.prod_source_label:
            jspec.prodSourceLabel = app.prod_source_label
        else:
            jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.specialHandling = configPanda['specialHandling']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.cmtConfig = app.atlas_cmtconfig
        if app.dbrelease == 'LATEST':
            try:
                latest_dbrelease = getLatestDBReleaseCaching()
            except:
                from pandatools import Client
                latest_dbrelease = Client.getLatestDBRelease()
            m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
            if m:
                self.dbrelease_dataset = m.group(1)
                self.dbrelease = m.group(2)
            else:
                raise ApplicationConfigurationError(
                    None,
                    "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually."
                )
        else:
            self.dbrelease_dataset = app.dbrelease_dataset
            self.dbrelease = app.dbrelease
        jspec.jobParameters = app.job_parameters

        if self.dbrelease:
            if self.dbrelease == 'current':
                jspec.jobParameters += ' --DBRelease=current'
            else:
                if jspec.transformation.endswith(
                        "_tf.py") or jspec.transformation.endswith("_tf"):
                    jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (
                        self.dbrelease, )
                else:
                    jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (
                        self.dbrelease, )
                dbspec = FileSpec()
                dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
                dbspec.dataset = self.dbrelease_dataset
                dbspec.prodDBlock = jspec.prodDBlock
                dbspec.type = 'input'
                jspec.addFile(dbspec)

        if job.inputdata:
            m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)',
                          job.inputdata.dataset[0])
            if not m:
                logger.error("Error retrieving run number from dataset name")
                #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
                runnumber = 105200
            else:
                runnumber = int(m.group(2))
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --runNumber %d' % runnumber
            else:
                jspec.jobParameters += ' RunNumber=%d' % runnumber

        # Output files.
        randomized_lfns = []
        ilfn = 0
        for lfn, lfntype in zip(app.output_files, app.output_type):
            ofspec = FileSpec()
            if app.randomize_lfns:
                randomized_lfn = lfn + (
                    '.%s.%d.%s' %
                    (job.backend.site, int(time.time()),
                     commands.getoutput('uuidgen 2> /dev/null')[:4]))
            else:
                randomized_lfn = lfn
            ofspec.lfn = randomized_lfn
            randomized_lfns.append(randomized_lfn)
            ofspec.destinationDBlock = jspec.destinationDBlock
            ofspec.destinationSE = jspec.destinationSE
            ofspec.dataset = jspec.destinationDBlock
            ofspec.type = 'output'
            jspec.addFile(ofspec)
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --output%sFile %s' % (
                    lfntype, randomized_lfns[ilfn])
            else:
                jspec.jobParameters += ' output%sFile=%s' % (
                    lfntype, randomized_lfns[ilfn])
            ilfn = ilfn + 1

        # Input files.
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(
                    job.inputdata.guids, job.inputdata.names,
                    job.inputdata.sizes, job.inputdata.checksums,
                    job.inputdata.scopes):
                ifspec = FileSpec()
                ifspec.lfn = lfn
                ifspec.GUID = guid
                ifspec.fsize = size
                ifspec.md5sum = checksum
                ifspec.scope = scope
                ifspec.dataset = jspec.prodDBlock
                ifspec.prodDBlock = jspec.prodDBlock
                ifspec.type = 'input'
                jspec.addFile(ifspec)
            if app.input_type:
                itype = app.input_type
            else:
                itype = m.group(5)
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(
                    job.inputdata.names))
            else:
                jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(
                    job.inputdata.names))

        # Log files.
        lfspec = FileSpec()
        lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
        lfspec.destinationDBlock = jspec.destinationDBlock
        lfspec.destinationSE = jspec.destinationSE
        lfspec.dataset = jspec.destinationDBlock
        lfspec.type = 'log'
        jspec.addFile(lfspec)

        return jspec
Пример #31
0
    def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig):
        '''prepare the subjob specific configuration'''
 
        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s', job.getFQID('.'))
        
        try:
            assert self.outsite
        except:
            logger.error("outsite not set. Aborting")
            raise Exception()
        
        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud
        

        # now just filling the job from AthenaMC data
        
        jspec = JobSpec()
        jspec.jobDefinitionID   = job._getRoot().id
        jspec.jobName           = commands.getoutput('uuidgen 2> /dev/null')  
        jspec.AtlasRelease      = 'Atlas-%s' % app.atlas_rel
        
        if app.transform_archive:
            jspec.homepackage       = 'AnalysisTransforms'+app.transform_archive
        elif app.prod_release:
            jspec.homepackage       = 'AnalysisTransforms-AtlasProduction_'+str(app.prod_release)
        jspec.transformation    = '%s/runAthena-00-00-11' % Client.baseURLSUB
            
        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock    = 'NULL'
        if job.inputdata and len(app.inputfiles)>0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock    = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset=""
        for type in ["EVNT","RDO","HITS","AOD","ESD","NTUP"]:
            if type in app.outputpaths.keys():
                outdset=string.replace(app.outputpaths[type],"/",".")
                outdset=outdset[1:-1]
                break
        if not outdset:
            try:
                assert len(app.outputpaths.keys())>0
            except:
                logger.error("app.outputpaths is empty: check your output datasets")
                raise
            type=app.outputpaths.keys()[0]
            outdset=string.replace(app.outputpaths[type],"/",".")
            outdset=outdset[1:-1]
            
        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel   = 'user'
        jspec.assignedPriority  = 1000
        jspec.cloud             = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite     = self.outsite
        jspec.cmtConfig         = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
#       library (source files)
        flib = FileSpec()
        flib.lfn            = self.library
#        flib.GUID           = 
        flib.type           = 'input'
#        flib.status         = 
        flib.dataset        = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid=app.turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid=app.dbturls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid=app.minbias_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid=app.cavern_turls[lfn].replace("guid:","")
            finp = FileSpec()
            finp.lfn            = lfn
            finp.GUID           = useguid
            finp.dataset        = app.dsetmap[lfn]
            finp.prodDBlock     = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type           = 'input'
            finp.status         = 'ready'
            jspec.addFile(finp)
            

#       output files( this includes the logfiles)
        # Output files
        jidtag=""
        job = app._getParent() # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id       
        outfiles=app.subjobsOutfiles[job.id]
        pandaOutfiles={}
        for type in outfiles.keys():
            pandaOutfiles[type]=outfiles[type]+"."+str(jidtag)
            if type=="LOG":
                pandaOutfiles[type]+=".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset=string.replace(app.outputpaths[outtype],"/",".")
            dset=dset[1:-1]
            fout.dataset=dset
            fout.lfn=pandaOutfiles[outtype]
            fout.type              = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE    = jspec.destinationSE
            if outtype=='LOG':
                fout.type='log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE     = job.backend.site
            jspec.addFile(fout)


        #       job parameters
        param =  '-l %s ' % self.library # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '
        
        # transform parameters
        # need to update arglist with final output file name...
        newArgs=[]
        if app.mode == "evgen":
            app.args[3]=app.args[3]+" -t "
            if app.verbosity:
                app.args[3]=app.args[3]+" -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type])>-1:
                    arg=arg.replace(outfiles[type],pandaOutfiles[type])

            newArgs.append(arg)
        arglist=string.join(newArgs," ")
#        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles=app.inputfiles+app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles)>0:
            param+= ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles)>0:
            param+= ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        del pandaOutfiles["LOG"] # logfiles do not appear in IROOT block, and this one is not needed anymore...
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL        
        matchURL = re.search("(http.*://[^/]+)/",Client.baseURLSSL)
        if matchURL != None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"


        jspec.jobParameters = param
        jspec.metadata="--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return
        
        return jspec
Пример #32
0
if len(sys.argv) > 1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
destName = 'BNL_SE'

jobListE = []
lfnListE = []

for i in range(2):
    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = commands.getoutput('uuidgen')
    job.AtlasRelease = 'Atlas-11.0.3'
    job.homepackage = 'JobTransforms-11-00-03-03'
    job.transformation = 'share/csc.evgen.trf'
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 1000
    job.prodSourceLabel = 'test'
    job.computingSite = site

    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % commands.getoutput('uuidgen')
    lfnListE.append(file.lfn)
    file.lfn += ('.%d' % (i + 1))
    file.destinationDBlock = job.destinationDBlock
    file.destinationSE = job.destinationSE
    file.dataset = job.destinationDBlock
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the specific aspec of each subjob.
           Returns: subjobconfig list of objects understood by backends."""

        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec
        from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
        from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs
        
        # make sure we have the correct siteType
        refreshPandaSpecs()

        job = app._getParent()
        masterjob = job._getRoot()

        logger.debug('ProdTransPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        job.backend.actualCE = job.backend.site
        job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud']

        # check that the site is in a submit-able status
        if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
            allowed_sites = job.backend.list_ddm_sites()

        try:
            outDsLocation = Client.PandaSites[job.backend.site]['ddm']
            tmpDsExist = False
            if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')):
                #if Client.getDatasets(job.outputdata.datasetname):
                if getDatasets(job.outputdata.datasetname):
                    tmpDsExist = True
                    logger.info('Re-using output dataset %s'%job.outputdata.datasetname)
            if not configPanda['specialHandling']=='ddm:rucio' and not  configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'):
                Client.addDataset(job.outputdata.datasetname,False,location=outDsLocation,allowProdDisk=True,dsExist=tmpDsExist)
            logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,outDsLocation))
            dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
        except exceptions.SystemExit:
            raise BackendError('Panda','Exception in adding dataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))
        
        # JobSpec.
        jspec = JobSpec()
        jspec.currentPriority = app.priority
        jspec.jobDefinitionID = masterjob.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.coreCount = app.core_count
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
        jspec.homepackage = app.home_package
        jspec.transformation = app.transformation
        jspec.destinationDBlock = job.outputdata.datasetname
        if job.outputdata.location:
            jspec.destinationSE = job.outputdata.location
        else:
            jspec.destinationSE = job.backend.site
        if job.inputdata:
            jspec.prodDBlock = job.inputdata.dataset[0]
        else:
            jspec.prodDBlock = 'NULL'
        if app.prod_source_label:
            jspec.prodSourceLabel = app.prod_source_label
        else:
            jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
        jspec.processingType = configPanda['processingType']
        jspec.specialHandling = configPanda['specialHandling']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.cmtConfig = app.atlas_cmtconfig
        if app.dbrelease == 'LATEST':
            try:
                latest_dbrelease = getLatestDBReleaseCaching()
            except:
                from pandatools import Client
                latest_dbrelease = Client.getLatestDBRelease()
            m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
            if m:
                self.dbrelease_dataset = m.group(1)
                self.dbrelease = m.group(2)
            else:
                raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually.")
        else:
            self.dbrelease_dataset = app.dbrelease_dataset
            self.dbrelease = app.dbrelease
        jspec.jobParameters = app.job_parameters

        if self.dbrelease:
            if self.dbrelease == 'current':
                jspec.jobParameters += ' --DBRelease=current' 
            else:
                if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                    jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                else:
                    jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
                dbspec = FileSpec()
                dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
                dbspec.dataset = self.dbrelease_dataset
                dbspec.prodDBlock = jspec.prodDBlock
                dbspec.type = 'input'
                jspec.addFile(dbspec)

        if job.inputdata:
            m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)',
                          job.inputdata.dataset[0])
            if not m:
                logger.error("Error retrieving run number from dataset name")
                #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
                runnumber = 105200
            else:
                runnumber = int(m.group(2))
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --runNumber %d' % runnumber
            else:
                jspec.jobParameters += ' RunNumber=%d' % runnumber
        
        # Output files.
        randomized_lfns = []
        ilfn = 0
        for lfn, lfntype in zip(app.output_files,app.output_type):
            ofspec = FileSpec()
            if app.randomize_lfns:
                randomized_lfn = lfn + ('.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4] ) )
            else:
                randomized_lfn = lfn
            ofspec.lfn = randomized_lfn
            randomized_lfns.append(randomized_lfn)
            ofspec.destinationDBlock = jspec.destinationDBlock
            ofspec.destinationSE = jspec.destinationSE
            ofspec.dataset = jspec.destinationDBlock
            ofspec.type = 'output'
            jspec.addFile(ofspec)
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --output%sFile %s' % (lfntype, randomized_lfns[ilfn])
            else:
                jspec.jobParameters += ' output%sFile=%s' % (lfntype, randomized_lfns[ilfn])
            ilfn=ilfn+1

        # Input files.
        if job.inputdata:
            for guid, lfn, size, checksum, scope in zip(job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
                ifspec = FileSpec()
                ifspec.lfn = lfn
                ifspec.GUID = guid
                ifspec.fsize = size
                ifspec.md5sum = checksum
                ifspec.scope = scope
                ifspec.dataset = jspec.prodDBlock
                ifspec.prodDBlock = jspec.prodDBlock
                ifspec.type = 'input'
                jspec.addFile(ifspec)
            if app.input_type:
                itype = app.input_type
            else:
                itype = m.group(5)
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(job.inputdata.names))
            else:
                jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(job.inputdata.names))

        # Log files.
        lfspec = FileSpec()
        lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
        lfspec.destinationDBlock = jspec.destinationDBlock
        lfspec.destinationSE  = jspec.destinationSE
        lfspec.dataset = jspec.destinationDBlock
        lfspec.type = 'log'
        jspec.addFile(lfspec)
        
        return jspec
Пример #34
0
if len(sys.argv)>1:
    site = sys.argv[1]
else:
    site = None

datasetName = 'panda.destDB.%s' % commands.getoutput('uuidgen')
#destName   = 'BNL_SE'

jobList = []

for i in [999905,999906,999907]:
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),i)
    job.AtlasRelease      = 'Atlas-14.1.0'
    job.homepackage       = 'AtlasProduction/12.0.6.2'
    job.transformation    = 'csc_evgen_trf.py'
    job.destinationDBlock = datasetName
    #job.destinationSE     = destName
    job.currentPriority   = 1000
    job.prodSourceLabel   = 'managed'
    #job.prodSourceLabel   = 'test'
    #job.computingSite     = site
    job.cmtConfig         = 'i686-slc4-gcc34-opt'
    job.metadata          = 'evgen;%s;%s;%s' % (str({'FR': 46, 'NL': 45, 'NDGF': 300, 'CERN': 19, 'TW': 44110, 'CA': 2922, 'DE': 9903, 'IT': 1168, 'US': 6226, 'UK': 1026, 'ES': 26619}),str({999907:100,999906:200,999905:300}),str({999905:100,999906:910,999907:500}))
    #job.metadata          = 'evgen;%s' % str({'FR': 46, 'NL': 45, 'NDGF': 300, 'CERN': 19, 'TW': 44110, 'CA': 2922, 'DE': 9903, 'IT': 1168, 'US': 6226, 'UK': 1026, 'ES': 26619})

    #job.cloud = "UK"
    job.taskID = i
    
Пример #35
0
else:
    site = None
    cloud = None
    
datasetName = 'panda.destDB.%s_tid999991' % commands.getoutput('uuidgen')
taskid = 999989

jobList = []

for i in range(1):
    job = JobSpec()
    job.jobDefinitionID   = int(time.time()) % 10000
    job.jobName           = "%s_%d" % (commands.getoutput('uuidgen'),i)
#    job.AtlasRelease      = 'Atlas-12.0.6'
#    job.homepackage       = 'AtlasProduction/12.0.6.5'
    job.AtlasRelease      = 'Atlas-12.0.7'
    job.homepackage       = 'AtlasProduction/12.0.7.1'

    job.transformation    = 'csc_evgen_trf.py'
    job.destinationDBlock = datasetName
#    job.destinationSE     = destName
#    job.cloud             = 'CA'
    job.cloud             = cloud
    job.taskID = taskid
    job.currentPriority   = 1000
    job.prodSourceLabel   = 'test'
#    job.prodSourceLabel   = 'cloudtest'
    job.computingSite     = site
    
    file = FileSpec()
    file.lfn = "%s.evgen.pool.root" % job.jobName
Пример #36
0
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        '''prepare the subjob specific configuration'''

        # PandaTools
        from pandatools import Client
        from pandatools import AthenaUtils
        from taskbuffer.JobSpec import JobSpec
        from taskbuffer.FileSpec import FileSpec

        job = app._getParent()
        logger.debug('AthenaMCPandaRTHandler prepare called for %s',
                     job.getFQID('.'))

        try:
            assert self.outsite
        except:
            logger.error("outsite not set. Aborting")
            raise Exception()

        job.backend.site = self.outsite
        job.backend.actualCE = self.outsite
        cloud = job._getRoot().backend.requirements.cloud
        job.backend.requirements.cloud = cloud

        # now just filling the job from AthenaMC data

        jspec = JobSpec()
        jspec.jobDefinitionID = job._getRoot().id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.AtlasRelease = 'Atlas-%s' % app.atlas_rel

        if app.transform_archive:
            jspec.homepackage = 'AnalysisTransforms' + app.transform_archive
        elif app.prod_release:
            jspec.homepackage = 'AnalysisTransforms-AtlasProduction_' + str(
                app.prod_release)
        jspec.transformation = '%s/runAthena-00-00-11' % Client.baseURLSUB

        #---->????  prodDBlock and destinationDBlock when facing several input / output datasets?

        jspec.prodDBlock = 'NULL'
        if job.inputdata and len(
                app.inputfiles) > 0 and app.inputfiles[0] in app.dsetmap:
            jspec.prodDBlock = app.dsetmap[app.inputfiles[0]]

        # How to specify jspec.destinationDBlock  when more than one type of output is available? Panda prod jobs seem to specify only the last output dataset
        outdset = ""
        for type in ["EVNT", "RDO", "HITS", "AOD", "ESD", "NTUP"]:
            if type in app.outputpaths.keys():
                outdset = string.replace(app.outputpaths[type], "/", ".")
                outdset = outdset[1:-1]
                break
        if not outdset:
            try:
                assert len(app.outputpaths.keys()) > 0
            except:
                logger.error(
                    "app.outputpaths is empty: check your output datasets")
                raise
            type = app.outputpaths.keys()[0]
            outdset = string.replace(app.outputpaths[type], "/", ".")
            outdset = outdset[1:-1]

        jspec.destinationDBlock = outdset
        jspec.destinationSE = self.outsite
        jspec.prodSourceLabel = 'user'
        jspec.assignedPriority = 1000
        jspec.cloud = cloud
        # memory
        if job.backend.requirements.memory != -1:
            jspec.minRamCount = job.backend.requirements.memory
        jspec.computingSite = self.outsite
        jspec.cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_rel)
        #       library (source files)
        flib = FileSpec()
        flib.lfn = self.library
        #        flib.GUID           =
        flib.type = 'input'
        #        flib.status         =
        flib.dataset = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

        #       input files FIXME: many more input types
        for lfn in app.inputfiles:
            useguid = app.turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # add dbfiles if any:
        for lfn in app.dbfiles:
            useguid = app.dbturls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # then minbias files
        for lfn in app.mbfiles:
            useguid = app.minbias_turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)
        # then cavern files
        for lfn in app.cavernfiles:
            useguid = app.cavern_turls[lfn].replace("guid:", "")
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = useguid
            finp.dataset = app.dsetmap[lfn]
            finp.prodDBlock = app.dsetmap[lfn]
            finp.prodDBlockToken = 'local'
            finp.dispatchDBlock = app.dsetmap[lfn]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)


#       output files( this includes the logfiles)
# Output files
        jidtag = ""
        job = app._getParent()  # Returns job or subjob object
        if job._getRoot().subjobs:
            jidtag = job._getRoot().id
        else:
            jidtag = "%d" % job.id
        outfiles = app.subjobsOutfiles[job.id]
        pandaOutfiles = {}
        for type in outfiles.keys():
            pandaOutfiles[type] = outfiles[type] + "." + str(jidtag)
            if type == "LOG":
                pandaOutfiles[type] += ".tgz"
        #print pandaOutfiles

        for outtype in pandaOutfiles.keys():
            fout = FileSpec()
            dset = string.replace(app.outputpaths[outtype], "/", ".")
            dset = dset[1:-1]
            fout.dataset = dset
            fout.lfn = pandaOutfiles[outtype]
            fout.type = 'output'
            #            fout.destinationDBlock = jspec.destinationDBlock
            fout.destinationDBlock = fout.dataset
            fout.destinationSE = jspec.destinationSE
            if outtype == 'LOG':
                fout.type = 'log'
                fout.destinationDBlock = fout.dataset
                fout.destinationSE = job.backend.site
            jspec.addFile(fout)

        #       job parameters
        param = '-l %s ' % self.library  # user tarball.
        # use corruption checker
        if job.backend.requirements.corCheck:
            param += '--corCheck '
        # disable to skip missing files
        if job.backend.requirements.notSkipMissing:
            param += '--notSkipMissing '

        # transform parameters
        # need to update arglist with final output file name...
        newArgs = []
        if app.mode == "evgen":
            app.args[3] = app.args[3] + " -t "
            if app.verbosity:
                app.args[3] = app.args[3] + " -l %s " % app.verbosity

        for arg in app.args[3:]:
            for type in outfiles.keys():
                if arg.find(outfiles[type]) > -1:
                    arg = arg.replace(outfiles[type], pandaOutfiles[type])

            newArgs.append(arg)
        arglist = string.join(newArgs, " ")
        #        print "Arglist:",arglist

        param += ' -r ./ '
        param += ' -j "%s"' % urllib.quote(arglist)

        allinfiles = app.inputfiles + app.dbfiles
        # Input files.
        param += ' -i "%s" ' % allinfiles
        if len(app.mbfiles) > 0:
            param += ' -m "%s" ' % app.mbfiles
        if len(app.cavernfiles) > 0:
            param += ' -n "%s" ' % app.cavernfiles
        #        param += '-m "[]" ' #%minList FIXME
        #        param += '-n "[]" ' #%cavList FIXME

        del pandaOutfiles[
            "LOG"]  # logfiles do not appear in IROOT block, and this one is not needed anymore...
        param += ' -o "{\'IROOT\':%s }"' % str(pandaOutfiles.items())

        # source URL
        matchURL = re.search("(http.*://[^/]+)/", Client.baseURLSSL)
        if matchURL != None:
            param += " --sourceURL %s " % matchURL.group(1)
        param += " --trf"

        jspec.jobParameters = param
        jspec.metadata = "--trf \"%s\"" % arglist

        #print "SUBJOB DETAILS:",jspec.values()
        if app.dryrun:
            print "job.application.dryrun activated, printing out job parameters"
            print jspec.values()
            return

        return jspec