def convertToJobFileSpec(self, datasetSpec, setType=None, useEventService=False):
    """Build a JobFileSpec that mirrors this file plus dataset-level attributes.

    datasetSpec supplies dataset/container names, the storage token and the
    destination. setType, when given, overrides this file's own type on the
    new spec. With useEventService=True and a dataset that has an object
    store, prodDBlockToken is pointed at that object store.

    Returns the populated JobFileSpec.
    """
    spec = JobFileSpec()
    # straight field copies from this file spec / the dataset spec
    spec.fileID = self.fileID
    spec.datasetID = datasetSpec.datasetID
    spec.jediTaskID = datasetSpec.jediTaskID
    spec.lfn = self.lfn
    spec.GUID = self.GUID
    spec.type = self.type if setType is None else setType
    spec.scope = self.scope
    spec.fsize = self.fsize
    spec.checksum = self.checksum
    spec.attemptNr = self.attemptNr
    # dataset-level attributes
    if datasetSpec is not None:
        # prefer the container name when one is defined
        container = datasetSpec.containerName
        if container not in (None, ''):
            spec.dataset = container
        else:
            spec.dataset = datasetSpec.datasetName
        token = datasetSpec.storageToken
        inputTypes = datasetSpec.getInputTypes()
        if self.type in inputTypes or setType in inputTypes:
            # input side: the dataset is the prod datablock
            spec.prodDBlock = datasetSpec.datasetName
            if token not in ('', None):
                spec.dispatchDBlockToken = token
        else:
            # output side: the dataset is the destination datablock
            spec.destinationDBlock = datasetSpec.datasetName
            # only the first path component of the token is used here
            if token not in ('', None):
                spec.destinationDBlockToken = token.split('/')[0]
            if datasetSpec.destination not in ('', None):
                spec.destinationSE = datasetSpec.destination
            # Event Service output goes straight to the object store
            if useEventService and datasetSpec.getObjectStore() is not None:
                spec.prodDBlockToken = 'objectstore^{0}'.format(datasetSpec.getObjectStore())
            if datasetSpec.isAllowedNoOutput():
                spec.allowNoOutput()
    return spec
def convertToJobFileSpec(self, datasetSpec, setType=None, useEventService=False):
    """Convert this JEDI file spec to a Panda JobFileSpec.

    :param datasetSpec: dataset spec providing dataset/container names,
        storage token and destination; IDs are copied from it unconditionally.
    :param setType: optional override for the file type (defaults to self.type).
    :param useEventService: when True and the dataset has an object store,
        prodDBlockToken is set to point at that object store.
    :return: the populated JobFileSpec.
    """
    jobFileSpec = JobFileSpec()
    jobFileSpec.fileID = self.fileID
    jobFileSpec.datasetID = datasetSpec.datasetID
    jobFileSpec.jediTaskID = datasetSpec.jediTaskID
    jobFileSpec.lfn = self.lfn
    jobFileSpec.GUID = self.GUID
    # None/singleton comparison via "is", per PEP 8 (was "== None")
    if setType is None:
        jobFileSpec.type = self.type
    else:
        jobFileSpec.type = setType
    jobFileSpec.scope = self.scope
    jobFileSpec.fsize = self.fsize
    jobFileSpec.checksum = self.checksum
    jobFileSpec.attemptNr = self.attemptNr
    # dataset attribute
    if datasetSpec is not None:
        # dataset: prefer the container name when one is defined
        if datasetSpec.containerName not in [None, '']:
            jobFileSpec.dataset = datasetSpec.containerName
        else:
            jobFileSpec.dataset = datasetSpec.datasetName
        if self.type in datasetSpec.getInputTypes() or setType in datasetSpec.getInputTypes():
            # prodDBlock
            jobFileSpec.prodDBlock = datasetSpec.datasetName
            # storage token
            if datasetSpec.storageToken not in ['', None]:
                jobFileSpec.dispatchDBlockToken = datasetSpec.storageToken
        else:
            # destinationDBlock
            jobFileSpec.destinationDBlock = datasetSpec.datasetName
            # storage token: only the first path component is used
            if datasetSpec.storageToken not in ['', None]:
                jobFileSpec.destinationDBlockToken = datasetSpec.storageToken.split('/')[0]
            # destination
            if datasetSpec.destination not in ['', None]:
                jobFileSpec.destinationSE = datasetSpec.destination
            # set prodDBlockToken for Event Service
            if useEventService and datasetSpec.getObjectStore() is not None:
                jobFileSpec.prodDBlockToken = 'objectstore^{0}'.format(datasetSpec.getObjectStore())
            # allow no output
            if datasetSpec.isAllowedNoOutput():
                jobFileSpec.allowNoOutput()
    # return
    return jobFileSpec
def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
    '''Prepare the subjob-specific configuration.

    Builds and returns a Panda JobSpec for one (sub)job: copies the input
    dataset content onto the job (for non-split jobs), takes site/cloud from
    the master job, registers library/input/output/log FileSpecs and
    assembles the runGen parameter string.

    The arguments follow the Ganga runtime-handler interface; only ``app``
    (whose parent is the job being configured) is used directly.
    '''
    from pandatools import Client
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec

    job = app._getParent()
    logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.'))

    # in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter
    if job.inputdata and not job._getRoot().subjobs:
        if not job.inputdata.names:
            contents = job.inputdata.get_contents(overlap=False, size=True)
            for ds in contents.keys():
                for f in contents[ds]:
                    # each entry is (guid, (lfn, size, checksum, scope))
                    job.inputdata.guids.append( f[0] )
                    job.inputdata.names.append( f[1][0] )
                    job.inputdata.sizes.append( f[1][1] )
                    job.inputdata.checksums.append( f[1][2] )
                    job.inputdata.scopes.append( f[1][3] )

    # site and cloud are always inherited from the master job
    site = job._getRoot().backend.site
    job.backend.site = site
    job.backend.actualCE = site
    cloud = job._getRoot().backend.requirements.cloud
    job.backend.requirements.cloud = cloud

    # if no outputdata are given, inherit the master job's dataset name
    if not job.outputdata:
        job.outputdata = DQ2OutputDataset()
        job.outputdata.datasetname = job._getRoot().outputdata.datasetname
        #if not job.outputdata.datasetname:
    else:
        job.outputdata.datasetname = job._getRoot().outputdata.datasetname

    if not job.outputdata.datasetname:
        raise ApplicationConfigurationError('DQ2OutputDataset has no datasetname')

    jspec = JobSpec()
    jspec.jobDefinitionID = job._getRoot().id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.transformation = '%s/runGen-00-00-02' % Client.baseURLSUB
    if job.inputdata:
        jspec.prodDBlock = job.inputdata.dataset[0]
    else:
        jspec.prodDBlock = 'NULL'
    jspec.destinationDBlock = job.outputdata.datasetname
    if job.outputdata.location:
        # warn only once: on the master job or the first subjob
        if not job._getRoot().subjobs or job.id == 0:
            logger.warning('You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.')
        jspec.destinationSE = job.outputdata.location
    else:
        jspec.destinationSE = site
    jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
    jspec.processingType = configPanda['processingType']
    jspec.assignedPriority = configPanda['assignedPriorityRun']
    jspec.cloud = cloud
    # memory (-1 means no explicit requirement)
    if job.backend.requirements.memory != -1:
        jspec.minRamCount = job.backend.requirements.memory
    # cputime (-1 means no explicit requirement)
    if job.backend.requirements.cputime != -1:
        jspec.maxCpuCount = job.backend.requirements.cputime
    jspec.computingSite = site

    # library (source files)
    if job.backend.libds:
        flib = FileSpec()
        flib.lfn = self.fileBO.lfn
        flib.GUID = self.fileBO.GUID
        flib.type = 'input'
        flib.status = self.fileBO.status
        flib.dataset = self.fileBO.destinationDBlock
        flib.dispatchDBlock = self.fileBO.destinationDBlock
        jspec.addFile(flib)
    elif job.backend.bexec:
        flib = FileSpec()
        flib.lfn = self.library
        flib.type = 'input'
        flib.dataset = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

    # input files FIXME: many more input types
    if job.inputdata:
        for guid, lfn, size, checksum, scope in zip(job.inputdata.guids,job.inputdata.names,job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = guid
            finp.scope = scope
            # finp.fsize =
            # finp.md5sum =
            finp.dataset = job.inputdata.dataset[0]
            finp.prodDBlock = job.inputdata.dataset[0]
            finp.dispatchDBlock = job.inputdata.dataset[0]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)

    # output files
    # outMap = {}
    #FIXME: if options.outMeta != []:
    self.rundirectory = "."

    # log files
    flog = FileSpec()
    flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
    flog.type = 'log'
    flog.dataset = job.outputdata.datasetname
    flog.destinationDBlock = job.outputdata.datasetname
    flog.destinationSE = job.backend.site
    jspec.addFile(flog)

    # job parameters
    param = ''

    # source URL (scheme://host part of the server URL)
    matchURL = re.search("(http.*://[^/]+)/",Client.baseURLCSRVSSL)
    srcURL = ""
    if matchURL != None:
        srcURL = matchURL.group(1)
        param += " --sourceURL %s " % srcURL

    param += '-r "%s" ' % self.rundirectory

    exe_name = job.application.exe
    if job.backend.bexec == '':
        if hasattr(job.application.exe, "name"):
            exe_name = os.path.basename(job.application.exe.name)
        # set jobO parameter
        if job.application.args:
            param += ' -j "" -p "%s %s" '%(exe_name,urllib.quote(" ".join(job.application.args)))
        else:
            param += ' -j "" -p "%s" '%exe_name
        if self.inputsandbox:
            param += ' -a %s '%self.inputsandbox
    else:
        param += '-l %s ' % self.library
        param += '-j "" -p "%s %s" ' % ( exe_name,urllib.quote(" ".join(job.application.args)))

    if job.inputdata:
        param += '-i "%s" ' % job.inputdata.names

    # fill outfiles: maps requested output name -> unique LFN
    outfiles = {}
    for f in self.extOutFile:
        tarnum = 1
        if f.find('*') != -1:
            # archive * (wildcard outputs are tarred up)
            outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S") )
            tarnum += 1
        else:
            outfiles[f] = "%s.%s.%s" %(f, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))
        fout = FileSpec()
        fout.lfn = outfiles[f]
        fout.type = 'output'
        fout.dataset = job.outputdata.datasetname
        fout.destinationDBlock = job.outputdata.datasetname
        fout.destinationSE = job.backend.site
        jspec.addFile(fout)

    param += '-o "%s" ' % (outfiles) # must be double quotes, because python prints strings in 'single quotes'

    # optional chirp token on output/log files
    for file in jspec.Files:
        if file.type in [ 'output', 'log'] and configPanda['chirpconfig']:
            file.dispatchDBlockToken = configPanda['chirpconfig']
            logger.debug('chirp file %s',file)

    jspec.jobParameters = param
    return jspec
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare the specific aspect of each subjob.

    Registers the output dataset (unless processing type / special handling
    says otherwise), then builds a Panda JobSpec from the application and
    job configuration: DBRelease handling, run number extraction, output /
    input / log FileSpecs, and the transform parameter string.

    Returns: subjobconfig list of objects understood by backends
    (here: the populated JobSpec)."""
    from pandatools import Client
    from pandatools import AthenaUtils
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec
    from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
    from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs

    # make sure we have the correct siteType
    refreshPandaSpecs()

    job = app._getParent()
    masterjob = job._getRoot()

    logger.debug('ProdTransPandaRTHandler prepare called for %s', job.getFQID('.'))

    job.backend.actualCE = job.backend.site
    job.backend.requirements.cloud = Client.PandaSites[job.backend.site]['cloud']

    # check that the site is in a submit-able status
    if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
        allowed_sites = job.backend.list_ddm_sites()

    try:
        outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        tmpDsExist = False
        # gangarobot/hammercloud runs may re-use an existing output dataset
        if (configPanda['processingType'].startswith('gangarobot') or configPanda['processingType'].startswith('hammercloud')):
            #if Client.getDatasets(job.outputdata.datasetname):
            if getDatasets(job.outputdata.datasetname):
                tmpDsExist = True
                logger.info('Re-using output dataset %s'%job.outputdata.datasetname)
        if not configPanda['specialHandling']=='ddm:rucio' and not configPanda['processingType'].startswith('gangarobot') and not configPanda['processingType'].startswith('hammercloud') and not configPanda['processingType'].startswith('rucio_test'):
            Client.addDataset(job.outputdata.datasetname,False,location=outDsLocation,allowProdDisk=True,dsExist=tmpDsExist)
        logger.info('Output dataset %s registered at %s'%(job.outputdata.datasetname,outDsLocation))
        dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
    # NOTE(review): only SystemExit is converted to BackendError here; other
    # exceptions from dataset registration propagate unchanged — confirm intended.
    except exceptions.SystemExit:
        raise BackendError('Panda','Exception in adding dataset %s: %s %s'%(job.outputdata.datasetname,sys.exc_info()[0],sys.exc_info()[1]))

    # JobSpec.
    jspec = JobSpec()
    jspec.currentPriority = app.priority
    jspec.jobDefinitionID = masterjob.id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.coreCount = app.core_count
    jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
    jspec.homepackage = app.home_package
    jspec.transformation = app.transformation
    jspec.destinationDBlock = job.outputdata.datasetname
    if job.outputdata.location:
        jspec.destinationSE = job.outputdata.location
    else:
        jspec.destinationSE = job.backend.site
    if job.inputdata:
        jspec.prodDBlock = job.inputdata.dataset[0]
    else:
        jspec.prodDBlock = 'NULL'
    if app.prod_source_label:
        jspec.prodSourceLabel = app.prod_source_label
    else:
        jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
    jspec.processingType = configPanda['processingType']
    jspec.specialHandling = configPanda['specialHandling']
    jspec.computingSite = job.backend.site
    jspec.cloud = job.backend.requirements.cloud
    jspec.cmtConfig = app.atlas_cmtconfig

    # resolve the DBRelease: 'LATEST' queries the server (cached first)
    if app.dbrelease == 'LATEST':
        try:
            latest_dbrelease = getLatestDBReleaseCaching()
        # NOTE(review): bare except falls back to a direct server query
        except:
            from pandatools import Client
            latest_dbrelease = Client.getLatestDBRelease()
        m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
        if m:
            self.dbrelease_dataset = m.group(1)
            self.dbrelease = m.group(2)
        else:
            raise ApplicationConfigurationError(None, "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually.")
    else:
        self.dbrelease_dataset = app.dbrelease_dataset
        self.dbrelease = app.dbrelease
    jspec.jobParameters = app.job_parameters

    if self.dbrelease:
        if self.dbrelease == 'current':
            jspec.jobParameters += ' --DBRelease=current'
        else:
            # "_tf" transforms take --option style parameters, older ones key=value
            if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
            else:
                jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (self.dbrelease,)
            dbspec = FileSpec()
            dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
            dbspec.dataset = self.dbrelease_dataset
            dbspec.prodDBlock = jspec.prodDBlock
            dbspec.type = 'input'
            jspec.addFile(dbspec)

    if job.inputdata:
        # run number is the 2nd dot-separated field of the dataset name
        m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)', job.inputdata.dataset[0])
        if not m:
            logger.error("Error retrieving run number from dataset name")
            #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
            runnumber = 105200
        else:
            runnumber = int(m.group(2))
        if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
            jspec.jobParameters += ' --runNumber %d' % runnumber
        else:
            jspec.jobParameters += ' RunNumber=%d' % runnumber

    # Output files.
    randomized_lfns = []
    ilfn = 0
    for lfn, lfntype in zip(app.output_files,app.output_type):
        ofspec = FileSpec()
        if app.randomize_lfns:
            # suffix with site, timestamp and a short uuid to make LFNs unique
            randomized_lfn = lfn + ('.%s.%d.%s' % (job.backend.site, int(time.time()), commands.getoutput('uuidgen 2> /dev/null')[:4] ) )
        else:
            randomized_lfn = lfn
        ofspec.lfn = randomized_lfn
        randomized_lfns.append(randomized_lfn)
        ofspec.destinationDBlock = jspec.destinationDBlock
        ofspec.destinationSE = jspec.destinationSE
        ofspec.dataset = jspec.destinationDBlock
        ofspec.type = 'output'
        jspec.addFile(ofspec)
        if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
            jspec.jobParameters += ' --output%sFile %s' % (lfntype, randomized_lfns[ilfn])
        else:
            jspec.jobParameters += ' output%sFile=%s' % (lfntype, randomized_lfns[ilfn])
        ilfn=ilfn+1

    # Input files.
    if job.inputdata:
        for guid, lfn, size, checksum, scope in zip(job.inputdata.guids, job.inputdata.names, job.inputdata.sizes, job.inputdata.checksums, job.inputdata.scopes):
            ifspec = FileSpec()
            ifspec.lfn = lfn
            ifspec.GUID = guid
            ifspec.fsize = size
            ifspec.md5sum = checksum
            ifspec.scope = scope
            ifspec.dataset = jspec.prodDBlock
            ifspec.prodDBlock = jspec.prodDBlock
            ifspec.type = 'input'
            jspec.addFile(ifspec)
        if app.input_type:
            itype = app.input_type
        else:
            # NOTE(review): reuses `m` from the run-number match above; this
            # raises if the dataset name did not match — confirm intended.
            itype = m.group(5)
        if jspec.transformation.endswith("_tf.py") or jspec.transformation.endswith("_tf"):
            jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(job.inputdata.names))
        else:
            jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(job.inputdata.names))

    # Log files.
    lfspec = FileSpec()
    lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
    lfspec.destinationDBlock = jspec.destinationDBlock
    lfspec.destinationSE = jspec.destinationSE
    lfspec.dataset = jspec.destinationDBlock
    lfspec.type = 'log'
    jspec.addFile(lfspec)

    return jspec
def run(self, data):
    """Build a Panda JobSpec from the ``data`` dict, stage input files in,
    submit the job, and record the returned PandaID in the jobs table.

    data keys read here: distributive, release, parameters, input_type,
    input_params, input_files, output_type, output_params, output_files,
    jobid.

    Returns True on commit success, False on commit failure, and None if
    staging the input data failed.
    """
    # unique scratch dataset for this submission
    datasetName = 'panda:panda.destDB.%s' % commands.getoutput('uuidgen')
    destName = 'ANALY_RRC-KI-HPC'
    site = 'ANALY_RRC-KI-HPC'
    scope = config['DEFAULT_SCOPE']
    distributive = data['distributive']
    release = data['release']
    parameters = data['parameters']
    input_type = data['input_type']
    input_params = data['input_params']
    input_files = data['input_files']
    output_type = data['output_type']
    output_params = data['output_params']
    output_files = data['output_files']
    jobid = data['jobid']
    _logger.debug('Jobid: ' + str(jobid))

    job = JobSpec()
    job.jobDefinitionID = int(time.time()) % 10000
    job.jobName = commands.getoutput('uuidgen')
    job.transformation = config['DEFAULT_TRF']
    job.destinationDBlock = datasetName
    job.destinationSE = destName
    job.currentPriority = 1000
    job.prodSourceLabel = 'user'
    job.computingSite = site
    job.cloud = 'RU'
    job.prodDBlock = "%s:%s.%s" % (scope, scope, job.jobName)
    job.jobParameters = '%s %s "%s"' % (release, distributive, parameters)

    params = {}
    _logger.debug('MoveData')
    ec = 0
    # stage the input files onto the HPC side; dest path derived from prodDBlock
    ec, uploaded_input_files = movedata(
        params=params,
        fileList=input_files,
        fromType=input_type,
        fromParams=input_params,
        toType='hpc',
        toParams={'dest': '/' + re.sub(':', '/', job.prodDBlock)})
    if ec != 0:
        # NOTE(review): ec is compared to 0 but indexed as ec[1] here —
        # if movedata returns an int code this raises; confirm its return type.
        _logger.error('Move data error: ' + ec[1])
        return

    # register each staged file as a ready input
    for file in uploaded_input_files:
        fileIT = FileSpec()
        fileIT.lfn = file
        fileIT.dataset = job.prodDBlock
        fileIT.prodDBlock = job.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = scope
        fileIT.status = 'ready'
        fileIT.GUID = commands.getoutput('uuidgen')
        job.addFile(fileIT)

    # declared outputs
    for file in output_files:
        fileOT = FileSpec()
        fileOT.lfn = file
        fileOT.destinationDBlock = job.prodDBlock
        fileOT.destinationSE = job.destinationSE
        fileOT.dataset = job.prodDBlock
        fileOT.type = 'output'
        fileOT.scope = scope
        fileOT.GUID = commands.getoutput('uuidgen')
        job.addFile(fileOT)

    # pilot log tarball
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % job.jobName
    fileOL.destinationDBlock = job.destinationDBlock
    fileOL.destinationSE = job.destinationSE
    fileOL.dataset = job.destinationDBlock
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    job.addFile(fileOL)

    self.jobList.append(job)

    #submitJob
    o = self.submitJobs(self.jobList)
    x = o[0]

    #update PandaID
    conn = MySQLdb.connect(
        host=self.dbhost,
        db=self.dbname,
        # port=self.dbport,
        connect_timeout=self.dbtimeout,
        user=self.dbuser,
        passwd=self.dbpasswd)
    cur = conn.cursor()
    try:
        varDict = {}
        PandaID = int(x[0])
        varDict['id'] = jobid
        varDict['pandaId'] = PandaID
        # NOTE(review): SQL is built with %-formatting and varDict is also
        # passed to execute(); values should go through the driver's
        # parameter binding instead — verify against MySQLdb paramstyle.
        sql = "UPDATE %s SET %s.pandaId=%s WHERE %s.id=%s" % (
            self.table_jobs, self.table_jobs, varDict['pandaId'],
            self.table_jobs, varDict['id'])
        cur.execute(sql, varDict)
    # NOTE(review): bare except hides the actual DB error
    except:
        _logger.error('SENDJOB: Incorrect server response')
    try:
        conn.commit()
        return True
    except:
        _logger.error("commit error")
        return False
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    '''Prepare the subjob-specific configuration.

    Builds and returns a Panda JobSpec for one (sub)job: copies the input
    dataset content onto the job (for non-split jobs), takes site/cloud from
    the master job, registers library/input/output/log FileSpecs and
    assembles the runGen parameter string.

    The arguments follow the Ganga runtime-handler interface; only ``app``
    (whose parent is the job being configured) is used directly.
    '''
    from pandatools import Client
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec

    job = app._getParent()
    logger.debug('AthenaPandaRTHandler prepare called for %s', job.getFQID('.'))

    # in case of a simple job get the dataset content, otherwise subjobs are filled by the splitter
    if job.inputdata and not job._getRoot().subjobs:
        if not job.inputdata.names:
            contents = job.inputdata.get_contents(overlap=False, size=True)
            for ds in contents.keys():
                for f in contents[ds]:
                    # each entry is (guid, (lfn, size, checksum, scope))
                    job.inputdata.guids.append(f[0])
                    job.inputdata.names.append(f[1][0])
                    job.inputdata.sizes.append(f[1][1])
                    job.inputdata.checksums.append(f[1][2])
                    job.inputdata.scopes.append(f[1][3])

    # site and cloud are always inherited from the master job
    site = job._getRoot().backend.site
    job.backend.site = site
    job.backend.actualCE = site
    cloud = job._getRoot().backend.requirements.cloud
    job.backend.requirements.cloud = cloud

    # if no outputdata are given, inherit the master job's dataset name
    if not job.outputdata:
        job.outputdata = DQ2OutputDataset()
        job.outputdata.datasetname = job._getRoot().outputdata.datasetname
        #if not job.outputdata.datasetname:
    else:
        job.outputdata.datasetname = job._getRoot().outputdata.datasetname

    if not job.outputdata.datasetname:
        raise ApplicationConfigurationError(
            None, 'DQ2OutputDataset has no datasetname')

    jspec = JobSpec()
    jspec.jobDefinitionID = job._getRoot().id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.transformation = '%s/runGen-00-00-02' % Client.baseURLSUB
    if job.inputdata:
        jspec.prodDBlock = job.inputdata.dataset[0]
    else:
        jspec.prodDBlock = 'NULL'
    jspec.destinationDBlock = job.outputdata.datasetname
    if job.outputdata.location:
        # warn only once: on the master job or the first subjob
        if not job._getRoot().subjobs or job.id == 0:
            logger.warning(
                'You have specified outputdata.location. Note that Panda may not support writing to a user-defined output location.'
            )
        jspec.destinationSE = job.outputdata.location
    else:
        jspec.destinationSE = site
    jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
    jspec.processingType = configPanda['processingType']
    jspec.assignedPriority = configPanda['assignedPriorityRun']
    jspec.cloud = cloud
    # memory (-1 means no explicit requirement)
    if job.backend.requirements.memory != -1:
        jspec.minRamCount = job.backend.requirements.memory
    # cputime (-1 means no explicit requirement)
    if job.backend.requirements.cputime != -1:
        jspec.maxCpuCount = job.backend.requirements.cputime
    jspec.computingSite = site

    # library (source files)
    if job.backend.libds:
        flib = FileSpec()
        flib.lfn = self.fileBO.lfn
        flib.GUID = self.fileBO.GUID
        flib.type = 'input'
        flib.status = self.fileBO.status
        flib.dataset = self.fileBO.destinationDBlock
        flib.dispatchDBlock = self.fileBO.destinationDBlock
        jspec.addFile(flib)
    elif job.backend.bexec:
        flib = FileSpec()
        flib.lfn = self.library
        flib.type = 'input'
        flib.dataset = self.libDataset
        flib.dispatchDBlock = self.libDataset
        jspec.addFile(flib)

    # input files FIXME: many more input types
    if job.inputdata:
        for guid, lfn, size, checksum, scope in zip(
                job.inputdata.guids, job.inputdata.names, job.inputdata.sizes,
                job.inputdata.checksums, job.inputdata.scopes):
            finp = FileSpec()
            finp.lfn = lfn
            finp.GUID = guid
            finp.scope = scope
            # finp.fsize =
            # finp.md5sum =
            finp.dataset = job.inputdata.dataset[0]
            finp.prodDBlock = job.inputdata.dataset[0]
            finp.dispatchDBlock = job.inputdata.dataset[0]
            finp.type = 'input'
            finp.status = 'ready'
            jspec.addFile(finp)

    # output files
    # outMap = {}
    #FIXME: if options.outMeta != []:
    self.rundirectory = "."

    # log files
    flog = FileSpec()
    flog.lfn = '%s._$PANDAID.log.tgz' % job.outputdata.datasetname
    flog.type = 'log'
    flog.dataset = job.outputdata.datasetname
    flog.destinationDBlock = job.outputdata.datasetname
    flog.destinationSE = job.backend.site
    jspec.addFile(flog)

    # job parameters
    param = ''

    # source URL (scheme://host part of the server URL)
    matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL)
    srcURL = ""
    if matchURL != None:
        srcURL = matchURL.group(1)
        param += " --sourceURL %s " % srcURL

    param += '-r "%s" ' % self.rundirectory

    exe_name = job.application.exe
    if job.backend.bexec == '':
        if hasattr(job.application.exe, "name"):
            exe_name = os.path.basename(job.application.exe.name)
        # set jobO parameter
        if job.application.args:
            param += ' -j "" -p "%s %s" ' % (
                exe_name, urllib.quote(" ".join(job.application.args)))
        else:
            param += ' -j "" -p "%s" ' % exe_name
        if self.inputsandbox:
            param += ' -a %s ' % self.inputsandbox
    else:
        param += '-l %s ' % self.library
        param += '-j "" -p "%s %s" ' % (
            exe_name, urllib.quote(" ".join(job.application.args)))

    if job.inputdata:
        param += '-i "%s" ' % job.inputdata.names

    # fill outfiles: maps requested output name -> unique LFN
    outfiles = {}
    for f in self.extOutFile:
        tarnum = 1
        if f.find('*') != -1:
            # archive * (wildcard outputs are tarred up)
            outfiles[f] = "outputbox%i.%s.%s.tar.gz" % (
                tarnum, job.getFQID('.'), time.strftime("%Y%m%d%H%M%S"))
            tarnum += 1
        else:
            outfiles[f] = "%s.%s.%s" % (f, job.getFQID('.'),
                                        time.strftime("%Y%m%d%H%M%S"))
        fout = FileSpec()
        fout.lfn = outfiles[f]
        fout.type = 'output'
        fout.dataset = job.outputdata.datasetname
        fout.destinationDBlock = job.outputdata.datasetname
        fout.destinationSE = job.backend.site
        jspec.addFile(fout)

    param += '-o "%s" ' % (
        outfiles
    )  # must be double quotes, because python prints strings in 'single quotes'

    # optional chirp token on output/log files
    for file in jspec.Files:
        if file.type in ['output', 'log'] and configPanda['chirpconfig']:
            file.dispatchDBlockToken = configPanda['chirpconfig']
            logger.debug('chirp file %s', file)

    jspec.jobParameters = param
    return jspec
def send_job(jobid, siteid):
    """Build a Panda JobSpec for the stored job ``jobid``, register its
    files (inputs, outputs, log) against its container, submit it to the
    site ``siteid``, and persist the returned PandaID on the job record.

    Always returns 0; a failed submission is recorded as
    job.status = 'submit_error'.
    """
    _logger.debug('Jobid: ' + str(jobid))

    site = sites_.get(siteid)

    job = jobs_.get(int(jobid))
    cont = job.container
    files_catalog = cont.files

    fscope = getScope(job.owner.username)
    datasetName = '{}:{}'.format(fscope, cont.guid)

    distributive = job.distr.name
    release = job.distr.release

    # Prepare runScript: substitute the placeholders in the distr command
    parameters = job.distr.command
    parameters = parameters.replace("$COMMAND$", job.params)
    parameters = parameters.replace("$USERNAME$", job.owner.username)
    parameters = parameters.replace("$WORKINGGROUP$", job.owner.working_group)

    # Prepare metadata
    metadata = dict(user=job.owner.username)

    # Prepare PanDA Object
    pandajob = JobSpec()
    pandajob.jobDefinitionID = int(time.time()) % 10000
    pandajob.jobName = cont.guid
    pandajob.transformation = client_config.DEFAULT_TRF
    pandajob.destinationDBlock = datasetName
    pandajob.destinationSE = site.se
    pandajob.currentPriority = 1000
    pandajob.prodSourceLabel = 'user'
    pandajob.computingSite = site.ce
    pandajob.cloud = 'RU'
    pandajob.VO = 'atlas'
    pandajob.prodDBlock = "%s:%s" % (fscope, pandajob.jobName)
    pandajob.coreCount = job.corecount
    pandajob.metadata = json.dumps(metadata)
    #pandajob.workingGroup = job.owner.working_group

    if site.encode_commands:
        # It requires script wrapper on cluster side
        pandajob.jobParameters = '%s %s %s "%s"' % (cont.guid, release, distributive, parameters)
    else:
        pandajob.jobParameters = parameters

    # register the container's catalog entries on the JobSpec
    has_input = False
    for fcc in files_catalog:
        if fcc.type == 'input':
            f = fcc.file
            guid = f.guid
            fileIT = FileSpec()
            fileIT.lfn = f.lfn
            fileIT.dataset = pandajob.prodDBlock
            fileIT.prodDBlock = pandajob.prodDBlock
            fileIT.type = 'input'
            fileIT.scope = fscope
            fileIT.status = 'ready'
            fileIT.GUID = guid
            pandajob.addFile(fileIT)
            has_input = True
        if fcc.type == 'output':
            f = fcc.file
            fileOT = FileSpec()
            fileOT.lfn = f.lfn
            fileOT.destinationDBlock = pandajob.prodDBlock
            fileOT.destinationSE = pandajob.destinationSE
            fileOT.dataset = pandajob.prodDBlock
            fileOT.type = 'output'
            fileOT.scope = fscope
            fileOT.GUID = f.guid
            pandajob.addFile(fileOT)

            # Save replica meta
            fc.new_replica(f, site)

    if not has_input:
        # Add fake input (Panda requires at least one input file)
        fileIT = FileSpec()
        fileIT.lfn = "fake.input"
        fileIT.dataset = pandajob.prodDBlock
        fileIT.prodDBlock = pandajob.prodDBlock
        fileIT.type = 'input'
        fileIT.scope = fscope
        fileIT.status = 'ready'
        fileIT.GUID = "fake.guid"
        pandajob.addFile(fileIT)

    # Prepare log file
    fileOL = FileSpec()
    fileOL.lfn = "%s.log.tgz" % pandajob.jobName
    fileOL.destinationDBlock = pandajob.destinationDBlock
    fileOL.destinationSE = pandajob.destinationSE
    fileOL.dataset = '{}:logs'.format(fscope)
    fileOL.type = 'log'
    fileOL.scope = 'panda'
    pandajob.addFile(fileOL)

    # Save log meta
    log = File()
    log.scope = fscope
    log.lfn = fileOL.lfn
    log.guid = getGUID(log.scope, log.lfn)
    log.type = 'log'
    log.status = 'defined'
    files_.save(log)

    # Save replica meta
    fc.new_replica(log, site)

    # Register file in container
    fc.reg_file_in_cont(log, cont, 'log')

    # Submit job
    o = submitJobs([pandajob])
    x = o[0]

    try:
        #update PandaID
        PandaID = int(x[0])
        job.pandaid = PandaID
        job.ce = site.ce
    # NOTE(review): bare except — any failure parsing the server response
    # marks the job as submit_error without logging the cause.
    except:
        job.status = 'submit_error'
    jobs_.save(job)

    return 0
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare the specific aspect of each subjob.

    Registers the output dataset (unless processing type / special handling
    says otherwise), then builds a Panda JobSpec from the application and
    job configuration: DBRelease handling, run number extraction, output /
    input / log FileSpecs, and the transform parameter string.

    Returns: subjobconfig list of objects understood by backends
    (here: the populated JobSpec)."""
    from pandatools import Client
    from pandatools import AthenaUtils
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec
    from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2_set_dataset_lifetime
    from GangaPanda.Lib.Panda.Panda import refreshPandaSpecs

    # make sure we have the correct siteType
    refreshPandaSpecs()

    job = app._getParent()
    masterjob = job._getRoot()

    logger.debug('ProdTransPandaRTHandler prepare called for %s',
                 job.getFQID('.'))

    job.backend.actualCE = job.backend.site
    job.backend.requirements.cloud = Client.PandaSites[
        job.backend.site]['cloud']

    # check that the site is in a submit-able status
    if not job.splitter or job.splitter._name != 'DQ2JobSplitter':
        allowed_sites = job.backend.list_ddm_sites()

    try:
        outDsLocation = Client.PandaSites[job.backend.site]['ddm']
        tmpDsExist = False
        # gangarobot/hammercloud runs may re-use an existing output dataset
        if (configPanda['processingType'].startswith('gangarobot')
                or configPanda['processingType'].startswith('hammercloud')):
            #if Client.getDatasets(job.outputdata.datasetname):
            if getDatasets(job.outputdata.datasetname):
                tmpDsExist = True
                logger.info('Re-using output dataset %s' %
                            job.outputdata.datasetname)
        if not configPanda[
                'specialHandling'] == 'ddm:rucio' and not configPanda[
                    'processingType'].startswith(
                        'gangarobot'
                    ) and not configPanda['processingType'].startswith(
                        'hammercloud') and not configPanda[
                            'processingType'].startswith('rucio_test'):
            Client.addDataset(job.outputdata.datasetname,
                              False,
                              location=outDsLocation,
                              allowProdDisk=True,
                              dsExist=tmpDsExist)
        logger.info('Output dataset %s registered at %s' %
                    (job.outputdata.datasetname, outDsLocation))
        dq2_set_dataset_lifetime(job.outputdata.datasetname, outDsLocation)
    # NOTE(review): only SystemExit is converted to BackendError here; other
    # exceptions from dataset registration propagate unchanged — confirm intended.
    except exceptions.SystemExit:
        raise BackendError(
            'Panda', 'Exception in adding dataset %s: %s %s' %
            (job.outputdata.datasetname, sys.exc_info()[0],
             sys.exc_info()[1]))

    # JobSpec.
    jspec = JobSpec()
    jspec.currentPriority = app.priority
    jspec.jobDefinitionID = masterjob.id
    jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
    jspec.coreCount = app.core_count
    jspec.AtlasRelease = 'Atlas-%s' % app.atlas_release
    jspec.homepackage = app.home_package
    jspec.transformation = app.transformation
    jspec.destinationDBlock = job.outputdata.datasetname
    if job.outputdata.location:
        jspec.destinationSE = job.outputdata.location
    else:
        jspec.destinationSE = job.backend.site
    if job.inputdata:
        jspec.prodDBlock = job.inputdata.dataset[0]
    else:
        jspec.prodDBlock = 'NULL'
    if app.prod_source_label:
        jspec.prodSourceLabel = app.prod_source_label
    else:
        jspec.prodSourceLabel = configPanda['prodSourceLabelRun']
    jspec.processingType = configPanda['processingType']
    jspec.specialHandling = configPanda['specialHandling']
    jspec.computingSite = job.backend.site
    jspec.cloud = job.backend.requirements.cloud
    jspec.cmtConfig = app.atlas_cmtconfig

    # resolve the DBRelease: 'LATEST' queries the server (cached first)
    if app.dbrelease == 'LATEST':
        try:
            latest_dbrelease = getLatestDBReleaseCaching()
        # NOTE(review): bare except falls back to a direct server query
        except:
            from pandatools import Client
            latest_dbrelease = Client.getLatestDBRelease()
        m = re.search('(.*):DBRelease-(.*)\.tar\.gz', latest_dbrelease)
        if m:
            self.dbrelease_dataset = m.group(1)
            self.dbrelease = m.group(2)
        else:
            raise ApplicationConfigurationError(
                None,
                "Error retrieving LATEST DBRelease. Try setting application.dbrelease manually."
            )
    else:
        self.dbrelease_dataset = app.dbrelease_dataset
        self.dbrelease = app.dbrelease
    jspec.jobParameters = app.job_parameters

    if self.dbrelease:
        if self.dbrelease == 'current':
            jspec.jobParameters += ' --DBRelease=current'
        else:
            # "_tf" transforms take --option style parameters, older ones key=value
            if jspec.transformation.endswith(
                    "_tf.py") or jspec.transformation.endswith("_tf"):
                jspec.jobParameters += ' --DBRelease=DBRelease-%s.tar.gz' % (
                    self.dbrelease, )
            else:
                jspec.jobParameters += ' DBRelease=DBRelease-%s.tar.gz' % (
                    self.dbrelease, )
            dbspec = FileSpec()
            dbspec.lfn = 'DBRelease-%s.tar.gz' % self.dbrelease
            dbspec.dataset = self.dbrelease_dataset
            dbspec.prodDBlock = jspec.prodDBlock
            dbspec.type = 'input'
            jspec.addFile(dbspec)

    if job.inputdata:
        # run number is the 2nd dot-separated field of the dataset name
        m = re.search('(.*)\.(.*)\.(.*)\.(.*)\.(.*)\.(.*)',
                      job.inputdata.dataset[0])
        if not m:
            logger.error("Error retrieving run number from dataset name")
            #raise ApplicationConfigurationError(None, "Error retrieving run number from dataset name")
            runnumber = 105200
        else:
            runnumber = int(m.group(2))
        if jspec.transformation.endswith(
                "_tf.py") or jspec.transformation.endswith("_tf"):
            jspec.jobParameters += ' --runNumber %d' % runnumber
        else:
            jspec.jobParameters += ' RunNumber=%d' % runnumber

    # Output files.
    randomized_lfns = []
    ilfn = 0
    for lfn, lfntype in zip(app.output_files, app.output_type):
        ofspec = FileSpec()
        if app.randomize_lfns:
            # suffix with site, timestamp and a short uuid to make LFNs unique
            randomized_lfn = lfn + (
                '.%s.%d.%s' %
                (job.backend.site, int(time.time()),
                 commands.getoutput('uuidgen 2> /dev/null')[:4]))
        else:
            randomized_lfn = lfn
        ofspec.lfn = randomized_lfn
        randomized_lfns.append(randomized_lfn)
        ofspec.destinationDBlock = jspec.destinationDBlock
        ofspec.destinationSE = jspec.destinationSE
        ofspec.dataset = jspec.destinationDBlock
        ofspec.type = 'output'
        jspec.addFile(ofspec)
        if jspec.transformation.endswith(
                "_tf.py") or jspec.transformation.endswith("_tf"):
            jspec.jobParameters += ' --output%sFile %s' % (
                lfntype, randomized_lfns[ilfn])
        else:
            jspec.jobParameters += ' output%sFile=%s' % (
                lfntype, randomized_lfns[ilfn])
        ilfn = ilfn + 1

    # Input files.
    if job.inputdata:
        for guid, lfn, size, checksum, scope in zip(
                job.inputdata.guids, job.inputdata.names,
                job.inputdata.sizes, job.inputdata.checksums,
                job.inputdata.scopes):
            ifspec = FileSpec()
            ifspec.lfn = lfn
            ifspec.GUID = guid
            ifspec.fsize = size
            ifspec.md5sum = checksum
            ifspec.scope = scope
            ifspec.dataset = jspec.prodDBlock
            ifspec.prodDBlock = jspec.prodDBlock
            ifspec.type = 'input'
            jspec.addFile(ifspec)
        if app.input_type:
            itype = app.input_type
        else:
            # NOTE(review): reuses `m` from the run-number match above; this
            # raises if the dataset name did not match — confirm intended.
            itype = m.group(5)
        if jspec.transformation.endswith(
                "_tf.py") or jspec.transformation.endswith("_tf"):
            jspec.jobParameters += ' --input%sFile %s' % (itype, ','.join(
                job.inputdata.names))
        else:
            jspec.jobParameters += ' input%sFile=%s' % (itype, ','.join(
                job.inputdata.names))

    # Log files.
    lfspec = FileSpec()
    lfspec.lfn = '%s.job.log.tgz' % jspec.jobName
    lfspec.destinationDBlock = jspec.destinationDBlock
    lfspec.destinationSE = jspec.destinationSE
    lfspec.dataset = jspec.destinationDBlock
    lfspec.type = 'log'
    jspec.addFile(lfspec)

    return jspec
job.prodSourceLabel = 'user' job.computingSite = site job.cloud = cloud job.cmtConfig = 'x86_64-slc6-gcc48-opt' job.specialHandling = 'ddm:rucio' #job.transferType = 'direct' ifile = 'HITS.06828093._000096.pool.root.1' fileI = FileSpec() fileI.GUID = 'AC5B3759-B606-BA42-8681-4BD86455AE02' fileI.checksum = 'ad:5d000974' fileI.dataset = 'user.mlassnig:user.mlassnig.pilot.test.single.hits' fileI.fsize = 94834717 fileI.lfn = ifile fileI.prodDBlock = job.prodDBlock fileI.scope = 'mc15_13TeV' fileI.type = 'input' job.addFile(fileI) ofile = 'RDO_%s.root' % commands.getoutput('uuidgen') fileO = FileSpec() fileO.dataset = job.destinationDBlock fileO.destinationDBlock = job.destinationDBlock fileO.destinationSE = job.destinationSE fileO.lfn = ofile fileO.type = 'output' job.addFile(fileO) fileOL = FileSpec() fileOL.lfn = "%s.job.log.tgz" % job.jobName fileOL.destinationDBlock = job.destinationDBlock