def __setup_sandboxcache__(self, job):
    '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend'''

    re_token = re.compile('^token:(.*):(.*)$')

    self.sandboxcache.vo = config['VirtualOrganisation']
    self.sandboxcache.middleware = 'GLITE'
    self.sandboxcache.timeout = config['SandboxTransferTimeout']

    if self.sandboxcache._name == 'LCGSandboxCache':
        if not self.sandboxcache.lfc_host:
            self.sandboxcache.lfc_host = grids[self.sandboxcache.middleware].__get_lfc_host__()

        if not self.sandboxcache.se:
            token = ''
            se_host = config['DefaultSE']
            m = re_token.match(se_host)
            if m:
                token = m.group(1)
                se_host = m.group(2)

            self.sandboxcache.se = se_host

            if token:
                self.sandboxcache.srm_token = token

        if (self.sandboxcache.se_type in ['srmv2']) and (not self.sandboxcache.srm_token):
            self.sandboxcache.srm_token = config['DefaultSRMToken']

    elif self.sandboxcache._name == 'DQ2SandboxCache':
        # generate a new dataset name if not given
        if not self.sandboxcache.dataset_name:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2outputdatasetname
            self.sandboxcache.dataset_name, unused = dq2outputdatasetname(
                "%s.input" % get_uuid(), 0, False, '')

        # subjobs inherit the dataset name from the master job
        for sj in job.subjobs:
            sj.backend.sandboxcache.dataset_name = self.sandboxcache.dataset_name

    elif self.sandboxcache._name == 'GridftpSandboxCache':
        if config['CreamInputSandboxBaseURI']:
            self.sandboxcache.baseURI = config['CreamInputSandboxBaseURI']
        elif self.CE:
            ce_host = re.sub(r'\:[0-9]+', '', self.CE.split('/cream')[0])
            self.sandboxcache.baseURI = 'gsiftp://%s/opt/glite/var/cream_sandbox/%s' % (
                ce_host, self.sandboxcache.vo)
        else:
            logger.error('baseURI not available for GridftpSandboxCache')
            return False

    return True
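# --- Illustrative sketch (standalone, not part of the backend class) ---
# The 'DefaultSE' config value may embed an SRM space token using the
# 'token:<TOKEN>:<se_host>' convention matched by re_token above; the host
# name used here is hypothetical.
import re

def _split_default_se(se_value):
    """Return (srm_token, se_host) from a DefaultSE string such as
    'token:MYTOKEN:srm.example.org', or ('', se_value) for a plain host."""
    m = re.match('^token:(.*):(.*)$', se_value)
    if m:
        return m.group(1), m.group(2)
    return '', se_value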
def __setup_sandboxcache__(self, job): """Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend""" re_token = re.compile("^token:(.*):(.*)$") self.sandboxcache.vo = config["VirtualOrganisation"] self.sandboxcache.middleware = "GLITE" self.sandboxcache.timeout = config["SandboxTransferTimeout"] if self.sandboxcache._name == "LCGSandboxCache": if not self.sandboxcache.lfc_host: self.sandboxcache.lfc_host = grids[self.sandboxcache.middleware].__get_lfc_host__() if not self.sandboxcache.se: token = "" se_host = config["DefaultSE"] m = re_token.match(se_host) if m: token = m.group(1) se_host = m.group(2) self.sandboxcache.se = se_host if token: self.sandboxcache.srm_token = token if (self.sandboxcache.se_type in ["srmv2"]) and (not self.sandboxcache.srm_token): self.sandboxcache.srm_token = config["DefaultSRMToken"] elif self.sandboxcache._name == "DQ2SandboxCache": # generate a new dataset name if not given if not self.sandboxcache.dataset_name: from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import dq2outputdatasetname self.sandboxcache.dataset_name, unused = dq2outputdatasetname("%s.input" % get_uuid(), 0, False, "") # subjobs inherits the dataset name from the master job for sj in job.subjobs: sj.backend.sandboxcache.dataset_name = self.sandboxcache.dataset_name elif self.sandboxcache._name == "GridftpSandboxCache": if config["CreamInputSandboxBaseURI"]: self.sandboxcache.baseURI = config["CreamInputSandboxBaseURI"] elif self.CE: ce_host = re.sub(r"\:[0-9]+", "", self.CE.split("/cream")[0]) self.sandboxcache.baseURI = "gsiftp://%s/opt/glite/var/cream_sandbox/%s" % ( ce_host, self.sandboxcache.vo, ) else: logger.error("baseURI not available for GridftpSandboxCache") return False return True
def master_prepare(self, app, appconfig):
    '''Prepare the master job'''

    from pandatools import Client
    from taskbuffer.JobSpec import JobSpec
    from taskbuffer.FileSpec import FileSpec

    job = app._getParent()
    logger.debug('ExecutablePandaRTHandler master_prepare called for %s', job.getFQID('.'))

    # set chirp variables
    if configPanda['chirpconfig'] or configPanda['chirpserver']:
        setChirpVariables()

    # Pack inputsandbox
    inputsandbox = 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null')
    inpw = job.getInputWorkspace()
    # add user script to inputsandbox
    if hasattr(job.application.exe, "name"):
        if not job.application.exe in job.inputsandbox:
            job.inputsandbox.append(job.application.exe)

    for fname in [f.name for f in job.inputsandbox]:
        fname.rstrip(os.sep)
        path = fname[:fname.rfind(os.sep)]
        f = fname[fname.rfind(os.sep) + 1:]
        rc, output = commands.getstatusoutput(
            'tar rf %s -C %s %s' % (inpw.getPath(inputsandbox), path, f))
        if rc:
            logger.error('Packing inputsandbox failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError('Packing inputsandbox failed.')

    if len(job.inputsandbox) > 0:
        rc, output = commands.getstatusoutput('gzip %s' % (inpw.getPath(inputsandbox)))
        if rc:
            logger.error('Packing inputsandbox failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError('Packing inputsandbox failed.')
        inputsandbox += ".gz"
    else:
        inputsandbox = None

    # Upload Inputsandbox
    if inputsandbox:
        logger.debug('Uploading source tarball ...')
        uploadSources(inpw.getPath(), os.path.basename(inputsandbox))
        self.inputsandbox = inputsandbox
    else:
        self.inputsandbox = None

    # input dataset
    if job.inputdata:
        if job.inputdata._name != 'DQ2Dataset':
            raise ApplicationConfigurationError('PANDA application supports only DQ2Datasets')

    # run brokerage here if not splitting
    if not job.splitter:
        from GangaPanda.Lib.Panda.Panda import runPandaBrokerage
        runPandaBrokerage(job)
    elif job.splitter._name not in ['DQ2JobSplitter', 'ArgSplitter', 'ArgSplitterTask']:
        raise ApplicationConfigurationError('Panda splitter must be DQ2JobSplitter or ArgSplitter')

    if job.backend.site == 'AUTO':
        raise ApplicationConfigurationError('site is still AUTO after brokerage!')

    # output dataset
    if job.outputdata:
        if job.outputdata._name != 'DQ2OutputDataset':
            raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
    else:
        logger.info('Adding missing DQ2OutputDataset')
        job.outputdata = DQ2OutputDataset()

    job.outputdata.datasetname, outlfn = dq2outputdatasetname(
        job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)

    self.outDsLocation = Client.PandaSites[job.backend.site]['ddm']

    try:
        Client.addDataset(job.outputdata.datasetname, False, location=self.outDsLocation)
        logger.info('Output dataset %s registered at %s' % (job.outputdata.datasetname, self.outDsLocation))
        dq2_set_dataset_lifetime(job.outputdata.datasetname, location=self.outDsLocation)
    except exceptions.SystemExit:
        raise BackendError('Panda', 'Exception in Client.addDataset %s: %s %s' % (
            job.outputdata.datasetname, sys.exc_info()[0], sys.exc_info()[1]))

    # handle the libds
    if job.backend.libds:
        self.libDataset = job.backend.libds
        self.fileBO = getLibFileSpecFromLibDS(self.libDataset)
        self.library = self.fileBO.lfn
    elif job.backend.bexec:
        self.libDataset = job.outputdata.datasetname + '.lib'
        self.library = '%s.tgz' % self.libDataset
        try:
            Client.addDataset(self.libDataset, False, location=self.outDsLocation)
            dq2_set_dataset_lifetime(self.libDataset, location=self.outDsLocation)
            logger.info('Lib dataset %s registered at %s' % (self.libDataset, self.outDsLocation))
        except exceptions.SystemExit:
            raise BackendError('Panda', 'Exception in Client.addDataset %s: %s %s' % (
                self.libDataset, sys.exc_info()[0], sys.exc_info()[1]))

    # collect extOutFiles
    self.extOutFile = []
    for tmpName in job.outputdata.outputdata:
        if tmpName != '':
            self.extOutFile.append(tmpName)

    for tmpName in job.outputsandbox:
        if tmpName != '':
            self.extOutFile.append(tmpName)

    for tmpName in job.backend.extOutFile:
        if tmpName != '':
            self.extOutFile.append(tmpName)

    # create build job
    if job.backend.bexec != '':
        jspec = JobSpec()
        jspec.jobDefinitionID = job.id
        jspec.jobName = commands.getoutput('uuidgen 2> /dev/null')
        jspec.transformation = '%s/buildGen-00-00-01' % Client.baseURLSUB
        if Client.isDQ2free(job.backend.site):
            jspec.destinationDBlock = '%s/%s' % (job.outputdata.datasetname, self.libDataset)
            jspec.destinationSE = 'local'
        else:
            jspec.destinationDBlock = self.libDataset
            jspec.destinationSE = job.backend.site
        jspec.prodSourceLabel = configPanda['prodSourceLabelBuild']
        jspec.processingType = configPanda['processingType']
        jspec.assignedPriority = configPanda['assignedPriorityBuild']
        jspec.computingSite = job.backend.site
        jspec.cloud = job.backend.requirements.cloud
        jspec.jobParameters = '-o %s' % (self.library)
        if self.inputsandbox:
            jspec.jobParameters += ' -i %s' % (self.inputsandbox)
        else:
            raise ApplicationConfigurationError(
                'Executable on Panda with build job defined, but inputsandbox is empty !')
        matchURL = re.search('(http.*://[^/]+)/', Client.baseURLCSRVSSL)
        if matchURL:
            jspec.jobParameters += ' --sourceURL %s ' % matchURL.group(1)
        if job.backend.bexec != '':
            jspec.jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec)
            jspec.jobParameters += ' -r %s ' % '.'

        fout = FileSpec()
        fout.lfn = self.library
        fout.type = 'output'
        fout.dataset = self.libDataset
        fout.destinationDBlock = self.libDataset
        jspec.addFile(fout)

        flog = FileSpec()
        flog.lfn = '%s.log.tgz' % self.libDataset
        flog.type = 'log'
        flog.dataset = self.libDataset
        flog.destinationDBlock = self.libDataset
        jspec.addFile(flog)

        return jspec
    else:
        return None
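# --- Illustrative sketch (standalone) ---
# The build job above derives its --sourceURL from the Panda server base URL
# with the regex '(http.*://[^/]+)/', keeping only the scheme and host part;
# the URL in the docstring is hypothetical.
import re

def _source_url(base_url):
    """E.g. _source_url('https://panda.example.org:25443/server/panda')
    returns 'https://panda.example.org:25443'."""
    m = re.search('(http.*://[^/]+)/', base_url)
    return m.group(1) if m else None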
def master_prepare(self, app, appconfig): """Prepare the master job""" job = app._getParent() # Returns job or subjob object logger.debug("AthenaLocalRTHandler master_prepare called, %s", job.id) if job._getRoot().subjobs: jobid = "%d" % (job._getRoot().id) else: jobid = "%d" % job.id # Generate output dataset name if job.outputdata: if job.outputdata._name == 'DQ2OutputDataset': dq2_datasetname = job.outputdata.datasetname dq2_isGroupDS = job.outputdata.isGroupDS dq2_groupname = job.outputdata.groupname else: dq2_datasetname = '' dq2_isGroupDS = False dq2_groupname = '' self.output_datasetname, self.output_lfn = dq2outputdatasetname( dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname) # Expand Athena jobOptions if not app.option_file and not app.command_line: raise ConfigError( "j.application.option_file='' - No Athena jobOptions files specified." ) athena_options = '' inputbox = [ File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh')) ] if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']: for option_file in app.option_file: athena_options += ' ' + os.path.basename(option_file.name) inputbox += [File(option_file.name)] athena_options += ' %s ' % app.options else: for option_file in app.option_file: athena_option = os.path.basename(option_file.name) athena_options += ' ' + athena_option if app.options: athena_options = app.options + ' ' + athena_options inputbox += [File(option_file.name)] if app.command_line: athena_options = app.command_line athena_usersetupfile = os.path.basename(app.user_setupfile.name) # prepare input sandbox if app.user_setupfile.name: inputbox += [File(app.user_setupfile.name)] #CN: added extra test for TNTJobSplitter if job.inputdata and job.inputdata._name in [ 'DQ2Dataset', 'ATLASTier3Dataset' ] or (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'): _append_files(inputbox, 'ganga-stage-in-out-dq2.py') _append_files(inputbox, 'dq2_get') _append_files(inputbox, 'dq2info.tar.gz') _append_files(inputbox, 'libdcap.so') if job.inputdata and job.inputdata._name == 'ATLASDataset': if job.inputdata.lfc: _append_files(inputbox, 'ganga-stagein-lfc.py') else: _append_files(inputbox, 'ganga-stagein.py') ## insert more scripts to inputsandbox for FileStager if job.inputdata and job.inputdata._name in [ 'DQ2Dataset' ] and job.inputdata.type in ['FILE_STAGER']: _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py', 'fs-copy.py') if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'getstats.py') if job.outputdata and job.outputdata._name == 'DQ2OutputDataset': if not job.outputdata.location: raise ApplicationConfigurationError( None, 'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !' 
) if not File( os.path.join(os.path.dirname(__file__), 'ganga-stage-in-out-dq2.py')) in inputbox: _append_files(inputbox, 'ganga-stage-in-out-dq2.py') _append_files(inputbox, 'dq2info.tar.gz') _append_files(inputbox, 'libdcap.so') _append_files(inputbox, 'ganga-joboption-parse.py') if job.inputsandbox: for file in job.inputsandbox: inputbox += [file] if app.user_area.name: if app.is_prepared is True: inputbox += [File(app.user_area.name)] else: inputbox += [ File( os.path.join( os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.user_area.name))) ] if app.group_area.name and string.find(app.group_area.name, "http") < 0: if app.is_prepared is True: inputbox += [File(app.group_area.name)] else: inputbox += [ File( os.path.join( os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.group_area.name))) ] # prepare environment try: atlas_software = config['ATLAS_SOFTWARE'] except ConfigError: raise ConfigError( 'No default location of ATLAS_SOFTWARE specified in the configuration.' ) if app.atlas_release == '' and app.atlas_project != "AthAnalysisBase": raise ApplicationConfigurationError( None, 'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.' ) environment = { 'ATLAS_RELEASE': app.atlas_release, 'ATHENA_OPTIONS': athena_options, 'ATLAS_SOFTWARE': atlas_software, 'ATHENA_USERSETUPFILE': athena_usersetupfile, 'ATLAS_PROJECT': app.atlas_project, 'ATLAS_EXETYPE': app.atlas_exetype, 'GANGA_VERSION': configSystem['GANGA_VERSION'], 'DQ2_SETUP_SCRIPT': configDQ2['setupScript'] } # Set athena architecture: 32 or 64 bit environment['ATLAS_ARCH'] = '32' cmtconfig = app.atlas_cmtconfig if cmtconfig.find('x86_64') >= 0: environment['ATLAS_ARCH'] = '64' environment['ATLAS_CMTCONFIG'] = app.atlas_cmtconfig environment['DCACHE_RA_BUFFER'] = str(config['DCACHE_RA_BUFFER']) if app.atlas_environment: for var in app.atlas_environment: vars = var.split('=') if len(vars) == 2: environment[vars[0]] = vars[1] if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.') <= 0): environment['ATLAS_PRODUCTION'] = app.atlas_production if app.user_area.name: environment['USER_AREA'] = os.path.basename(app.user_area.name) if app.group_area.name: if string.find(app.group_area.name, "http") >= 0: environment['GROUP_AREA_REMOTE'] = "%s" % (app.group_area.name) else: environment['GROUP_AREA'] = os.path.basename( app.group_area.name) if app.max_events: if (app.max_events != -999) and (app.max_events > -2): environment['ATHENA_MAX_EVENTS'] = str(app.max_events) if job.inputdata and job.inputdata._name == 'StagerDataset': if job.inputdata.type not in ['LOCAL']: try: environment['X509CERTDIR'] = os.environ['X509_CERT_DIR'] except KeyError: environment['X509CERTDIR'] = '' try: proxy = os.environ['X509_USER_PROXY'] except KeyError: proxy = '/tmp/x509up_u%s' % os.getuid() REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy) environment['REMOTE_PROXY'] = REMOTE_PROXY try: environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP'] except: pass if job.inputdata and job.inputdata._name == 'DQ2Dataset': if job.inputdata.dataset: datasetname = job.inputdata.dataset environment['DATASETNAME'] = ':'.join(datasetname) environment['DATASETLOCATION'] = ':'.join( job.inputdata.get_locations()) environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL'] = configDQ2[ 'DQ2_URL_SERVER_SSL'] #environment['DATASETTYPE']=job.inputdata.type # At present, DQ2 download is the only thing 
that works environment['DATASETTYPE'] = "DQ2_DOWNLOAD" if job.inputdata.accessprotocol: environment[ 'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol try: environment['X509CERTDIR'] = os.environ['X509_CERT_DIR'] except KeyError: environment['X509CERTDIR'] = '' try: proxy = os.environ['X509_USER_PROXY'] except KeyError: proxy = '/tmp/x509up_u%s' % os.getuid() REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy) environment['REMOTE_PROXY'] = REMOTE_PROXY try: environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP'] except: pass else: raise ConfigError( "j.inputdata.dataset='' - DQ2 dataset name needs to be specified." ) if job.inputdata.tagdataset: environment['TAGDATASETNAME'] = ':'.join( job.inputdata.tagdataset) if job.outputdata and job.outputdata._name == 'DQ2OutputDataset': environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL'] try: environment['X509CERTDIR'] = os.environ['X509_CERT_DIR'] except KeyError: environment['X509CERTDIR'] = '' try: proxy = os.environ['X509_USER_PROXY'] except KeyError: proxy = '/tmp/x509up_u%s' % os.getuid() REMOTE_PROXY = '%s:%s' % (socket.getfqdn(), proxy) environment['REMOTE_PROXY'] = REMOTE_PROXY try: environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP'] except: pass if hasattr(job.backend, 'extraopts'): if job.backend.extraopts.find('site=hh') > 0: environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK' elif job.backend.extraopts.find('site=zn') > 0: environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK' else: environment['DQ2_LOCAL_SITE_ID'] = configDQ2[ 'DQ2_LOCAL_SITE_ID'] else: environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID'] exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh') # output sandbox outputbox = [] outputGUIDs = 'output_guids' outputLOCATION = 'output_location' outputDATA = 'output_data' outputbox.append(outputGUIDs) outputbox.append(outputLOCATION) outputbox.append(outputDATA) outputbox.append('stats.pickle') if (job.outputsandbox): for file in job.outputsandbox: outputbox += [file] ## retrieve the FileStager log if job.inputdata and job.inputdata._name in [ 'DQ2Dataset' ] and job.inputdata.type in ['FILE_STAGER']: outputbox += ['FileStager.out', 'FileStager.err'] # Switch for DEBUG print-out in logfiles if app.useNoDebugLogs: environment['GANGA_LOG_DEBUG'] = '0' else: environment['GANGA_LOG_DEBUG'] = '1' return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
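# --- Illustrative sketch (standalone) ---
# The repeated X509 blocks above fall back to the conventional grid proxy
# path /tmp/x509up_u<uid> when X509_USER_PROXY is unset and tag it with the
# local host name, as used for the REMOTE_PROXY variable.
import os
import socket

def _remote_proxy():
    """Return '<fqdn>:<proxy path>' (Unix only, mirroring the code above)."""
    proxy = os.environ.get('X509_USER_PROXY', '/tmp/x509up_u%s' % os.getuid())
    return '%s:%s' % (socket.getfqdn(), proxy)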
def master_prepare(self, app, appconfig):
    '''Prepare the master job'''

    from pandatools import Client
    from pandatools import MiscUtils
    from pandatools import AthenaUtils
    from pandatools import PsubUtils

    # create a random number for this submission to allow multiple use of containers
    self.rndSubNum = random.randint(1111, 9999)

    job = app._getParent()
    logger.debug('AthenaJediRTHandler master_prepare called for %s', job.getFQID('.'))

    if app.useRootCoreNoBuild:
        logger.info('Athena.useRootCoreNoBuild is True, setting Panda.nobuild=True.')
        job.backend.nobuild = True

    if job.backend.bexec and job.backend.nobuild:
        raise ApplicationConfigurationError("Contradicting options: job.backend.bexec and job.backend.nobuild are both enabled.")

    if job.backend.requirements.rootver != '' and job.backend.nobuild:
        raise ApplicationConfigurationError("Contradicting options: job.backend.requirements.rootver given and job.backend.nobuild are enabled.")

    # Switch on compilation flag if bexec is set or libds is empty
    if job.backend.bexec != '' or not job.backend.nobuild:
        app.athena_compile = True
        for sj in job.subjobs:
            sj.application.athena_compile = True
        logger.info('"job.backend.nobuild=False" or "job.backend.bexec" is set - Panda build job is enabled.')

    if job.backend.nobuild:
        app.athena_compile = False
        for sj in job.subjobs:
            sj.application.athena_compile = False
        logger.info('"job.backend.nobuild=True" or "--nobuild" chosen - Panda build job is switched off.')

    # check for auto datri
    if job.outputdata.location != '':
        if not PsubUtils.checkDestSE(job.outputdata.location, job.outputdata.datasetname, False):
            raise ApplicationConfigurationError("Problems with outputdata.location setting '%s'" % job.outputdata.location)

    # validate application
    if not app.atlas_release and not job.backend.requirements.rootver and not app.atlas_exetype in ['EXE']:
        raise ApplicationConfigurationError("application.atlas_release is not set. Did you run application.prepare()")

    self.dbrelease = app.atlas_dbrelease
    if self.dbrelease != '' and self.dbrelease != 'LATEST' and self.dbrelease.find(':') == -1:
        raise ApplicationConfigurationError("ERROR : invalid argument for DB Release. Must be 'LATEST' or 'DatasetName:FileName'")

    self.runConfig = AthenaUtils.ConfigAttr(app.atlas_run_config)
    for k in self.runConfig.keys():
        self.runConfig[k] = AthenaUtils.ConfigAttr(self.runConfig[k])
    if not app.atlas_run_dir:
        raise ApplicationConfigurationError("application.atlas_run_dir is not set. Did you run application.prepare()")

    self.rundirectory = app.atlas_run_dir
    self.cacheVer = ''
    if app.atlas_project and app.atlas_production:
        self.cacheVer = "-" + app.atlas_project + "_" + app.atlas_production

    # handle different atlas_exetypes
    self.job_options = ''
    if app.atlas_exetype == 'TRF':
        self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

        #if not job.outputdata.outputdata:
        #    raise ApplicationConfigurationError("job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
        #raise ApplicationConfigurationError("Sorry TRF on Panda backend not yet supported")

        if app.options:
            self.job_options += ' %s ' % app.options

    elif app.atlas_exetype == 'ATHENA':
        if len(app.atlas_environment) > 0 and app.atlas_environment[0].find('DBRELEASE_OVERRIDE') == -1:
            logger.warning("Passing of environment variables to Athena using Panda not supported. Ignoring atlas_environment setting.")

        if job.outputdata.outputdata:
            raise ApplicationConfigurationError("job.outputdata.outputdata must be empty if atlas_exetype='ATHENA' and Panda backend is used (outputs are auto-detected)")

        if app.options:
            if app.options.startswith('-c'):
                self.job_options += ' %s ' % app.options
            else:
                self.job_options += ' -c %s ' % app.options
                logger.warning('The value of j.application.options has been prepended with " -c " ')
                logger.warning('Please make sure to use proper quotes for the values of j.application.options !')

        self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

        # check for TAG compression
        if 'subcoll.tar.gz' in app.append_to_user_area:
            self.job_options = ' uncompress.py ' + self.job_options

    elif app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:
        #if not job.outputdata.outputdata:
        #    raise ApplicationConfigurationError("job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend")
        self.job_options += ' '.join([os.path.basename(fopt.name) for fopt in app.option_file])

        # sort out environment variables
        env_str = ""
        if len(app.atlas_environment) > 0:
            for env_var in app.atlas_environment:
                env_str += "export %s ; " % env_var
        else:
            env_str = ""

        # below fixes issue with runGen -- job_options are executed by os.system when dbrelease is used, and by the shell otherwise
        ## - REMOVED FIX DUE TO CHANGE IN PILOT - MWS 8/11/11
        if job.backend.requirements.usecommainputtxt:
            input_str = '/bin/echo %IN > input.txt; cat input.txt; '
        else:
            input_str = '/bin/echo %IN | sed \'s/,/\\\n/g\' > input.txt; cat input.txt; '

        if app.atlas_exetype == 'PYARA':
            self.job_options = env_str + input_str + ' python ' + self.job_options
        elif app.atlas_exetype == 'ARES':
            self.job_options = env_str + input_str + ' athena.py ' + self.job_options
        elif app.atlas_exetype == 'ROOT':
            self.job_options = env_str + input_str + ' root -b -q ' + self.job_options
        elif app.atlas_exetype == 'EXE':
            self.job_options = env_str + input_str + self.job_options

        if app.options:
            self.job_options += ' %s ' % app.options

    if self.job_options == '':
        raise ApplicationConfigurationError("No Job Options found!")
    logger.info('Running job options: %s' % self.job_options)

    # validate dbrelease
    if self.dbrelease != "LATEST":
        self.dbrFiles, self.dbrDsList = getDBDatasets(self.job_options, '', self.dbrelease)

    # handle the output dataset
    if job.outputdata:
        if job.outputdata._name != 'DQ2OutputDataset':
            raise ApplicationConfigurationError('Panda backend supports only DQ2OutputDataset')
    else:
        logger.info('Adding missing DQ2OutputDataset')
        job.outputdata = DQ2OutputDataset()

    # validate the output dataset name (and make it a container)
    job.outputdata.datasetname, outlfn = dq2outputdatasetname(
        job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname)
    if not job.outputdata.datasetname.endswith('/'):
        job.outputdata.datasetname += '/'

    # add extOutFiles
    self.extOutFile = []
    for tmpName in job.outputdata.outputdata:
        if tmpName != '':
            self.extOutFile.append(tmpName)
    for tmpName in job.backend.extOutFile:
        if tmpName != '':
            self.extOutFile.append(tmpName)

    # use the shared area if possible
    tmp_user_area_name = app.user_area.name
    if app.is_prepared is not True:
        from Ganga.Utility.files import expandfilename
        shared_path = os.path.join(expandfilename(getConfig('Configuration')['gangadir']),
                                   'shared', getConfig('Configuration')['user'])
        tmp_user_area_name = os.path.join(os.path.join(shared_path, app.is_prepared.name),
                                          os.path.basename(app.user_area.name))

    # Add inputsandbox to user_area
    if job.inputsandbox:
        logger.warning("Submitting Panda job with inputsandbox. This may slow the submission slightly.")

        if tmp_user_area_name:
            inpw = os.path.dirname(tmp_user_area_name)
            self.inputsandbox = os.path.join(inpw, 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null'))
        else:
            inpw = job.getInputWorkspace()
            self.inputsandbox = inpw.getPath('sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null'))

        if tmp_user_area_name:
            rc, output = commands.getstatusoutput('cp %s %s.gz' % (tmp_user_area_name, self.inputsandbox))
            if rc:
                logger.error('Copying user_area failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')
            rc, output = commands.getstatusoutput('gunzip %s.gz' % (self.inputsandbox))
            if rc:
                logger.error('Unzipping user_area failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')

        for fname in [os.path.abspath(f.name) for f in job.inputsandbox]:
            fname.rstrip(os.sep)
            path = os.path.dirname(fname)
            fn = os.path.basename(fname)

            #app.atlas_run_dir
            # get Athena versions
            rc, out = AthenaUtils.getAthenaVer()
            # failed
            if not rc:
                #raise ApplicationConfigurationError('CMT could not parse correct environment ! \n Did you start/setup ganga in the run/ or cmt/ subdirectory of your athena analysis package ?')
                logger.warning("CMT could not parse correct environment for inputsandbox - will use the atlas_run_dir as default")

                # as we don't have to be in the run dir now, create a copy of the run_dir directory structure and use that
                input_dir = os.path.dirname(self.inputsandbox)
                run_path = "%s/sbx_tree/%s" % (input_dir, app.atlas_run_dir)
                rc, output = commands.getstatusoutput("mkdir -p %s" % run_path)
                if not rc:
                    # copy this sandbox file
                    rc, output = commands.getstatusoutput("cp %s %s" % (fname, run_path))
                    if not rc:
                        path = os.path.join(input_dir, 'sbx_tree')
                        fn = os.path.join(app.atlas_run_dir, fn)
                    else:
                        raise ApplicationConfigurationError("Couldn't copy file %s to recreate run_dir for input sandbox" % fname)
                else:
                    raise ApplicationConfigurationError("Couldn't create directory structure to match run_dir %s for input sandbox" % run_path)
            else:
                userarea = out['workArea']
                # strip the path from the filename if present in the userarea
                ua = os.path.abspath(userarea)
                if ua in path:
                    fn = fname[len(ua) + 1:]
                    path = ua

            rc, output = commands.getstatusoutput('tar -h -r -f %s -C %s %s' % (self.inputsandbox, path, fn))
            if rc:
                logger.error('Packing inputsandbox failed with status %d', rc)
                logger.error(output)
                raise ApplicationConfigurationError('Packing inputsandbox failed.')

        # remove sandbox tree if created
        if "sbx_tree" in os.listdir(os.path.dirname(self.inputsandbox)):
            rc, output = commands.getstatusoutput("rm -r %s/sbx_tree" % os.path.dirname(self.inputsandbox))
            if rc:
                raise ApplicationConfigurationError("Couldn't remove directory structure used for input sandbox")

        rc, output = commands.getstatusoutput('gzip %s' % (self.inputsandbox))
        if rc:
            logger.error('Packing inputsandbox failed with status %d', rc)
            logger.error(output)
            raise ApplicationConfigurationError('Packing inputsandbox failed.')
        self.inputsandbox += ".gz"
    else:
        self.inputsandbox = tmp_user_area_name

    # job name
    jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen()

    # make task
    taskParamMap = {}
    # Enforce that outputdataset name ends with / for container
    if not job.outputdata.datasetname.endswith('/'):
        job.outputdata.datasetname = job.outputdata.datasetname + '/'

    taskParamMap['taskName'] = job.outputdata.datasetname
    taskParamMap['uniqueTaskName'] = True
    taskParamMap['vo'] = 'atlas'
    taskParamMap['architecture'] = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
    if app.atlas_release:
        taskParamMap['transUses'] = 'Atlas-%s' % app.atlas_release
    else:
        taskParamMap['transUses'] = ''
    taskParamMap['transHome'] = 'AnalysisTransforms' + self.cacheVer  # + nightVer

    configSys = getConfig('System')
    gangaver = configSys['GANGA_VERSION'].lower()
    if not gangaver:
        gangaver = "ganga"

    if app.atlas_exetype in ["ATHENA", "TRF"]:
        taskParamMap['processingType'] = '{0}-jedi-athena'.format(gangaver)
    else:
        taskParamMap['processingType'] = '{0}-jedi-run'.format(gangaver)

    #if options.eventPickEvtList != '':
    #    taskParamMap['processingType'] += '-evp'
    taskParamMap['prodSourceLabel'] = 'user'

    if job.backend.site != 'AUTO':
        taskParamMap['cloud'] = Client.PandaSites[job.backend.site]['cloud']
        taskParamMap['site'] = job.backend.site
    elif job.backend.requirements.cloud != None and not job.backend.requirements.anyCloud:
        taskParamMap['cloud'] = job.backend.requirements.cloud
    if job.backend.requirements.excluded_sites != []:
        taskParamMap['excludedSite'] = expandExcludedSiteList(job)

    # if only a single site specified, don't set includedSite
    #if job.backend.site != 'AUTO':
    #    taskParamMap['includedSite'] = job.backend.site
    #taskParamMap['cliParams'] = fullExecString
    if job.backend.requirements.noEmail:
        taskParamMap['noEmail'] = True
    if job.backend.requirements.skipScout:
        taskParamMap['skipScout'] = True
    if not app.atlas_exetype in ["ATHENA", "TRF"]:
        taskParamMap['nMaxFilesPerJob'] = job.backend.requirements.maxNFilesPerJob
    if job.backend.requirements.disableAutoRetry:
        taskParamMap['disableAutoRetry'] = 1

    # source URL
    matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL)
    if matchURL != None:
        taskParamMap['sourceURL'] = matchURL.group(1)

    # dataset names
    outDatasetName = job.outputdata.datasetname
    logDatasetName = re.sub('/$', '.log/', job.outputdata.datasetname)

    # log
    taskParamMap['log'] = {'dataset': logDatasetName,
                           'container': logDatasetName,
                           'type': 'template',
                           'param_type': 'log',
                           'value': '{0}.${{SN}}.log.tgz'.format(logDatasetName[:-1])
                           }

    # job parameters
    if app.atlas_exetype in ["ATHENA", "TRF"]:
        taskParamMap['jobParameters'] = [
            {'type': 'constant',
             'value': ' --sourceURL ${SURL}',
             },
        ]
    else:
        taskParamMap['jobParameters'] = [
            {'type': 'constant',
             'value': '-j "" --sourceURL ${SURL}',
             },
        ]

    taskParamMap['jobParameters'] += [
        {'type': 'constant',
         'value': '-r {0}'.format(self.rundirectory),
         },
    ]

    # Add the --trf option to jobParameters if required
    if app.atlas_exetype == "TRF":
        taskParamMap['jobParameters'] += [{'type': 'constant', 'value': '--trf'}]

    # output files
    outMap = {}
    if app.atlas_exetype in ["ATHENA", "TRF"]:
        outMap, tmpParamList = AthenaUtils.convertConfToOutput(
            self.runConfig, self.extOutFile, job.outputdata.datasetname,
            destination=job.outputdata.location)
        taskParamMap['jobParameters'] += [
            {'type': 'constant',
             'value': '-o "%s" ' % outMap
             },
        ]
        taskParamMap['jobParameters'] += tmpParamList
    else:
        if job.outputdata.outputdata:
            for tmpLFN in job.outputdata.outputdata:
                if len(job.outputdata.datasetname.split('.')) > 2:
                    lfn = '{0}.{1}'.format(*job.outputdata.datasetname.split('.')[:2])
                else:
                    lfn = job.outputdata.datasetname[:-1]
                lfn += '.$JOBSETID._${{SN/P}}.{0}'.format(tmpLFN)
                dataset = '{0}_{1}/'.format(job.outputdata.datasetname[:-1], tmpLFN)
                taskParamMap['jobParameters'] += MiscUtils.makeJediJobParam(
                    lfn, dataset, 'output', hidden=True, destination=job.outputdata.location)
                outMap[tmpLFN] = lfn

            taskParamMap['jobParameters'] += [
                {'type': 'constant',
                 'value': '-o "{0}"'.format(str(outMap)),
                 },
            ]

    if app.atlas_exetype in ["ATHENA"]:
        # jobO parameter
        tmpJobO = self.job_options
        # replace full-path jobOs
        for tmpFullName, tmpLocalName in AthenaUtils.fullPathJobOs.iteritems():
            tmpJobO = re.sub(tmpFullName, tmpLocalName, tmpJobO)
        # modify one-liner for G4 random seeds
        if self.runConfig.other.G4RandomSeeds > 0:
            if app.options != '':
                tmpJobO = re.sub('-c "%s" ' % app.options,
                                 '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' % app.options,
                                 tmpJobO)
            else:
                tmpJobO = '-c "from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" '
            dictItem = {'type': 'template',
                        'param_type': 'number',
                        'value': '${RNDMSEED}',
                        'hidden': True,
                        'offset': self.runConfig.other.G4RandomSeeds,
                        }
            taskParamMap['jobParameters'] += [dictItem]
    elif app.atlas_exetype in ["TRF"]:
        # replace parameters for TRF
        tmpJobO = self.job_options
        # output : basenames are in outMap['IROOT'] through extOutFile
        tmpOutMap = []
        for tmpName, tmpLFN in outMap['IROOT']:
            tmpJobO = tmpJobO.replace('%OUT.' + tmpName, tmpName)
        # replace DBR
        tmpJobO = re.sub('%DB=[^ \'\";]+', '${DBR}', tmpJobO)

    if app.atlas_exetype in ["TRF"]:
        taskParamMap['useLocalIO'] = 1

    # build
    if job.backend.nobuild:
        taskParamMap['jobParameters'] += [
            {'type': 'constant',
             'value': '-a {0}'.format(os.path.basename(self.inputsandbox)),
             },
        ]
    else:
        taskParamMap['jobParameters'] += [
            {'type': 'constant',
             'value': '-l ${LIB}',
             },
        ]

    # input
    if job.inputdata and job.inputdata._name == 'DQ2Dataset':
        if job.backend.requirements.nFilesPerJob > 0 and job.inputdata.number_of_files == 0 and job.backend.requirements.split > 0:
            job.inputdata.number_of_files = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.number_of_files != 0:
        taskParamMap['nFiles'] = job.inputdata.number_of_files
    elif job.backend.requirements.nFilesPerJob > 0 and job.backend.requirements.split > 0:
        # pathena does this for some reason even if there is no input files
        taskParamMap['nFiles'] = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

    if job.backend.requirements.nFilesPerJob > 0:
        taskParamMap['nFilesPerJob'] = job.backend.requirements.nFilesPerJob

    if job.backend.requirements.nEventsPerFile > 0:
        taskParamMap['nEventsPerFile'] = job.backend.requirements.nEventsPerFile

    if not job.backend.requirements.nGBPerJob in [0, 'MAX']:
        try:
            if job.backend.requirements.nGBPerJob != 'MAX':
                job.backend.requirements.nGBPerJob = int(job.backend.requirements.nGBPerJob)
        except:
            logger.error("nGBPerJob must be an integer or MAX")
        # check negative
        if job.backend.requirements.nGBPerJob <= 0:
            logger.error("nGBPerJob must be positive")

        # don't set MAX since it is the default on the server side
        if not job.backend.requirements.nGBPerJob in [-1, 'MAX']:
            taskParamMap['nGBPerJob'] = job.backend.requirements.nGBPerJob

    if app.atlas_exetype in ["ATHENA", "TRF"]:
        inputMap = {}
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            tmpDict = {'type': 'template',
                       'param_type': 'input',
                       'value': '-i "${IN/T}"',
                       'dataset': ','.join(job.inputdata.dataset),
                       'expand': True,
                       'exclude': '\.log\.tgz(\.\d+)*$',
                       }
            #if options.inputType != '':
            #    tmpDict['include'] = options.inputType
            taskParamMap['jobParameters'].append(tmpDict)
            taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
            inputMap['IN'] = ','.join(job.inputdata.dataset)
        else:
            # no input
            taskParamMap['noInput'] = True
            if job.backend.requirements.split > 0:
                taskParamMap['nEvents'] = job.backend.requirements.split
            else:
                taskParamMap['nEvents'] = 1
            taskParamMap['nEventsPerJob'] = 1
            taskParamMap['jobParameters'] += [
                {'type': 'constant',
                 'value': '-i "[]"',
                 },
            ]
    else:
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            tmpDict = {'type': 'template',
                       'param_type': 'input',
                       'value': '-i "${IN/T}"',
                       'dataset': ','.join(job.inputdata.dataset),
                       'expand': True,
                       'exclude': '\.log\.tgz(\.\d+)*$',
                       }
            #if options.nSkipFiles != 0:
            #    tmpDict['offset'] = options.nSkipFiles
            taskParamMap['jobParameters'].append(tmpDict)
            taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
        else:
            # no input
            taskParamMap['noInput'] = True
            if job.backend.requirements.split > 0:
                taskParamMap['nEvents'] = job.backend.requirements.split
            else:
                taskParamMap['nEvents'] = 1
            taskParamMap['nEventsPerJob'] = 1

    # param for DBR
    if self.dbrelease != '':
        dbrDS = self.dbrelease.split(':')[0]
        # change LATEST to DBR_LATEST
        if dbrDS == 'LATEST':
            dbrDS = 'DBR_LATEST'
        dictItem = {'type': 'template',
                    'param_type': 'input',
                    'value': '--dbrFile=${DBR}',
                    'dataset': dbrDS,
                    }
        taskParamMap['jobParameters'] += [dictItem]
        # no expansion
        #if options.notExpandDBR:
        #    dictItem = {'type': 'constant',
        #                'value': '--noExpandDBR',
        #                }
        #    taskParamMap['jobParameters'] += [dictItem]

    # secondary FIXME disabled
    self.secondaryDSs = {}
    if self.secondaryDSs != {}:
        inMap = {}
        streamNames = []
        for tmpDsName, tmpMap in self.secondaryDSs.iteritems():
            # make template item
            streamName = tmpMap['streamName']
            dictItem = MiscUtils.makeJediJobParam('${' + streamName + '}', tmpDsName, 'input', hidden=True,
                                                  expand=True, include=tmpMap['pattern'], offset=tmpMap['nSkip'],
                                                  nFilesPerJob=tmpMap['nFiles'])
            taskParamMap['jobParameters'] += dictItem
            inMap[streamName] = 'tmp_' + streamName
            streamNames.append(streamName)
        # make constant item
        strInMap = str(inMap)
        # set placeholders
        for streamName in streamNames:
            strInMap = strInMap.replace("'tmp_" + streamName + "'", '${' + streamName + '/T}')
        dictItem = {'type': 'constant',
                    'value': '--inMap "%s"' % strInMap,
                    }
        taskParamMap['jobParameters'] += [dictItem]

    # misc
    jobParameters = ''
    # use Athena packages
    if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'] and app.useAthenaPackages):
        jobParameters += "--useAthenaPackages "
    # use RootCore
    if app.useRootCore or app.useRootCoreNoBuild:
        jobParameters += "--useRootCore "
    # use mana
    if app.useMana:
        jobParameters += "--useMana "
        if app.atlas_release != "":
            jobParameters += "--manaVer %s " % app.atlas_release
    # root
    if app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'] and job.backend.requirements.rootver != '':
        rootver = re.sub('/', '.', job.backend.requirements.rootver)
        jobParameters += "--rootVer %s " % rootver
    # write input to txt
    #if options.writeInputToTxt != '':
    #    jobParameters += "--writeInputToTxt %s " % options.writeInputToTxt
    # debug parameters
    #if options.queueData != '':
    #    jobParameters += "--overwriteQueuedata=%s " % options.queueData
    # JEM
    #if options.enableJEM:
    #    jobParameters += "--enable-jem "
    #    if options.configJEM != '':
    #        jobParameters += "--jem-config %s " % options.configJEM

    # set task param
    if jobParameters != '':
        taskParamMap['jobParameters'] += [
            {'type': 'constant',
             'value': jobParameters,
             },
        ]

    # force stage-in
    if job.backend.accessmode == "LocalIO":
        taskParamMap['useLocalIO'] = 1

    # set jobO parameter
    if app.atlas_exetype in ["ATHENA", "TRF"]:
        taskParamMap['jobParameters'] += [
            {'type': 'constant',
             'value': '-j "',
             'padding': False,
             },
        ]
        taskParamMap['jobParameters'] += PsubUtils.convertParamStrToJediParam(
            tmpJobO, inputMap, job.outputdata.datasetname[:-1], True, False)
        taskParamMap['jobParameters'] += [
            {'type': 'constant',
             'value': '"',
             },
        ]
    else:
        taskParamMap['jobParameters'] += [
            {'type': 'constant',
             'value': '-p "{0}"'.format(urllib.quote(self.job_options)),
             },
        ]

    # build step
    if not job.backend.nobuild:
        jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} '
        if job.backend.bexec != '':
            jobParameters += ' --bexec "%s" ' % urllib.quote(job.backend.bexec)

        if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'] and app.useAthenaPackages):
            # use Athena packages
            jobParameters += "--useAthenaPackages "
        # use RootCore
        if app.useRootCore or app.useRootCoreNoBuild:
            jobParameters += "--useRootCore "

        # run directory
        if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:
            jobParameters += '-r {0} '.format(self.rundirectory)

        # no compile
        #if options.noCompile:
        #    jobParameters += "--noCompile "
        # use mana
        if app.useMana:
            jobParameters += "--useMana "
            if app.atlas_release != "":
                jobParameters += "--manaVer %s " % app.atlas_release

        # root
        if app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'] and job.backend.requirements.rootver != '':
            rootver = re.sub('/', '.', job.backend.requirements.rootver)
            jobParameters += "--rootVer %s " % rootver

        # cmt config
        if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']:
            if not app.atlas_cmtconfig in ['', 'NULL', None]:
                jobParameters += " --cmtConfig %s " % app.atlas_cmtconfig

        #cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig)
        #if cmtConfig:
        #    jobParameters += "--cmtConfig %s " % cmtConfig
        # debug parameters
        #if options.queueData != '':
        #    jobParameters += "--overwriteQueuedata=%s " % options.queueData

        # set task param
        taskParamMap['buildSpec'] = {
            'prodSourceLabel': 'panda',
            'archiveName': os.path.basename(self.inputsandbox),
            'jobParameters': jobParameters,
        }

    # enable merging
    if job.backend.requirements.enableMerge:
        jobParameters = '-r {0} '.format(self.rundirectory)
        if 'exec' in job.backend.requirements.configMerge and job.backend.requirements.configMerge['exec'] != '':
            jobParameters += '-j "{0}" '.format(job.backend.requirements.configMerge['exec'])
        if not job.backend.nobuild:
            jobParameters += '-l ${LIB} '
        else:
            jobParameters += '-a {0} '.format(os.path.basename(self.inputsandbox))
            jobParameters += "--sourceURL ${SURL} "
        jobParameters += '${TRN_OUTPUT:OUTPUT} ${TRN_LOG:LOG}'
        taskParamMap['mergeSpec'] = {}
        taskParamMap['mergeSpec']['useLocalIO'] = 1
        taskParamMap['mergeSpec']['jobParameters'] = jobParameters
        taskParamMap['mergeOutput'] = True

    # Selected by Jedi
    #if not app.atlas_exetype in ['PYARA', 'ROOT', 'EXE']:
    #    taskParamMap['transPath'] = 'http://atlpan.web.cern.ch/atlpan/runAthena-00-00-12'

    logger.debug(taskParamMap)

    # upload sources
    if self.inputsandbox and not job.backend.libds:
        uploadSources(os.path.dirname(self.inputsandbox), os.path.basename(self.inputsandbox))

        if not self.inputsandbox == tmp_user_area_name:
            logger.info('Removing source tarball %s ...' % self.inputsandbox)
            os.remove(self.inputsandbox)

    return taskParamMap
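# --- Illustrative sketch (standalone, hypothetical dataset name) ---
# For non-Athena exetypes the handler above builds each output LFN from the
# output container name, a jobset placeholder and the requested file name.
def _output_lfn(container, out_name):
    """E.g. _output_lfn('user.jdoe.test.01/', 'hist.root')
    returns 'user.jdoe.$JOBSETID._${SN/P}.hist.root'."""
    if len(container.split('.')) > 2:
        lfn = '{0}.{1}'.format(*container.split('.')[:2])
    else:
        lfn = container[:-1]
    return lfn + '.$JOBSETID._${SN/P}.' + out_name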
def master_prepare(self, app, appconfig): """Prepare the master job""" job = app._getParent() # Returns job or subjob object logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id) if job._getRoot().subjobs: jobid = "%d" % (job._getRoot().id) else: jobid = "%d" % job.id # Generate output dataset name if job.outputdata: if job.outputdata._name == 'DQ2OutputDataset': dq2_datasetname = job.outputdata.datasetname dq2_isGroupDS = job.outputdata.isGroupDS dq2_groupname = job.outputdata.groupname else: dq2_datasetname = '' dq2_isGroupDS = False dq2_groupname = '' self.output_datasetname, self.output_lfn = dq2outputdatasetname( dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname) # Check if all sites are in the same cloud if job.backend.requirements.sites: firstCloud = whichCloud(job.backend.requirements.sites[0]) for site in job.backend.requirements.sites: cloud = whichCloud(site) if cloud != firstCloud: printout = 'Job submission failed ! Site specified with j.backend.requirements.sites=%s are not in the same cloud !' % ( job.backend.requirements.sites) raise ApplicationConfigurationError(None, printout) #this next for loop instructs ganga to use option_files that live in the appropriate shared directory (the job #will already have been prepared #(if is_prepared is True, then we've most likely submitted a job via GangaRobot. We know what we're doing. #if app.is_prepared is not True: # for position in xrange(len(app.option_file)): # app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name))) # Expand Athena jobOptions if not app.atlas_exetype in ['EXE']: athena_options = ' '.join([ os.path.basename(opt_file.name) for opt_file in app.option_file ]) #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options if app.options: athena_options = app.options + ' ' + athena_options inputbox = [File(opt_file.name) for opt_file in app.option_file] else: athena_options = ' '.join([ os.path.basename(opt_file.name) for opt_file in app.option_file ]) inputbox = [] athena_usersetupfile = os.path.basename(app.user_setupfile.name) # prepare input sandbox inputbox.append(File(os.path.join(__directory__, 'athena-utility.sh'))) if job.inputdata and job.inputdata._name == "AMIDataset" and job.inputdata.goodRunListXML.name != '': inputbox.append(File(job.inputdata.goodRunListXML.name)) if job.inputdata and job.inputdata._name == 'ATLASDataset': if job.inputdata.lfc: _append_files(inputbox, 'ganga-stagein-lfc.py') else: _append_files(inputbox, 'ganga-stagein.py') if app.user_area.name: #we will now use the user_area that's stored in the users shared directory if app.is_prepared is not True: tmp_user_name = os.path.join( os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.user_area.name)) inputbox.append(File(tmp_user_name)) else: inputbox.append(File(app.user_area.name)) #if app.group_area.name: inputbox += [ File(app.group_area.name) ] if app.group_area.name and str(app.group_area.name).find('http') < 0: #we will now use the group_area that's stored in the users shared directory if app.is_prepared is not True: tmp_group_name = os.path.join( os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.group_area.name)) inputbox.append(File(tmp_group_name)) else: inputbox.append(File(app.group_area.name)) if app.user_setupfile.name: inputbox.append(File(app.user_setupfile.name)) # CN: added TNTJobSplitter clause if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'AMIDataset', 'EventPicking' ]) or 
(job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'): _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2_get', 'dq2info.tar.gz') if job.inputdata and job.inputdata.type == 'LFC' and not ( job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'): _append_files(inputbox, 'dq2_get_old') if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset': _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2info.tar.gz') ## insert more scripts to inputsandbox for FileStager if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'AMIDataset', 'EventPicking' ]) and job.inputdata.type in ['FILE_STAGER']: _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py', 'fs-copy.py') #_append_files(inputbox,'make_filestager_joption.py','dm_util.py') if job.outputdata and job.outputdata._name == 'DQ2OutputDataset': #if not job.outputdata.location: # raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !') if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'ganga-stage-in-out-dq2.py') _append_files(inputbox, 'ganga-joboption-parse.py') if not 'dq2info.tar.gz' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'dq2info.tar.gz') # add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'ganga-stage-in-out-dq2.py') if not 'dq2tracerreport.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'dq2tracerreport.py') if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'db_dq2localid.py') if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'getstats.py') if str(app.atlas_release).find('12.') >= 0: _append_files(inputbox, 'libDCache.so', 'libRFIO.so', 'libdcap.so') elif str(app.atlas_release).find('13.') >= 0: _append_files(inputbox, 'libdcap.so') else: _append_files(inputbox, 'libdcap.so') if job.inputsandbox: inputbox += job.inputsandbox # prepare environment if not app.atlas_release: raise ApplicationConfigurationError( None, 'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.' 
) environment = { 'ATLAS_RELEASE': app.atlas_release, 'ATHENA_OPTIONS': athena_options, 'ATHENA_USERSETUPFILE': athena_usersetupfile, 'ATLAS_PROJECT': app.atlas_project, 'ATLAS_EXETYPE': app.atlas_exetype, 'GANGA_VERSION': configSystem['GANGA_VERSION'] } environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER'] if app.atlas_environment: for var in app.atlas_environment: try: vars = re.match("^(\w+)=(.*)", var).group(1) value = re.match("^(\w+)=(.*)", var).group(2) environment[vars] = value except: logger.warning( 'Athena.atlas_environment variable not correctly configured: %s', var) pass if app.atlas_production and app.atlas_release.find( '12.') >= 0 and app.atlas_project != 'AtlasPoint1': temp_atlas_production = re.sub('\.', '_', app.atlas_production) prod_url = config[ 'PRODUCTION_ARCHIVE_BASEURL'] + '/AtlasProduction_' + temp_atlas_production + '_noarch.tar.gz' logger.info('Using Production cache from: %s', prod_url) environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.') <= 0): environment['ATLAS_PRODUCTION'] = app.atlas_production if app.user_area.name: environment['USER_AREA'] = os.path.basename(app.user_area.name) #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name) if app.group_area.name: if str(app.group_area.name).find('http') >= 0: environment['GROUP_AREA_REMOTE'] = str(app.group_area.name) else: environment['GROUP_AREA'] = os.path.basename( app.group_area.name) if app.max_events: if (app.max_events != -999) and (app.max_events > -2): environment['ATHENA_MAX_EVENTS'] = str(app.max_events) if job.backend.requirements._name == 'AtlasLCGRequirements': requirements = AtlasLCGRequirements() elif job.backend.requirements._name == 'AtlasCREAMRequirements': requirements = AtlasCREAMRequirements() else: requirements = AtlasLCGRequirements() if job.inputdata and job.inputdata._name == 'ATLASDataset': if job.inputdata.lfc: environment['GANGA_LFC_HOST'] = job.inputdata.lfc if 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]: environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL'] if job.inputdata and (job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']): if job.inputdata.dataset: datasetname = job.inputdata.dataset environment['DATASETNAME'] = ':'.join(datasetname) environment['DATASETLOCATION'] = ':'.join( job.inputdata.get_locations()) environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL'] = configDQ2[ 'DQ2_URL_SERVER_SSL'] environment['DATASETTYPE'] = job.inputdata.type if job.inputdata.failover: environment['DATASETFAILOVER'] = 1 environment['DATASETDATATYPE'] = job.inputdata.datatype if job.inputdata.accessprotocol: environment[ 'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol if job.inputdata.check_md5sum: environment['GANGA_CHECKMD5SUM'] = 1 else: raise ApplicationConfigurationError( None, 'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.' 
            )

            # Raise submission exception
            if (not job.backend.CE and
                not (job.backend.requirements._name in ['AtlasLCGRequirements', 'AtlasCREAMRequirements'] and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter') and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter') and
                not (job.splitter and job.splitter._name == 'AnaTaskSplitterJob') and
                not (job.splitter and job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
                )

            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

            # Add TAG datasetname
            if job.inputdata.tagdataset:
                environment['TAGDATASETNAME'] = ':'.join(job.inputdata.tagdataset)

        # prepare job requirements
        requirementsSoftware = getLCGReleaseTag(app)

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error('The athena release %s you are using is not recommended for distributed analysis !', requirementsSoftware[0])
            logger.error('For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !')
            requirements.software = requirementsSoftware
        else:
            requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find('x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'

        # add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking'] and job.inputdata.type in ['TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            dq2client_version = None  # default when the requirements object has no such attribute
            try:
                # override the default one if the dq2client_version is presented
                # in the job backend's requirements object
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass

            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (job.splitter and (job.splitter._name == 'DQ2JobSplitter' or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError(
                    None,
                    'Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !'
                )
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except:
                logger.warning('Problems with the atlas_dbrelease configuration')

        # Fill AtlasLCGRequirements access mode
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning("config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !")
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info, fileHandle)
            fileHandle.seek(0)  # rewind before reading the pickle back (StringIO clamped the old seek(-1) to 0)
            lines = fileHandle.read()
            inputbox.append(FileBuffer('access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [os.path.basename(file.name) for file in inputbox]:
                _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py', 'fs-copy.py')

        # jobscript
        exe = os.path.join(__directory__, 'run-athena-lcg.sh')

        # output sandbox
        outputbox = ['output_guids', 'output_location', 'output_data', 'stats.pickle']

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (job.inputdata and (job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']

        if job.outputsandbox:
            outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [], requirements)
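# --- illustrative sketch (not part of the handlers above) ---
# A minimal, standalone sketch of the pickle round-trip used for
# 'access_info.pickle': pickle.dump() leaves the StringIO cursor at the
# end of the buffer, so the payload must be rewound before read().
# Python 2 code matching the handlers; pickle.dumps(obj) is the
# equivalent one-liner.
def _pickle_to_string(obj):
    import pickle
    import StringIO
    fileHandle = StringIO.StringIO()
    pickle.dump(obj, fileHandle)
    fileHandle.seek(0)  # rewind, otherwise read() returns ''
    return fileHandle.read()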
def master_prepare(self, app, appconfig): '''Prepare the master job''' from pandatools import Client from pandatools import MiscUtils from pandatools import AthenaUtils from pandatools import PsubUtils from taskbuffer.JobSpec import JobSpec from taskbuffer.FileSpec import FileSpec from pandatools import PandaToolsPkgInfo # create a random number for this submission to allow multiple use of containers self.rndSubNum = random.randint(1111, 9999) job = app._getParent() logger.debug('AthenaJediRTHandler master_prepare called for %s', job.getFQID('.')) if app.useRootCoreNoBuild: logger.info( 'Athena.useRootCoreNoBuild is True, setting Panda.nobuild=True.' ) job.backend.nobuild = True if job.backend.bexec and job.backend.nobuild: raise ApplicationConfigurationError( None, "Contradicting options: job.backend.bexec and job.backend.nobuild are both enabled." ) if job.backend.requirements.rootver != '' and job.backend.nobuild: raise ApplicationConfigurationError( None, "Contradicting options: job.backend.requirements.rootver given and job.backend.nobuild are enabled." ) # Switch on compilation flag if bexec is set or libds is empty if job.backend.bexec != '' or not job.backend.nobuild: app.athena_compile = True for sj in job.subjobs: sj.application.athena_compile = True logger.info( '"job.backend.nobuild=False" or "job.backend.bexec" is set - Panda build job is enabled.' ) if job.backend.nobuild: app.athena_compile = False for sj in job.subjobs: sj.application.athena_compile = False logger.info( '"job.backend.nobuild=True" or "--nobuild" chosen - Panda build job is switched off.' ) # check for auto datri if job.outputdata.location != '': if not PsubUtils.checkDestSE(job.outputdata.location, job.outputdata.datasetname, False): raise ApplicationConfigurationError( None, "Problems with outputdata.location setting '%s'" % job.outputdata.location) # validate application if not app.atlas_release and not job.backend.requirements.rootver and not app.atlas_exetype in [ 'EXE' ]: raise ApplicationConfigurationError( None, "application.atlas_release is not set. Did you run application.prepare()" ) self.dbrelease = app.atlas_dbrelease if self.dbrelease != '' and self.dbrelease != 'LATEST' and self.dbrelease.find( ':') == -1: raise ApplicationConfigurationError( None, "ERROR : invalid argument for DB Release. Must be 'LATEST' or 'DatasetName:FileName'" ) self.runConfig = AthenaUtils.ConfigAttr(app.atlas_run_config) for k in self.runConfig.keys(): self.runConfig[k] = AthenaUtils.ConfigAttr(self.runConfig[k]) if not app.atlas_run_dir: raise ApplicationConfigurationError( None, "application.atlas_run_dir is not set. 
Did you run application.prepare()" ) self.rundirectory = app.atlas_run_dir self.cacheVer = '' if app.atlas_project and app.atlas_production: self.cacheVer = "-" + app.atlas_project + "_" + app.atlas_production # handle different atlas_exetypes self.job_options = '' if app.atlas_exetype == 'TRF': self.job_options += ' '.join( [os.path.basename(fopt.name) for fopt in app.option_file]) #if not job.outputdata.outputdata: # raise ApplicationConfigurationError(None,"job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend") #raise ApplicationConfigurationError(None,"Sorry TRF on Panda backend not yet supported") if app.options: self.job_options += ' %s ' % app.options elif app.atlas_exetype == 'ATHENA': if len(app.atlas_environment) > 0 and app.atlas_environment[ 0].find('DBRELEASE_OVERRIDE') == -1: logger.warning( "Passing of environment variables to Athena using Panda not supported. Ignoring atlas_environment setting." ) if job.outputdata.outputdata: raise ApplicationConfigurationError( None, "job.outputdata.outputdata must be empty if atlas_exetype='ATHENA' and Panda backend is used (outputs are auto-detected)" ) if app.options: if app.options.startswith('-c'): self.job_options += ' %s ' % app.options else: self.job_options += ' -c %s ' % app.options logger.warning( 'The value of j.application.options has been prepended with " -c " ' ) logger.warning( 'Please make sure to use proper quotes for the values of j.application.options !' ) self.job_options += ' '.join( [os.path.basename(fopt.name) for fopt in app.option_file]) # check for TAG compression if 'subcoll.tar.gz' in app.append_to_user_area: self.job_options = ' uncompress.py ' + self.job_options elif app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']: #if not job.outputdata.outputdata: # raise ApplicationConfigurationError(None,"job.outputdata.outputdata is required for atlas_exetype in ['PYARA','ARES','TRF','ROOT','EXE' ] and Panda backend") self.job_options += ' '.join( [os.path.basename(fopt.name) for fopt in app.option_file]) # sort out environment variables env_str = "" if len(app.atlas_environment) > 0: for env_var in app.atlas_environment: env_str += "export %s ; " % env_var else: env_str = "" # below fixes issue with runGen -- job_options are executed by os.system when dbrelease is used, and by the shell otherwise ## - REMOVED FIX DUE TO CHANGE IN PILOT - MWS 8/11/11 if job.backend.requirements.usecommainputtxt: input_str = '/bin/echo %IN > input.txt; cat input.txt; ' else: input_str = '/bin/echo %IN | sed \'s/,/\\\n/g\' > input.txt; cat input.txt; ' if app.atlas_exetype == 'PYARA': self.job_options = env_str + input_str + ' python ' + self.job_options elif app.atlas_exetype == 'ARES': self.job_options = env_str + input_str + ' athena.py ' + self.job_options elif app.atlas_exetype == 'ROOT': self.job_options = env_str + input_str + ' root -b -q ' + self.job_options elif app.atlas_exetype == 'EXE': self.job_options = env_str + input_str + self.job_options if app.options: self.job_options += ' %s ' % app.options if self.job_options == '': raise ApplicationConfigurationError(None, "No Job Options found!") logger.info('Running job options: %s' % self.job_options) # validate dbrelease if self.dbrelease != "LATEST": self.dbrFiles, self.dbrDsList = getDBDatasets( self.job_options, '', self.dbrelease) # handle the output dataset if job.outputdata: if job.outputdata._name != 'DQ2OutputDataset': raise ApplicationConfigurationError( None, 'Panda backend supports only 
DQ2OutputDataset') else: logger.info('Adding missing DQ2OutputDataset') job.outputdata = DQ2OutputDataset() # validate the output dataset name (and make it a container) job.outputdata.datasetname, outlfn = dq2outputdatasetname( job.outputdata.datasetname, job.id, job.outputdata.isGroupDS, job.outputdata.groupname) if not job.outputdata.datasetname.endswith('/'): job.outputdata.datasetname += '/' # add extOutFiles self.extOutFile = [] for tmpName in job.outputdata.outputdata: if tmpName != '': self.extOutFile.append(tmpName) for tmpName in job.backend.extOutFile: if tmpName != '': self.extOutFile.append(tmpName) # use the shared area if possible tmp_user_area_name = app.user_area.name if app.is_prepared is not True: from Ganga.Utility.files import expandfilename shared_path = os.path.join( expandfilename(getConfig('Configuration')['gangadir']), 'shared', getConfig('Configuration')['user']) tmp_user_area_name = os.path.join( os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.user_area.name)) # Add inputsandbox to user_area if job.inputsandbox: logger.warning( "Submitting Panda job with inputsandbox. This may slow the submission slightly." ) if tmp_user_area_name: inpw = os.path.dirname(tmp_user_area_name) self.inputsandbox = os.path.join( inpw, 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null')) else: inpw = job.getInputWorkspace() self.inputsandbox = inpw.getPath( 'sources.%s.tar' % commands.getoutput('uuidgen 2> /dev/null')) if tmp_user_area_name: rc, output = commands.getstatusoutput( 'cp %s %s.gz' % (tmp_user_area_name, self.inputsandbox)) if rc: logger.error('Copying user_area failed with status %d', rc) logger.error(output) raise ApplicationConfigurationError( None, 'Packing inputsandbox failed.') rc, output = commands.getstatusoutput('gunzip %s.gz' % (self.inputsandbox)) if rc: logger.error('Unzipping user_area failed with status %d', rc) logger.error(output) raise ApplicationConfigurationError( None, 'Packing inputsandbox failed.') for fname in [os.path.abspath(f.name) for f in job.inputsandbox]: fname.rstrip(os.sep) path = os.path.dirname(fname) fn = os.path.basename(fname) #app.atlas_run_dir # get Athena versions rc, out = AthenaUtils.getAthenaVer() # failed if not rc: #raise ApplicationConfigurationError(None, 'CMT could not parse correct environment ! 
\n Did you start/setup ganga in the run/ or cmt/ subdirectory of your athena analysis package ?') logger.warning( "CMT could not parse correct environment for inputsandbox - will use the atlas_run_dir as default" ) # as we don't have to be in the run dir now, create a copy of the run_dir directory structure and use that input_dir = os.path.dirname(self.inputsandbox) run_path = "%s/sbx_tree/%s" % (input_dir, app.atlas_run_dir) rc, output = commands.getstatusoutput("mkdir -p %s" % run_path) if not rc: # copy this sandbox file rc, output = commands.getstatusoutput( "cp %s %s" % (fname, run_path)) if not rc: path = os.path.join(input_dir, 'sbx_tree') fn = os.path.join(app.atlas_run_dir, fn) else: raise ApplicationConfigurationError( None, "Couldn't copy file %s to recreate run_dir for input sandbox" % fname) else: raise ApplicationConfigurationError( None, "Couldn't create directory structure to match run_dir %s for input sandbox" % run_path) else: userarea = out['workArea'] # strip the path from the filename if present in the userarea ua = os.path.abspath(userarea) if ua in path: fn = fname[len(ua) + 1:] path = ua rc, output = commands.getstatusoutput( 'tar -h -r -f %s -C %s %s' % (self.inputsandbox, path, fn)) if rc: logger.error('Packing inputsandbox failed with status %d', rc) logger.error(output) raise ApplicationConfigurationError( None, 'Packing inputsandbox failed.') # remove sandbox tree if created if "sbx_tree" in os.listdir(os.path.dirname(self.inputsandbox)): rc, output = commands.getstatusoutput( "rm -r %s/sbx_tree" % os.path.dirname(self.inputsandbox)) if rc: raise ApplicationConfigurationError( None, "Couldn't remove directory structure used for input sandbox" ) rc, output = commands.getstatusoutput('gzip %s' % (self.inputsandbox)) if rc: logger.error('Packing inputsandbox failed with status %d', rc) logger.error(output) raise ApplicationConfigurationError( None, 'Packing inputsandbox failed.') self.inputsandbox += ".gz" else: self.inputsandbox = tmp_user_area_name # job name jobName = 'ganga.%s' % MiscUtils.wrappedUuidGen() # make task taskParamMap = {} # Enforce that outputdataset name ends with / for container if not job.outputdata.datasetname.endswith('/'): job.outputdata.datasetname = job.outputdata.datasetname + '/' taskParamMap['taskName'] = job.outputdata.datasetname taskParamMap['uniqueTaskName'] = True taskParamMap['vo'] = 'atlas' taskParamMap['architecture'] = AthenaUtils.getCmtConfig( athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig) if app.atlas_release: taskParamMap['transUses'] = 'Atlas-%s' % app.atlas_release else: taskParamMap['transUses'] = '' taskParamMap[ 'transHome'] = 'AnalysisTransforms' + self.cacheVer #+nightVer configSys = getConfig('System') gangaver = configSys['GANGA_VERSION'].lower() if not gangaver: gangaver = "ganga" if app.atlas_exetype in ["ATHENA", "TRF"]: taskParamMap['processingType'] = '{0}-jedi-athena'.format(gangaver) else: taskParamMap['processingType'] = '{0}-jedi-run'.format(gangaver) #if options.eventPickEvtList != '': # taskParamMap['processingType'] += '-evp' taskParamMap['prodSourceLabel'] = 'user' if job.backend.site != 'AUTO': taskParamMap['cloud'] = Client.PandaSites[ job.backend.site]['cloud'] taskParamMap['site'] = job.backend.site elif job.backend.requirements.cloud != None and not job.backend.requirements.anyCloud: taskParamMap['cloud'] = job.backend.requirements.cloud if job.backend.requirements.excluded_sites != []: taskParamMap['excludedSite'] = expandExcludedSiteList(job) # if only a single site specifed, don't 
set includedSite #if job.backend.site != 'AUTO': # taskParamMap['includedSite'] = job.backend.site #taskParamMap['cliParams'] = fullExecString if job.backend.requirements.noEmail: taskParamMap['noEmail'] = True if job.backend.requirements.skipScout: taskParamMap['skipScout'] = True if not app.atlas_exetype in ["ATHENA", "TRF"]: taskParamMap[ 'nMaxFilesPerJob'] = job.backend.requirements.maxNFilesPerJob if job.backend.requirements.disableAutoRetry: taskParamMap['disableAutoRetry'] = 1 # source URL matchURL = re.search("(http.*://[^/]+)/", Client.baseURLCSRVSSL) if matchURL != None: taskParamMap['sourceURL'] = matchURL.group(1) # dataset names outDatasetName = job.outputdata.datasetname logDatasetName = re.sub('/$', '.log/', job.outputdata.datasetname) # log taskParamMap['log'] = { 'dataset': logDatasetName, 'container': logDatasetName, 'type': 'template', 'param_type': 'log', 'value': '{0}.${{SN}}.log.tgz'.format(logDatasetName[:-1]) } # job parameters if app.atlas_exetype in ["ATHENA", "TRF"]: taskParamMap['jobParameters'] = [ { 'type': 'constant', 'value': ' --sourceURL ${SURL}', }, ] else: taskParamMap['jobParameters'] = [ { 'type': 'constant', 'value': '-j "" --sourceURL ${SURL}', }, ] taskParamMap['jobParameters'] += [ { 'type': 'constant', 'value': '-r {0}'.format(self.rundirectory), }, ] # output # output files outMap = {} if app.atlas_exetype in ["ATHENA", "TRF"]: outMap, tmpParamList = AthenaUtils.convertConfToOutput( self.runConfig, self.extOutFile, job.outputdata.datasetname, destination=job.outputdata.location) taskParamMap['jobParameters'] += [ { 'type': 'constant', 'value': '-o "%s" ' % outMap }, ] taskParamMap['jobParameters'] += tmpParamList else: if job.outputdata.outputdata: for tmpLFN in job.outputdata.outputdata: if len(job.outputdata.datasetname.split('.')) > 2: lfn = '{0}.{1}'.format( *job.outputdata.datasetname.split('.')[:2]) else: lfn = job.outputdata.datasetname[:-1] lfn += '.$JOBSETID._${{SN/P}}.{0}'.format(tmpLFN) dataset = '{0}_{1}/'.format( job.outputdata.datasetname[:-1], tmpLFN) taskParamMap[ 'jobParameters'] += MiscUtils.makeJediJobParam( lfn, dataset, 'output', hidden=True, destination=job.outputdata.location) outMap[tmpLFN] = lfn taskParamMap['jobParameters'] += [ { 'type': 'constant', 'value': '-o "{0}"'.format(str(outMap)), }, ] if app.atlas_exetype in ["ATHENA"]: # jobO parameter tmpJobO = self.job_options # replace full-path jobOs for tmpFullName, tmpLocalName in AthenaUtils.fullPathJobOs.iteritems( ): tmpJobO = re.sub(tmpFullName, tmpLocalName, tmpJobO) # modify one-liner for G4 random seeds if self.runConfig.other.G4RandomSeeds > 0: if app.options != '': tmpJobO = re.sub('-c "%s" ' % app.options, '-c "%s;from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' \ % app.options,tmpJobO) else: tmpJobO = '-c "from G4AtlasApps.SimFlags import SimFlags;SimFlags.SeedsG4=${RNDMSEED}" ' dictItem = { 'type': 'template', 'param_type': 'number', 'value': '${RNDMSEED}', 'hidden': True, 'offset': self.runConfig.other.G4RandomSeeds, } taskParamMap['jobParameters'] += [dictItem] elif app.atlas_exetype in ["TRF"]: # replace parameters for TRF tmpJobO = self.job_options # output : basenames are in outMap['IROOT'] trough extOutFile tmpOutMap = [] for tmpName, tmpLFN in outMap['IROOT']: tmpJobO = tmpJobO.replace('%OUT.' 
                                           + tmpName, tmpName)

            # replace DBR
            tmpJobO = re.sub('%DB=[^ \'\";]+', '${DBR}', tmpJobO)

        if app.atlas_exetype in ["TRF"]:
            taskParamMap['useLocalIO'] = 1

        # build
        if job.backend.nobuild:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-a {0}'.format(os.path.basename(self.inputsandbox)),
                },
            ]
        else:
            taskParamMap['jobParameters'] += [
                {
                    'type': 'constant',
                    'value': '-l ${LIB}',
                },
            ]

        #
        # input
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.backend.requirements.nFilesPerJob > 0 and job.inputdata.number_of_files == 0 and job.backend.requirements.split > 0:
                job.inputdata.number_of_files = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.number_of_files != 0:
            taskParamMap['nFiles'] = job.inputdata.number_of_files
        elif job.backend.requirements.nFilesPerJob > 0 and job.backend.requirements.split > 0:
            # pathena does this for some reason even if there are no input files
            taskParamMap['nFiles'] = job.backend.requirements.nFilesPerJob * job.backend.requirements.split

        if job.backend.requirements.nFilesPerJob > 0:
            taskParamMap['nFilesPerJob'] = job.backend.requirements.nFilesPerJob

        if job.backend.requirements.nEventsPerFile > 0:
            taskParamMap['nEventsPerFile'] = job.backend.requirements.nEventsPerFile

        if not job.backend.requirements.nGBPerJob in [0, 'MAX']:
            try:
                if job.backend.requirements.nGBPerJob != 'MAX':
                    job.backend.requirements.nGBPerJob = int(job.backend.requirements.nGBPerJob)
            except:
                logger.error("nGBPerJob must be an integer or MAX")
            # check negative
            if job.backend.requirements.nGBPerJob <= 0:
                logger.error("nGBPerJob must be positive")

            # don't set MAX since it is the default on the server side
            if not job.backend.requirements.nGBPerJob in [-1, 'MAX']:
                taskParamMap['nGBPerJob'] = job.backend.requirements.nGBPerJob

        if app.atlas_exetype in ["ATHENA", "TRF"]:
            inputMap = {}
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {
                    'type': 'template',
                    'param_type': 'input',
                    'value': '-i "${IN/T}"',
                    'dataset': ','.join(job.inputdata.dataset),
                    'expand': True,
                    'exclude': '\.log\.tgz(\.\d+)*$',
                }
                #if options.inputType != '':
                #    tmpDict['include'] = options.inputType
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
                inputMap['IN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1
                taskParamMap['jobParameters'] += [
                    {
                        'type': 'constant',
                        'value': '-i "[]"',
                    },
                ]
        else:
            if job.inputdata and job.inputdata._name == 'DQ2Dataset':
                tmpDict = {
                    'type': 'template',
                    'param_type': 'input',
                    'value': '-i "${IN/T}"',
                    'dataset': ','.join(job.inputdata.dataset),
                    'expand': True,
                    'exclude': '\.log\.tgz(\.\d+)*$',
                }
                #if options.nSkipFiles != 0:
                #    tmpDict['offset'] = options.nSkipFiles
                taskParamMap['jobParameters'].append(tmpDict)
                taskParamMap['dsForIN'] = ','.join(job.inputdata.dataset)
            else:
                # no input
                taskParamMap['noInput'] = True
                if job.backend.requirements.split > 0:
                    taskParamMap['nEvents'] = job.backend.requirements.split
                else:
                    taskParamMap['nEvents'] = 1
                taskParamMap['nEventsPerJob'] = 1

        # param for DBR
        if self.dbrelease != '':
            dbrDS = self.dbrelease.split(':')[0]
            # change LATEST to DBR_LATEST
            if dbrDS == 'LATEST':
                dbrDS = 'DBR_LATEST'
            dictItem = {
                'type': 'template',
                'param_type': 'input',
                'value': '--dbrFile=${DBR}',
                'dataset': dbrDS,
} taskParamMap['jobParameters'] += [dictItem] # no expansion #if options.notExpandDBR: #dictItem = {'type':'constant', # 'value':'--noExpandDBR', # } #taskParamMap['jobParameters'] += [dictItem] # secondary FIXME disabled self.secondaryDSs = {} if self.secondaryDSs != {}: inMap = {} streamNames = [] for tmpDsName, tmpMap in self.secondaryDSs.iteritems(): # make template item streamName = tmpMap['streamName'] dictItem = MiscUtils.makeJediJobParam( '${' + streamName + '}', tmpDsName, 'input', hidden=True, expand=True, include=tmpMap['pattern'], offset=tmpMap['nSkip'], nFilesPerJob=tmpMap['nFiles']) taskParamMap['jobParameters'] += dictItem inMap[streamName] = 'tmp_' + streamName streamNames.append(streamName) # make constant item strInMap = str(inMap) # set placeholders for streamName in streamNames: strInMap = strInMap.replace("'tmp_" + streamName + "'", '${' + streamName + '/T}') dictItem = { 'type': 'constant', 'value': '--inMap "%s"' % strInMap, } taskParamMap['jobParameters'] += [dictItem] # misc jobParameters = '' # use Athena packages if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'] and app.useAthenaPackages): jobParameters += "--useAthenaPackages " # use RootCore if app.useRootCore or app.useRootCoreNoBuild: jobParameters += "--useRootCore " # use mana if app.useMana: jobParameters += "--useMana " if app.atlas_release != "": jobParameters += "--manaVer %s " % app.atlas_release # root if app.atlas_exetype in ['PYARA', 'ROOT', 'EXE' ] and job.backend.requirements.rootver != '': rootver = re.sub('/', '.', job.backend.requirements.rootver) jobParameters += "--rootVer %s " % rootver # write input to txt #if options.writeInputToTxt != '': # jobParameters += "--writeInputToTxt %s " % options.writeInputToTxt # debug parameters #if options.queueData != '': # jobParameters += "--overwriteQueuedata=%s " % options.queueData # JEM #if options.enableJEM: # jobParameters += "--enable-jem " # if options.configJEM != '': # jobParameters += "--jem-config %s " % options.configJEM # set task param if jobParameters != '': taskParamMap['jobParameters'] += [ { 'type': 'constant', 'value': jobParameters, }, ] # force stage-in if job.backend.accessmode == "LocalIO": taskParamMap['useLocalIO'] = 1 # set jobO parameter if app.atlas_exetype in ["ATHENA", "TRF"]: taskParamMap['jobParameters'] += [ { 'type': 'constant', 'value': '-j "', 'padding': False, }, ] taskParamMap[ 'jobParameters'] += PsubUtils.convertParamStrToJediParam( tmpJobO, inputMap, job.outputdata.datasetname[:-1], True, False) taskParamMap['jobParameters'] += [ { 'type': 'constant', 'value': '"', }, ] else: taskParamMap['jobParameters'] += [ { 'type': 'constant', 'value': '-p "{0}"'.format(urllib.quote(self.job_options)), }, ] # build step if not job.backend.nobuild: jobParameters = '-i ${IN} -o ${OUT} --sourceURL ${SURL} ' if job.backend.bexec != '': jobParameters += ' --bexec "%s" ' % urllib.quote( job.backend.bexec) if app.atlas_exetype == 'ARES' or (app.atlas_exetype in ['PYARA', 'ROOT', 'EXE'] and app.useAthenaPackages): # use Athena packages jobParameters += "--useAthenaPackages " # use RootCore if app.useRootCore or app.useRootCoreNoBuild: jobParameters += "--useRootCore " # run directory if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']: jobParameters += '-r {0} '.format(self.rundirectory) # no compile #if options.noCompile: # jobParameters += "--noCompile " # use mana if app.useMana: jobParameters += "--useMana " if app.atlas_release != "": jobParameters += "--manaVer %s " % app.atlas_release # root 
if app.atlas_exetype in [ 'PYARA', 'ROOT', 'EXE' ] and job.backend.requirements.rootver != '': rootver = re.sub('/', '.', job.backend.requirements.rootver) jobParameters += "--rootVer %s " % rootver # cmt config if app.atlas_exetype in ['PYARA', 'ARES', 'ROOT', 'EXE']: if not app.atlas_cmtconfig in ['', 'NULL', None]: jobParameters += " --cmtConfig %s " % app.atlas_cmtconfig #cmtConfig = AthenaUtils.getCmtConfig(athenaVer=app.atlas_release, cmtConfig=app.atlas_cmtconfig) #if cmtConfig: # jobParameters += "--cmtConfig %s " % cmtConfig # debug parameters #if options.queueData != '': # jobParameters += "--overwriteQueuedata=%s " % options.queueData # set task param taskParamMap['buildSpec'] = { 'prodSourceLabel': 'panda', 'archiveName': os.path.basename(self.inputsandbox), 'jobParameters': jobParameters, } # enable merging if job.backend.requirements.enableMerge: jobParameters = '-r {0} '.format(self.rundirectory) if 'exec' in job.backend.requirements.configMerge and job.backend.requirements.configMerge[ 'exec'] != '': jobParameters += '-j "{0}" '.format( job.backend.requirements.configMerge['exec']) if not job.backend.nobuild: jobParameters += '-l ${LIB} ' else: jobParameters += '-a {0} '.format( os.path.basename(self.inputsandbox)) jobParameters += "--sourceURL ${SURL} " jobParameters += '${TRN_OUTPUT:OUTPUT} ${TRN_LOG:LOG}' taskParamMap['mergeSpec'] = {} taskParamMap['mergeSpec']['useLocalIO'] = 1 taskParamMap['mergeSpec']['jobParameters'] = jobParameters taskParamMap['mergeOutput'] = True # Selected by Jedi #if not app.atlas_exetype in ['PYARA','ROOT','EXE']: # taskParamMap['transPath'] = 'http://atlpan.web.cern.ch/atlpan/runAthena-00-00-12' logger.debug(taskParamMap) # upload sources if self.inputsandbox and not job.backend.libds: uploadSources(os.path.dirname(self.inputsandbox), os.path.basename(self.inputsandbox)) if not self.inputsandbox == tmp_user_area_name: logger.info('Removing source tarball %s ...' % self.inputsandbox) os.remove(self.inputsandbox) return taskParamMap
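# --- illustrative sketch (not part of the handler above) ---
# The overall shape of the JEDI taskParamMap assembled by master_prepare,
# with made-up dataset, release, and architecture values for illustration
# only (the real values come from the job and application objects):
_example_taskParamMap = {
    'taskName': 'user.jdoe.ganga.test/',       # output container name
    'uniqueTaskName': True,
    'vo': 'atlas',
    'architecture': 'x86_64-slc6-gcc47-opt',   # from AthenaUtils.getCmtConfig
    'transUses': 'Atlas-17.2.7',               # from app.atlas_release
    'transHome': 'AnalysisTransforms',         # plus self.cacheVer when set
    'processingType': 'ganga-jedi-athena',     # '<ganga-version>-jedi-run' for non-Athena jobs
    'prodSourceLabel': 'user',
    'log': {
        'dataset': 'user.jdoe.ganga.test.log/',
        'container': 'user.jdoe.ganga.test.log/',
        'type': 'template',
        'param_type': 'log',
        'value': 'user.jdoe.ganga.test.log.${SN}.log.tgz',
    },
    'jobParameters': [
        {'type': 'constant', 'value': ' --sourceURL ${SURL}'},
        {'type': 'constant', 'value': '-r .'},
    ],
}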
def master_prepare( self, app, appconfig ): """Prepare the master job""" job = app._getParent() # Returns job or subjob object logger.debug("AthenaLocalRTHandler master_prepare called, %s", job.id) if job._getRoot().subjobs: jobid = "%d" % (job._getRoot().id) else: jobid = "%d" % job.id # Generate output dataset name if job.outputdata: if job.outputdata._name=='DQ2OutputDataset': dq2_datasetname = job.outputdata.datasetname dq2_isGroupDS = job.outputdata.isGroupDS dq2_groupname = job.outputdata.groupname else: dq2_datasetname = '' dq2_isGroupDS = False dq2_groupname = '' self.output_datasetname, self.output_lfn = dq2outputdatasetname(dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname) # Expand Athena jobOptions if not app.option_file and not app.command_line: raise ConfigError("j.application.option_file='' - No Athena jobOptions files specified.") athena_options = '' inputbox = [File(os.path.join(os.path.dirname(__file__),'athena-utility.sh'))] if app.atlas_exetype in ['PYARA','ARES','ROOT','EXE']: for option_file in app.option_file: athena_options += ' ' + os.path.basename(option_file.name) inputbox += [ File(option_file.name) ] athena_options += ' %s ' % app.options else: for option_file in app.option_file: athena_option = os.path.basename(option_file.name) athena_options += ' ' + athena_option if app.options: athena_options = app.options + ' ' + athena_options inputbox += [ File(option_file.name) ] if app.command_line: athena_options = app.command_line athena_usersetupfile = os.path.basename(app.user_setupfile.name) # prepare input sandbox if app.user_setupfile.name: inputbox += [ File(app.user_setupfile.name) ] #CN: added extra test for TNTJobSplitter if job.inputdata and job.inputdata._name in [ 'DQ2Dataset', 'ATLASTier3Dataset'] or (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'): _append_files(inputbox,'ganga-stage-in-out-dq2.py') _append_files(inputbox,'dq2_get') _append_files(inputbox,'dq2info.tar.gz') _append_files(inputbox,'libdcap.so') if job.inputdata and job.inputdata._name == 'ATLASDataset': if job.inputdata.lfc: _append_files(inputbox,'ganga-stagein-lfc.py') else: _append_files(inputbox,'ganga-stagein.py') ## insert more scripts to inputsandbox for FileStager if job.inputdata and job.inputdata._name in [ 'DQ2Dataset' ] and job.inputdata.type in ['FILE_STAGER']: _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py') if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'getstats.py') if job.outputdata and job.outputdata._name == 'DQ2OutputDataset': if not job.outputdata.location: raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !') if not File(os.path.join(os.path.dirname(__file__),'ganga-stage-in-out-dq2.py')) in inputbox: _append_files(inputbox,'ganga-stage-in-out-dq2.py') _append_files(inputbox,'dq2info.tar.gz') _append_files(inputbox,'libdcap.so') _append_files(inputbox,'ganga-joboption-parse.py') if job.inputsandbox: for file in job.inputsandbox: inputbox += [ file ] if app.user_area.name: if app.is_prepared is True: inputbox += [ File(app.user_area.name) ] else: inputbox += [ File(os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name))) ] if app.group_area.name and string.find(app.group_area.name,"http")<0: if app.is_prepared is True: inputbox += [ File(app.group_area.name) ] else: inputbox += [ 
File(os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.group_area.name))) ] # prepare environment try: atlas_software = config['ATLAS_SOFTWARE'] except ConfigError: raise ConfigError('No default location of ATLAS_SOFTWARE specified in the configuration.') if app.atlas_release=='' and app.atlas_project != "AthAnalysisBase": raise ApplicationConfigurationError(None,'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.') environment={ 'ATLAS_RELEASE' : app.atlas_release, 'ATHENA_OPTIONS' : athena_options, 'ATLAS_SOFTWARE' : atlas_software, 'ATHENA_USERSETUPFILE' : athena_usersetupfile, 'ATLAS_PROJECT' : app.atlas_project, 'ATLAS_EXETYPE' : app.atlas_exetype, 'GANGA_VERSION' : configSystem['GANGA_VERSION'], 'DQ2_SETUP_SCRIPT': configDQ2['setupScript'] } # Set athena architecture: 32 or 64 bit environment['ATLAS_ARCH'] = '32' cmtconfig = app.atlas_cmtconfig if cmtconfig.find('x86_64')>=0: environment['ATLAS_ARCH'] = '64' environment['ATLAS_CMTCONFIG'] = app.atlas_cmtconfig environment['DCACHE_RA_BUFFER'] = str(config['DCACHE_RA_BUFFER']) if app.atlas_environment: for var in app.atlas_environment: vars=var.split('=') if len(vars)==2: environment[vars[0]]=vars[1] if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.')<=0): environment['ATLAS_PRODUCTION'] = app.atlas_production if app.user_area.name: environment['USER_AREA'] = os.path.basename(app.user_area.name) if app.group_area.name: if string.find(app.group_area.name,"http")>=0: environment['GROUP_AREA_REMOTE'] = "%s" % (app.group_area.name) else: environment['GROUP_AREA']=os.path.basename(app.group_area.name) if app.max_events: if (app.max_events != -999) and (app.max_events > -2): environment['ATHENA_MAX_EVENTS'] = str(app.max_events) if job.inputdata and job.inputdata._name == 'StagerDataset': if job.inputdata.type not in ['LOCAL']: try: environment['X509CERTDIR']=os.environ['X509_CERT_DIR'] except KeyError: environment['X509CERTDIR']='' try: proxy = os.environ['X509_USER_PROXY'] except KeyError: proxy = '/tmp/x509up_u%s' % os.getuid() REMOTE_PROXY = '%s:%s' % (socket.getfqdn(),proxy) environment['REMOTE_PROXY'] = REMOTE_PROXY try: environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP'] except: pass if job.inputdata and job.inputdata._name == 'DQ2Dataset': if job.inputdata.dataset: datasetname = job.inputdata.dataset environment['DATASETNAME']=':'.join(datasetname) environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations()) environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL'] #environment['DATASETTYPE']=job.inputdata.type # At present, DQ2 download is the only thing that works environment['DATASETTYPE']="DQ2_DOWNLOAD" if job.inputdata.accessprotocol: environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol try: environment['X509CERTDIR']=os.environ['X509_CERT_DIR'] except KeyError: environment['X509CERTDIR']='' try: proxy = os.environ['X509_USER_PROXY'] except KeyError: proxy = '/tmp/x509up_u%s' % os.getuid() REMOTE_PROXY = '%s:%s' % (socket.getfqdn(),proxy) environment['REMOTE_PROXY'] = REMOTE_PROXY try: environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP'] except: pass else: raise ConfigError("j.inputdata.dataset='' - DQ2 dataset name needs to be specified.") if job.inputdata.tagdataset: environment['TAGDATASETNAME'] = ':'.join(job.inputdata.tagdataset) if job.outputdata and job.outputdata._name == 
'DQ2OutputDataset': environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL'] try: environment['X509CERTDIR']=os.environ['X509_CERT_DIR'] except KeyError: environment['X509CERTDIR']='' try: proxy = os.environ['X509_USER_PROXY'] except KeyError: proxy = '/tmp/x509up_u%s' % os.getuid() REMOTE_PROXY = '%s:%s' % (socket.getfqdn(),proxy) environment['REMOTE_PROXY'] = REMOTE_PROXY try: environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP'] except: pass if hasattr(job.backend, 'extraopts'): if job.backend.extraopts.find('site=hh')>0: environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK' elif job.backend.extraopts.find('site=zn')>0: environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK' else: environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID'] else: environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID'] exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh') # output sandbox outputbox = [ ] outputGUIDs='output_guids' outputLOCATION='output_location' outputDATA='output_data' outputbox.append( outputGUIDs ) outputbox.append( outputLOCATION ) outputbox.append( outputDATA ) outputbox.append('stats.pickle') if (job.outputsandbox): for file in job.outputsandbox: outputbox += [ file ] ## retrieve the FileStager log if job.inputdata and job.inputdata._name in [ 'DQ2Dataset'] and job.inputdata.type in ['FILE_STAGER']: outputbox += ['FileStager.out', 'FileStager.err'] # Switch for DEBUG print-out in logfiles if app.useNoDebugLogs: environment['GANGA_LOG_DEBUG'] = '0' else: environment['GANGA_LOG_DEBUG'] = '1' return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
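# --- illustrative sketch (not part of the handler above) ---
# The X509 proxy discovery repeated in several branches above, factored
# into one hypothetical helper; os and socket are the same modules the
# handler module already imports.
def _remote_proxy_spec():
    import os
    import socket
    proxy = os.environ.get('X509_USER_PROXY', '/tmp/x509up_u%s' % os.getuid())
    return '%s:%s' % (socket.getfqdn(), proxy)

# usage: environment['REMOTE_PROXY'] = _remote_proxy_spec()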
def master_prepare( self, app, appconfig): """Prepare the master job""" job = app._getParent() # Returns job or subjob object logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id ) if job._getRoot().subjobs: jobid = "%d" % (job._getRoot().id) else: jobid = "%d" % job.id # Generate output dataset name if job.outputdata: if job.outputdata._name=='DQ2OutputDataset': dq2_datasetname = job.outputdata.datasetname dq2_isGroupDS = job.outputdata.isGroupDS dq2_groupname = job.outputdata.groupname else: dq2_datasetname = '' dq2_isGroupDS = False dq2_groupname = '' self.output_datasetname, self.output_lfn = dq2outputdatasetname(dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname) # Check if all sites are in the same cloud if job.backend.requirements.sites: firstCloud = whichCloud(job.backend.requirements.sites[0]) for site in job.backend.requirements.sites: cloud = whichCloud(site) if cloud != firstCloud: printout = 'Job submission failed ! Site specified with j.backend.requirements.sites=%s are not in the same cloud !' %(job.backend.requirements.sites) raise ApplicationConfigurationError(printout ) #this next for loop instructs ganga to use option_files that live in the appropriate shared directory (the job #will already have been prepared #(if is_prepared is True, then we've most likely submitted a job via GangaRobot. We know what we're doing. #if app.is_prepared is not True: # for position in xrange(len(app.option_file)): # app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name))) # Expand Athena jobOptions if not app.atlas_exetype in ['EXE']: athena_options = ' '.join([os.path.basename(opt_file.name) for opt_file in app.option_file]) #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options if app.options: athena_options = app.options + ' ' + athena_options inputbox = [ File(opt_file.name) for opt_file in app.option_file ] else: athena_options = ' '.join([os.path.basename(opt_file.name) for opt_file in app.option_file]) inputbox = [] athena_usersetupfile = os.path.basename(app.user_setupfile.name) # prepare input sandbox inputbox.append( File(os.path.join(__directory__,'athena-utility.sh')) ) if app.user_area.name: #we will now use the user_area that's stored in the users shared directory if app.is_prepared is not True: tmp_user_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.user_area.name)) inputbox.append(File(tmp_user_name)) else: inputbox.append(File(app.user_area.name)) #if app.group_area.name: inputbox += [ File(app.group_area.name) ] if app.group_area.name and str(app.group_area.name).find('http')<0: #we will now use the group_area that's stored in the users shared directory if app.is_prepared is not True: tmp_group_name = os.path.join(os.path.join(shared_path,app.is_prepared.name),os.path.basename(app.group_area.name)) inputbox.append(File(tmp_group_name)) else: inputbox.append(File(app.group_area.name)) if app.user_setupfile.name: inputbox.append(File(app.user_setupfile.name)) # CN: added TNTJobSplitter clause if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking' ] ) or (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'): _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2_get','dq2info.tar.gz') if job.inputdata and job.inputdata.type == 'LFC' and not (job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter'): _append_files(inputbox,'dq2_get_old') if job.inputdata and 
job.inputdata._name == 'ATLASTier3Dataset': _append_files(inputbox,'ganga-stage-in-out-dq2.py','dq2info.tar.gz') ## insert more scripts to inputsandbox for FileStager if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']: _append_files(inputbox,'make_filestager_joption.py','dm_util.py','fs-copy.py') #_append_files(inputbox,'make_filestager_joption.py','dm_util.py') if job.outputdata and job.outputdata._name == 'DQ2OutputDataset': #if not job.outputdata.location: # raise ApplicationConfigurationError('j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !') if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox,'ganga-stage-in-out-dq2.py') _append_files(inputbox,'ganga-joboption-parse.py') if not 'dq2info.tar.gz' in [os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox,'dq2info.tar.gz') # add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x if not 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'ganga-stage-in-out-dq2.py') if not 'dq2tracerreport.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'dq2tracerreport.py') if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'db_dq2localid.py') if not 'getstats.py' in [ os.path.basename(file.name) for file in inputbox ]: _append_files(inputbox, 'getstats.py') if str(app.atlas_release).find('12.')>=0: _append_files(inputbox, 'libDCache.so','libRFIO.so','libdcap.so') elif str(app.atlas_release).find('13.')>=0: _append_files(inputbox,'libdcap.so') else: _append_files(inputbox,'libdcap.so') if job.inputsandbox: inputbox += job.inputsandbox # prepare environment if not app.atlas_release: raise ApplicationConfigurationError('j.application.atlas_release is empty - No ATLAS release version found. 
Run prepare() or specify a version explictly.') environment={ 'ATLAS_RELEASE' : app.atlas_release, 'ATHENA_OPTIONS' : athena_options, 'ATHENA_USERSETUPFILE' : athena_usersetupfile, 'ATLAS_PROJECT' : app.atlas_project, 'ATLAS_EXETYPE' : app.atlas_exetype, 'GANGA_VERSION' : configSystem['GANGA_VERSION'] } environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER'] if app.atlas_environment: for var in app.atlas_environment: try: vars = re.match("^(\w+)=(.*)",var).group(1) value = re.match("^(\w+)=(.*)",var).group(2) environment[vars]=value except: logger.warning('Athena.atlas_environment variable not correctly configured: %s', var) pass if app.atlas_production and app.atlas_release.find('12.')>=0 and app.atlas_project != 'AtlasPoint1': temp_atlas_production = re.sub('\.','_',app.atlas_production) prod_url = config['PRODUCTION_ARCHIVE_BASEURL']+'/AtlasProduction_'+ temp_atlas_production +'_noarch.tar.gz' logger.info('Using Production cache from: %s', prod_url) environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url if app.atlas_production and (app.atlas_project == 'AtlasPoint1' or app.atlas_release.find('12.')<=0): environment['ATLAS_PRODUCTION'] = app.atlas_production if app.user_area.name: environment['USER_AREA'] = os.path.basename(app.user_area.name) #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name) if app.group_area.name: if str(app.group_area.name).find('http')>=0: environment['GROUP_AREA_REMOTE'] = str(app.group_area.name) else: environment['GROUP_AREA'] = os.path.basename(app.group_area.name) if app.max_events: if (app.max_events != -999) and (app.max_events > -2): environment['ATHENA_MAX_EVENTS'] = str(app.max_events) if job.backend.requirements._name == 'AtlasLCGRequirements': requirements = AtlasLCGRequirements() elif job.backend.requirements._name == 'AtlasCREAMRequirements': requirements = AtlasCREAMRequirements() else: requirements = AtlasLCGRequirements() if 'ganga-stage-in-out-dq2.py' in [ os.path.basename(file.name) for file in inputbox ]: environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL'] if job.inputdata and (job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']): if job.inputdata.dataset: datasetname = job.inputdata.dataset environment['DATASETNAME'] = ':'.join(datasetname) environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations()) environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER'] environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL'] environment['DATASETTYPE'] = job.inputdata.type if job.inputdata.failover: environment['DATASETFAILOVER'] = 1 environment['DATASETDATATYPE'] = job.inputdata.datatype if job.inputdata.accessprotocol: environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol if job.inputdata.check_md5sum: environment['GANGA_CHECKMD5SUM'] = 1 else: raise ApplicationConfigurationError('j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.') # Raise submission exception if (not job.backend.CE and not (job.backend.requirements._name in [ 'AtlasLCGRequirements', 'AtlasCREAMRequirements' ] and job.backend.requirements.sites) and not (job.splitter and job.splitter._name == 'DQ2JobSplitter') and not (job.splitter and job.splitter._name == 'TNTJobSplitter') and not (job.splitter and job.splitter._name == 'AnaTaskSplitterJob') and not (job.splitter and job.splitter._name == 'ATLASTier3Splitter')): raise ApplicationConfigurationError('Job submission failed ! 
Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')

            if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
                raise ApplicationConfigurationError('Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !')

            #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
            #    allLoc = job.inputdata.get_locations(complete=0)
            #    completeLoc = job.inputdata.get_locations(complete=1)
            #    incompleteLoc = []
            #    for loc in allLoc:
            #        if loc not in completeLoc:
            #            incompleteLoc.append(loc)
            #    if incompleteLoc:
            #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # prepare job requirements
        requirementsSoftware = getLCGReleaseTag(app)

        releaseBlacklist = job.backend.requirements.list_release_blacklist()
        if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
            logger.error('The athena release %s you are using is not recommended for distributed analysis !', requirementsSoftware[0])
            logger.error('For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !')
            requirements.software = requirementsSoftware
        else:
            requirements.software = requirementsSoftware

        # Set athena architecture: 32 or 64 bit
        environment['ATLAS_ARCH'] = '32'
        if requirementsSoftware and requirementsSoftware[0].find('x86_64') >= 0:
            environment['ATLAS_ARCH'] = '64'

        # add software requirement of dq2clients
        if job.inputdata and job.inputdata._name in ['DQ2Dataset', 'EventPicking'] and job.inputdata.type in ['TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
            dq2client_version = None  # default when the requirements object has no such attribute
            try:
                # override the default one if the dq2client_version is presented
                # in the job backend's requirements object
                dq2client_version = job.backend.requirements.dq2client_version
            except AttributeError:
                pass

            if dq2client_version:
                #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
                environment['DQ2_CLIENT_VERSION'] = dq2client_version

        if app.atlas_dbrelease:
            if not app._name == "AthenaTask" and not (job.splitter and (job.splitter._name == 'DQ2JobSplitter' or job.splitter._name == 'ATLASTier3Splitter')):
                raise ApplicationConfigurationError('Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !')
            try:
                environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(':')[0]
                environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
            except:
                logger.warning('Problems with the atlas_dbrelease configuration')

        # Fill AtlasLCGRequirements access mode
        if configDQ2['USE_ACCESS_INFO']:
            logger.warning("config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !")
            import pickle, StringIO
            #if job.backend.requirements.sites:
            info = job.backend.requirements.list_access_info()
            fileHandle = StringIO.StringIO()
            pickle.dump(info, fileHandle)
            fileHandle.seek(0)  # rewind before reading the pickle back (StringIO clamped the old seek(-1) to 0)
            lines = fileHandle.read()
            inputbox.append(FileBuffer('access_info.pickle', lines))
            _append_files(inputbox, 'access_info.py')
            if not 'make_filestager_joption.py' in [os.path.basename(file.name) for file in inputbox]:
                _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py', 'fs-copy.py')

        # jobscript
        exe = os.path.join(__directory__, 'run-athena-lcg.sh')

        # output sandbox
        outputbox = ['output_guids', 'output_location', 'output_data', 'stats.pickle']

        ## retrieve the FileStager log
        if configDQ2['USE_ACCESS_INFO'] or (job.inputdata and (job.inputdata._name in ['DQ2Dataset', 'EventPicking']) and job.inputdata.type in ['FILE_STAGER']):
            outputbox += ['FileStager.out', 'FileStager.err']

        if job.outputsandbox:
            outputbox += job.outputsandbox

        # Switch for DEBUG print-out in logfiles
        if app.useNoDebugLogs:
            environment['GANGA_LOG_DEBUG'] = '0'
        else:
            environment['GANGA_LOG_DEBUG'] = '1'

        return LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [], requirements)
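# --- illustrative sketch (not part of the handler above) ---
# The "append only if not already in the sandbox" guard that recurs
# throughout the handlers, written once as a hypothetical helper around
# the module's existing _append_files; os is the module's own import.
def _append_files_once(inputbox, *names):
    present = [os.path.basename(f.name) for f in inputbox]
    for name in names:
        if name not in present:
            _append_files(inputbox, name)

# usage: _append_files_once(inputbox, 'dq2info.tar.gz', 'getstats.py')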
def master_prepare(self, app, appconfig): """Prepare the master job""" job = app._getParent() # Returns job or subjob object logger.debug("AthenaLocalRTHandler master_prepare called, %s", job.id) if job._getRoot().subjobs: jobid = "%d" % (job._getRoot().id) else: jobid = "%d" % job.id # Generate output dataset name if job.outputdata: if job.outputdata._name == "DQ2OutputDataset": dq2_datasetname = job.outputdata.datasetname dq2_isGroupDS = job.outputdata.isGroupDS dq2_groupname = job.outputdata.groupname else: dq2_datasetname = "" dq2_isGroupDS = False dq2_groupname = "" self.output_datasetname, self.output_lfn = dq2outputdatasetname( dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname ) # Expand Athena jobOptions if not app.option_file: raise ConfigError("j.application.option_file='' - No Athena jobOptions files specified.") athena_options = "" inputbox = [File(os.path.join(os.path.dirname(__file__), "athena-utility.sh"))] if app.atlas_exetype in ["PYARA", "ARES", "ROOT", "EXE"]: for option_file in app.option_file: athena_options += " " + os.path.basename(option_file.name) inputbox += [File(option_file.name)] athena_options += " %s " % app.options else: for option_file in app.option_file: athena_option = os.path.basename(option_file.name) athena_options += " " + athena_option if app.options: athena_options = app.options + " " + athena_options inputbox += [File(option_file.name)] athena_usersetupfile = os.path.basename(app.user_setupfile.name) # prepare input sandbox if app.user_setupfile.name: inputbox += [File(app.user_setupfile.name)] # CN: added extra test for TNTJobSplitter if ( job.inputdata and job.inputdata._name in ["DQ2Dataset", "ATLASTier3Dataset"] or (job._getRoot().splitter and job._getRoot().splitter._name == "TNTJobSplitter") ): _append_files(inputbox, "ganga-stage-in-out-dq2.py") _append_files(inputbox, "dq2_get") _append_files(inputbox, "dq2info.tar.gz") _append_files(inputbox, "libdcap.so") if job.inputdata and job.inputdata._name == "ATLASDataset": if job.inputdata.lfc: _append_files(inputbox, "ganga-stagein-lfc.py") else: _append_files(inputbox, "ganga-stagein.py") ## insert more scripts to inputsandbox for FileStager if job.inputdata and job.inputdata._name in ["DQ2Dataset"] and job.inputdata.type in ["FILE_STAGER"]: _append_files(inputbox, "make_filestager_joption.py", "dm_util.py", "fs-copy.py") if not "getstats.py" in [os.path.basename(file.name) for file in inputbox]: _append_files(inputbox, "getstats.py") if job.outputdata and job.outputdata._name == "DQ2OutputDataset": if not job.outputdata.location: raise ApplicationConfigurationError( None, "j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !" 
) if not File(os.path.join(os.path.dirname(__file__), "ganga-stage-in-out-dq2.py")) in inputbox: _append_files(inputbox, "ganga-stage-in-out-dq2.py") _append_files(inputbox, "dq2info.tar.gz") _append_files(inputbox, "libdcap.so") _append_files(inputbox, "ganga-joboption-parse.py") if job.inputsandbox: for file in job.inputsandbox: inputbox += [file] if app.user_area.name: if app.is_prepared is True: inputbox += [File(app.user_area.name)] else: inputbox += [ File( os.path.join( os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.user_area.name) ) ) ] if app.group_area.name and string.find(app.group_area.name, "http") < 0: if app.is_prepared is True: inputbox += [File(app.group_area.name)] else: inputbox += [ File( os.path.join( os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.group_area.name) ) ) ] # prepare environment try: atlas_software = config["ATLAS_SOFTWARE"] except ConfigError: raise ConfigError("No default location of ATLAS_SOFTWARE specified in the configuration.") if app.atlas_release == "" and app.atlas_project != "AthAnalysisBase": raise ApplicationConfigurationError( None, "j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.", ) environment = { "ATLAS_RELEASE": app.atlas_release, "ATHENA_OPTIONS": athena_options, "ATLAS_SOFTWARE": atlas_software, "ATHENA_USERSETUPFILE": athena_usersetupfile, "ATLAS_PROJECT": app.atlas_project, "ATLAS_EXETYPE": app.atlas_exetype, "GANGA_VERSION": configSystem["GANGA_VERSION"], "DQ2_SETUP_SCRIPT": configDQ2["setupScript"], } # Set athena architecture: 32 or 64 bit environment["ATLAS_ARCH"] = "32" cmtconfig = app.atlas_cmtconfig if cmtconfig.find("x86_64") >= 0: environment["ATLAS_ARCH"] = "64" environment["ATLAS_CMTCONFIG"] = app.atlas_cmtconfig environment["DCACHE_RA_BUFFER"] = str(config["DCACHE_RA_BUFFER"]) if app.atlas_environment: for var in app.atlas_environment: vars = var.split("=") if len(vars) == 2: environment[vars[0]] = vars[1] if app.atlas_production and (app.atlas_project == "AtlasPoint1" or app.atlas_release.find("12.") <= 0): environment["ATLAS_PRODUCTION"] = app.atlas_production if app.user_area.name: environment["USER_AREA"] = os.path.basename(app.user_area.name) if app.group_area.name: if string.find(app.group_area.name, "http") >= 0: environment["GROUP_AREA_REMOTE"] = "%s" % (app.group_area.name) else: environment["GROUP_AREA"] = os.path.basename(app.group_area.name) if app.max_events: if (app.max_events != -999) and (app.max_events > -2): environment["ATHENA_MAX_EVENTS"] = str(app.max_events) if job.inputdata and job.inputdata._name == "StagerDataset": if job.inputdata.type not in ["LOCAL"]: try: environment["X509CERTDIR"] = os.environ["X509_CERT_DIR"] except KeyError: environment["X509CERTDIR"] = "" try: proxy = os.environ["X509_USER_PROXY"] except KeyError: proxy = "/tmp/x509up_u%s" % os.getuid() REMOTE_PROXY = "%s:%s" % (socket.getfqdn(), proxy) environment["REMOTE_PROXY"] = REMOTE_PROXY try: environment["GANGA_GLITE_UI"] = configLCG["GLITE_SETUP"] except: pass if job.inputdata and job.inputdata._name == "DQ2Dataset": if job.inputdata.dataset: datasetname = job.inputdata.dataset environment["DATASETNAME"] = ":".join(datasetname) environment["DATASETLOCATION"] = ":".join(job.inputdata.get_locations()) environment["DQ2_URL_SERVER"] = configDQ2["DQ2_URL_SERVER"] environment["DQ2_URL_SERVER_SSL"] = configDQ2["DQ2_URL_SERVER_SSL"] # environment['DATASETTYPE']=job.inputdata.type # At present, DQ2 download is the only thing 
that works environment["DATASETTYPE"] = "DQ2_DOWNLOAD" if job.inputdata.accessprotocol: environment["DQ2_LOCAL_PROTOCOL"] = job.inputdata.accessprotocol try: environment["X509CERTDIR"] = os.environ["X509_CERT_DIR"] except KeyError: environment["X509CERTDIR"] = "" try: proxy = os.environ["X509_USER_PROXY"] except KeyError: proxy = "/tmp/x509up_u%s" % os.getuid() REMOTE_PROXY = "%s:%s" % (socket.getfqdn(), proxy) environment["REMOTE_PROXY"] = REMOTE_PROXY try: environment["GANGA_GLITE_UI"] = configLCG["GLITE_SETUP"] except: pass else: raise ConfigError("j.inputdata.dataset='' - DQ2 dataset name needs to be specified.") if job.inputdata.tagdataset: environment["TAGDATASETNAME"] = ":".join(job.inputdata.tagdataset) if job.outputdata and job.outputdata._name == "DQ2OutputDataset": environment["DQ2_URL_SERVER"] = configDQ2["DQ2_URL_SERVER"] environment["DQ2_URL_SERVER_SSL"] = configDQ2["DQ2_URL_SERVER_SSL"] try: environment["X509CERTDIR"] = os.environ["X509_CERT_DIR"] except KeyError: environment["X509CERTDIR"] = "" try: proxy = os.environ["X509_USER_PROXY"] except KeyError: proxy = "/tmp/x509up_u%s" % os.getuid() REMOTE_PROXY = "%s:%s" % (socket.getfqdn(), proxy) environment["REMOTE_PROXY"] = REMOTE_PROXY try: environment["GANGA_GLITE_UI"] = configLCG["GLITE_SETUP"] except: pass if hasattr(job.backend, "extraopts"): if job.backend.extraopts.find("site=hh") > 0: environment["DQ2_LOCAL_SITE_ID"] = "DESY-HH_SCRATCHDISK" elif job.backend.extraopts.find("site=zn") > 0: environment["DQ2_LOCAL_SITE_ID"] = "DESY-ZN_SCRATCHDISK" else: environment["DQ2_LOCAL_SITE_ID"] = configDQ2["DQ2_LOCAL_SITE_ID"] else: environment["DQ2_LOCAL_SITE_ID"] = configDQ2["DQ2_LOCAL_SITE_ID"] exe = os.path.join(os.path.dirname(__file__), "run-athena-local.sh") # output sandbox outputbox = [] outputGUIDs = "output_guids" outputLOCATION = "output_location" outputDATA = "output_data" outputbox.append(outputGUIDs) outputbox.append(outputLOCATION) outputbox.append(outputDATA) outputbox.append("stats.pickle") if job.outputsandbox: for file in job.outputsandbox: outputbox += [file] ## retrieve the FileStager log if job.inputdata and job.inputdata._name in ["DQ2Dataset"] and job.inputdata.type in ["FILE_STAGER"]: outputbox += ["FileStager.out", "FileStager.err"] # Switch for DEBUG print-out in logfiles if app.useNoDebugLogs: environment["GANGA_LOG_DEBUG"] = "0" else: environment["GANGA_LOG_DEBUG"] = "1" return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
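# --- illustrative sketch (not part of the handler above) ---
# Table-driven version of the DQ2_LOCAL_SITE_ID override above; the
# markers and site ids are copied from the code, the helper itself is
# hypothetical. find(marker) > 0 mirrors the handler's check exactly
# (a marker at position 0 would not trigger the override).
_SITE_OVERRIDES = [
    ("site=hh", "DESY-HH_SCRATCHDISK"),
    ("site=zn", "DESY-ZN_SCRATCHDISK"),
]


def _dq2_local_site_id(extraopts, default):
    for marker, site_id in _SITE_OVERRIDES:
        if extraopts.find(marker) > 0:
            return site_id
    return default


# usage:
# environment["DQ2_LOCAL_SITE_ID"] = _dq2_local_site_id(
#     getattr(job.backend, "extraopts", ""), configDQ2["DQ2_LOCAL_SITE_ID"]
# )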