def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """prepare the subjob specific configuration"""

    job = app._getParent()  # Returns job or subjob object
    logger.debug("AthenaLocalRTHandler prepare called, %s", job.id)

    input_files = []
    input_guids = []
    input_tag_files = []
    input_tag_guids = []
    input_esd_files = []
    input_esd_guids = []

    # If job has inputdata
    if job.inputdata:

        # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'ATLASDataset':
                if not job.inputdata.lfn:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.lfn
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
            elif job.inputdata._name == 'DQ2Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_guids = job.inputdata.guids
                input_files = job.inputdata.names
                if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC',
                                              'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD']:
                    job.inputdata.type = 'DQ2_LOCAL'
        else:
            if job.inputdata._name == 'ATLASCastorDataset':
                input_files = ATLASCastorDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASDataset':
                input_files = ATLASDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
                elif job.inputdata.pfnListFile:
                    logger.info('Loading file names from %s' % job.inputdata.pfnListFile.name)
                    pfnListFile = open(job.inputdata.pfnListFile.name)
                    job.inputdata.names = [line.strip() for line in pfnListFile]
                    pfnListFile.close()
                    input_files = job.inputdata.names
                    input_guids = input_files
                else:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
            elif job.inputdata._name == 'DQ2Dataset':
                if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC',
                                              'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD']:
                    job.inputdata.type = 'DQ2_LOCAL'
                contents = job.inputdata.get_contents()
                input_files = [lfn for guid, lfn in contents]
                input_guids = [guid for guid, lfn in contents]

                if job.inputdata.tagdataset:
                    tag_contents = job.inputdata.get_tag_contents()
                    input_tag_files = [lfn for guid, lfn in tag_contents]
                    input_tag_guids = [guid for guid, lfn in tag_contents]
                if job.inputdata.use_aodesd_backnav:
                    esd_contents = job.inputdata.get_contents(backnav=True)
                    input_esd_files = [lfn for guid, lfn in esd_contents]
                    input_esd_guids = [guid for guid, lfn in esd_contents]

                job.inputdata.names = input_files
                job.inputdata.guids = input_guids

    # Outputdataset
    output_location = ''
    if job.outputdata:

        if job.outputdata._name == 'DQ2OutputDataset':
            if job.outputdata.location:
                if isDQ2SRMSite(job.outputdata.location):
                    output_location = job.outputdata.location
                else:
                    logger.warning('Unknown output location %s.',
                                   job.outputdata.location)
            elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                if isDQ2SRMSite(job._getRoot().outputdata.location):
                    output_location = job._getRoot().outputdata.location
                else:
                    logger.warning('Unknown output location %s.',
                                   job._getRoot().outputdata.location)
            logger.debug('Output: %s,%s', output_location, job.outputdata.location)
        elif job.outputdata.location == '' and job.outputdata._name == 'DQ2OutputDataset':
            output_location = ''
        elif job.outputdata.location:
            output_location = expandfilename(job.outputdata.location)
        else:
            try:
                output_location = config['LocalOutputLocation']
                if job.outputdata:
                    job.outputdata.location = expandfilename(output_location)
            except ConfigError:
                logger.warning('No default output location specified in the configuration.')
    else:
        try:
            output_location = config['LocalOutputLocation']
        except ConfigError:
            logger.warning('No default output location specified in the configuration.')

    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
        if job._getRoot().subjobs:
            if config['NoSubDirsAtAllForLocalOutput']:
                output_location = output_location
            elif config['SingleDirForLocalOutput']:
                output_location = os.path.join(output_location, "%d" % (job._getRoot().id))
            elif config['IndividualSubjobDirsForLocalOutput']:
                output_location = os.path.join(output_location, "%d/%d" % (job._getRoot().id, job.id))
            else:
                output_location = os.path.join(output_location, jid)

        if job.outputdata:
            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if re.findall(pat, output_location):
                output_location = re.sub(pat, '', output_location)
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location, "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)

            job.outputdata.location = output_location

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        # output dataset name from master_prepare
        output_datasetname = self.output_datasetname
        output_lfn = self.output_lfn
        output_jobid = jid

        # Set subjob datasetname
        job.outputdata.datasetname = output_datasetname
        # Set master job datasetname
        if job._getRoot().subjobs:
            job._getRoot().outputdata.datasetname = output_datasetname

        # Create output dataset -> moved to the worker node code !
        if not job.outputdata.dataset_exists(output_datasetname):
            if job._getRoot().subjobs:
                if job.id == 0:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                #job.outputdata.create_dataset(output_datasetname)
                pass
        else:
            if (job._getRoot().subjobs and job.id == 0) or not job._getRoot().subjobs:
                logger.warning("Dataset %s already exists - appending new files to this dataset",
                               output_datasetname)
                output_location = job.outputdata.get_locations(
                    datasetname=output_datasetname, quiet=True)
                logger.debug('Output3: %s,%s', output_location, job.outputdata.location)
                if output_location:
                    output_location = output_location[0]
                    if job._getRoot().subjobs:
                        job._getRoot().outputdata.location = output_location
                        job.outputdata.location = output_location
                    else:
                        job.outputdata.location = output_location

                logger.debug('Output4: %s,%s', output_location, job.outputdata.location)

    inputbox = [File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))]

    if input_guids:
        inputbox += [FileBuffer('input_guids', '\n'.join(input_guids) + '\n')]
    if input_files:
        inputbox += [FileBuffer('input_files', '\n'.join(input_files) + '\n')]
    if input_tag_guids:
        inputbox += [FileBuffer('input_tag_guids', '\n'.join(input_tag_guids) + '\n')]
    if input_tag_files:
        inputbox += [FileBuffer('input_tag_files', '\n'.join(input_tag_files) + '\n')]
    if input_esd_guids:
        inputbox += [FileBuffer('input_esd_guids', '\n'.join(input_esd_guids) + '\n')]
    if input_esd_files:
        inputbox += [FileBuffer('input_esd_files', '\n'.join(input_esd_files) + '\n')]

    # check for output data given in prepare info
    if job.outputdata and job.application.atlas_exetype == "ATHENA":
        for of in job.application.atlas_run_config['output']['alloutputs']:
            if not of in job.outputdata.outputdata:
                job.outputdata.outputdata.append(of)

    if job.outputdata and job.outputdata.outputdata:
        inputbox += [FileBuffer('output_files', '\n'.join(job.outputdata.outputdata) + '\n')]
    elif job.outputdata and not job.outputdata.outputdata:
        raise ApplicationConfigurationError(
            None, 'j.outputdata.outputdata is empty - Please specify output filename(s).')

    exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')
    outputbox = jobmasterconfig.outputbox
    environment = jobmasterconfig.env.copy()

    ## create and add sample files for FileStager
    if job.inputdata and job.inputdata._name == 'StagerDataset':
        if not job.inputdata.dataset:
            raise ApplicationConfigurationError(
                None, 'dataset name not specified in job.inputdata')

        ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
        inputbox += [File(os.path.join(os.path.dirname(__file__), 'fs-copy.py'))]

        (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(
            job=job, max_events=app.max_events)
        inputbox += [File(jo_path), File(ic_path)]

        ## re-make the environment['ATHENA_OPTIONS']
        athena_options = os.path.basename(File(jo_path).name)
        for option_file in app.option_file:
            athena_option = os.path.basename(option_file.name)
            athena_options += ' ' + athena_option

        if app.options:
            athena_options = app.options + ' ' + athena_options

        environment['ATHENA_OPTIONS'] = athena_options
        environment['DATASETTYPE'] = 'FILE_STAGER'

        ## ask to send back the FileStager.out/err generated by fs-copy.py
        outputbox += ['FileStager.out', 'FileStager.err']

    # If ArgSplitter is used
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + job.application.args
            else:
                job.application.options = job.application.args
    except AttributeError:
        pass

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset' and output_location == []:
        raise ApplicationConfigurationError(
            None, 'j.outputdata.outputdata is empty - Please specify output filename(s).')

    # set EOS env setting
    environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

    # flag for single output dir
    if (config['SingleDirForLocalOutput'] or config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
        environment['SINGLE_OUTPUT_DIR'] = jid

        # change the filename
        newoutput = []
        for outf in job.outputdata.outputdata:
            newfile, newfileExt = os.path.splitext(outf)
            jid = "%d.%d" % (job._getParent().id, job.id)
            newoutput.append("%s.%s%s" % (newfile, jid, newfileExt))

        job.outputdata.outputdata = newoutput[:]

    environment['OUTPUT_LOCATION'] = output_location
    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        environment['OUTPUT_DATASETNAME'] = output_datasetname
        environment['OUTPUT_LFN'] = output_lfn
        environment['OUTPUT_JOBID'] = output_jobid
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(configDQ2['OUTPUTFILE_NAMELENGTH'])
        if job.outputdata.use_shortfilename:
            environment['GANGA_SHORTFILENAME'] = '1'
        else:
            environment['GANGA_SHORTFILENAME'] = ''

        try:
            environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
        except:
            pass

        environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
        environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

    # CN: extra condition for TNTSplitter
    if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
        # set up dq2 environment
        datasetname = job.inputdata.dataset
        environment['DATASETNAME'] = ':'.join(datasetname)
        environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        #environment['DATASETTYPE']=job.inputdata.type
        # At present, DQ2 download is the only thing that works
        environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
        if job.inputdata.accessprotocol:
            environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
        if job.inputsandbox:
            inputbox += job.inputsandbox

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and job.inputdata._name in ['DQ2Dataset'] and \
            job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER']:
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations(overlap=False)[datasets[0]])
            except:
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(None, printout)

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        environment['DATASETTYPE'] = 'TIER3'

    # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
    if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
        if job.inputdata.use_poolfilecatalog_failover:
            environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

    # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
    environment['CREATE_POOLFILECATALOG'] = '1'
    if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
        if not job.inputdata.create_poolfilecatalog:
            environment['CREATE_POOLFILECATALOG'] = '0'

    # Write trf parameters
    trf_params = ' '
    for key, value in job.application.trf_parameter.iteritems():
        if key == 'dbrelease':
            environment['DBDATASETNAME'] = value.split(':')[0]
            environment['DBFILENAME'] = value.split(':')[1]
        else:
            trf_params = trf_params + key + '=' + str(value) + ' '
    if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
        _append_file_buffer(inputbox, 'trf_params', [trf_params])

    if not 'db_dq2localid.py' in [os.path.basename(file.name) for file in inputbox]:
        _append_files(inputbox, 'db_dq2localid.py')

    # set RecExCommon options
    environment['RECEXTYPE'] = job.application.recex_type

    # Athena run dir
    if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
        environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir

    # Set DQ2_LOCAL_SITE_ID
    if hasattr(job.backend, 'extraopts'):
        if job.backend.extraopts.find('site=hh') > 0:
            environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
            environment['GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119'  # hack for FILE_STAGER at NAF
        elif job.backend.extraopts.find('site=zn') > 0:
            environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
            environment['GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119'  # hack for FILE_STAGER at NAF
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']
    else:
        environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

    return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
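
# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original handler): the
# DQ2_LOCAL_SITE_ID block at the end of prepare() above keys off substrings
# in job.backend.extraopts.  A hypothetical helper that factors out that
# lookup could look like the following; the helper name and return
# convention are assumptions, while the site IDs and CE endpoints are the
# values hard-coded above for the FILE_STAGER hacks at NAF.
def _guess_dq2_site(extraopts, default_site):
    """Return (DQ2_LOCAL_SITE_ID, GANGA_LCG_CE or None) based on extraopts."""
    if extraopts.find('site=hh') > 0:
        return 'DESY-HH_SCRATCHDISK', 'grid-ce5.desy.de:2119'
    if extraopts.find('site=zn') > 0:
        return 'DESY-ZN_SCRATCHDISK', 'lcg-ce0.ifh.de:2119'
    return default_site, None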
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """prepare the subjob specific configuration"""

    job = app._getParent()  # Returns job or subjob object
    logger.debug("TagPrepareLocalRTHandler prepare called, %s", job.id)

    # prepare inputdata
    input_files = []
    input_guids = []

    if job.inputdata:

        # check for subjobs
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'DQ2Dataset':
                raise ApplicationConfigurationError(
                    None, 'Cannot use DQ2Dataset with a local job')
        else:
            if job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'DQ2Dataset':
                raise ApplicationConfigurationError(
                    None, 'Cannot use DQ2Dataset with a local job')

    if job.outputdata:
        raise ApplicationConfigurationError(
            None, 'No outputdata required for TagPrepare job.')

    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    # prepare inputsandbox
    inputbox = [File(os.path.join(__athdirectory__, 'athena-utility.sh'))]
    if input_files:
        _append_file_buffer(inputbox, 'input_files', input_files)

    exe = os.path.join(__directory__, 'run-tagprepare-local.sh')
    outputbox = jobmasterconfig.outputbox
    environment = jobmasterconfig.env.copy()

    # If ArgSplitter is used
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + job.application.args
            else:
                job.application.options = job.application.args
    except AttributeError:
        pass

    output_location = ''
    environment['OUTPUT_LOCATION'] = output_location
    environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and job.inputdata._name == 'DQ2Dataset':
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations(overlap=False)[datasets[0]])
            except:
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(None, printout)

    # Workaround for the glite WMS spaced environment variable problem
    inputbox.append(FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

    # append a property for monitoring to the jobconfig of subjobs
    lcg_config = StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
    return lcg_config
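
# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original handlers): the Athena
# prepare() above derives the local output directory from the three config
# flags NoSubDirsAtAllForLocalOutput / SingleDirForLocalOutput /
# IndividualSubjobDirsForLocalOutput.  A hypothetical standalone version of
# that decision, assuming a plain dict carrying the same keys, would be:
import os

def _local_output_dir(base, master_id, subjob_id, cfg):
    """Mirror the subdirectory logic used for non-DQ2 output locations."""
    if cfg['NoSubDirsAtAllForLocalOutput']:
        return base
    if cfg['SingleDirForLocalOutput']:
        return os.path.join(base, "%d" % master_id)
    if cfg['IndividualSubjobDirsForLocalOutput']:
        return os.path.join(base, "%d/%d" % (master_id, subjob_id))
    # default: one directory per (sub)job, named like the jid above
    return os.path.join(base, "%d.%d" % (master_id, subjob_id))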