def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(
        app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    job = app.getJobObject()
    outputfiles = [this_file.namePattern for this_file in job.outputfiles
                   if isinstance(this_file, DiracFile)]

    gaudi_script_path = os.path.join(
        job.getInputWorkspace().getPath(), "gaudi-script.py")

    script_generator(gaudi_script_template(),
                     # remove_unreplaced = False,
                     outputfile_path=gaudi_script_path,
                     PLATFORM=app.platform,
                     COMMAND='gaudirun.py'
                     # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                     )

    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    EXE=gaudi_script_path,
                                    EXE_ARG_STR=' '.join([str(arg) for arg in app.args]),
                                    EXE_LOG_FILE='Ganga_%s_%s.log' % (app.appname, app.version),
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=outputsandbox,
                                    OUTPUTDATA=list(outputfiles),
                                    OUTPUT_PATH="",  # job.fqid,
                                    OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    PLATFORM=app.platform,
                                    # leave the sandbox for altering later, as this needs
                                    # to be done in backend.submit to combine with the master.
                                    # Note: only 2 #s here, as unreplaced 3-# tokens are auto-removed
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))

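# Editor's note: every handler in this file drives `script_generator`, which fills
# `###KEY###` placeholders in a script template. The helper's exact implementation
# is Ganga-internal; the following stand-alone sketch (the names `_fill_template`
# and `remove_unreplaced` are assumptions) illustrates the substitution behaviour
# the code above relies on, including why the two-hash '##INPUT_SANDBOX##' token
# survives the fill:

import re

def _fill_template(template, remove_unreplaced=True, **substitutions):
    """Replace each ###KEY### token with str(value); optionally drop lines
    still carrying an unreplaced ###TOKEN###."""
    for key, value in substitutions.items():
        template = template.replace('###%s###' % key.upper(), str(value))
    if remove_unreplaced:
        template = '\n'.join(line for line in template.splitlines()
                             if not re.search(r'###\w+###', line))
    return template

# _fill_template('name = "###NAME###"\nsb = "##INPUT_SANDBOX##"\nx = ###X###',
#                NAME='job1')
# -> 'name = "job1"\nsb = "##INPUT_SANDBOX##"'
# (the ###X### line is removed as unreplaced; the two-hash token is untouched
#  and can be spliced in at submit time)
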
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = app.getJobObject()
    if job.inputdata:
        if not job.splitter:
            if len(job.inputdata) > 100:
                raise BackendError(
                    "You're submitting a job to Dirac with no splitter and more than 100 files, "
                    "please add a splitter and try again!")

    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    data_str = "import os\n"
    data_str += "execfile('data.py')\n"

    if hasattr(job, "_splitter_data"):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer("data-wrapper.py", data_str))

    input_data = []

    # Can't wait to get rid of this when people no longer specify
    # inputdata in the options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from the job over that
    # in the optsfiles, but we need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app), "inputdata", "options_data.pkl")

        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, "r+b")
                job.inputdata = pickle.load(f)
                f.close()

    #######################################################################
    # Can't wait to get rid of this when people no longer specify
    # outputsandbox or outputdata in the options file
    #######################################################################
    share_path = os.path.join(get_share_path(app), "output", "options_parser.pkl")

    if os.path.exists(share_path):
        # if not os.path.exists(share_path):
        #     raise GangaException('could not find the parser')
        f = open(share_path, "r+b")
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters
        fileTransform = allComponentFilters["gangafiles"]

        outdata_files = [fileTransform(this_file, None) for this_file in outdata
                         if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend(
            [output_file for output_file in outdata_files if not isType(output_file, DiracFile)])

        outbox_files = [fileTransform(this_file, None) for this_file in outbox
                        if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend(
            [outbox_file for outbox_file in outbox_files if not isType(outbox_file, DiracFile)])

        outputsandbox = [f.namePattern for f in job.non_copyable_outputfiles]
        outputsandbox.extend([f.namePattern for f in job.outputfiles if not isType(f, DiracFile)])
        outputsandbox = unique(outputsandbox)  # + outbox[:]
    #######################################################################

    input_data_dirac, parametricinput_data = dirac_inputdata(job.application)

    if input_data_dirac is not None:
        for f in input_data_dirac:
            if isType(f, DiracFile):
                input_data.append(f.lfn)
            elif isType(f, str):
                input_data.append(f)
            else:
                raise ApplicationConfigurationError(
                    "Don't know how to handle anything other than DiracFiles or strings of LFNs!")

    commandline = "python ./gaudipython-wrapper.py"
    if is_gaudi_child(app):
        commandline = "gaudirun.py "
        commandline += " ".join([str(arg) for arg in app.args])
        commandline += " options.pkl data-wrapper.py"
    logger.debug("Command line: %s: ", commandline)

    gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")

    script_generator(gaudi_script_template(),
                     # remove_unreplaced = False,
                     outputfile_path=gaudi_script_path,
                     PLATFORM=app.platform,
                     COMMAND=commandline,
                     XMLSUMMARYPARSING=getXMLSummaryScript()
                     # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                     )

    # logger.debug("input_data %s" % str(input_data))

    # We want to propagate the ancestor depth to DIRAC when we have
    # inputdata set
    if job.inputdata is not None and isType(job.inputdata, LHCbDataset):
        # As the RT handler we already know we have a Dirac backend
        if type(job.backend.settings) is not dict:
            raise ApplicationConfigurationError(None, "backend.settings should be a dict")
        if "AncestorDepth" in job.backend.settings:
            ancestor_depth = job.backend.settings["AncestorDepth"]
        else:
            ancestor_depth = job.inputdata.depth
    else:
        ancestor_depth = 0

    lhcbdirac_script_template = lhcbdiracAPI_script_template()

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # No longer necessary to use lhcbdiracAPI_script_template as we do our own
    # uploads to Dirac; remove after the Ganga6 release.
    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        lhcbdirac_script_template,
        DIRAC_IMPORT="from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb",
        DIRAC_JOB_IMPORT="from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob",
        DIRAC_OBJECT="DiracLHCb()",
        JOB_OBJECT="LHCbJob()",
        NAME=mangle_job_name(app),
        APP_NAME=app.appname,
        APP_VERSION=app.version,
        APP_SCRIPT=gaudi_script_path,
        APP_LOG_FILE="Ganga_%s_%s.log" % (app.appname, app.version),
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
        OUTPUT_PATH="",  # job.fqid, outputdata_path
        OUTPUT_SE=getConfig("DIRAC")["DiracOutputDataSE"],
        SETTINGS=diracAPI_script_settings(job.application),
        DIRAC_OPTS=job.backend.diracOpts,
        PLATFORM=app.platform,
        REPLICATE="True" if getConfig("DIRAC")["ReplicateOutputData"] else "",
        ANCESTOR_DEPTH=ancestor_depth,
        # This is to be modified in the final 'submit' function in the backend;
        # the backend also handles the inputfiles' DiracFiles as appropriate
        INPUT_SANDBOX="##INPUT_SANDBOX##",
    )
    logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))

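# Editor's note: `lhcbdiracAPI_script_template()` above expands to a small
# Python program that drives the DIRAC API. As a rough, hypothetical rendering
# only (method names follow the public DIRAC Job API; the real template and the
# exact calls may differ between DIRAC releases):
#
#   from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb
#   from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob
#
#   j = LHCbJob()
#   j.setName('<mangled job name>')
#   j.setExecutable('<gaudi-script.py>', arguments='', logFile='<app log file>')
#   j.setInputSandbox(['##INPUT_SANDBOX##'])  # spliced in later by backend.submit
#   j.setOutputSandbox(['<output sandbox patterns>'])
#   j.setInputData(['<LFNs>'])
#   j.setAncestorDepth(<ancestor_depth>)
#   result = DiracLHCb().submitJob(j)
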
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    logger.debug("input_data: " + str(input_data))

    job = app.getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    params = {'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
              'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
              'DIRAC_OBJECT': 'DiracLHCb()',
              'JOB_OBJECT': 'LHCbJob()',
              'NAME': mangle_job_name(app),
              'INPUTDATA': input_data,
              'PARAMETRIC_INPUTDATA': parametricinput_data,
              'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
              'OUTPUTFILESSCRIPT': lhcb_dirac_outputfiles,
              'OUTPUT_PATH': "",  # job.fqid,
              'OUTPUT_SE': getConfig('DIRAC')['DiracOutputDataSE'],
              'SETTINGS': diracAPI_script_settings(app),
              'DIRAC_OPTS': job.backend.diracOpts,
              'PLATFORM': getConfig('ROOT')['arch'],
              'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
              # leave the sandbox for altering later, as this needs
              # to be done in backend.submit to combine with the master.
              # Note: only 2 #s here, as unreplaced 3-# tokens are auto-removed
              'INPUT_SANDBOX': '##INPUT_SANDBOX##'
              }

    scriptpath = os.path.join(get_share_path(app), os.path.basename(app.script.name))
    wrapper_path = os.path.join(job.getInputWorkspace(create=True).getPath(), 'script_wrapper.py')

    python_wrapper = """#!/usr/bin/env python
import os, sys

def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'
del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []:
    script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

    python_wrapper = python_wrapper.replace('###SCRIPT_ARGS###',
                                            str('###JOINER###'.join([str(a) for a in app.args])))

    params.update({'APP_NAME': 'Root',
                   'APP_VERSION': app.version,
                   'APP_SCRIPT': wrapper_path,
                   'APP_LOG_FILE': 'Ganga_Root.log'})

    # params.update({'ROOTPY_SCRIPT': wrapper_path,
    #                'ROOTPY_VERSION': app.version,
    #                'ROOTPY_LOG_FILE': 'Ganga_Root.log',
    #                'ROOTPY_ARGS': [str(a) for a in app.args]})

    f = open(wrapper_path, 'w')
    if app.usepython:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='',
                                          COMMAND='/usr/bin/env python %s %s' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=' ',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    else:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='script_args=[formatVar(v) for v in script_args]',
                                          COMMAND='export DISPLAY=\"localhost:0.0\" && root -l -q \"%s(%s)\"' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=',',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    f.write(python_wrapper)
    f.close()

    dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)
    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))

def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    logger.debug("input_data: " + str(input_data))

    job = app.getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    params = {'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
              'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
              'DIRAC_OBJECT': 'DiracLHCb()',
              'JOB_OBJECT': 'LHCbJob()',
              'NAME': mangle_job_name(app),
              'INPUTDATA': input_data,
              'PARAMETRIC_INPUTDATA': parametricinput_data,
              'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
              'OUTPUTFILESSCRIPT': lhcb_dirac_outputfiles,
              'OUTPUT_PATH': "",  # job.fqid,
              'SETTINGS': diracAPI_script_settings(app),
              'DIRAC_OPTS': job.backend.diracOpts,
              'PLATFORM': getConfig('ROOT')['arch'],
              'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
              # leave the sandbox for altering later, as this needs
              # to be done in backend.submit to combine with the master.
              # Note: only 2 #s here, as unreplaced 3-# tokens are auto-removed
              'INPUT_SANDBOX': '##INPUT_SANDBOX##'
              }

    scriptpath = os.path.join(get_share_path(app), os.path.basename(app.script.name))
    wrapper_path = os.path.join(job.getInputWorkspace(create=True).getPath(), 'script_wrapper.py')

    python_wrapper = """#!/usr/bin/env python
import os, sys

def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'
del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []:
    script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

    python_wrapper = python_wrapper.replace('###SCRIPT_ARGS###',
                                            str('###JOINER###'.join([str(a) for a in app.args])))

    params.update({'APP_NAME': 'Root',
                   'APP_VERSION': app.version,
                   'APP_SCRIPT': wrapper_path,
                   'APP_LOG_FILE': 'Ganga_Root.log'})

    # params.update({'ROOTPY_SCRIPT': wrapper_path,
    #                'ROOTPY_VERSION': app.version,
    #                'ROOTPY_LOG_FILE': 'Ganga_Root.log',
    #                'ROOTPY_ARGS': [str(a) for a in app.args]})

    f = open(wrapper_path, 'w')
    if app.usepython:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='',
                                          COMMAND='/usr/bin/env python %s %s' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=' ',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    else:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='script_args=[formatVar(v) for v in script_args]',
                                          COMMAND='export DISPLAY=\"localhost:0.0\" && root -l -q \"%s(%s)\"' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=',',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    f.write(python_wrapper)
    f.close()

    dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)
    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))

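# Editor's note: `formatVar` inside the ROOT wrapper above controls how each
# script argument appears on the generated `root -l -q "script.C(args)"` command
# line: values that parse as numbers are passed bare, anything else is wrapped in
# quotes so ROOT's interpreter sees a string literal. A stand-alone restatement
# of that logic (the name `format_var` is ours):

def format_var(var):
    try:
        float(var)
        return str(var)            # e.g. 42 or '0.5' stay bare
    except ValueError:
        return '"%s"' % str(var)   # e.g. 'hist.root' -> "hist.root"

assert format_var(42) == '42'
assert format_var('0.5') == '0.5'
assert format_var('hist.root') == '"hist.root"'
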
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """
    This function prepares the application of the actual job being submitted, master or not
    Args:
        app (IApplication): This is the application actually being submitted, belonging to the master or sub job being configured
        appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
        appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
        jobmasterconfig (StandardJobConfig): This is the configuration of the master job, which may or may not be the same job as the one owning the app
    """

    # Construct some common objects used in job submission here
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)

    job = app.getJobObject()

    # Construct the im3shape-script used by this job, i.e. the script and full command line to be run
    exe_script_name = "im3shape-script.py"

    output_filename = os.path.basename(job.inputdata[0].lfn) + "." + str(app.rank) + "." + str(app.size)

    im3shape_args = " ".join([
        os.path.basename(job.inputdata[0].lfn),
        os.path.basename(app.ini_location.namePattern),  # input.fz, config.ini
        app.catalog,
        output_filename,  # catalog, output
        str(app.rank),
        str(app.size),
    ])

    full_cmd = app.exe_name + " " + im3shape_args

    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    inputsandbox.append(
        FileBuffer(
            name=exe_script_name,
            contents=script_generator(
                Im3Shape_script_template(),
                # args for the app from job.application
                RUN_DIR=app.run_dir,
                BLACKLIST=os.path.basename(app.blacklist.namePattern),
                COMMAND=full_cmd,
                # Ganga-specific pieces
                OUTPUTFILES=repr([this_file.namePattern for this_file in job.outputfiles]),
                OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, " "),
            ),
            executable=True,
        )
    )

    # TODO: once there is a common IApplication.getMeFilesForThisApp function, replace this
    # list with a getter, and it shouldn't really be hard-coded
    app_file_list = [app.im3_location, app.ini_location, app.blacklist]
    app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]
    job.inputfiles.extend(app_file_list)

    # Slightly mis-using this here, but it would be nice to have these files
    # job.inputfiles.extend(job.inputdata)

    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        diracAPI_script_template(),
        DIRAC_IMPORT="from DIRAC.Interfaces.API.Dirac import Dirac",
        DIRAC_JOB_IMPORT="from DIRAC.Interfaces.API.Job import Job",
        DIRAC_OBJECT="Dirac()",
        JOB_OBJECT="Job()",
        NAME=mangle_job_name(app),
        EXE=exe_script_name,
        EXE_ARG_STR="",
        EXE_LOG_FILE="Ganga_Executable.log",
        ENVIRONMENT=None,
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=dirac_outputfile_jdl(outputfiles, False),
        OUTPUT_PATH="",  # job.fqid,
        SETTINGS=diracAPI_script_settings(app),
        DIRAC_OPTS=job.backend.diracOpts,
        REPLICATE="True" if getConfig("DIRAC")["ReplicateOutputData"] else "",
        # leave the sandbox for altering later, as this needs
        # to be done in backend.submit to combine with the master.
        # Note: only 2 #s here, as unreplaced 3-# tokens are auto-removed
        INPUT_SANDBOX="##INPUT_SANDBOX##",
    )

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))

def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """
    Prepare the RTHandler in order to submit to the Dirac backend
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appsubconfig (unknown): Output passed from the application configuration call
        appmasterconfig (unknown): Output passed from the application master_configure call
        jobmasterconfig (tuple): Output from the master job prepare step
    """
    # NB this needs to be removed safely
    # Get the inputdata and input/output sandbox in a sorted way
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    # We know we don't need this one
    inputsandbox = []

    job = app.getJobObject()

    # We can support inputfiles and opts_file here. Local files should be
    # submitted once; remote files can be referenced.
    all_opts_files = app.getOptsFiles()

    for opts_file in all_opts_files:
        if isinstance(opts_file, DiracFile):
            inputsandbox += ['LFN:' + opts_file.lfn]

    # Sort out the inputfiles we support
    for file_ in job.inputfiles:
        if isinstance(file_, DiracFile):
            inputsandbox += ['LFN:' + file_.lfn]
        elif isinstance(file_, LocalFile):
            base_name = os.path.basename(file_.namePattern)
            shutil.copyfile(os.path.join(file_.localDir, base_name),
                            os.path.join(app.getSharedPath(), base_name))
        else:
            logger.error("Filetype: %s not currently supported, please contact Ganga Devs "
                         "if you require support for this with the DIRAC backend" % getName(file_))
            raise ApplicationConfigurationError(None, "Unsupported filetype: %s with DIRAC backend" % getName(file_))

    master_job = job.master or job

    app.uploadedInput = master_job.application.uploadedInput
    app.jobScriptArchive = master_job.application.jobScriptArchive

    logger.debug("uploadedInput: %s" % app.uploadedInput)

    rep_data = app.uploadedInput.getReplicas()

    logger.debug("Replica info: %s" % rep_data)

    inputsandbox += ['LFN:' + app.uploadedInput.lfn]
    inputsandbox += ['LFN:' + app.jobScriptArchive.lfn]

    logger.debug("Input Sand: %s" % inputsandbox)
    logger.debug("input_data: %s" % input_data)

    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    scriptToRun = getScriptName(app)
    # Already added to the sandbox, uploaded as an LFN

    # This code deals with the outputfiles as outputsandbox and outputdata for us
    lhcbdirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    dirac_script = script_generator(lhcbdiracAPI_script_template(),
                                    DIRAC_IMPORT='from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
                                    DIRAC_JOB_IMPORT='from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
                                    DIRAC_OBJECT='DiracLHCb()',
                                    JOB_OBJECT='LHCbJob()',
                                    NAME=mangle_job_name(app),
                                    EXE=os.path.join('jobScript', scriptToRun),
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_GaudiExec.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=lhcbdirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    OUTPUT_SE=[],
                                    PLATFORM=app.platform,
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # the sandbox is combined with the master's in backend.submit
                                    INPUT_SANDBOX=repr([f for f in inputsandbox]),
                                    )

    # NB the inputsandbox here isn't used by the DIRAC backend as we explicitly
    # define INPUT_SANDBOX above!

    # Return the output needed for the backend to submit this job
    return StandardJobConfig(dirac_script, inputbox=[], outputbox=[])

def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """
    Prepare the RTHandler in order to submit to the Dirac backend
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appsubconfig (unknown): Output passed from the application configuration call
        appmasterconfig (unknown): Output passed from the application master_configure call
        jobmasterconfig (tuple): Output from the master job prepare step
    """
    cred_req = app.getJobObject().backend.credential_requirements
    check_creds(cred_req)

    # NB this needs to be removed safely
    # Get the inputdata and input/output sandbox in a sorted way
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    # We know we don't need this one
    inputsandbox = []

    job = app.getJobObject()

    # We can support inputfiles and opts_file here. Local files should be
    # submitted once; remote files can be referenced.
    all_opts_files = app.getOptsFiles()

    for opts_file in all_opts_files:
        if isinstance(opts_file, DiracFile):
            inputsandbox += ['LFN:' + opts_file.lfn]

    # Sort out the inputfiles we support
    for file_ in job.inputfiles:
        if isinstance(file_, DiracFile):
            inputsandbox += ['LFN:' + file_.lfn]
        elif isinstance(file_, LocalFile):
            if job.master is not None and file_ not in job.master.inputfiles:
                shutil.copy(os.path.join(file_.localDir, file_.namePattern),
                            app.getSharedPath())
                inputsandbox += [os.path.join(app.getSharedPath(), file_.namePattern)]
        else:
            logger.error("Filetype: %s not currently supported, please contact Ganga Devs "
                         "if you require support for this with the DIRAC backend" % getName(file_))
            raise ApplicationConfigurationError("Unsupported filetype: %s with DIRAC backend" % getName(file_))

    master_job = job.master or job

    app.uploadedInput = master_job.application.uploadedInput
    app.jobScriptArchive = master_job.application.jobScriptArchive

    logger.debug("uploadedInput: %s" % app.uploadedInput)

    rep_data = app.uploadedInput.getReplicas()

    logger.debug("Replica info: %s" % rep_data)

    inputsandbox += ['LFN:' + app.uploadedInput.lfn]
    inputsandbox += ['LFN:' + app.jobScriptArchive.lfn]

    logger.debug("Input Sand: %s" % inputsandbox)
    logger.debug("input_data: %s" % input_data)

    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    scriptToRun = getScriptName(app)
    # Already added to the sandbox, uploaded as an LFN

    # This code deals with the outputfiles as outputsandbox and outputdata for us
    lhcbdirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        lhcbdiracAPI_script_template(),
        DIRAC_IMPORT='from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
        DIRAC_JOB_IMPORT='from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
        DIRAC_OBJECT='DiracLHCb()',
        JOB_OBJECT='LHCbJob()',
        NAME=mangle_job_name(app),
        EXE=os.path.join('jobScript', scriptToRun),
        EXE_ARG_STR='',
        EXE_LOG_FILE='Ganga_GaudiExec.log',
        ENVIRONMENT=None,  # app.env,
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=lhcbdirac_outputfiles,
        OUTPUT_PATH="",  # job.fqid,
        OUTPUT_SE=[],
        PLATFORM=app.platform,
        SETTINGS=diracAPI_script_settings(app),
        DIRAC_OPTS=job.backend.diracOpts,
        REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
        # the sandbox is combined with the master's in backend.submit
        INPUT_SANDBOX=repr([f for f in inputsandbox]),
    )

    # NB the inputsandbox here isn't used by the DIRAC backend as we explicitly
    # define INPUT_SANDBOX above!

    # Return the output needed for the backend to submit this job
    return StandardJobConfig(dirac_script, inputbox=[], outputbox=[])

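# Editor's note: in the two GaudiExec handlers above, the sandbox passed to
# DIRAC mixes plain local paths with strings prefixed 'LFN:', which tell DIRAC
# to fetch the file from grid storage instead of shipping it from the client.
# A small illustration of that convention (the helper name is an assumption):

def _split_sandbox(entries):
    """Separate 'LFN:...' grid references from local file paths."""
    lfns = [e for e in entries if isinstance(e, str) and e.startswith('LFN:')]
    local = [e for e in entries if e not in lfns]
    return lfns, local

# _split_sandbox(['LFN:/lhcb/user/a/ab/opts.tgz', '/tmp/extra.py'])
# -> (['LFN:/lhcb/user/a/ab/opts.tgz'], ['/tmp/extra.py'])
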
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    # outputdata, outputdata_path = dirac_ouputdata(app)

    job = stripProxy(app).getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    commandline = []
    commandline.append(app.exe)
    if isType(app.exe, File):
        # logger.info("app: %s" % str(app.exe.name))
        # fileName = os.path.join(get_share_path(app), os.path.basename(app.exe.name))
        # logger.info("EXE: %s" % str(fileName))
        # inputsandbox.append(File(name=fileName))
        inputsandbox.append(app.exe)
        commandline[0] = os.path.join('.', os.path.basename(app.exe.name))
    commandline.extend([str(arg) for arg in app.args])
    logger.debug('Command line: %s: ', commandline)

    # exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
    exe_script_name = 'exe-script.py'

    logger.info("Setting Command to be: '%s'" % repr(commandline))

    inputsandbox.append(FileBuffer(
        name=exe_script_name,
        contents=script_generator(exe_script_template(),
                                  # remove_unreplaced = False,
                                  COMMAND=repr(commandline),
                                  OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' ')),
        executable=True))

    contents = script_generator(exe_script_template(),
                                COMMAND=repr(commandline),
                                OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' '))
    # logger.info("Script is: %s" % str(contents))

    from os.path import abspath, expanduser

    for this_file in job.inputfiles:
        if isinstance(this_file, LocalFile):
            for name in this_file.getFilenameList():
                inputsandbox.append(File(abspath(expanduser(name))))
        elif isinstance(this_file, DiracFile):
            name = this_file.lfn
            if isinstance(input_data, list):
                input_data.append(name)
            else:
                input_data = [name]

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles, config['RequireDefaultSE'])

    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    EXE=exe_script_name,  # os.path.basename(exe_script_path)
                                    EXE_ARG_STR='',  # ' '.join([str(arg) for arg in app.args])
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if config['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later, as this needs
                                    # to be done in backend.submit to combine with the master.
                                    # Note: only 2 #s here, as unreplaced 3-# tokens are auto-removed
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    # logger.info("dirac_script: %s" % dirac_script)
    # logger.info("inbox: %s" % str(unique(inputsandbox)))
    # logger.info("outbox: %s" % str(unique(outputsandbox)))

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))

def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(
        app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    # outputdata, outputdata_path = dirac_ouputdata(app)

    job = app.getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    commandline = app.exe
    if type(app.exe) == File:
        inputsandbox.append(File(name=os.path.join(get_share_path(app),
                                                   os.path.basename(app.exe.name))))
        commandline = os.path.basename(app.exe.name)
    commandline += ' '
    commandline += ' '.join([str(arg) for arg in app.args])
    logger.debug('Command line: %s: ', commandline)

    # exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
    exe_script_name = 'exe-script.py'

    inputsandbox.append(FileBuffer(
        name=exe_script_name,
        contents=script_generator(exe_script_template(),
                                  # remove_unreplaced = False,
                                  COMMAND=commandline
                                  # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                                  ),
        executable=True))

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    EXE=exe_script_name,  # os.path.basename(exe_script_path)
                                    EXE_ARG_STR='',  # ' '.join([str(arg) for arg in app.args])
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later, as this needs
                                    # to be done in backend.submit to combine with the master.
                                    # Note: only 2 #s here, as unreplaced 3-# tokens are auto-removed
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))

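# Editor's note: the handlers that set INPUT_SANDBOX='##INPUT_SANDBOX##' defer
# the final sandbox to the backend: with only two '#'s the token survives the
# template fill (three-# tokens are auto-removed), so backend.submit can later
# combine the master and subjob sandboxes and splice them in. Conceptually (the
# helper name and the exact replacement format are assumptions):

def _splice_input_sandbox(dirac_script_text, master_sandbox, subjob_sandbox):
    """Hypothetical sketch: replace the placeholder with the combined sandbox."""
    combined = list(master_sandbox) + list(subjob_sandbox)
    return dirac_script_text.replace("'##INPUT_SANDBOX##'", repr(combined))

# _splice_input_sandbox("j.setInputSandbox('##INPUT_SANDBOX##')",
#                       ['master.tgz'], ['exe-script.py'])
# -> "j.setInputSandbox(['master.tgz', 'exe-script.py'])"
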
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("Prepare")

    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = stripProxy(app).getJobObject()
    if job.inputdata:
        if not job.splitter:
            if len(job.inputdata) > 100:
                raise BackendError(
                    "You're submitting a job to Dirac with no splitter and more than 100 files, "
                    "please add a splitter and try again!")

    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    data_str = 'import os\n'
    data_str += 'execfile(\'data.py\')\n'

    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer('data-wrapper.py', data_str))

    input_data = []

    # Can't wait to get rid of this when people no longer specify
    # inputdata in the options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from the job over that
    # in the optsfiles, but we need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app), 'inputdata', 'options_data.pkl')

        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()

    #######################################################################
    # Can't wait to get rid of this when people no longer specify
    # outputsandbox or outputdata in the options file
    #######################################################################
    share_path = os.path.join(get_share_path(app), 'output', 'options_parser.pkl')

    if os.path.exists(share_path):
        # if not os.path.exists(share_path):
        #     raise GangaException('could not find the parser')
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters
        fileTransform = allComponentFilters['gangafiles']

        outdata_files = [fileTransform(this_file, None) for this_file in outdata
                         if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend(
            [output_file for output_file in outdata_files if not isType(output_file, DiracFile)])

        outbox_files = [fileTransform(this_file, None) for this_file in outbox
                        if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend(
            [outbox_file for outbox_file in outbox_files if not isType(outbox_file, DiracFile)])

        outputsandbox = [f.namePattern for f in job.non_copyable_outputfiles]
        outputsandbox.extend([f.namePattern for f in job.outputfiles if not isType(f, DiracFile)])
        outputsandbox = unique(outputsandbox)  # + outbox[:]
    #######################################################################

    input_data_dirac, parametricinput_data = dirac_inputdata(job.application)

    if input_data_dirac is not None:
        for f in input_data_dirac:
            if isType(f, DiracFile):
                input_data.append(f.lfn)
            elif isType(f, str):
                input_data.append(f)
            else:
                raise ApplicationConfigurationError(
                    "Don't know how to handle anything other than DiracFiles or strings of LFNs!")

    commandline = 'python ./gaudipython-wrapper.py'
    if is_gaudi_child(app):
        commandline = 'gaudirun.py '
        commandline += ' '.join([str(arg) for arg in app.args])
        commandline += ' options.pkl data-wrapper.py'
    logger.debug('Command line: %s: ', commandline)

    gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")

    script_generator(gaudi_script_template(),
                     # remove_unreplaced = False,
                     outputfile_path=gaudi_script_path,
                     PLATFORM=app.platform,
                     COMMAND=commandline,
                     XMLSUMMARYPARSING=getXMLSummaryScript()
                     # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                     )

    # logger.debug("input_data %s" % str(input_data))

    # We want to propagate the ancestor depth to DIRAC when we have
    # inputdata set
    if job.inputdata is not None and isType(job.inputdata, LHCbDataset):
        # As the RT handler we already know we have a Dirac backend
        if type(job.backend.settings) is not dict:
            raise ApplicationConfigurationError(None, 'backend.settings should be a dict')
        if 'AncestorDepth' in job.backend.settings:
            ancestor_depth = job.backend.settings['AncestorDepth']
        else:
            ancestor_depth = job.inputdata.depth
    else:
        ancestor_depth = 0

    lhcbdirac_script_template = lhcbdiracAPI_script_template()

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # No longer necessary to use lhcbdiracAPI_script_template as we do our own
    # uploads to Dirac; remove after the Ganga6 release.
    # NOTE special case for replicas: the replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        lhcbdirac_script_template,
        DIRAC_IMPORT='from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
        DIRAC_JOB_IMPORT='from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
        DIRAC_OBJECT='DiracLHCb()',
        JOB_OBJECT='LHCbJob()',
        NAME=mangle_job_name(app),
        APP_NAME=stripProxy(app).appname,
        APP_VERSION=app.version,
        APP_SCRIPT=gaudi_script_path,
        APP_LOG_FILE='Ganga_%s_%s.log' % (stripProxy(app).appname, app.version),
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
        OUTPUT_PATH="",  # job.fqid, outputdata_path
        OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
        SETTINGS=diracAPI_script_settings(job.application),
        DIRAC_OPTS=job.backend.diracOpts,
        PLATFORM=app.platform,
        REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
        ANCESTOR_DEPTH=ancestor_depth,
        # This is to be modified in the final 'submit' function in the backend;
        # the backend also handles the inputfiles' DiracFiles as appropriate
        INPUT_SANDBOX='##INPUT_SANDBOX##')
    logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
