def preparejob(self, jobconfig, master_input_sandbox):
    """Write the Local-backend job wrapper script and return its path.

    Loads the LocalHostExec.py template that lives next to this module,
    fills in its ###MARKER### placeholders with job-specific values and
    stores the result as '__jobscript__' in the job's input workspace.

    Args:
        jobconfig: backend-independent job configuration (exe, args, sandbox, env)
        master_input_sandbox: packed sandbox files inherited from the master job

    Returns:
        Path of the wrapper script written into the input workspace.
    """
    job = self.getJobObject()
    # print str(job.backend_output_postprocess)
    mon = job.getMonitoringService()  # value unused here; call retained for any side effects

    import Ganga.Core.Sandbox as Sandbox
    packed_sandbox = job.createPackedInputSandbox(
        jobconfig.getSandboxFiles()
        + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules()))

    # Build the wrapped command line, optionally lowering process priority.
    wrapped_cmd = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    if self.nice:
        wrapped_cmd = ['nice', '-n %d' % self.nice] + wrapped_cmd
    if self.nice < 0:
        logger.warning('increasing process priority is often not allowed, your job may fail due to this')

    out_path = job.getOutputWorkspace().getPath()
    ## FIXME DON'T just use the blind list here, request the list of files to be in the output from a method.
    box_patterns = jobconfig.outputbox
    env_dict = dict() if jobconfig.env is None else jobconfig.env

    import tempfile
    scratch_dir = tempfile.mkdtemp(dir=config['location'])
    self.workdir = scratch_dir

    import inspect
    template_file = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'LocalHostExec.py')

    from Ganga.GPIDev.Lib.File import FileUtils
    wrapper = FileUtils.loadScript(template_file, '')

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles, getWNCodeForInputdataListCreation
    from Ganga.Utility.Config import getConfig

    jobid_repr = repr(job.getFQID('.'))

    # Placeholder substitutions, evaluated in the same order as before.
    substitutions = [
        ('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox)),
        ('###OUTPUTSANDBOXPOSTPROCESSING###', getWNCodeForOutputSandbox(job, ['stdout', 'stderr', '__syslog__'], jobid_repr)),
        ('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, '')),
        ('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, '')),
        ('###CREATEINPUTDATALIST###', getWNCodeForInputdataListCreation(job, '')),
        ('###APPLICATION_NAME###', repr(getName(job.application))),
        ('###INPUT_SANDBOX###', repr(packed_sandbox + master_input_sandbox)),
        ('###SHAREDOUTPUTPATH###', repr(out_path)),
        ('###APPSCRIPTPATH###', repr(wrapped_cmd)),
        ('###OUTPUTPATTERNS###', str(box_patterns)),
        ('###JOBID###', jobid_repr),
        ('###ENVIRONMENT###', repr(env_dict)),
        ('###WORKDIR###', repr(scratch_dir)),
        ('###INPUT_DIR###', repr(job.getStringInputDir())),
        ('###GANGADIR###', repr(getConfig('System')['GANGA_PYTHONPATH'])),
    ]
    for marker, value in substitutions:
        wrapper = wrapper.replace(marker, value)

    return job.getInputWorkspace().writefile(FileBuffer('__jobscript__', wrapper), executable=1)
def preparejob(self, jobconfig, master_input_sandbox):
    """Prepare the job wrapper script for this backend and return its path.

    The LocalHostExec.py template shipped alongside this module is loaded,
    its ###MARKER### placeholders are substituted with job-specific values,
    and the result is written to the job input workspace as '__jobscript__'.
    """
    job = self.getJobObject()
    # print str(job.backend_output_postprocess)
    mon = job.getMonitoringService()  # value unused; call retained for any side effects

    import Ganga.Core.Sandbox as Sandbox
    subjob_input_sandbox = job.createPackedInputSandbox(
        jobconfig.getSandboxFiles() +
        Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules()))

    # Command line to wrap, with optional priority lowering via nice.
    appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    if self.nice:
        appscriptpath = ['nice', '-n %d' % self.nice] + appscriptpath
    if self.nice < 0:
        logger.warning('increasing process priority is often not allowed, your job may fail due to this')

    sharedoutputpath = job.getOutputWorkspace().getPath()
    ## FIXME DON'T just use the blind list here, request the list of files to be in the output from a method.
    outputpatterns = jobconfig.outputbox
    environment = jobconfig.env if jobconfig.env is not None else dict()

    import tempfile
    workdir = tempfile.mkdtemp(dir=config['location'])

    import inspect
    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'LocalHostExec.py')

    from Ganga.GPIDev.Lib.File import FileUtils
    script = FileUtils.loadScript(script_location, '')
    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles, getWNCodeForInputdataListCreation
    from Ganga.Utility.Config import getConfig

    jobidRepr = repr(job.getFQID('.'))

    # Worker-node code fragments.
    script = script.replace('###OUTPUTSANDBOXPOSTPROCESSING###', getWNCodeForOutputSandbox(job, ['stdout', 'stderr', '__syslog__'], jobidRepr))
    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ''))
    script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ''))
    script = script.replace('###CREATEINPUTDATALIST###', getWNCodeForInputdataListCreation(job, ''))

    # Plain job parameters.
    # NOTE(review): direct access to the private attribute _name; elsewhere in
    # this codebase getName(job.application) serves this purpose -- confirm
    # getName is importable here before unifying.
    script = script.replace('###APPLICATION_NAME###', repr(job.application._name))
    script = script.replace('###INPUT_SANDBOX###', repr(subjob_input_sandbox + master_input_sandbox))
    script = script.replace('###SHAREDOUTPUTPATH###', repr(sharedoutputpath))
    script = script.replace('###APPSCRIPTPATH###', repr(appscriptpath))
    script = script.replace('###OUTPUTPATTERNS###', str(outputpatterns))
    script = script.replace('###JOBID###', jobidRepr)
    script = script.replace('###ENVIRONMENT###', repr(environment))
    script = script.replace('###WORKDIR###', repr(workdir))
    script = script.replace('###INPUT_DIR###', repr(job.getStringInputDir()))
    self.workdir = workdir
    script = script.replace('###GANGADIR###', repr(getConfig('System')['GANGA_PYTHONPATH']))

    wrkspace = job.getInputWorkspace()
    return wrkspace.writefile(FileBuffer('__jobscript__', script), executable=1)
def generateWNScript(commandline, app):
    """
    Generate the worker-node wrapper script as a FileBuffer and return it.

    Args:
        commandline (str): This is the command-line argument the script is wrapping
        app (Job): This is the app object which contains everything useful for generating the code
    """
    job = app.getJobObject()
    script_name = getScriptName(app)
    injected_code = getWNCodeForOutputPostprocessing(job, ' ')
    wrapper_text = script_generator(gaudiRun_script_template(),
                                    COMMAND=commandline,
                                    OUTPUTFILESINJECTEDCODE=injected_code)
    return FileBuffer(name=script_name,
                      contents=wrapper_text,
                      subdir='jobScript',
                      executable=True)
def generateWNScript(commandline, app):
    """
    Generate the script as a file buffer and return it.

    Args:
        commandline (str): This is the command-line argument the script is wrapping
        app (Job): This is the app object which contains everything useful for generating the code
    """
    wn_code = getWNCodeForOutputPostprocessing(app.getJobObject(), ' ')
    return FileBuffer(name=getScriptName(app),
                      contents=script_generator(gaudiRun_script_template(),
                                                COMMAND=commandline,
                                                OUTPUTFILESINJECTEDCODE=wn_code),
                      subdir='jobScript',
                      executable=True)
def preparejob(self, jobconfig, master_job_sandbox):
    """Prepare the ARC XRSL description and job wrapper for submission.

    Fills in the job wrapper template, packs the input sandbox (pre-staging
    oversized files to the iocache), composes the XRSL and writes it to
    '__xrslfile__' in the job's input workspace.

    Args:
        jobconfig: backend-independent job configuration
        master_job_sandbox: sandbox files inherited from the master job

    Returns:
        Path of the written XRSL file, or None if sandbox preparation failed.
    """
    script = self.__jobWrapperTemplate__()
    job = self.getJobObject()
    inpw = job.getInputWorkspace()
    wrapperlog = '__jobscript__.log'

    import Ganga.Core.Sandbox as Sandbox

    # FIXME: check what happens if 'stdout','stderr' are specified here
    script = script.replace('###OUTPUTSANDBOX###', repr(jobconfig.outputbox))
    script = script.replace('###APPLICATION_NAME###', getName(job.application))
    script = script.replace('###APPLICATIONEXEC###', repr(jobconfig.getExeString()))
    script = script.replace('###APPLICATIONARGS###', repr(jobconfig.getArguments()))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles
    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' '))
    script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' '))

    if jobconfig.env:
        script = script.replace('###APPLICATIONENVS###', repr(jobconfig.env))
    else:
        script = script.replace('###APPLICATIONENVS###', repr({}))

    script = script.replace('###WRAPPERLOG###', repr(wrapperlog))

    import inspect
    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    mon = job.getMonitoringService()  # value unused here; call retained for any side effects
    self.monInfo = None

    # set the monitoring file by default to the stdout
    if isinstance(self.monInfo, dict):
        self.monInfo['remotefile'] = 'stdout'

    # try to print out the monitoring service information in debug mode
    try:
        logger.debug('job info of monitoring service: %s' % str(self.monInfo))
    except:
        pass

    # prepare input/output sandboxes
    import Ganga.Utility.files
    from Ganga.GPIDev.Lib.File import File
    from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
    fileutils = File(inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR)
    packed_files = jobconfig.getSandboxFiles() + [fileutils]
    sandbox_files = job.createPackedInputSandbox(packed_files)

    # sandbox of child jobs should include master's sandbox
    sandbox_files.extend(master_job_sandbox)

    # check the input file size and pre-upload larger inputs to the iocache
    lfc_host = ''
    input_sandbox_uris = []
    input_sandbox_names = []
    ick = True
    max_prestaged_fsize = 0
    for f in sandbox_files:
        idx = self.__check_and_prestage_inputfile__(f)
        if not idx:
            logger.error('input sandbox preparation failed: %s' % f)
            ick = False
            break
        if idx['lfc_host']:
            lfc_host = idx['lfc_host']
        if idx['remote']:
            fsize = os.path.getsize(os.path.abspath(f))
            if fsize > max_prestaged_fsize:
                max_prestaged_fsize = fsize
            input_sandbox_uris.append(idx['remote'][os.path.basename(f)])
            input_sandbox_names.append(os.path.basename(urlparse(f)[2]))
        if idx['local']:
            input_sandbox_uris += idx['local']
            input_sandbox_names.append(os.path.basename(f))

    if not ick:
        logger.error('stop job submission')
        return None

    # determine the lcg-cp timeout according to max_prestaged_fsize,
    # using the assumption of 1 MB/sec transfer rate.
    # BUGFIX: max_prestaged_fsize and lfc_host were previously reset to 0/''
    # here, which zeroed the predicted timeout and wiped the LFC host that the
    # loop above discovered; the resets are removed.
    transfer_timeout = config['SandboxTransferTimeout']
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))
    if predict_timeout > transfer_timeout:
        transfer_timeout = predict_timeout
    if transfer_timeout < 60:
        transfer_timeout = 60
    script = script.replace('###TRANSFERTIMEOUT###', '%d' % transfer_timeout)

    # update the job wrapper with the inputsandbox list
    script = script.replace('###INPUTSANDBOX###', repr({'remote': {}, 'local': input_sandbox_names}))

    # write out the job wrapper and put job wrapper into job's inputsandbox
    scriptPath = inpw.writefile(FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1)
    input_sandbox = input_sandbox_uris + [scriptPath]
    for isb in input_sandbox:
        logger.debug('ISB URI: %s' % isb)

    # compose output sandbox to include by default the following files:
    # - gzipped stdout (transferred only when the JobLogHandler is WMS)
    # - gzipped stderr (transferred only when the JobLogHandler is WMS)
    # - __jobscript__.log (job wrapper's log)
    output_sandbox = [wrapperlog]
    from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
    for outputSandboxPattern in getOutputSandboxPatterns(job):
        output_sandbox.append(outputSandboxPattern)
    if config['JobLogHandler'] in ['WMS']:
        output_sandbox += ['stdout.gz', 'stderr.gz']
    if len(jobconfig.outputbox):
        output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

    # compose ARC XRSL
    xrsl = {
        'executable': os.path.basename(scriptPath),
        'environment': {'GANGA_LCG_VO': config['VirtualOrganisation'],
                        'GANGA_LOG_HANDLER': config['JobLogHandler'],
                        'LFC_HOST': lfc_host},
        'inputFiles': input_sandbox,
        'outputFiles': output_sandbox,
    }
    xrsl['environment'].update({'GANGA_LCG_CE': self.CE})

    # additional settings from the job
    if jobconfig.env:
        xrsl['environment'].update(jobconfig.env)

    xrslText = Grid.expandxrsl(xrsl)

    # append any additional requirements from the requirements object
    xrslText += '\n'.join(self.requirements.other)

    logger.debug('subjob XRSL: %s' % xrslText)
    return inpw.writefile(FileBuffer('__xrslfile__', xrslText))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Construct the Dirac submission script and sandboxes for an Executable app.

    Builds the wrapped command line, wraps it in an 'exe-script.py' FileBuffer,
    routes input files (LocalFile -> input sandbox, DiracFile -> LFN input
    data) and renders the Dirac API submission script.

    Returns:
        StandardJobConfig carrying the Dirac script and de-duplicated
        input/output sandboxes.
    """
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    job = stripProxy(app).getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    # Build the command line; a File executable is shipped in the sandbox and
    # invoked relative to the worker-node working directory.
    commandline = [app.exe]
    if isType(app.exe, File):
        inputsandbox.append(app.exe)
        commandline[0] = os.path.join('.', os.path.basename(app.exe.name))
    commandline.extend([str(arg) for arg in app.args])
    logger.debug('Command line: %s: ', commandline)

    exe_script_name = 'exe-script.py'
    logger.info("Setting Command to be: '%s'" % repr(commandline))

    # BUGFIX: the wrapper script used to be rendered twice -- once into the
    # FileBuffer below and once into an unused local variable; the dead
    # duplicate render has been removed.
    inputsandbox.append(
        FileBuffer(name=exe_script_name,
                   contents=script_generator(exe_script_template(),
                                             COMMAND=repr(commandline),
                                             OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' ')),
                   executable=True))

    from os.path import abspath, expanduser
    for this_file in job.inputfiles:
        if isinstance(this_file, LocalFile):
            for name in this_file.getFilenameList():
                inputsandbox.append(File(abspath(expanduser(name))))
        elif isinstance(this_file, DiracFile):
            name = this_file.lfn
            if isinstance(input_data, list):
                input_data.append(name)
            else:
                input_data = [name]

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles, config['RequireDefaultSE'])

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        diracAPI_script_template(),
        DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
        DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
        DIRAC_OBJECT='Dirac()',
        JOB_OBJECT='Job()',
        NAME=mangle_job_name(app),
        EXE=exe_script_name,
        EXE_ARG_STR='',
        EXE_LOG_FILE='Ganga_Executable.log',
        ENVIRONMENT=None,  # app.env
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=dirac_outputfiles,
        OUTPUT_PATH="",  # job.fqid
        SETTINGS=diracAPI_script_settings(app),
        DIRAC_OPTS=job.backend.diracOpts,
        REPLICATE='True' if config['ReplicateOutputData'] else '',
        # leave the sandbox for altering later as it needs to be done in
        # backend.submit to combine with the master.
        # Note only using 2 #s as 3 are auto-removed.
        INPUT_SANDBOX='##INPUT_SANDBOX##')

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def preparejob(self, jobconfig, master_input_sandbox):
    """Prepare the job wrapper script for the Interactive backend.

    Packs the input sandbox, assembles the worker-node code fragments for
    input download and output post-processing, fills in the
    InteractiveScriptTemplate and writes it to the job's input workspace.

    Returns:
        Path of the '__jobscript__' file written into the input workspace.
    """
    job = self.getJobObject()

    from Ganga.GPIDev.Lib.File import File
    from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
    import Ganga.Utility.files
    import inspect

    fileutils = File(inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR)

    inputfiles = jobconfig.getSandboxFiles() + [fileutils]
    inbox = job.createPackedInputSandbox(inputfiles)
    inbox.extend(master_input_sandbox)

    # getPath(create=True) also creates the workspace directories as a side effect
    inpDir = job.getInputWorkspace(create=True).getPath()
    outDir = job.getOutputWorkspace(create=True).getPath()

    workdir = tempfile.mkdtemp()
    self.workdir = workdir

    exeString = jobconfig.getExeString()
    argList = jobconfig.getArgStrings()
    argString = " ".join(map(lambda x: " %s " % x, argList))

    outputSandboxPatterns = jobconfig.outputbox
    patternsToZip = []
    wnCodeForPostprocessing = ''
    wnCodeToDownloadInputFiles = ''

    if (len(job.outputfiles) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatternsForInteractive, getWNCodeForOutputPostprocessing
        (outputSandboxPatterns, patternsToZip) = getOutputSandboxPatternsForInteractive(job)
        wnCodeForPostprocessing = 'def printError(message):pass\ndef printInfo(message):pass' + \
            getWNCodeForOutputPostprocessing(job, '')

    all_inputfiles = [this_file for this_file in job.inputfiles]
    if job.master is not None:
        all_inputfiles.extend([this_file for this_file in job.master.inputfiles])

    if (len(all_inputfiles) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import outputFilePostProcessingOnWN
        for inputFile in all_inputfiles:
            inputfileClassName = getName(inputFile)
            logger.debug("name: %s" % inputfileClassName)
            logger.debug("result: %s" % str(outputFilePostProcessingOnWN(job, inputfileClassName)))

            # BUGFIX: the flag was previously spelled both 'getfromFile' and
            # 'getFromFile'; when the subfiles branch ran, the tested name was
            # undefined (NameError) or stale from a previous iteration. A
            # single flag, initialised each iteration, is used instead.
            getFromFile = True
            if outputFilePostProcessingOnWN(job, inputfileClassName):
                inputFile.processWildcardMatches()
                if inputFile.subfiles:
                    getFromFile = False
                    for subfile in inputFile.subfiles:
                        wnCodeToDownloadInputFiles += subfile.getWNScriptDownloadCommand('')
            if getFromFile:
                wnCodeToDownloadInputFiles += inputFile.getWNScriptDownloadCommand('')

    wnCodeToDownloadInputData = ''
    if job.inputdata and (len(job.inputdata) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForDownloadingInputFiles
        wnCodeToDownloadInputData = getWNCodeForDownloadingInputFiles(job, '')

    replace_dict = {
        '###CONSTRUCT_TIME###': (time.strftime("%c")),
        '###WNSANDBOX_SOURCE###': inspect.getsource(Sandbox.WNSandbox),
        '###GANGA_PYTHONPATH###': getConfig("System")["GANGA_PYTHONPATH"],
        '###OUTPUTDIR###': outDir,
        '###WORKDIR###': workdir,
        '###IN_BOX###': inbox,
        '###WN_INPUTFILES###': wnCodeToDownloadInputFiles,
        '###WN_INPUTDATA###': wnCodeToDownloadInputData,
        '###JOBCONFIG_ENV###': jobconfig.env if jobconfig.env is not None else dict(),
        '###EXE_STRING###': exeString,
        '###ARG_STRING###': argString,
        '###WN_POSTPROCESSING###': wnCodeForPostprocessing,
        '###PATTERNS_TO_ZIP###': patternsToZip,
        '###OUTPUT_SANDBOX_PATTERNS###': outputSandboxPatterns
    }

    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'InteractiveScriptTemplate.py.template')

    from Ganga.GPIDev.Lib.File import FileUtils
    commandString = FileUtils.loadScript(script_location, '')

    for k, v in replace_dict.iteritems():
        commandString = commandString.replace(str(k), str(v))

    return job.getInputWorkspace().writefile(FileBuffer("__jobscript__", commandString), executable=1)
def preparejob(self, jobconfig, master_job_sandbox):
    """Prepare the LCG JDL and the job wrapper for submission.

    Fills in the job wrapper template (including the monitoring-service
    constructor), packs the input sandbox with pre-staging of large files,
    composes the JDL and writes it to '__jdlfile__' in the input workspace.

    Args:
        jobconfig: backend-independent job configuration
        master_job_sandbox: sandbox files inherited from the master job

    Returns:
        Path of the written JDL file, or None on sandbox preparation failure
        or unsupported job type.
    """
    script = self.__jobWrapperTemplate__()
    job = self.getJobObject()
    inpw = job.getInputWorkspace()
    wrapperlog = "__jobscript__.log"

    import Ganga.Core.Sandbox as Sandbox

    # FIXME: check what happens if 'stdout','stderr' are specified here
    script = script.replace("###OUTPUTSANDBOX###", repr(jobconfig.outputbox))
    # NOTE(review): direct access to the private attribute _name; getName()
    # serves this purpose elsewhere -- confirm before unifying.
    script = script.replace("###APPLICATION_NAME###", job.application._name)
    script = script.replace("###APPLICATIONEXEC###", repr(jobconfig.getExeString()))
    script = script.replace("###APPLICATIONARGS###", repr(jobconfig.getArguments()))

    from Ganga.GPIDev.Lib.File.OutputFileManager import (
        getWNCodeForOutputPostprocessing,
        getWNCodeForDownloadingInputFiles,
    )

    script = script.replace("###OUTPUTUPLOADSPOSTPROCESSING###", getWNCodeForOutputPostprocessing(job, " "))
    script = script.replace("###DOWNLOADINPUTFILES###", getWNCodeForDownloadingInputFiles(job, " "))

    if jobconfig.env:
        script = script.replace("###APPLICATIONENVS###", repr(jobconfig.env))
    else:
        script = script.replace("###APPLICATIONENVS###", repr({}))

    script = script.replace("###WRAPPERLOG###", repr(wrapperlog))

    import inspect
    script = script.replace("###INLINEMODULES###", inspect.getsource(Sandbox.WNSandbox))

    mon = job.getMonitoringService()
    self.monInfo = None

    # set the monitoring file by default to the stdout
    if isinstance(self.monInfo, dict):
        self.monInfo["remotefile"] = "stdout"

    # try to print out the monitoring service information in debug mode
    try:
        logger.debug("job info of monitoring service: %s" % str(self.monInfo))
    except:
        pass

    script = script.replace("###MONITORING_SERVICE###", mon.getWrapperScriptConstructorText())

    # prepare input/output sandboxes
    packed_files = (
        jobconfig.getSandboxFiles()
        + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules())
        + Sandbox.getGangaModulesAsSandboxFiles(mon.getSandboxModules())
    )
    sandbox_files = job.createPackedInputSandbox(packed_files)

    # sandbox of child jobs should include master's sandbox
    sandbox_files.extend(master_job_sandbox)

    # check the input file size and pre-upload larger inputs to the iocache
    lfc_host = ""
    input_sandbox_uris = []
    input_sandbox_names = []
    ick = True
    max_prestaged_fsize = 0
    for f in sandbox_files:
        idx = self.__check_and_prestage_inputfile__(f)
        if not idx:
            logger.error("input sandbox preparation failed: %s" % f)
            ick = False
            break
        if idx["lfc_host"]:
            lfc_host = idx["lfc_host"]
        if idx["remote"]:
            fsize = os.path.getsize(os.path.abspath(f))
            if fsize > max_prestaged_fsize:
                max_prestaged_fsize = fsize
            input_sandbox_uris.append(idx["remote"][os.path.basename(f)])
            input_sandbox_names.append(os.path.basename(urlparse(f)[2]))
        if idx["local"]:
            input_sandbox_uris += idx["local"]
            input_sandbox_names.append(os.path.basename(f))

    if not ick:
        logger.error("stop job submission")
        return None

    # determine the lcg-cp timeout according to max_prestaged_fsize,
    # using the assumption of 1 MB/sec transfer rate.
    # BUGFIX: max_prestaged_fsize and lfc_host were previously reset to 0/""
    # here, which zeroed the predicted timeout and wiped the LFC host that the
    # loop above discovered; the resets are removed.
    transfer_timeout = config["SandboxTransferTimeout"]
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))
    if predict_timeout > transfer_timeout:
        transfer_timeout = predict_timeout
    if transfer_timeout < 60:
        transfer_timeout = 60
    script = script.replace("###TRANSFERTIMEOUT###", "%d" % transfer_timeout)

    # update the job wrapper with the inputsandbox list
    script = script.replace("###INPUTSANDBOX###", repr({"remote": {}, "local": input_sandbox_names}))

    # write out the job wrapper and put job wrapper into job's inputsandbox
    scriptPath = inpw.writefile(FileBuffer("__jobscript_%s__" % job.getFQID("."), script), executable=1)
    input_sandbox = input_sandbox_uris + [scriptPath]
    for isb in input_sandbox:
        logger.debug("ISB URI: %s" % isb)

    # compose output sandbox to include by default the following files:
    # - gzipped stdout (transferred only when the JobLogHandler is WMS)
    # - gzipped stderr (transferred only when the JobLogHandler is WMS)
    # - __jobscript__.log (job wrapper's log)
    output_sandbox = [wrapperlog]
    from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
    for outputSandboxPattern in getOutputSandboxPatterns(job):
        output_sandbox.append(outputSandboxPattern)
    if config["JobLogHandler"] in ["WMS"]:
        output_sandbox += ["stdout.gz", "stderr.gz"]
    if len(jobconfig.outputbox):
        output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

    # compose LCG JDL
    jdl = {
        "VirtualOrganisation": config["VirtualOrganisation"],
        "Executable": os.path.basename(scriptPath),
        "Environment": {
            "GANGA_LCG_VO": config["VirtualOrganisation"],
            "GANGA_LOG_HANDLER": config["JobLogHandler"],
            "LFC_HOST": lfc_host,
        },
        "StdOutput": "stdout",
        "StdError": "stderr",
        "InputSandbox": input_sandbox,
        "OutputSandbox": output_sandbox,
        "OutputSandboxBaseDestURI": "gsiftp://localhost",
    }
    jdl["Environment"].update({"GANGA_LCG_CE": self.CE})
    jdl["Requirements"] = self.requirements.merge(jobconfig.requirements).convert()

    if self.jobtype.upper() in ["NORMAL", "MPICH"]:
        jdl["JobType"] = self.jobtype.upper()
        if self.jobtype.upper() == "MPICH":
            # jdl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
            jdl["Requirements"].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
            jdl["NodeNumber"] = self.requirements.nodenumber
    else:
        logger.warning('JobType "%s" not supported' % self.jobtype)
        return

    # additional settings from the job
    # if jobconfig.env:
    #     jdl['Environment'].update(jobconfig.env)

    jdlText = Grid.expandjdl(jdl)
    logger.debug("subjob JDL: %s" % jdlText)
    return inpw.writefile(FileBuffer("__jdlfile__", jdlText))
def preparejob(self, jobconfig, master_job_sandbox):
    """Prepare the LCG JDL and the job wrapper for submission.

    Fills in the job wrapper template, packs the input sandbox with
    pre-staging of large files, composes the JDL and writes it to
    '__jdlfile__' in the input workspace.

    Args:
        jobconfig: backend-independent job configuration
        master_job_sandbox: sandbox files inherited from the master job

    Returns:
        Path of the written JDL file, or None on sandbox preparation failure
        or unsupported job type.
    """
    script = self.__jobWrapperTemplate__()
    job = self.getJobObject()
    inpw = job.getInputWorkspace()
    wrapperlog = '__jobscript__.log'

    import Ganga.Core.Sandbox as Sandbox

    # FIXME: check what happens if 'stdout','stderr' are specified here
    script = script.replace('###OUTPUTSANDBOX###', repr(jobconfig.outputbox))
    script = script.replace('###APPLICATION_NAME###', getName(job.application))
    script = script.replace('###APPLICATIONEXEC###', repr(jobconfig.getExeString()))
    script = script.replace('###APPLICATIONARGS###', repr(jobconfig.getArguments()))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles
    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' '))
    script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' '))

    if jobconfig.env:
        script = script.replace('###APPLICATIONENVS###', repr(jobconfig.env))
    else:
        script = script.replace('###APPLICATIONENVS###', repr({}))

    script = script.replace('###WRAPPERLOG###', repr(wrapperlog))

    import inspect
    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    mon = job.getMonitoringService()  # value unused here; call retained for any side effects
    self.monInfo = None

    # set the monitoring file by default to the stdout
    if isinstance(self.monInfo, dict):
        self.monInfo['remotefile'] = 'stdout'

    # try to print out the monitoring service information in debug mode
    try:
        logger.debug('job info of monitoring service: %s' % str(self.monInfo))
    except:
        pass

    # prepare input/output sandboxes
    packed_files = jobconfig.getSandboxFiles() + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules())
    sandbox_files = job.createPackedInputSandbox(packed_files)

    # sandbox of child jobs should include master's sandbox
    sandbox_files.extend(master_job_sandbox)

    # check the input file size and pre-upload larger inputs to the iocache
    lfc_host = ''
    input_sandbox_uris = []
    input_sandbox_names = []
    ick = True
    max_prestaged_fsize = 0
    for f in sandbox_files:
        idx = self.__check_and_prestage_inputfile__(f)
        if not idx:
            logger.error('input sandbox preparation failed: %s' % f)
            ick = False
            break
        if idx['lfc_host']:
            lfc_host = idx['lfc_host']
        if idx['remote']:
            fsize = os.path.getsize(os.path.abspath(f))
            if fsize > max_prestaged_fsize:
                max_prestaged_fsize = fsize
            input_sandbox_uris.append(idx['remote'][os.path.basename(f)])
            input_sandbox_names.append(os.path.basename(urlparse(f)[2]))
        if idx['local']:
            input_sandbox_uris += idx['local']
            input_sandbox_names.append(os.path.basename(f))

    if not ick:
        logger.error('stop job submission')
        return None

    # determine the lcg-cp timeout according to max_prestaged_fsize,
    # using the assumption of 1 MB/sec transfer rate.
    # BUGFIX: max_prestaged_fsize and lfc_host were previously reset to 0/''
    # here, which zeroed the predicted timeout and wiped the LFC host that the
    # loop above discovered; the resets are removed.
    transfer_timeout = config['SandboxTransferTimeout']
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))
    if predict_timeout > transfer_timeout:
        transfer_timeout = predict_timeout
    if transfer_timeout < 60:
        transfer_timeout = 60
    script = script.replace('###TRANSFERTIMEOUT###', '%d' % transfer_timeout)

    # update the job wrapper with the inputsandbox list
    script = script.replace('###INPUTSANDBOX###', repr({'remote': {}, 'local': input_sandbox_names}))

    # write out the job wrapper and put job wrapper into job's inputsandbox
    scriptPath = inpw.writefile(FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1)
    input_sandbox = input_sandbox_uris + [scriptPath]
    for isb in input_sandbox:
        logger.debug('ISB URI: %s' % isb)

    # compose output sandbox to include by default the following files:
    # - gzipped stdout (transferred only when the JobLogHandler is WMS)
    # - gzipped stderr (transferred only when the JobLogHandler is WMS)
    # - __jobscript__.log (job wrapper's log)
    output_sandbox = [wrapperlog]
    from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
    for outputSandboxPattern in getOutputSandboxPatterns(job):
        output_sandbox.append(outputSandboxPattern)
    if config['JobLogHandler'] in ['WMS']:
        output_sandbox += ['stdout.gz', 'stderr.gz']
    if len(jobconfig.outputbox):
        output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

    # compose LCG JDL
    jdl = {
        'VirtualOrganisation': config['VirtualOrganisation'],
        'Executable': os.path.basename(scriptPath),
        'Environment': {'GANGA_LCG_VO': config['VirtualOrganisation'],
                        'GANGA_LOG_HANDLER': config['JobLogHandler'],
                        'LFC_HOST': lfc_host},
        'StdOutput': 'stdout',
        'StdError': 'stderr',
        'InputSandbox': input_sandbox,
        'OutputSandbox': output_sandbox,
        'OutputSandboxBaseDestURI': 'gsiftp://localhost'
    }
    jdl['Environment'].update({'GANGA_LCG_CE': self.CE})
    jdl['Requirements'] = self.requirements.merge(jobconfig.requirements).convert()

    if self.jobtype.upper() in ['NORMAL', 'MPICH']:
        jdl['JobType'] = self.jobtype.upper()
        if self.jobtype.upper() == 'MPICH':
            # jdl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
            jdl['Requirements'].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
            jdl['NodeNumber'] = self.requirements.nodenumber
    else:
        logger.warning('JobType "%s" not supported' % self.jobtype)
        return

    # additional settings from the job
    # if jobconfig.env:
    #     jdl['Environment'].update(jobconfig.env)

    jdlText = Grid.expandjdl(jdl)
    logger.debug('subjob JDL: %s' % jdlText)
    return inpw.writefile(FileBuffer('__jdlfile__', jdlText))
def generateWNScript(commandline, job):
    """
    Generate the script as a file buffer and return it.

    Args:
        commandline (str): This is the command-line argument the script is wrapping
        job (Job): This is the job object which contains everything useful for generating the code
    """
    exe_script_name = 'gaudiRun-script.py'

    # NOTE(review): WN_script_name is not defined inside this function --
    # presumably a module-level name; verify it exists in this module.
    template_kwargs = {
        'COMMAND': commandline,
        'WN_SCRIPT_NAME': WN_script_name,
        'OUTPUTFILESINJECTEDCODE': getWNCodeForOutputPostprocessing(job, ' '),
    }
    # Only a python-driven run needs the options-file name injected.
    if job.application.runWithPython:
        template_kwargs['SCRIPT_NAME'] = os.path.basename(job.application.getOptsFile().namePattern)

    return FileBuffer(name=exe_script_name,
                      contents=script_generator(gaudiRun_script_template(), **template_kwargs),
                      executable=True)
def preparejob(self, jobconfig, master_input_sandbox):
    """Write the '__jobscript__' wrapper for this batch job into the job's
    input workspace and return the written file's path.

    The wrapper is built from the inline Python-2 template below by textual
    substitution of the ###...### placeholders.

    Args:
        jobconfig (StandardJobConfig): configuration of the (sub)job being prepared
        master_input_sandbox (list): sandbox files inherited from the master job
    """
    job = self.getJobObject()
    mon = job.getMonitoringService()

    import Ganga.Core.Sandbox as Sandbox

    # Pack the user's sandbox files together with the Ganga modules needed on
    # the worker node (defaults + whatever the monitoring service requires).
    subjob_input_sandbox = job.createPackedInputSandbox(
        jobconfig.getSandboxFiles()
        + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules())
        + Sandbox.getGangaModulesAsSandboxFiles(mon.getSandboxModules())
    )

    appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    sharedoutputpath = job.getOutputWorkspace().getPath()
    outputpatterns = jobconfig.outputbox
    environment = jobconfig.env

    # Worker-node wrapper template (Python 2 on the worker node).
    # NOTE(review): the template's tarfile fallback uses sys.path.insert before
    # 'import sys' appears further down — presumably relies on the interpreter
    # reaching the later import first in practice; confirm before reordering.
    text = """#!/usr/bin/env python

import shutil
import os
import time
import popen2
import glob

############################################################################################

###INLINEMODULES###
###INLINEHOSTNAMEFUNCTION###

############################################################################################

input_sandbox = ###INPUT_SANDBOX###
sharedoutputpath = ###SHAREDOUTPUTPATH###
outputpatterns = ###OUTPUTPATTERNS###
appscriptpath = ###APPSCRIPTPATH###
environment = ###ENVIRONMENT###

# jobid is a string
jobid = ###JOBID###

###PREEXECUTE###

def flush_file(f):
    f.flush()
    os.fsync(f.fileno()) #this forces a global flush (cache synchronization on AFS)

def open_file(fname):
    try:
        filehandle=file(fname,'w')
    except IOError,x:
        print 'ERROR: not able to write a status file: ', fname
        print 'ERROR: ',x
        raise
    return filehandle

statusfilename = os.path.join(sharedoutputpath,'__jobstatus__')
heartbeatfilename = os.path.join(sharedoutputpath,'__heartbeat__')

statusfile=open_file(statusfilename)
heartbeatfile=open_file(heartbeatfilename)

line='START: '+ time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) + os.linesep
try:
    line+='PID: ' + os.getenv('###JOBIDNAME###') + os.linesep
    line+='QUEUE: ' + os.getenv('###QUEUENAME###') + os.linesep
    line+='ACTUALCE: ' + hostname() + os.linesep
except:
    pass
statusfile.writelines(line)
flush_file(statusfile)

try:
    import tarfile
except ImportError,x:
    sys.path.insert(0,###TARFILE_PYTHONPATH###)
    import tarfile

# -- WARNING: get the input files including the python modules BEFORE sys.path.insert()
# -- SINCE PYTHON 2.6 THERE WAS A SUBTLE CHANGE OF SEMANTICS IN THIS AREA
for f in input_sandbox:
    getPackedInputSandbox(f)
# -- END OF MOVED CODE BLOCK

import sys
sys.path.insert(0, ###GANGADIR###)
sys.path.insert(0,os.path.join(os.getcwd(),PYTHON_DIR))

try:
    import subprocess
except ImportError,x:
    sys.path.insert(0,###SUBPROCESS_PYTHONPATH###)
    import subprocess

for key,value in environment.iteritems():
    os.environ[key] = value

sysout2 = os.dup(sys.stdout.fileno())
syserr2 = os.dup(sys.stderr.fileno())

print >>sys.stdout,"--- GANGA APPLICATION OUTPUT BEGIN ---"
print >>sys.stderr,"--- GANGA APPLICATION ERROR BEGIN ---"
flush_file(sys.stdout)
flush_file(sys.stderr)

sys.stdout=file('./__syslog__','w')
sys.stderr=sys.stdout

###MONITORING_SERVICE###

monitor = createMonitoringObject()
monitor.start()

result = 255

try:
    child = subprocess.Popen(appscriptpath, shell=False, stdout=sysout2, stderr=syserr2)

    while 1:
        result = child.poll()
        if result is not None:
            break
        monitor.progress()
        heartbeatfile.write('.')
        flush_file(heartbeatfile)
        time.sleep(###HEARTBEATFREQUENCE###)

except Exception,x:
    print 'ERROR: %s'%str(x)

monitor.progress()
flush_file(sys.stdout)
flush_file(sys.stderr)

sys.stdout=sys.__stdout__
sys.stderr=sys.__stderr__

print >>sys.stdout,"--- GANGA APPLICATION OUTPUT END ---"

monitor.stop(result)

try:
    filefilter
except:
    filefilter = None

from Ganga.Utility.files import multi_glob, recursive_copy

createOutputSandbox(outputpatterns,filefilter,sharedoutputpath)

def printError(message):
    print >>sys.stderr, message

def printInfo(message):
    print >>sys.stdout, message

###OUTPUTUPLOADSPOSTPROCESSING###

print >>sys.stderr,"--- GANGA APPLICATION ERROR END ---"

###OUTPUTSANDBOXPOSTPROCESSING###

###POSTEXECUTE###

line='EXITCODE: ' + repr(result) + os.linesep
line+='STOP: '+time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) + os.linesep
statusfile.writelines(line)
statusfile.close()
heartbeatfile.close()
os.unlink(heartbeatfilename)

sys.exit(result)
"""

    import inspect
    import Ganga.Core.Sandbox as Sandbox
    import Ganga.Utility as Utility
    from Ganga.Utility.Config import getConfig
    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing

    jobidRepr = repr(self.getJobObject().getFQID("."))

    # Substitute the ###...### placeholders one by one.
    text = text.replace(
        "###OUTPUTSANDBOXPOSTPROCESSING###",
        getWNCodeForOutputSandbox(
            job, ["__syslog__", getConfig("Output")["PostProcessLocationsFileName"]], jobidRepr
        ),
    )
    text = text.replace("###OUTPUTUPLOADSPOSTPROCESSING###", getWNCodeForOutputPostprocessing(job, ""))
    # Inline the sandbox helpers and hostname() into the wrapper source
    text = text.replace("###INLINEMODULES###", inspect.getsource(Sandbox.WNSandbox))
    text = text.replace("###INLINEHOSTNAMEFUNCTION###", inspect.getsource(Utility.util.hostname))
    text = text.replace("###APPSCRIPTPATH###", repr(appscriptpath))
    # text = text.replace('###SHAREDINPUTPATH###',repr(sharedinputpath))
    logger.debug("subjob input sandbox %s ", subjob_input_sandbox)
    logger.debug("master input sandbox %s ", master_input_sandbox)
    text = text.replace("###INPUT_SANDBOX###", repr(subjob_input_sandbox + master_input_sandbox))
    text = text.replace("###SHAREDOUTPUTPATH###", repr(sharedoutputpath))
    text = text.replace("###OUTPUTPATTERNS###", repr(outputpatterns))
    text = text.replace("###JOBID###", jobidRepr)
    text = text.replace("###ENVIRONMENT###", repr(environment))
    # Backend-specific hooks and queue/batch-system names come from self.config
    text = text.replace("###PREEXECUTE###", self.config["preexecute"])
    text = text.replace("###POSTEXECUTE###", self.config["postexecute"])
    text = text.replace("###JOBIDNAME###", self.config["jobid_name"])
    text = text.replace("###QUEUENAME###", self.config["queue_name"])
    text = text.replace("###HEARTBEATFREQUENCE###", self.config["heartbeat_frequency"])
    text = text.replace("###INPUT_DIR###", repr(job.getStringInputDir()))
    text = text.replace("###MONITORING_SERVICE###", job.getMonitoringService().getWrapperScriptConstructorText())
    text = text.replace("###GANGADIR###", repr(getConfig("System")["GANGA_PYTHONPATH"]))

    import Ganga.PACKAGE
    # Fallback module paths for worker nodes running an old python
    text = text.replace(
        "###SUBPROCESS_PYTHONPATH###",
        repr(Ganga.PACKAGE.setup.getPackagePath2("subprocess", "syspath", force=True)),
    )
    text = text.replace(
        "###TARFILE_PYTHONPATH###", repr(Ganga.PACKAGE.setup.getPackagePath2("tarfile", "syspath", force=True))
    )

    from Ganga.GPIDev.Lib.File import FileBuffer

    return job.getInputWorkspace().writefile(FileBuffer("__jobscript__", text), executable=1)
def preparejob(self, jobconfig, master_input_sandbox):
    """Create the worker-node wrapper script for a Local job and write it
    into the job's input workspace.

    Args:
        jobconfig (StandardJobConfig): configuration of the (sub)job being submitted
        master_input_sandbox (list): sandbox files inherited from the master job

    Returns:
        str: path of the '__jobscript__' file written to the input workspace
    """
    job = self.getJobObject()
    mon = job.getMonitoringService()

    import Ganga.Core.Sandbox as Sandbox

    # Pack the job's own sandbox files together with the Ganga modules needed
    # on the worker node (default modules + those of the monitoring service).
    subjob_input_sandbox = job.createPackedInputSandbox(
        jobconfig.getSandboxFiles()
        + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules())
        + Sandbox.getGangaModulesAsSandboxFiles(mon.getSandboxModules())
    )

    appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    if self.nice:
        appscriptpath = ["nice", "-n %d" % self.nice] + appscriptpath
        if self.nice < 0:
            logger.warning("increasing process priority is often not allowed, your job may fail due to this")

    sharedoutputpath = job.getOutputWorkspace().getPath()
    outputpatterns = jobconfig.outputbox
    environment = dict() if jobconfig.env is None else jobconfig.env

    import tempfile
    workdir = tempfile.mkdtemp(dir=config["location"])

    import inspect
    # The wrapper template lives next to this module on disk
    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))), "LocalHostExec.py"
    )

    from Ganga.GPIDev.Lib.File import FileUtils
    script = FileUtils.loadScript(script_location, "")

    script = script.replace("###INLINEMODULES###", inspect.getsource(Sandbox.WNSandbox))

    from Ganga.GPIDev.Lib.File.OutputFileManager import (
        getWNCodeForOutputSandbox,
        getWNCodeForOutputPostprocessing,
        getWNCodeForDownloadingInputFiles,
        getWNCodeForInputdataListCreation,
    )
    from Ganga.Utility.Config import getConfig

    jobidRepr = repr(job.getFQID("."))

    # Substitute the remaining template placeholders
    script = script.replace(
        "###OUTPUTSANDBOXPOSTPROCESSING###",
        getWNCodeForOutputSandbox(job, ["stdout", "stderr", "__syslog__"], jobidRepr),
    )
    script = script.replace("###OUTPUTUPLOADSPOSTPROCESSING###", getWNCodeForOutputPostprocessing(job, ""))
    script = script.replace("###DOWNLOADINPUTFILES###", getWNCodeForDownloadingInputFiles(job, ""))
    script = script.replace("###CREATEINPUTDATALIST###", getWNCodeForInputdataListCreation(job, ""))

    # CONSISTENCY FIX: use the getName() helper instead of reaching into the
    # private _name attribute, matching the other preparejob implementations
    # in this file.
    script = script.replace("###APPLICATION_NAME###", repr(getName(job.application)))
    script = script.replace("###INPUT_SANDBOX###", repr(subjob_input_sandbox + master_input_sandbox))
    script = script.replace("###SHAREDOUTPUTPATH###", repr(sharedoutputpath))
    script = script.replace("###APPSCRIPTPATH###", repr(appscriptpath))
    script = script.replace("###OUTPUTPATTERNS###", repr(outputpatterns))
    script = script.replace("###JOBID###", jobidRepr)
    script = script.replace("###ENVIRONMENT###", repr(environment))
    script = script.replace("###WORKDIR###", repr(workdir))
    script = script.replace("###INPUT_DIR###", repr(job.getStringInputDir()))
    script = script.replace(
        "###MONITORING_SERVICE###", job.getMonitoringService().getWrapperScriptConstructorText()
    )

    self.workdir = workdir

    script = script.replace("###GANGADIR###", repr(getConfig("System")["GANGA_PYTHONPATH"]))

    wrkspace = job.getInputWorkspace()
    scriptPath = wrkspace.writefile(FileBuffer("__jobscript__", script), executable=1)

    return scriptPath
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare an Executable application for submission to the Dirac backend.

    Builds the 'exe-script.py' worker-node wrapper plus the Dirac API
    submission script and returns both wrapped in a StandardJobConfig.

    Args:
        app (IApplication): the application being configured
        appsubconfig (tuple): per-subjob application configuration
        appmasterconfig (tuple): application configuration of the master job
        jobmasterconfig (StandardJobConfig): configuration of the master job

    Returns:
        StandardJobConfig: the job configuration handed to the Dirac backend
    """
    inputsandbox, outputsandbox = sandbox_prepare(
        app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    # outputdata, outputdata_path = dirac_ouputdata(app)

    job = app.getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    commandline = app.exe
    # CONSISTENCY FIX: use the proxy-aware isType() instead of the exact
    # comparison 'type(app.exe) == File', matching the sibling prepare()
    # implementation and tolerating subclasses/GPI proxies of File.
    if isType(app.exe, File):
        # Ship the executable itself in the sandbox and run the shipped copy
        inputsandbox.append(File(name=os.path.join(get_share_path(app),
                                                   os.path.basename(app.exe.name))))
        commandline = os.path.basename(app.exe.name)
    commandline += ' '
    commandline += ' '.join([str(arg) for arg in app.args])
    logger.debug('Command line: %s: ', commandline)

    #exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
    exe_script_name = 'exe-script.py'

    # Wrapper script that actually runs the command on the worker node
    inputsandbox.append(FileBuffer(name=exe_script_name,
                                   contents=script_generator(exe_script_template(),
                                                             #remove_unreplaced = False,
                                                             COMMAND=commandline,
                                                             OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                                                             ),
                                   executable=True))

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    # os.path.basename(exe_script_path),
                                    EXE=exe_script_name,
                                    # ' '.join([str(arg) for arg in app.args]),
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX='##INPUT_SANDBOX##'
                                    )

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """ This function prepares the application of the actual job being submitted, master or not
    Args:
        app (IApplication): This is the application actually being submitted belonging to the master or sub job being configured
        appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
        appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
        jobmasterconfig (StandardJobConfig): This is the configuration of the master job which may or may not be the same job as owning the app
    """

    # Construct some common objects used in job submission here
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)

    job = app.getJobObject()

    # Construct the im3shape-script which is used by this job. i.e. the script and full command line to be used in this job
    exe_script_name = 'im3shape-script.py'
    # Output name is '<input basename>.<rank>.<size>'
    output_filename = os.path.basename(job.inputdata[0].lfn) + '.' + str(app.rank) + '.' + str(app.size)
    im3shape_args = ' '.join([
        os.path.basename(job.inputdata[0].lfn),
        os.path.basename(app.ini_location.namePattern),  # input.fz, config.ini
        app.catalog,
        output_filename,  # catalog, output
        str(app.rank),
        str(app.size)
    ])

    full_cmd = app.exe_name + ' ' + im3shape_args

    # Only DiracFiles get special JDL treatment below
    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    # Worker-node wrapper shipped in the input sandbox
    inputsandbox.append(FileBuffer(
        name=exe_script_name,
        contents=script_generator(Im3Shape_script_template(),
                                  ## ARGS for app from job.app
                                  RUN_DIR = app.run_dir,
                                  BLACKLIST = os.path.basename(app.blacklist.namePattern),
                                  COMMAND = full_cmd,
                                  ## Stuff for Ganga
                                  OUTPUTFILES = repr([this_file.namePattern for this_file in job.outputfiles]),
                                  OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' '),
                                  ),
        executable=True)
    )

    # TODO once there is a common, IApplication.getMeFilesForThisApp function replace this list with a getter ad it shouldn't really be hard-coded
    app_file_list = [app.im3_location, app.ini_location, app.blacklist]

    app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]

    # Attach the app's own grid-resident files as job input files
    job.inputfiles.extend(app_file_list)

    # Slightly mis-using this here but it would be nice to have these files
    #job.inputfiles.extend(job.inputdata)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT = 'from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT = 'from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT = 'Dirac()',
                                    JOB_OBJECT = 'Job()',
                                    NAME = mangle_job_name(app),
                                    EXE = exe_script_name,
                                    EXE_ARG_STR = '',
                                    EXE_LOG_FILE = 'Ganga_Executable.log',
                                    ENVIRONMENT = None,
                                    INPUTDATA = input_data,
                                    PARAMETRIC_INPUTDATA = parametricinput_data,
                                    OUTPUT_SANDBOX = API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT = dirac_outputfile_jdl(outputfiles, False),
                                    OUTPUT_PATH = "",  # job.fqid,
                                    SETTINGS = diracAPI_script_settings(app),
                                    DIRAC_OPTS = job.backend.diracOpts,
                                    REPLICATE = 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX = '##INPUT_SANDBOX##'
                                    )

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Build the job configuration for running an Im3Shape job on Dirac.

    A small 'im3shape-script.py' wrapper is generated and placed in the input
    sandbox, the application's grid-resident files are attached as job input
    files, and a Dirac API submission script is produced.

    Args:
        app (IApplication): application belonging to the master or sub job being configured
        appsubconfig (tuple): per-subjob application configuration for the input sandbox
        appmasterconfig (tuple): application configuration of the master job
        jobmasterconfig (StandardJobConfig): configuration of the master job

    Returns:
        StandardJobConfig: configuration handed to the Dirac backend
    """

    # Shared helpers for sandbox and input-data construction
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)

    job = app.getJobObject()

    # Assemble the full worker-node command line for im3shape
    exe_script_name = "im3shape-script.py"
    fz_basename = os.path.basename(job.inputdata[0].lfn)
    output_filename = ".".join([fz_basename, str(app.rank), str(app.size)])
    cmd_parts = [
        fz_basename,
        os.path.basename(app.ini_location.namePattern),  # input.fz, config.ini
        app.catalog,
        output_filename,  # catalog, output
        str(app.rank),
        str(app.size),
    ]
    full_cmd = app.exe_name + " " + " ".join(cmd_parts)

    # Only DiracFiles receive special output-JDL treatment
    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    # Worker-node wrapper shipped in the input sandbox
    wrapper_contents = script_generator(
        Im3Shape_script_template(),
        ## ARGS for app from job.app
        RUN_DIR=app.run_dir,
        BLACKLIST=os.path.basename(app.blacklist.namePattern),
        COMMAND=full_cmd,
        ## Stuff for Ganga
        OUTPUTFILES=repr([this_file.namePattern for this_file in job.outputfiles]),
        OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, " "),
    )
    inputsandbox.append(FileBuffer(name=exe_script_name, contents=wrapper_contents, executable=True))

    # TODO once there is a common, IApplication.getMeFilesForThisApp function replace this list with a getter ad it shouldn't really be hard-coded
    candidate_app_files = [app.im3_location, app.ini_location, app.blacklist]
    grid_app_files = [this_file for this_file in candidate_app_files if isinstance(this_file, DiracFile)]

    job.inputfiles.extend(grid_app_files)

    # Slightly mis-using this here but it would be nice to have these files
    # job.inputfiles.extend(job.inputdata)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    api_kwargs = dict(
        DIRAC_IMPORT="from DIRAC.Interfaces.API.Dirac import Dirac",
        DIRAC_JOB_IMPORT="from DIRAC.Interfaces.API.Job import Job",
        DIRAC_OBJECT="Dirac()",
        JOB_OBJECT="Job()",
        NAME=mangle_job_name(app),
        EXE=exe_script_name,
        EXE_ARG_STR="",
        EXE_LOG_FILE="Ganga_Executable.log",
        ENVIRONMENT=None,
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=dirac_outputfile_jdl(outputfiles, False),
        OUTPUT_PATH="",  # job.fqid,
        SETTINGS=diracAPI_script_settings(app),
        DIRAC_OPTS=job.backend.diracOpts,
        REPLICATE="True" if getConfig("DIRAC")["ReplicateOutputData"] else "",
        # leave the sandbox for altering later as needs
        # to be done in backend.submit to combine master.
        # Note only using 2 #s as auto-remove 3
        INPUT_SANDBOX="##INPUT_SANDBOX##",
    )
    dirac_script = script_generator(diracAPI_script_template(), **api_kwargs)

    return StandardJobConfig(dirac_script, inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def preparejob(self, jobconfig, master_input_sandbox):
    """Method for preparing job script

    Builds the '__jobscript__' wrapper for the Interactive backend from
    InteractiveScriptTemplate.py and writes it into the job's input workspace.

    Args:
        jobconfig (StandardJobConfig): configuration of the (sub)job being prepared
        master_input_sandbox (list): sandbox files inherited from the master job

    Returns:
        str: path of the script written into the input workspace
    """
    job = self.getJobObject()

    inputfiles = jobconfig.getSandboxFiles()
    inbox = job.createPackedInputSandbox(inputfiles)
    inbox.extend(master_input_sandbox)

    # create=True makes sure the workspace directories exist on disk
    inpDir = job.getInputWorkspace(create=True).getPath()
    outDir = job.getOutputWorkspace(create=True).getPath()

    workdir = tempfile.mkdtemp()
    self.workdir = workdir

    exeString = jobconfig.getExeString()
    argList = jobconfig.getArgStrings()
    argString = " ".join(map(lambda x: " %s " % x, argList))

    outputSandboxPatterns = jobconfig.outputbox
    patternsToZip = []
    wnCodeForPostprocessing = ''
    wnCodeToDownloadInputFiles = ''

    if (len(job.outputfiles) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatternsForInteractive, getWNCodeForOutputPostprocessing
        (outputSandboxPatterns, patternsToZip) = getOutputSandboxPatternsForInteractive(job)
        wnCodeForPostprocessing = 'def printError(message):pass\ndef printInfo(message):pass' + \
            getWNCodeForOutputPostprocessing(job, '')

    all_inputfiles = [this_file for this_file in job.inputfiles]
    if job.master:
        all_inputfiles.extend([this_file for this_file in job.master.inputfiles])

    wnCodeToDownloadInputFiles = ''

    if (len(all_inputfiles) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import outputFilePostProcessingOnWN

        for inputFile in all_inputfiles:

            inputfileClassName = getName(inputFile)

            logger.debug("name: %s" % inputfileClassName)
            logger.debug("result: %s" % str(outputFilePostProcessingOnWN(job, inputfileClassName)))

            # BUGFIX: the original mixed two spellings ('getfromFile' and
            # 'getFromFile'), so the flag assigned in the postprocessing
            # branch was never the one tested below — raising NameError on
            # the first such file, or reusing a stale value from a previous
            # iteration. A single flag, reset per file, restores the intent.
            getFromFile = False
            if outputFilePostProcessingOnWN(job, inputfileClassName):
                inputFile.processWildcardMatches()
                if inputFile.subfiles:
                    # each matched subfile downloads itself; the pattern file
                    # itself does not need to be fetched
                    for subfile in inputFile.subfiles:
                        wnCodeToDownloadInputFiles += subfile.getWNScriptDownloadCommand('')
                else:
                    getFromFile = True
            else:
                getFromFile = True

            if getFromFile:
                wnCodeToDownloadInputFiles += inputFile.getWNScriptDownloadCommand('')

    wnCodeToDownloadInputData = ''
    if job.inputdata and (len(job.inputdata) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForDownloadingInputFiles
        wnCodeToDownloadInputData = getWNCodeForDownloadingInputFiles(job, '')

    import inspect

    # Placeholder -> value map applied to the template below
    replace_dict = {
        '###CONSTRUCT_TIME###': (time.strftime("%c")),
        '###WNSANDBOX_SOURCE###': inspect.getsource(Sandbox.WNSandbox),
        '###GANGA_PYTHONPATH###': getConfig("System")["GANGA_PYTHONPATH"],
        '###OUTPUTDIR###': outDir,
        '###WORKDIR###': workdir,
        '###IN_BOX###': inbox,
        '###WN_INPUTFILES###': wnCodeToDownloadInputFiles,
        '###WN_INPUTDATA###': wnCodeToDownloadInputData,
        '###JOBCONFIG_ENV###': jobconfig.env if jobconfig.env is not None else dict(),
        '###EXE_STRING###': exeString,
        '###ARG_STRING###': argString,
        '###WN_POSTPROCESSING###': wnCodeForPostprocessing,
        '###PATTERNS_TO_ZIP###': patternsToZip,
        '###OUTPUT_SANDBOX_PATTERNS###': outputSandboxPatterns
    }

    # The template file lives next to this module on disk
    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'InteractiveScriptTemplate.py')

    from Ganga.GPIDev.Lib.File import FileUtils
    commandString = FileUtils.loadScript(script_location, '')

    for k, v in replace_dict.iteritems():
        commandString = commandString.replace(str(k), str(v))

    return job.getInputWorkspace().writefile(FileBuffer("__jobscript__", commandString), executable=1)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare an Executable application (list-style command line) for the
    Dirac backend.

    Builds the 'exe-script.py' worker-node wrapper, routes LocalFile inputs
    into the sandbox and DiracFile inputs into the job's input data, then
    produces the Dirac API submission script.

    Args:
        app (IApplication): the application being configured
        appsubconfig (tuple): per-subjob application configuration
        appmasterconfig (tuple): application configuration of the master job
        jobmasterconfig (StandardJobConfig): configuration of the master job

    Returns:
        StandardJobConfig: the job configuration handed to the Dirac backend
    """
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    # outputdata, outputdata_path = dirac_ouputdata(app)

    job = stripProxy(app).getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    commandline = []
    commandline.append(app.exe)
    if isType(app.exe, File):
        # Ship the executable in the sandbox and run the local copy
        inputsandbox.append(app.exe)
        commandline[0] = os.path.join('.', os.path.basename(app.exe.name))
    commandline.extend([str(arg) for arg in app.args])
    logger.debug('Command line: %s: ', commandline)

    #exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
    exe_script_name = 'exe-script.py'

    logger.info("Setting Command to be: '%s'" % repr(commandline))

    # Wrapper script that actually runs the command on the worker node.
    # NOTE: the original computed the same script_generator(...) result a
    # second time into an unused local; that dead statement is removed.
    inputsandbox.append(FileBuffer(name=exe_script_name,
                                   contents=script_generator(exe_script_template(),
                                                             #remove_unreplaced = False,
                                                             COMMAND=repr(commandline),
                                                             OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                                                             ),
                                   executable=True))

    from os.path import abspath, expanduser

    # LocalFiles travel in the sandbox; DiracFiles become grid input data
    for this_file in job.inputfiles:
        if isinstance(this_file, LocalFile):
            for name in this_file.getFilenameList():
                inputsandbox.append(File(abspath(expanduser(name))))
        elif isinstance(this_file, DiracFile):
            name = this_file.lfn
            if isinstance(input_data, list):
                input_data.append(name)
            else:
                input_data = [name]

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles, config['RequireDefaultSE'])

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    # os.path.basename(exe_script_path),
                                    EXE=exe_script_name,
                                    # ' '.join([str(arg) for arg in app.args]),
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if config['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX='##INPUT_SANDBOX##'
                                    )

    #logger.info("dirac_script: %s" % dirac_script)
    #logger.info("inbox: %s" % str(unique(inputsandbox)))
    #logger.info("outbox: %s" % str(unique(outputsandbox)))

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def preparejob(self, jobconfig, master_input_sandbox):
    """Create the batch worker-node wrapper from BatchScriptTemplate.py and
    write it into the job's input workspace.

    Args:
        jobconfig (StandardJobConfig): configuration of the (sub)job being prepared
        master_input_sandbox (list): sandbox files inherited from the master job

    Returns:
        str: path of the '__jobscript__' file written to the input workspace
    """
    job = self.getJobObject()
    # NOTE(review): 'mon' is unused afterwards; the call is kept in case
    # getMonitoringService() has required side effects — confirm before removing.
    mon = job.getMonitoringService()

    # All imports gathered once here (the original imported inspect and
    # Ganga.Core.Sandbox twice).
    import inspect
    import Ganga.Core.Sandbox as Sandbox
    import Ganga.Utility as Utility
    from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
    from Ganga.GPIDev.Lib.File import File
    from Ganga.Utility.Config import getConfig
    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles

    # Ship Ganga.Utility.files alongside the sandbox so the wrapper can use it
    fileutils = File( inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR )

    subjob_input_sandbox = job.createPackedInputSandbox(jobconfig.getSandboxFiles() + [ fileutils ] )

    appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    sharedoutputpath = job.getOutputWorkspace().getPath()
    ## FIXME Check this isn't a GangaList
    outputpatterns = jobconfig.outputbox
    # idiom fix: 'is not None' instead of 'not ... is None'
    environment = jobconfig.env if jobconfig.env is not None else {}

    # The wrapper template lives next to this module on disk
    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'BatchScriptTemplate.py')

    from Ganga.GPIDev.Lib.File import FileUtils
    text = FileUtils.loadScript(script_location, '')

    jobidRepr = repr(self.getJobObject().getFQID('.'))

    # Placeholder -> value map applied to the template
    replace_dict = {
        '###OUTPUTSANDBOXPOSTPROCESSING###' : getWNCodeForOutputSandbox(job, ['__syslog__'], jobidRepr),
        '###OUTPUTUPLOADSPOSTPROCESSING###' : getWNCodeForOutputPostprocessing(job, ''),
        '###DOWNLOADINPUTFILES###' : getWNCodeForDownloadingInputFiles(job, ''),
        '###INLINEMODULES###' : inspect.getsource(Sandbox.WNSandbox),
        '###INLINEHOSTNAMEFUNCTION###' : inspect.getsource(Utility.util.hostname),
        '###APPSCRIPTPATH###' : repr(appscriptpath),
        #'###SHAREDINPUTPATH###' : repr(sharedinputpath)),
        '###INPUT_SANDBOX###' : repr(subjob_input_sandbox + master_input_sandbox),
        '###SHAREDOUTPUTPATH###' : repr(sharedoutputpath),
        '###OUTPUTPATTERNS###' : repr(outputpatterns),
        '###JOBID###' : jobidRepr,
        '###ENVIRONMENT###' : repr(environment),
        '###PREEXECUTE###' : self.config['preexecute'],
        '###POSTEXECUTE###' : self.config['postexecute'],
        '###JOBIDNAME###' : self.config['jobid_name'],
        '###QUEUENAME###' : self.config['queue_name'],
        '###HEARTBEATFREQUENCE###' : self.config['heartbeat_frequency'],
        '###INPUT_DIR###' : repr(job.getStringInputDir()),
        '###GANGADIR###' : repr(getConfig('System')['GANGA_PYTHONPATH'])
    }

    for k, v in replace_dict.iteritems():
        text = text.replace(str(k), str(v))

    logger.debug('subjob input sandbox %s ', subjob_input_sandbox)
    logger.debug('master input sandbox %s ', master_input_sandbox)

    from Ganga.GPIDev.Lib.File import FileBuffer

    return job.getInputWorkspace().writefile(FileBuffer('__jobscript__', text), executable=1)
def preparejob(self, jobconfig, master_input_sandbox):
    """Prepare the local wrapper script for this (sub)job.

    Builds a Python 2 wrapper script from the inline template below,
    substitutes the '###...###' placeholders with job-specific values, and
    writes it as the executable '__jobscript__' into the job's input
    workspace. The wrapper unpacks the input sandbox, sets up the
    environment, launches the application via subprocess, feeds the
    monitoring service, and records progress in '__jobstatus__'.

    Args:
        jobconfig: job configuration providing exe/args, sandbox files,
            output box and environment.
        master_input_sandbox: list of master-job input-sandbox entries,
            appended to this subjob's packed sandbox.

    Returns:
        The path of the written '__jobscript__' file (as returned by
        InputWorkspace.writefile).
    """
    job = self.getJobObject()
    # print str(job.backend_output_postprocess)
    mon = job.getMonitoringService()

    import Ganga.Core.Sandbox as Sandbox
    subjob_input_sandbox = job.createPackedInputSandbox(
        jobconfig.getSandboxFiles()
        + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules())
        + Sandbox.getGangaModulesAsSandboxFiles(mon.getSandboxModules()))

    appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    if self.nice:
        # Wrap the command in 'nice' to lower (or raise) its priority.
        appscriptpath = ['nice', '-n %d' % self.nice] + appscriptpath
        if self.nice < 0:
            logger.warning('increasing process priority is often not allowed, your job may fail due to this')

    sharedoutputpath = job.getOutputWorkspace().getPath()
    outputpatterns = jobconfig.outputbox
    # Fix: guard against a None environment — the wrapper below iterates
    # environment.iteritems(), which would crash at job runtime otherwise.
    # This matches the guard used by the other preparejob variants.
    environment = jobconfig.env if jobconfig.env is not None else {}

    from Ganga.Utility import tempfile
    workdir = tempfile.mkdtemp()

    # Inline wrapper-script template; placeholders are replaced below.
    # The template itself is Python 2 code executed on the worker node.
    script = """#!/usr/bin/env python

import os,os.path,shutil,tempfile
import sys,time
import glob
import sys

# FIXME: print as DEBUG: to __syslog__ file
#print sys.path
#print os.environ['PATH']
#print sys.version

# bugfix #13314 : make sure that the wrapper (spawned process) is detached from Ganga session
# the process will not receive Control-C signals
# using fork and doing setsid() before exec would probably be a bit
# better (to avoid slim chance that the signal is propagated to this
# process before setsid is reached)
# this is only enabled if the first argument is 'subprocess' in order to enable
# running this script by hand from outside ganga (which is sometimes useful)
if len(sys.argv)>1 and sys.argv[1] == 'subprocess':
    os.setsid()

############################################################################################

###INLINEMODULES###

############################################################################################

input_sandbox = ###INPUT_SANDBOX###
sharedoutputpath= ###SHAREDOUTPUTPATH###
outputpatterns = ###OUTPUTPATTERNS###
appscriptpath = ###APPSCRIPTPATH###
environment = ###ENVIRONMENT###
workdir = ###WORKDIR###

statusfilename = os.path.join(sharedoutputpath,'__jobstatus__')

try:
    statusfile=file(statusfilename,'w')
except IOError,x:
    print 'ERROR: not able to write a status file: ', statusfilename
    print 'ERROR: ',x
    raise

line='START: '+ time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) + os.linesep
statusfile.writelines(line)
statusfile.flush()

os.chdir(workdir)

# -- WARNING: get the input files including the python modules BEFORE sys.path.insert()
# -- SINCE PYTHON 2.6 THERE WAS A SUBTLE CHANGE OF SEMANTICS IN THIS AREA
for f in input_sandbox:
    getPackedInputSandbox(f)

# -- END OF MOVED CODE BLOCK

import sys
sys.path.insert(0, ###GANGADIR###)
sys.path.insert(0,os.path.join(os.getcwd(),PYTHON_DIR))

try:
    import subprocess
except ImportError,x:
    sys.path.insert(0,###SUBPROCESS_PYTHONPATH###)
    import subprocess

try:
    import tarfile
except ImportError,x:
    sys.path.insert(0,###TARFILE_PYTHONPATH###)
    import tarfile

for key,value in environment.iteritems():
    os.environ[key] = value

outfile=file('stdout','w')
errorfile=file('stderr','w')

sys.stdout=file('./__syslog__','w')
sys.stderr=sys.stdout

###MONITORING_SERVICE###

monitor = createMonitoringObject()
monitor.start()

import subprocess
import time
#datetime #disabled for python2.2 compatiblity

try:
    child = subprocess.Popen(appscriptpath, shell=False, stdout=outfile, stderr=errorfile)
except OSError,x:
    file('tt','w').close()
    print >> statusfile, 'EXITCODE: %d'%-9999
    print >> statusfile, 'FAILED: %s'%time.strftime('%a %b %d %H:%M:%S %Y') #datetime.datetime.utcnow().strftime('%a %b %d %H:%M:%S %Y')
    print >> statusfile, 'PROBLEM STARTING THE APPLICATION SCRIPT: %s %s'%(appscriptpath,str(x))
    statusfile.close()
    sys.exit()

print >> statusfile, 'PID: %d'%child.pid
statusfile.flush()

result = -1

try:
    while 1:
        result = child.poll()
        if result is not None:
            break
        outfile.flush()
        errorfile.flush()
        monitor.progress()
        time.sleep(0.3)
finally:
    monitor.progress()
    sys.stdout=sys.__stdout__
    sys.stderr=sys.__stderr__

monitor.stop(result)

outfile.flush()
errorfile.flush()

createOutputSandbox(outputpatterns,None,sharedoutputpath)

def printError(message):
    errorfile.write(message + os.linesep)
    errorfile.flush()

def printInfo(message):
    outfile.write(message + os.linesep)
    outfile.flush()

###OUTPUTUPLOADSPOSTPROCESSING###

outfile.close()
errorfile.close()

###OUTPUTSANDBOXPOSTPROCESSING###

line="EXITCODE: " + repr(result) + os.linesep
line+='STOP: '+time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) + os.linesep
statusfile.writelines(line)

sys.exit()
"""

    import inspect
    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing
    from Ganga.Utility.Config import getConfig

    jobidRepr = repr(job.getFQID('.'))

    script = script.replace('###OUTPUTSANDBOXPOSTPROCESSING###', getWNCodeForOutputSandbox(job, ['stdout', 'stderr', '__syslog__', getConfig('Output')['PostProcessLocationsFileName']], jobidRepr))
    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ''))
    script = script.replace('###APPLICATION_NAME###', repr(job.application._name))
    script = script.replace('###INPUT_SANDBOX###', repr(subjob_input_sandbox + master_input_sandbox))
    script = script.replace('###SHAREDOUTPUTPATH###', repr(sharedoutputpath))
    script = script.replace('###APPSCRIPTPATH###', repr(appscriptpath))
    script = script.replace('###OUTPUTPATTERNS###', repr(outputpatterns))
    script = script.replace('###JOBID###', jobidRepr)
    script = script.replace('###ENVIRONMENT###', repr(environment))
    script = script.replace('###WORKDIR###', repr(workdir))
    script = script.replace('###INPUT_DIR###', repr(job.getStringInputDir()))
    script = script.replace('###MONITORING_SERVICE###', job.getMonitoringService().getWrapperScriptConstructorText())

    # Remember the temporary working directory so it can be cleaned up later.
    self.workdir = workdir

    script = script.replace('###GANGADIR###', repr(getConfig('System')['GANGA_PYTHONPATH']))

    # Fallback module paths for old Python versions on the worker node.
    import Ganga.PACKAGE
    script = script.replace('###SUBPROCESS_PYTHONPATH###', repr(Ganga.PACKAGE.setup.getPackagePath2('subprocess', 'syspath', force=True)))
    script = script.replace('###TARFILE_PYTHONPATH###', repr(Ganga.PACKAGE.setup.getPackagePath2('tarfile', 'syspath', force=True)))

    return job.getInputWorkspace().writefile(FileBuffer('__jobscript__', script), executable=1)