def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, proxyChain, processors, wholeNode=False):
    """Create a customized JobWrapper for the job and submit it to the Computing Element.

    :param jobID: identifier of the job to submit
    :param jobParams: job parameters used to fill the wrapper template
    :param resourceParams: resource (CE) parameters for the wrapper
    :param optimizerParams: optimizer parameters for the wrapper
    :param proxyChain: proxy chain object from which the payload proxy is dumped
    :param processors: number of processors to request from the CE
    :param wholeNode: whether the job requests the whole worker node
    :return: S_OK/S_ERROR structure
    """
    logLevel = self.am_getOption('DefaultLogLevel', 'INFO')
    defaultWrapperLocation = self.am_getOption('JobWrapperTemplate',
                                               'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')

    # Everything the wrapper factory needs; also forwarded to the CE submission below
    jobDesc = {"jobID": jobID,
               "jobParams": jobParams,
               "resourceParams": resourceParams,
               "optimizerParams": optimizerParams,
               "extraOptions": self.extraOptions,
               "defaultWrapperLocation": defaultWrapperLocation}
    wrapperResult = createJobWrapper(log=self.log, logLevel=logLevel, **jobDesc)
    if not wrapperResult['OK']:
        return wrapperResult
    wrapperFile = wrapperResult['Value']

    self.__report(jobID, 'Matched', 'Submitted To CE')
    self.log.info('Submitting JobWrapper %s to %sCE' % (os.path.basename(wrapperFile), self.ceName))

    # Pass proxy to the CE
    proxyDump = proxyChain.dumpAllToString()
    if not proxyDump['OK']:
        self.log.error('Invalid proxy', proxyDump)
        return S_ERROR('Payload Proxy Not Found')
    payloadProxy = proxyDump['Value']

    submission = self.computingElement.submitJob(wrapperFile, payloadProxy,
                                                 numberOfProcessors=processors,
                                                 wholeNode=wholeNode,
                                                 jobDesc=jobDesc,
                                                 log=self.log,
                                                 logLevel=logLevel)
    ret = S_OK('Job submitted')

    if not submission['OK']:
        # CE-level failure: record the error, then either reschedule or fail hard
        self.log.error('Job submission failed', jobID)
        self.__setJobParam(jobID, 'ErrorMessage', '%s CE Submission Error' % (self.ceName))
        if 'ReschedulePayload' in submission:
            rescheduleFailedJob(jobID, submission['Message'])
            return S_OK()  # Without this job is marked as failed at line 265 above
        if 'Value' in submission:
            self.log.error('Error in DIRAC JobWrapper:', 'exit code = %s' % (str(submission['Value'])))
        return S_ERROR('%s CE Error: %s' % (self.ceName, submission['Message']))

    # Success: remember the local batch identifier assigned by the CE
    batchID = submission['Value']
    self.log.info('Job %s submitted as %s' % (jobID, batchID))
    self.log.verbose('Set JobParameter: Local batch ID %s' % (batchID))
    self.__setJobParam(jobID, 'LocalBatchID', str(batchID))
    if 'PayloadFailed' in submission:
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
    # Throttle consecutive submissions
    time.sleep(self.jobSubmissionDelay)
    return ret
def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, jobJDL, proxyChain):
    """Submit job to the Computing Element instance after creating a custom
    Job Wrapper with the available job parameters.

    :param jobID: identifier of the job to submit
    :param jobParams: job parameters fed to the wrapper builder
    :param resourceParams: resource (CE) parameters for the wrapper
    :param optimizerParams: optimizer parameters for the wrapper
    :param jobJDL: JDL of the job (unused here; kept for interface compatibility)
    :param proxyChain: proxy chain object providing the payload proxy
    :return: S_OK/S_ERROR structure
    """
    wrapperOutcome = self.__createJobWrapper(jobID, jobParams, resourceParams, optimizerParams)
    if not wrapperOutcome['OK']:
        return wrapperOutcome
    wrapperData = wrapperOutcome['Value']
    wrapperFile = wrapperData['execFile']

    self.__report(jobID, 'Matched', 'Submitted To CE')
    wrapperName = os.path.basename(wrapperFile)
    self.log.info('Submitting %s to %sCE' % (wrapperName, self.ceName))

    # Pass proxy to the CE
    proxyDump = proxyChain.dumpAllToString()
    if not proxyDump['OK']:
        self.log.error(proxyDump)
        return S_ERROR('Payload Proxy Not Found')
    payloadProxy = proxyDump['Value']

    # FIXME: how can we set the batchID before we submit, this makes no sense
    batchID = 'dc%s' % (jobID)
    submission = self.computingElement.submitJob(wrapperFile, payloadProxy, wrapperData)

    ret = S_OK('Job submitted')
    if not submission['OK']:
        self.log.error('Job submission failed', jobID)
        self.__setJobParam(jobID, 'ErrorMessage', '%s CE Submission Error' % (self.ceName))
        if 'ReschedulePayload' in submission:
            rescheduleFailedJob(jobID, submission['Message'], self.__report)
        else:
            if 'Value' in submission:
                self.log.error('Error in DIRAC JobWrapper:', 'exit code = %s' % (str(submission['Value'])))
            # make sure the Job is declared Failed
            self.__report(jobID, 'Failed', submission['Message'])
        return S_ERROR('%s CE Submission Error: %s' % (self.ceName, submission['Message']))

    batchID = submission['Value']
    self.log.info('Job %s submitted as %s' % (jobID, batchID))
    self.log.verbose('Set JobParameter: Local batch ID %s' % (batchID))
    self.__setJobParam(jobID, 'LocalBatchID', str(batchID))
    if 'PayloadFailed' in submission:
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
    # Throttle consecutive submissions
    time.sleep(self.jobSubmissionDelay)
    return ret
def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, proxyChain):
    """Submit job to the Computing Element instance after creating a custom
    Job Wrapper with the available job parameters.

    :param jobID: identifier of the job to submit
    :param jobParams: job parameters fed to ``createJobWrapper``
    :param resourceParams: resource (CE) parameters for the wrapper
    :param optimizerParams: optimizer parameters for the wrapper
    :param proxyChain: proxy chain object providing the payload proxy
    :return: S_OK/S_ERROR structure
    """
    logLevel = self.am_getOption('DefaultLogLevel', 'INFO')
    defaultWrapperLocation = self.am_getOption('JobWrapperTemplate',
                                               'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')
    wrapperResult = createJobWrapper(jobID, jobParams, resourceParams, optimizerParams,
                                     extraOptions=self.extraOptions,
                                     defaultWrapperLocation=defaultWrapperLocation,
                                     log=self.log,
                                     logLevel=logLevel)
    if not wrapperResult['OK']:
        return wrapperResult
    wrapperFile = wrapperResult['Value']

    self.__report(jobID, 'Matched', 'Submitted To CE')
    self.log.info('Submitting %s to %sCE' % (os.path.basename(wrapperFile), self.ceName))

    # Pass proxy to the CE
    proxyDump = proxyChain.dumpAllToString()
    if not proxyDump['OK']:
        self.log.error(proxyDump)
        return S_ERROR('Payload Proxy Not Found')
    payloadProxy = proxyDump['Value']

    # FIXME: how can we set the batchID before we submit, this makes no sense
    batchID = 'dc%s' % (jobID)
    submission = self.computingElement.submitJob(wrapperFile, payloadProxy)

    ret = S_OK('Job submitted')
    if not submission['OK']:
        self.log.error('Job submission failed', jobID)
        self.__setJobParam(jobID, 'ErrorMessage', '%s CE Submission Error' % (self.ceName))
        if 'ReschedulePayload' in submission:
            rescheduleFailedJob(jobID, submission['Message'], self.__report)
        else:
            if 'Value' in submission:
                self.log.error('Error in DIRAC JobWrapper:', 'exit code = %s' % (str(submission['Value'])))
            # make sure the Job is declared Failed
            self.__report(jobID, 'Failed', submission['Message'])
        return S_ERROR('%s CE Submission Error: %s' % (self.ceName, submission['Message']))

    batchID = submission['Value']
    self.log.info('Job %s submitted as %s' % (jobID, batchID))
    self.log.verbose('Set JobParameter: Local batch ID %s' % (batchID))
    self.__setJobParam(jobID, 'LocalBatchID', str(batchID))
    if 'PayloadFailed' in submission:
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
    # Throttle consecutive submissions
    time.sleep(self.jobSubmissionDelay)
    return ret
def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, jobJDL, proxyChain):
    """Submit job to the Computing Element instance after creating a custom
    Job Wrapper with the available job parameters.

    :param jobID: identifier of the job to submit
    :param jobParams: job parameters fed to the wrapper builder
    :param resourceParams: resource (CE) parameters for the wrapper
    :param optimizerParams: optimizer parameters for the wrapper
    :param jobJDL: JDL of the job (unused here; kept for interface compatibility)
    :param proxyChain: proxy chain object providing the payload proxy
    :return: S_OK/S_ERROR structure
    """
    creation = self.__createJobWrapper(jobID, jobParams, resourceParams, optimizerParams)
    if not creation["OK"]:
        return creation
    wrapperData = creation["Value"]
    wrapperFile = wrapperData["execFile"]

    self.__report(jobID, "Matched", "Submitted To CE")
    wrapperName = os.path.basename(wrapperFile)
    self.log.info("Submitting %s to %sCE" % (wrapperName, self.ceName))

    # Pass proxy to the CE
    proxyDump = proxyChain.dumpAllToString()
    if not proxyDump["OK"]:
        self.log.error(proxyDump)
        return S_ERROR("Payload Proxy Not Found")
    payloadProxy = proxyDump["Value"]

    # FIXME: how can we set the batchID before we submit, this makes no sense
    batchID = "dc%s" % (jobID)
    submission = self.computingElement.submitJob(wrapperFile, payloadProxy, wrapperData)

    ret = S_OK("Job submitted")
    if submission["OK"]:
        batchID = submission["Value"]
        self.log.info("Job %s submitted as %s" % (jobID, batchID))
        self.log.verbose("Set JobParameter: Local batch ID %s" % (batchID))
        self.__setJobParam(jobID, "LocalBatchID", str(batchID))
        if "PayloadFailed" in submission:
            ret["PayloadFailed"] = submission["PayloadFailed"]
            return ret
        # Throttle consecutive submissions
        time.sleep(self.jobSubmissionDelay)
        return ret

    # Submission failed: record the error, optionally reschedule, and fail the job
    self.log.error("Job submission failed", jobID)
    self.__setJobParam(jobID, "ErrorMessage", "%s CE Submission Error" % (self.ceName))
    if "ReschedulePayload" in submission:
        rescheduleFailedJob(jobID, submission["Message"], self.__report)
    else:
        if "Value" in submission:
            self.log.error("Error in DIRAC JobWrapper:", "exit code = %s" % (str(submission["Value"])))
        # make sure the Job is declared Failed
        self.__report(jobID, "Failed", submission["Message"])
    return S_ERROR("%s CE Submission Error: %s" % (self.ceName, submission["Message"]))
def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, jobJDL, proxyChain):
    """Submit job to the Computing Element instance after creating a custom
    Job Wrapper with the available job parameters.

    :param jobID: identifier of the job to submit
    :param jobParams: job parameters fed to the wrapper builder
    :param resourceParams: resource (CE) parameters for the wrapper
    :param optimizerParams: optimizer parameters for the wrapper
    :param jobJDL: JDL of the job (unused here; kept for interface compatibility)
    :param proxyChain: proxy chain object providing the payload proxy
    :return: S_OK/S_ERROR structure
    """
    wrapperOutcome = self.__createJobWrapper(jobID, jobParams, resourceParams, optimizerParams)
    if not wrapperOutcome['OK']:
        return wrapperOutcome
    wrapperFile = wrapperOutcome['Value']

    self.__report(jobID, 'Matched', 'Submitted To CE')
    wrapperName = os.path.basename(wrapperFile)
    self.log.info('Submitting %s to %sCE' % (wrapperName, self.ceName))

    # Pass proxy to the CE
    proxyDump = proxyChain.dumpAllToString()
    if not proxyDump['OK']:
        self.log.error(proxyDump)
        return S_ERROR('Payload Proxy Not Found')
    payloadProxy = proxyDump['Value']

    # FIXME: how can we set the batchID before we submit, this makes no sense
    batchID = 'dc%s' % (jobID)
    submission = self.computingElement.submitJob(wrapperFile, payloadProxy)

    ret = S_OK('Job submitted')
    if not submission['OK']:
        # Submission failed: record the error, optionally reschedule, and report back
        self.log.error('Job submission failed', jobID)
        self.__setJobParam(jobID, 'ErrorMessage', '%s CE Submission Error' % (self.ceName))
        if 'ReschedulePayload' in submission:
            rescheduleFailedJob(jobID, submission['Message'], self.__report)
        return S_ERROR('%s CE Submission Error: %s' % (self.ceName, submission['Message']))

    batchID = submission['Value']
    self.log.info('Job %s submitted as %s' % (jobID, batchID))
    self.log.verbose('Set JobParameter: Local batch ID %s' % (batchID))
    self.__setJobParam(jobID, 'LocalBatchID', str(batchID))
    if 'PayloadFailed' in submission:
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
    # Throttle consecutive submissions
    time.sleep(self.jobSubmissionDelay)
    return ret
def __rescheduleFailedJob(self, jobID, message, jobParams, stop=True):
    """Set Job Status to "Rescheduled" and issue a reschedule command to the Job Manager.

    :param jobID: identifier of the job to reschedule
    :param message: reason for the reschedule, forwarded to the Job Manager
    :param jobParams: job parameters used for the accounting record
    :param stop: whether the agent cycle should stop after finishing
    :return: result of ``self.__finish``
    """
    reporter = JobReport(int(jobID), "JobAgent@%s" % self.siteName)
    outcome = rescheduleFailedJob(jobID, message, reporter)
    # Always send the accounting record, whatever the reschedule outcome
    self.__sendAccountingRecord(outcome, message, jobParams)
    if outcome != "Rescheduled":
        return self.__finish("Problem Rescheduling Job", stop)
    self.log.info("Job Rescheduled %s" % (jobID))
    return self.__finish("Job Rescheduled", stop)
def __rescheduleFailedJob(self, jobID, message, jobParams, stop=True):
    """Set Job Status to "Rescheduled" and issue a reschedule command to the Job Manager.

    :param jobID: identifier of the job to reschedule
    :param message: reason for the reschedule, forwarded to the Job Manager
    :param jobParams: job parameters used for the accounting record
    :param stop: whether the agent cycle should stop after finishing
    :return: result of ``self.__finish``
    """
    reporter = JobReport(int(jobID), 'JobAgent@%s' % self.siteName)
    outcome = rescheduleFailedJob(jobID, message, reporter)
    # Always send the accounting record, whatever the reschedule outcome
    self.__sendAccountingRecord(outcome, message, jobParams)
    if outcome != 'Rescheduled':
        return self.__finish('Problem Rescheduling Job', stop)
    self.log.info('Job Rescheduled %s' % (jobID))
    return self.__finish('Job Rescheduled', stop)
def execute(arguments):
    """The only real function executed here.

    Drives the whole payload life cycle: working-directory setup, JobWrapper
    initialization, input sandbox/data staging, execution, output processing
    and finalization.

    :param arguments: dict with at least a "Job" sub-dict (job description)
                      and optionally "WorkingDirectory"
    :return: 0 on success, 1 on pre/execution failures, 2 on output/finalization failures
    """
    global gJobReport

    jobID = arguments["Job"].get("JobID", 0)
    os.environ["JOBID"] = str(jobID)
    jobID = int(jobID)

    if "WorkingDirectory" in arguments:
        wdir = os.path.expandvars(arguments["WorkingDirectory"])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)  # this will raise an exception if wdir already exists (which is ~OK)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except OSError as osError:
                if osError.errno == errno.EEXIST and os.path.isdir(wdir):
                    gLogger.exception("JobWrapperTemplate found that the working directory already exists")
                    rescheduleResult = rescheduleFailedJob(jobID, "Working Directory already exists")
                else:
                    gLogger.exception("JobWrapperTemplate could not create working directory")
                    rescheduleResult = rescheduleFailedJob(jobID, "Could Not Create Working Directory")
                return 1

    gJobReport = JobReport(jobID, "JobWrapper")

    # BUGFIX: if JobWrapper() itself raises, "job" was never assigned and the
    # original handler died with a NameError on job.sendJobAccounting, masking
    # the real failure. Pre-bind job to None and guard the accounting call.
    job = None
    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)  # initialize doesn't return S_OK/S_ERROR
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception("JobWrapper failed the initialization phase", lException=exc)
        rescheduleResult = rescheduleFailedJob(
            jobID=jobID, minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION, jobReport=gJobReport)
        if job is not None:
            job.sendJobAccounting(
                status=rescheduleResult, minorStatus=JobMinorStatus.JOB_WRAPPER_INITIALIZATION)
        return 1

    if "InputSandbox" in arguments["Job"]:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments["Job"]["InputSandbox"])
            if not result["OK"]:
                gLogger.warn(result["Message"])
                raise JobWrapperError(result["Message"])
        except JobWrapperError:
            gLogger.exception("JobWrapper failed to download input sandbox")
            rescheduleResult = rescheduleFailedJob(
                jobID=jobID, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX, jobReport=gJobReport)
            job.sendJobAccounting(
                status=rescheduleResult, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX)
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception("JobWrapper raised exception while downloading input sandbox", lException=exc)
            rescheduleResult = rescheduleFailedJob(
                jobID=jobID, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX, jobReport=gJobReport)
            job.sendJobAccounting(
                status=rescheduleResult, minorStatus=JobMinorStatus.DOWNLOADING_INPUT_SANDBOX)
            return 1
    else:
        gLogger.verbose("Job has no InputSandbox requirement")

    gJobReport.commit()

    if "InputData" in arguments["Job"]:
        if arguments["Job"]["InputData"]:
            try:
                result = job.resolveInputData()
                if not result["OK"]:
                    gLogger.warn(result["Message"])
                    raise JobWrapperError(result["Message"])
            except JobWrapperError:
                gLogger.exception("JobWrapper failed to resolve input data")
                rescheduleResult = rescheduleFailedJob(
                    jobID=jobID, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION, jobReport=gJobReport)
                job.sendJobAccounting(
                    status=rescheduleResult, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION)
                return 1
            except Exception as exc:  # pylint: disable=broad-except
                gLogger.exception("JobWrapper raised exception while resolving input data", lException=exc)
                rescheduleResult = rescheduleFailedJob(
                    jobID=jobID, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION, jobReport=gJobReport)
                job.sendJobAccounting(
                    status=rescheduleResult, minorStatus=JobMinorStatus.INPUT_DATA_RESOLUTION)
                return 1
        else:
            gLogger.verbose("Job has a null InputData requirement:")
            gLogger.verbose(arguments)
    else:
        gLogger.verbose("Job has no InputData requirement")

    gJobReport.commit()

    try:
        result = job.execute()
        if not result["OK"]:
            gLogger.error("Failed to execute job", result["Message"])
            raise JobWrapperError((result["Message"], result["Errno"]))
    except JobWrapperError as exc:
        if exc.value[1] == 0 or str(exc.value[0]) == "0":
            gLogger.verbose("JobWrapper exited with status=0 after execution")
        if exc.value[1] == DErrno.EWMSRESC:
            # The payload asked to be rescheduled rather than failed
            gLogger.warn("Asked to reschedule job")
            rescheduleResult = rescheduleFailedJob(
                jobID=jobID, minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION, jobReport=gJobReport)
            job.sendJobAccounting(
                status=rescheduleResult, minorStatus=JobMinorStatus.JOB_WRAPPER_EXECUTION)
            return 1
        gLogger.exception("Job failed in execution phase")
        gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False)
        gJobReport.setJobStatus(
            status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC, sendFlag=False)
        job.sendFailoverRequest()
        job.sendJobAccounting(status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC)
        return 1
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception("Job raised exception during execution phase", lException=exc)
        gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False)
        gJobReport.setJobStatus(
            status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC, sendFlag=False)
        job.sendFailoverRequest()
        job.sendJobAccounting(status=JobStatus.FAILED, minorStatus=JobMinorStatus.EXCEPTION_DURING_EXEC)
        return 1

    if "OutputSandbox" in arguments["Job"] or "OutputData" in arguments["Job"]:
        try:
            result = job.processJobOutputs()
            if not result["OK"]:
                gLogger.warn(result["Message"])
                raise JobWrapperError(result["Message"])
        except JobWrapperError as exc:
            gLogger.exception("JobWrapper failed to process output files")
            gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False)
            gJobReport.setJobStatus(
                status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS, sendFlag=False)
            job.sendFailoverRequest()
            job.sendJobAccounting(
                status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS)
            return 2
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception("JobWrapper raised exception while processing output files", lException=exc)
            gJobReport.setJobParameter("Error Message", repr(exc), sendFlag=False)
            gJobReport.setJobStatus(
                status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS, sendFlag=False)
            job.sendFailoverRequest()
            job.sendJobAccounting(
                status=JobStatus.FAILED, minorStatus=JobMinorStatus.UPLOADING_JOB_OUTPUTS)
            return 2
    else:
        gLogger.verbose("Job has no OutputData or OutputSandbox requirement")

    try:
        # Failed jobs will return !=0 / successful jobs will return 0
        return job.finalize()
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception("JobWrapper raised exception during the finalization phase", lException=exc)
        return 2
def execute(arguments):
    """The only real function executed here.

    Runs the payload life cycle: working-directory setup, JobWrapper
    initialization, input sandbox/data staging, execution, output processing
    and finalization.

    :param arguments: dict with at least a "Job" sub-dict (job description)
                      and optionally "WorkingDirectory"
    :return: 0 on success, 1 on pre/execution failures, 2 on output/finalization failures
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    # BUGFIX: os.environ values must be strings; the JDL may deliver JobID as
    # an int, which made the bare assignment raise TypeError.
    os.environ['JOBID'] = str(jobID)
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)  # this will raise an exception if wdir already exists (which is ~OK)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except OSError as osError:
                if osError.errno == errno.EEXIST and os.path.isdir(wdir):
                    gLogger.exception('JobWrapperTemplate found that the working directory already exists')
                    rescheduleResult = rescheduleFailedJob(jobID, 'Working Directory already exists')
                else:
                    gLogger.exception('JobWrapperTemplate could not create working directory')
                    rescheduleResult = rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)  # initialize doesn't return S_OK/S_ERROR
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception('JobWrapper failed the initialization phase', lException=exc)
        rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        # Accounting is best-effort here: a failure to send it must not hide
        # the initialization failure itself
        try:
            job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception('JobWrapper failed sending job accounting', lException=exc)
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except JobWrapperError:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception('JobWrapper raised exception while downloading input sandbox', lException=exc)
            rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except JobWrapperError:
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
                return 1
            except Exception as exc:  # pylint: disable=broad-except
                gLogger.exception('JobWrapper raised exception while resolving input data', lException=exc)
                rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
    else:
        gLogger.verbose('Job has no InputData requirement')

    gJobReport.commit()

    try:
        result = job.execute(arguments)
        if not result['OK']:
            gLogger.error('Failed to execute job', result['Message'])
            raise JobWrapperError((result['Message'], result['Errno']))
    except JobWrapperError as exc:
        if exc.value[1] == 0 or str(exc.value[0]) == '0':
            gLogger.verbose('JobWrapper exited with status=0 after execution')
        if exc.value[1] == DErrno.EWMSRESC:
            # The payload asked to be rescheduled rather than failed
            gLogger.warn("Asked to reschedule job")
            rescheduleResult = rescheduleFailedJob(jobID, 'JobWrapper execution', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'JobWrapper execution')
            return 1
        gLogger.exception('Job failed in execution phase')
        gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
        gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
        job.sendFailoverRequest('Failed', 'Exception During Execution')
        return 1
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception('Job raised exception during execution phase', lException=exc)
        gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
        gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
        job.sendFailoverRequest('Failed', 'Exception During Execution')
        return 1

    if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']:
        try:
            result = job.processJobOutputs(arguments)
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except JobWrapperError as exc:
            gLogger.exception('JobWrapper failed to process output files')
            gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
        except Exception as exc:  # pylint: disable=broad-except
            gLogger.exception('JobWrapper raised exception while processing output files', lException=exc)
            gJobReport.setJobParameter('Error Message', str(exc), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
    else:
        gLogger.verbose('Job has no OutputData or OutputSandbox requirement')

    try:
        # Failed jobs will return 1 / successful jobs will return 0
        return job.finalize()
    except Exception as exc:  # pylint: disable=broad-except
        gLogger.exception('JobWrapper raised exception during the finalization phase', lException=exc)
        return 2
def execute(arguments):
    """The only real function executed here.

    Runs the payload life cycle: working-directory setup, JobWrapper
    initialization, input sandbox/data staging, execution, output processing
    and finalization.

    :param arguments: dict with at least a "Job" sub-dict (job description)
                      and optionally "WorkingDirectory"
    :return: 0 on success, 1 on pre/execution failures, 2 on output/finalization failures
    """
    global gJobReport

    jobArgs = arguments['Job']
    jobID = jobArgs['JobID']
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:
        workDir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(workDir):
            os.chdir(workDir)
        else:
            try:
                os.makedirs(workDir)  # this will raise an exception if workDir already exists (which is ~OK)
                if os.path.isdir(workDir):
                    os.chdir(workDir)
            except OSError as ose:
                if ose.errno == errno.EEXIST and os.path.isdir(workDir):
                    gLogger.exception('JobWrapperTemplate found that the working directory already exists')
                    rescheduleResult = rescheduleFailedJob(jobID, 'Working Directory already exists')
                else:
                    gLogger.exception('JobWrapperTemplate could not create working directory')
                    rescheduleResult = rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)  # initialize doesn't return S_OK/S_ERROR
    except Exception as err:  # pylint: disable=broad-except
        gLogger.exception('JobWrapper failed the initialization phase', lException=err)
        rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        # Accounting failures must not mask the initialization failure
        try:
            job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
        except Exception as err:  # pylint: disable=broad-except
            gLogger.exception('JobWrapper failed sending job accounting', lException=err)
        return 1

    if 'InputSandbox' in jobArgs:
        gJobReport.commit()
        try:
            opResult = job.transferInputSandbox(jobArgs['InputSandbox'])
            if not opResult['OK']:
                gLogger.warn(opResult['Message'])
                raise JobWrapperError(opResult['Message'])
        except JobWrapperError:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
        except Exception as err:  # pylint: disable=broad-except
            gLogger.exception('JobWrapper raised exception while downloading input sandbox', lException=err)
            rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in jobArgs:
        if jobArgs['InputData']:
            try:
                opResult = job.resolveInputData()
                if not opResult['OK']:
                    gLogger.warn(opResult['Message'])
                    raise JobWrapperError(opResult['Message'])
            except JobWrapperError:
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
                return 1
            except Exception as err:  # pylint: disable=broad-except
                gLogger.exception('JobWrapper raised exception while resolving input data', lException=err)
                rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
    else:
        gLogger.verbose('Job has no InputData requirement')

    gJobReport.commit()

    try:
        opResult = job.execute(arguments)
        if not opResult['OK']:
            gLogger.error('Failed to execute job', opResult['Message'])
            raise JobWrapperError((opResult['Message'], opResult['Errno']))
    except JobWrapperError as err:
        if err.value[1] == 0 or str(err.value[0]) == '0':
            gLogger.verbose('JobWrapper exited with status=0 after execution')
        if err.value[1] == DErrno.EWMSRESC:
            # The payload asked to be rescheduled rather than failed
            gLogger.warn("Asked to reschedule job")
            rescheduleResult = rescheduleFailedJob(jobID, 'JobWrapper execution', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'JobWrapper execution')
            return 1
        gLogger.exception('Job failed in execution phase')
        gJobReport.setJobParameter('Error Message', str(err), sendFlag=False)
        gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
        job.sendFailoverRequest('Failed', 'Exception During Execution')
        return 1
    except Exception as err:  # pylint: disable=broad-except
        gLogger.exception('Job raised exception during execution phase', lException=err)
        gJobReport.setJobParameter('Error Message', str(err), sendFlag=False)
        gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
        job.sendFailoverRequest('Failed', 'Exception During Execution')
        return 1

    if 'OutputSandbox' in jobArgs or 'OutputData' in jobArgs:
        try:
            opResult = job.processJobOutputs()
            if not opResult['OK']:
                gLogger.warn(opResult['Message'])
                raise JobWrapperError(opResult['Message'])
        except JobWrapperError as err:
            gLogger.exception('JobWrapper failed to process output files')
            gJobReport.setJobParameter('Error Message', str(err), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
        except Exception as err:  # pylint: disable=broad-except
            gLogger.exception('JobWrapper raised exception while processing output files', lException=err)
            gJobReport.setJobParameter('Error Message', str(err), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
    else:
        gLogger.verbose('Job has no OutputData or OutputSandbox requirement')

    try:
        # Failed jobs will return 1 / successful jobs will return 0
        return job.finalize()
    except Exception as err:  # pylint: disable=broad-except
        gLogger.exception('JobWrapper raised exception during the finalization phase', lException=err)
        return 2
def execute(arguments):
    """The only real function executed here (legacy variant).

    Modernized from Python-2-only idioms: ``dict.has_key()`` is removed in
    Python 3 (replaced by ``in``) and ``except Exception, x`` is invalid
    syntax there (replaced by ``except Exception``; the bound name was
    unused anyway). Both replacements also work on Python 2.6+.

    :param arguments: dict with at least a "Job" sub-dict and optionally
                      "WorkingDirectory"
    :return: 1 on failure paths shown here
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    # os.environ values must be strings; the JDL may deliver JobID as an int
    os.environ['JOBID'] = str(jobID)
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception('JobWrapperTemplate could not create working directory')
                rescheduleResult = rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the initialization phase')
        rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception:
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, proxyChain):
    """Submit a job to the Computing Element instance after creating a custom
    Job Wrapper with the available job parameters.

    :param jobID: WMS job identifier
    :param dict jobParams: job description parameters used by the wrapper
    :param dict resourceParams: matched resource description
    :param dict optimizerParams: optimizer output for the job
    :param proxyChain: proxy chain object, dumped to a string and passed to the CE
    :return: S_OK/S_ERROR; on successful submission the S_OK may carry a
             'PayloadFailed' entry copied from the CE result
    """
    logLevel = self.am_getOption('DefaultLogLevel', 'INFO')
    defaultWrapperLocation = self.am_getOption(
        'JobWrapperTemplate',
        'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')
    result = createJobWrapper(jobID, jobParams, resourceParams, optimizerParams,
                              extraOptions=self.extraOptions,
                              defaultWrapperLocation=defaultWrapperLocation,
                              log=self.log, logLevel=logLevel)
    if not result['OK']:
        return result
    wrapperFile = result['Value']

    self.__report(jobID, 'Matched', 'Submitted To CE')
    self.log.info('Submitting %s to %sCE' % (os.path.basename(wrapperFile), self.ceName))

    # Pass proxy to the CE
    proxy = proxyChain.dumpAllToString()
    if not proxy['OK']:
        self.log.error('Invalid proxy', proxy)
        return S_ERROR('Payload Proxy Not Found')
    payloadProxy = proxy['Value']

    # NOTE: a dead pre-submission assignment (batchID = 'dc%s' % jobID, flagged
    # by a FIXME) was removed: the batch ID is only known after submission.
    submission = self.computingElement.submitJob(wrapperFile, payloadProxy)

    ret = S_OK('Job submitted')
    if submission['OK']:
        batchID = submission['Value']
        self.log.info('Job %s submitted as %s' % (jobID, batchID))
        self.log.verbose('Set JobParameter: Local batch ID %s' % (batchID))
        self.__setJobParam(jobID, 'LocalBatchID', str(batchID))
        if 'PayloadFailed' in submission:
            # Submission worked but the payload itself failed: report and stop
            ret['PayloadFailed'] = submission['PayloadFailed']
            return ret
        # Throttle successive submissions to the CE
        time.sleep(self.jobSubmissionDelay)
    else:
        self.log.error('Job submission failed', jobID)
        self.__setJobParam(jobID, 'ErrorMessage', '%s CE Submission Error' % (self.ceName))
        if 'ReschedulePayload' in submission:
            rescheduleFailedJob(jobID, submission['Message'])
            return S_OK()  # Without this the job would be marked as failed by the caller
        else:
            if 'Value' in submission:
                self.log.error('Error in DIRAC JobWrapper:',
                               'exit code = %s' % (str(submission['Value'])))
            # make sure the Job is declared Failed
            self.__report(jobID, 'Failed', submission['Message'])
            return S_ERROR('%s CE Submission Error: %s' % (self.ceName, submission['Message']))
    return ret
def execute(arguments):
    """Run the initial phases of the Job Wrapper for one job (legacy
    sendWMSAccounting variant).

    Sets up the working directory, instantiates the JobWrapper and stages in
    the job's input sandbox and input data. On any failure the job is
    rescheduled, WMS accounting is sent, and 1 is returned; otherwise the
    function falls through (implicitly returning None).

    :param dict arguments: must contain arguments['Job']['JobID']; optional
                           keys are 'WorkingDirectory' and, under 'Job',
                           'InputSandbox' and 'InputData'
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    # Export JOBID before the int() cast so child processes see the string form
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:  # was has_key(): Py3-incompatible idiom
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception('JobWrapperTemplate could not create working directory')
                rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    job = None  # guard: JobWrapper() itself may raise, leaving 'job' unbound below
    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the initialization phase')
        rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        if job is not None:
            # Only report accounting if the wrapper object was actually created
            job.sendWMSAccounting('Failed', 'Job Wrapper Initialization')
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleFailedJob(jobID, 'Input Sandbox Download')
            job.sendWMSAccounting('Failed', 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception:  # was 'except Exception, x' with x unused
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleFailedJob(jobID, 'Input Data Resolution')
                job.sendWMSAccounting('Failed', 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
def execute(arguments): global gJobReport jobID = arguments['Job']['JobID'] os.environ['JOBID'] = jobID jobID = int(jobID) if arguments.has_key('WorkingDirectory'): wdir = os.path.expandvars(arguments['WorkingDirectory']) if os.path.isdir(wdir): os.chdir(wdir) else: try: os.makedirs(wdir) if os.path.isdir(wdir): os.chdir(wdir) except Exception: gLogger.exception( 'JobWrapperTemplate could not create working directory') rescheduleResult = rescheduleFailedJob( jobID, 'Could Not Create Working Directory') return 1 gJobReport = JobReport(jobID, 'JobWrapper') try: job = JobWrapper(jobID, gJobReport) job.initialize(arguments) except Exception: gLogger.exception('JobWrapper failed the initialization phase') rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport) job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization') return 1 if arguments['Job'].has_key('InputSandbox'): gJobReport.commit() try: result = job.transferInputSandbox(arguments['Job']['InputSandbox']) if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception: gLogger.exception('JobWrapper failed to download input sandbox') rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download') return 1 else: gLogger.verbose('Job has no InputSandbox requirement') gJobReport.commit() if arguments['Job'].has_key('InputData'): if arguments['Job']['InputData']: try: result = job.resolveInputData() if not result['OK']: gLogger.warn(result['Message']) raise JobWrapperError(result['Message']) except Exception, x: gLogger.exception('JobWrapper failed to resolve input data') rescheduleResult = rescheduleFailedJob( jobID, 'Input Data Resolution', gJobReport) job.sendJobAccounting(rescheduleResult, 'Input Data Resolution') return 1 else: gLogger.verbose('Job has a null InputData requirement:') gLogger.verbose(arguments)
def _submitJob(self, jobID, jobParams, resourceParams, optimizerParams,
               proxyChain, processors=1, wholeNode=False,
               maxNumberOfProcessors=0, mpTag=False):
    """Build a custom Job Wrapper from the available job parameters and hand
    it to the Computing Element for submission.

    :param jobID: WMS job identifier
    :param dict jobParams: job description parameters for the wrapper
    :param dict resourceParams: matched resource description
    :param dict optimizerParams: optimizer output for the job
    :param proxyChain: proxy chain object, serialized and forwarded to the CE
    :param int processors: number of processors requested for the payload
    :param bool wholeNode: whether the payload claims the whole node
    :param int maxNumberOfProcessors: upper bound on usable processors
    :param bool mpTag: whether the job carries the multi-processor tag
    :return: S_OK/S_ERROR; a successful S_OK may carry a 'PayloadFailed' entry
    """
    wrapperLogLevel = self.am_getOption('DefaultLogLevel', 'INFO')
    templatePath = self.am_getOption(
        'JobWrapperTemplate',
        'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py')
    description = {"jobID": jobID,
                   "jobParams": jobParams,
                   "resourceParams": resourceParams,
                   "optimizerParams": optimizerParams,
                   "extraOptions": self.extraOptions,
                   "defaultWrapperLocation": templatePath}
    wrapped = createJobWrapper(log=self.log, logLevel=wrapperLogLevel, **description)
    if not wrapped['OK']:
        return wrapped
    wrapperFile = wrapped['Value']

    self.__report(jobID, 'Matched', 'Submitted To CE')
    self.log.info(
        'Submitting JobWrapper',
        '%s to %sCE' % (os.path.basename(wrapperFile), self.ceName))

    # The CE needs the payload proxy as a plain string
    proxyDump = proxyChain.dumpAllToString()
    if not proxyDump['OK']:
        self.log.error('Invalid proxy', proxyDump)
        return S_ERROR('Payload Proxy Not Found')

    submission = self.computingElement.submitJob(
        wrapperFile, proxyDump['Value'],
        numberOfProcessors=processors,
        maxNumberOfProcessors=maxNumberOfProcessors,
        wholeNode=wholeNode,
        mpTag=mpTag,
        jobDesc=description,
        log=self.log,
        logLevel=wrapperLogLevel)

    ret = S_OK('Job submitted')
    if not submission['OK']:
        # Submission failed: record the error against the job
        self.log.error('Job submission failed', jobID)
        self.__setJobParam(jobID, 'ErrorMessage', '%s CE Submission Error' % (self.ceName))
        if 'ReschedulePayload' in submission:
            rescheduleFailedJob(jobID, submission['Message'])
            return S_OK()  # Without this, the job is marked as failed
        if 'Value' in submission:
            self.log.error(
                'Error in DIRAC JobWrapper:',
                'exit code = %s' % (str(submission['Value'])))
        return S_ERROR('%s CE Error: %s' % (self.ceName, submission['Message']))

    batchID = submission['Value']
    self.log.info('Job submitted', '%s as %s' % (jobID, batchID))
    if 'PayloadFailed' in submission:
        # Submission itself worked but the payload failed: report and stop
        ret['PayloadFailed'] = submission['PayloadFailed']
        return ret
    # Throttle successive submissions to the CE
    time.sleep(self.jobSubmissionDelay)
    return ret
def execute(arguments):
    """Run the full Job Wrapper life cycle for one job: working directory
    setup, initialization, input sandbox/data staging, payload execution,
    output processing and finalization.

    :param dict arguments: must contain arguments['Job']['JobID']; optional
                           keys are 'WorkingDirectory' and, under 'Job',
                           'InputSandbox', 'InputData', 'OutputSandbox',
                           'OutputData'
    :return: 0 on success, 1 on setup/execution failure, 2 on output or
             finalization failure (exit code of job.finalize otherwise)
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    # Export JOBID before the int() cast so child processes see the string form
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:  # was has_key(): Py3-incompatible idiom
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception('JobWrapperTemplate could not create working directory')
                rescheduleResult = rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception as e:
        gLogger.exception('JobWrapper failed the initialization phase', lException=e)
        rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        try:
            # Best effort: accounting must not mask the original failure
            job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
        except Exception as e:
            gLogger.exception('JobWrapper failed sending job accounting', lException=e)
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception:  # was 'except Exception as x' with x unused
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
    else:
        gLogger.verbose('Job has no InputData requirement')

    gJobReport.commit()

    try:
        result = job.execute(arguments)
        if not result['OK']:
            gLogger.error('Failed to execute job', result['Message'])
            raise JobWrapperError(result['Message'])
    except Exception as x:
        # A payload exit status of 0 is raised as '0' and is not a failure
        if str(x) == '0':
            gLogger.verbose('JobWrapper exited with status=0 after execution')
        else:
            gLogger.exception('Job failed in execution phase')
            gJobReport.setJobParameter('Error Message', str(x), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Exception During Execution')
            return 1

    if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']:
        try:
            result = job.processJobOutputs(arguments)
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception as x:
            gLogger.exception('JobWrapper failed to process output files')
            gJobReport.setJobParameter('Error Message', str(x), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
    else:
        gLogger.verbose('Job has no OutputData or OutputSandbox requirement')

    try:
        # Failed jobs will return 1 / successful jobs will return 0
        return job.finalize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the finalization phase')
        return 2
def execute(arguments):
    """Run the full Job Wrapper life cycle for one job: working directory
    setup, initialization, input sandbox/data staging, payload execution,
    output processing and finalization.

    :param dict arguments: must contain arguments['Job']['JobID']; optional
                           keys are 'WorkingDirectory' and, under 'Job',
                           'InputSandbox', 'InputData', 'OutputSandbox',
                           'OutputData'
    :return: 0 on success, 1 on setup/execution failure, 2 on output or
             finalization failure (exit code of job.finalize otherwise)
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    # Export JOBID before the int() cast so child processes see the string form
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:  # was has_key(): Py3-incompatible idiom
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception('JobWrapperTemplate could not create working directory')
                rescheduleResult = rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception as e:
        gLogger.exception('JobWrapper failed the initialization phase', lException=e)
        rescheduleResult = rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        try:
            # Best effort: accounting must not mask the original failure
            job.sendJobAccounting(rescheduleResult, 'Job Wrapper Initialization')
        except Exception as e:
            gLogger.exception('JobWrapper failed sending job accounting', lException=e)
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleResult = rescheduleFailedJob(jobID, 'Input Sandbox Download', gJobReport)
            job.sendJobAccounting(rescheduleResult, 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception:  # was 'except Exception as x' with x unused
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleResult = rescheduleFailedJob(jobID, 'Input Data Resolution', gJobReport)
                job.sendJobAccounting(rescheduleResult, 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
    else:
        gLogger.verbose('Job has no InputData requirement')

    gJobReport.commit()

    try:
        result = job.execute(arguments)
        if not result['OK']:
            gLogger.error('Failed to execute job', result['Message'])
            raise JobWrapperError(result['Message'])
    except Exception as x:
        # A payload exit status of 0 is raised as '0' and is not a failure
        if str(x) == '0':
            gLogger.verbose('JobWrapper exited with status=0 after execution')
        else:
            gLogger.exception('Job failed in execution phase')
            gJobReport.setJobParameter('Error Message', str(x), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Exception During Execution', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Exception During Execution')
            return 1

    if 'OutputSandbox' in arguments['Job'] or 'OutputData' in arguments['Job']:
        try:
            result = job.processJobOutputs(arguments)
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception as x:
            gLogger.exception('JobWrapper failed to process output files')
            gJobReport.setJobParameter('Error Message', str(x), sendFlag=False)
            gJobReport.setJobStatus('Failed', 'Uploading Job Outputs', sendFlag=False)
            job.sendFailoverRequest('Failed', 'Uploading Job Outputs')
            return 2
    else:
        gLogger.verbose('Job has no OutputData or OutputSandbox requirement')

    try:
        # Failed jobs will return 1 / successful jobs will return 0
        return job.finalize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the finalization phase')
        return 2
def execute(arguments):
    """Run the initial phases of the Job Wrapper for one job (legacy
    sendWMSAccounting variant).

    Sets up the working directory, instantiates the JobWrapper and stages in
    the job's input sandbox and input data. On any failure the job is
    rescheduled, WMS accounting is sent, and 1 is returned; otherwise the
    function falls through (implicitly returning None).

    :param dict arguments: must contain arguments['Job']['JobID']; optional
                           keys are 'WorkingDirectory' and, under 'Job',
                           'InputSandbox' and 'InputData'
    """
    global gJobReport

    jobID = arguments['Job']['JobID']
    # Export JOBID before the int() cast so child processes see the string form
    os.environ['JOBID'] = jobID
    jobID = int(jobID)

    if 'WorkingDirectory' in arguments:  # was has_key(): Py3-incompatible idiom
        wdir = os.path.expandvars(arguments['WorkingDirectory'])
        if os.path.isdir(wdir):
            os.chdir(wdir)
        else:
            try:
                os.makedirs(wdir)
                if os.path.isdir(wdir):
                    os.chdir(wdir)
            except Exception:
                gLogger.exception('JobWrapperTemplate could not create working directory')
                rescheduleFailedJob(jobID, 'Could Not Create Working Directory')
                return 1

    gJobReport = JobReport(jobID, 'JobWrapper')

    job = None  # guard: JobWrapper() itself may raise, leaving 'job' unbound below
    try:
        job = JobWrapper(jobID, gJobReport)
        job.initialize(arguments)
    except Exception:
        gLogger.exception('JobWrapper failed the initialization phase')
        rescheduleFailedJob(jobID, 'Job Wrapper Initialization', gJobReport)
        if job is not None:
            # Only report accounting if the wrapper object was actually created
            job.sendWMSAccounting('Failed', 'Job Wrapper Initialization')
        return 1

    if 'InputSandbox' in arguments['Job']:
        gJobReport.commit()
        try:
            result = job.transferInputSandbox(arguments['Job']['InputSandbox'])
            if not result['OK']:
                gLogger.warn(result['Message'])
                raise JobWrapperError(result['Message'])
        except Exception:
            gLogger.exception('JobWrapper failed to download input sandbox')
            rescheduleFailedJob(jobID, 'Input Sandbox Download')
            job.sendWMSAccounting('Failed', 'Input Sandbox Download')
            return 1
    else:
        gLogger.verbose('Job has no InputSandbox requirement')

    gJobReport.commit()

    if 'InputData' in arguments['Job']:
        if arguments['Job']['InputData']:
            try:
                result = job.resolveInputData()
                if not result['OK']:
                    gLogger.warn(result['Message'])
                    raise JobWrapperError(result['Message'])
            except Exception:  # was 'except Exception, x' with x unused
                gLogger.exception('JobWrapper failed to resolve input data')
                rescheduleFailedJob(jobID, 'Input Data Resolution')
                job.sendWMSAccounting('Failed', 'Input Data Resolution')
                return 1
        else:
            gLogger.verbose('Job has a null InputData requirement:')
            gLogger.verbose(arguments)
def __submitJob(self, jobID, jobParams, resourceParams, optimizerParams, proxyChain):
    """Submit a job to the Computing Element instance after creating a custom
    Job Wrapper with the available job parameters.

    :param jobID: WMS job identifier
    :param dict jobParams: job description parameters used by the wrapper
    :param dict resourceParams: matched resource description
    :param dict optimizerParams: optimizer output for the job
    :param proxyChain: proxy chain object, dumped to a string and passed to the CE
    :return: S_OK/S_ERROR; on successful submission the S_OK may carry a
             "PayloadFailed" entry copied from the CE result
    """
    logLevel = self.am_getOption("DefaultLogLevel", "INFO")
    defaultWrapperLocation = self.am_getOption(
        "JobWrapperTemplate", "DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py"
    )
    result = createJobWrapper(
        jobID,
        jobParams,
        resourceParams,
        optimizerParams,
        extraOptions=self.extraOptions,
        defaultWrapperLocation=defaultWrapperLocation,
        log=self.log,
        logLevel=logLevel,
    )
    if not result["OK"]:
        return result
    wrapperFile = result["Value"]

    self.__report(jobID, "Matched", "Submitted To CE")
    self.log.info("Submitting %s to %sCE" % (os.path.basename(wrapperFile), self.ceName))

    # Pass proxy to the CE
    proxy = proxyChain.dumpAllToString()
    if not proxy["OK"]:
        self.log.error("Invalid proxy", proxy)
        return S_ERROR("Payload Proxy Not Found")
    payloadProxy = proxy["Value"]

    # NOTE: a dead pre-submission assignment (batchID = "dc%s" % jobID, flagged
    # by a FIXME) was removed: the batch ID is only known after submission.
    submission = self.computingElement.submitJob(wrapperFile, payloadProxy)

    ret = S_OK("Job submitted")
    if submission["OK"]:
        batchID = submission["Value"]
        self.log.info("Job %s submitted as %s" % (jobID, batchID))
        self.log.verbose("Set JobParameter: Local batch ID %s" % (batchID))
        self.__setJobParam(jobID, "LocalBatchID", str(batchID))
        if "PayloadFailed" in submission:
            # Submission worked but the payload itself failed: report and stop
            ret["PayloadFailed"] = submission["PayloadFailed"]
            return ret
        # Throttle successive submissions to the CE
        time.sleep(self.jobSubmissionDelay)
    else:
        self.log.error("Job submission failed", jobID)
        self.__setJobParam(jobID, "ErrorMessage", "%s CE Submission Error" % (self.ceName))
        if "ReschedulePayload" in submission:
            rescheduleFailedJob(jobID, submission["Message"])
            return S_OK()  # Without this the job would be marked as failed by the caller
        else:
            if "Value" in submission:
                self.log.error("Error in DIRAC JobWrapper:", "exit code = %s" % (str(submission["Value"])))
            # make sure the Job is declared Failed
            self.__report(jobID, "Failed", submission["Message"])
            return S_ERROR("%s CE Submission Error: %s" % (self.ceName, submission["Message"]))
    return ret