def submitJob(self, executableFile, proxy, dummy=None):
    """Run the executable in-process and report how the payload ended.

    When a payload proxy is given it is dumped to a file, exported to the
    payload through X509_USER_PROXY and monitored by a periodic task for as
    long as the executable runs.

    :param str executableFile: path of the file to execute via systemCall
    :param proxy: proxy for the payload; handed to writeProxyToFile when truthy
    :param dummy: unused, kept for interface compatibility
    :return: S_OK, with 'PayloadFailed' holding the exit status when it is in
        1..128; S_ERROR with the signed exit status in 'Value' when the
        status is above 128; or the writeProxyToFile result when dumping the
        proxy fails.
    """
    ret = getProxyInfo()
    pilotProxy = ret['Value']['path'] if ret['OK'] else None
    self.log.notice('Pilot Proxy:', pilotProxy)

    payloadEnv = dict(os.environ)
    payloadProxy = ''
    renewTask = None
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        result = self.writeProxyToFile(proxy)
        if not result['OK']:
            return result

        payloadProxy = result['Value']
        payloadEnv['X509_USER_PROXY'] = payloadProxy

        self.log.verbose('Starting process for monitoring payload proxy')
        result = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod,
                                                  self.monitorProxy,
                                                  taskArgs=(pilotProxy, payloadProxy),
                                                  executions=0,
                                                  elapsedTime=0)
        # Remember our own task id: it is cancelled below once the payload is
        # over, so no task of a previous payload is ever left to be renewed
        # and no unrelated task has to be removed from the scheduler.
        if result['OK']:
            renewTask = result['Value']

    # 5 == os.R_OK | os.X_OK
    if not os.access(executableFile, os.R_OK | os.X_OK):
        # rwxr-xr-x; the "0o" prefix is accepted by Python 2.6+ and Python 3
        os.chmod(executableFile, 0o755)
    cmd = os.path.abspath(executableFile)
    self.log.verbose('CE submission command: %s' % (cmd))
    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)

    if payloadProxy:
        os.unlink(payloadProxy)
    # The proxy file is gone: stop monitoring it
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    ret = S_OK()
    if not result['OK']:
        # NOTE(review): a failed systemCall is only logged and S_OK is still
        # returned - confirm that callers rely on this.
        self.log.error('Fail to run InProcess', result['Message'])
    elif result['Value'][0] > 128:
        # negative exit values are returned as 256 - exit
        exitStatus = result['Value'][0] - 256
        self.log.warn('InProcess Job Execution Failed')
        self.log.info('Exit status:', exitStatus)
        if exitStatus == -2:
            error = 'Error in the initialization of the DIRAC JobWrapper'
        elif exitStatus == -1:
            error = 'Error in the execution of the DIRAC JobWrapper'
        else:
            error = 'InProcess Job Execution Failed'
        res = S_ERROR(error)
        res['Value'] = exitStatus
        return res
    elif result['Value'][0] > 0:
        self.log.warn('Fail in payload execution')
        self.log.info('Exit status:', result['Value'][0])
        ret['PayloadFailed'] = result['Value'][0]
    else:
        self.log.debug('InProcess CE result OK')

    self.submittedJobs += 1
    return ret
def submitJob(self, executableFile, proxy, **kwargs):
    """Execute the given file in-process (overrides the base method).

    :param str executableFile: file to execute via systemCall. Normally the
        JobWrapperTemplate when invoked by the JobAgent.
    :param str proxy: the proxy used for running the job (the payload). It
        will be dumped to a file.
    :return: S_OK, with 'PayloadFailed' holding the exit status when it is in
        1..128; S_ERROR with the signed exit status in 'Value' when the
        status is above 128; or the writeProxyToFile result when dumping the
        proxy fails.
    """
    proxyInfo = getProxyInfo()
    pilotProxy = proxyInfo['Value']['path'] if proxyInfo['OK'] else None
    self.log.notice('Pilot Proxy:', pilotProxy)

    payloadEnv = dict(os.environ)
    payloadProxy = ''
    renewTask = None
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        dumped = self.writeProxyToFile(proxy)
        if not dumped['OK']:
            return dumped

        # Location of the proxy file, advertised to the payload
        payloadProxy = dumped['Value']
        payloadEnv['X509_USER_PROXY'] = payloadProxy

        self.log.verbose('Starting process for monitoring payload proxy')
        scheduled = gThreadScheduler.addPeriodicTask(
            self.proxyCheckPeriod,
            self.monitorProxy,
            taskArgs=(pilotProxy, payloadProxy),
            executions=0,
            elapsedTime=0)
        if scheduled['OK']:
            renewTask = scheduled['Value']

    # 5 == os.R_OK | os.X_OK: open up the permissions when the file cannot be
    # read and executed as it is
    if not os.access(executableFile, 5):
        wantedMode = (stat.S_IRWXU
                      | stat.S_IRGRP | stat.S_IXGRP
                      | stat.S_IROTH | stat.S_IXOTH)
        os.chmod(executableFile, wantedMode)

    cmd = os.path.abspath(executableFile)
    self.log.verbose('CE submission command: %s' % (cmd))
    callResult = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)

    # The payload is over: drop the proxy file and stop monitoring it
    if payloadProxy:
        os.unlink(payloadProxy)
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    jobResult = S_OK()
    if not callResult['OK']:
        self.log.error('Fail to run InProcess', callResult['Message'])
    else:
        exitStatus = callResult['Value'][0]
        if exitStatus > 128:
            # negative exit values are returned as 256 - exit
            signedStatus = exitStatus - 256
            self.log.warn('InProcess Job Execution Failed')
            self.log.info('Exit status:', signedStatus)
            knownErrors = {-2: 'JobWrapper initialization error',
                           -1: 'JobWrapper execution error'}
            failure = S_ERROR(knownErrors.get(signedStatus, 'InProcess Job Execution Failed'))
            failure['Value'] = signedStatus
            return failure
        if exitStatus > 0:
            self.log.warn('Fail in payload execution')
            self.log.info('Exit status:', exitStatus)
            jobResult['PayloadFailed'] = exitStatus
        else:
            self.log.debug('InProcess CE result OK')

    self.submittedJobs += 1
    return jobResult
def submitJob(self, executableFile, proxy, dummy=None):
    """Run the executable in-process and report how the payload ended.

    When a payload proxy is given it is dumped to a file, exported to the
    payload through X509_USER_PROXY and monitored by a periodic task that is
    cancelled as soon as the executable has finished.

    :param str executableFile: path of the file to execute via systemCall
    :param proxy: proxy for the payload; handed to writeProxyToFile when truthy
    :param dummy: unused, kept for interface compatibility
    :return: S_OK, with 'PayloadFailed' holding the exit status when it is in
        1..128; S_ERROR with the signed exit status in 'Value' when the
        status is above 128; or the writeProxyToFile result when dumping the
        proxy fails.
    """
    ret = getProxyInfo()
    pilotProxy = ret['Value']['path'] if ret['OK'] else None
    self.log.notice('Pilot Proxy:', pilotProxy)

    payloadEnv = dict(os.environ)
    payloadProxy = ''
    # Bound up front so that the cleanup below is valid on every path
    renewTask = None
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        result = self.writeProxyToFile(proxy)
        if not result['OK']:
            return result

        payloadProxy = result['Value']
        payloadEnv['X509_USER_PROXY'] = payloadProxy

        # Monitoring is only scheduled when there is a payload proxy file
        self.log.verbose('Starting process for monitoring payload proxy')
        result = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod,
                                                  self.monitorProxy,
                                                  taskArgs=(pilotProxy, payloadProxy),
                                                  executions=0,
                                                  elapsedTime=0)
        if result['OK']:
            renewTask = result['Value']

    # 5 == os.R_OK | os.X_OK
    if not os.access(executableFile, os.R_OK | os.X_OK):
        os.chmod(executableFile,
                 stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
    cmd = os.path.abspath(executableFile)
    self.log.verbose('CE submission command: %s' % (cmd))
    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)

    # The payload is over: drop the proxy file and stop monitoring it
    if payloadProxy:
        os.unlink(payloadProxy)
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    ret = S_OK()
    if not result['OK']:
        # NOTE(review): a failed systemCall is only logged and S_OK is still
        # returned - confirm that callers rely on this.
        self.log.error('Fail to run InProcess', result['Message'])
    elif result['Value'][0] > 128:
        # negative exit values are returned as 256 - exit
        exitStatus = result['Value'][0] - 256
        self.log.warn('InProcess Job Execution Failed')
        self.log.info('Exit status:', exitStatus)
        if exitStatus == -2:
            error = 'Error in the initialization of the DIRAC JobWrapper'
        elif exitStatus == -1:
            error = 'Error in the execution of the DIRAC JobWrapper'
        else:
            error = 'InProcess Job Execution Failed'
        res = S_ERROR(error)
        res['Value'] = exitStatus
        return res
    elif result['Value'][0] > 0:
        self.log.warn('Fail in payload execution')
        self.log.info('Exit status:', result['Value'][0])
        ret['PayloadFailed'] = result['Value'][0]
    else:
        self.log.debug('InProcess CE result OK')

    self.submittedJobs += 1
    return ret
def submitJob(self, executableFile, proxy=None, **kwargs):
    """Start a container for a job.

    executableFile is ignored. A new wrapper suitable for running in a
    container is created from jobDesc.

    :param executableFile: not used by this method
    :param proxy: payload proxy, forwarded to __createWorkArea; when truthy a
        periodic task monitoring the proxy file is scheduled for as long as
        the container runs
    :param kwargs: 'jobDesc', 'log' and 'logLevel' (default "INFO") are
        forwarded to __createWorkArea
    :return: the outcome of __checkResult when the command could be run, the
        failed __createWorkArea result, or S_ERROR flagged with
        'ReschedulePayload' when singularity or its image cannot be found or
        the command cannot be run
    """
    rootImage = self.__root

    # Check that singularity is available
    if not self.__hasSingularity():
        self.log.error("Singularity is not installed on PATH.")
        result = S_ERROR("Failed to find singularity ")
        result["ReschedulePayload"] = True
        return result

    self.log.info("Creating singularity container")

    # Start by making the directory for the container
    ret = self.__createWorkArea(kwargs.get("jobDesc"),
                                kwargs.get("log"),
                                kwargs.get("logLevel", "INFO"),
                                proxy)
    if not ret["OK"]:
        return ret
    baseDir = ret["baseDir"]
    tmpDir = ret["tmpDir"]

    renewTask = None
    if proxy:
        payloadProxyLoc = ret["proxyLocation"]

        # Now we have to set-up payload proxy renewal for the container
        # This is fairly easy as it remains visible on the host filesystem
        result = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod,
                                                  self._monitorProxy,
                                                  taskArgs=(payloadProxyLoc, ),
                                                  executions=0,
                                                  elapsedTime=0)
        if result["OK"]:
            renewTask = result["Value"]
        else:
            self.log.warn("Failed to start proxy renewal task")

    # Very simple accounting
    self.__submittedJobs += 1
    self.__runningJobs += 1

    # Now prepare start singularity
    # Mount /cvmfs in if it exists on the host
    withCVMFS = os.path.isdir("/cvmfs")
    innerCmd = os.path.join(self.__innerdir, "dirac_container.sh")
    cmd = [self.__singularityBin, "exec"]
    # use minimal /dev and empty other directories (e.g. /tmp and $HOME)
    cmd.extend(["--contain"])
    # run container in new IPC and PID namespaces
    cmd.extend(["--ipc", "--pid"])
    # working directory to be used for /tmp, /var/tmp and $HOME
    cmd.extend(["--workdir", baseDir])
    if self.__hasUserNS():
        cmd.append("--userns")
    if withCVMFS:
        cmd.extend(["--bind", "/cvmfs"])
    if not self.__installDIRACInContainer:
        cmd.extend(["--bind", "{0}:{0}:ro".format(self.__findInstallBaseDir())])
    if "ContainerBind" in self.ceParameters:
        for bindPath in self.ceParameters["ContainerBind"].split(","):
            # Fields of one entry are separated by ":::" in the CE parameter
            # and handed to singularity joined by ":"
            bindFields = [field.strip() for field in bindPath.split(":::")]
            if len(bindFields) <= 3:
                cmd.extend(["--bind", ":".join(bindFields)])
            else:
                self.log.warn("Ignoring malformed ContainerBind entry:", bindPath)
    if "ContainerOptions" in self.ceParameters:
        cmd.extend(opt.strip() for opt in self.ceParameters["ContainerOptions"].split(","))

    if os.path.isdir(rootImage) or os.path.isfile(rootImage):
        cmd.extend([rootImage, innerCmd])
    else:
        # if we are here is because there's no image, or it is not accessible (e.g. not on CVMFS)
        self.log.error("Singularity image to exec not found: ", rootImage)
        # Nothing was started: undo the running-job accounting and release
        # the renewal task and the work area before giving up
        self.__runningJobs -= 1
        if renewTask:
            gThreadScheduler.removeTask(renewTask)
        self.__deleteWorkArea(baseDir)
        result = S_ERROR("Failed to find singularity image to exec")
        result["ReschedulePayload"] = True
        return result

    containerEnv = self.__getEnv()
    self.log.debug("Execute singularity command: %s" % cmd)
    self.log.debug("Execute singularity env: %s" % containerEnv)

    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=containerEnv)

    self.__runningJobs -= 1

    if not result["OK"]:
        self.log.error("Fail to run Singularity", result["Message"])
        if renewTask:
            gThreadScheduler.removeTask(renewTask)
        self.__deleteWorkArea(baseDir)
        result = S_ERROR("Error running singularity command")
        result["ReschedulePayload"] = True
        return result

    # The result has to be read before the work area is deleted
    result = self.__checkResult(tmpDir)
    if renewTask:
        gThreadScheduler.removeTask(renewTask)
    self.__deleteWorkArea(baseDir)
    return result
def submitJob(self, executableFile, proxyObj, jobData):
    """Submit a job, running it through glexec when that is possible.

    The payload proxy is first written to a file. glexec is skipped when
    /DIRAC/Security/UseServerCertificate is set or when the pilot proxy lacks
    the GENERIC_PILOT group property. Otherwise the preparation steps are run
    in order; when one of them fails the job is either sent back for
    rescheduling (CE parameter "RescheduleOnError") or run in-process.

    :param executableFile: file to execute; handed to __executeInProcess when
        glexec is not used
    :param proxyObj: payload proxy, written to a file via writeProxyToFile
    :param jobData: stored on the instance
    :return: the result of __execute or __executeInProcess, or an error
        structure when writing the proxy, a glexec step (with
        "RescheduleOnError") or scheduling the monitoring task fails
    """
    self.log.info("Executable file is %s" % executableFile)
    self.__proxyObj = proxyObj
    self.__execFile = executableFile
    self.__jobData = jobData

    self.log.verbose('Setting up proxy for payload')
    written = self.writeProxyToFile(self.__proxyObj)
    if not written['OK']:
        return written
    self.__payloadProxyLocation = written['Value']

    glEnabled = True
    glOK = True

    if gConfig.getValue("/DIRAC/Security/UseServerCertificate", False):
        self.log.info("Running with a certificate. Avoid using glexec")
        glEnabled = False
    else:
        pilotInfo = ProxyInfo.getProxyInfo(self.__pilotProxyLocation, disableVOMS=True)
        # When the proxy information cannot be retrieved glexec stays enabled
        if pilotInfo['OK']:
            groupProperties = pilotInfo['Value'].get('groupProperties', [])
            if Properties.GENERIC_PILOT in groupProperties:
                self.log.info("Pilot is generic. Trying glexec")
            else:
                self.log.info("Pilot is NOT running with a generic pilot. Skipping glexec")
                glEnabled = False

    if glEnabled:
        self.log.info("Trying glexec...")
        preparationSteps = (self.__check_credentials,
                            self.__locate_glexec,
                            self.__prepare_glenv,
                            self.__prepare_tmpdir,
                            self.__test,
                            self.__construct_payload)
        for step in preparationSteps:
            self.log.info("Running step %s" % step.__name__)
            stepResult = step()
            if stepResult['OK']:
                continue
            self.log.error("Step %s failed: %s" % (step.__name__, stepResult['Message']))
            if self.ceParameters.get("RescheduleOnError", False):
                failure = S_ERROR('glexec CE failed on step %s : %s' % (step.__name__,
                                                                       stepResult['Message']))
                failure['ReschedulePayload'] = True
                return failure
            # The first failing step ends the glexec attempt
            glOK = False
            break
        if not glOK:
            self.log.notice("glexec failed miserably... Running without it.")
    else:
        self.log.notice("glexec is not enabled ")

    self.log.verbose('Starting process for monitoring payload proxy')
    scheduled = gThreadScheduler.addPeriodicTask(
        self.proxyCheckPeriod,
        self.monitorProxy,
        taskArgs=(self.__pilotProxyLocation, self.__payloadProxyLocation),
        executions=0,
        elapsedTime=0)
    if not scheduled['OK']:
        return S_ERROR("Could not schedule monitor proxy task: %s" % scheduled['Message'])
    pTask = scheduled['Value']

    useGlexec = glEnabled and glOK
    if useGlexec:
        outcome = self.__execute([self.__glCommand])
    else:
        outcome = self.__executeInProcess(executableFile)

    gThreadScheduler.removeTask(pTask)
    self.__cleanup()
    return outcome
def submitJob(self, executableFile, proxy, jobDesc, log, logLevel, **kwargs):
    """Start a container for a job.

    executableFile is ignored. A new wrapper suitable for running in a
    container is created from jobDesc.

    :param executableFile: not used by this method
    :param proxy: forwarded to __createWorkArea
    :param jobDesc: forwarded to __createWorkArea
    :param log: forwarded to __createWorkArea
    :param logLevel: forwarded to __createWorkArea
    :return: the outcome of __checkResult when the command could be run, the
        failed __createWorkArea result, or S_ERROR flagged with
        'ReschedulePayload' when singularity is missing or the command
        cannot be run
    """
    rootImage = self.__root

    # Singularity has to be available
    if not self.__hasSingularity():
        self.log.error('Singularity is not installed on PATH.')
        missing = S_ERROR("Failed to find singularity ")
        missing['ReschedulePayload'] = True
        return missing

    self.log.info('Creating singularity container')

    # The container gets its own directory
    workArea = self.__createWorkArea(proxy, jobDesc, log, logLevel)
    if not workArea['OK']:
        return workArea
    baseDir = workArea['baseDir']
    tmpDir = workArea['tmpDir']
    proxyLoc = workArea['proxyLocation']

    # Proxy renewal for the container: the proxy file stays visible on the
    # host filesystem, so it can be monitored from here
    proxyInfo = getProxyInfo()
    pilotProxy = proxyInfo['Value']['path'] if proxyInfo['OK'] else None
    scheduled = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod,
                                                 self._monitorProxy,
                                                 taskArgs=(pilotProxy, proxyLoc),
                                                 executions=0,
                                                 elapsedTime=0)
    if scheduled['OK']:
        renewTask = scheduled['Value']
    else:
        renewTask = None
        self.log.warn('Failed to start proxy renewal task')

    # Very simple accounting
    self.__submittedJobs += 1
    self.__runningJobs += 1

    # Assemble the singularity command line;
    # /cvmfs is mounted in if it exists on the host
    withCVMFS = os.path.isdir("/cvmfs")
    innerCmd = os.path.join(self.__innerdir, "dirac_container.sh")
    cmd = [self.__singularityBin, "exec", "-c", "-i", "-p", "-W", baseDir]
    if withCVMFS:
        cmd += ["-B", "/cvmfs"]
    if 'ContainerBind' in self.ceParameters:
        for bindPath in self.ceParameters['ContainerBind'].split(','):
            cmd += ["-B", bindPath.strip()]
    if 'ContainerOptions' in self.ceParameters:
        cmd += [opt.strip() for opt in self.ceParameters['ContainerOptions'].split(',')]
    cmd += [rootImage, innerCmd]

    self.log.debug('Execute singularity command: %s' % cmd)
    self.log.debug('Execute singularity env: %s' % self.__getEnv())

    callResult = systemCall(0, cmd, callbackFunction=self.sendOutput, env=self.__getEnv())

    self.__runningJobs -= 1

    commandRan = callResult["OK"]
    if commandRan:
        # Read before the work area is deleted
        outcome = self.__checkResult(tmpDir)

    # Common cleanup, whatever the outcome
    if renewTask:
        gThreadScheduler.removeTask(renewTask)
    self.__deleteWorkArea(baseDir)

    if not commandRan:
        outcome = S_ERROR("Error running singularity command")
        outcome['ReschedulePayload'] = True
    return outcome
def submitJob(self, executableFile, proxy, **kwargs):
    """Submit a job by executing it in-process (overriding base method).

    :param executableFile: file to execute via systemCall. Normally the
        JobWrapperTemplate when invoked by the JobAgent.
    :type executableFile: string
    :param proxy: the proxy used for running the job (the payload). It will
        be dumped to a file.
    :type proxy: string
    :return: S_OK, with 'PayloadFailed' holding the exit status when it is in
        1..128; S_ERROR with the signed exit status in 'Value' when the
        status is above 128; or the writeProxyToFile result when dumping the
        proxy fails.
    """
    pilotProxy = None
    pilotInfo = getProxyInfo()
    if pilotInfo['OK']:
        pilotProxy = pilotInfo['Value']['path']
    self.log.notice('Pilot Proxy:', pilotProxy)

    payloadProxy = ''
    renewTask = None
    payloadEnv = dict(os.environ)
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        proxyFile = self.writeProxyToFile(proxy)
        if not proxyFile['OK']:
            return proxyFile

        # proxy file location, exported to the payload environment
        payloadProxy = proxyFile['Value']
        payloadEnv['X509_USER_PROXY'] = payloadProxy

        self.log.verbose('Starting process for monitoring payload proxy')
        monitorArgs = (pilotProxy, payloadProxy)
        taskResult = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod,
                                                      self.monitorProxy,
                                                      taskArgs=monitorArgs,
                                                      executions=0,
                                                      elapsedTime=0)
        if taskResult['OK']:
            renewTask = taskResult['Value']

    # 5 == os.R_OK | os.X_OK
    readableAndExecutable = os.access(executableFile, 5)
    if not readableAndExecutable:
        permissions = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
        os.chmod(executableFile, permissions)

    cmd = os.path.abspath(executableFile)
    self.log.verbose('CE submission command: %s' % (cmd))
    execution = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)

    # Once the payload has ended neither the proxy file nor its monitoring
    # task are needed
    if payloadProxy:
        os.unlink(payloadProxy)
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    ret = S_OK()
    ran = execution['OK']
    exitCode = execution['Value'][0] if ran else None

    if not ran:
        self.log.error('Fail to run InProcess', execution['Message'])
    elif exitCode > 128:
        # negative exit values are returned as 256 - exit
        negativeCode = exitCode - 256
        self.log.warn('InProcess Job Execution Failed')
        self.log.info('Exit status:', negativeCode)
        error = 'InProcess Job Execution Failed'
        if negativeCode == -2:
            error = 'JobWrapper initialization error'
        elif negativeCode == -1:
            error = 'JobWrapper execution error'
        res = S_ERROR(error)
        res['Value'] = negativeCode
        return res
    elif exitCode > 0:
        self.log.warn('Fail in payload execution')
        self.log.info('Exit status:', exitCode)
        ret['PayloadFailed'] = exitCode
    else:
        self.log.debug('InProcess CE result OK')

    self.submittedJobs += 1
    return ret
def submitJob(self, executableFile, proxy, jobDesc, log, logLevel, **kwargs):
    """Start a container for a job.

    executableFile is ignored. A new wrapper suitable for running in a
    container is created from jobDesc.

    :param executableFile: not used by this method
    :param proxy: forwarded to __createWorkArea
    :param jobDesc: forwarded to __createWorkArea
    :param log: forwarded to __createWorkArea
    :param logLevel: forwarded to __createWorkArea
    :return: the outcome of __checkResult when the command could be run, the
        failed __createWorkArea result, or S_ERROR flagged with
        'ReschedulePayload' when singularity is missing or the command
        cannot be run
    """
    rootImage = self.__root

    # Check that singularity is available
    if not self.__hasSingularity():
        self.log.error('Singularity is not installed on PATH.')
        result = S_ERROR("Failed to find singularity ")
        result['ReschedulePayload'] = True
        return result

    self.log.info('Creating singularity container')

    # Start by making the directory for the container
    ret = self.__createWorkArea(proxy, jobDesc, log, logLevel)
    if not ret['OK']:
        return ret
    baseDir = ret['baseDir']
    tmpDir = ret['tmpDir']
    proxyLoc = ret['proxyLocation']

    # Now we have to set-up proxy renewal for the container
    # This is fairly easy as it remains visible on the host filesystem
    ret = getProxyInfo()
    pilotProxy = ret['Value']['path'] if ret['OK'] else None
    result = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod,
                                              self._monitorProxy,
                                              taskArgs=(pilotProxy, proxyLoc),
                                              executions=0,
                                              elapsedTime=0)
    renewTask = None
    if result['OK']:
        renewTask = result['Value']
    else:
        self.log.warn('Failed to start proxy renewal task')

    # Very simple accounting
    self.__submittedJobs += 1
    self.__runningJobs += 1

    # Now prepare start singularity
    # Mount /cvmfs in if it exists on the host
    withCVMFS = os.path.isdir("/cvmfs")
    innerCmd = os.path.join(self.__innerdir, "dirac_container.sh")
    cmd = [self.__singularityBin, "exec"]
    cmd.extend(["-c", "-i", "-p"])
    cmd.extend(["-W", baseDir])
    if withCVMFS:
        cmd.extend(["-B", "/cvmfs"])
    if 'ContainerBind' in self.ceParameters:
        for bindPath in self.ceParameters['ContainerBind'].split(','):
            cmd.extend(["-B", bindPath.strip()])
    if 'ContainerOptions' in self.ceParameters:
        cmd.extend(opt.strip() for opt in self.ceParameters['ContainerOptions'].split(','))
    cmd.extend([rootImage, innerCmd])

    self.log.debug('Execute singularity command: %s' % cmd)
    self.log.debug('Execute singularity env: %s' % self.__getEnv())

    result = systemCall(0, cmd, callbackFunction=self.sendOutput, env=self.__getEnv())

    self.__runningJobs -= 1

    # The container has ended, successfully or not: the renewal task must
    # always be cancelled, otherwise it stays scheduled after every job
    # that ends well.
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    if not result["OK"]:
        result = S_ERROR("Error running singularity command")
        result['ReschedulePayload'] = True
        return result

    # NOTE(review): the work area under baseDir is not removed here -
    # confirm that it is cleaned up elsewhere.
    return self.__checkResult(tmpDir)
def submitJob(self, executableFile, proxy=None, inputs=None, **kwargs):
    """Execute the given file in-process (overriding base method).

    The executable and its inputs are deleted once the execution is over.

    :param str executableFile: file to execute via systemCall. Normally the
        JobWrapperTemplate when invoked by the JobAgent.
    :param str proxy: the proxy used for running the job (the payload). It
        will be dumped to a file.
    :param list inputs: dependencies of executableFile
    :return: S_OK, with "PayloadFailed" holding the exit status when it is in
        1..128; S_ERROR with the signed exit status in "Value" when the
        status is above 128; or the writeProxyToFile result when dumping the
        proxy fails.
    """
    payloadEnv = dict(os.environ)
    payloadProxy = ""
    # Only ever set when a payload proxy is given
    renewTask = None
    if proxy:
        self.log.verbose("Setting up proxy for payload")
        dumped = self.writeProxyToFile(proxy)
        if not dumped["OK"]:
            return dumped

        # payload proxy file location
        payloadProxy = dumped["Value"]
        payloadEnv["X509_USER_PROXY"] = payloadProxy

        self.log.verbose("Starting process for monitoring payload proxy")
        scheduled = gThreadScheduler.addPeriodicTask(self.proxyCheckPeriod,
                                                     self._monitorProxy,
                                                     taskArgs=(payloadProxy, ),
                                                     executions=0,
                                                     elapsedTime=0)
        if scheduled["OK"]:
            renewTask = scheduled["Value"]
        else:
            self.log.warn("Failed to start proxy renewal task")

    self.submittedJobs += 1
    self.runningJobs += 1

    # 5 == os.R_OK | os.X_OK
    if not os.access(executableFile, 5):
        execMode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
        os.chmod(executableFile, execMode)

    cmd = os.path.abspath(executableFile)
    self.log.verbose("CE submission command:", cmd)
    callResult = systemCall(0, cmd, callbackFunction=self.sendOutput, env=payloadEnv)

    if payloadProxy:
        os.unlink(payloadProxy)
    if renewTask:
        gThreadScheduler.removeTask(renewTask)

    self.runningJobs -= 1

    # Delete executable file and inputs in case space is limited
    os.unlink(executableFile)
    if inputs:
        inputFiles = inputs if isinstance(inputs, list) else [inputs]
        for inputFile in inputFiles:
            os.unlink(inputFile)

    ret = S_OK()
    if not callResult["OK"]:
        self.log.error("Fail to run InProcess", callResult["Message"])
        return ret

    exitStatus = callResult["Value"][0]
    if exitStatus > 128:
        res = S_ERROR()
        # negative exit values are returned as 256 - exit
        res["Value"] = exitStatus - 256  # yes, it's "correct"
        self.log.warn("InProcess Job Execution Failed")
        self.log.info("Exit status:", callResult["Value"])
        knownErrors = {-2: "JobWrapper initialization error",
                       -1: "JobWrapper execution error"}
        res["Message"] = knownErrors.get(res["Value"], "InProcess Job Execution Failed")
        return res

    if exitStatus > 0:
        self.log.warn("Fail in payload execution")
        self.log.info("Exit status:", exitStatus)
        ret["PayloadFailed"] = exitStatus
    else:
        self.log.debug("InProcess CE result OK")
    return ret