def _logError( self , userMsg = None , logMsg = None ):
    """
    Report an error to the user and/or to the log.

    @param userMsg: message stored in the job status (status code 5) of
                    self.work_dir. Nothing is stored when it is empty or None.
    @type userMsg: string
    @param logMsg: message sent to rf_log at error level, prefixed with
                   "<service name>/<job key>". Nothing is logged when it is
                   empty or None.
    @type logMsg: string
    """
    if userMsg:
        # the status carries the message shown to the user
        StatusManager().setStatus( self.work_dir , Status( code = 5 , message = userMsg ) )
    if logMsg:
        service_name = self._service.getName()
        job_key = self._job.getKey()
        rf_log.error( "%s/%s : %s" %( service_name , job_key , logMsg ) )
def _run(self , serviceName, xmlEnv ):
    """
    Load the execution engine configured for the current job state.

    The execution configuration is asked to the dispatcher of the module
    level configuration (_cfg) for self.jobState. The loaded engine is bound
    to a local name; this block does not use it any further.

    @param serviceName: the name of the service (used to prefix the log message)
    @type serviceName: string
    @param xmlEnv: not read in this block
    @raise MobyleError: if the execution engine cannot be loaded. In this case
                        the error is logged as critical and the status of
                        self._dirPath is set to code 5 before raising.
    """
    execution_config = _cfg.getDispatcher().getExecutionConfig( self.jobState )
    try:
        exec_engine = executionLoader( execution_config = execution_config )
    except MobyleError as err:
        msg = "unknown execution system : %s" %err
        rc_log.critical("%s : %s" %( serviceName , msg ), exc_info = True )
        # the user only sees a generic message, the details stay in the log
        StatusManager().setStatus( self._dirPath ,
                                   Status( code = 5 , message = 'Mobyle internal server error' ) )
        raise MobyleError( msg )
def getStatus(jobID):
    """
    Give the current status of a job, refreshing the stored status from the
    execution engine when the stored one can still change.

    @param jobID: the url of the job
    @type jobID: string
    @return: the current status of the job: the stored status when it is not
             queryable, when the .admin information (execution alias or job
             number) is missing or when the engine answers with a status
             which is not known; the status reported by the engine otherwise.
    @rtype: the status object handled by StatusManager
    @raise NotImplementedError: if the url uses the http protocol
    @raise MobyleError: if the job has no number or if the job doesn't exist anymore
    @raise OSError: if the user is not the owner of the process
    """
    from Mobyle.JobState import JobState, normUri
    from urlparse import urlparse
    from Mobyle.StatusManager import StatusManager

    protocol, host, path, a, b, c = urlparse(normUri(jobID))
    if protocol == "http":
        raise NotImplementedError("trying to querying a distant server")
    if path.endswith("index.xml"):
        # drop the trailing "/index.xml" to keep the job directory only
        path = path[:-10]

    status_manager = StatusManager()
    stored = status_manager.getStatus(path)
    # 'killed' , 'finished' , 'error': the status cannot change anymore
    # 'building': these jobs have not yet a batch number
    if not stored.isQueryable():
        return stored

    admin = Admin(path)
    alias = admin.getExecutionAlias()
    number = admin.getNumber()
    if alias is None or number is None:
        return stored

    try:
        engine = executionLoader(jobID=jobID)
        fresh = engine.getStatus(number)
    except MobyleError as err:
        u_log.error(str(err), exc_info=True)
        raise err

    if not fresh.isKnown():
        return stored
    if fresh != stored:
        # persist the change only when there is one
        status_manager.setStatus(path, fresh)
    return fresh
def __init__( self , execution_config ):
    """
    Keep the execution configuration, its alias (asked to the Mobyle
    configuration) and a StatusManager at hand.

    @param execution_config: the configuration of the Execution
    @type execution_config: ExecutionConfig instance
    """
    cfg = Config()
    self._cfg = cfg
    self.execution_config = execution_config
    self.execution_config_alias = cfg.getAliasFromConfig( execution_config )
    self.status_manager = StatusManager()
def testCreation(self):
    """
    StatusManager.create( jobDir , status ) must write a <status> document
    with exactly two children: <value> (the status name) and <message>.
    """
    # ( Status keyword arguments , expected <value> text , expected <message> text )
    cases = (
        ( { 'code' : -1 } , 'unknown' , None ) ,
        ( { 'code' : 0 } , 'building' , None ) ,
        ( { 'code' : 1 , 'message' : 'test message' } , 'submitted' , 'test message' ) ,
        ( { 'string' : 'running' } , 'running' , None ) ,
    )
    for status_args , expected_value , expected_message in cases:
        StatusManager.create( self.jobDir , Status( **status_args ) )
        root = etree.parse( self.filename ).getroot()
        self.assertEqual( root.tag , 'status' )
        children = list( root )
        self.assertEqual( len( children ) , 2 )
        value_node , message_node = children[0] , children[1]
        self.assertEqual( value_node.tag , 'value' )
        self.assertEqual( value_node.text , expected_value )
        self.assertEqual( message_node.tag , 'message' )
        self.assertEqual( message_node.text , expected_message )
        # remove the file so that the next case starts from scratch
        os.unlink( self.filename )
def testGetStatus( self ):
    """
    A status written with StatusManager.create must be read back unchanged
    by StatusManager.getStatus, with or without a message.
    """
    cases = (
        { 'string' : 'running' } ,
        { 'string' : 'killed' , 'message' : "your job has been canceled" } ,
    )
    for status_args in cases:
        expected = Status( **status_args )
        StatusManager.create( self.jobDir , expected )
        manager = StatusManager()
        recieved_status = manager.getStatus( self.jobDir )
        self.assertEqual( recieved_status , expected )
        os.unlink( self.filename )
def isExecuting(jobID):
    """
    Tell whether a job is currently executing.

    @param jobID: the url of the job
    @type jobID: string
    @return: True if the job is currently executing ( submitted , running ,
             pending , hold ). False otherwise ( building, finished , error ,
             killed ). The answer is the isQueryable() value of the status:
             the stored one when the .admin information is missing, the one
             reported by the execution engine otherwise.
    @rtype: boolean
    @raise NotImplementedError: if the url uses the http protocol
    @raise MobyleError: if the job has no number
    @raise OSError: if the user is not the owner of the process
    """
    from Mobyle.JobState import normUri
    from urlparse import urlparse
    from Mobyle.StatusManager import StatusManager

    path = normUri(jobID)
    protocol, host, path, a, b, c = urlparse(path)
    if protocol == "http":
        raise NotImplementedError("trying to querying a distant server")
    if path[-9:] == "index.xml":
        # drop the trailing "/index.xml" to keep the job directory only
        path = path[:-10]

    adm = Admin(path)
    batch = adm.getExecutionAlias()
    jobNum = adm.getNumber()
    if batch is None or jobNum is None:
        # without execution alias or job number the engine cannot be asked:
        # this is only legitimate for a job whose status is not queryable
        sm = StatusManager()
        status = sm.getStatus(path)
        if not status.isQueryable():
            return False
        else:
            raise MobyleError("inconsistency in .admin file %s" % path)

    try:
        execKlass = executionLoader(jobID=jobID)
        newStatus = execKlass.getStatus(jobNum)
    except MobyleError as err:
        u_log.error(str(err), exc_info=True)
        # bare raise keeps the traceback of the original failure
        raise
    # NOTE(review): isQueryable() is assumed to be True exactly for the
    # executing states, as the branch above already relies on it to answer
    # False - confirm how an 'unknown' status should be reported.
    return newStatus.isQueryable()
def testConcurency(self):
    """
    Check the behaviour of StatusManager when another process holds a lock
    on the status file.

    Two scenarios are played, each one with a forked child which locks the
    status file and then sleeps in signal.pause() until the father sends
    SIGALRM:
      - the child holds a shared lock: the father must still read the status
        but setStatus must raise IOError.
      - the child holds an exclusive lock: the father reads an 'unknown'
        status and setStatus must raise IOError.
    """
    status = Status( string='submitted' )
    StatusManager.create( self.jobDir, status )
    ## sub-process start (shared lock held by the child)
    childPid = os.fork()
    if childPid: #father
        # leave the child some time to acquire the lock
        sleep(1)
        sm = StatusManager()
        self.assertEqual( status , sm.getStatus( self.jobDir ) )
        self.assertRaises( IOError , sm.setStatus , self.jobDir, status )
        # wake up the child (blocked in signal.pause) and reap it
        os.kill( childPid , signal.SIGALRM )
        os.wait()
    else: #child
        # handler is defined outside this method; installing it lets SIGALRM
        # interrupt signal.pause() - NOTE(review): its body is not visible here
        signal.signal(signal.SIGALRM, handler)
        File = open( self.filename , 'r' )
        fcntl.lockf( File , fcntl.LOCK_SH | fcntl.LOCK_NB )
        signal.pause()
        fcntl.lockf( File , fcntl.LOCK_UN )
        File.close()
        # leave the child without going back into the test runner
        os._exit(0)
    ## sub-process end
    ## sub-process start (exclusive lock held by the child)
    childPid = os.fork()
    if childPid: #father
        # leave the child some time to acquire the lock
        sleep(1)
        sm = StatusManager()
        recieved_status = sm.getStatus( self.jobDir )
        self.assertEqual( recieved_status , Status( string= "unknown" ) )
        self.assertRaises( IOError , sm.setStatus , self.jobDir, status )
        # wake up the child (blocked in signal.pause) and reap it
        os.kill( childPid , signal.SIGALRM )
        os.wait()
    else: #child
        signal.signal(signal.SIGALRM, handler)
        # 'r+' : the file is opened read/write before asking the exclusive lock
        File = open( self.filename , 'r+' )
        fcntl.lockf( File , fcntl.LOCK_EX | fcntl.LOCK_NB )
        signal.pause()
        fcntl.lockf( File , fcntl.LOCK_UN )
        File.close()
        # leave the child without going back into the test runner
        os._exit(0)
class ExecutionSystem(object):
    """
    abstract class
    manage the status by updating the file index.xml

    L{run} delegates the real execution to C{self._run}, which is not defined
    here: child classes must provide it, as well as L{getStatus} and L{kill}.
    """

    def __init__( self , execution_config ):
        """
        @param execution_config: the configuration of the Execution
        @type execution_config: ExecutionConfig instance
        """
        self._cfg = Config()
        self.execution_config = execution_config
        self.execution_config_alias = self._cfg.getAliasFromConfig( self.execution_config )
        self.status_manager = StatusManager()

    @staticmethod
    def _merge_path( *fragments ):
        """
        Join PATH fragments with ':' skipping the empty ones.

        An empty component in PATH (leading, trailing or doubled ':') is
        interpreted as the current directory, so empty or None fragments
        must never be joined.

        @param fragments: the PATH fragments, in priority order
        @type fragments: strings (or None)
        @return: the merged PATH
        @rtype: string
        """
        return ":".join( [ fragment for fragment in fragments if fragment ] )

    def run( self , commandLine , dirPath , serviceName , jobState , xmlEnv = None):
        """
        Prepare the environment of the job, execute it through self._run and
        store the resulting status.

        @param commandLine: the command to be executed
        @type commandLine: String
        @param dirPath: the absolute path to directory where the job will be executed (normaly we are already in)
        @type dirPath: String
        @param serviceName: the name of the service
        @type serviceName: string
        @param jobState:
        @type jobState: a L{JobState} instance
        @param xmlEnv: the environment variables defined for the job. This
                       dictionary is completed in place with PATH and with the
                       variables of the current process environment.
        @type xmlEnv: dictionary
        @raise MobyleError: if the current working directory is not dirPath
        """
        self.jobState = jobState
        if dirPath[-1] == '/':
            dirPath = dirPath[:-1]
        jobKey = os.path.split( dirPath )[1]
        if os.getcwd() != os.path.abspath( dirPath ):
            msg = "the child process execute itself in a wrong directory"
            self._logError( dirPath , serviceName ,jobKey,
                            userMsg = "Mobyle internal server error" ,
                            logMsg = msg )
            raise MobyleError( msg )

        if xmlEnv is None:
            xmlEnv = {}

        dispatcher = self._cfg.getDispatcher()
        queue = dispatcher.getQueue( jobState )
        adm = Admin( dirPath )
        adm.setQueue( queue )
        adm.commit()

        # PATH priority order: the PATH given in xmlEnv, then the configured
        # binary path, then the PATH of the current process
        binary_path = self._cfg.binary_path()
        if binary_path :
            configured_path = ":".join( binary_path )
        else:
            configured_path = ''
        xmlEnv[ 'PATH' ] = self._merge_path( xmlEnv.get( 'PATH' ) ,
                                             configured_path ,
                                             os.environ[ 'PATH' ] )
        # every other variable of the current process environment is copied
        # (and replaces a variable with the same name given in xmlEnv)
        for var in os.environ.keys():
            if var != 'PATH':
                xmlEnv[ var ] = os.environ[ var ]

        self._returncode = None
        accounting = self._cfg.accounting()
        if accounting:
            beg_time = time.time()
        ###################################
        mobyleStatus = self._run( commandLine , dirPath , serviceName , jobKey , jobState , queue , xmlEnv )
        ###################################
        if accounting:
            end_time = time.time()
            elapsed_time = end_time - beg_time
            a_log = getLogger( 'Mobyle.account' )
            #%d trunc time to second
            #%f for millisecond
            a_log.info("%(serviceName)s/%(jobkey)s : %(exec_class)s/%(queue)s : %(beg_time)d-%(end_time)d %(ela_time)d : %(status)s" %{ 'serviceName':serviceName ,
                                                                                                                                          'jobkey':jobKey,
                                                                                                                                          'exec_class':self.execution_config.execution_class_name ,
                                                                                                                                          'queue': queue,
                                                                                                                                          'beg_time':beg_time ,
                                                                                                                                          'end_time':end_time ,
                                                                                                                                          'ela_time':elapsed_time ,
                                                                                                                                          'status': mobyleStatus ,
                                                                                                                                          }
                       )
        self.status_manager.setStatus( dirPath , mobyleStatus )

    def getStatus( self , number ):
        """
        @param number: the identifier of the job in this execution system
        @type number:
        @return: the status of the job
        @rtype:
        abstract method. this method must be implemented in child classes
        """
        raise NotImplementedError( "Must be Implemented in child classes" )

    def kill( self , number ):
        """
        kill the Job
        @param number: the identifier of the job in this execution system
        @type number:
        abstract method. this method must be implemented in child classes
        """
        raise NotImplementedError( "Must be Implemented in child classes" )

    def _logError( self , dirPath , serviceName , jobKey , userMsg = None , logMsg = None ):
        """
        Report an error to the user and/or to the log.

        @param dirPath: the job directory whose status is updated
        @type dirPath: string
        @param serviceName: the name of the service (prefix of the log message)
        @type serviceName: string
        @param jobKey: the key of the job (prefix of the log message)
        @type jobKey: string
        @param userMsg: if set, the status of the job is set to code 5 with this message
        @type userMsg: string
        @param logMsg: if set, this message is logged at error level
        @type logMsg: string
        """
        if userMsg :
            self.status_manager.setStatus( dirPath, Status( code = 5 , message = userMsg ) )
        if logMsg :
            _log.error( "%s/%s : %s" %( serviceName , jobKey , logMsg ) )
def testSetstatus( self):
    """
    setStatus must replace the stored status, until an ended status
    ( here 'finished' ) is stored: after that the status must not change.
    """
    job_dir = self.jobDir
    StatusManager.create( job_dir , Status( string='submitted' ) )

    pending = Status( string= 'pending' )
    manager = StatusManager()
    manager.setStatus( job_dir , pending )
    self.assertEqual( manager.getStatus( job_dir ) , pending )

    finished = Status( string='finished' ,
                       message = 'your job finnished with an unusual status code, check youre results carefully')
    manager.setStatus( job_dir , finished )
    self.assertEqual( manager.getStatus( job_dir ) , finished )

    # an ended status cannot be changed anymore
    running = Status( string= 'running')
    manager.setStatus( job_dir , running )
    after_ended = manager.getStatus( job_dir )
    self.assertNotEqual( after_ended , running )
    self.assertEqual( after_ended , finished )

    os.unlink( self.filename )
def __init__(self, id=None, workflow=None, email=None, email_notify = 'auto', session=None, workflowID = None):
    """
    Rebuild an existing WorkflowJob from its id, or create a new one from a
    workflow definition.

    @param id: the identifier of this workflow (it's used to rebuild WorkflowJob using its id).
               When it is set, only the job state is loaded and all the other arguments are ignored.
    @type id: string
    @param workflow: the workflow definition used to create a new job
    @type workflow: a L{Workflow} instance
    @param email: the user email address (ignored when a session is given: the session email is used)
    @type email: L{EmailAddress} instance or a string
    @param email_notify: if the user must be or not notified of the results at the end of the Job.
    the 3 authorized values for this argument are:
      - 'true' to notify the results to the user
      - 'false' to Not notify the results to the user
      - 'auto' to notify the results based on the job elapsed time and the config EMAIL_DELAY
    @type email_notify: string
    @param session: the session owner of this workflow (if session is set workflowID must be None )
    @type session: a L{Session} instance
    @param workflowID: the ID of the workflow owner of this workflow
    @type workflowID: string
    @raise MobyleError: if both session and workflowID are set, or if no email
                        is available whereas email_notify is not 'false'
    """
    self.cfg = ConfigManager.Config()
    self.status_manager = StatusManager()
    if id:
        log.debug("accessing WorkflowJob %s" %(id))
        self.id = id
        self.jobState = JobState( id )
    else:
        log.debug("creating WorkflowJob for workflow '%s'" %(workflow.name))
        self.workflow = workflow
        if session and workflowID:
            msg = "try to instanciate a workflow with 2 owners: session %s & workflowID %s" %( session.getKey(),
                                                                                               workflowID
                                                                                               )
            log.error( msg )
            raise MobyleError( msg )
        self.session = session
        # default value: without it self.email would not exist when neither
        # a session nor an email is given, and the check below would fail
        # with an AttributeError instead of the expected MobyleError
        self.email = None
        if session :
            email = session.getEmail()
            if email:
                self.email = EmailAddress( email )
        elif email : #there is an email without session
            if not isinstance( email , EmailAddress ):
                self.email = EmailAddress( email )
            else:
                self.email = email

        self.email_notify = email_notify
        if self.email_notify != 'false' and not self.email:
            raise MobyleError( "email adress must be specified when email_notify is set to %s" % email_notify )

        self.parameters = {}
        for parameter in self.workflow.parameters:
            # setting parameters which have a default value (important for hidden parameters which are not
            # accessed by JobFacade...
            if not(parameter.isout) and parameter.vdef is not None:
                self.set_value(parameter.name, value=str(parameter.vdef))
        # job is just an "environment" folder for the job
        # it contains the instanciation of the job runner which seems to be hardcoded as "command runner"...
        self._job = Job( service = self.workflow,
                         cfg = self.cfg,
                         userEmail = self.email,
                         session = self.session,
                         workflowID = workflowID ,
                         )
        self.jobState = self._job.jobState
        self.id = self._job.getURL()
class WorkflowJob(object): def __init__(self, id=None, workflow=None, email=None, email_notify = 'auto', session=None, workflowID = None): """ @param id: the identifier of this workflow (it's used to rebuild WorkflowJob using it's id) @type id: string @param workflow: the workflow definition used to create a new job @type workflow: a L{Workflow} instance @param email: the user email address @type email: L{EmailAddress} instance or a string @param email_notify: if the user must be or not notify of the results at the end of the Job. the 3 authorized values for this argument are: - 'true' to notify the results to the user - 'false' to Not notify the results to the user - 'auto' to notify the results based on the job elapsed time and the config EMAIL_DELAY @type email_notify: string @param session: the session owner of this workflow (if session is set workflowID mut be None ) @type session: a L{Session} instance @param workflowID: the ID of a the workflow owner of this workflow @type workflowID: string """ self.cfg = ConfigManager.Config() self.status_manager = StatusManager() if id: log.debug("accessing WorkflowJob %s" %(id)) self.id = id self.jobState = JobState( id ) else: log.debug("creating WorkflowJob for workflow '%s'" %(workflow.name)) self.workflow = workflow if session and workflowID: msg = "try to instanciate a workflow with 2 owners: session %s & workflowID %s" %( session.getKey(), workflowID ) log.error( msg ) raise MobyleError( msg ) self.session = session if session : email = session.getEmail() if email: self.email = EmailAddress( email ) else: self.email = None elif email : #there is an email without session if not isinstance( email , EmailAddress ): self.email = EmailAddress( email ) else: self.email = email self.email_notify = email_notify if self.email_notify != 'false' and not self.email: raise MobyleError( "email adress must be specified when email_notify is set to %s" % email_notify ) self.parameters = {} for parameter in self.workflow.parameters: 
# setting parameters which have a default value (important for hidden parameters which are not # accessed by JobFacade... if not(parameter.isout) and parameter.vdef is not None: self.set_value(parameter.name, value=str(parameter.vdef)) # job is just an "environment" folder for the job # it contains the instanciation of the job runner which seems to be hardcoded as "command runner"... self._job = Job( service = self.workflow, cfg = self.cfg, userEmail = self.email, session = self.session, workflowID = workflowID , ) self.jobState = self._job.jobState self.id = self._job.getURL() def getDir(self): """ returns the absolute path of the workflow job directory """ return self.jobState.getDir() def set_status(self, status): log.debug("setting job %s status to %s" % (self.id, status)) self.status_manager.setStatus( self.getDir() , status ) def set_value(self, parameter_name, value=None, src=None, srcFileName=None): wf_parameter = [p for p in self.workflow.parameters if p.name==parameter_name][0] if value is not None: log.debug("setting %s parameter value to %s" %(parameter_name, value)) elif src is not None: log.debug("copying %s parameter value from %s/%s" %(parameter_name, src,srcFileName)) else: log.error("no VALUE or SOURCE URL specified for %s parameter." 
% parameter_name) """ set a parameter value """ self.parameters[parameter_name] = value self.parameters[parameter_name + '.src'] = src self.parameters[parameter_name + '.srcFileName'] = srcFileName if value and value==wf_parameter.vdef: log.debug("setting %s parameter value to default value %s" %(parameter_name, wf_parameter.vdef)) return # save input value in a file # link this file from the JobState xml datatype_class = wf_parameter.type.datatype.class_name datatype_superclass = wf_parameter.type.datatype.superclass_name df = DataTypeFactory() if (datatype_superclass in [None,""] ): dt = df.newDataType(datatype_class) else: dt = df.newDataType(datatype_superclass, datatype_class) mt = MobyleType(dt) p = Parameter(mt, name=parameter_name) p._isout = wf_parameter.isout if dt.isFile(): file_name = parameter_name+'.data' if src: src = DataProvider.get(src) file_name, size = mt.toFile( value , self , file_name, src , srcFileName ) if not(wf_parameter.isout): self.jobState.setInputDataFile(parameter_name, (file_name, size, None)) else: self.jobState.setOutputDataFile(parameter_name, [(file_name, size, None)]) else: if not(wf_parameter.isout): self.jobState.setInputDataValue(parameter_name, value) else: raise NotImplementedError() # so far Mobyle does not manage non-file outputs self.jobState.commit() def setValue(self, parameter_name, value=None, src=None, srcFileName=None): """MobyleJob-style set value method, called from JobFacade""" if type(value)==tuple: return self.set_value(parameter_name, value=value[1], src=value[2],srcFileName=value[3]) else: return self.set_value(parameter_name, value=value, src=src,srcFileName=srcFileName) def getJobid(self): """MobyleJob-style get job id method, called from JobFacade""" return self.id def getDate(self): """MobyleJob-style get date method, called from JobFacade""" return time.strptime(self.get_date(),"%x %X") def getStatus(self): """MobyleJob-style get status method, called from JobFacade""" return 
self.status_manager.getStatus( self.getDir() ) def get_value(self, parameter_name): """get a parameter value""" return self.parameters.get(parameter_name,None) def get_date(self): """get the job date as a string""" return self.jobState.getDate() def get_id(self): """get the job id""" return self.id def run(self): """submit the job asynchronously""" self.validate() self.set_status(Status( code = 1 )) # status = submitted #raise a UserValueError if nb of job is over the limit accepted if( self.email is not None ): self._job.over_limit( self.email , '' ) self._child_pid = os.fork() if self._child_pid==0: #Child code os.setsid() log_fd = os.open("%s/log" % self.jobState.getDir(), os.O_APPEND | os.O_WRONLY | os.O_CREAT , 0664 ) devnull = os.open( "/dev/null" , os.O_RDWR ) os.dup2( devnull , sys.stdin.fileno() ) os.close( devnull) os.dup2( log_fd , sys.stdout.fileno() ) os.dup2( log_fd , sys.stderr.fileno() ) os.close( log_fd ) atexit.register( self.log , "child exit for workflow id: %s" % self.get_id()) ################################################ service = self._job.getService() serviceName = service.getName() jobKey = self._job.getKey() linkName = ( "%s/%s.%s" %( self.cfg.admindir() , serviceName , jobKey ) ) try: os.symlink( os.path.join( self.getDir() , '.admin') , linkName ) except OSError , err: self.set_status(Status(string="error", message="workflow execution failed")) msg = "can't create symbolic link %s in ADMINDIR: %s" %( linkName , err ) log.critical( "%s/%s : %s" %( serviceName, jobKey, msg ), exc_info = True ) raise WorkflowJobError , msg ################################################ t0 = time.time() self.srun() t1 = time.time() ################################################ try: os.unlink( linkName ) except OSError , err: self.set_status(Status(string="error", message="workflow execution failed")) msg = "can't remove symbolic link %s in ADMINDIR: %s" %( linkName , err ) log.critical( "%s/%s : %s" %( serviceName, jobKey, msg ), exc_info= True ) 
raise WorkflowJobError , msg ################################################ try: zipFileName = self.zip_results() except Exception : msg = "an error occured during the zipping results :\n\n" log.critical( "%s : %s" %( self.id , msg ) , exc_info = True) zipFileName = None if self.email_notify == 'auto': if ( t1 - t0 ) > self.cfg.email_delay() : emailResults( self.cfg , self.email, #userEmail, registry, self.id, self.getDir(), self.workflow.getName(), self._job.getKey(), FileName = zipFileName ) elif self.email_notify == 'true': emailResults( self.cfg , self.email, #userEmail, registry, self.id, self.getDir(), self.workflow.getName(), self._job.getKey(), FileName = zipFileName ) else: pass sys.exit(0) #exit with no error