Example #1
    def get_task_data(self,worker_uuid):
        wid = self._resolve_wid(worker_uuid)
        logger.debug('get_task_data %d',wid)
        self.journal.addEntry('get_task_request',wid=wid)
        worker_entry = self.worker_registry.get(wid)
        # acquire the per-worker lock before entering try/finally, so that the
        # finally clause never releases a lock which was not acquired
        worker_entry.alive_lock.acquire()
        try:
            self.update_contact(wid)
            require_worker_initialized(worker_entry)
            import Queue
            try:
                #TEST: time.sleep(1)
                task_info = worker_entry.scheduled_tasks.get(block=False)
                logger.debug('removed from scheduled_tasks queue:%d,%s',task_info.tid,repr(task_info.task_input))
            except Queue.Empty:
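                # no task is currently scheduled for this worker: record the miss
                # and tell the worker to repeat the call later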
                self.journal.addEntry('get_task_data_error',wid=wid,msg=1)
                raise DIANE_CORBA.XRepeatCall(0)
            else:
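                # the task has been dequeued: mark it as being processed by this worker
                # and record the start time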
                worker_entry.processing_tasks[task_info.tid] = task_info
                self.worker_registry.update_cache(worker_entry)
                task_info.details.time_start = time.time()
                self.journal.addEntry('get_task_data',wid=wid,tid=task_info.tid)
                return (task_info.tid,streamer.dumps(task_info.task_input))
        finally:
            worker_entry.alive_lock.release()
Example #2
    def do_work(self, task_data):
        try:
            logger.debug("InprocessApplicationProxy.do_work()")
            task_data = streamer.loads(task_data)
            task_result = self.app.do_work(task_data)
            return streamer.dumps(task_result)
        except Exception, x:
            handleApplicationFailure(x)
Example #3
    def run(self):

        import MSGWrap
        
        from diane.config import log_configuration
        log_configuration(title='initial configuration')        

        msg_data = { '_worker_uuid' : self.uuid }

        try:
            self.registerToMaster()

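            # StandingCall wraps the master proxy so that remote calls are retried
            # at HEARTBEAT_DELAY intervals until they succeed or should_stop() is set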
            master = StandingCall(self.master, config.HEARTBEAT_DELAY, should_stop = self.should_stop)
            
            
            app_boot,app_init = master.get_init_data(self.uuid) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'get_init_data',self.uuid)
            _boot = streamer.loads(app_boot)
            msg_data['_master_uuid'] = _boot.master_uuid
            msg_data['_runid'] = _boot.runid
            msg_data['ganga_job_uuid'] = self.ganga_job_uuid

            # FIXME: if worker restart enabled, save diane.config.__all_configs and restore it after run has finished
            MSGWrap.sendStatus('_worker_create_application_proxy_start', msg_data)
            self.application = create_application_proxy(app_boot,app_init,agent=self)
            MSGWrap.sendStatus('_worker_create_application_proxy_finish', msg_data)
            
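            # make sure the application is finalized when the worker program exits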
            self.program.registerAtExitHandler(self.finalize_application)
            
            MSGWrap.sendStatus('_worker_initialize_start', msg_data)
            app_init_output = self.application.initialize(app_init)
            MSGWrap.sendStatus('_worker_initialize_finish', msg_data)

            # config may have been updated and the value of config.HEARTBEAT_DELAY may have changed -> need to create the object again
            # FIXME: use a REFERENCE to config.HEARTBEAT_DELAY
            master = StandingCall(self.master, config.HEARTBEAT_DELAY, should_stop = self.should_stop)

            master.put_init_result(self.uuid,app_init_output,0) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'put_init_result',self.uuid,app_init_output,0)

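            # main task loop: pull task data from the master, run it and send back
            # the serialized result (or the serialized ApplicationFailure)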
            while not self.should_stop():
                time.sleep(config.PULL_REQUEST_DELAY) # PENDING: this parameter should be dynamically controlled by the master
                tid,task_data = master.get_task_data(self.uuid) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'get_task_data',self.uuid)
                try:
                    msg_data['tid'] = tid
                    MSGWrap.sendStatus('_worker_do_work_start', msg_data)
                    task_result = self.application.do_work(task_data)
                    MSGWrap.sendStatus('_worker_do_work_finish', msg_data)
                    error = 0
                except diane.application.ApplicationFailure,x: # recoverable problem
                    task_result = streamer.dumps(x)
                    error = 1
                    #FIXME: reporting failure is not yet well-defined
                
                master.put_task_result(self.uuid,tid,task_result,error) #(config.HEARTBEAT_DELAY,-1,self.should_stop,self.master,'put_task_result',self.uuid,tid,task_result,error)

        except diane.application.ApplicationFailure,x: # recoverable problem but raised by the application init
            pass
Example #4
    def initialize(self, app_init):
        try:
            logger.debug("InprocessApplicationProxy.initialize()")
            app_init = streamer.loads(app_init)
            app_init_output = self.app.initialize(app_init)
            app_init_output = streamer.dumps(app_init_output)
            return app_init_output
        except Exception, x:
            handleApplicationFailure(x)
Example #5
    def get_init_data(self,worker_uuid):
        wid = self._resolve_wid(worker_uuid)
        # during the execution of this method the tasks may not be scheduled
        # to this worker because it is not in the cache waiting list
        logger.debug('get_init_data %d',wid)
        self.update_contact(wid)
        w = self.worker_registry.get(wid)
        require_worker_initialized(w,False)
        self.CNT = 0
        self.journal.addEntry('get_init_data',wid=wid)
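        # hand back the application boot data together with this worker's serialized init input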
        return (self.app_boot_data,streamer.dumps(w.init_input))
Example #6
    def startProcessing(self,input):
        import diane.util

        from diane.config import log_configuration
        log_configuration()


        #start the default file server
        import diane.FileTransfer        
        self.file_server = diane.FileTransfer.Server.main('FileTransferOID',self.server)

        # FIXME: segmentation fault if object reference is passed directly, workaround via stringified IOR
        self.file_server_ior = self.server.orb.object_to_string(self.file_server)

        import os
        self.journal.addEntry('master_start',runid=self.runid(), application_name=input.application.__name__, name=os.path.basename(input._runfile))


        # prepare application boot

        import application
        boot_data = application.make_boot_data(input)
        boot_data.runid = self.runid()
        boot_data.master_uuid = self.uuid
        self.app_boot_data = streamer.dumps(boot_data)

        # TEST: trigger race condition with registerWorker(): a fast worker registers before this method is completed
        # this problem should be fixed now
        #import time
        #time.sleep(5)

        def thread_crash_handler(t):
            try:
                logger.debug('Crash handler started: %s',t.__class__.__name__)
                return t._run()
            except Exception,x:
                logger.exception('Information from crash handler (%s): unhandled exception: %s',t.__class__.__name__,x)
                logger.info('Stopping the RunMaster and dumping the state into the "crash-dump.pickle" file')
                import pickle
                pickle.dump(self.worker_registry,file("crash-dump.pickle",'w'))
                self.shutdown()