def __init__(self, rid, server):
    """Set up the initial state of the master service.

    Arguments:
      rid    -- numeric run identifier
      server -- CORBAProgram server object which contains this service
    """
    # The Peer base class is initialised first, with the server and the
    # module-level logger.
    Peer.__init__(self, server, logger)

    ## CORBAProgram server object which contains this service.
    self.server = server

    ## WorkerRegistry.
    self.worker_registry = WorkerRegistry()
    self.__ping_cnt = 0

    ## TaskScheduler (nothing is attached at construction time).
    self.task_scheduler = None

    ## ControlThread; it is handed this object at creation.
    self.control_thread = ControlThread(self)

    ## cache for lookup efficiency (currently disabled)
    ##self.free_workers = {}
    ##self.received_workers = {}

    # application bootstrap parameters
    self.app_boot_data = None

    # default file server
    self.file_server = None

    # numeric run identifier
    self.rid = rid

    # uuid (generated here, stored as a string)
    from diane.util.compatibility import uuid
    self.uuid = str(uuid())

    # journal has an option to report events via MSG and the uuid is used
    # to identify the master
    self.journal = diane.journal.Journal('master.j', self.uuid)

    # automatically generated task id (counter starts at 1)
    self.__tid = 1
class RunMaster (DIANE_CORBA__POA.RunMaster, IMasterController, Peer): """ RunMaster is a service communicating with WorkerAgents and dispatching/receiving tasks. The task scheduling is controlled by TaskScheduler. WorkerAgent control is performed by ControlThread.""" def __init__(self,rid,server): Peer.__init__(self,server,logger) ## CORBAProgram server object which contains this service. self.server = server ## WorkerRegistry. self.worker_registry = WorkerRegistry() self.__ping_cnt = 0 ## TaskScheduler. self.task_scheduler = None ## ControlThread, self.control_thread = ControlThread(self) ## cache for lookup efficiency ##self.free_workers = {} ##self.received_workers = {} # application bootstrap parameters self.app_boot_data = None self.file_server = None # default file server # numeric run identifier self.rid = rid # uuid from diane.util.compatibility import uuid self.uuid = str(uuid()) # journal has an option to report events via MSG and uuid is used to identify the master self.journal = diane.journal.Journal('master.j', self.uuid) # automatically generated task id self.__tid = 1 def schedule(self,worker,tasks): # access wid or worker object as argument if type(worker) is type(1): worker = self.worker_registry.get(worker) assert(worker.initialized) try: worker.alive_lock.acquire() if worker.alive: for t in tasks: t.assign(worker.wid) worker.scheduled_tasks.put(t) self.worker_registry.update_cache(worker) self.journal.addEntry('tasks_scheduled',wid=worker.wid,tids=[t.tid for t in tasks]) else: # we do not add to the journal events which were immediately uncheduled tids = [t.tid for t in tasks] logger.warning('worker wid=%s: attempt to schedule tasks %s to a worker which is not alive',worker.wid,tids) logger.debug('task_scheduler.tasks_unscheduled(%s)',tids) self.task_scheduler.tasks_unscheduled(tasks) finally: worker.alive_lock.release() def unschedule(self,worker): # access wid or worker object as argument if type(worker) is type(1): worker = 
self.worker_registry.get(worker) assert(worker.initialized) try: worker.alive_lock.acquire() unscheduled_tasks = list(worker.scheduled_tasks.queue) for t in unscheduled_tasks: t.update(TaskStatus.UNSCHEDULED,None) try: tids = [t.tid for t in unscheduled_tasks] self.journal.addEntry('tasks_unscheduled',wid=worker.wid,tids=tids) logger.debug('task_scheduler.tasks_unscheduled(%s)',tids) self.task_scheduler.tasks_unscheduled(unscheduled_tasks) except Exception,x: logger.exception('Error in TaskScheduler.tasks_unscheduled() callback') worker.scheduled_tasks.clear() finally: