class CoordinatorMessagingEndpoint(Greenlet):
    """
    Messaging endpoint for the Coordinator Node.

    Binds a ROUTER socket on MESSAGE_PORT and, while the greenlet runs,
    decodes the second frame of every incoming multipart message and passes
    the result to ``self._handle_data``.
    """

    def __init__(self, node):
        """
        Create the endpoint and bind its ROUTER socket.

        node -- the coordinator node; its ``worker_manager`` and
                ``file_manager`` attributes are cached on this endpoint.
        """
        super(CoordinatorMessagingEndpoint, self).__init__()
        self.logger = logging.getLogger('dwf.EndPoint')
        self.node = node
        self.worker_manager = self.node.worker_manager
        self.file_manager = self.node.file_manager
        self.context = Context()
        self.router = self.context.socket(ROUTER)
        self.router.bind("tcp://*:%s" % str(MESSAGE_PORT))

    #=====================RECEIVING==============================
    def kill(self, *args, **kwargs):
        """
        Kill the greenlet, then close the socket and terminate the context.

        Any arguments are forwarded unchanged to ``Greenlet.kill`` so callers
        may use its usual ``exception``/``block``/``timeout`` parameters.
        """
        try:
            super(CoordinatorMessagingEndpoint, self).kill(*args, **kwargs)
        finally:
            # Release the ZeroMQ resources even if killing the greenlet raised.
            self.router.close()
            self.context.term()

    def _run(self):
        """
        Greenlet run method: receive and dispatch messages forever.

        A failure while receiving, decoding or handling one message is
        logged and the loop continues with the next message.
        """
        # NOTE(review): _handle_data is not defined in the visible part of
        # this class -- presumably it is provided elsewhere; confirm.
        while True:
            try:
                # Frame [1] is decoded as the payload; frame [0] is presumably
                # the sender identity added by the ROUTER socket.
                self._handle_data(loads(self.router.recv_multipart()[1]))
            except Exception as e:
                # Pass the exception as a logging argument: concatenating it
                # to the message with "+" raises TypeError inside this handler
                # and would terminate the receive loop.
                self.logger.critical(
                    "Error: _handle_data(loads(self.router.recv_multipart()[1])): %s",
                    e, exc_info=True)
class WorkerMessagingEndpoint(Greenlet):
    """
    Messaging endpoint for the worker node.

    Connects a DEALER socket (identified by the node's host address) to the
    coordinator, dispatches every JSON message received from it, and offers
    helpers for sending status messages back.
    """

    def __init__(self, node):
        """
        Create the endpoint and connect its DEALER socket to the coordinator.

        node -- the worker node; ``host_address`` is used as the socket
                identity and ``coordinator_address`` as the connect target.
        """
        super(WorkerMessagingEndpoint, self).__init__()
        self.logger = logging.getLogger("dwf.MessagingEndPoint")
        self.node = node
        self.context = Context()
        self.dealer = self.context.socket(DEALER)
        self.dealer.setsockopt(IDENTITY, self.node.host_address)
        self.dealer.connect("tcp://%s:%s" % (
            self.node.coordinator_address, str(MESSAGE_PORT)))

    #=================RECEIVING====================
    def kill(self, *args, **kwargs):
        """
        Kill the greenlet, then close the socket and terminate the context.

        Any arguments are forwarded unchanged to ``Greenlet.kill`` so callers
        may use its usual ``exception``/``block``/``timeout`` parameters.
        """
        try:
            # The class name must match exactly; a misspelled name here
            # raises NameError and leaves the socket and context open.
            super(WorkerMessagingEndpoint, self).kill(*args, **kwargs)
        finally:
            # Release the ZeroMQ resources even if killing the greenlet raised.
            self.dealer.close()
            self.context.term()

    def send_heartbeat(func):
        """
        Decorator that optionally starts the heartbeat loop.

        HEARTBEAT_MONITORING is evaluated once, when the class body is
        executed. If it is true, the returned wrapper spawns
        ``self._heartbeat`` before calling ``func``; otherwise the wrapper
        only calls ``func``.
        """
        if HEARTBEAT_MONITORING:
            def wrapper(self):
                spawn(self._heartbeat)  # spawn a heartbeat loop
                return func(self)
        else:
            def wrapper(self):
                return func(self)
        return wrapper

    @send_heartbeat
    def _run(self):
        """ Greenlet run method: receive and dispatch JSON messages forever. """
        while True:
            self._handle_message(self.dealer.recv_json())

    def _heartbeat(self):
        """ Send a heartbeat to the coordinator every HEARTBEAT_INTERVAL. """
        while True:
            self._send_heartbeat()
            sleep(HEARTBEAT_INTERVAL)

    def set_handler(self, handler):
        """
        Set the message handler.

        The handler is stored on the instance as ``_handle_message``, so it
        replaces the default implementation for this endpoint only.
        """
        self._handle_message = handler

    def _handle_message(self, message):
        """
        Handle an incoming message received as a dict (serialized in JSON).

        The ``"__type__"`` entry selects the action:
          "assignment" -- record input file locations, build a Job and
                          dispatch it to the slot named by ``slot_id``
          "dag_info"   -- register the binary files (located at the
                          coordinator) and store the number of jobs
          "status"     -- mark the workflow as finished or failed
          "reset"      -- reset the node and run it again
        Any other type is logged as critical.
        """
        message_type = message["__type__"]

        if message_type == "assignment":  # job assignment
            input_file_locs = message["input_file_locs"]
            for filename, address in input_file_locs:
                if filename not in self.node.files:
                    self.node.files[filename] = File(filename=filename)
                self.node.add_file_location(filename, address)

            input_files = dict(
                (filename, self.node.files[filename])
                for filename, file_loc in input_file_locs)
            output_files = dict(
                (filename, File(filename=filename, type="data"))
                for filename in message["output_files"])
            # The first argument is passed on its own in addition to the
            # full argument list.
            job = Job(message["job_id"], message["args"][0], message["args"],
                      input_files, output_files)
            self.node.slot_manager.dispatch_job_to_slot(
                message["slot_id"], job)

        elif message_type == "dag_info":  # dag info
            self.node.file_manager.binary_filenames.update(
                message["binary_filenames"])
            # add the location of the initial binary as the coordinator
            for filename in self.node.file_manager.binary_filenames:
                self.node.file_manager.add_file_location(
                    filename, self.node.coordinator_address)
                self.node.files[filename] = File(filename=filename,
                                                 type="executable")
            self.node.num_jobs = message["num_jobs"]

        elif message_type == "status":  # status message e.g., failure
            status = message["status"]
            if status == "finished":  # workflow finished
                self.node.finished = True
                if hasattr(self.node, "completed_job_queue"):
                    # put the end message into the completed job queue
                    self.node.completed_job_queue.put("END")
            elif status == "failed":  # workflow failed
                self.node.failed = True
                if hasattr(self.node, "completed_job_queue"):
                    # put the end message into the completed job queue
                    self.node.completed_job_queue.put("END")

        elif message_type == "reset":  # reset message
            self.node.reset()
            self.node.run()

        else:
            self.logger.critical("INVALID MESSAGE RECEIVED: %s", message)

    #=======================SENDING==============================
    def send_message(self, message):
        """
        Send a message (a dict) to the coordinator.

        The node's host address is stored in the dict under "__address__"
        (the dict is modified in place) and the send is performed in a newly
        spawned greenlet, which is returned.
        """
        message["__address__"] = self.node.host_address
        return spawn(self.dealer.send_json, message)

    def register(self, num_slots):
        """ register this machine with the coordinator """
        return self.send_message({
            "__type__": "register",
            "num_slots": num_slots
        })

    def send_ready(self):
        """ send ready message to coordinator """
        return self.send_message({
            "__type__": "status",
            "status": "ready"
        })

    def send_file_received(self, filename):
        """ notify the coordinator that the given file has been received """
        return self.send_message({
            "__type__": "received",
            "filename": filename
        })

    def send_job_completed(self, job_result):
        """
        Notify the coordinator that a job completed.

        job_result -- dict describing the result; it is tagged in place with
                      "__type__" = "result" and sent as JSON.
        """
        job_result["__type__"] = "result"
        return self.send_message(job_result)

    def send_workflow_complete(self):
        """ notify that the workflow has completed """
        return self.send_message({
            "__type__": "status",
            "status": "finished"
        })

    def send_workflow_failed(self):
        """ notify that the workflow has failed """
        return self.send_message({
            "__type__": "status",
            "status": "failed"
        })

    def _send_heartbeat(self):
        """ Send one heartbeat message and wait until it has been sent. """
        self.send_message({"__type__": "heartbeat"}).join()