def serve(self):
    """Run the control-service loop until a stop is requested or the client times out.

    Starts the services, sends the current monitor status to the client and
    then polls for control-service requests.  The 'command' string of every
    received request is compiled and executed inside this method's scope,
    and the monitor status is sent back when the request's 'send_response'
    entry is true.  Once the loop ends the services are stopped and, only
    when the stop request asked for it, the MPI environment is finalized.

    NOTE: the local variable names of this method are effectively part of
    the control protocol (the stop signal is the statement
    'stop_service_requested = True'), so they must not be renamed.
    """
    casalog_call_origin = "MPICommandServer::serve"

    # First start command and ping status services
    casalog.post("Starting services...", "INFO", casalog_call_origin)
    self.start_services()

    # Notify to MPICommandClient that service is up and running
    self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())

    # Keep serving until a stop signal service is received
    control_service_request = {}
    stop_service_requested = False
    while (not stop_service_requested) and (not self.__monitor_server.get_client_timeout()):

        # Check if there is an incoming control service msg
        msg_available = False
        try:
            msg_available = self.__communicator.control_service_request_probe()
        except:
            msg_available = False
            formatted_traceback = traceback.format_exc()
            casalog.post(
                "Exception checking if control service msg is available: %s" % str(formatted_traceback),
                "SEVERE",
                casalog_call_origin,
            )

        if msg_available:

            # Receive control service msg
            msg_received = False
            control_service_request = {}
            try:
                control_service_request = self.__communicator.control_service_request_recv()
                msg_received = True
            except:
                msg_received = False
                formatted_traceback = traceback.format_exc()
                casalog.post(
                    "Exception receiving control service msg: %s" % str(formatted_traceback),
                    "SEVERE",
                    casalog_call_origin,
                )
                # Nothing to process, go back to probing
                continue

            # Process control service msg
            cmd = None
            send_response = False
            if msg_received:
                try:
                    cmd = control_service_request["command"]
                    send_response = control_service_request["send_response"]
                    code = compile(cmd, casalog_call_origin, "exec")
                    # The command runs in this frame on purpose: it may use 'self'
                    # and the stop signal rebinds the local 'stop_service_requested'
                    # (this relies on Python 2 exec-statement semantics).
                    # NOTE(review): this executes code received over the control
                    # channel; confirm only trusted peers can send requests.
                    exec(code)
                    casalog.post(
                        "Control signal %s successfully handled by server %s"
                        % (str(cmd), str(MPIEnvironment.mpi_processor_rank)),
                        "INFO",
                        casalog_call_origin,
                    )
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post(
                        "Exception handling control signal command %s in server %s: %s"
                        % (
                            str(control_service_request),
                            str(MPIEnvironment.mpi_processor_rank),
                            str(formatted_traceback),
                        ),
                        "SEVERE",
                        casalog_call_origin,
                    )

            # Notify to MPICommandClient that control signal has been processed
            if send_response:
                try:
                    self.__communicator.control_service_response_send(
                        response=self.__monitor_server.get_status()
                    )
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post(
                        "Exception sending response to control signal command %s in server %s: %s"
                        % (str(cmd), str(MPIEnvironment.mpi_processor_rank), str(formatted_traceback)),
                        "SEVERE",
                        casalog_call_origin,
                    )

        time.sleep(MPIEnvironment.mpi_stop_service_sleep_time)

    # Process stop service request
    if stop_service_requested:

        # Check if force mode is needed
        force_command_request_interruption = control_service_request["force_command_request_interruption"]
        finalize_mpi_environment = control_service_request["finalize_mpi_environment"]
        busy = self.__monitor_server.get_status("busy")
        if force_command_request_interruption and busy:
            casalog.post(
                "force-stop service signal received, stopping services, "
                + "command request handler service will be interrupted...",
                "INFO",
                casalog_call_origin,
            )
        else:
            force_command_request_interruption = False
            casalog.post("stop service signal received, stopping services...", "INFO", casalog_call_origin)
    else:
        force_command_request_interruption = True
        # The loop ended because of a client timeout, so no stop request
        # carries the finalize flag.  Without a value here the check below
        # raised NameError.  Finalization is skipped, as the client does
        # when one of its servers is in timeout condition.
        finalize_mpi_environment = False
        casalog.post(
            "client timeout, forcing disconnection, " + "command request handler service will be interrupted..",
            "INFO",
            casalog_call_origin,
        )

    # Stop services
    self.stop_services(force_command_request_interruption)

    # Finalize MPI environment
    if finalize_mpi_environment:
        try:
            casalog.post("Going to finalize MPI environment", "INFO", casalog_call_origin)
            MPIEnvironment.finalize_mpi_environment()
        except:
            formatted_traceback = traceback.format_exc()
            casalog.post(
                "Exception finalizing MPI environment %s" % str(formatted_traceback),
                "SEVERE",
                casalog_call_origin,
            )

    # Exit
    casalog.post("Exiting", "INFO", casalog_call_origin)
def stop_services(self, force_command_request_interruption=False):
    """Stop the client services and send the stop signal to the servers.

    Does nothing (besides logging a warning) when the life cycle state is
    0 ("Services not started") or 2 ("life cycle finalized").  Otherwise it
    stops the monitoring service, logs every command request that has no
    response yet, stops the request/response services, sends the
    process-control and stop signals and finally sets the life cycle state
    to 2.

    Args:
        force_command_request_interruption: value forwarded to the servers
            in the stop signal.  It is forced to True when any server is in
            timeout condition; in that case MPI finalization is skipped too.

    Never raises: any exception is caught and printed (see note below).
    """
    # jagonzal: This method is called by the atexit module and if it fails it
    # causes ipython to crash, producing a report and waiting for user input
    # so we cannot risk under any circumstances such an event
    try:
        casalog_call_origin = "MPICommandClient::stop_services"

        if self.__life_cycle_state == 0:
            casalog.post("Services not started", "WARN", casalog_call_origin)
            return
        elif self.__life_cycle_state == 2:
            casalog.post("MPICommandClient life cycle finalized", "WARN", casalog_call_origin)
            return

        # Check if any server is in timeout condition before stopping the monitoring service
        server_rank_timeout = self.__monitor_client.get_server_timeout()
        finalize_mpi_environment = True
        if len(server_rank_timeout) > 0:
            finalize_mpi_environment = False
            force_command_request_interruption = True

        # Stop client monitoring services
        self.__monitor_client.stop_services()

        # Notify command requests which are going to be interrupted.
        # Only the request itself is logged: the former unused 'server' and
        # 'status' lookups could raise KeyError and abort the whole shutdown.
        for command_request_id in self.__command_request_list:
            if command_request_id not in self.__command_response_list:
                casalog.post(
                    "Aborting command request with id# %s: %s"
                    % (str(command_request_id), str(self.__command_request_list[command_request_id])),
                    "SEVERE",
                    casalog_call_origin,
                )

        # Stop client command request-response services
        self.__stop_command_request_queue_service()
        self.__stop_command_response_handler_service()

        # Shutdown plotms process
        self.__send_control_signal(
            {"command": "pm.killApp()", "signal": "process_control"}, check_response=True
        )

        # Shutdown virtual frame buffer
        self.__send_control_signal(
            {"command": "self.stop_virtual_frame_buffer()", "signal": "process_control"}, check_response=True
        )

        # Send stop signal to servers
        self.__send_control_signal(
            {
                "command": "stop_service_requested = True",
                "signal": "stop",
                "force_command_request_interruption": force_command_request_interruption,
                "finalize_mpi_environment": finalize_mpi_environment,
            },
            check_response=False,
        )

        # Finalize MPI environment
        if finalize_mpi_environment:
            try:
                casalog.post("Going to finalize MPI environment", "INFO", casalog_call_origin)
                MPIEnvironment.finalize_mpi_environment()
            except:
                formatted_traceback = traceback.format_exc()
                casalog.post(
                    "Exception finalizing MPI environment %s" % str(formatted_traceback),
                    "SEVERE",
                    casalog_call_origin,
                )
        else:
            casalog.post(
                "MPIServers with rank %s are in timeout condition, skipping MPI_Finalize()"
                % str(server_rank_timeout),
                "SEVERE",
                casalog_call_origin,
            )

        # UnMark MPI environment to be finalized by the MPICommunicator destructor
        # (Either because it is already finalized or due to a
        # server not responsive that prevents graceful finalization)
        self.__communicator.set_finalize_mpi_environment(False)

        # Set life cycle state
        self.__life_cycle_state = 2

        casalog.post("All services stopped", "INFO", casalog_call_origin)

    except:
        formatted_traceback = traceback.format_exc()
        # Single-argument parenthesised form: prints the same text with the
        # Python 2 print statement and with the print function.
        print("Unhandled exception in MPICommandClient::stop_services %s" % (formatted_traceback))
def serve(self):
    """Serve control-service requests until a stop signal or a client timeout.

    Sequence: start the services, send the monitor status to the client,
    then loop probing for control-service requests.  For every request
    received, its 'command' string is compiled and executed in the scope of
    this method; if its 'send_response' entry is true the monitor status is
    sent back.  After the loop the services are stopped, and the MPI
    environment is finalized only if the stop request asked for it.

    NOTE: commands are executed in this frame, so the local names used here
    (in particular 'stop_service_requested' and 'self') must be kept.
    """
    casalog_call_origin = "MPICommandServer::serve"

    # First start command and ping status services
    casalog.post("Starting services...","INFO",casalog_call_origin)
    self.start_services()

    # Notify to MPICommandClient that service is up and running
    self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())

    # Keep serving until a stop signal service is received
    control_service_request = {}
    stop_service_requested = False
    while ((not stop_service_requested) and (not self.__monitor_server.get_client_timeout())):

        # Check if there is an incoming control service msg
        msg_available = False
        try:
            msg_available = self.__communicator.control_service_request_probe()
        except:
            msg_available = False
            formatted_traceback = traceback.format_exc()
            casalog.post("Exception checking if control service msg is available: %s"
                         % str(formatted_traceback),"SEVERE",casalog_call_origin)

        if msg_available:

            # Receive control service msg
            msg_received = False
            control_service_request = {}
            try:
                control_service_request = self.__communicator.control_service_request_recv()
                msg_received = True
            except:
                msg_received = False
                formatted_traceback = traceback.format_exc()
                casalog.post("Exception receiving control service msg: %s"
                             % str(formatted_traceback),"SEVERE",casalog_call_origin)
                # Nothing was received, probe again
                continue

            # Process control service msg
            cmd = None
            send_response = False
            if msg_received:
                try:
                    cmd = control_service_request['command']
                    send_response = control_service_request['send_response']
                    code = compile(cmd, casalog_call_origin, 'exec')
                    # Executed in this frame so that the stop signal
                    # ('stop_service_requested = True') rebinds the loop flag;
                    # this relies on Python 2 exec-statement semantics.
                    # NOTE(review): runs code received from the control channel,
                    # verify that only trusted peers can send requests.
                    exec(code)
                    casalog.post("Control signal %s successfully handled by server %s"
                                 % (str(cmd),str(MPIEnvironment.mpi_processor_rank)),
                                 "INFO",casalog_call_origin)
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post("Exception handling control signal command %s in server %s: %s"
                                 % (str(control_service_request),
                                    str(MPIEnvironment.mpi_processor_rank),
                                    str(formatted_traceback)),
                                 "SEVERE",casalog_call_origin)

            # Notify to MPICommandClient that control signal has been processed
            if send_response:
                try:
                    self.__communicator.control_service_response_send(response=self.__monitor_server.get_status())
                except:
                    formatted_traceback = traceback.format_exc()
                    casalog.post("Exception sending response to control signal command %s in server %s: %s"
                                 % (str(cmd),str(MPIEnvironment.mpi_processor_rank),str(formatted_traceback)),
                                 "SEVERE",casalog_call_origin)

        time.sleep(MPIEnvironment.mpi_stop_service_sleep_time)

    # Process stop service request
    if stop_service_requested:

        # Check if force mode is needed
        force_command_request_interruption = control_service_request['force_command_request_interruption']
        finalize_mpi_environment = control_service_request['finalize_mpi_environment']
        busy = self.__monitor_server.get_status('busy')
        if force_command_request_interruption and busy:
            casalog.post("force-stop service signal received, stopping services, " +
                         "command request handler service will be interrupted...","INFO",casalog_call_origin)
        else:
            force_command_request_interruption = False
            casalog.post("stop service signal received, stopping services...","INFO",casalog_call_origin)
    else:
        force_command_request_interruption = True
        # Client timeout: there is no stop request to read the finalize flag
        # from.  It used to be left undefined here, which made the check
        # below fail with NameError.  Skip finalization, as the client does
        # when a server is in timeout condition.
        finalize_mpi_environment = False
        casalog.post("client timeout, forcing disconnection, " +
                     "command request handler service will be interrupted.." ,"INFO",casalog_call_origin)

    # Stop services
    self.stop_services(force_command_request_interruption)

    # Finalize MPI environment
    if finalize_mpi_environment:
        try:
            casalog.post("Going to finalize MPI environment","INFO",casalog_call_origin)
            MPIEnvironment.finalize_mpi_environment()
        except:
            formatted_traceback = traceback.format_exc()
            casalog.post("Exception finalizing MPI environment %s"
                         % str(formatted_traceback),"SEVERE",casalog_call_origin)

    # Exit
    casalog.post("Exiting","INFO",casalog_call_origin)
def stop_services(self,force_command_request_interruption=False):
    """Shut down the client services and tell the servers to stop.

    Returns early, after logging a warning, when the life cycle state is 0
    ("Services not started") or 2 ("life cycle finalized").  Otherwise the
    monitoring service is stopped, pending command requests (those without
    a response) are logged as aborted, the request/response services are
    stopped, the process-control and stop signals are sent, and the life
    cycle state is set to 2.

    Args:
        force_command_request_interruption: flag sent to the servers with
            the stop signal.  Overridden to True when any server is in
            timeout condition, in which case MPI finalization is skipped.

    Never raises: every exception is caught and printed (see note below).
    """
    # jagonzal: This method is called by the atexit module and if it fails it
    # causes ipython to crash, producing a report and waiting for user input
    # so we cannot risk under any circumstances such an event
    try:
        casalog_call_origin = "MPICommandClient::stop_services"

        if self.__life_cycle_state == 0:
            casalog.post("Services not started","WARN",casalog_call_origin)
            return
        elif self.__life_cycle_state == 2:
            casalog.post("MPICommandClient life cycle finalized","WARN",casalog_call_origin)
            return

        # Check if any server is in timeout condition before stopping the monitoring service
        server_rank_timeout = self.__monitor_client.get_server_timeout()
        finalize_mpi_environment = True
        if len(server_rank_timeout) > 0:
            finalize_mpi_environment = False
            force_command_request_interruption = True

        # Stop client monitoring services
        self.__monitor_client.stop_services()

        # Notify command requests which are going to be interrupted.
        # The unused 'server'/'status' lookups were dropped: a missing key
        # would have raised KeyError and aborted the rest of the shutdown.
        for command_request_id in self.__command_request_list:
            if command_request_id not in self.__command_response_list:
                casalog.post("Aborting command request with id# %s: %s"
                             % (str(command_request_id),str(self.__command_request_list[command_request_id])),
                             "SEVERE",casalog_call_origin)

        # Stop client command request-response services
        self.__stop_command_request_queue_service()
        self.__stop_command_response_handler_service()

        # Shutdown plotms process
        self.__send_control_signal({'command':'pm.killApp()',
                                    'signal':'process_control'},
                                   check_response=True)

        # Shutdown virtual frame buffer
        self.__send_control_signal({'command':'self.stop_virtual_frame_buffer()',
                                    'signal':'process_control'},
                                   check_response=True)

        # Send stop signal to servers
        self.__send_control_signal({'command':'stop_service_requested = True',
                                    'signal':'stop',
                                    'force_command_request_interruption':force_command_request_interruption,
                                    'finalize_mpi_environment':finalize_mpi_environment},
                                   check_response=False)

        # Finalize MPI environment
        if finalize_mpi_environment:
            try:
                casalog.post("Going to finalize MPI environment","INFO",casalog_call_origin)
                MPIEnvironment.finalize_mpi_environment()
            except:
                formatted_traceback = traceback.format_exc()
                casalog.post("Exception finalizing MPI environment %s"
                             % str(formatted_traceback),"SEVERE",casalog_call_origin)
        else:
            casalog.post("MPIServers with rank %s are in timeout condition, skipping MPI_Finalize()"
                         % str(server_rank_timeout),"SEVERE",casalog_call_origin)

        # UnMark MPI environment to be finalized by the MPICommunicator destructor
        # (Either because it is already finalized or due to a
        # server not responsive that prevents graceful finalization)
        self.__communicator.set_finalize_mpi_environment(False)

        # Set life cycle state
        self.__life_cycle_state = 2

        casalog.post("All services stopped","INFO",casalog_call_origin)

    except:
        formatted_traceback = traceback.format_exc()
        # Parenthesised single argument: same output with the Python 2 print
        # statement and with the print function.
        print("Unhandled exception in MPICommandClient::stop_services %s" %(formatted_traceback))