示例#1
0
 def __init__(self,start_services=True):
     """Set up the per-server status records and the service state flags.

     One status record is created for every rank returned by
     MPIEnvironment.mpi_server_rank_list(). Both services (monitor status
     and ping status response handler) start flagged as off / not running
     with no thread attached, and an MPICommunicator instance is stored.

     start_services -- when true (the default), self.start_services() is
                       called once all the state has been initialized.
     """

     # Per-server status records, keyed by server rank
     self.__server_status_list = {}
     for server_rank in MPIEnvironment.mpi_server_rank_list():
         self.__server_status_list[server_rank] = {
             'rank': server_rank,
             'processor': None,
             'pid': None,
             'busy': False,
             'command': None,
             'command_start_time': None,
             'pong_pending': False,
             'ping_time': None,
             'pong_time': None,
             'timeout': False,
         }

     # Monitor service state: off, not running, no thread
     self.__monitor_status_service_thread = None
     self.__monitor_status_service_running = False
     self.__monitor_status_service_on = False

     # Ping status response handler service state: off, not running, no thread
     self.__ping_status_response_handler_service_thread = None
     self.__ping_status_response_handler_service_running = False
     self.__ping_status_response_handler_service_on = False

     # Communicator reference used by the services
     self.__communicator = MPICommunicator()

     # Optionally start the services right away
     if start_services:
         self.start_services()
示例#2
0
        def __init__(self, start_services=True):
            """Set up the per-server status records and the service state flags.

            One status record is created for every rank returned by
            MPIEnvironment.mpi_server_rank_list(). Both services (monitor
            status and ping status response handler) start flagged as off /
            not running with no thread attached, and an MPICommunicator
            instance is stored.

            start_services -- when true (the default), self.start_services()
                              is called once all the state has been
                              initialized.
            """

            # Per-server status records, keyed by server rank
            self.__server_status_list = {}
            for server_rank in MPIEnvironment.mpi_server_rank_list():
                self.__server_status_list[server_rank] = {
                    'rank': server_rank,
                    'processor': None,
                    'pid': None,
                    'busy': False,
                    'command': None,
                    'command_start_time': None,
                    'pong_pending': False,
                    'ping_time': None,
                    'pong_time': None,
                    'timeout': False,
                }

            # Monitor service state: off, not running, no thread
            self.__monitor_status_service_thread = None
            self.__monitor_status_service_running = False
            self.__monitor_status_service_on = False

            # Ping status response handler service state: off, not running,
            # no thread
            self.__ping_status_response_handler_service_thread = None
            self.__ping_status_response_handler_service_running = False
            self.__ping_status_response_handler_service_on = False

            # Communicator reference used by the services
            self.__communicator = MPICommunicator()

            # Optionally start the services right away
            if start_services:
                self.start_services()
示例#3
0
 def __validate_target_servers(self,target_server):
     """Validate a target server specification.

     target_server -- a single server rank (int) or a non-empty list of
                      server ranks (list of int).

     Returns the validated ranks as a list: a single int is wrapped in a
     one-element list, a list input is returned as the same object.
     Returns None, after posting a SEVERE message to casalog, when the
     format is wrong, when a rank is not in
     MPIEnvironment.mpi_server_rank_list(), or when the monitor client
     reports the 'timeout' keyword set for a rank.
     """
     
     casalog_call_origin = "MPICommandClient::validate_target_servers"
     
     # Normalize both accepted formats to a list so that a single int goes
     # through exactly the same checks (existence AND timeout) as a list
     if isinstance(target_server,int):
         servers_to_check = [target_server]
     elif (isinstance(target_server,list) and (len(target_server)>=1) and 
           all(isinstance(server, int) for server in target_server)):
         servers_to_check = target_server
     else:
         casalog.post("target_server has wrong format (%s), accepted formats are int and list(int)" 
                      % str(type(target_server)),"SEVERE",casalog_call_origin)
         return None
     
     # Get list of valid MPIServer ranks
     mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()
     
     # Reject the whole request on the first invalid server
     for server in servers_to_check:
         if server not in mpi_server_rank_list:
             casalog.post("Server #%s does not exist" % str(server),"SEVERE",casalog_call_origin)
             return None
         elif self.__monitor_client.get_server_status_keyword(server,'timeout'):
             casalog.post("Server #%s has timed out" % str(server),"SEVERE",casalog_call_origin)
             return None
     
     # Return input validated, always in list form
     return servers_to_check
示例#4
0
 def control_service_request_broadcast(self,request,logger=None):
     """Send the same control service request to every server.

     request -- request passed unchanged to
                self.control_service_request_send() for each rank in
                MPIEnvironment.mpi_server_rank_list(); request['signal']
                is read only when a logger is given.
     logger  -- optional object with a post(message,priority,origin)
                method; when given, a DEBUG message is posted before
                each send.
     """
     
     # NOTE: the origin tag carries the name 'control_service_request_send_all',
     # which differs from this method's name; it is kept verbatim
     log_origin = "MPICommunicator::control_service_request_send_all"
     
     for server_rank in MPIEnvironment.mpi_server_rank_list():
         if logger is not None:
             message = "Sending %s service signal to server %s" % (request['signal'],str(server_rank))
             logger.post(message,"DEBUG",log_origin)
         self.control_service_request_send(request=request,server=server_rank)
示例#5
0
 def control_service_request_broadcast(self,request,logger=None):
     """Send the same control service request to every server.

     request -- request passed unchanged to
                self.control_service_request_send() for each rank in
                MPIEnvironment.mpi_server_rank_list(); request['signal']
                is read only when a logger is given.
     logger  -- optional object with a post(message,priority,origin)
                method; when given, a DEBUG message is posted before
                each send.
     """
     
     # NOTE: the origin tag carries the name 'control_service_request_send_all',
     # which differs from this method's name; it is kept verbatim
     log_origin = "MPICommunicator::control_service_request_send_all"
     
     for server_rank in MPIEnvironment.mpi_server_rank_list():
         if logger is not None:
             message = "Sending %s service signal to server %s" % (request['signal'],str(server_rank))
             logger.post(message,"DEBUG",log_origin)
         self.control_service_request_send(request=request,server=server_rank)
示例#6
0
        def __monitor_status_service(self):
            """Ping every server periodically and flag the ones that stop answering.

            Loops while self.__monitor_status_service_on is true. On each
            round, for every rank in MPIEnvironment.mpi_server_rank_list():

            - if no pong is pending, a ping status request is sent and the
              record's 'ping_time', 'pong_pending' and 'pong_checks' entries
              are updated;
            - otherwise 'pong_checks' is incremented and, when timeouts are
              enabled and the estimated waiting time exceeds
              MPIEnvironment.mpi_monitor_status_service_timeout, the server
              is marked with 'timeout' = True (reported once via casalog).

            The round ends with a sleep of
            MPIEnvironment.mpi_monitor_status_service_heartbeat seconds.
            'pong_pending' is never cleared here; presumably the ping status
            response handler does that -- not visible from this method.

            self.__monitor_status_service_running is True for the duration
            of the call and is reset to False on exit, including when the
            loop is left through an unexpected exception.
            """

            casalog_call_origin = "MPIMonitorClient::monitor_status_service"

            # Mark service as running
            self.__monitor_status_service_running = True

            try:
                mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()

                while self.__monitor_status_service_on:
                    # Iterate over servers
                    for rank in mpi_server_rank_list:
                        # Send ping status request if there is none pending
                        if not self.__server_status_list[rank]['pong_pending']:
                            try:
                                self.__communicator.ping_status_request_send(
                                    server=rank)
                                self.__server_status_list[rank][
                                    'ping_time'] = time.time()
                                self.__server_status_list[rank][
                                    'pong_pending'] = True
                                self.__server_status_list[rank][
                                    'pong_checks'] = 0
                            except Exception:
                                # Best effort: report the failure and retry
                                # on the next round ('pong_pending' is still
                                # False). SystemExit and KeyboardInterrupt
                                # are deliberately not swallowed.
                                formatted_traceback = traceback.format_exc()
                                casalog.post(
                                    "Exception sending ping status request to server %s: %s"
                                    % (str(rank), str(formatted_traceback)),
                                    "SEVERE", casalog_call_origin)
                        else:
                            self.__server_status_list[rank]['pong_checks'] += 1
                            # The waiting time is estimated as heartbeat
                            # period x number of checks since the ping was
                            # sent, not as a wall-clock difference with
                            # 'ping_time'
                            elapsed_time = MPIEnvironment.mpi_monitor_status_service_heartbeat
                            elapsed_time *= self.__server_status_list[rank][
                                'pong_checks']
                            # Notify (only once) when a server reaches the
                            # timeout condition
                            if (MPIEnvironment.
                                    mpi_monitor_status_service_timeout_enabled and
                                (elapsed_time >
                                 MPIEnvironment.mpi_monitor_status_service_timeout)
                                    and
                                (not self.__server_status_list[rank]['timeout'])):
                                casalog.post(
                                    "Ping status response from server %s not received "
                                    "in the last %ss. Setting its status to 'timeout'"
                                    % (str(rank), str(int(elapsed_time))),
                                    "SEVERE", casalog_call_origin)
                                self.__server_status_list[rank]['timeout'] = True
                    # Sleep before next round
                    time.sleep(MPIEnvironment.mpi_monitor_status_service_heartbeat)
            finally:
                # Mark service as not running, even if the loop died with an
                # exception, so the flag never reports a dead service as alive
                self.__monitor_status_service_running = False
示例#7
0
 def get_nodes(self):
     """Return the distinct 'processor' values recorded for all servers.

     The 'processor' status keyword is read from the monitor client for
     every rank in MPIEnvironment.mpi_server_rank_list(); duplicates are
     dropped by going through a set, so the order of the returned list is
     not tied to the rank order.
     """
     
     # One 'processor' entry per server rank (may contain repeats)
     per_server_hosts = [self.__monitor_client.get_server_status_keyword(server_rank,'processor')
                         for server_rank in MPIEnvironment.mpi_server_rank_list()]
     
     # Collapse repeats
     return list(set(per_server_hosts))
示例#8
0
 def push(self, variables, targets=None):
     """Set variables in a sub-set of engines

     variables -- mapping (or anything accepted by dict()) sent as the
                  parameters of a blocking "push" command request.
     targets   -- None or 'all' to address every rank in
                  MPIEnvironment.mpi_server_rank_list(); otherwise an
                  iterable that is converted to a list of target servers.

     Returns whatever the command client's push_command_request() returns.
     """
     
     # Work out which servers receive the variables
     address_everyone = targets is None or targets == 'all'
     if address_everyone:
         destination_servers = MPIEnvironment.mpi_server_rank_list()
     else:
         destination_servers = list(targets)
     
     # Issue the blocking push and hand its result back to the caller
     return self.__command_client.push_command_request("push",
                                                       block=True,
                                                       target_server=destination_servers,
                                                       parameters=dict(variables))
示例#9
0
        def __send_start_service_signal(self):
            """Broadcast the start service signal and wait for every server to answer.

            A request with 'signal' = 'start', the global casa dictionary and
            the log mode is broadcast through the communicator. The method
            then polls for control service responses until a response has
            been received from every rank in
            MPIEnvironment.mpi_server_rank_list(), sleeping
            MPIEnvironment.mpi_check_start_service_sleep_time between
            unsuccessful probes. For each response, the reported processor
            name and PID are stored in the monitor client.

            There is no upper bound on the wait: the loop only ends when all
            ranks have answered.
            """

            casalog_call_origin = "MPICommandClient::send_start_service_signal"

            casalog.post("Sending start service signal to all servers", "INFO", casalog_call_origin)

            # Prepare start service request
            request = {}
            request["signal"] = "start"
            request["casa"] = casa  # The request contains the global casa dictionary to be used by the servers
            request["logmode"] = self.__log_mode

            # Send request to all servers
            self.__communicator.control_service_request_broadcast(request, casalog)

            # Then wait until all servers have handled the signal.
            # The pending ranks are tracked in a private set so that the
            # object returned by mpi_server_rank_list() is never modified
            # (whether it is shared with other callers cannot be told from
            # here) and so that a repeated response from the same rank does
            # not raise.
            pending_servers = set(MPIEnvironment.mpi_server_rank_list())
            while pending_servers:
                if self.__communicator.control_service_response_probe():
                    # Receive start service response to know what server has started
                    response = self.__communicator.control_service_response_recv()
                    rank = response["rank"]
                    # Store processor name and PID info in the MPIMonitorClient
                    self.__monitor_client.set_server_status_keyword(rank, "processor", response["processor"])
                    self.__monitor_client.set_server_status_keyword(rank, "pid", response["pid"])
                    # Server has answered, stop waiting for it
                    pending_servers.discard(rank)
                    # Communicate that server response to start service signal has been received
                    casalog.post(
                        "Server with rank %s started at %s with PID %s"
                        % (str(rank), str(response["processor"]), str(response["pid"])),
                        "INFO",
                        casalog_call_origin,
                    )
                else:
                    time.sleep(MPIEnvironment.mpi_check_start_service_sleep_time)

            casalog.post("Received response from all servers to start service signal", "INFO", casalog_call_origin)
示例#10
0
        def __validate_target_servers(self, target_server):
            """Validate a target server specification.

            target_server -- a single server rank (int) or a non-empty list
                             of server ranks (list of int).

            Returns the validated ranks as a list: a single int is wrapped
            in a one-element list, a list input is returned as the same
            object. Returns None, after posting a SEVERE message to casalog,
            when the format is wrong, when a rank is not in
            MPIEnvironment.mpi_server_rank_list(), or when the monitor
            client reports the 'timeout' keyword set for a rank.
            """

            casalog_call_origin = "MPICommandClient::validate_target_servers"

            # Normalize both accepted formats to a list so that a single int
            # goes through exactly the same checks (existence AND timeout)
            # as a list
            if isinstance(target_server, int):
                servers_to_check = [target_server]
            elif (
                isinstance(target_server, list)
                and (len(target_server) >= 1)
                and all(isinstance(server, int) for server in target_server)
            ):
                servers_to_check = target_server
            else:
                casalog.post(
                    "target_server has wrong format (%s), accepted formats are int and list(int)"
                    % str(type(target_server)),
                    "SEVERE",
                    casalog_call_origin,
                )
                return None

            # Get list of valid MPIServer ranks
            mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()

            # Reject the whole request on the first invalid server
            for server in servers_to_check:
                if server not in mpi_server_rank_list:
                    casalog.post("Server #%s does not exist" % str(server), "SEVERE", casalog_call_origin)
                    return None
                elif self.__monitor_client.get_server_status_keyword(server, "timeout"):
                    casalog.post("Server #%s has timed out" % str(server), "SEVERE", casalog_call_origin)
                    return None

            # Return input validated, always in list form
            return servers_to_check
示例#11
0
 def __monitor_status_service(self):
     """Ping every server periodically and flag the ones that stop answering.

     Loops while self.__monitor_status_service_on is true. On each round,
     for every rank in MPIEnvironment.mpi_server_rank_list():

     - if no pong is pending, a ping status request is sent and the
       record's 'ping_time', 'pong_pending' and 'pong_checks' entries are
       updated;
     - otherwise 'pong_checks' is incremented and, when the estimated
       waiting time exceeds
       MPIEnvironment.mpi_monitor_status_service_timeout, the server is
       marked with 'timeout' = True (reported once via casalog).

     The round ends with a sleep of
     MPIEnvironment.mpi_monitor_status_service_heartbeat seconds.
     'pong_pending' is never cleared here; presumably the ping status
     response handler does that -- not visible from this method.

     self.__monitor_status_service_running is True for the duration of
     the call and is reset to False on exit, including when the loop is
     left through an unexpected exception.
     """
     
     casalog_call_origin = "MPIMonitorClient::monitor_status_service"
     
     # Mark service as running
     self.__monitor_status_service_running = True
     
     try:
         mpi_server_rank_list = MPIEnvironment.mpi_server_rank_list()
         
         while self.__monitor_status_service_on:
             # Iterate over servers
             for rank in mpi_server_rank_list:
                 # Send ping status request if there is none pending
                 if not self.__server_status_list[rank]['pong_pending']:
                     try:
                         self.__communicator.ping_status_request_send(server=rank)
                         self.__server_status_list[rank]['ping_time'] = time.time()
                         self.__server_status_list[rank]['pong_pending'] = True
                         self.__server_status_list[rank]['pong_checks'] = 0
                     except Exception:
                         # Best effort: report the failure and retry on the
                         # next round ('pong_pending' is still False).
                         # SystemExit and KeyboardInterrupt are deliberately
                         # not swallowed.
                         formatted_traceback = traceback.format_exc()
                         casalog.post("Exception sending ping status request to server %s: %s" % 
                                      (str(rank),str(formatted_traceback)),"SEVERE",casalog_call_origin)
                 else:
                     self.__server_status_list[rank]['pong_checks'] += 1
                     # The waiting time is estimated as heartbeat period x
                     # number of checks since the ping was sent, not as a
                     # wall-clock difference with 'ping_time'
                     elapsed_time = MPIEnvironment.mpi_monitor_status_service_heartbeat
                     elapsed_time *= self.__server_status_list[rank]['pong_checks']
                     # Notify (only once) when a server reaches the timeout condition
                     if ((elapsed_time > MPIEnvironment.mpi_monitor_status_service_timeout) and 
                         (not self.__server_status_list[rank]['timeout'])):
                         casalog.post("Ping status response from server %s not received in the last %ss" % 
                                      (str(rank),str(int(elapsed_time))),"SEVERE",casalog_call_origin)
                         self.__server_status_list[rank]['timeout'] = True
             # Sleep before next round
             time.sleep(MPIEnvironment.mpi_monitor_status_service_heartbeat)
     finally:
         # Mark service as not running, even if the loop died with an
         # exception, so the flag never reports a dead service as alive
         self.__monitor_status_service_running = False
示例#12
0
 def pgc(self,commands,block=True):
     """Run commands on the servers, in one of two modes:
     
        - commands is a dictionary: each value is pushed as a non-blocking
          command request to the server given by its key, and the
          responses for all the resulting jobs are then retrieved
          (waiting for completion or not according to block)
          
        - commands is anything else: it is pushed as a single command
          request to every rank in MPIEnvironment.mpi_server_rank_list(),
          with block forwarded to the request
          
        Returns the result of get_command_response() in the first mode and
        of push_command_request() in the second.
     """
     
     # Single command: run it on all servers
     if not isinstance(commands,dict):
         every_server = MPIEnvironment.mpi_server_rank_list()
         return self.__command_client.push_command_request(commands,
                                                           block=block,
                                                           target_server=every_server)
     
     # Dictionary of commands: spawn one non-blocking job per target server,
     # keeping the first element of each request result as the job id
     spawned_job_ids = [
         self.__command_client.push_command_request(commands[server],
                                                    block=False,
                                                    target_server=server)[0]
         for server in commands
     ]
     
     # Retrieve the responses; block decides whether to wait for completion
     return self.__command_client.get_command_response(spawned_job_ids,block=block,verbose=True)
示例#13
0
 def __send_start_service_signal(self):
     """Broadcast the start service signal and wait for every server to answer.

     A request with 'signal' = 'start', the global casa dictionary and the
     log mode is broadcast through the communicator. The method then polls
     for control service responses until a response has been received
     from every rank in MPIEnvironment.mpi_server_rank_list(), sleeping
     MPIEnvironment.mpi_check_start_service_sleep_time between
     unsuccessful probes. For each response, the reported processor name
     and PID are stored in the monitor client.

     There is no upper bound on the wait: the loop only ends when all
     ranks have answered.
     """
     
     casalog_call_origin = "MPICommandClient::send_start_service_signal"
     
     casalog.post("Sending start service signal to all servers","INFO",casalog_call_origin)
     
     # Prepare start service request
     request = {}
     request['signal'] = 'start'
     request['casa'] = casa # The request contains the global casa dictionary to be used by the servers
     request['logmode'] = self.__log_mode
     
     # Send request to all servers
     self.__communicator.control_service_request_broadcast(request,casalog)
         
     # Then wait until all servers have handled the signal.
     # The pending ranks are tracked in a private set so that the object
     # returned by mpi_server_rank_list() is never modified (whether it is
     # shared with other callers cannot be told from here) and so that a
     # repeated response from the same rank does not raise.
     pending_servers = set(MPIEnvironment.mpi_server_rank_list())
     while pending_servers:
         if self.__communicator.control_service_response_probe():
             # Receive start service response to know what server has started
             response = self.__communicator.control_service_response_recv()
             rank = response['rank']
             # Store processor name and PID info in the MPIMonitorClient
             self.__monitor_client.set_server_status_keyword(rank,'processor',response['processor'])
             self.__monitor_client.set_server_status_keyword(rank,'pid',response['pid'])
             # Server has answered, stop waiting for it
             pending_servers.discard(rank)
             # Communicate that server response to start service signal has been received
             casalog.post("Server with rank %s started at %s with PID %s" 
                          % (str(rank),str(response['processor']),str(response['pid'])),
                          "INFO",casalog_call_origin)
         else:
             time.sleep(MPIEnvironment.mpi_check_start_service_sleep_time)
                 
     casalog.post("Received response from all servers to start service signal","INFO",casalog_call_origin)
示例#14
0
        def get_engines(self):
            """Return the server ranks given by MPIEnvironment.mpi_server_rank_list()."""

            engine_ranks = MPIEnvironment.mpi_server_rank_list()
            return engine_ranks
示例#15
0
 def get_engines(self):
     """Return the server ranks given by MPIEnvironment.mpi_server_rank_list()."""
     
     engine_ranks = MPIEnvironment.mpi_server_rank_list()
     return engine_ranks