class SinkCom:
    """
    Communication object of a Sink node.

    It subscribes to the topics of the nodes connected to the Sink's input, pushes the most recent message to
    the worker_exec process, waits for the worker's end-of-work signal and sends the worker a periodic heartbeat.
    """

    def __init__(self, receiving_topics, parameters_topic, push_port, worker_exec, verbose=True,
                 ssh_local_server_id='None', ssh_remote_server_id='None'):
        """
        :param receiving_topics: The topics this Sink subscribes to
        :param parameters_topic: The topic passed to the worker_exec process for its parameters
        :param push_port: The port used to push data to the worker_exec. The pull port is push_port + 1 and the
            heartbeat port is push_port + 2
        :param worker_exec: The executable (or python script) of the worker process
        :param verbose: Normally a string with syntax verbosity||relic (see define_verbosity_and_relic)
        :param ssh_local_server_id: Passed as is to SSHCom
        :param ssh_remote_server_id: Passed as is to SSHCom
        """
        self.receiving_topics = receiving_topics
        self.parameters_topic = parameters_topic
        self.push_data_port = push_port
        self.pull_data_port = str(int(self.push_data_port) + 1)
        self.push_heartbeat_port = str(int(self.push_data_port) + 2)
        self.worker_exec = worker_exec
        self.verbose, self.relic = self.define_verbosity_and_relic(verbose)
        self.all_loops_running = True
        self.ssh_com = SSHCom(self.worker_exec, ssh_local_server_id, ssh_remote_server_id)

        self.port_sub_data = ct.DATA_FORWARDER_PUBLISH_PORT
        self.port_pub_parameters = ct.PARAMETERS_FORWARDER_SUBMIT_PORT

        self.poller = zmq.Poller()

        self.context = None
        self.socket_sub_data = None
        self.stream_sub = None
        self.socket_push_data = None
        self.socket_pull_data = None
        self.socket_push_heartbeat = None

        self.index = 0

        # If self.verbose is a string that cannot be turned into an int it is the file name to log things in.
        # If it is an int (or a string holding an int) it is the level of the verbosity.
        self.logger = None
        if self.verbose != 0:
            try:
                self.verbose = int(self.verbose)
            except (TypeError, ValueError):
                log_file_name = gu.add_timestamp_to_filename(self.verbose, datetime.now())
                self.logger = gu.setup_logger('Sink', log_file_name)
                self.logger.info('Index of data packet given : Index of data packet received: Topic : Computer Time')
                # When logging to a file nothing is printed on the console
                self.verbose = False

        # Make sure the sockets get closed both at normal exit and when the process receives a SIGTERM
        atexit.register(self.on_kill, None, None)
        signal.signal(signal.SIGTERM, self.on_kill)

    def connect_sockets(self):
        """
        Start the required sockets to communicate with the link forwarder and the worker_com processes
        :return: Nothing
        """
        if self.verbose:
            print('Starting Sink Node with PID = {}'.format(os.getpid()))
        self.context = zmq.Context()

        # Socket for subscribing to data from nodes connected to the input
        self.socket_sub_data = Socket(self.context, zmq.SUB)
        self.socket_sub_data.setsockopt(zmq.LINGER, 0)
        self.socket_sub_data.set_hwm(len(self.receiving_topics))
        self.socket_sub_data.connect("tcp://127.0.0.1:{}".format(self.port_sub_data))
        for rt in self.receiving_topics:
            self.socket_sub_data.setsockopt(zmq.SUBSCRIBE, rt.encode('ascii'))
        self.poller.register(self.socket_sub_data, zmq.POLLIN)

        # Socket for pushing the data to the worker_exec
        self.socket_push_data = Socket(self.context, zmq.PUSH)
        self.socket_push_data.setsockopt(zmq.LINGER, 0)
        self.socket_push_data.set_hwm(1)
        self.socket_push_data.bind(r"tcp://*:{}".format(self.push_data_port))

        # Socket for pulling the end of worker function signal from the worker_exec
        self.socket_pull_data = Socket(self.context, zmq.PULL)
        self.socket_pull_data.setsockopt(zmq.LINGER, 0)
        self.socket_pull_data.set_hwm(1)
        self.socket_pull_data.connect(r"tcp://127.0.0.1:{}".format(self.pull_data_port))
        self.poller.register(self.socket_pull_data, zmq.POLLIN)

        # Socket for pushing the heartbeat to the worker_exec
        self.socket_push_heartbeat = self.context.socket(zmq.PUSH)
        self.socket_push_heartbeat.setsockopt(zmq.LINGER, 0)
        self.socket_push_heartbeat.bind(r'tcp://*:{}'.format(self.push_heartbeat_port))
        # NOTE(review): unlike the other sockets the high water mark is set after the bind here. It may not apply
        # to the already bound endpoint - confirm against the ZeroMQ documentation before reordering.
        self.socket_push_heartbeat.set_hwm(1)

    def define_verbosity_and_relic(self, verbosity_string):
        """
        Splits the string that comes from the Node as verbosity_string into the string (or int) for the
        logging/printing (self.verbose) and the string that carries the path where the relic is to be saved.
        The self.relic is then passed to the worker process.

        A value that is not a string (e.g. the constructor's default True) is returned untouched as the verbosity
        together with the '_' relic placeholder. A string without the '||' separator is treated as a verbosity
        with an empty relic part.
        :param verbosity_string: The string with syntax verbosity||relic
        :return: (int)str verbose, str relic
        """
        if not isinstance(verbosity_string, str):
            return verbosity_string, '_'

        if verbosity_string == '':
            return 0, ''

        # partition (instead of split) does not fail when the separator is missing
        verbosity, _, relic = verbosity_string.partition('||')
        if relic == '':
            relic = '_'
        if verbosity == '':
            return 0, relic
        return verbosity, relic

    def heartbeat_loop(self):
        """
        The loop that send a 'PULSE' heartbeat to the worker_exec process to keep it alive (every ct.HEARTBEAT_RATE
        seconds)
        :return: Nothing
        """
        while self.all_loops_running:
            self.socket_push_heartbeat.send_string('PULSE')
            time.sleep(ct.HEARTBEAT_RATE)

    def start_heartbeat_thread(self):
        """
        Starts the daemon thread that runs the heartbeat_loop
        :return: Nothing
        """
        heartbeat_thread = threading.Thread(target=self.heartbeat_loop, daemon=True)
        heartbeat_thread.start()

    def start_worker(self):
        """
        Starts the worker_exec process and connects the pull socket to it.
        The pull_data_port of the worker_exec needs to be the push_data_port of the com (obviously).
        The way the arguments are structured is defined by the way they are read by the process. For that see
        general_utilities.parse_arguments_to_worker
        :return: Nothing
        """
        # Something that already names python, or that is not a .py script, is run as is. A bare .py script is
        # run through the python interpreter.
        if 'python' in self.worker_exec or '.py' not in self.worker_exec:
            arguments_list = [self.worker_exec]
        else:
            arguments_list = ['python', self.worker_exec]

        arguments_list.append(str(self.push_data_port))
        arguments_list.append(str(self.parameters_topic))
        arguments_list.append(str(len(self.receiving_topics)))
        arguments_list.extend(self.receiving_topics)
        arguments_list.append(str(0))
        arguments_list.append(str(self.relic))
        arguments_list = self.ssh_com.add_local_server_info_to_arguments(arguments_list)

        self.ssh_com.start_process(arguments_list)
        self.ssh_com.connect_socket_to_remote(self.socket_pull_data,
                                              r"tcp://127.0.0.1:{}".format(self.pull_data_port))

    def get_sub_data(self):
        """
        Gets the link from the forwarder. It assumes that each message has four parts:
        The topic
        The data_index, an int that increases by one for every message the previous node sends
        The data_time, the time.perf_counter() result at the time the previous node send its message
        The messagedata, the array of link

        The first message is read with blocking calls. After that any further waiting messages are read without
        blocking so that only the latest available one is returned. If the sink is too slow compared to the
        input node all the older messages are thrown away.
        :return: The topic, data_index, data_time and messagedata of the latest message
        """
        latest = (self.socket_sub_data.recv(),
                  self.socket_sub_data.recv(),
                  self.socket_sub_data.recv(),
                  self.socket_sub_data.recv_array())

        while True:
            try:
                newer = (self.socket_sub_data.recv(zmq.NOBLOCK),
                         self.socket_sub_data.recv(zmq.NOBLOCK),
                         self.socket_sub_data.recv(zmq.NOBLOCK),
                         self.socket_sub_data.recv_array(zmq.NOBLOCK))
            except Exception:
                # Nothing more is waiting (or the read failed). Keep the last complete message.
                break
            if not newer[0]:
                # A message with an empty topic stops the draining and is dropped
                break
            latest = newer

        return latest

    def start_ioloop(self):
        """
        Start the io loop for the sink node. It reads the link from the previous node's _com process, pushes it
        to the worker_exec process and then waits for the end of worker function signal from the worker_exec
        :return: Nothing
        """
        while self.all_loops_running:
            t1 = time.perf_counter()

            try:
                # The timeout=1 means things coming in faster than 1000Hz will be lost but if timeout is set to 0
                # then the CPU utilization goes to around 10% which quickly kills the CPU (if there are 2 or 3
                # Sinks in the pipeline)
                sockets_in = dict(self.poller.poll(timeout=1))
                while not sockets_in:
                    sockets_in = dict(self.poller.poll(timeout=1))

                if self.socket_sub_data in sockets_in and sockets_in[self.socket_sub_data] == zmq.POLLIN:
                    topic, data_index, data_time, messagedata = self.get_sub_data()
                    # The result of this poll is not used. The call is kept so the loop's timing stays the same
                    self.poller.poll(timeout=1)

                    if self.verbose:
                        print("oooo Sink from {}, data_index {} at time {} s oooo"
                              .format(topic, data_index, data_time))

                    # Send link to be transformed to the worker_exec
                    self.socket_push_data.send(topic, flags=zmq.SNDMORE)
                    self.socket_push_data.send_array(messagedata, copy=False)
                    t2 = time.perf_counter()

                # Get the end of worker function link (wait for the socket_pull_data to get some link from the
                # worker_exec)
                # NOTE(review): if the poll above returned without data on the sub socket then t2, topic and
                # data_index are either stale or undefined in the code below. The resulting error is swallowed
                # by the except of this iteration.
                self.poller.poll(timeout=None)
                self.socket_pull_data.recv()
                t3 = time.perf_counter()

                if self.verbose:
                    print("---Time to Transport link from previous com to worker_exec = {} ms"
                          .format((t2 - t1) * 1000))
                    print("---Time to to finish with the worker_exec = {} ms".format((t3 - t2) * 1000))
                    print('=============================')
                if self.logger:
                    self.logger.info('{} : {} : {} : {}'.format(self.index, data_index, topic, datetime.now()))
                self.index += 1

            except Exception:
                # Best effort: a failed iteration (e.g. the sockets got closed by on_kill) must not stop the
                # loop. KeyboardInterrupt and SystemExit are deliberately not caught.
                pass

    def on_kill(self, signal, frame):
        """
        Stops the loops, closes all the sockets and terminates the zmq context. It is registered both as the
        SIGTERM handler and as an atexit function
        :param signal: The signal number (None when called at exit)
        :param frame: The current stack frame (None when called at exit)
        :return: Nothing
        """
        try:
            self.all_loops_running = False
            self.poller.unregister(socket=self.socket_sub_data)
            self.poller.unregister(socket=self.socket_pull_data)
            self.socket_sub_data.close()
            self.socket_push_data.close()
            self.socket_pull_data.close()
            self.socket_push_heartbeat.close()
        except Exception as e:
            print('Trying to kill Sink com {} failed with error: {}'.format(self.worker_exec, e))
        finally:
            # The context only exists after connect_sockets has run
            if self.context is not None:
                self.context.term()
class SourceWorker:
    """
    The worker side object of a Source node.

    It pushes the data generated by the worker function to the com process, receives parameter updates,
    listens to the com's heartbeat (killing its own process when the heartbeat stops) and publishes a proof of
    life signal. It also offers helper functions to save parameters and variables into the Node's Relic.
    """

    def __init__(self, port, parameters_topic, initialisation_function, end_of_life_function, num_sending_topics,
                 relic_path, ssh_local_ip=' ', ssh_local_username='******', ssh_local_password='******'):
        """
        :param port: The port the data are pushed to. The heartbeat is pulled from port + 1
        :param parameters_topic: The topic of the parameters. Its last two parts (when split on '##') are used as
            the node's name and the node's index
        :param initialisation_function: Called with this object every time parameters arrive until it returns
            something truthy. Can be None
        :param end_of_life_function: Called when the heartbeat is lost, just before the process gets killed
        :param num_sending_topics: The number of topics the node sends out (converted to int)
        :param relic_path: The path handed over to the HeronRelic
        :param ssh_local_ip: Passed as is to SSHCom
        :param ssh_local_username: Passed as is to SSHCom
        :param ssh_local_password: Passed as is to SSHCom
        """
        self.parameters_topic = parameters_topic
        self.data_port = port
        self.pull_heartbeat_port = str(int(self.data_port) + 1)
        self.initialisation_function = initialisation_function
        self.end_of_life_function = end_of_life_function
        self.num_sending_topics = int(num_sending_topics)

        topic_parts = parameters_topic.split('##')
        self.node_name = topic_parts[-2]
        self.node_index = topic_parts[-1]

        self.ssh_com = SSHCom(ssh_local_ip=ssh_local_ip, ssh_local_username=ssh_local_username,
                              ssh_local_password=ssh_local_password)
        self.relic_path = relic_path
        self.import_reliquery()
        self.heron_relic = None
        self.num_of_iters_to_update_relics_substate = None

        self.time_of_pulse = time.perf_counter()
        self.port_sub_parameters = ct.PARAMETERS_FORWARDER_PUBLISH_PORT
        self.port_pub_proof_of_life = ct.PROOF_OF_LIFE_FORWARDER_SUBMIT_PORT
        self.running_thread = True
        self.loops_on = True
        self.initialised = False

        self.context = None
        self.socket_push_data = None
        self.socket_sub_parameters = None
        self.stream_parameters = None
        self.thread_parameters = None
        self.parameters = None
        self.socket_pull_heartbeat = None
        self.stream_heartbeat = None
        self.thread_heartbeat = None
        self.socket_pub_proof_of_life = None
        self.thread_proof_of_life = None
        self.index = 0

    def connect_socket(self):
        """
        Sets up the sockets to do the communication with the source_com process through the forwarders
        (for the link and the parameters).
        :return: Nothing
        """
        self.context = zmq.Context()

        # Setup the socket that receives the parameters of the worker_exec function from the node
        self.socket_sub_parameters = Socket(self.context, zmq.SUB)
        self.socket_sub_parameters.setsockopt(zmq.LINGER, 0)
        self.socket_sub_parameters.subscribe(self.parameters_topic)
        self.ssh_com.connect_socket_to_local(self.socket_sub_parameters, r'tcp://127.0.0.1',
                                             self.port_sub_parameters)
        # NOTE(review): the subscription is issued both before and after the connect. The second one looks
        # redundant but is kept as it was - confirm before removing.
        self.socket_sub_parameters.subscribe(self.parameters_topic)

        # Setup the socket that pushes the data to the com
        self.socket_push_data = Socket(self.context, zmq.PUSH)
        self.socket_push_data.setsockopt(zmq.LINGER, 0)
        self.socket_push_data.set_hwm(1)
        self.socket_push_data.bind(r"tcp://127.0.0.1:{}".format(self.data_port))

        # Setup the socket that receives the heartbeat from the com
        self.socket_pull_heartbeat = self.context.socket(zmq.PULL)
        self.socket_pull_heartbeat.setsockopt(zmq.LINGER, 0)
        self.ssh_com.connect_socket_to_local(self.socket_pull_heartbeat, r'tcp://127.0.0.1',
                                             self.pull_heartbeat_port)

        # Setup the socket that publishes the fact that the worker_exec is up and running to the node com so
        # that it can then update the parameters of the worker_exec.
        self.socket_pub_proof_of_life = Socket(self.context, zmq.PUB)
        self.socket_pub_proof_of_life.setsockopt(zmq.LINGER, 0)
        self.ssh_com.connect_socket_to_local(self.socket_pub_proof_of_life, r'tcp://127.0.0.1',
                                             self.port_pub_proof_of_life, skip_ssh=True)

    def send_data_to_com(self, data):
        """
        Pushes the data array to the com process and increases the worker's index by one
        :param data: The array to send
        :return: Nothing
        """
        self.socket_push_data.send_array(data, copy=False)
        self.index += 1

    def import_reliquery(self):
        """
        This import is required because it takes a good few seconds to load the package and if the import is
        done first time in the HeronRelic instance that delays the initialisation of the worker process which
        can be a problem. A missing reliquery package is silently ignored.
        :return: Nothing
        """
        try:
            import reliquery
            import reliquery.storage
        except ImportError:
            pass

    def relic_create_parameters_df(self, **parameters):
        """
        Creates a new relic with the Parameters pandasdf in it or adds the Parameters pandasdf in the existing
        Node's Relic.
        :param parameters: The dictionary of the parameters. The keys of the dict will become the column names
            of the pandasdf
        :return: Nothing
        """
        self._relic_create_df('Parameters', **parameters)

    def relic_create_substate_df(self, **variables):
        """
        Creates a new relic with the Substate pandasdf in it or adds the Substate pandasdf in the existing
        Node's Relic.
        :param variables: The dictionary of the variables to save. The keys of the dict will become the column
            names of the pandasdf
        :return: Nothing
        """
        self._relic_create_df('Substate', **variables)

    def _relic_create_df(self, type, **variables):
        """
        Base function to create either a Parameters or a Substate pandasdf in a new or the existing Node's Relic.
        The HeronRelic object is created the first time this is called.
        :param type: Parameters or Substate
        :param variables: The variables dictionary to be saved in the pandas. The keys of the dict will become
            the column names of the pandasdf
        :return: Nothing
        """
        if self.heron_relic is None:
            self.heron_relic = HeronRelic(self.relic_path, self.node_name, self.node_index,
                                          self.num_of_iters_to_update_relics_substate)
        if self.heron_relic.operational:
            self.heron_relic.create_the_pandasdf(type, **variables)

    def relic_update_substate_df(self, **variables):
        """
        Updates the Substate pandasdf of the Node's Relic. Does nothing if no Relic has been created yet
        (i.e. none of the relic_create_* functions has been called).
        :param variables: The Substate's variables dict
        :return: Nothing
        """
        if self.heron_relic is None:
            return
        self.heron_relic.update_the_substate_pandasdf(self.index, **variables)

    def update_parameters(self):
        """
        This updates the self.parameters from the parameters send form the node (through the gui_com).
        The first time parameters arrive (and until the initialisation_function returns something truthy) the
        initialisation_function gets called.
        If the relic system is up and running it also saves the new parameters into the Parameters df of the
        relic.
        :return: Nothing
        """
        try:
            # The first part of the message is the topic which is not needed here
            self.socket_sub_parameters.recv(flags=zmq.NOBLOCK)
            parameters_in_bytes = self.socket_sub_parameters.recv(flags=zmq.NOBLOCK)
        except Exception:
            # Nothing to read (the normal case for a non blocking read) or the socket is not usable yet
            return

        try:
            # NOTE(security): pickle.loads must only ever be fed data coming from trusted processes
            self.parameters = pickle.loads(parameters_in_bytes)

            if not self.initialised and self.initialisation_function is not None:
                self.initialised = self.initialisation_function(self)

            if self.initialised and self.heron_relic is not None and self.heron_relic.operational:
                self.heron_relic.update_the_parameters_pandasdf(parameters=self.parameters,
                                                                worker_index=self.index)
        except Exception as e:
            # Keep the parameters loop alive but do not hide the failure (e.g. of the initialisation_function)
            print('Updating the parameters of {} {} failed with error: {}'.format(self.node_name,
                                                                                  self.node_index, e))

    def parameters_loop(self):
        """
        The loop that updates the arguments (self.parameters) every 0.2 seconds
        :return: Nothing
        """
        while self.loops_on:
            self.update_parameters()
            time.sleep(0.2)

    def start_parameters_thread(self):
        """
        Start the daemon thread that runs the parameters_loop
        :return: Nothing
        """
        self.thread_parameters = threading.Thread(target=self.parameters_loop, daemon=True)
        self.thread_parameters.start()

    def heartbeat_loop(self):
        """
        The loop that reads the heartbeat 'PULSE' from the source_com. If it takes too long to receive the new
        one it calls the end_of_life_function (if there is one) and kills the worker_exec process
        :return: Nothing
        """
        timeout_ms = 1000 * ct.HEARTBEAT_RATE * ct.HEARTBEATS_TO_DEATH

        while self.loops_on:
            if self.socket_pull_heartbeat.poll(timeout=timeout_ms):
                self.socket_pull_heartbeat.recv()
            else:
                pid = os.getpid()
                if self.end_of_life_function is not None:
                    self.end_of_life_function()
                self.on_kill(pid)
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.5)
            time.sleep(int(ct.HEARTBEAT_RATE))

        self.socket_pull_heartbeat.close()

    def proof_of_life(self):
        """
        When the worker_exec process starts it sends to the gui_com (through the proof_of_life_forwarder thread)
        a signal that lets the node (in the gui_com process) know that the worker_exec is running and ready to
        receive parameter updates. The signal is sent 100 times, once every 0.1 seconds.
        :return: Nothing
        """
        topic_in_bytes = self.parameters_topic.encode('ascii')
        for _ in range(100):
            try:
                self.socket_pub_proof_of_life.send(topic_in_bytes, zmq.SNDMORE)
                self.socket_pub_proof_of_life.send_string('POL')
            except Exception:
                # Best effort: a failed send is simply retried on the next iteration
                pass
            time.sleep(0.1)

    def start_heartbeat_thread(self):
        """
        Start the daemon threads that run the heartbeat_loop and the proof_of_life
        :return: Nothing
        """
        print('Started Worker {}##{} process with PID = {}'.format(self.node_name, self.node_index, os.getpid()))

        self.thread_heartbeat = threading.Thread(target=self.heartbeat_loop, daemon=True)
        self.thread_heartbeat.start()

        self.thread_proof_of_life = threading.Thread(target=self.proof_of_life, daemon=True)
        self.thread_proof_of_life.start()

    def on_kill(self, pid):
        """
        Saves the Relic's Substate (if there is one), stops the loops, closes the sockets and kills the ssh
        tunneling processes
        :param pid: The pid of the process (used only for the printed message)
        :return: Nothing
        """
        print('Killing {} {} with pid {}'.format(self.node_name, self.node_index, pid))

        if self.heron_relic is not None and self.heron_relic.substate_pandasdf_exists:
            self.heron_relic.save_substate_at_death()

        try:
            self.loops_on = False
            self.visualisation_on = False
            self.socket_sub_parameters.close()
            self.socket_push_data.close()
            self.socket_pub_proof_of_life.close()
        except Exception as e:
            print('Trying to kill Source worker {} failed with error: {}'.format(self.node_name, e))
        finally:
            # The context is deliberately not terminated here (self.context.term() causes an error)
            self.ssh_com.kill_tunneling_processes()