Example #1
class SinkCom:
    def __init__(self,
                 receiving_topics,
                 parameters_topic,
                 push_port,
                 worker_exec,
                 verbose='||',
                 ssh_local_server_id='None',
                 ssh_remote_server_id='None'):
        self.receiving_topics = receiving_topics
        self.parameters_topic = parameters_topic
        self.push_data_port = push_port
        self.pull_data_port = str(int(self.push_data_port) + 1)
        self.push_heartbeat_port = str(int(self.push_data_port) + 2)
        self.worker_exec = worker_exec
        self.verbose, self.relic = self.define_verbosity_and_relic(verbose)
        self.all_loops_running = True
        self.ssh_com = SSHCom(self.worker_exec, ssh_local_server_id,
                              ssh_remote_server_id)

        self.port_sub_data = ct.DATA_FORWARDER_PUBLISH_PORT
        self.port_pub_parameters = ct.PARAMETERS_FORWARDER_SUBMIT_PORT

        self.poller = zmq.Poller()

        self.context = None
        self.socket_sub_data = None
        self.stream_sub = None
        self.socket_push_data = None
        self.socket_pull_data = None
        self.socket_push_heartbeat = None

        self.index = 0

        # If self.verbose is a string it is the name of the file to log to. If it is an int it is the verbosity level
        self.logger = None
        if self.verbose != 0:
            try:
                self.verbose = int(self.verbose)
            except ValueError:
                log_file_name = gu.add_timestamp_to_filename(
                    self.verbose, datetime.now())
                self.logger = gu.setup_logger('Sink', log_file_name)
                self.logger.info(
                    'Index of data packet given : Index of data packet received: Topic : Computer Time'
                )
                self.verbose = False

        atexit.register(self.on_kill, None, None)
        signal.signal(signal.SIGTERM, self.on_kill)

    def connect_sockets(self):
        """
        Start the sockets required to communicate with the data forwarder and the worker_exec process
        :return: Nothing
        """
        if self.verbose:
            print('Starting Sink Node with PID = {}'.format(os.getpid()))
        self.context = zmq.Context()

        # Socket for subscribing to data from nodes connected to the input
        self.socket_sub_data = Socket(self.context, zmq.SUB)
        self.socket_sub_data.setsockopt(zmq.LINGER, 0)
        self.socket_sub_data.set_hwm(len(self.receiving_topics))
        self.socket_sub_data.connect("tcp://127.0.0.1:{}".format(
            self.port_sub_data))
        for rt in self.receiving_topics:
            self.socket_sub_data.setsockopt(zmq.SUBSCRIBE, rt.encode('ascii'))
        self.poller.register(self.socket_sub_data, zmq.POLLIN)

        # Socket for pushing the data to the worker_exec
        self.socket_push_data = Socket(self.context, zmq.PUSH)
        self.socket_push_data.setsockopt(zmq.LINGER, 0)
        self.socket_push_data.set_hwm(1)
        self.socket_push_data.bind(r"tcp://*:{}".format(self.push_data_port))

        # Socket for pulling the end of worker function signal from the worker_exec
        self.socket_pull_data = Socket(self.context, zmq.PULL)
        self.socket_pull_data.setsockopt(zmq.LINGER, 0)
        self.socket_pull_data.set_hwm(1)
        self.socket_pull_data.connect(r"tcp://127.0.0.1:{}".format(
            self.pull_data_port))

        self.poller.register(self.socket_pull_data, zmq.POLLIN)

        # Socket for pushing the heartbeat to the worker_exec
        self.socket_push_heartbeat = self.context.socket(zmq.PUSH)
        self.socket_push_heartbeat.setsockopt(zmq.LINGER, 0)
        self.socket_push_heartbeat.bind(r'tcp://*:{}'.format(
            self.push_heartbeat_port))
        self.socket_push_heartbeat.set_hwm(1)

    def define_verbosity_and_relic(self, verbosity_string):
        """
        Splits the string that comes from the Node as verbosity_string into the string (or int) for the logging/printing
        (self.verbose) and the string that carries the path where the relic is to be saved. The self.relic is then
        passed to the worker process
        :param verbosity_string: The string with syntax verbosity||relic
        :return: verbose (int or str), relic (str)
        """
        if verbosity_string != '':
            verbosity, relic = verbosity_string.split('||')
            if relic == '':
                relic = '_'
            if verbosity == '':
                return 0, relic
            else:
                return verbosity, relic
        else:
            return 0, ''
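
    # A sketch of the expected splits (example values, not from the source):
    #   'log.txt||/tmp/relics' -> ('log.txt', '/tmp/relics')
    #   '1||'                  -> ('1', '_')
    #   '||'                   -> (0, '_')
    #   ''                     -> (0, '')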

    def heartbeat_loop(self):
        """
        The loop that sends a 'PULSE' heartbeat to the worker_exec process to keep it alive (every ct.HEARTBEAT_RATE seconds)
        :return: Nothing
        """
        while self.all_loops_running:
            self.socket_push_heartbeat.send_string('PULSE')
            time.sleep(ct.HEARTBEAT_RATE)

    def start_heartbeat_thread(self):
        """
        The daemon thread that runs the infinite heartbeat_loop
        :return: Nothing
        """
        heartbeat_thread = threading.Thread(target=self.heartbeat_loop,
                                            daemon=True)
        heartbeat_thread.start()

    def start_worker(self):
        """
        Starts the worker_exec process and then sends it the parameters as they currently are on the node.
        The pull_data_port of the worker_exec needs to be the push_data_port of the com (obviously).
        The way the arguments are structured is defined by the way they are read by the process. For that see
        general_utilities.parse_arguments_to_worker
        :return: Nothing
        """

        if 'python' in self.worker_exec or '.py' not in self.worker_exec:
            arguments_list = [self.worker_exec]
        else:
            arguments_list = ['python']
            arguments_list.append(self.worker_exec)

        arguments_list.append(str(self.push_data_port))
        arguments_list.append(str(self.parameters_topic))
        arguments_list.append(str(len(self.receiving_topics)))
        arguments_list.extend(self.receiving_topics)
        arguments_list.append(str(0))
        arguments_list.append(str(self.relic))
        arguments_list = self.ssh_com.add_local_server_info_to_arguments(
            arguments_list)

        worker_pid = self.ssh_com.start_process(arguments_list)
        self.ssh_com.connect_socket_to_remote(
            self.socket_pull_data,
            r"tcp://127.0.0.1:{}".format(self.pull_data_port))

    def get_sub_data(self):
        """
        Gets the data from the forwarder. It assumes that each message has four parts:
        The topic
        The data_index, an int that increases by one for every message the previous node sends
        The data_time, the time.perf_counter() result at the time the previous node sent its message
        The messagedata, the numpy array of data
        :return: Nothing
        """

        prev_topic = self.socket_sub_data.recv()
        prev_data_index = self.socket_sub_data.recv()
        prev_data_time = self.socket_sub_data.recv()
        prev_messagedata = self.socket_sub_data.recv_array()
        # The following while ensures that the sink works only on the latest available
        # message from the previous node. If the sink is too slow compared to the input node
        # this while throws all past messages away.
        while prev_topic:
            topic = prev_topic
            data_index = prev_data_index
            data_time = prev_data_time
            messagedata = prev_messagedata
            try:
                prev_topic = self.socket_sub_data.recv(zmq.NOBLOCK)
                prev_data_index = self.socket_sub_data.recv(zmq.NOBLOCK)
                prev_data_time = self.socket_sub_data.recv(zmq.NOBLOCK)
                prev_messagedata = self.socket_sub_data.recv_array(zmq.NOBLOCK)
            except zmq.ZMQError:
                prev_topic = None

        return topic, data_index, data_time, messagedata

    def start_ioloop(self):
        """
        Start the io loop for the sink node. It reads the data from the previous node's com process,
        pushes it to the worker_exec process and
        waits for the worker_exec to signal that it has finished with the data
        :return: Nothing
        """
        while self.all_loops_running:
            t1 = time.perf_counter()

            try:
                # The timeout=1 means that messages coming in faster than 1000Hz will be lost, but if timeout is set
                # to 0 then the CPU utilisation goes to around 10%, which quickly saturates the CPU (if there are
                # 2 or 3 Sinks in the pipeline)
                sockets_in = dict(self.poller.poll(timeout=1))

                while not sockets_in:
                    sockets_in = dict(self.poller.poll(timeout=1))

                if self.socket_sub_data in sockets_in and \
                        sockets_in[self.socket_sub_data] == zmq.POLLIN:
                    topic, data_index, data_time, messagedata = self.get_sub_data()
                    sockets_in = dict(self.poller.poll(timeout=1))

                    if self.verbose:
                        print(
                            "oooo Sink from {}, data_index {} at time {} s oooo"
                            .format(topic, data_index, data_time))

                    # Send the data to the worker_exec to be processed
                    self.socket_push_data.send(topic, flags=zmq.SNDMORE)
                    self.socket_push_data.send_array(messagedata, copy=False)
                    t2 = time.perf_counter()

                # Wait for the end-of-work signal from the worker_exec (block on socket_pull_data)
                sockets_in = dict(self.poller.poll(timeout=None))
                self.socket_pull_data.recv()
                t3 = time.perf_counter()

                if self.verbose:
                    print(
                        "---Time to Transport link from previous com to worker_exec = {} ms"
                        .format((t2 - t1) * 1000))
                    print("---Time to to finish with the worker_exec = {} ms".
                          format((t3 - t2) * 1000))
                    print('=============================')
                if self.logger:
                    self.logger.info('{} : {} : {} : {}'.format(
                        self.index, data_index, topic, datetime.now()))

                self.index += 1
            except Exception:
                pass

    def on_kill(self, signal, frame):
        try:
            self.all_loops_running = False
            self.poller.unregister(socket=self.socket_sub_data)
            self.poller.unregister(socket=self.socket_pull_data)
            self.socket_sub_data.close()
            self.socket_push_data.close()
            self.socket_pull_data.close()
            self.socket_push_heartbeat.close()
        except Exception as e:
            print('Trying to kill Sink com {} failed with error: {}'.format(
                self.worker_exec, e))
        finally:
            self.context.term()
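
# A minimal usage sketch for SinkCom (not from the source; the topics, port and
# worker script are hypothetical placeholders):
#
#   com = SinkCom(receiving_topics=['Out##NodeA##0'],
#                 parameters_topic='params##Sink##0',
#                 push_port='38000',
#                 worker_exec='sink_worker.py',
#                 verbose='||')
#   com.connect_sockets()
#   com.start_heartbeat_thread()
#   com.start_worker()
#   com.start_ioloop()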
Example #2
class SourceWorker:
    def __init__(self, port, parameters_topic, initialisation_function, end_of_life_function, num_sending_topics,
                 relic_path, ssh_local_ip=' ', ssh_local_username='******', ssh_local_password='******'):
        self.parameters_topic = parameters_topic
        self.data_port = port
        self.pull_heartbeat_port = str(int(self.data_port) + 1)
        self.initialisation_function = initialisation_function
        self.end_of_life_function = end_of_life_function
        self.num_sending_topics = int(num_sending_topics)
        self.node_name = parameters_topic.split('##')[-2]
        self.node_index = parameters_topic.split('##')[-1]

        self.ssh_com = SSHCom(ssh_local_ip=ssh_local_ip, ssh_local_username=ssh_local_username,
                              ssh_local_password=ssh_local_password)
        self.relic_path = relic_path
        self.import_reliquery()
        self.heron_relic = None
        self.num_of_iters_to_update_relics_substate = None

        self.time_of_pulse = time.perf_counter()
        self.port_sub_parameters = ct.PARAMETERS_FORWARDER_PUBLISH_PORT
        self.port_pub_proof_of_life = ct.PROOF_OF_LIFE_FORWARDER_SUBMIT_PORT
        self.running_thread = True
        self.loops_on = True
        self.initialised = False

        self.context = None
        self.socket_push_data = None
        self.socket_sub_parameters = None
        self.stream_parameters = None
        self.thread_parameters = None
        self.parameters = None
        self.socket_pull_heartbeat = None
        self.stream_heartbeat = None
        self.thread_heartbeat = None
        self.socket_pub_proof_of_life = None
        self.thread_proof_of_life = None
        self.index = 0

    def connect_socket(self):
        """
        Sets up the sockets to do the communication with the source_com process through the forwarders
        (for the data and the parameters).
        :return: Nothing
        """
        self.context = zmq.Context()

        # Setup the socket that receives the parameters of the worker_exec function from the node
        self.socket_sub_parameters = Socket(self.context, zmq.SUB)
        self.socket_sub_parameters.setsockopt(zmq.LINGER, 0)
        self.socket_sub_parameters.subscribe(self.parameters_topic)
        self.ssh_com.connect_socket_to_local(self.socket_sub_parameters, r'tcp://127.0.0.1', self.port_sub_parameters)

        # Setup the socket that pushes the data to the com
        self.socket_push_data = Socket(self.context, zmq.PUSH)
        self.socket_push_data.setsockopt(zmq.LINGER, 0)
        self.socket_push_data.set_hwm(1)
        self.socket_push_data.bind(r"tcp://127.0.0.1:{}".format(self.data_port))

        # Setup the socket that receives the heartbeat from the com
        self.socket_pull_heartbeat = self.context.socket(zmq.PULL)
        self.socket_pull_heartbeat.setsockopt(zmq.LINGER, 0)
        self.ssh_com.connect_socket_to_local(self.socket_pull_heartbeat, r'tcp://127.0.0.1', self.pull_heartbeat_port)

        # Setup the socket that publishes the fact that the worker_exec is up and running to the node com so that it
        # can then update the parameters of the worker_exec.
        self.socket_pub_proof_of_life = Socket(self.context, zmq.PUB)
        self.socket_pub_proof_of_life.setsockopt(zmq.LINGER, 0)
        self.ssh_com.connect_socket_to_local(self.socket_pub_proof_of_life, r'tcp://127.0.0.1',
                                             self.port_pub_proof_of_life, skip_ssh=True)

    def send_data_to_com(self, data):
        self.socket_push_data.send_array(data, copy=False)
        self.index += 1

    def import_reliquery(self):
        """
        This import is done here because the reliquery package takes a good few seconds to load. If it were imported
        for the first time inside the HeronRelic instance, that would delay the initialisation of the worker process,
        which can be a problem
        :return: Nothing
        """
        if self.relic_path != '_':
            try:
                import reliquery
                import reliquery.storage
            except ImportError:
                pass

    def relic_create_parameters_df(self, **parameters):
        """
        Creates a new relic with the Parameters pandasdf in it or adds the Parameters pandasdf in the existing Node's
        Relic.
        :param parameters: The dictionary of the parameters. The keys of the dict will become the column names of the
        pandasdf
        :return: Nothing
        """
        self._relic_create_df('Parameters', **parameters)

    def relic_create_substate_df(self, **variables):
        """
        Creates a new relic with the Substate pandasdf in it or adds the Substate pandasdf in the existing Node's Relic.
        :param variables: The dictionary of the variables to save. The keys of the dict will become the column names of
        the pandasdf
        :return: Nothing
        """
        self._relic_create_df('Substate', **variables)

    def _relic_create_df(self, type, **variables):
        """
        Base function to create either a Parameters or a Substate pandasdf in a new or the existing Node's Relic
        :param type: Parameters or Substate
        :param variables: The variables dictionary to be saved in the pandasdf. The keys of the dict will become the
        column names of the pandasdf
        :return: Nothing
        """
        if self.heron_relic is None:
            self.heron_relic = HeronRelic(self.relic_path, self.node_name,
                                          self.node_index, self.num_of_iters_to_update_relics_substate)
        if self.heron_relic.operational:
            self.heron_relic.create_the_pandasdf(type, **variables)

    def relic_update_substate_df(self, **variables):
        """
        Updates the Substate pandasdf of the Node's Relic
        :param variables: The Substate's variables dict
        :return: Nothing
        """
        self.heron_relic.update_the_substate_pandasdf(self.index, **variables)

    def update_parameters(self):
        """
        This updates the self.parameters from the parameters sent from the node (through the gui_com).
        If the relic system is up and running it also saves the new parameters into the Parameters df of the relic
        :return: Nothing
        """
        try:
            topic = self.socket_sub_parameters.recv(flags=zmq.NOBLOCK)
            parameters_in_bytes = self.socket_sub_parameters.recv(flags=zmq.NOBLOCK)
            args = pickle.loads(parameters_in_bytes)
            self.parameters = args
            if not self.initialised and self.initialisation_function is not None:
                self.initialised = self.initialisation_function(self)

            if self.initialised and self.heron_relic is not None and self.heron_relic.operational:
                self.heron_relic.update_the_parameters_pandasdf(parameters=self.parameters, worker_index=self.index)
        except Exception:
            pass

    def parameters_loop(self):
        """
        The loop that updates the arguments (self.parameters)
        :return: Nothing
        """
        while self.loops_on:
            self.update_parameters()
            time.sleep(0.2)

    def start_parameters_thread(self):
        """
        Start the thread that runs the infinite parameters_loop
        :return: Nothing
        """
        self.thread_parameters = threading.Thread(target=self.parameters_loop, daemon=True)
        self.thread_parameters.start()

    def heartbeat_loop(self):
        """
        The loop that reads the heartbeat 'PULSE' from the source_com. If it takes too long to receive the new one
        it kills the worker_exec process
        :return: Nothing
        """
        while self.loops_on:
            if self.socket_pull_heartbeat.poll(timeout=(1000 * ct.HEARTBEAT_RATE * ct.HEARTBEATS_TO_DEATH)):
                self.socket_pull_heartbeat.recv()
            else:
                pid = os.getpid()
                self.end_of_life_function()
                self.on_kill(pid)
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.5)
            time.sleep(int(ct.HEARTBEAT_RATE))
        self.socket_pull_heartbeat.close()

    def proof_of_life(self):
        """
        When the worker_exec process starts it sends to the gui_com (through the proof_of_life_forwarder thread) a signal
        that lets the node (in the gui_com process) know that the worker_exec is running and ready to receive parameter
        updates.
        :return: Nothing
        """
        #print('---Sending POL {}'.format('topic = {}, msg = POL'.format(self.parameters_topic.encode('ascii'))))
        for i in range(100):
            try:
                self.socket_pub_proof_of_life.send(self.parameters_topic.encode('ascii'), zmq.SNDMORE)
                self.socket_pub_proof_of_life.send_string('POL')
            except zmq.ZMQError:
                pass
            time.sleep(0.1)

    def start_heartbeat_thread(self):
        """
        Start the daemon threads that run the infinite heartbeat_loop and the proof_of_life loop
        :return: Nothing
        """
        print('Started Worker {}##{} process with PID = {}'.format(self.node_name, self.node_index, os.getpid()))

        self.thread_heartbeat = threading.Thread(target=self.heartbeat_loop, daemon=True)
        self.thread_heartbeat.start()

        self.thread_proof_of_life = threading.Thread(target=self.proof_of_life, daemon=True)
        self.thread_proof_of_life.start()

    def on_kill(self, pid):
        print('Killing {} {} with pid {}'.format(self.node_name, self.node_index, pid))

        if self.heron_relic is not None and self.heron_relic.substate_pandasdf_exists:
            self.heron_relic.save_substate_at_death()

        try:
            self.loops_on = False
            self.visualisation_on = False
            self.socket_sub_parameters.close()
            self.socket_push_data.close()
            self.socket_pub_proof_of_life.close()
        except Exception as e:
            print('Trying to kill Source worker {} failed with error: {}'.format(self.node_name, e))
        finally:
            #self.context.term()  # That causes an error
            self.ssh_com.kill_tunneling_processes()
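
# A minimal usage sketch for SourceWorker (not from the source; the topic, port
# and acquire_frame() are hypothetical placeholders):
#
#   worker = SourceWorker(port='39000',
#                         parameters_topic='params##Source##0',
#                         initialisation_function=None,
#                         end_of_life_function=lambda: None,
#                         num_sending_topics='1',
#                         relic_path='_')
#   worker.connect_socket()
#   worker.start_heartbeat_thread()   # also starts the proof-of-life thread
#   worker.start_parameters_thread()
#   while True:
#       worker.send_data_to_com(acquire_frame())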
Example #3
class TransformWorker:
    def __init__(self,
                 recv_topics_buffer,
                 pull_port,
                 initialisation_function,
                 work_function,
                 end_of_life_function,
                 parameters_topic,
                 num_sending_topics,
                 relic_path,
                 ssh_local_ip=' ',
                 ssh_local_username='******',
                 ssh_local_password='******'):

        self.pull_data_port = pull_port
        self.push_data_port = str(int(self.pull_data_port) + 1)
        self.pull_heartbeat_port = str(int(self.pull_data_port) + 2)
        self.initialisation_function = initialisation_function
        self.work_function = work_function
        self.end_of_life_function = end_of_life_function
        self.parameters_topic = parameters_topic
        self.num_sending_topics = int(num_sending_topics)
        self.recv_topics_buffer = recv_topics_buffer
        self.node_name = parameters_topic.split('##')[-2]
        self.node_index = self.parameters_topic.split('##')[-1]

        self.relic_path = relic_path
        self.import_reliquery()
        self.heron_relic = None
        self.num_of_iters_to_update_relics_substate = None

        self.ssh_com = SSHCom(ssh_local_ip=ssh_local_ip,
                              ssh_local_username=ssh_local_username,
                              ssh_local_password=ssh_local_password)

        self.time_of_pulse = time.perf_counter()
        self.port_sub_parameters = ct.PARAMETERS_FORWARDER_PUBLISH_PORT
        self.port_pub_proof_of_life = ct.PROOF_OF_LIFE_FORWARDER_SUBMIT_PORT
        self.loops_on = True
        self.initialised = False

        self.context = None
        self.socket_pull_data = None
        self.stream_pull_data = None
        self.socket_push_data = None
        self.socket_sub_parameters = None
        self.stream_parameters = None
        self.parameters = None
        self.socket_pull_heartbeat = None
        self.stream_heartbeat = None
        self.thread_heartbeat = None
        self.socket_pub_proof_of_life = None
        self.thread_proof_of_life = None
        self.worker_visualisable_result = None
        self.index = 0

    def connect_sockets(self):
        """
        Sets up the sockets to do the communication with the transform_com process through the forwarders
        (for the data and the parameters).
        :return: Nothing
        """
        self.context = zmq.Context()

        # Setup the socket and the stream that receives the pre-transformed data from the com
        self.socket_pull_data = Socket(self.context, zmq.PULL)
        self.socket_pull_data.setsockopt(zmq.LINGER, 0)
        self.socket_pull_data.set_hwm(1)
        self.ssh_com.connect_socket_to_local(self.socket_pull_data,
                                             r'tcp://127.0.0.1',
                                             self.pull_data_port)
        self.stream_pull_data = zmqstream.ZMQStream(self.socket_pull_data)
        self.stream_pull_data.on_recv(self.data_callback, copy=False)

        # Setup the socket and the stream that receives the parameters of the worker_exec function from the node (gui_com)
        self.socket_sub_parameters = Socket(self.context, zmq.SUB)
        self.socket_sub_parameters.setsockopt(zmq.LINGER, 0)
        self.ssh_com.connect_socket_to_local(self.socket_sub_parameters,
                                             r'tcp://127.0.0.1',
                                             self.port_sub_parameters)
        self.socket_sub_parameters.subscribe(self.parameters_topic)
        self.stream_parameters = zmqstream.ZMQStream(
            self.socket_sub_parameters)
        self.stream_parameters.on_recv(self.parameters_callback, copy=False)

        # Setup the socket that pushes the transformed data to the com
        self.socket_push_data = Socket(self.context, zmq.PUSH)
        self.socket_push_data.setsockopt(zmq.LINGER, 0)
        self.socket_push_data.set_hwm(1)
        self.socket_push_data.bind(r"tcp://127.0.0.1:{}".format(
            self.push_data_port))

        # Setup the socket that receives the heartbeat from the com
        self.socket_pull_heartbeat = self.context.socket(zmq.PULL)
        self.socket_pull_heartbeat.setsockopt(zmq.LINGER, 0)
        self.ssh_com.connect_socket_to_local(self.socket_pull_heartbeat,
                                             r'tcp://127.0.0.1',
                                             self.pull_heartbeat_port)
        self.stream_heartbeat = zmqstream.ZMQStream(self.socket_pull_heartbeat)
        self.stream_heartbeat.on_recv(self.heartbeat_callback, copy=False)

        # Setup the socket that publishes the fact that the worker_exec is up and running to the node com so that it
        # can then update the parameters of the worker_exec
        self.socket_pub_proof_of_life = Socket(self.context, zmq.PUB)
        self.socket_pub_proof_of_life.setsockopt(zmq.LINGER, 0)
        self.ssh_com.connect_socket_to_local(self.socket_pub_proof_of_life,
                                             r'tcp://127.0.0.1',
                                             self.port_pub_proof_of_life,
                                             skip_ssh=True)

    def data_callback(self, data):
        """
        The callback that is called when data is sent from the previous com process this com process is connected to
        (receives data from and shares a common topic) and pushes the data to the worker_exec.
        The data is a three zmq.Frame list. The first is the topic (used by the worker_exec to distinguish which input
        the data has come from in the case of multiple input nodes). The other two items are the details and the
        payload of the numpy array coming from the previous node.
        The payload is then given to the work_function of the worker (together with the parameters of the node)
        and the result is sent to the com process to be passed on.
        The result must be a list of numpy arrays! Each element of the list represents one output of the node in the
        same order as the order of Outputs specified in the xxx_com.py of the node.
        The callback will call the work_function only if self.initialised is True (i.e. if the parameters_callback
        has had a chance to call the initialisation_function and get back a True). Otherwise it will pass to the com
        a set of ct.IGNORE (as many as the Node's outputs).
        :param data: The data received
        :return: Nothing
        """
        if self.initialised:
            data = [data[0].bytes, data[1].bytes, data[2].bytes]

            try:
                results = self.work_function(data, self.parameters,
                                             self.relic_update_substate_df)
            except TypeError:
                results = self.work_function(data, self.parameters)

            for i, array_in_list in enumerate(results):
                if i < len(results) - 1:
                    self.socket_push_data.send_array(array_in_list,
                                                     flags=zmq.SNDMORE,
                                                     copy=False)
                else:
                    self.socket_push_data.send_array(array_in_list, copy=False)
            self.index += 1
        else:
            send_topics = 0
            while send_topics < self.num_sending_topics - 1:
                self.socket_push_data.send_array(np.array([ct.IGNORE]),
                                                 flags=zmq.SNDMORE,
                                                 copy=False)
                send_topics += 1
            self.socket_push_data.send_array(np.array([ct.IGNORE]), copy=False)

    def import_reliquery(self):
        """
        This import is done here because the reliquery package takes a good few seconds to load. If it were imported
        for the first time inside the HeronRelic instance, that would delay the initialisation of the worker process,
        which can be a problem
        :return: Nothing
        """
        if self.relic_path != '_':
            try:
                import reliquery
                import reliquery.storage
            except ImportError:
                pass

    def relic_create_parameters_df(self, **parameters):
        """
        Creates a new relic with the Parameters pandasdf in it or adds the Parameters pandasdf in the existing Node's
        Relic.
        :param parameters: The dictionary of the parameters. The keys of the dict will become the column names of the
        pandasdf
        :return: Nothing
        """
        self._relic_create_df('Parameters', **parameters)

    def relic_create_substate_df(self, **variables):
        """
        Creates a new relic with the Substate pandasdf in it or adds the Substate pandasdf in the existing Node's Relic.
        :param variables: The dictionary of the variables to save. The keys of the dict will become the column names of
        the pandasdf
        :return: Nothing
        """
        self._relic_create_df('Substate', **variables)

    def _relic_create_df(self, type, **variables):
        """
        Base function to create either a Parameters or a Substate pandasdf in a new or the existing Node's Relic
        :param type: Parameters or Substate
        :param variables: The variables dictionary to be saved in the pandasdf. The keys of the dict will become the
        column names of the pandasdf
        :return: Nothing
        """
        if self.heron_relic is None:
            self.heron_relic = HeronRelic(
                self.relic_path, self.node_name, self.node_index,
                self.num_of_iters_to_update_relics_substate)
        if self.heron_relic.operational:
            self.heron_relic.create_the_pandasdf(type, **variables)

    def relic_update_substate_df(self, **variables):
        """
        Updates the Substate pandasdf of the Node's Relic
        :param variables: The Substate's variables dict
        :return: Nothing
        """
        self.heron_relic.update_the_substate_pandasdf(self.index, **variables)
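
        # A sketch of the intended call pattern (variable names hypothetical):
        #   self.relic_create_substate_df(frame=0, value=0.0)   # once, at initialisation
        #   self.relic_update_substate_df(frame=i, value=v)     # then once per iteration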

    def parameters_callback(self, parameters_in_bytes):
        """
        The callback called when there is an update of the parameters (the worker_exec function's parameters) from the
        node (sent by the gui_com)
        :param parameters_in_bytes: The multipart message: the topic frame followed by the pickled parameters
        :return: Nothing
        """
        #print('UPDATING PARAMETERS OF {} {}'.format(self.node_name, self.node_index))
        if len(parameters_in_bytes) > 1:
            args_pyobj = parameters_in_bytes[1].bytes  # remove the topic
            args = pickle.loads(args_pyobj)
            if args is not None:
                self.parameters = args
                if not self.initialised and self.initialisation_function is not None:
                    self.initialised = self.initialisation_function(self)
            #print('Updated parameters in {} = {}'.format(self.parameters_topic, args))

                if self.initialised and self.heron_relic is not None and self.heron_relic.operational:
                    self.heron_relic.update_the_parameters_pandasdf(
                        parameters=self.parameters, worker_index=self.index)

    def heartbeat_callback(self, pulse):
        """
        The callback called when the com sends a 'PULSE'. It registers the time the 'PULSE' has been received
        :param pulse: The pulse (message from the com's push) received
        :return:
        """
        self.time_of_pulse = time.perf_counter()

    def heartbeat_loop(self):
        """
        The loop that checks whether the latest 'PULSE' received from the com's heartbeat push is not too stale.
        If it is then the current process is killed
        :return: Nothing
        """
        while self.loops_on:
            current_time = time.perf_counter()
            if current_time - self.time_of_pulse > ct.HEARTBEAT_RATE * ct.HEARTBEATS_TO_DEATH:
                pid = os.getpid()
                self.end_of_life_function()
                self.on_kill(pid)
                os.kill(pid, signal.SIGTERM)
                time.sleep(0.5)
            time.sleep(ct.HEARTBEAT_RATE)
        self.socket_pull_heartbeat.close()

    def proof_of_life(self):
        """
        When the worker_exec process starts AND ONCE IT HAS RECEIVED ITS FIRST BUNCH OF DATA, it sends to the gui_com
        (through the proof_of_life_forwarder thread) a signal that lets the node (in the gui_com process) know that the
        worker_exec is running and ready to receive parameter updates.
        :return: Nothing
        """

        #print('---Sending POL {}'.format('{}##POL'.format(self.parameters_topic)))
        for i in range(100):
            try:
                self.socket_pub_proof_of_life.send(
                    self.parameters_topic.encode('ascii'), zmq.SNDMORE)
                self.socket_pub_proof_of_life.send_string('POL')
            except zmq.ZMQError:
                pass
            time.sleep(0.1)
        #print('--- Finished sending POL from {} {}'.format(self.node_name, self.node_index))

    def start_ioloop(self):
        """
        Starts the heartbeat thread daemon and the ioloop of the zmqstreams
        :return: Nothing
        """
        self.thread_heartbeat = threading.Thread(target=self.heartbeat_loop,
                                                 daemon=True)
        self.thread_heartbeat.start()

        self.thread_proof_of_life = threading.Thread(target=self.proof_of_life,
                                                     daemon=True)
        self.thread_proof_of_life.start()

        print('Started Worker {}_{} process with PID = {}'.format(
            self.node_name, self.node_index, os.getpid()))

        ioloop.IOLoop.instance().start()
        print('!!! WORKER {} HAS STOPPED'.format(self.parameters_topic))

    def on_kill(self, pid):
        print('Killing {} {} with pid {}'.format(self.node_name,
                                                 self.node_index, pid))

        if self.heron_relic is not None and self.heron_relic.substate_pandasdf_exists:
            self.heron_relic.save_substate_at_death()

        try:
            self.visualisation_on = False
            self.loops_on = False
            self.stream_pull_data.close()
            self.stream_parameters.close()
            self.stream_heartbeat.close()
            self.socket_pull_data.close()
            self.socket_sub_parameters.close()
            self.socket_push_data.close()
            self.socket_pub_proof_of_life.close()
        except Exception as e:
            print('Trying to kill Transform worker {} failed with error: {}'.
                  format(self.node_name, e))
        finally:
            self.context.term()
            self.ssh_com.kill_tunneling_processes()
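
# A minimal sketch of a work_function this worker could run (not from the
# source; the reconstruction helper and the single output are assumptions).
# `data` arrives as [topic, array_details, array_payload]; the result must be a
# list with one numpy array per output:
#
#   def work_function(data, parameters, relic_update_substate_df=None):
#       topic = data[0]
#       message = Socket.reconstruct_array_from_bytes_message(data[1:])
#       return [message * 2]   # the actual transformation goes here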
Example #4
class SourceCom:
    def __init__(self, sending_topics, parameters_topic, port, worker_exec, verbose='||',
                 ssh_local_server_id='None', ssh_remote_server_id='None', outputs=None):
        self.sending_topics = sending_topics
        self.parameters_topic = parameters_topic
        self.pull_data_port = port
        self.heartbeat_port = str(int(self.pull_data_port) + 1)
        self.worker_exec = worker_exec
        self.index = 0
        self.time = int(1000000 * time.perf_counter())
        self.previous_time = self.time
        self.verbose, self.relic = self.define_verbosity_and_relic(verbose)

        self.all_loops_running = True
        self.ssh_com = SSHCom(self.worker_exec, ssh_local_server_id, ssh_remote_server_id)
        self.outputs = outputs
        self.port_pub = ct.DATA_FORWARDER_SUBMIT_PORT

        self.context = None
        self.socket_pub_data = None
        self.socket_pull_data = None
        self.stream_pull_data = None
        self.socket_push_heartbeat = None
        self.average_sending_time = 0

        # If self.verbose is a string it is the name of the file to log to. If it is an int it is the verbosity level
        self.logger = None
        if self.verbose != 0:
            try:
                self.verbose = int(self.verbose)
            except ValueError:
                log_file_name = gu.add_timestamp_to_filename(self.verbose, datetime.now())
                self.logger = gu.setup_logger('Source', log_file_name)
                self.logger.info('Index of data packet : Computer Time Data Out')
                self.verbose = False

    def connect_sockets(self):
        """
        Start the sockets required to communicate with the data forwarder and the worker_exec process
        :return: Nothing
        """
        if self.verbose:
            print('Starting Source Node with PID = {}'.format(os.getpid()))
        self.context = zmq.Context()

        # Socket for pulling the data from the worker_exec
        self.socket_pull_data = Socket(self.context, zmq.PULL)
        self.socket_pull_data.setsockopt(zmq.LINGER, 0)
        self.socket_pull_data.set_hwm(1)
        self.socket_pull_data.connect(r"tcp://127.0.0.1:{}".format(self.pull_data_port))

        self.stream_pull_data = zmqstream.ZMQStream(self.socket_pull_data)
        self.stream_pull_data.on_recv(self.on_receive_data_from_worker)

        # Socket for publishing the data to the data forwarder
        self.socket_pub_data = Socket(self.context, zmq.PUB)
        self.socket_pub_data.setsockopt(zmq.LINGER, 0)
        self.socket_pub_data.set_hwm(len(self.sending_topics))
        self.socket_pub_data.connect(r"tcp://127.0.0.1:{}".format(self.port_pub))

        # Socket for publishing the heartbeat to the worker_exec
        self.socket_push_heartbeat = self.context.socket(zmq.PUSH)
        self.socket_push_heartbeat.setsockopt(zmq.LINGER, 0)
        self.socket_push_heartbeat.bind(r'tcp://*:{}'.format(self.heartbeat_port))
        self.socket_push_heartbeat.set_hwm(1)

    def define_verbosity_and_relic(self, verbosity_string):
        """
        Splits the string that comes from the Node as verbosity_string into the string (or int) for the logging/printing
        (self.verbose) and the string that carries the path where the relic is to be saved. The self.relic is then
        passed to the worker process
        :param verbosity_string: The string with syntax verbosity||relic
        :return: verbose (int or str), relic (str)
        """
        if verbosity_string != '':
            verbosity, relic = verbosity_string.split('||')
            if relic == '':
                relic = '_'
            if verbosity == '':
                return 0, relic
            else:
                return verbosity, relic
        else:
            return 0, ''

    def on_receive_data_from_worker(self, msg):
        """
        The callback that runs every time data is received from the worker_exec process. It takes the data and passes it
        on to the data forwarder
        :param msg: The message (carrying the actual data (np array))
        :return:
        """

        # A specific worker with multiple outputs should send from its infinite loop a message with multiple parts
        # (using multiple send_array(data, flags=zmq.SNDMORE) commands). For an example see how the transform_worker
        # sends data to the com from its data_callback function
        # TODO The below will not work for multiple outputs. I have to find out how many times a callback is called
        #  when data are sent with the SNDMORE flag !!!

        ignoring_outputs = [False] * len(self.outputs)
        new_message_data = []
        if len(self.outputs) > 1:
            for i in range(len(self.outputs)):
                array_data = Socket.reconstruct_array_from_bytes_message(msg[i])
                new_message_data.append(array_data)
                if type(array_data[0]) == np.str_:
                    if array_data[0] == ct.IGNORE:
                        ignoring_outputs[i] = True
        else:
            array_data = Socket.reconstruct_array_from_bytes_message(msg)
            new_message_data.append(array_data)
            if type(array_data[0]) == np.str_:
                if array_data[0] == ct.IGNORE:
                    ignoring_outputs[0] = True

        self.time = int(1000000 * time.perf_counter())
        self.index = self.index + 1

        # Publish the results. Each array in the list of arrays is published to its own sending topic
        # (matched by order)
        for i, st in enumerate(self.sending_topics):
            for k, output in enumerate(self.outputs):
                if output.replace(' ', '_') in st.split('##')[0]:
                    break

            if ignoring_outputs[k] is False:
                self.socket_pub_data.send("{}".format(st).encode('ascii'), flags=zmq.SNDMORE)
                self.socket_pub_data.send("{}".format(self.index).encode('ascii'), flags=zmq.SNDMORE)
                self.socket_pub_data.send("{}".format(self.time).encode('ascii'), flags=zmq.SNDMORE)
                self.socket_pub_data.send_array(new_message_data[k], copy=False)
                # This delay is critical to get single output to multiple inputs to work!
                gu.accurate_delay(ct.DELAY_BETWEEN_SENDING_DATA_TO_NEXT_NODE_MILLISECONDS)

            if self.verbose:
                dt = self.time - self.previous_time
                if self.index > 3:
                    self.average_sending_time = self.average_sending_time * (self.index - 1) / self.index + dt / self.index
                print('----------')
                print("Source with topic {} sending packet with data_index {} at time {}".format(self.sending_topics[i],
                                                                                                 self.index, self.time))
                print('Time Diff between packages = {}. Average package sending time = {} ms'.format(dt/1000, self.average_sending_time / 1000))
            if self.logger:
                self.logger.info('{} : {}'.format(self.index, datetime.now()))
            self.previous_time = self.time

    def heartbeat_loop(self):
        """
        Sends a 'PULSE' to the worker_exec every ct.HEARTBEAT_RATE seconds so that it stays alive
        :return: Nothing
        """
        while self.all_loops_running:
            self.socket_push_heartbeat.send_string('PULSE')
            time.sleep(ct.HEARTBEAT_RATE)

    def start_heartbeat_thread(self):
        """
        Starts the daemon thread that runs the heartbeat_loop
        :return: Nothing
        """
        heartbeat_thread = threading.Thread(target=self.heartbeat_loop, daemon=True)
        heartbeat_thread.start()

    def start_worker_process(self):
        """
        Starts the worker_exec process and then sends it the parameters as they currently are on the node.
        The pull_data_port of the worker_exec needs to be the push_data_port of the com (obviously).
        The way the arguments are structured is defined by the way they are read by the process. For that see
        general_utilities.parse_arguments_to_worker
        :return: Nothing
        """
        if 'python' in self.worker_exec or '.py' not in self.worker_exec:
            arguments_list = [self.worker_exec]
        else:
            arguments_list = ['python']
            arguments_list.append(self.worker_exec)

        arguments_list.append(str(self.pull_data_port))
        arguments_list.append(str(self.parameters_topic))
        arguments_list.append(str(0))
        arguments_list.append(str(len(self.sending_topics)))
        arguments_list.append(self.relic)
        arguments_list = self.ssh_com.add_local_server_info_to_arguments(arguments_list)

        worker_pid = self.ssh_com.start_process(arguments_list)
        self.ssh_com.connect_socket_to_remote(self.socket_pull_data,
                                              r"tcp://127.0.0.1:{}".format(self.pull_data_port))

    def start_ioloop(self):
        """
        Starts the ioloop of the zmqstream
        :return: Nothing
        """
        ioloop.IOLoop.instance().start()

    def on_kill(self, signal, frame):
        """
        The function that is called when the parent process sends a SIGBREAK (Windows) or SIGTERM (Linux) signal.
        It needs signal and frame as parameters
        :param signal: The signal received
        :param frame: The current stack frame (not used)
        :return: Nothing
        """
        try:
            self.all_loops_running = False
            self.stream_pull_data.close(linger=0)
            self.socket_pull_data.close()
            self.socket_pub_data.close()
            self.socket_push_heartbeat.close()
        except Exception as e:
            print('Trying to kill Source com {} failed with error: {}'.format(self.sending_topics[0], e))
        finally:
            self.context.term()
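
# A minimal usage sketch for SourceCom (not from the source; topics, port and
# worker script are hypothetical placeholders):
#
#   com = SourceCom(sending_topics=['Out##Source##0##In##NodeB##0'],
#                   parameters_topic='params##Source##0',
#                   port='39000',
#                   worker_exec='source_worker.py',
#                   outputs=['Out'])
#   com.connect_sockets()
#   com.start_heartbeat_thread()
#   com.start_worker_process()
#   com.start_ioloop()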