Exemplo n.º 1
0
def send_msg_to_client(self, client_ip, client_port, status, sock,
                       write_req_id):
    """Send the write-reply for a request to the client and release its state.

    Args:
        client_ip: Receiver host stored in the reply message.
        client_port: Receiver port stored in the reply message.
        status: Outcome stored under the ``'status'`` key (+1 or -1).
        sock: Socket the reply is sent on; it is always closed here.
        write_req_id: Identifier handed to ``clear_write_req_data``.

    The socket is dropped from ``self.inputs``, closed, and the request's
    bookkeeping is cleared even when sending fails. Any exception raised by
    ``send_msg`` still propagates to the caller after the cleanup.
    """
    reply_msg = Message(Msg_type['write_reply'],
                        recv_host=client_ip,
                        recv_port=client_port,
                        data_dict={'status': status})
    reply_msg._msg_id = (self.node_id, threading.current_thread().ident)
    try:
        send_msg(sock, reply_msg)
    finally:
        # Always release the connection: a failed send must not leave the
        # socket registered in self.inputs or the request data behind.
        if sock in self.inputs:
            self.inputs.remove(sock)
        sock.close()
        self.clear_write_req_data(write_req_id)
    def heartbeat_thread_fn(self):
        """Run this node's heartbeat loop; this function never returns.

        Incoming heartbeat messages are read from the queue this thread
        registers in ``self.thread_msg_qs`` under its own thread ident
        (presumably filled by another thread; not visible here).

        Leader (``self.is_leader`` true), once per ``self.heartbeat_delay``:
          * counts, per node, consecutive rounds without a reply;
          * removes nodes whose count reaches ``self.timeout_thresh`` from
            ``self.network_dict`` and sends a ``delete_node`` message for
            each of them to every remaining node;
          * sends a heartbeat to every remaining node.

        Non-leader, once per ``self.ldr_heartbeat_delay``:
          * answers every queued heartbeat that is not itself a reply;
          * while ``self.ldr_alive`` is true, counts rounds without a
            heartbeat from ``self.ldr_id``; at ``self.timeout_thresh`` it
            marks the leader dead, drops it from ``self.network_dict`` and
            starts ``self.ldrelect_thread_fn`` in a new thread.

        Peers that cannot be connected to are skipped silently
        (best effort); their silence is what the time-out counts detect.
        """
        tid = threading.current_thread().ident
        self.pause_heartbeat = False
        self.thread_msg_qs[tid] = queue.Queue()
        heartbeat_msg = Message(Msg_type['heartbeat'],
                                msg_id=(self.node_id, tid))

        # node_id -> consecutive rounds without a reply. Starts at -1 so the
        # very first round (before any heartbeat was sent) counts as 0.
        node_timeouts = dict.fromkeys(self.network_dict, -1)

        # While paused, poll with a short real sleep instead of spinning the
        # CPU; short enough that resuming is practically immediate.
        pause_poll_seconds = 0.05

        while True:
            if self.pause_heartbeat:
                time.sleep(pause_poll_seconds)
                continue

            # for a leader node
            if self.is_leader:
                # Collect the ids of all nodes that replied since last round.
                q = self.thread_msg_qs[tid]
                responded_nodes = set()
                while not q.empty():
                    responded_nodes.add(q.get()._msg_id[0])

                # Update time-out counts and pick the unresponsive nodes.
                # Iterate over a snapshot: other threads may change
                # network_dict while this loop runs.
                to_del = []
                for n_id in list(self.network_dict):
                    if n_id in responded_nodes:
                        node_timeouts[n_id] = 0
                    else:
                        # Nodes first seen after start-up begin at 1.
                        node_timeouts[n_id] = node_timeouts.get(n_id, 0) + 1
                    if node_timeouts[n_id] >= self.timeout_thresh:
                        print("NODE : ", n_id, " found unresponsive")
                        to_del.append(n_id)

                # Delete locally first, then tell everyone who is left.
                for n_id in to_del:
                    self.network_dict.pop(n_id, None)
                    node_timeouts.pop(n_id, None)

                for n_to_delete in to_del:
                    del_msg = Message(Msg_type['delete_node'],
                                      msg_id=(self.node_id, tid))
                    for peer in list(self.network_dict.values()):
                        new_recv = (peer[0], peer[1])
                        # Temporary socket per message.
                        with socket.socket(socket.AF_INET,
                                           socket.SOCK_STREAM) as s:
                            try:
                                s.connect(new_recv)
                            except OSError:
                                continue  # peer unreachable: best effort
                            (del_msg._source_host,
                             del_msg._source_port) = s.getsockname()
                            del_msg._recv_host, del_msg._recv_port = new_recv
                            del_msg._msg_id = (self.node_id, tid)
                            del_msg._data_dict = {'id': n_to_delete}
                            send_msg(s, del_msg)

                # Send a heartbeat to every remaining node.
                for peer in list(self.network_dict.values()):
                    peer_addr = (peer[0], peer[1])
                    with socket.socket(socket.AF_INET,
                                       socket.SOCK_STREAM) as s:
                        try:
                            s.connect(peer_addr)
                        except OSError:
                            continue  # counted as a time-out next round
                        (heartbeat_msg._source_host,
                         heartbeat_msg._source_port) = s.getsockname()
                        (heartbeat_msg._recv_host,
                         heartbeat_msg._recv_port) = peer_addr
                        heartbeat_msg._msg_id = (self.node_id, tid)
                        heartbeat_msg._data_dict = {}
                        send_msg(s, heartbeat_msg)

                # re-starting timer
                time.sleep(self.heartbeat_delay)

            # for a non-leader node
            else:
                got_ldr_hbeat = False
                q = self.thread_msg_qs[tid]
                while not q.empty():
                    hmsg = q.get()
                    # Replies to heartbeats are not answered again.
                    if hmsg.get_data('type') == 'reply':
                        continue
                    self.ldr_timeout_count = 0

                    hbeat_id = hmsg._msg_id[0]
                    if hbeat_id == self.ldr_id:
                        got_ldr_hbeat = True

                    # Work out where the reply has to go.
                    if hbeat_id == self.node_id:
                        reply_addr = (self.HOST, self.PORT)
                    else:
                        peer = self.network_dict.get(hbeat_id)
                        if peer is None:
                            # Sender is not (or no longer) in network_dict:
                            # there is no address to answer, and a KeyError
                            # here would kill the heartbeat thread.
                            continue
                        reply_addr = (peer[0], peer[1])

                    # reply to heartbeat
                    with socket.socket(socket.AF_INET,
                                       socket.SOCK_STREAM) as s:
                        try:
                            s.connect(reply_addr)
                        except OSError:
                            continue  # sender unreachable: best effort
                        (heartbeat_msg._source_host,
                         heartbeat_msg._source_port) = s.getsockname()
                        (heartbeat_msg._recv_host,
                         heartbeat_msg._recv_port) = reply_addr
                        heartbeat_msg._msg_id = (self.node_id, tid)
                        heartbeat_msg._data_dict = {'type': 'reply'}
                        send_msg(s, heartbeat_msg)

                if self.ldr_alive:
                    if got_ldr_hbeat:
                        self.ldr_timeout_count = 0
                    else:
                        self.ldr_timeout_count += 1
                    # check if leader has failed; `with` guarantees the lock
                    # is released even if starting the election thread fails
                    with self.ldr_stat_lock:
                        if self.ldr_timeout_count >= self.timeout_thresh:
                            self.ldr_timeout_count = 0
                            print("Leader failure detected")
                            self.ldr_alive = False
                            self.network_dict.pop(self.ldr_id, None)
                            leader_elect_thread = threading.Thread(
                                target=self.ldrelect_thread_fn, args=())
                            leader_elect_thread.start()
                            self.ldr_elect_tid = leader_elect_thread.ident
                # re-starting timer
                time.sleep(self.ldr_heartbeat_delay)
def ldrelect_thread_fn(self):
    """Pick a new leader and send it a leader proposal.

    The candidate set is every node id in ``self.network_dict`` at start-up
    plus this node's own id. If this node has the smallest id it sends the
    ``ldr_proposal`` to itself and exits. Otherwise it repeatedly:

      1. sends a heartbeat to every other candidate,
      2. sleeps ``self.heartbeat_delay * self.timeout_thresh``,
      3. reads the replies from its queue in ``self.thread_msg_qs``
         (per the original comment, the coordinator forwards them here),
      4. sends an ``ldr_proposal`` to the smallest id that responded,

    until a proposal could be delivered or ``self.ldr_alive`` becomes true.

    On every exit path, including exceptions, ``self.ldr_elect_tid`` is
    reset to ``None`` and this thread's queue is removed from
    ``self.thread_msg_qs``.
    """
    tid = threading.get_ident()
    print("DEBUG_MSG: Leader Election started ")
    self.thread_msg_qs[tid] = queue.Queue()

    try:
        heartbeat_msg = Message(Msg_type['heartbeat'])

        has_leader = False
        # The candidate list never changes during the election, so it is
        # sorted once up front.
        nodes = sorted(list(self.network_dict) + [self.node_id])

        while not has_leader and not self.ldr_alive:
            print(nodes)

            # if this is itself the smallest id node
            if nodes[0] == self.node_id:
                msg = Message(Msg_type['ldr_proposal'])
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    try:
                        s.connect((self.HOST, self.PORT))
                    except OSError:
                        # NOTE(review): the election ends here even though
                        # no proposal was sent; the original comment said
                        # "re-start process" but the code always returned.
                        pass
                    else:
                        msg._source_host, msg._source_port = s.getsockname()
                        msg._recv_host = self.HOST
                        msg._recv_port = self.PORT
                        msg._msg_id = (self.node_id, tid)
                        # assume that beyond this point, the found node
                        # stays alive, or this thread begins later again
                        # or in some other node
                        has_leader = True
                        send_msg(s, msg)
                return

            # Probe every other candidate with a heartbeat.
            for n_id in nodes:
                if n_id == self.node_id:
                    continue
                print("DEBUG_MSG: sending heartbeat from ldr_elect to: ",
                      n_id)
                peer = self.network_dict.get(n_id)
                if peer is None:
                    continue  # removed from the network since start-up
                peer_addr = (peer[0], peer[1])
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    try:
                        s.connect(peer_addr)
                    except OSError:
                        continue  # unreachable: it will simply not respond
                    (heartbeat_msg._source_host,
                     heartbeat_msg._source_port) = s.getsockname()
                    (heartbeat_msg._recv_host,
                     heartbeat_msg._recv_port) = peer_addr
                    heartbeat_msg._msg_id = (self.node_id, tid)
                    send_msg(s, heartbeat_msg)

            # now, the coordinator is responsible to pass the heartbeat
            # messages into this thread

            # wait for timeout amount of time before deciding which all
            # are alive
            # TODO: need to wait for multiple time-outs?
            time.sleep(self.heartbeat_delay * self.timeout_thresh)

            # This node always counts as alive.
            responded_nodes = {self.node_id}
            q = self.thread_msg_qs[tid]
            while not q.empty():
                responded_nodes.add(q.get()._msg_id[0])
            print("DEBUG_MSG: responded_nodes: ", responded_nodes)
            prospective_ldr = min(responded_nodes)

            msg = Message(Msg_type['ldr_proposal'])
            if prospective_ldr == self.node_id:
                new_recv = (self.HOST, self.PORT)
            else:
                ldr_entry = self.network_dict[prospective_ldr]
                new_recv = (ldr_entry[0], ldr_entry[1])
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                try:
                    s.connect(new_recv)
                except OSError:
                    continue  # go to outer while loop and re-start process
                msg._source_host, msg._source_port = s.getsockname()
                msg._recv_host, msg._recv_port = new_recv
                msg._msg_id = (self.node_id, tid)
                # assume that beyond this point, the found node stays
                # alive, or this thread begins later again or in some
                # other node
                has_leader = True
                send_msg(s, msg)
    finally:
        # clear its existence before exiting, on every path
        self.ldr_elect_tid = None
        self.thread_msg_qs.pop(tid, None)