Пример #1
0
 def connectionClosed(self, conn):
     app = self.app
     if app.master_conn is not None:
         msg = "connection to primary master node closed"
         logging.critical(msg)
         app.master_conn = None
         for txn_context in app.txn_contexts():
             txn_context['error'] = msg
     app.primary_master_node = None
     super(PrimaryNotificationsHandler, self).connectionClosed(conn)
Пример #2
0
 def connectionClosed(self, conn):
     app = self.app
     if app.master_conn is not None:
         msg = "connection to primary master node closed"
         logging.critical(msg)
         app.master_conn = None
         for txn_context in app.txn_contexts():
             txn_context["error"] = msg
     app.primary_master_node = None
     super(PrimaryNotificationsHandler, self).connectionClosed(conn)
Пример #3
0
 def connectionClosed(self, conn):
     app = self.app
     if app.master_conn is not None:
         msg = "connection to primary master node closed"
         logging.critical(msg)
         app.master_conn = None
         for txn_context in app.txn_contexts():
             txn_context.error = msg
     try:
         app.__dict__.pop('pt').clear()
     except KeyError:
         pass
     app.primary_master_node = None
     super(PrimaryNotificationsHandler, self).connectionClosed(conn)
Пример #4
0
 def connectionClosed(self, conn):
     app = self.app
     if app.master_conn is not None:
         msg = "connection to primary master node closed"
         logging.critical(msg)
         app.master_conn = None
         for txn_context in app.txn_contexts():
             txn_context['error'] = msg
     try:
         del app.pt
     except AttributeError:
         pass
     app.primary_master_node = None
     super(PrimaryNotificationsHandler, self).connectionClosed(conn)
Пример #5
0
    def shutdown(self):
        """Close all connections and exit"""
        self.changeClusterState(ClusterStates.STOPPING)
        # Marking a fictional storage node as starting operation blocks any
        # request to start a new transaction. Do this way has 2 advantages:
        # - It's simpler than changing the handler of all clients,
        #   which is anyway not supported by EventQueue.
        # - Returning an error code would cause activity on client side for
        #   nothing.
        # What's important is to not abort during the second phase of commits
        # and for this, clients must even be able to reconnect, in case of
        # failure during tpc_finish.
        # We're rarely involved in vote, so we have to trust clients that they
        # abort any transaction that is still in the first phase.
        self.storage_starting_set.add(None)
        try:
            # wait for all transaction to be finished
            while self.tm.hasPending():
                self.em.poll(1)
        except StoppedOperation:
            logging.critical('No longer operational')

        logging.info("asking remaining nodes to shutdown")
        self.listening_conn.close()
        handler = EventHandler(self)
        for node in self.nm.getList():
            if not node.isConnected(True):
                continue
            conn = node.getConnection()
            conn.setHandler(handler)
            if not conn.connecting:
                if node.isStorage():
                    conn.send(
                        Packets.NotifyNodeInformation(
                            monotonic_time(),
                            ((node.getType(), node.getAddress(),
                              node.getUUID(), NodeStates.DOWN, None), )))
                if conn.pending():
                    conn.abort()
                    continue
            conn.close()

        while self.em.connection_dict:
            self.em.poll(1)

        # then shutdown
        sys.exit()
Пример #6
0
    def shutdown(self):
        """Close all connections and exit"""
        self.changeClusterState(ClusterStates.STOPPING)
        self.listening_conn.close()
        for conn in self.em.getConnectionList():
            node = self.nm.getByUUID(conn.getUUID())
            if node is None or not node.isIdentified():
                conn.close()
        # No need to change handlers in order to reject RequestIdentification
        # & AskBeginTransaction packets because they won't be any:
        # the only remaining connected peers are identified non-clients
        # and we don't accept new connections anymore.
        try:
            # wait for all transaction to be finished
            while self.tm.hasPending():
                self.em.poll(1)
        except StoppedOperation:
            logging.critical('No longer operational')

        logging.info("asking remaining nodes to shutdown")
        handler = EventHandler(self)
        for node in self.nm.getConnectedList():
            conn = node.getConnection()
            if node.isStorage():
                conn.setHandler(handler)
                conn.notify(
                    Packets.NotifyNodeInformation(
                        ((node.getType(), node.getAddress(), node.getUUID(),
                          NodeStates.TEMPORARILY_DOWN), )))
                conn.abort()
            elif conn.pending():
                conn.abort()
            else:
                conn.close()

        while self.em.connection_dict:
            self.em.poll(1)

        # then shutdown
        sys.exit()
Пример #7
0
    def shutdown(self):
        """Close all connections and exit"""
        self.changeClusterState(ClusterStates.STOPPING)
        self.listening_conn.close()
        for conn in self.em.getConnectionList():
            node = self.nm.getByUUID(conn.getUUID())
            if node is None or not node.isIdentified():
                conn.close()
        # No need to change handlers in order to reject RequestIdentification
        # & AskBeginTransaction packets because they won't be any:
        # the only remaining connected peers are identified non-clients
        # and we don't accept new connections anymore.
        try:
            # wait for all transaction to be finished
            while self.tm.hasPending():
                self.em.poll(1)
        except StoppedOperation:
            logging.critical('No longer operational')

        logging.info("asking remaining nodes to shutdown")
        handler = EventHandler(self)
        for node in self.nm.getConnectedList():
            conn = node.getConnection()
            if node.isStorage():
                conn.setHandler(handler)
                conn.notify(Packets.NotifyNodeInformation(((
                  node.getType(), node.getAddress(), node.getUUID(),
                  NodeStates.TEMPORARILY_DOWN),)))
                conn.abort()
            elif conn.pending():
                conn.abort()
            else:
                conn.close()

        while self.em.connection_dict:
            self.em.poll(1)

        # then shutdown
        sys.exit()
Пример #8
0
    def playPrimaryRole(self):
        logging.info('play the primary role with %r', self.listening_conn)
        self.master_address_dict.clear()
        em = self.em
        packet = Packets.AnnouncePrimary()
        for conn in em.getConnectionList():
            if conn.isListening():
                conn.setHandler(identification.IdentificationHandler(self))
            else:
                conn.notify(packet)
                # Primary master should rather establish connections to all
                # secondaries, rather than the other way around. This requires
                # a bit more work when a new master joins a cluster but makes
                # it easier to resolve UUID conflicts with minimal cluster
                # impact, and ensure primary master unicity (primary masters
                # become noisy, in that they actively try to maintain
                # connections to all other master nodes, so duplicate
                # primaries will eventually get in touch with each other and
                # resolve the situation with a duel).
                # TODO: only abort client connections, don't close server
                # connections as we want to have them in the end. Secondary
                # masters will reconnect nevertheless, but it's dirty.
                # Currently, it's not trivial to preserve connected nodes,
                # because of poor node status tracking during election.
                conn.abort()

        # If I know any storage node, make sure that they are not in the
        # running state, because they are not connected at this stage.
        for node in self.nm.getStorageList():
            if node.isRunning():
                node.setTemporarilyDown()

        if self.uuid is None:
            self.uuid = self.getNewUUID(None, self.server, NodeTypes.MASTER)
            logging.info('My UUID: ' + uuid_str(self.uuid))
        else:
            in_conflict = self.nm.getByUUID(self.uuid)
            if in_conflict is not None:
                logging.warning('UUID conflict at election exit with %r',
                    in_conflict)
                in_conflict.setUUID(None)

        # Do not restart automatically if ElectionFailure is raised, in order
        # to avoid a split of the database. For example, with 2 machines with
        # a master and a storage on each one and replicas=1, the secondary
        # master becomes primary in case of network failure between the 2
        # machines but must not start automatically: otherwise, each storage
        # node would diverge.
        self._startup_allowed = False
        try:
            while True:
                self.runManager(RecoveryManager)
                try:
                    self.runManager(VerificationManager)
                    if not self.backup_tid:
                        self.provideService()
                        # self.provideService only returns without raising
                        # when switching to backup mode.
                    if self.backup_app is None:
                        raise RuntimeError("No upstream cluster to backup"
                                           " defined in configuration")
                    truncate = Packets.Truncate(
                        self.backup_app.provideService())
                except StoppedOperation, e:
                    logging.critical('No longer operational')
                    truncate = Packets.Truncate(*e.args) if e.args else None
                    # Automatic restart except if we truncate or retry to.
                    self._startup_allowed = not (self.truncate_tid or truncate)
                node_list = []
                for node in self.nm.getIdentifiedList():
                    if node.isStorage() or node.isClient():
                        conn = node.getConnection()
                        conn.notify(Packets.StopOperation())
                        if node.isClient():
                            conn.abort()
                            continue
                        if truncate:
                            conn.notify(truncate)
                        if node.isRunning():
                            node.setPending()
                            node_list.append(node)
                self.broadcastNodesInformation(node_list)
        except StateChangedException, e:
            assert e.args[0] == ClusterStates.STOPPING
            self.shutdown()
Пример #9
0
    def playPrimaryRole(self):
        logging.info('play the primary role with %r', self.listening_conn)
        self.master_address_dict.clear()
        em = self.em
        packet = Packets.AnnouncePrimary()
        for conn in em.getConnectionList():
            if conn.isListening():
                conn.setHandler(identification.IdentificationHandler(self))
            else:
                conn.notify(packet)
                # Primary master should rather establish connections to all
                # secondaries, rather than the other way around. This requires
                # a bit more work when a new master joins a cluster but makes
                # it easier to resolve UUID conflicts with minimal cluster
                # impact, and ensure primary master unicity (primary masters
                # become noisy, in that they actively try to maintain
                # connections to all other master nodes, so duplicate
                # primaries will eventually get in touch with each other and
                # resolve the situation with a duel).
                # TODO: only abort client connections, don't close server
                # connections as we want to have them in the end. Secondary
                # masters will reconnect nevertheless, but it's dirty.
                # Currently, it's not trivial to preserve connected nodes,
                # because of poor node status tracking during election.
                conn.abort()

        # If I know any storage node, make sure that they are not in the
        # running state, because they are not connected at this stage.
        for node in self.nm.getStorageList():
            if node.isRunning():
                node.setTemporarilyDown()

        if self.uuid is None:
            self.uuid = self.getNewUUID(None, self.server, NodeTypes.MASTER)
            logging.info('My UUID: ' + uuid_str(self.uuid))
        else:
            in_conflict = self.nm.getByUUID(self.uuid)
            if in_conflict is not None:
                logging.warning('UUID conflict at election exit with %r',
                                in_conflict)
                in_conflict.setUUID(None)

        # Do not restart automatically if ElectionFailure is raised, in order
        # to avoid a split of the database. For example, with 2 machines with
        # a master and a storage on each one and replicas=1, the secondary
        # master becomes primary in case of network failure between the 2
        # machines but must not start automatically: otherwise, each storage
        # node would diverge.
        self._startup_allowed = False
        try:
            while True:
                self.runManager(RecoveryManager)
                try:
                    self.runManager(VerificationManager)
                    if not self.backup_tid:
                        self.provideService()
                        # self.provideService only returns without raising
                        # when switching to backup mode.
                    if self.backup_app is None:
                        raise RuntimeError("No upstream cluster to backup"
                                           " defined in configuration")
                    truncate = Packets.Truncate(
                        self.backup_app.provideService())
                except StoppedOperation, e:
                    logging.critical('No longer operational')
                    truncate = Packets.Truncate(*e.args) if e.args else None
                    # Automatic restart except if we truncate or retry to.
                    self._startup_allowed = not (self.truncate_tid or truncate)
                node_list = []
                for node in self.nm.getIdentifiedList():
                    if node.isStorage() or node.isClient():
                        conn = node.getConnection()
                        conn.notify(Packets.StopOperation())
                        if node.isClient():
                            conn.abort()
                            continue
                        if truncate:
                            conn.notify(truncate)
                        if node.isRunning():
                            node.setPending()
                            node_list.append(node)
                self.broadcastNodesInformation(node_list)
        except StateChangedException, e:
            assert e.args[0] == ClusterStates.STOPPING
            self.shutdown()
Пример #10
0
 def stopOperation(self, conn):
     logging.critical("master node ask to stop operation")
Пример #11
0
    def playPrimaryRole(self):
        logging.info('play the primary role with %r', self.listening_conn)
        self.primary_master = None
        for conn in self.em.getConnectionList():
            if conn.isListening():
                conn.setHandler(identification.IdentificationHandler(self))
            else:
                conn.close()

        # If I know any storage node, make sure that they are not in the
        # running state, because they are not connected at this stage.
        for node in self.nm.getStorageList():
            assert node.isDown(), node

        if self.uuid is None:
            self.uuid = self.getNewUUID(None, self.server, NodeTypes.MASTER)
            logging.info('My UUID: ' + uuid_str(self.uuid))
        self._node.setRunning()
        self._node.id_timestamp = None
        self.primary = monotonic_time()

        # Do not restart automatically if an election happens, in order
        # to avoid a split of the database. For example, with 2 machines with
        # a master and a storage on each one and replicas=1, the secondary
        # master becomes primary in case of network failure between the 2
        # machines but must not start automatically: otherwise, each storage
        # node would diverge.
        self._startup_allowed = False
        try:
            while True:
                self.runManager(RecoveryManager)
                try:
                    self.runManager(VerificationManager)
                    if not self.backup_tid:
                        self.provideService()
                        # self.provideService only returns without raising
                        # when switching to backup mode.
                    if self.backup_app is None:
                        raise RuntimeError("No upstream cluster to backup"
                                           " defined in configuration")
                    truncate = Packets.Truncate(
                        self.backup_app.provideService())
                except StoppedOperation, e:
                    logging.critical('No longer operational')
                    truncate = Packets.Truncate(*e.args) if e.args else None
                    # Automatic restart except if we truncate or retry to.
                    self._startup_allowed = not (self.truncate_tid or truncate)
                self.storage_readiness = 0
                self.storage_ready_dict.clear()
                self.storage_starting_set.clear()
                node_list = []
                for node in self.nm.getIdentifiedList():
                    if node.isStorage() or node.isClient():
                        conn = node.getConnection()
                        conn.send(Packets.StopOperation())
                        if node.isClient():
                            conn.abort()
                            continue
                        if truncate:
                            conn.send(truncate)
                        if node.isRunning():
                            node.setPending()
                            node_list.append(node)
                self.broadcastNodesInformation(node_list)
        except StateChangedException, e:
            assert e.args[0] == ClusterStates.STOPPING
            self.shutdown()
Пример #12
0
 def stopOperation(self, conn):
     logging.critical("master node ask to stop operation")