def connectionClosed(self, conn):
    """Handle the loss of the connection to the primary master.

    When ``app.master_conn`` is still set, a critical message is logged,
    ``app.master_conn`` is reset to None and the message is stored under
    the ``'error'`` key of every context returned by
    ``app.txn_contexts()``.  ``app.primary_master_node`` is then reset and
    the event is forwarded to the parent handler.
    """
    client = self.app
    if client.master_conn is not None:
        reason = "connection to primary master node closed"
        logging.critical(reason)
        client.master_conn = None
        # Record the failure on each transaction context.
        for context in client.txn_contexts():
            context['error'] = reason
    client.primary_master_node = None
    super(PrimaryNotificationsHandler, self).connectionClosed(conn)
def connectionClosed(self, conn):
    """React to the primary master connection being closed.

    If the application still references a master connection, log the
    event at critical level, drop that reference and write the same
    message into the ``"error"`` entry of each transaction context.
    In every case the known primary master node is forgotten and the
    parent class implementation is called.
    """
    application = self.app
    had_master = application.master_conn is not None
    if had_master:
        error_text = "connection to primary master node closed"
        logging.critical(error_text)
        application.master_conn = None
        for ctx in application.txn_contexts():
            ctx["error"] = error_text
    application.primary_master_node = None
    super(PrimaryNotificationsHandler, self).connectionClosed(conn)
def connectionClosed(self, conn):
    """Handle the loss of the connection to the primary master.

    When ``app.master_conn`` is still set: log a critical message, reset
    ``app.master_conn``, store the message in the ``error`` attribute of
    every context returned by ``app.txn_contexts()``, and remove the
    ``pt`` entry from the application's instance dictionary, calling
    ``clear()`` on it.  Afterwards ``app.primary_master_node`` is reset
    and the parent handler is invoked.
    """
    client = self.app
    if client.master_conn is not None:
        reason = "connection to primary master node closed"
        logging.critical(reason)
        client.master_conn = None
        for context in client.txn_contexts():
            context.error = reason
        try:
            # Drop the 'pt' instance attribute, if any, and empty it.
            client.__dict__.pop('pt').clear()
        except KeyError:
            pass
    client.primary_master_node = None
    super(PrimaryNotificationsHandler, self).connectionClosed(conn)
def connectionClosed(self, conn):
    """React to the primary master connection being closed.

    If the application still references a master connection, log the
    event at critical level, drop that reference, write the message into
    the ``'error'`` entry of each transaction context and delete the
    application's ``pt`` attribute (a missing attribute is ignored).
    In every case the known primary master node is forgotten and the
    parent class implementation is called.
    """
    application = self.app
    if application.master_conn is not None:
        error_text = "connection to primary master node closed"
        logging.critical(error_text)
        application.master_conn = None
        for ctx in application.txn_contexts():
            ctx['error'] = error_text
        try:
            del application.pt
        except AttributeError:
            # Nothing to delete.
            pass
    application.primary_master_node = None
    super(PrimaryNotificationsHandler, self).connectionClosed(conn)
def shutdown(self):
    """Close all connections and exit.

    The cluster state is switched to STOPPING, pending transactions are
    given the chance to finish, then every connected node has its
    connection aborted or closed.  Once the event manager has no
    connection left, the process exits through ``sys.exit()``.
    """
    self.changeClusterState(ClusterStates.STOPPING)
    # Marking a fictional storage node as starting operation blocks any
    # request to start a new transaction. Doing it this way has 2
    # advantages:
    # - It's simpler than changing the handler of all clients,
    #   which is anyway not supported by EventQueue.
    # - Returning an error code would cause activity on client side for
    #   nothing.
    # What's important is to not abort during the second phase of commits
    # and for this, clients must even be able to reconnect, in case of
    # failure during tpc_finish.
    # We're rarely involved in vote, so we have to trust clients that they
    # abort any transaction that is still in the first phase.
    self.storage_starting_set.add(None)
    try:
        # Keep polling until no transaction is pending anymore.
        while self.tm.hasPending():
            self.em.poll(1)
    except StoppedOperation:
        logging.critical('No longer operational')

    logging.info("asking remaining nodes to shutdown")
    self.listening_conn.close()
    quiet_handler = EventHandler(self)
    for peer in self.nm.getList():
        if peer.isConnected(True):
            link = peer.getConnection()
            link.setHandler(quiet_handler)
            established = not link.connecting
            if established and peer.isStorage():
                # Tell the storage node that it is now considered DOWN.
                link.send(Packets.NotifyNodeInformation(
                    monotonic_time(),
                    ((peer.getType(), peer.getAddress(), peer.getUUID(),
                      NodeStates.DOWN, None),)))
            if established and link.pending():
                link.abort()
            else:
                link.close()

    # Poll until the event manager has no connection left.
    while self.em.connection_dict:
        self.em.poll(1)

    # then shutdown
    sys.exit()
def shutdown(self):
    """Close all connections and exit.

    Sequence: switch the cluster to STOPPING, stop listening, close
    connections whose peer is unknown or not identified, wait for pending
    transactions, then abort or close the connection of every connected
    node.  When the event manager has no connection left, the process
    exits through ``sys.exit()``.
    """
    self.changeClusterState(ClusterStates.STOPPING)
    self.listening_conn.close()
    for link in self.em.getConnectionList():
        peer = self.nm.getByUUID(link.getUUID())
        if not (peer is not None and peer.isIdentified()):
            link.close()
    # No need to change handlers in order to reject RequestIdentification
    # & AskBeginTransaction packets because there won't be any:
    # the only remaining connected peers are identified non-clients
    # and we don't accept new connections anymore.
    try:
        # Keep polling until every transaction is finished.
        while self.tm.hasPending():
            self.em.poll(1)
    except StoppedOperation:
        logging.critical('No longer operational')

    logging.info("asking remaining nodes to shutdown")
    quiet_handler = EventHandler(self)
    for peer in self.nm.getConnectedList():
        link = peer.getConnection()
        if peer.isStorage():
            # Storage nodes are told that they are now TEMPORARILY_DOWN
            # before their connection is aborted.
            link.setHandler(quiet_handler)
            link.notify(Packets.NotifyNodeInformation(
                ((peer.getType(), peer.getAddress(), peer.getUUID(),
                  NodeStates.TEMPORARILY_DOWN),)))
            link.abort()
            continue
        if link.pending():
            link.abort()
        else:
            link.close()

    # Poll until the event manager has no connection left.
    while self.em.connection_dict:
        self.em.poll(1)

    # then shutdown
    sys.exit()
def shutdown(self):
    """Close all connections and exit.

    After moving the cluster to the STOPPING state and closing the
    listening connection, connections to unknown or unidentified peers
    are closed.  Pending transactions are then awaited, the remaining
    connected nodes are disconnected, and ``sys.exit()`` is called once
    the event manager holds no more connections.
    """
    self.changeClusterState(ClusterStates.STOPPING)
    self.listening_conn.close()
    for connection in self.em.getConnectionList():
        known = self.nm.getByUUID(connection.getUUID())
        if known is not None and known.isIdentified():
            continue
        connection.close()
    # No need to change handlers in order to reject RequestIdentification
    # & AskBeginTransaction packets because there won't be any:
    # the only remaining connected peers are identified non-clients
    # and we don't accept new connections anymore.
    try:
        # wait for all transactions to be finished
        while self.tm.hasPending():
            self.em.poll(1)
    except StoppedOperation:
        logging.critical('No longer operational')

    logging.info("asking remaining nodes to shutdown")
    final_handler = EventHandler(self)
    for remote in self.nm.getConnectedList():
        connection = remote.getConnection()
        if not remote.isStorage():
            # Non-storage peers: abort if something is pending,
            # otherwise close right away.
            if connection.pending():
                connection.abort()
            else:
                connection.close()
            continue
        connection.setHandler(final_handler)
        connection.notify(Packets.NotifyNodeInformation(((
            remote.getType(), remote.getAddress(), remote.getUUID(),
            NodeStates.TEMPORARILY_DOWN),)))
        connection.abort()

    # Drain the event loop until every connection is gone.
    while self.em.connection_dict:
        self.em.poll(1)

    # then shutdown
    sys.exit()
def playPrimaryRole(self):
    """Run this master as the primary one until the cluster is stopped.

    Steps, in order:

    1. Clear ``master_address_dict``, install an identification handler
       on listening connections, and send ``AnnouncePrimary`` to every
       other connection before aborting it.
    2. Mark every known storage node that is still running as
       temporarily down.
    3. Pick a UUID if none is set yet; otherwise remove the UUID of any
       known node that uses the same one.
    4. Loop over recovery, verification and service (or backup) until a
       ``StateChangedException`` is raised, then call ``shutdown()``.

    Raises:
        RuntimeError: when service returns (switch to backup mode) but
            ``backup_app`` is None.
    """
    logging.info('play the primary role with %r', self.listening_conn)
    self.master_address_dict.clear()
    em = self.em
    packet = Packets.AnnouncePrimary()
    for conn in em.getConnectionList():
        if conn.isListening():
            conn.setHandler(identification.IdentificationHandler(self))
        else:
            conn.notify(packet)
            # Primary master should rather establish connections to all
            # secondaries, rather than the other way around. This requires
            # a bit more work when a new master joins a cluster but makes
            # it easier to resolve UUID conflicts with minimal cluster
            # impact, and ensure primary master unicity (primary masters
            # become noisy, in that they actively try to maintain
            # connections to all other master nodes, so duplicate
            # primaries will eventually get in touch with each other and
            # resolve the situation with a duel).
            # TODO: only abort client connections, don't close server
            # connections as we want to have them in the end. Secondary
            # masters will reconnect nevertheless, but it's dirty.
            # Currently, it's not trivial to preserve connected nodes,
            # because of poor node status tracking during election.
            conn.abort()

    # If I know any storage node, make sure that they are not in the
    # running state, because they are not connected at this stage.
    for node in self.nm.getStorageList():
        if node.isRunning():
            node.setTemporarilyDown()

    if self.uuid is None:
        self.uuid = self.getNewUUID(None, self.server, NodeTypes.MASTER)
        logging.info('My UUID: %s', uuid_str(self.uuid))
    else:
        in_conflict = self.nm.getByUUID(self.uuid)
        if in_conflict is not None:
            logging.warning('UUID conflict at election exit with %r',
                            in_conflict)
            in_conflict.setUUID(None)

    # Do not restart automatically if ElectionFailure is raised, in order
    # to avoid a split of the database. For example, with 2 machines with
    # a master and a storage on each one and replicas=1, the secondary
    # master becomes primary in case of network failure between the 2
    # machines but must not start automatically: otherwise, each storage
    # node would diverge.
    self._startup_allowed = False
    try:
        while True:
            self.runManager(RecoveryManager)
            try:
                self.runManager(VerificationManager)
                if not self.backup_tid:
                    self.provideService()
                    # self.provideService only returns without raising
                    # when switching to backup mode.
                if self.backup_app is None:
                    raise RuntimeError("No upstream cluster to backup"
                                       " defined in configuration")
                truncate = Packets.Truncate(
                    self.backup_app.provideService())
            except StoppedOperation as e:
                logging.critical('No longer operational')
                # The exception arguments, if any, become the arguments
                # of the Truncate packet.
                truncate = Packets.Truncate(*e.args) if e.args else None
                # Automatic restart except if we truncate or retry to.
                self._startup_allowed = not (self.truncate_tid or truncate)
            node_list = []
            for node in self.nm.getIdentifiedList():
                if node.isStorage() or node.isClient():
                    conn = node.getConnection()
                    conn.notify(Packets.StopOperation())
                    if node.isClient():
                        conn.abort()
                        continue
                    if truncate:
                        conn.notify(truncate)
                    if node.isRunning():
                        node.setPending()
                        node_list.append(node)
            self.broadcastNodesInformation(node_list)
    except StateChangedException as e:
        # The only state change expected here is the switch to STOPPING.
        assert e.args[0] == ClusterStates.STOPPING
        self.shutdown()
def stopOperation(self, conn):
    """Log a critical message saying the master node asked to stop.

    ``conn`` is accepted but not used.
    """
    notice = "master node ask to stop operation"
    logging.critical(notice)
def playPrimaryRole(self):
    """Run this master as the primary one until the cluster is stopped.

    Steps, in order:

    1. Reset ``primary_master``, install an identification handler on
       listening connections and close every other connection.
    2. Assert that every known storage node is down.
    3. Pick a UUID if none is set yet, mark the local node as running,
       reset its ``id_timestamp`` and record the current monotonic time
       in ``self.primary``.
    4. Loop over recovery, verification and service (or backup) until a
       ``StateChangedException`` is raised, then call ``shutdown()``.

    Raises:
        RuntimeError: when service returns (switch to backup mode) but
            ``backup_app`` is None.
    """
    logging.info('play the primary role with %r', self.listening_conn)
    self.primary_master = None
    for conn in self.em.getConnectionList():
        if conn.isListening():
            conn.setHandler(identification.IdentificationHandler(self))
        else:
            conn.close()

    # If I know any storage node, make sure that they are not in the
    # running state, because they are not connected at this stage.
    for node in self.nm.getStorageList():
        assert node.isDown(), node

    if self.uuid is None:
        self.uuid = self.getNewUUID(None, self.server, NodeTypes.MASTER)
        logging.info('My UUID: %s', uuid_str(self.uuid))
    self._node.setRunning()
    self._node.id_timestamp = None
    self.primary = monotonic_time()

    # Do not restart automatically if an election happens, in order
    # to avoid a split of the database. For example, with 2 machines with
    # a master and a storage on each one and replicas=1, the secondary
    # master becomes primary in case of network failure between the 2
    # machines but must not start automatically: otherwise, each storage
    # node would diverge.
    self._startup_allowed = False
    try:
        while True:
            self.runManager(RecoveryManager)
            try:
                self.runManager(VerificationManager)
                if not self.backup_tid:
                    self.provideService()
                    # self.provideService only returns without raising
                    # when switching to backup mode.
                if self.backup_app is None:
                    raise RuntimeError("No upstream cluster to backup"
                                       " defined in configuration")
                truncate = Packets.Truncate(
                    self.backup_app.provideService())
            except StoppedOperation as e:
                logging.critical('No longer operational')
                # The exception arguments, if any, become the arguments
                # of the Truncate packet.
                truncate = Packets.Truncate(*e.args) if e.args else None
                # Automatic restart except if we truncate or retry to.
                self._startup_allowed = not (self.truncate_tid or truncate)
            # Reset the storage readiness bookkeeping before the next
            # iteration.
            self.storage_readiness = 0
            self.storage_ready_dict.clear()
            self.storage_starting_set.clear()
            node_list = []
            for node in self.nm.getIdentifiedList():
                if node.isStorage() or node.isClient():
                    conn = node.getConnection()
                    conn.send(Packets.StopOperation())
                    if node.isClient():
                        conn.abort()
                        continue
                    if truncate:
                        conn.send(truncate)
                    if node.isRunning():
                        node.setPending()
                        node_list.append(node)
            self.broadcastNodesInformation(node_list)
    except StateChangedException as e:
        # The only state change expected here is the switch to STOPPING.
        assert e.args[0] == ClusterStates.STOPPING
        self.shutdown()