def connectToPrimary(self):
    """Locate the primary master node and connect to it.

    While no primary master is elected or ready, the connection attempt
    is repeated periodically.  Connections from non-master nodes are not
    accepted at this stage.

    The partition table is created from the values returned by the
    bootstrap when none exists yet; otherwise the existing table is
    checked against them.  Once connected, the master event handler is
    installed and the cluster state and partition table are requested.

    Raises RuntimeError if the existing partition table disagrees with
    the returned number of partitions or replicas.
    """
    self.cluster_state = None

    # search, find, connect and identify to the primary master
    bootstrap = BootstrapManager(self, NodeTypes.ADMIN, self.server)
    primary = bootstrap.getPrimaryConnection()
    node, conn, num_partitions, num_replicas = primary
    self.master_node = node
    self.master_conn = conn

    pt = self.pt
    if pt is None:
        self.pt = PartitionTable(num_partitions, num_replicas)
    else:
        # XXX: shouldn't we recover instead of raising ?
        if pt.getPartitions() != num_partitions:
            raise RuntimeError('the number of partitions is inconsistent')
        if pt.getReplicas() != num_replicas:
            raise RuntimeError('the number of replicas is inconsistent')

    # passive handler
    conn.setHandler(self.master_event_handler)
    conn.ask(Packets.AskClusterState())
    conn.ask(Packets.AskPartitionTable())
def connectToPrimary(self):
    """Locate the primary master node and connect to it.

    While no primary master is elected or ready, the connection attempt
    is repeated periodically.  Connections from non-master nodes are not
    accepted at this stage.

    The node, connection and uuid returned by the bootstrap are stored on
    this object.  The partition table is created from the returned
    parameters when none exists yet; otherwise the existing table is
    checked against them.  Finally the master event handler is installed
    and the cluster state, node information and partition table are
    requested.

    Raises RuntimeError if the existing partition table disagrees with
    the returned number of partitions or replicas.
    """
    self.cluster_state = None

    # search, find, connect and identify to the primary master
    bootstrap = BootstrapManager(self, self.name, NodeTypes.ADMIN,
                                 self.uuid, self.server)
    (self.master_node, self.master_conn, self.uuid,
     num_partitions, num_replicas) = bootstrap.getPrimaryConnection()

    pt = self.pt
    if pt is None:
        self.pt = PartitionTable(num_partitions, num_replicas)
    else:
        # XXX: shouldn't we recover instead of raising ?
        if pt.getPartitions() != num_partitions:
            raise RuntimeError('the number of partitions is inconsistent')
        if pt.getReplicas() != num_replicas:
            raise RuntimeError('the number of replicas is inconsistent')

    # passive handler
    master_conn = self.master_conn
    master_conn.setHandler(self.master_event_handler)
    master_conn.ask(Packets.AskClusterState())
    master_conn.ask(Packets.AskNodeInformation())
    master_conn.ask(Packets.AskPartitionTable())
def connectToPrimary(self):
    """Locate the primary master node and connect to it.

    While no primary master is elected or ready, the connection attempt
    is repeated periodically.  Connections from non-master nodes are not
    accepted at this stage.

    The bootstrap is given no server address when ``self.new_nid`` is
    true, and ``self.server`` otherwise.  After connecting, the uuid is
    handed to ``self.dm`` and the partition table is reloaded.
    """
    # search, find, connect and identify to the primary master
    if self.new_nid:
        server = None
    else:
        server = self.server
    bootstrap = BootstrapManager(self, NodeTypes.STORAGE, server,
                                 devpath=self.devpath,
                                 new_nid=self.new_nid)
    node, conn = bootstrap.getPrimaryConnection()
    self.master_node = node
    self.master_conn = conn
    self.dm.setUUID(self.uuid)

    # Reload a partition table from the database,
    # in case that we're in RECOVERING phase.
    self.loadPartitionTable()
def connectToPrimary(self):
    """Locate the primary master node and connect to it.

    While no primary master is elected or ready, the connection attempt
    is repeated periodically.  Connections from non-master nodes are not
    accepted at this stage.

    A list built from ``self.backup_dict`` is passed to the bootstrap as
    its ``backup`` argument.  Once connected, the master event handler is
    installed and the cluster state is requested.
    """
    self.cluster_state = None

    # search, find, connect and identify to the primary master
    bootstrap = BootstrapManager(
        self,
        NodeTypes.ADMIN,
        self.server,
        backup=list(self.backup_dict),
    )
    node, conn = bootstrap.getPrimaryConnection()
    self.master_node = node
    self.master_conn = conn

    # passive handler
    conn.setHandler(self.master_event_handler)
    conn.ask(Packets.AskClusterState())
def setUp(self):
    """Build the storage application and bootstrap manager under test.

    Runs the base-class set-up, prepares one database, creates an
    Application from the storage configuration and a BootstrapManager
    bound to it, then records the port and partition parameters used by
    the tests.
    """
    NeoUnitTestBase.setUp(self)
    self.prepareDatabase(number=1)

    # create an application object
    app = Application(self.getStorageConfiguration())
    self.app = app
    self.bootstrap = BootstrapManager(app, 'main', NodeTypes.STORAGE)

    # define some variable to simulate client and storage node
    self.master_port, self.storage_port = 10010, 10020
    self.num_partitions, self.num_replicas = 1009, 2
class BootstrapManagerTests(NeoUnitTestBase):
    """Unit tests for BootstrapManager driven by a storage Application."""

    def setUp(self):
        """Create the application and the bootstrap manager under test."""
        NeoUnitTestBase.setUp(self)
        self.prepareDatabase(number=1)

        # create an application object
        app = Application(self.getStorageConfiguration())
        self.app = app
        self.bootstrap = BootstrapManager(app, 'main', NodeTypes.STORAGE)

        # define some variable to simulate client and storage node
        self.master_port, self.storage_port = 10010, 10020
        self.num_partitions, self.num_replicas = 1009, 2

    def _tearDown(self, success):
        """Close and drop the application, then run the base tear-down."""
        self.app.close()
        del self.app
        super(BootstrapManagerTests, self)._tearDown(success)

    def _connectFakeMaster(self):
        """Return a fake connection to a master registered as current.

        The fake connection is created first, then a master node with the
        same address is created and stored as ``self.bootstrap.current``.
        """
        master_address = ("127.0.0.1", self.master_port)
        fake_conn = self.getFakeConnection(address=master_address)
        master = self.app.nm.createMaster(address=master_address)
        self.bootstrap.current = master
        return fake_conn

    # Tests
    def testConnectionCompleted(self):
        """A completed connection triggers an identification request."""
        fake_conn = self._connectFakeMaster()
        self.bootstrap.connectionCompleted(fake_conn)
        self.checkRequestIdentification(fake_conn)

    def testHandleNotReady(self):
        """A 'not ready' answer closes the connection without replying."""
        # the primary is not ready
        fake_conn = self._connectFakeMaster()
        self.bootstrap.notReady(fake_conn, '')
        self.checkClosed(fake_conn)
        self.checkNoPacketSent(fake_conn)
def provideService(self):
    """Run the backup loop until backup mode is asked to stop.

    Each pass of the outer loop:

    1. switches the cluster to STARTING_BACKUP and resets the per-pass
       bookkeeping (``primary_partition_dict`` and ``tid_list``);
    2. polls until ``app.isStorageReady`` holds for every node returned
       by ``pt.getNodeSet(readable=True)``;
    3. obtains a connection from the bootstrap manager, switches the
       cluster to BACKINGUP, installs a BackupHandler, asks for the last
       transaction and then polls indefinitely.

    A PrimaryFailure raised while backing up is logged and the outer
    loop starts over.  A StateChangedException carrying STOPPING_BACKUP
    ends the loop; any other StateChangedException is re-raised.

    Returns the value of ``app.backup_tid`` read when STOPPING_BACKUP is
    handled (``app.backup_tid`` itself is deleted before returning).
    """
    logging.info('provide backup')
    # Local aliases for names used repeatedly below.
    poll = self.em.poll
    app = self.app
    pt = app.pt
    while True:
        app.changeClusterState(ClusterStates.STARTING_BACKUP)
        bootstrap = BootstrapManager(self, NodeTypes.CLIENT,
            backup=app.name)
        # {offset -> node}
        self.primary_partition_dict = {}
        # [[tid]]
        self.tid_list = tuple([] for _ in xrange(pt.getPartitions()))
        try:
            # Wait until every readable storage node is reported ready:
            # the for/else only leaves the 'while' when no node caused
            # an early 'break'.
            while True:
                for node in pt.getNodeSet(readable=True):
                    if not app.isStorageReady(node.getUUID()):
                        break
                else:
                    break
                poll(1)
            node, conn = bootstrap.getPrimaryConnection()
            try:
                app.changeClusterState(ClusterStates.BACKINGUP)
                del bootstrap, node
                self.ignore_invalidations = True
                conn.setHandler(BackupHandler(self))
                conn.ask(Packets.AskLastTransaction())
                # debug variable to log how big 'tid_list' can be.
                self.debug_tid_count = 0
                # Poll forever; this loop is only left by an exception.
                while True:
                    poll(1)
            except PrimaryFailure, msg:
                logging.error('upstream master is down: %s', msg)
            finally:
                # Runs however the inner 'try' is left: record the backup
                # tid, then release the connection and per-pass state.
                app.backup_tid = pt.getBackupTid()
                try:
                    conn.close()
                except PrimaryFailure:
                    pass
                try:
                    del self.pt
                except AttributeError:
                    # self.pt was not set; nothing to remove.
                    pass
                for node in app.nm.getClientList(True):
                    node.getConnection().close()
        except StateChangedException, e:
            # NOTE(review): everything below, up to 'return tid', is taken
            # to belong to this except clause - confirm the nesting.
            if e.args[0] != ClusterStates.STOPPING_BACKUP:
                raise
            app.changeClusterState(*e.args)
            tid = app.backup_tid
            # Wait for non-primary partitions to catch up,
            # so that all UP_TO_DATE cells are really UP_TO_DATE.
            # XXX: Another possibility could be to outdate such cells, and
            #      they would be quickly updated at the beginning of the
            #      RUNNING phase. This may simplify code.
            # Any unfinished replication from upstream will be truncated.
            while pt.getBackupTid(min) < tid:
                poll(1)
            last_tid = app.getLastTransaction()
            # NOTE(review): 'handler' is not used in the visible code.
            handler = EventHandler(app)
            if tid < last_tid:
                assert tid != ZERO_TID
                logging.warning("Truncating at %s (last_tid was %s)",
                    dump(app.backup_tid), dump(last_tid))
            else:
                # We will do a dummy truncation, just to leave backup mode,
                # so it's fine to start automatically if there's any
                # missing storage.
                # XXX: Consider using another method to leave backup mode,
                #      at least when there's nothing to truncate. Because
                #      in case of StoppedOperation during VERIFYING state,
                #      this flag will be wrongly set to False.
                app._startup_allowed = True
            # If any error happened before reaching this line, we'd go back
            # to backup mode, which is the right mode to recover.
            del app.backup_tid
            # Now back to RECOVERY...
            return tid
def provideService(self):
    """Run the backup loop until backup mode is asked to stop.

    Each pass of the outer loop:

    1. switches the cluster to STARTING_BACKUP and resets the per-pass
       bookkeeping (``primary_partition_dict`` and ``tid_list``);
    2. polls until ``app.isStorageReady`` holds for every node returned
       by ``pt.getNodeSet(readable=True)``;
    3. obtains a connection and partition parameters from the bootstrap
       manager, switches the cluster to BACKINGUP, builds ``self.pt``,
       installs a BackupHandler, asks for node information, the
       partition table and the last transaction, then polls indefinitely.

    A PrimaryFailure raised while backing up is logged and the outer
    loop starts over.  A StateChangedException carrying STOPPING_BACKUP
    ends the loop; any other StateChangedException is re-raised.

    Returns the value of ``app.backup_tid`` read when STOPPING_BACKUP is
    handled (``app.backup_tid`` itself is deleted before returning).

    Raises RuntimeError if the number of partitions returned by the
    bootstrap differs from ``pt.getPartitions()``.
    """
    logging.info('provide backup')
    # Local aliases for names used repeatedly below.
    poll = self.em.poll
    app = self.app
    pt = app.pt
    while True:
        app.changeClusterState(ClusterStates.STARTING_BACKUP)
        bootstrap = BootstrapManager(self, self.name, NodeTypes.CLIENT)
        # {offset -> node}
        self.primary_partition_dict = {}
        # [[tid]]
        self.tid_list = tuple([] for _ in xrange(pt.getPartitions()))
        try:
            # Wait until every readable storage node is reported ready:
            # the for/else only leaves the 'while' when no node caused
            # an early 'break'.
            while True:
                for node in pt.getNodeSet(readable=True):
                    if not app.isStorageReady(node.getUUID()):
                        break
                else:
                    break
                poll(1)
            # NOTE(review): 'uuid' is not used in the visible code.
            node, conn, uuid, num_partitions, num_replicas = \
                bootstrap.getPrimaryConnection()
            try:
                app.changeClusterState(ClusterStates.BACKINGUP)
                del bootstrap, node
                if num_partitions != pt.getPartitions():
                    raise RuntimeError("inconsistent number of partitions")
                self.pt = PartitionTable(num_partitions, num_replicas)
                conn.setHandler(BackupHandler(self))
                conn.ask(Packets.AskNodeInformation())
                conn.ask(Packets.AskPartitionTable())
                conn.ask(Packets.AskLastTransaction())
                # debug variable to log how big 'tid_list' can be.
                self.debug_tid_count = 0
                # Poll forever; this loop is only left by an exception.
                while True:
                    poll(1)
            except PrimaryFailure, msg:
                logging.error('upstream master is down: %s', msg)
            finally:
                # Runs however the inner 'try' is left: record the backup
                # tid, then release the connection and per-pass state.
                app.backup_tid = pt.getBackupTid()
                try:
                    conn.close()
                except PrimaryFailure:
                    pass
                try:
                    del self.pt
                except AttributeError:
                    # self.pt was not set (e.g. the partition check
                    # raised before it was assigned).
                    pass
        except StateChangedException, e:
            # NOTE(review): everything below, up to 'return tid', is taken
            # to belong to this except clause - confirm the nesting.
            if e.args[0] != ClusterStates.STOPPING_BACKUP:
                raise
            app.changeClusterState(*e.args)
            tid = app.backup_tid
            # Wait for non-primary partitions to catch up,
            # so that all UP_TO_DATE cells are really UP_TO_DATE.
            # XXX: Another possibility could be to outdate such cells, and
            #      they would be quickly updated at the beginning of the
            #      RUNNING phase. This may simplify code.
            # Any unfinished replication from upstream will be truncated.
            while pt.getBackupTid(min) < tid:
                poll(1)
            last_tid = app.getLastTransaction()
            # NOTE(review): 'handler' is not used in the visible code.
            handler = EventHandler(app)
            if tid < last_tid:
                assert tid != ZERO_TID
                logging.warning("Truncating at %s (last_tid was %s)",
                    dump(app.backup_tid), dump(last_tid))
            else:
                # We will do a dummy truncation, just to leave backup mode,
                # so it's fine to start automatically if there's any
                # missing storage.
                # XXX: Consider using another method to leave backup mode,
                #      at least when there's nothing to truncate. Because
                #      in case of StoppedOperation during VERIFYING state,
                #      this flag will be wrongly set to False.
                app._startup_allowed = True
            # If any error happened before reaching this line, we'd go back
            # to backup mode, which is the right mode to recover.
            del app.backup_tid
            # Now back to RECOVERY...
            return tid