def addPendingNodes(self, conn, uuid_list):
    """Promote the given pending storage nodes into the cluster.

    Only allowed while the cluster is RUNNING or entering/in backup
    mode.  Each matched pending node is switched to running and sent a
    StartOperation packet; the request is answered with an Ack naming
    the added nodes, or a 'No node added' Ack when no given UUID
    matched a pending storage node.
    """
    logging.debug('Add nodes %s', ', '.join(map(uuid_str, uuid_list)))
    app = self.app
    cluster_state = app.getClusterState()
    # XXX: Would it be safe to allow more states ?
    allowed_states = (ClusterStates.RUNNING,
                      ClusterStates.STARTING_BACKUP,
                      ClusterStates.BACKINGUP)
    if cluster_state not in allowed_states:
        raise ProtocolError('Can not add nodes in %s state' % cluster_state)
    # Take all pending storage nodes whose UUID was requested.
    pending = [storage for storage in app.nm.getStorageList()
               if storage.isPending() and storage.getUUID() in uuid_list]
    added = list(app.pt.addNodeList(pending))
    if not added:
        logging.warning('No node added')
        conn.answer(Errors.Ack('No node added'))
        return
    start_packet = Packets.StartOperation(bool(app.backup_tid))
    for storage in added:
        storage.setRunning()
        storage.notify(start_packet)
    app.broadcastNodesInformation(added)
    conn.answer(Errors.Ack('Nodes added: %s' %
        ', '.join(uuid_str(x.getUUID()) for x in added)))
def setClusterState(self, conn, state): app = self.app # check request try: if app.cluster_state not in CLUSTER_STATE_WORKFLOW[state]: raise ProtocolError('Can not switch to this state') except KeyError: if state != ClusterStates.STOPPING: raise ProtocolError('Invalid state requested') # change state if state == ClusterStates.VERIFYING: storage_list = app.nm.getStorageList(only_identified=True) if not storage_list: raise ProtocolError('Cannot exit recovery without any ' 'storage node') for node in storage_list: assert node.isPending(), node if node.getConnection().isPending(): # XXX: It's wrong to use ProtocolError here. We must reply # less aggressively because the admin has no way to # know that there's still pending activity. raise ProtocolError('Cannot exit recovery now: node %r is ' 'entering cluster' % (node, )) app._startup_allowed = True state = app.cluster_state elif state == ClusterStates.STARTING_BACKUP: if app.tm.hasPending() or app.nm.getClientList(True): raise ProtocolError("Can not switch to %s state with pending" " transactions or connected clients" % state) conn.answer(Errors.Ack('Cluster state changed')) if state != app.cluster_state: raise StateChangedException(state)
def setClusterState(self, conn, state):
    """Handle an admin request to switch the cluster to *state*.

    The transition is validated against CLUSTER_STATE_WORKFLOW;
    STOPPING is additionally always accepted.  Invalid requests are
    refused with AnswerDenied.  On success, an Ack is sent first, then
    StateChangedException is raised so the caller can perform the
    actual state change.
    """
    app = self.app
    # Validate the requested transition against the workflow table.
    if state in CLUSTER_STATE_WORKFLOW:
        if app.cluster_state not in CLUSTER_STATE_WORKFLOW[state]:
            raise AnswerDenied('Can not switch to this state')
    elif state != ClusterStates.STOPPING:
        # No workflow entry: only STOPPING is allowed out of workflow.
        raise AnswerDenied('Invalid state requested')
    # Apply state-specific preconditions.
    if state == ClusterStates.VERIFYING:
        storages = app.nm.getStorageList(only_identified=True)
        if not storages:
            raise AnswerDenied(
                'Cannot exit recovery without any storage node')
        for storage in storages:
            assert storage.isPending(), storage
            if storage.getConnection().isPending():
                raise AnswerDenied(
                    'Cannot exit recovery now: node %r is entering cluster'
                    % storage,
                )
        app._startup_allowed = True
        # Keep the current state so the final comparison below does not
        # raise; startup proceeds via _startup_allowed.
        state = app.cluster_state
    elif state == ClusterStates.STARTING_BACKUP:
        if app.tm.hasPending() or app.nm.getClientList(True):
            raise AnswerDenied("Can not switch to %s state with pending"
                " transactions or connected clients" % state)
    # Answer before raising: StateChangedException aborts this handler.
    conn.answer(Errors.Ack('Cluster state changed'))
    if state != app.cluster_state:
        raise StateChangedException(state)
def tweakPartitionTable(self, conn, uuid_list):
    """Rebalance the partition table for the given node UUIDs.

    Refused unless the cluster is RUNNING or entering/in backup mode.
    The resulting cell changes are broadcast, then the request is
    acknowledged.
    """
    app = self.app
    cluster_state = app.getClusterState()
    # XXX: Would it be safe to allow more states ?
    allowed_states = (ClusterStates.RUNNING,
                      ClusterStates.STARTING_BACKUP,
                      ClusterStates.BACKINGUP)
    if cluster_state not in allowed_states:
        raise ProtocolError('Can not tweak partition table in %s state'
                            % cluster_state)
    nodes = [app.nm.getByUUID(uuid) for uuid in uuid_list]
    app.broadcastPartitionChanges(app.pt.tweak(nodes))
    conn.answer(Errors.Ack(''))
def repair(self, conn, uuid_list, *args):
    """Send a NotifyRepair packet to the given storage nodes.

    All UUIDs are validated (known, storage type, identified) before
    any packet is sent, so an invalid UUID aborts the whole request
    with ProtocolError.  Extra arguments are forwarded verbatim into
    the NotifyRepair packet.  Answers with an empty Ack.
    """
    nm = self.app.nm
    targets = []
    for uuid in uuid_list:
        candidate = nm.getByUUID(uuid)
        if candidate is None or not (candidate.isStorage()
                                     and candidate.isIdentified()):
            raise ProtocolError("invalid storage node %s" % uuid_str(uuid))
        targets.append(candidate)
    packet = Packets.NotifyRepair(*args)
    for target in targets:
        target.send(packet)
    conn.answer(Errors.Ack(''))
def addPendingNodes(self, conn, uuid_list):
    """Promote the given pending storage nodes into the cluster.

    Each matched pending node is switched to running and started via
    app.startStorage().  Answers with an Ack naming the added nodes,
    or a 'No node added' Ack when no given UUID matched a pending
    storage node.
    """
    uuids = ', '.join(map(uuid_str, uuid_list))
    logging.debug('Add nodes %s', uuids)
    app = self.app
    # Build the set once: membership below becomes O(1) instead of
    # rescanning uuid_list for every known storage node.
    wanted = set(uuid_list)
    # take all pending nodes
    node_list = list(app.pt.addNodeList(
        node for node in app.nm.getStorageList()
        if node.isPending() and node.getUUID() in wanted))
    if node_list:
        for node in node_list:
            node.setRunning()
            app.startStorage(node)
        app.broadcastNodesInformation(node_list)
        conn.answer(Errors.Ack('Nodes added: %s' %
            ', '.join(uuid_str(x.getUUID()) for x in node_list)))
    else:
        logging.warning('No node added')
        conn.answer(Errors.Ack('No node added'))
def tweakPartitionTable(self, conn, uuid_list):
    """Rebalance the partition table.

    Refused unless the cluster is RUNNING or entering/in backup mode.
    The resulting cell changes are broadcast, then the request is
    acknowledged.
    """
    app = self.app
    cluster_state = app.getClusterState()
    # XXX: Would it be safe to allow more states ?
    allowed_states = (ClusterStates.RUNNING,
                      ClusterStates.STARTING_BACKUP,
                      ClusterStates.BACKINGUP)
    if cluster_state not in allowed_states:
        raise ProtocolError('Can not tweak partition table in %s state'
                            % cluster_state)
    # NOTE(review): tweak() receives the requested UUIDs plus every
    # non-running storage node — confirm intent against
    # PartitionTable.tweak's contract.
    selected = [storage for storage in app.nm.getStorageList()
                if storage.getUUID() in uuid_list
                    or not storage.isRunning()]
    app.broadcastPartitionChanges(app.pt.tweak(selected))
    conn.answer(Errors.Ack(''))
def checkReplicas(self, conn, partition_dict, min_tid, max_tid):
    """Trigger a replica check on storage nodes.

    partition_dict maps a partition offset to an optional source node
    UUID.  For every checkable partition, a CheckPartition packet is
    sent to one storage node holding a cell of that partition.  Always
    answers with an empty Ack, even when there is nothing to check.
    """
    app = self.app
    pt = app.pt
    backingup = bool(app.backup_tid)
    if not max_tid:
        # No upper bound given: default to the last committed/backup tid.
        max_tid = pt.getCheckTid(partition_dict) if backingup else \
            app.getLastTransaction()
    if min_tid > max_tid:
        logging.warning("nothing to check: min_tid=%s > max_tid=%s",
                        dump(min_tid), dump(max_tid))
    else:
        getByUUID = app.nm.getByUUID
        node_set = set()
        for offset, source in partition_dict.iteritems():
            # XXX: For the moment, code checking replicas is unable to fix
            # corrupted partitions (when a good cell is known)
            # so only check readable ones.
            # (see also Checker._nextPartition of storage)
            cell_list = pt.getCellList(offset, True)
            #cell_list = [cell for cell in pt.getCellList(offset)
            #    if not cell.isOutOfDate()]
            # Skip partitions with nothing to compare; when backing up
            # with no explicit source, the backup cluster counts as one
            # extra replica — presumably so a single readable cell can
            # still be checked against it (verify).
            if len(cell_list) + (backingup and not source) <= 1:
                continue
            for cell in cell_list:
                node = cell.getNode()
                if node in node_set:
                    break
            else:
                # No break: none of this partition's nodes was scheduled
                # yet, so register the last one.  On break, *node* is a
                # node that already received a request and is reused.
                node_set.add(node)
            if source:
                # Explicit source: resolve its address in this cluster.
                source = '', getByUUID(source).getAddress()
            else:
                readable = [cell for cell in cell_list if cell.isReadable()]
                if 1 == len(readable) < len(cell_list):
                    # Exactly one good cell among several: use it as the
                    # reference copy.
                    source = '', readable[0].getAddress()
                elif backingup:
                    # Check against a random readable cell of the backup
                    # cluster's partition table.
                    source = app.backup_app.name, random.choice(
                        app.backup_app.pt.getCellList(
                            offset, readable=True)).getAddress()
                else:
                    # No reference copy: cells are checked against each
                    # other only.
                    source = '', None
            node.getConnection().notify(
                Packets.CheckPartition(offset, source, min_tid, max_tid))
    conn.answer(Errors.Ack(''))
def truncate(self, conn, tid):
    """Truncate the database back to *tid*.

    Refused unless the cluster is RUNNING.  Acknowledges the request,
    then raises StoppedOperation(tid) — presumably handled upper in
    the stack to stop operation and perform the truncation (verify
    against the caller).
    """
    # Only a running cluster may be truncated.
    if self.app.cluster_state != ClusterStates.RUNNING:
        raise ProtocolError('Can not truncate in this state')
    # Answer first: the exception below aborts this handler.
    conn.answer(Errors.Ack(''))
    raise StoppedOperation(tid)
if node.isConnected(): # notify itself so it can shutdown node.send(Packets.NotifyNodeInformation( monotonic_time(), [node.asTuple()])) # close to avoid handle the closure as a connection lost node.getConnection().abort() if keep: cell_list = app.pt.outdate() elif cell_list: message = 'node permanently removed' app.broadcastPartitionChanges(cell_list) else: node.setState(state) # /!\ send the node information *after* the partition table change conn.answer(Errors.Ack(message)) if state_changed: # notify node explicitly because broadcastNodesInformation() # ignores non-running nodes assert not node.isRunning() if node.isConnected(): node.send(Packets.NotifyNodeInformation( monotonic_time(), [node.asTuple()])) app.broadcastNodesInformation([node]) def addPendingNodes(self, conn, uuid_list): uuids = ', '.join(map(uuid_str, uuid_list)) logging.debug('Add nodes %s', uuids) app = self.app state = app.getClusterState() # XXX: Would it be safe to allow more states ?
def truncate(self, conn, tid):
    """Truncate the database back to *tid*, unconditionally.

    Acknowledges the request first, then raises StoppedOperation(tid)
    — presumably handled upper in the stack to stop operation and
    perform the truncation (TODO confirm against the caller).
    """
    # Answer before raising: the exception aborts this handler.
    conn.answer(Errors.Ack(''))
    raise StoppedOperation(tid)
def setNumReplicas(self, conn, num_replicas):
    """Change the cluster's number of replicas and acknowledge.

    Broadcasts a partition-table change carrying no cell changes
    (empty tuple) together with the new replica count — presumably
    only the replica count is updated on receivers (verify against
    broadcastPartitionChanges).
    """
    app = self.app
    app.broadcastPartitionChanges((), num_replicas)
    conn.answer(Errors.Ack(''))