async def test_smoke(self):
    ni = 0
    async with MockAdminAPI() as client:
        nodes_config_resp = await client.getNodesConfig(
            NodesFilter(node=NodeID(node_index=ni))
        )
        nodes_state_resp = await client.getNodesState(
            NodesStateRequest(filter=NodesFilter(node=NodeID(node_index=ni)))
        )
        maintenances_resp = await client.getMaintenances(MaintenancesFilter())

    nc = nodes_config_resp.nodes[0]
    ns = nodes_state_resp.states[0]

    mnt_ids = set()
    for mnt in maintenances_resp.maintenances:
        for s in mnt.shards:
            if s.node.node_index == ni:
                mnt_ids.add(mnt.group_id)
        for n in mnt.sequencer_nodes:
            if n.node_index == ni:
                mnt_ids.add(mnt.group_id)

    mnts = tuple(
        sorted(
            (
                mnt
                for mnt in maintenances_resp.maintenances
                if mnt.group_id in mnt_ids
            ),
            key=operator.attrgetter("group_id"),
        )
    )

    nv = NodeView(node_config=nc, node_state=ns, maintenances=mnts)
    self._validate(nv, nc, ns, mnts)
async def applyMaintenance(
    self, request: MaintenanceDefinition
) -> MaintenanceDefinitionResponse:
    # TODO: ungroup if group == False
    shards = []
    for sh in request.shards:
        if sh.shard_index == -1:
            # TODO: make it unwrap
            pass
        else:
            assert sh.node.node_index is not None
            # pyre-fixme[6]: Expected `int` for 1st param but got `Optional[int]`.
            nc = self._nc_by_node_index[sh.node.node_index]
            shards.append(
                ShardID(
                    node=NodeID(
                        node_index=nc.node_index,
                        name=nc.name,
                        address=nc.data_address,
                    ),
                    shard_index=sh.shard_index,
                )
            )

    seq_nodes = []
    for n in request.sequencer_nodes:
        assert n.node_index is not None
        # pyre-fixme[6]: Expected `int` for 1st param but got `Optional[int]`.
        nc = self._nc_by_node_index[n.node_index]
        seq_nodes.append(
            NodeID(node_index=nc.node_index, name=nc.name, address=nc.data_address)
        )

    mnt = MaintenanceDefinition(
        shards=shards,
        shard_target_state=request.shard_target_state,
        sequencer_nodes=seq_nodes,
        sequencer_target_state=request.sequencer_target_state,
        user=request.user,
        reason=request.reason,
        extras=request.extras,
        skip_safety_checks=request.skip_safety_checks,
        force_restore_rebuilding=request.force_restore_rebuilding,
        group=request.group,
        ttl_seconds=request.ttl_seconds,
        allow_passive_drains=request.allow_passive_drains,
        group_id=gen_word(8),
        last_check_impact_result=None,
        expires_on=(
            1000 * (int(datetime.now().timestamp()) + request.ttl_seconds)
            if request.ttl_seconds
            else None
        ),
        created_on=1000 * int(datetime.now().timestamp()),
    )
    assert mnt.group_id is not None
    # pyre-fixme[6]: Expected `str` for 1st param but got `Optional[str]`.
    self._maintenances_by_id[mnt.group_id] = mnt
    return MaintenanceDefinitionResponse(maintenances=[mnt])
async def test_smoke(self):
    ni = 0
    async with MockAdminAPI() as client:
        cv = await get_cluster_view(client)
        maintenances_resp = await apply_maintenance(
            client=client,
            shards=[
                ShardID(
                    node=cv.get_node_view_by_node_index(0).node_id, shard_index=1
                )
            ],
            sequencer_nodes=[cv.get_node_view_by_node_index(0).node_id],
        )
        (
            nodes_config_resp,
            nodes_state_resp,
            maintenances_resp,
        ) = await asyncio.gather(
            client.getNodesConfig(NodesFilter(node=NodeID(node_index=ni))),
            client.getNodesState(
                NodesStateRequest(filter=NodesFilter(node=NodeID(node_index=ni)))
            ),
            client.getMaintenances(MaintenancesFilter()),
        )

    nc = [n for n in nodes_config_resp.nodes if n.node_index == ni][0]
    ns = [n for n in nodes_state_resp.states if n.node_index == ni][0]

    mnt_ids = set()
    for mnt in maintenances_resp.maintenances:
        for s in mnt.shards:
            if s.node.node_index == ni:
                mnt_ids.add(mnt.group_id)
        for n in mnt.sequencer_nodes:
            if n.node_index == ni:
                mnt_ids.add(mnt.group_id)

    mnts = tuple(
        sorted(
            (
                mnt
                for mnt in maintenances_resp.maintenances
                if mnt.group_id in mnt_ids
            ),
            key=operator.attrgetter("group_id"),
        )
    )

    nv = NodeView(node_config=nc, node_state=ns, maintenances=mnts)
    self._validate(nv, nc, ns, mnts)
def node_id(self) -> NodeID:
    return NodeID(
        node_index=self.node_config.node_index,
        address=self.node_config.data_address,
        # Not self.node_name because AdminAPI expects the value from NodeConfig
        name=self.node_config.name,
    )
async def check_impact(
    client: AdminAPI,
    nodes: Optional[Collection[Node]] = None,
    shards: Optional[Collection[ShardID]] = None,
    target_storage_state: ShardStorageState = ShardStorageState.DISABLED,
    disable_sequencers: bool = True,
) -> CheckImpactResponse:
    """
    Performs a safety check and returns a CheckImpactResponse. If neither
    nodes nor shards are passed, it still runs the safety check and returns
    the current state of the cluster.
    """
    nodes = nodes or []
    shards = shards or []

    req_shards: FrozenSet[ShardID] = _recombine_shards(
        list(shards)  # shards is a generic Collection, not a List
        + [
            ShardID(
                node=NodeID(node_index=n.node_index, address=n.data_addr.to_thrift()),
                shard_index=-1,
            )
            for n in nodes
        ]
    )

    return await admin_api.check_impact(
        client=client,
        req=CheckImpactRequest(
            shards=req_shards,
            target_storage_state=target_storage_state,
            disable_sequencers=disable_sequencers,
        ),
    )
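# A minimal, hypothetical sketch of calling check_impact for a single shard.
# It assumes a connected AdminAPI client is already available (for example,
# obtained the same way as in the tests in this document); the shard chosen
# here is illustrative only.
async def example_check_impact(client: AdminAPI) -> None:
    resp = await check_impact(
        client=client,
        shards=[ShardID(node=NodeID(node_index=0), shard_index=1)],
    )
    # resp is a CheckImpactResponse; inspect it to decide whether the
    # operation is safe to apply.
    print(resp)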
def parse_shards(src: Collection[str]) -> Set[ShardID]:
    """
    Parses a list of strings and interprets them as ShardID objects.

    Accepted examples:
        0      => ShardID(0, -1)
        N0     => ShardID(0, -1)
        0:2    => ShardID(0, 2)
        N0:2   => ShardID(0, 2)
        N0:S2  => ShardID(0, 2)
    """
    regex = re.compile(r"^N?(\d+)\:?S?(\d+)?$", flags=re.IGNORECASE)
    res = set()
    for s in src:
        match = regex.search(s)
        if not match:
            raise ValueError(f"Cannot parse shard: {s}")
        node_index = int(match.groups()[0])
        if match.groups()[1] is None:
            shard_index = ALL_SHARDS
        else:
            shard_index = int(match.groups()[1])
        res.add(ShardID(node=NodeID(node_index=node_index), shard_index=shard_index))
    return res
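# A minimal usage sketch for parse_shards (assumes parse_shards, ShardID,
# NodeID and ALL_SHARDS are in scope, as in the snippets above; exact module
# paths are not shown in this document).
shards = parse_shards(["N0", "N1:S2", "0:S3"])
assert ShardID(node=NodeID(node_index=0), shard_index=ALL_SHARDS) in shards
assert ShardID(node=NodeID(node_index=1), shard_index=2) in shards
assert ShardID(node=NodeID(node_index=0), shard_index=3) in shards
# Unparsable input raises ValueError, e.g. parse_shards(["node0"]).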
def test_parse_shards_valid2(self) -> None:
    # Parse multiple inputs
    self.assertEqual(
        {
            ShardID(node=NodeID(node_index=0), shard_index=1),
            ShardID(node=NodeID(node_index=1), shard_index=2),
        },
        helpers.parse_shards(["N0:S1", "N1:S2"]),
    )
    # Remove duplicates
    self.assertEqual(
        {
            ShardID(node=NodeID(node_index=0), shard_index=1),
            ShardID(node=NodeID(node_index=1), shard_index=2),
        },
        helpers.parse_shards(["N0:S1", "N1:S2", "N0:s1"]),
    )
async def test_mismatch(self):
    async with MockAdminAPI() as client:
        (
            nodes_config_resp,
            nodes_state_resp,
            maintenances_resp,
        ) = await asyncio.gather(
            client.getNodesConfig(NodesFilter(node=NodeID(node_index=0))),
            client.getNodesState(
                NodesStateRequest(filter=NodesFilter(node=NodeID(node_index=1)))
            ),
            client.getMaintenances(MaintenancesFilter()),
        )

    with self.assertRaises(ValueError):
        NodeView(
            node_config=nodes_config_resp.nodes[0],
            node_state=nodes_state_resp.states[0],
            maintenances=maintenances_resp.maintenances,
        )
def to_thrift(self) -> NodeID:
    """
    Returns the Thrift representation of NodeID, for use in communication
    with the AdminAPI.
    """
    return NodeID(
        node_index=self.node_index,
        address=self.data_addr.to_thrift() if self.data_addr else None,
        name=self.name if self.name else None,
    )
async def get_node_by_name(client: AdminAPI, name: str) -> Node:
    """
    Returns a Node by node name.

    Raises:
        logdevice.admin.exceptions.types.NodeNotReady: if the node the client
            is connected to is not yet ready to process the request
        thrift.py3.TransportError: if there's a network error while
            communicating with Thrift
        ldops.exceptions.NodeNotFoundError: if there's no such node from the
            point of view of the AdminAPI provider
    """
    resp: NodesConfigResponse = await admin_api.get_nodes_config(
        client=client, req=NodesFilter(node=NodeID(name=name))
    )
    if not resp.nodes:
        raise NodeNotFoundError(f"Node not found: name=`{name}'")

    # AdminAPI guarantees that there CANNOT be more than one node with the
    # same name.
    return _get_node_by_node_config(resp.nodes[0])
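# A brief, hypothetical sketch of get_node_by_name in use (assumes a connected
# AdminAPI client; the node name "logdevice0" is made up for illustration).
async def example_get_node_by_name(client: AdminAPI) -> None:
    try:
        node = await get_node_by_name(client, name="logdevice0")
        print(node.node_index, node.data_addr)
    except NodeNotFoundError:
        print("no node with that name")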
def _combine(
    cv: ClusterView,
    shards: Optional[List[str]] = None,
    node_names: Optional[List[str]] = None,
    node_indexes: Optional[List[int]] = None,
) -> Tuple[ShardID, ...]:
    shards = list(shards or [])
    node_names = list(node_names or [])
    node_indexes = list(node_indexes or [])

    shard_ids = parse_shards(shards)
    for nn in node_names:
        shard_ids.add(ShardID(node=cv.get_node_id(node_name=nn), shard_index=-1))
    for ni in node_indexes:
        shard_ids.add(ShardID(node=NodeID(node_index=ni), shard_index=-1))

    shard_ids_expanded = cv.expand_shards(shard_ids)
    return shard_ids_expanded
async def shrink(self, node_indexes: typing.List[int]):
    """
    Shrinks the cluster by removing nodes from the NodesConfig. This operation
    requires that the removed nodes are empty (storage state: NONE) and dead.
    """
    ctx = nubia.context.get_context()
    async with ctx.get_cluster_admin_client() as client:
        try:
            await client.removeNodes(
                request=RemoveNodesRequest(
                    node_filters=[
                        NodesFilter(node=NodeID(node_index=idx))
                        for idx in node_indexes
                    ]
                ),
                rpc_options=RpcOptions(timeout=60),
            )
            termcolor.cprint("Successfully removed the nodes", "green")
        except Exception as e:
            termcolor.cprint(str(e), "red")
            return 1
def to_shard_id(scope: str) -> ShardID:
    """
    A conversion utility that takes an Nx:Sy string and converts it into the
    typed ShardID. The 'Sy' part is optional; if unset, the generated ShardID
    will have shard_index set to -1.
    """
    scope = scope.upper()
    if not scope:
        raise ValueError("Cannot parse empty scope")
    match = SHARD_PATTERN.match(scope)
    if match is None:
        # There were no shards, or the format is invalid.
        raise ValueError(f"Cannot parse '{scope}'. Invalid format!")
    results = match.groupdict()
    shard_index = -1
    if results["shard_index"] is not None:
        shard_index = int(results["shard_index"])
    node_index = int(results["node_index"])
    node = NodeID(node_index=node_index)
    return ShardID(node=node, shard_index=shard_index)
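# A minimal usage sketch for to_shard_id (assumes to_shard_id, ShardID and
# NodeID are in scope as above).
assert to_shard_id("N1:S4") == ShardID(node=NodeID(node_index=1), shard_index=4)
# The 'Sy' part is optional (shard_index defaults to -1, i.e. the whole node),
# and lowercase input is accepted because the scope is upper-cased first.
assert to_shard_id("n1") == ShardID(node=NodeID(node_index=1), shard_index=-1)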
def test_parse_shards_valid1(self) -> None:
    # 5
    self.assertEqual(
        {ShardID(node=NodeID(node_index=5), shard_index=ALL_SHARDS)},
        helpers.parse_shards(["5"]),
    )
    # 5:1
    self.assertEqual(
        {ShardID(node=NodeID(node_index=5), shard_index=1)},
        helpers.parse_shards(["5:1"]),
    )
    # 0:S1
    self.assertEqual(
        {ShardID(node=NodeID(node_index=0), shard_index=1)},
        helpers.parse_shards(["0:S1"]),
    )
    # N0:S1
    self.assertEqual(
        {ShardID(node=NodeID(node_index=0), shard_index=1)},
        helpers.parse_shards(["N0:S1"]),
    )
    # N0 == ShardID(0, ALL_SHARDS)
    self.assertEqual(
        {ShardID(node=NodeID(node_index=0), shard_index=ALL_SHARDS)},
        helpers.parse_shards(["N0"]),
    )
    # N1:S4 == ShardID(1, 4)
    self.assertEqual(
        {ShardID(node=NodeID(node_index=1), shard_index=4)},
        helpers.parse_shards(["N1:S4"]),
    )
    # Case is ignored
    # n1:S4 == ShardID(1, 4)
    self.assertEqual(
        {ShardID(node=NodeID(node_index=1), shard_index=4)},
        helpers.parse_shards(["n1:S4"]),
    )
def _validate(
    self,
    nv: NodeView,
    nc: NodeConfig,
    ns: NodeState,
    mnts: Tuple[MaintenanceDefinition, ...],
):
    self.assertEqual(nv.node_config, nc)
    self.assertEqual(nv.node_state, ns)
    self.assertEqual(nv.maintenances, mnts)
    self.assertEqual(nv.node_index, nc.node_index)

    if nc.name:
        self.assertEqual(nv.node_name, nc.name)
    else:
        self.assertEqual(
            nv.node_name, str(SocketAddress.from_thrift(nc.data_address))
        )

    self.assertEqual(nv.data_address, SocketAddress.from_thrift(nc.data_address))

    if nv.thrift_address.address_family == SocketAddressFamily.INET:
        assert nv.thrift_address.address is not None
        from_nc = SocketAddress.from_thrift(nc.data_address)
        assert from_nc.address is not None
        self.assertEqual(nv.thrift_address.port, 6440)
        self.assertEqual(
            nv.thrift_address.address.compressed, from_nc.address.compressed
        )

    self.assertEqual(
        nv.node_id,
        NodeID(node_index=nc.node_index, address=nc.data_address, name=nc.name),
    )
    self.assertEqual(nv.location, nc.location)
    self.assertEqual(nv.location_per_scope, nc.location_per_scope)
    self.assertEqual(nv.roles, nc.roles)
    for r in Role:
        self.assertEqual(nv.has_role(r), r in nc.roles)
    self.assertEqual(nv.is_sequencer, Role.SEQUENCER in nc.roles)
    self.assertEqual(nv.is_storage, Role.STORAGE in nc.roles)
    self.assertEqual(nv.daemon_state, ns.daemon_state)

    if Role.SEQUENCER in nc.roles:
        assert nc.sequencer is not None
        self.assertEqual(nv.sequencer_config, nc.sequencer)
        self.assertEqual(nv.sequencer_weight, nc.sequencer.weight)
        assert ns.sequencer_state is not None
        self.assertEqual(nv.sequencer_state, ns.sequencer_state)
        self.assertEqual(nv.sequencing_state, ns.sequencer_state.state)
    else:
        self.assertIsNone(nv.sequencer_config)
        self.assertIsNone(nv.sequencer_state)
        self.assertIsNone(nv.sequencer_weight)
        self.assertIsNone(nv.sequencing_state)

    if Role.STORAGE in nc.roles:
        assert nc.storage is not None
        assert ns.shard_states is not None
        self.assertEqual(nv.storage_config, nc.storage)
        self.assertEqual(nv.storage_weight, nc.storage.weight)
        self.assertEqual(nv.num_shards, nc.storage.num_shards)
        self.assertEqual(nv.shard_states, ns.shard_states)
        self.assertListEqual(
            nv.shards_data_health, [s.data_health for s in ns.shard_states]
        )
        self.assertEqual(
            nv.shards_data_health_count,
            Counter(s.data_health for s in ns.shard_states),
        )
        self.assertListEqual(
            nv.shards_current_storage_state,
            [s.current_storage_state for s in ns.shard_states],
        )
        self.assertEqual(
            nv.shards_current_storage_state_count,
            Counter(s.current_storage_state for s in ns.shard_states),
        )
        self.assertListEqual(
            nv.shards_current_operational_state,
            [s.current_operational_state for s in ns.shard_states],
        )
        self.assertEqual(
            nv.shards_current_operational_state_count,
            Counter(s.current_operational_state for s in ns.shard_states),
        )
        self.assertListEqual(
            nv.shards_membership_storage_state,
            [s.storage_state for s in ns.shard_states],
        )
        self.assertEqual(
            nv.shards_membership_storage_state_count,
            Counter(s.storage_state for s in ns.shard_states),
        )
        self.assertListEqual(
            nv.shards_metadata_state, [s.metadata_state for s in ns.shard_states]
        )
        self.assertEqual(
            nv.shards_metadata_state_count,
            Counter(s.metadata_state for s in ns.shard_states),
        )
    else:
        self.assertIsNone(nv.storage_config)
        self.assertIsNone(nv.storage_weight)
        self.assertIsNone(nv.num_shards)
        self.assertEqual(nv.shard_states, [])
def node_id(self) -> NodeID:
    return NodeID(
        node_index=self.nc.node_index,
        address=self.nc.data_address,
        name=self.nc.name,
    )
def _validate(
    self,
    cv: ClusterView,
    ncs: List[NodeConfig],
    nss: List[NodeState],
    mnts: Tuple[MaintenanceDefinition, ...],
):
    nis = sorted(nc.node_index for nc in ncs)
    ni_to_nc = {nc.node_index: nc for nc in ncs}
    ni_to_ns = {ns.node_index: ns for ns in nss}
    ni_to_mnts: Dict[int, List[MaintenanceDefinition]] = {ni: [] for ni in nis}
    for mnt in mnts:
        mnt_nis = set()
        for s in mnt.shards:
            assert s.node.node_index is not None
            mnt_nis.add(s.node.node_index)
        for n in mnt.sequencer_nodes:
            assert n.node_index is not None
            mnt_nis.add(n.node_index)
        for ni in mnt_nis:
            ni_to_mnts[ni].append(mnt)

    self.assertEqual(sorted(cv.get_all_node_indexes()), sorted(ni_to_nc.keys()))
    self.assertEqual(
        sorted(cv.get_all_node_views(), key=operator.attrgetter("node_index")),
        sorted(
            (
                NodeView(
                    node_config=ni_to_nc[ni],
                    node_state=ni_to_ns[ni],
                    maintenances=tuple(ni_to_mnts[ni]),
                )
                for ni in ni_to_nc.keys()
            ),
            key=operator.attrgetter("node_index"),
        ),
    )
    self.assertEqual(sorted(cv.get_all_node_names()), sorted(nc.name for nc in ncs))
    self.assertEqual(
        sorted(cv.get_all_maintenance_ids()), sorted(mnt.group_id for mnt in mnts)
    )
    self.assertEqual(
        sorted(cv.get_all_maintenances(), key=operator.attrgetter("group_id")),
        sorted(mnts, key=operator.attrgetter("group_id")),
    )

    for ni in nis:
        nn = ni_to_nc[ni].name
        nc = ni_to_nc[ni]
        ns = ni_to_ns[ni]
        node_mnts = tuple(ni_to_mnts[ni])
        nv = NodeView(
            node_config=ni_to_nc[ni],
            node_state=ni_to_ns[ni],
            maintenances=node_mnts,
        )

        self.assertEqual(cv.get_node_view_by_node_index(ni), nv)
        self.assertEqual(cv.get_node_name_by_node_index(ni), nn)
        self.assertEqual(cv.get_node_config_by_node_index(ni), nc)
        self.assertEqual(cv.get_node_state_by_node_index(ni), ns)
        self.assertEqual(cv.get_node_maintenances_by_node_index(ni), node_mnts)

        self.assertEqual(cv.get_node_view_by_node_name(nn), nv)
        self.assertEqual(cv.get_node_index_by_node_name(nn), ni)
        self.assertEqual(cv.get_node_config_by_node_name(nn), nc)
        self.assertEqual(cv.get_node_state_by_node_name(nn), ns)
        self.assertEqual(cv.get_node_maintenances_by_node_name(nn), node_mnts)

        self.assertEqual(cv.get_node_view(node_name=nn), nv)
        self.assertEqual(cv.get_node_index(node_name=nn), ni)
        self.assertEqual(cv.get_node_config(node_name=nn), nc)
        self.assertEqual(cv.get_node_state(node_name=nn), ns)
        self.assertEqual(cv.get_node_maintenances(node_name=nn), node_mnts)

        self.assertEqual(cv.get_node_view(node_index=ni), nv)
        self.assertEqual(cv.get_node_name(node_index=ni), nn)
        self.assertEqual(cv.get_node_config(node_index=ni), nc)
        self.assertEqual(cv.get_node_state(node_index=ni), ns)
        self.assertEqual(cv.get_node_maintenances(node_index=ni), node_mnts)

    with self.assertRaises(ValueError):
        cv.get_node_view(None, None)
    with self.assertRaises(ValueError):
        cv.get_node_config(None, None)
    with self.assertRaises(ValueError):
        cv.get_node_state(None, None)
    with self.assertRaises(ValueError):
        cv.get_node_maintenances(None, None)

    # mismatch node_index and node_name
    if len(nis) > 1:
        nn = ni_to_nc[nis[0]].name
        ni = nis[1]
        with self.assertRaises(ValueError):
            cv.get_node_view(ni, nn)
        with self.assertRaises(ValueError):
            cv.get_node_config(ni, nn)
        with self.assertRaises(ValueError):
            cv.get_node_state(ni, nn)
        with self.assertRaises(ValueError):
            cv.get_node_maintenances(ni, nn)

    # non-existent node_index
    with self.assertRaises(NodeNotFoundError):
        cv.get_node_view(node_index=max(nis) + 1)

    # non-existent node_name
    with self.assertRaises(NodeNotFoundError):
        nns = {nc.name for nc in ncs}
        while True:
            nn = gen_word()
            if nn not in nns:
                break
        cv.get_node_view(node_name=nn)

    for mnt in mnts:
        assert mnt.group_id is not None
        self.assertEqual(cv.get_maintenance_by_id(mnt.group_id), mnt)
        self.assertTupleEqual(
            cv.get_node_indexes_by_maintenance_id(mnt.group_id),
            tuple(
                sorted(
                    {
                        n.node_index
                        for n in mnt.sequencer_nodes
                        if n.node_index is not None
                    }.union(
                        {
                            s.node.node_index
                            for s in mnt.shards
                            if s.node.node_index is not None
                        }
                    )
                )
            ),
        )
        self.assertEqual(
            mnt.group_id, cv.get_maintenance_view_by_id(mnt.group_id).group_id
        )

    self.assertListEqual(
        list(sorted(m.group_id for m in mnts)),
        list(sorted(mv.group_id for mv in cv.get_all_maintenance_views())),
    )

    # expand_shards
    self.assertEqual(
        cv.expand_shards(
            shards=[ShardID(node=NodeID(node_index=nis[0]), shard_index=0)]
        ),
        (
            ShardID(
                node=NodeID(
                    node_index=ni_to_nc[nis[0]].node_index,
                    name=ni_to_nc[nis[0]].name,
                    address=ni_to_nc[nis[0]].data_address,
                ),
                shard_index=0,
            ),
        ),
    )
    self.assertEqual(
        len(
            cv.expand_shards(
                shards=[
                    ShardID(node=NodeID(node_index=nis[0]), shard_index=ALL_SHARDS)
                ]
            )
        ),
        ni_to_nc[nis[0]].storage.num_shards,
    )
    self.assertEqual(
        len(
            cv.expand_shards(
                shards=[
                    ShardID(node=NodeID(node_index=nis[0]), shard_index=ALL_SHARDS),
                    ShardID(node=NodeID(node_index=nis[0]), shard_index=ALL_SHARDS),
                    ShardID(node=NodeID(node_index=nis[1]), shard_index=ALL_SHARDS),
                ]
            )
        ),
        ni_to_nc[nis[0]].storage.num_shards + ni_to_nc[nis[1]].storage.num_shards,
    )
    self.assertEqual(
        len(
            cv.expand_shards(
                shards=[
                    ShardID(node=NodeID(node_index=nis[0]), shard_index=ALL_SHARDS),
                    ShardID(node=NodeID(node_index=nis[1]), shard_index=0),
                ],
                node_ids=[NodeID(node_index=0)],
            )
        ),
        ni_to_nc[nis[0]].storage.num_shards + 1,
    )

    # normalize_node_id
    self.assertEqual(
        cv.normalize_node_id(NodeID(node_index=nis[0])),
        NodeID(
            node_index=nis[0],
            address=ni_to_nc[nis[0]].data_address,
            name=ni_to_nc[nis[0]].name,
        ),
    )
    self.assertEqual(
        cv.normalize_node_id(NodeID(name=ni_to_nc[nis[0]].name)),
        NodeID(
            node_index=nis[0],
            address=ni_to_nc[nis[0]].data_address,
            name=ni_to_nc[nis[0]].name,
        ),
    )

    # search_maintenances
    self.assertEqual(len(cv.search_maintenances()), len(mnts))
    self.assertEqual(
        len(cv.search_maintenances(node_ids=[cv.get_node_id(node_index=3)])), 0
    )
    self.assertEqual(
        len(cv.search_maintenances(node_ids=[cv.get_node_id(node_index=1)])), 1
    )
    self.assertEqual(
        len(
            cv.search_maintenances(
                shards=[ShardID(node=cv.get_node_id(node_index=0), shard_index=1)]
            )
        ),
        1,
    )
    # shard_target_state
    self.assertEqual(
        len(
            cv.search_maintenances(
                shard_target_state=ShardOperationalState.MAY_DISAPPEAR
            )
        ),
        2,
    )
    self.assertEqual(
        len(
            cv.search_maintenances(shard_target_state=ShardOperationalState.DRAINED)
        ),
        0,
    )
    # sequencer_target_state
    self.assertEqual(
        len(cv.search_maintenances(sequencer_target_state=SequencingState.ENABLED)),
        0,
    )
    self.assertEqual(
        len(cv.search_maintenances(sequencer_target_state=SequencingState.DISABLED)),
        2,
    )
    self.assertEqual(len(cv.search_maintenances(user="******")), 1)
    self.assertEqual(len(cv.search_maintenances(reason="whatever")), 1)
    self.assertEqual(len(cv.search_maintenances(skip_safety_checks=True)), 0)
    self.assertEqual(len(cv.search_maintenances(skip_safety_checks=False)), 2)
    self.assertEqual(len(cv.search_maintenances(force_restore_rebuilding=True)), 0)
    self.assertEqual(len(cv.search_maintenances(force_restore_rebuilding=False)), 2)
    self.assertEqual(len(cv.search_maintenances(allow_passive_drains=True)), 0)
    self.assertEqual(len(cv.search_maintenances(allow_passive_drains=False)), 2)
    self.assertEqual(len(cv.search_maintenances(group_id=mnts[0].group_id)), 1)
    self.assertEqual(
        len(cv.search_maintenances(progress=MaintenanceProgress.IN_PROGRESS)), 2
    )