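# Imports and operation labels assumed by the test excerpts below. Module
# paths follow the redpanda test tree (rptest) and ducktape; adjust them if
# your layout differs.
import random

import ducktape.errors
from ducktape.utils.util import wait_until

from rptest.clients.kafka_cat import KafkaCat
from rptest.clients.kafka_cli_tools import KafkaCliTools
from rptest.clients.types import TopicSpec
from rptest.services.admin import Admin
from rptest.services.redpanda import RedpandaService

# Labels for the random node/topic operations executed by test_node_operations.
# The concrete string values are assumptions; the tests only compare identities.
ADD = "add"
DECOMMISSION = "decommission"
ADD_TOPIC = "add_topic"
DELETE_TOPIC = "delete_topic"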
def test_log_level_control(self):
    admin = Admin(self.redpanda)
    node = self.redpanda.nodes[0]

    # This test assumes the default log level while testing is trace
    default_log_level = "trace"

    # set to warn level. message seen at trace
    with self.redpanda.monitor_log(node) as mon:
        admin.set_log_level("admin_api_server", "warn")
        mon.wait_until(
            f"Set log level for {{admin_api_server}}: {default_log_level} -> warn",
            timeout_sec=5,
            backoff_sec=1,
            err_msg="Never saw message")

    # set to debug. log level is now warn, so the message shouldn't be seen
    try:
        with self.redpanda.monitor_log(node) as mon:
            admin.set_log_level("admin_api_server", "debug")
            mon.wait_until(
                "Set log level for {admin_api_server}: warn -> debug",
                timeout_sec=10,
                backoff_sec=1,
                err_msg="Never saw message")
        assert False, "Should not have seen message"
    except ducktape.errors.TimeoutError:
        pass

    # should now see it again
    with self.redpanda.monitor_log(node) as mon:
        admin.set_log_level("admin_api_server", "info")
        mon.wait_until(
            "Set log level for {admin_api_server}: debug -> info",
            timeout_sec=5,
            backoff_sec=1,
            err_msg="Never saw message")

    # set with an expiry: after `expires` seconds the logger reverts to the
    # default level and logs the expiration message
    with self.redpanda.monitor_log(node) as mon:
        admin.set_log_level("admin_api_server", "debug", expires=5)
        mon.wait_until(
            f"Expiring log level for {{admin_api_server}} to {default_log_level}",
            timeout_sec=10,
            backoff_sec=1,
            err_msg="Never saw message")
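# For context on what test_log_level_control exercises: Admin.set_log_level()
# is assumed to wrap Redpanda's admin API endpoint for per-logger levels,
# roughly PUT /v1/config/log_level/<logger>?level=<level>[&expires=<seconds>].
# The endpoint shape, the default port and this helper are illustrative
# assumptions only; the real client lives in rptest.services.admin.
import requests


def set_log_level_sketch(host, logger_name, level, expires=None, port=9644):
    # 9644 is Redpanda's default admin API port
    params = {"level": level}
    if expires is not None:
        # seconds until the logger reverts to the default level
        params["expires"] = expires
    response = requests.put(
        f"http://{host}:{port}/v1/config/log_level/{logger_name}",
        params=params)
    response.raise_for_status()
    return response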
def test_node_operations(self):
    # allocate 5 nodes for the cluster
    self.redpanda = RedpandaService(
        self.test_context,
        5,
        KafkaCliTools,
        extra_rp_conf={
            "enable_auto_rebalance_on_node_add": True,
            "group_topic_partitions": 3,
            "default_topic_replications": 3,
        })
    # start the cluster
    self.redpanda.start()
    # create some topics
    topics = self._create_random_topics(10)
    self.redpanda.logger.info(f"using topics: {topics}")
    # select one of the topics to use in consumer/producer
    self.topic = random.choice(topics).name

    self.start_producer(1, throughput=100)
    self.start_consumer(1)
    self.await_startup()

    def decommission(node_id):
        self.logger.info(f"decommissioning node: {node_id}")
        admin = Admin(self.redpanda)
        admin.decommission_broker(id=node_id)

        def node_removed():
            admin = Admin(self.redpanda)
            brokers = admin.get_brokers()
            for b in brokers:
                if b['node_id'] == node_id:
                    return False
            return True

        wait_until(node_removed, timeout_sec=240, backoff_sec=2)

    kafkacat = KafkaCat(self.redpanda)

    def replicas_per_node():
        node_replicas = {}
        md = kafkacat.metadata()
        self.redpanda.logger.info(f"metadata: {md}")
        for topic in md['topics']:
            for p in topic['partitions']:
                for r in p['replicas']:
                    id = r['id']
                    if id not in node_replicas:
                        node_replicas[id] = 0
                    node_replicas[id] += 1
        return node_replicas

    def restart_node(node_id, cleanup=True):
        self.logger.info(f"restarting node: {node_id}")
        self.redpanda.stop_node(self.redpanda.nodes[node_id - 1])
        if cleanup:
            self.redpanda.clean_node(self.redpanda.nodes[node_id - 1])
        self.redpanda.start_node(self.redpanda.nodes[node_id - 1])
        admin = Admin(self.redpanda)
        admin.set_log_level("cluster", "trace")

        def has_new_replicas():
            per_node = replicas_per_node()
            self.logger.info(f"replicas per node: {per_node}")
            return node_id in per_node

        wait_until(has_new_replicas, timeout_sec=240, backoff_sec=2)

    admin = Admin(self.redpanda)
    admin.set_log_level("cluster", "trace")
    # see the hedged generate_random_workload sketch after this test for the
    # shape of the operations list
    work = self.generate_random_workload(10, skip_nodes=set())
    self.redpanda.logger.info(f"node operations to execute: {work}")
    for op in work:
        op_type = op[0]
        self.logger.info(f"executing - {op}")
        if op_type == ADD:
            id = op[1]
            restart_node(id)
        elif op_type == DECOMMISSION:
            id = op[1]
            decommission(id)
        elif op_type == ADD_TOPIC:
            spec = TopicSpec(name=op[1],
                             replication_factor=op[2],
                             partition_count=op[3])
            self.redpanda.create_topic(spec)
        elif op_type == DELETE_TOPIC:
            self.redpanda.delete_topic(op[1])

    self.run_validation(enable_idempotence=False,
                        consumer_timeout_sec=180)
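# A minimal sketch of the generate_random_workload() helper consumed above;
# the actual helper belongs to the test's base class and is an assumption
# here. It only has to yield tuples shaped the way the loop in
# test_node_operations reads them: (ADD, node_id), (DECOMMISSION, node_id),
# (ADD_TOPIC, name, replication_factor, partition_count) or
# (DELETE_TOPIC, name). The real implementation likely also coordinates the
# ordering of ADD and DECOMMISSION per node, which this sketch does not.
def generate_random_workload(self, count, skip_nodes):
    operations = []
    topics = []
    # nodes 1..5 map to self.redpanda.nodes[node_id - 1] in the test above
    available_nodes = [i for i in range(1, 6) if i not in skip_nodes]
    for i in range(count):
        op = random.choice([ADD, DECOMMISSION, ADD_TOPIC, DELETE_TOPIC])
        if op in (ADD, DECOMMISSION):
            operations.append((op, random.choice(available_nodes)))
        elif op == ADD_TOPIC:
            name = f"workload-topic-{i}"
            topics.append(name)
            # (name, replication_factor, partition_count)
            operations.append((op, name, 3, random.randint(1, 10)))
        elif topics:
            # DELETE_TOPIC only makes sense for a topic created earlier
            operations.append((op, topics.pop()))
    return operations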