def __init__(self, ctx):
    """Set up a single-broker cluster with one single-partition topic and
    a kaf-based producer/consumer pair attached to it."""
    # One topic, one partition, no replication: the simplest local setup.
    topic_config = dict(topic=dict(partitions=1, replication_factor=1))
    super(WaitForLocalConsumerTest, self).__init__(test_context=ctx,
                                                   num_brokers=1,
                                                   topics=topic_config)
    self._producer = KafProducer(ctx, self.redpanda, "topic")
    self._consumer = KafConsumer(ctx, self.redpanda, "topic")
def test_sarama_consumergroup(self):
    """Produce a batch of records, then run the sarama consumer-group
    example until it has consumed them."""
    # Keep the workload small when running at local/dev scale.
    record_count = 10 if self.scale.local else 5000

    group_example = SaramaExamples.SaramaConsumerGroup(
        self.redpanda, self.topic, record_count)
    runner = ExampleRunner(self._ctx,
                           group_example,
                           timeout_sec=self._timeout)
    producer = KafProducer(self._ctx, self.redpanda, self.topic,
                           record_count)

    def all_partitions_materialized():
        storage = self.redpanda.storage()
        return len(list(storage.partitions("kafka", self.topic))) == 3

    # Must wait for the partitions to materialize or else kaf may try to
    # produce during leadership election. This results in a skipped
    # record since kaf doesn't auto-retry.
    wait_until(all_partitions_materialized,
               timeout_sec=30,
               backoff_sec=2,
               err_msg="Expected partition did not materialize")

    # Produce everything up front; wait for the worker threads to finish.
    producer.start()
    producer.wait()

    # Start the example and wait until it is OK to terminate.
    runner.start()
    runner.wait()
class WaitForLocalConsumerTest(RedpandaTest):
    """
    Start a kaf-based producer and consumer, then wait until the consumer
    has observed a certain number of produced records.
    """
    # How many records past the baseline each partition must advance.
    NUM_RECORDS = 2000

    def __init__(self, ctx):
        """Single-broker cluster with one single-partition topic."""
        topic_config = dict(topic=dict(partitions=1, replication_factor=1))
        super(WaitForLocalConsumerTest, self).__init__(test_context=ctx,
                                                       num_brokers=1,
                                                       topics=topic_config)
        self._producer = KafProducer(ctx, self.redpanda, "topic")
        self._consumer = KafConsumer(ctx, self.redpanda, "topic")

    @cluster(num_nodes=4)
    def test_wait_for_local_consumer(self):
        self._consumer.start()
        self._producer.start()

        # Baseline offsets: progress is measured relative to the first
        # offsets the consumer reports.
        baseline = self._consumer.offset.copy()

        def enough_consumed():
            # The consumer exiting early is itself a failure.
            assert not self._consumer.done
            if not baseline:
                # Offsets were not yet available; capture them and retry.
                baseline.update(self._consumer.offset)
                return False
            current = self._consumer.offset
            progress = [current[p] - start for p, start in baseline.items()]
            self.logger.debug(f"Consumer progress: {progress}")
            return all(advanced > WaitForLocalConsumerTest.NUM_RECORDS
                       for advanced in progress)

        wait_until(enough_consumed, timeout_sec=180, backoff_sec=3)

        # ensure that the consumer is still running. one problematic
        # behavior we observed was that the consumer was stopping.
        assert not self._consumer.done
def __init__(self, ctx):
    """Single-broker cluster; attach a producer/consumer pair to the
    class-provided topic."""
    super(WaitForLocalConsumerTest, self).__init__(test_context=ctx,
                                                   num_brokers=1)
    # Tuple assignment preserves left-to-right construction order.
    self._producer, self._consumer = (
        KafProducer(ctx, self.redpanda, self.topic),
        KafConsumer(ctx, self.redpanda, self.topic),
    )
def test_leader_transfers_recovery(self, acks):
    """
    Validate that leadership transfers complete successfully
    under acks=1 writes that prompt the leader to frequently
    activate recovery_stm.

    When acks=1, this is a reproducer for
    https://github.com/vectorizedio/redpanda/issues/2580

    When acks=-1, this is a reproducer for
    https://github.com/vectorizedio/redpanda/issues/2606
    """
    leader_node_id, replicas = self._wait_for_leader()

    if acks == -1:
        # RpkProducer writes a continuous stream (see comment below), which
        # is the traffic shape used for the acks=-1 reproducer.
        producer = RpkProducer(self._ctx,
                               self.redpanda,
                               self.topic,
                               16384,
                               sys.maxsize,
                               acks=acks)
    else:
        # To reproduce acks=1 issue, we need an intermittent producer that
        # waits long enough between messages to let recovery_stm go to
        # sleep waiting for follower_state_change.
        #
        # KafProducer is intermittent because it starts a fresh process for
        # each message, whereas RpkProducer writes a continuous stream.
        #
        # TODO: create a test traffic generator that has inter-message
        # delay as an explicit parameter, rather than relying on
        # implementation details of the producer helpers.
        producer = KafProducer(self._ctx, self.redpanda, self.topic)

    producer.start()

    # Pass leadership around in a ring
    self.logger.info(f"Initial leader of {self.topic} is {leader_node_id}")

    transfer_count = 50
    # FIXME: with a transfer count >100, we tend to see
    # reactor stalls and corresponding nondeterministic behaviour/failures.
    # This appears unrelated to the functionality under test, something
    # else is tripping up the cluster when we have so many leadership
    # transfers.
    # https://github.com/vectorizedio/redpanda/issues/2623

    admin = Admin(self.redpanda)

    initial_leader_id = leader_node_id
    for n in range(0, transfer_count):
        # Node indices are 0-based while node ids are 1-based (hence +1);
        # step around the ring one node per iteration.
        target_idx = (initial_leader_id + n) % len(self.redpanda.nodes)
        target_node_id = target_idx + 1

        self.logger.info(f"Starting transfer to {target_node_id}")
        admin.partition_transfer_leadership("kafka", self.topic, 0,
                                            target_node_id)

        # Wait for the transfer to actually take effect before requesting
        # the next one.
        self._wait_for_leader(
            lambda l: l is not None and l == target_node_id,
            timeout=ELECTION_TIMEOUT * 2)
        self.logger.info(f"Completed transfer to {target_node_id}")

    self.logger.info(f"Completed {transfer_count} transfers successfully")

    # Explicit stop of producer so that we see any errors
    producer.stop()
    producer.wait()
    producer.free()
def test_static(self):
    """
    Move partitions with data, but no active producers or consumers.

    Creates a matrix of topics (partition counts 1..4 crossed with
    replication factors 1 and 3), produces a fixed record set to each,
    runs a series of partition moves, then verifies every record is
    still consumable afterwards.
    """
    self.logger.info(f"Starting redpanda...")
    self.start_redpanda(num_nodes=3)

    # Build the topic matrix.
    topics = []
    for partition_count in range(1, 5):
        for replication_factor in (1, 3):
            name = f"topic{len(topics)}"
            spec = TopicSpec(name=name,
                             partition_count=partition_count,
                             replication_factor=replication_factor)
            topics.append(spec)

    self.logger.info(f"Creating topics...")
    for spec in topics:
        self.client().create_topic(spec)

    num_records = 1000
    # Expected (key, value) pairs. Set comprehension replaces the
    # original redundant set(generator) wrapping.
    produced = {(f"key-{i:08d}", f"record-{i:08d}")
                for i in range(num_records)}

    for spec in topics:
        self.logger.info(f"Producing to {spec}")
        producer = KafProducer(self.test_context, self.redpanda, spec.name,
                               num_records)
        producer.start()
        self.logger.info(
            f"Finished producing to {spec}, waiting for producer...")
        producer.wait()
        producer.free()
        self.logger.info(f"Producer stop complete.")

    # Exercise partition movement while the data is at rest.
    for _ in range(25):
        self._move_and_verify()

    for spec in topics:
        self.logger.info(f"Verifying records in {spec}")

        consumer = RpkConsumer(self.test_context,
                               self.redpanda,
                               spec.name,
                               ignore_errors=False,
                               retries=0)
        consumer.start()
        timeout = 30
        t1 = time.time()
        consumed = set()
        # Poll until all records are seen or the deadline passes; on
        # timeout, log the delta in both directions and fail.
        while consumed != produced:
            if time.time() > t1 + timeout:
                self.logger.error(
                    f"Validation failed for topic {spec.name}. Produced {len(produced)}, consumed {len(consumed)}"
                )
                self.logger.error(
                    f"Messages consumed but not produced: {sorted(consumed - produced)}"
                )
                self.logger.error(
                    f"Messages produced but not consumed: {sorted(produced - consumed)}"
                )
                # `consumed` is already a set; the original re-wrapped it
                # in set() redundantly.
                assert consumed == produced
            else:
                time.sleep(5)

            for m in consumer.messages:
                self.logger.info(f"message: {m}")
            # Set comprehension instead of set([listcomp]).
            consumed = {(m['key'], m['value']) for m in consumer.messages}

        self.logger.info(f"Stopping consumer...")
        consumer.stop()
        self.logger.info(f"Awaiting consumer...")
        consumer.wait()
        self.logger.info(f"Freeing consumer...")
        consumer.free()

        self.logger.info(f"Finished verifying records in {spec}")
def test_deletion_stops_move(self):
    """
    Delete topic which partitions are being moved and check status after
    topic is created again, old move operations should not influence
    newly created topic
    """
    self.start_redpanda(num_nodes=3)

    # create a single topic with replication factor of 1
    topic = 'test-topic'
    rpk = RpkTool(self.redpanda)
    rpk.create_topic(topic, 1, 1)
    partition = 0
    num_records = 1000

    # Seed the partition with data so the move has something to transfer.
    self.logger.info(f"Producing to {topic}")
    producer = KafProducer(self.test_context, self.redpanda, topic,
                           num_records)
    producer.start()
    self.logger.info(
        f"Finished producing to {topic}, waiting for producer...")
    producer.wait()
    producer.free()
    self.logger.info(f"Producer stop complete.")

    admin = Admin(self.redpanda)
    # get current assignments
    assignments = self._get_assignments(admin, topic, partition)
    assert len(assignments) == 1
    self.logger.info(f"assignments for {topic}-{partition}: {assignments}")
    brokers = admin.get_brokers()
    self.logger.info(f"available brokers: {brokers}")
    # pick a replacement broker that does not currently host the partition
    candidates = list(
        filter(lambda b: b['node_id'] != assignments[0]['node_id'],
               brokers))
    replacement = random.choice(candidates)
    target_assignment = [{'node_id': replacement['node_id'], 'core': 0}]
    self.logger.info(
        f"target assignments for {topic}-{partition}: {target_assignment}")
    # shutdown target node to make sure that move will never complete
    node = self.redpanda.get_node(replacement['node_id'])
    self.redpanda.stop_node(node)
    admin.set_partition_replicas(topic, partition, target_assignment)

    # check that the status is in progress
    def get_status():
        partition_info = admin.get_partitions(topic, partition)
        self.logger.info(
            f"current assignments for {topic}-{partition}: {partition_info}"
        )
        return partition_info["status"]

    wait_until(lambda: get_status() == 'in_progress', 10, 1)
    # delete the topic
    rpk.delete_topic(topic)
    # start the node back up
    self.redpanda.start_node(node)
    # create topic again
    rpk.create_topic(topic, 1, 1)
    # the re-created topic must not inherit the stuck move: its status
    # should settle to 'done'
    wait_until(lambda: get_status() == 'done', 10, 1)