class KafkaVersionTest(Test): """Sanity checks on kafka versioning.""" def __init__(self, test_context): super(KafkaVersionTest, self).__init__(test_context) self.topic = "topic" self.zk = ZookeeperService(test_context, num_nodes=1) def setUp(self): self.zk.start() @cluster(num_nodes=2) def test_0_8_2(self): """Test kafka service node-versioning api - verify that we can bring up a single-node 0.8.2.X cluster.""" self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) node = self.kafka.nodes[0] node.version = LATEST_0_8_2 self.kafka.start() assert is_version(node, [LATEST_0_8_2], logger=self.logger) @cluster(num_nodes=3) def test_multi_version(self): """Test kafka service node-versioning api - ensure we can bring up a 2-node cluster, one on version 0.8.2.X, the other on the current development branch.""" self.kafka = KafkaService(self.test_context, num_nodes=2, zk=self.zk, topics={self.topic: {"partitions": 1, "replication-factor": 2}}) self.kafka.nodes[1].version = LATEST_0_8_2 self.kafka.nodes[1].config[config_property.INTER_BROKER_PROTOCOL_VERSION] = "0.8.2.X" self.kafka.start() assert is_version(self.kafka.nodes[0], [DEV_BRANCH.vstring], logger=self.logger) assert is_version(self.kafka.nodes[1], [LATEST_0_8_2], logger=self.logger)
class CamusTest(Test): def __init__(self, test_context, num_zk, num_brokers, num_hadoop, num_schema_registry, num_rest, hadoop_distro='cdh', hadoop_version=2, topics=None): super(CamusTest, self).__init__(test_context) self.num_zk = num_zk self.num_brokers = num_brokers self.num_hadoop = num_hadoop self.num_schema_registry = num_schema_registry self.num_rest = num_rest self.topics = topics self.hadoop_distro = hadoop_distro self.hadoop_version = hadoop_version self.zk = ZookeeperService(test_context, self.num_zk) self.kafka = KafkaService(test_context, self.num_brokers, self.zk, topics=self.topics) self.hadoop = create_hadoop_service(test_context, self.num_hadoop, self.hadoop_distro, self.hadoop_version) self.schema_registry = SchemaRegistryService(test_context, self.num_schema_registry, self.zk, self.kafka) self.rest = KafkaRestService(test_context, self.num_rest, self.zk, self.kafka, self.schema_registry) def setUp(self): self.zk.start() self.kafka.start() self.hadoop.start() self.schema_registry.start() self.rest.start()
class GetOffsetShellTest(Test): """ Tests GetOffsetShell tool """ def __init__(self, test_context): super(GetOffsetShellTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.messages_received_count = 0 self.topics = { TOPIC: {'partitions': NUM_PARTITIONS, 'replication-factor': REPLICATION_FACTOR} } self.zk = ZookeeperService(test_context, self.num_zk) def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, topics=self.topics) self.kafka.start() def start_producer(self): # This will produce to kafka cluster self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, max_messages=MAX_MESSAGES) self.producer.start() current_acked = self.producer.num_acked wait_until(lambda: self.producer.num_acked >= current_acked + MAX_MESSAGES, timeout_sec=10, err_msg="Timeout awaiting messages to be produced and acked") def start_consumer(self): self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC, consumer_timeout_ms=1000) self.consumer.start() @cluster(num_nodes=4) def test_get_offset_shell(self, security_protocol='PLAINTEXT'): """ Tests if GetOffsetShell is getting offsets correctly :return: None """ self.start_kafka(security_protocol, security_protocol) self.start_producer() # Assert that offset fetched without any consumers consuming is 0 assert self.kafka.get_offset_shell(TOPIC, None, 1000, 1, -1), "%s:%s:%s" % (TOPIC, NUM_PARTITIONS - 1, 0) self.start_consumer() node = self.consumer.nodes[0] wait_until(lambda: self.consumer.alive(node), timeout_sec=20, backoff_sec=.2, err_msg="Consumer was too slow to start") # Assert that offset is correctly indicated by GetOffsetShell tool wait_until(lambda: "%s:%s:%s" % (TOPIC, NUM_PARTITIONS - 1, MAX_MESSAGES) in self.kafka.get_offset_shell(TOPIC, None, 1000, 1, -1), timeout_sec=10, err_msg="Timed out waiting to reach expected offset.")
class ProduceBenchTest(Test): def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(ProduceBenchTest, self).__init__(test_context) self.zk = ZookeeperService(test_context, num_nodes=3) self.kafka = KafkaService(test_context, num_nodes=3, zk=self.zk) self.workload_service = ProduceBenchWorkloadService(test_context, self.kafka) self.trogdor = TrogdorService(context=self.test_context, client_services=[self.kafka, self.workload_service]) def setUp(self): self.trogdor.start() self.zk.start() self.kafka.start() def teardown(self): self.trogdor.stop() self.kafka.stop() self.zk.stop() def test_produce_bench(self): active_topics={"produce_bench_topic[0-1]":{"numPartitions":1, "replicationFactor":3}} inactive_topics={"produce_bench_topic[2-9]":{"numPartitions":1, "replicationFactor":3}} spec = ProduceBenchWorkloadSpec(0, TaskSpec.MAX_DURATION_MS, self.workload_service.producer_node, self.workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=100000, producer_conf={}, inactive_topics=inactive_topics, active_topics=active_topics) workload1 = self.trogdor.create_task("workload1", spec) workload1.wait_for_done(timeout_sec=360) tasks = self.trogdor.tasks() self.logger.info("TASKS: %s\n" % json.dumps(tasks, sort_keys=True, indent=2))
class CompressionTest(ProduceConsumeValidateTest): """ These tests validate produce / consume for compressed topics. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(CompressionTest, self).__init__(test_context=test_context) self.topic = "test_topic" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, topics={self.topic: { "partitions": 10, "replication-factor": 1}}) self.num_partitions = 10 self.timeout_sec = 60 self.producer_throughput = 1000 self.num_producers = 4 self.messages_per_producer = 1000 self.num_consumers = 1 def setUp(self): self.zk.start() def min_cluster_size(self): # Override this since we're adding services outside of the constructor return super(CompressionTest, self).min_cluster_size() + self.num_producers + self.num_consumers @parametrize(compression_types=["snappy","gzip","lz4","none"], new_consumer=True) @parametrize(compression_types=["snappy","gzip","lz4","none"], new_consumer=False) def test_compressed_topic(self, compression_types, new_consumer): """Test produce => consume => validate for compressed topics Setup: 1 zk, 1 kafka node, 1 topic with partitions=10, replication-factor=1 compression_types parameter gives a list of compression types (or no compression if "none"). Each producer in a VerifiableProducer group (num_producers = 4) will use a compression type from the list based on producer's index in the group. - Produce messages in the background - Consume messages in the background - Stop producing, and finish consuming - Validate that every acked message was consumed """ self.kafka.security_protocol = "PLAINTEXT" self.kafka.interbroker_security_protocol = self.kafka.security_protocol self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, message_validator=is_int_with_prefix, compression_types=compression_types) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, new_consumer=new_consumer, consumer_timeout_ms=60000, message_validator=is_int_with_prefix) self.kafka.start() self.run_produce_consume_validate(lambda: wait_until( lambda: self.producer.each_produced_at_least(self.messages_per_producer) == True, timeout_sec=120, backoff_sec=1, err_msg="Producer did not produce all messages in reasonable amount of time"))
class Log4jAppenderTest(Test): """ Tests KafkaLog4jAppender using VerifiableKafkaLog4jAppender that appends increasing ints to a Kafka topic """ def __init__(self, test_context): super(Log4jAppenderTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.topics = { TOPIC: {'partitions': 1, 'replication-factor': 1} } self.zk = ZookeeperService(test_context, self.num_zk) def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, topics=self.topics) self.kafka.start() def start_appender(self, security_protocol): self.appender = KafkaLog4jAppender(self.test_context, self.num_brokers, self.kafka, TOPIC, MAX_MESSAGES, security_protocol=security_protocol) self.appender.start() def start_consumer(self, security_protocol): enable_new_consumer = security_protocol == SecurityConfig.SSL self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC, consumer_timeout_ms=1000, new_consumer=enable_new_consumer) self.consumer.start() @matrix(security_protocol=['PLAINTEXT', 'SSL']) def test_log4j_appender(self, security_protocol='PLAINTEXT'): """ Tests if KafkaLog4jAppender is producing to Kafka topic :return: None """ self.start_kafka(security_protocol, security_protocol) self.start_appender(security_protocol) self.appender.wait() self.start_consumer(security_protocol) node = self.consumer.nodes[0] wait_until(lambda: self.consumer.alive(node), timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start") # Verify consumed messages count expected_lines_count = MAX_MESSAGES * 2 # two times to account for new lines introduced by log4j wait_until(lambda: len(self.consumer.messages_consumed[1]) == expected_lines_count, timeout_sec=10, err_msg="Timed out waiting to consume expected number of messages.") self.consumer.stop()
class ClientCompatibilityTestNewBroker(ProduceConsumeValidateTest): def __init__(self, test_context): super(ClientCompatibilityTestNewBroker, self).__init__(test_context=test_context) def setUp(self): self.topic = "test_topic" self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() # Producer and consumer self.producer_throughput = 10000 self.num_producers = 1 self.num_consumers = 1 self.messages_per_producer = 1000 @cluster(num_nodes=6) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(DEV_BRANCH), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(DEV_BRANCH), compression_types=["none"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(LATEST_0_9), compression_types=["none"], new_consumer=False, timestamp_type=None) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(LATEST_0_9), compression_types=["snappy"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_1_1), consumer_version=str(LATEST_1_1), compression_types=["lz4"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_1_0), consumer_version=str(LATEST_1_0), compression_types=["none"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_0_11_0), consumer_version=str(LATEST_0_11_0), compression_types=["gzip"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_0_10_2), consumer_version=str(LATEST_0_10_2), compression_types=["lz4"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_0_10_1), consumer_version=str(LATEST_0_10_1), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(LATEST_0_10_0), consumer_version=str(LATEST_0_10_0), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(LATEST_0_9), consumer_version=str(DEV_BRANCH), compression_types=["none"], timestamp_type=None) @parametrize(producer_version=str(LATEST_0_9), consumer_version=str(DEV_BRANCH), compression_types=["snappy"], timestamp_type=None) @parametrize(producer_version=str(LATEST_0_9), consumer_version=str(LATEST_0_9), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(LATEST_0_8_2), consumer_version=str(LATEST_0_8_2), compression_types=["none"], new_consumer=False, timestamp_type=None) def test_compatibility(self, producer_version, consumer_version, compression_types, new_consumer=True, timestamp_type=None): self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=DEV_BRANCH, topics={self.topic: { "partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) for node in self.kafka.nodes: if timestamp_type is not None: node.config[config_property.MESSAGE_TIMESTAMP_TYPE] = timestamp_type self.kafka.start() self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, message_validator=is_int, compression_types=compression_types, version=KafkaVersion(producer_version)) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=30000, new_consumer=new_consumer, message_validator=is_int, version=KafkaVersion(consumer_version)) self.run_produce_consume_validate(lambda: wait_until( lambda: self.producer.each_produced_at_least(self.messages_per_producer) 
== True, timeout_sec=120, backoff_sec=1, err_msg="Producer did not produce all messages in reasonable amount of time"))
class MiniTest(Test): def __init__(self, test_context): super(MiniTest, self).__init__(test_context=test_context) self.zk = ZookeeperService(test_context, 1) self.kafka = KafkaService(test_context, 1, self.zk) def test(self): self.zk.start() self.kafka.start()
class ClientCompatibilityFeaturesTest(Test): """ Tests clients for the presence or absence of specific features when communicating with brokers with various versions. Relies on ClientCompatibilityTest.java for much of the functionality. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(ClientCompatibilityFeaturesTest, self).__init__(test_context=test_context) self.zk = ZookeeperService(test_context, num_nodes=3) # Generate a unique topic name topic_name = "client_compat_features_topic_%d%d" % (int(time.time()), randint(0, 2147483647)) self.topics = { topic_name: { "partitions": 1, # Use only one partition to avoid worrying about ordering "replication-factor": 3 }} self.kafka = KafkaService(test_context, num_nodes=3, zk=self.zk, topics=self.topics) def invoke_compatibility_program(self, features): # Run the compatibility test on the first Kafka node. node = self.zk.nodes[0] cmd = ("%s org.apache.kafka.tools.ClientCompatibilityTest " "--bootstrap-server %s " "--num-cluster-nodes %d " "--topic %s " % (self.zk.path.script("kafka-run-class.sh", node), self.kafka.bootstrap_servers(), len(self.kafka.nodes), self.topics.keys()[0])) for k, v in features.iteritems(): cmd = cmd + ("--%s %s " % (k, v)) results_dir = TestContext.results_dir(self.test_context, 0) os.makedirs(results_dir) ssh_log_file = "%s/%s" % (results_dir, "client_compatibility_test_output.txt") try: self.logger.info("Running %s" % cmd) run_command(node, cmd, ssh_log_file) except Exception as e: self.logger.info("** Command failed. See %s for log messages." % ssh_log_file) raise @parametrize(broker_version=str(DEV_BRANCH)) @parametrize(broker_version=str(LATEST_0_10_0)) @parametrize(broker_version=str(LATEST_0_10_1)) @parametrize(broker_version=str(LATEST_0_10_2)) @parametrize(broker_version=str(LATEST_0_11_0)) @parametrize(broker_version=str(LATEST_1_0)) @parametrize(broker_version=str(LATEST_1_1)) def run_compatibility_test(self, broker_version): self.zk.start() self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.start() features = get_broker_features(broker_version) self.invoke_compatibility_program(features)
class TestVerifiableProducer(Test): """Sanity checks on verifiable producer service class.""" def __init__(self, test_context): super(TestVerifiableProducer, self).__init__(test_context) self.topic = "topic" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) self.num_messages = 1000 # This will produce to source kafka cluster self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=self.num_messages, throughput=self.num_messages/5) def setUp(self): self.zk.start() self.kafka.start() @cluster(num_nodes=3) @parametrize(producer_version=str(LATEST_0_8_2)) @parametrize(producer_version=str(LATEST_0_9)) @parametrize(producer_version=str(LATEST_0_10_0)) @parametrize(producer_version=str(LATEST_0_10_1)) @parametrize(producer_version=str(DEV_BRANCH)) def test_simple_run(self, producer_version=DEV_BRANCH): """ Test that we can start VerifiableProducer on the current branch snapshot version or against the 0.8.2 jar, and verify that we can produce a small number of messages. """ node = self.producer.nodes[0] node.version = KafkaVersion(producer_version) self.producer.start() wait_until(lambda: self.producer.num_acked > 5, timeout_sec=5, err_msg="Producer failed to start in a reasonable amount of time.") # using version.vstring (distutils.version.LooseVersion) is a tricky way of ensuring # that this check works with DEV_BRANCH # When running VerifiableProducer 0.8.X, both the current branch version and 0.8.X should show up because of the # way verifiable producer pulls in some development directories into its classpath # # If the test fails here because 'ps .. | grep' couldn't find the process it means # the login and grep that is_version() performs is slower than # the time it takes the producer to produce its messages. # Easy fix is to decrease throughput= above, the good fix is to make the producer # not terminate until explicitly killed in this case. if node.version <= LATEST_0_8_2: assert is_version(node, [node.version.vstring, DEV_BRANCH.vstring], logger=self.logger) else: assert is_version(node, [node.version.vstring], logger=self.logger) self.producer.wait() num_produced = self.producer.num_acked assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (num_produced, self.num_messages)
class TestUpgrade(ProduceConsumeValidateTest): def __init__(self, test_context): super(TestUpgrade, self).__init__(test_context=test_context) def setUp(self): self.topic = "test_topic" self.zk = ZookeeperService(self.test_context, num_nodes=1) self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=LATEST_0_8_2, topics={self.topic: { "partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) self.zk.start() self.kafka.start() # Producer and consumer self.producer_throughput = 10000 self.num_producers = 1 self.num_consumers = 1 self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, version=LATEST_0_8_2) # TODO - reduce the timeout self.consumer = ConsoleConsumer( self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=30000, message_validator=is_int, version=LATEST_0_8_2) def perform_upgrade(self): self.logger.info("First pass bounce - rolling upgrade") for node in self.kafka.nodes: self.kafka.stop_node(node) node.version = TRUNK node.config[config_property.INTER_BROKER_PROTOCOL_VERSION] = "0.8.2.X" self.kafka.start_node(node) self.logger.info("Second pass bounce - remove inter.broker.protocol.version config") for node in self.kafka.nodes: self.kafka.stop_node(node) del node.config[config_property.INTER_BROKER_PROTOCOL_VERSION] self.kafka.start_node(node) def test_upgrade(self): """Test upgrade of Kafka broker cluster from 0.8.2 to 0.9.0 - Start 3 node broker cluster on version 0.8.2 - Start producer and consumer in the background - Perform two-phase rolling upgrade - First phase: upgrade brokers to 0.9.0 with inter.broker.protocol.version set to 0.8.2.X - Second phase: remove inter.broker.protocol.version config with rolling bounce - Finally, validate that every message acked by the producer was consumed by the consumer """ self.run_produce_consume_validate(core_test_action=self.perform_upgrade)
class ClientCompatibilityProduceConsumeTest(ProduceConsumeValidateTest): """ These tests validate that we can use a new client to produce and consume from older brokers. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(ClientCompatibilityProduceConsumeTest, self).__init__(test_context=test_context) self.topic = "test_topic" self.zk = ZookeeperService(test_context, num_nodes=3) self.kafka = KafkaService(test_context, num_nodes=3, zk=self.zk, topics={self.topic:{ "partitions": 10, "replication-factor": 2}}) self.num_partitions = 10 self.timeout_sec = 60 self.producer_throughput = 1000 self.num_producers = 2 self.messages_per_producer = 1000 self.num_consumers = 1 def setUp(self): self.zk.start() def min_cluster_size(self): # Override this since we're adding services outside of the constructor return super(ClientCompatibilityProduceConsumeTest, self).min_cluster_size() + self.num_producers + self.num_consumers @parametrize(broker_version=str(DEV_BRANCH)) @parametrize(broker_version=str(LATEST_0_10_0)) @parametrize(broker_version=str(LATEST_0_10_1)) @parametrize(broker_version=str(LATEST_0_10_2)) @parametrize(broker_version=str(LATEST_0_11_0)) @parametrize(broker_version=str(LATEST_1_0)) @parametrize(broker_version=str(LATEST_1_1)) def test_produce_consume(self, broker_version): print("running producer_consumer_compat with broker_version = %s" % broker_version) self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.security_protocol = "PLAINTEXT" self.kafka.interbroker_security_protocol = self.kafka.security_protocol self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, message_validator=is_int_with_prefix) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int_with_prefix) self.kafka.start() self.run_produce_consume_validate(lambda: wait_until( lambda: self.producer.each_produced_at_least(self.messages_per_producer) == True, timeout_sec=120, backoff_sec=1, err_msg="Producer did not produce all messages in reasonable amount of time"))
class SimpleConsumerShellTest(Test): """ Tests SimpleConsumerShell tool """ def __init__(self, test_context): super(SimpleConsumerShellTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.messages_received_count = 0 self.topics = {TOPIC: {"partitions": NUM_PARTITIONS, "replication-factor": REPLICATION_FACTOR}} self.zk = ZookeeperService(test_context, self.num_zk) def setUp(self): self.zk.start() def start_kafka(self): self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, topics=self.topics) self.kafka.start() def run_producer(self): # This will produce to kafka cluster self.producer = VerifiableProducer( self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, max_messages=MAX_MESSAGES ) self.producer.start() wait_until( lambda: self.producer.num_acked == MAX_MESSAGES, timeout_sec=10, err_msg="Timeout awaiting messages to be produced and acked", ) def start_simple_consumer_shell(self): self.simple_consumer_shell = SimpleConsumerShell(self.test_context, 1, self.kafka, TOPIC) self.simple_consumer_shell.start() def test_simple_consumer_shell(self): """ Tests if SimpleConsumerShell is fetching expected records :return: None """ self.start_kafka() self.run_producer() self.start_simple_consumer_shell() # Assert that SimpleConsumerShell is fetching expected number of messages wait_until( lambda: self.simple_consumer_shell.get_output().count("\n") == (MAX_MESSAGES + 1), timeout_sec=10, err_msg="Timed out waiting to receive expected number of messages.", )
class TestVerifiableProducer(Test): """Sanity checks on verifiable producer service class.""" def __init__(self, test_context): super(TestVerifiableProducer, self).__init__(test_context) self.topic = "topic" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) self.num_messages = 1000 # This will produce to source kafka cluster self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=self.num_messages, throughput=1000) def setUp(self): self.zk.start() self.kafka.start() @parametrize(producer_version=str(LATEST_0_8_2)) @parametrize(producer_version=str(LATEST_0_9)) @parametrize(producer_version=str(TRUNK)) def test_simple_run(self, producer_version=TRUNK): """ Test that we can start VerifiableProducer on trunk or against the 0.8.2 jar, and verify that we can produce a small number of messages. """ node = self.producer.nodes[0] node.version = KafkaVersion(producer_version) self.producer.start() wait_until(lambda: self.producer.num_acked > 5, timeout_sec=5, err_msg="Producer failed to start in a reasonable amount of time.") # using version.vstring (distutils.version.LooseVersion) is a tricky way of ensuring # that this check works with TRUNK # When running VerifiableProducer 0.8.X, both trunk version and 0.8.X should show up because of the way # verifiable producer pulls in some trunk directories into its classpath if node.version <= LATEST_0_8_2: assert is_version(node, [node.version.vstring, TRUNK.vstring]) else: assert is_version(node, [node.version.vstring]) self.producer.wait() num_produced = self.producer.num_acked assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % (num_produced, self.num_messages)
class LogCompactionTest(Test): # Configure smaller segment size to create more segments for compaction LOG_SEGMENT_BYTES = "1024000" def __init__(self, test_context): super(LogCompactionTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.zk = ZookeeperService(test_context, self.num_zk) self.kafka = None self.compaction_verifier = None def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol): self.kafka = KafkaService( self.test_context, num_nodes = self.num_brokers, zk = self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, server_prop_overides=[ [config_property.LOG_SEGMENT_BYTES, LogCompactionTest.LOG_SEGMENT_BYTES], ]) self.kafka.start() def start_test_log_compaction_tool(self, security_protocol): self.compaction_verifier = LogCompactionTester(self.test_context, self.kafka, security_protocol=security_protocol) self.compaction_verifier.start() @cluster(num_nodes=4) def test_log_compaction(self, security_protocol='PLAINTEXT'): self.start_kafka(security_protocol, security_protocol) self.start_test_log_compaction_tool(security_protocol) # Verify that compacted data verification completed in LogCompactionTester wait_until(lambda: self.compaction_verifier.is_done, timeout_sec=180, err_msg="Timed out waiting to complete compaction")
class KafkaTest(Test): """ Helper class that manages setting up a Kafka cluster. Use this if the default settings for Kafka are sufficient for your test; any customization needs to be done manually. Your run() method should call tearDown and setUp. The Zookeeper and Kafka services are available as the fields KafkaTest.zk and KafkaTest.kafka. """ def __init__(self, test_context, num_zk, num_brokers, topics=None): super(KafkaTest, self).__init__(test_context) self.num_zk = num_zk self.num_brokers = num_brokers self.topics = topics self.zk = ZookeeperService(test_context, self.num_zk) self.kafka = KafkaService( test_context, self.num_brokers, self.zk, topics=self.topics) def setUp(self): self.zk.start() self.kafka.start()
class EverythingRunsTest(Test): """ Sanity check to ensure that various core services all run. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(EverythingRunsTest, self).__init__(test_context=test_context) self.zk = ZookeeperService(test_context, num_nodes=2) self.kafka = KafkaService(test_context, 1, self.zk) self.schema_registry = SchemaRegistryService(test_context, 1, self.zk, self.kafka) self.rest_proxy = KafkaRestService(test_context, 1, self.zk, self.kafka, self.schema_registry) self.register_driver = RegisterSchemasService( test_context, 1, self.schema_registry, retry_wait_sec=.02, num_tries=5, max_time_seconds=10, max_schemas=50) def test(self): self.zk.start() self.kafka.start() self.schema_registry.start() self.rest_proxy.start() self.register_driver.start() self.register_driver.wait() # block until register_driver finishes
class ClientCompatibilityProduceConsumeTest(ProduceConsumeValidateTest): """ These tests validate that we can use a new client to produce and consume from older brokers. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(ClientCompatibilityProduceConsumeTest, self).__init__(test_context=test_context) self.topic = "test_topic" self.zk = ZookeeperService(test_context, num_nodes=3) self.kafka = KafkaService( test_context, num_nodes=3, zk=self.zk, topics={self.topic: { "partitions": 10, "replication-factor": 2 }}) self.num_partitions = 10 self.timeout_sec = 60 self.producer_throughput = 1000 self.num_producers = 2 self.messages_per_producer = 1000 self.num_consumers = 1 def setUp(self): self.zk.start() def min_cluster_size(self): # Override this since we're adding services outside of the constructor return super( ClientCompatibilityProduceConsumeTest, self).min_cluster_size() + self.num_producers + self.num_consumers @parametrize(broker_version=str(DEV_BRANCH)) @parametrize(broker_version=str(LATEST_0_10_0)) @parametrize(broker_version=str(LATEST_0_10_1)) @parametrize(broker_version=str(LATEST_0_10_2)) @parametrize(broker_version=str(LATEST_0_11_0)) @parametrize(broker_version=str(LATEST_1_0)) @parametrize(broker_version=str(LATEST_1_1)) @parametrize(broker_version=str(LATEST_2_0)) @parametrize(broker_version=str(LATEST_2_1)) @parametrize(broker_version=str(LATEST_2_2)) @parametrize(broker_version=str(LATEST_2_3)) @parametrize(broker_version=str(LATEST_2_4)) def test_produce_consume(self, broker_version): print("running producer_consumer_compat with broker_version = %s" % broker_version) self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.security_protocol = "PLAINTEXT" self.kafka.interbroker_security_protocol = self.kafka.security_protocol self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, message_validator=is_int_with_prefix) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int_with_prefix) self.kafka.start() self.run_produce_consume_validate(lambda: wait_until( lambda: self.producer.each_produced_at_least( self.messages_per_producer) == True, timeout_sec=120, backoff_sec=1, err_msg= "Producer did not produce all messages in reasonable amount of time" ))
class LogDirFailureTest(ProduceConsumeValidateTest): """ Note that consuming is a bit tricky, at least with console consumer. The goal is to consume all messages (foreach partition) in the topic. In this case, waiting for the last message may cause the consumer to stop too soon since console consumer is consuming multiple partitions from a single thread and therefore we lose ordering guarantees. Waiting on a count of consumed messages can be unreliable: if we stop consuming when num_consumed == num_acked, we might exit early if some messages are duplicated (though not an issue here since producer retries==0) Therefore rely here on the consumer.timeout.ms setting which times out on the interval between successively consumed messages. Since we run the producer to completion before running the consumer, this is a reliable indicator that nothing is left to consume. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(LogDirFailureTest, self).__init__(test_context=test_context) self.topic1 = "test_topic_1" self.topic2 = "test_topic_2" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService( test_context, num_nodes=3, zk=self.zk, topics={ self.topic1: { "partitions": 1, "replication-factor": 3, "configs": { "min.insync.replicas": 1 } }, self.topic2: { "partitions": 1, "replication-factor": 3, "configs": { "min.insync.replicas": 2 } } }, # Set log.roll.ms to 3 seconds so that broker will detect disk error sooner when it creates log segment # Otherwise broker will still be able to read/write the log file even if the log directory is inaccessible. server_prop_overrides=[ [config_property.OFFSETS_TOPIC_NUM_PARTITIONS, "1"], [config_property.LOG_FLUSH_INTERVAL_MESSAGE, "5"], [ config_property. REPLICA_HIGHWATERMARK_CHECKPOINT_INTERVAL_MS, "60000" ], [config_property.LOG_ROLL_TIME_MS, "3000"] ]) self.producer_throughput = 1000 self.num_producers = 1 self.num_consumers = 1 def setUp(self): self.zk.start() def min_cluster_size(self): """Override this since we're adding services outside of the constructor""" return super(LogDirFailureTest, self).min_cluster_size( ) + self.num_producers * 2 + self.num_consumers * 2 @cluster(num_nodes=9) @matrix(bounce_broker=[False, True], broker_type=["leader", "follower"], security_protocol=["PLAINTEXT"]) def test_replication_with_disk_failure(self, bounce_broker, security_protocol, broker_type): """Replication tests. These tests verify that replication provides simple durability guarantees by checking that data acked by brokers is still available for consumption in the face of various failure scenarios. 
Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=3, replication-factor=3, and min.insync.replicas=2 and another topic with partitions=3, replication-factor=3, and min.insync.replicas=1 - Produce messages in the background - Consume messages in the background - Drive broker failures (shutdown, or bounce repeatedly with kill -15 or kill -9) - When done driving failures, stop producing, and finish consuming - Validate that every acked message was consumed """ self.kafka.security_protocol = security_protocol self.kafka.interbroker_security_protocol = security_protocol self.kafka.start() try: # Initialize producer/consumer for topic2 self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic2, throughput=self.producer_throughput) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic2, group_id="test-consumer-group-1", consumer_timeout_ms=60000, message_validator=is_int) self.start_producer_and_consumer() # Get a replica of the partition of topic2 and make its log directory offline by changing the log dir's permission. # We assume that partition of topic2 is created in the second log directory of respective brokers. broker_node = select_node(self, broker_type, self.topic2) broker_idx = self.kafka.idx(broker_node) assert broker_idx in self.kafka.isr_idx_list(self.topic2), \ "Broker %d should be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic2))) # Verify that topic1 and the consumer offset topic is in the first log directory and topic2 is in the second log directory topic_1_partition_0 = KafkaService.DATA_LOG_DIR_1 + "/test_topic_1-0" topic_2_partition_0 = KafkaService.DATA_LOG_DIR_2 + "/test_topic_2-0" offset_topic_partition_0 = KafkaService.DATA_LOG_DIR_1 + "/__consumer_offsets-0" for path in [ topic_1_partition_0, topic_2_partition_0, offset_topic_partition_0 ]: assert path_exists(broker_node, path), "%s should exist" % path self.logger.debug("Making log dir %s inaccessible" % (KafkaService.DATA_LOG_DIR_2)) cmd = "chmod a-w %s -R" % (KafkaService.DATA_LOG_DIR_2) broker_node.account.ssh(cmd, allow_fail=False) if bounce_broker: self.kafka.restart_node(broker_node, clean_shutdown=True) # Verify the following: # 1) The broker with offline log directory is not the leader of the partition of topic2 # 2) The broker with offline log directory is not in the ISR # 3) The broker with offline log directory is still online # 4) Messages can still be produced and consumed from topic2 wait_until( lambda: self.kafka.leader(self.topic2, partition=0 ) != broker_node, timeout_sec=60, err_msg= "Broker %d should not be leader of topic %s and partition 0" % (broker_idx, self.topic2)) assert self.kafka.alive( broker_node), "Broker %d should be still online" % (broker_idx) wait_until( lambda: broker_idx not in self.kafka.isr_idx_list(self.topic2), timeout_sec=60, err_msg="Broker %d should not be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic2)))) self.stop_producer_and_consumer() self.validate() # Shutdown all other brokers so that the broker with offline log dir is the only online broker offline_nodes = [] for node in self.kafka.nodes: if broker_node != node: offline_nodes.append(node) self.logger.debug("Hard shutdown broker %d" % (self.kafka.idx(node))) self.kafka.stop_node(node) # Verify the following: # 1) The broker with offline directory is the only in-sync broker of the partition of topic1 # 2) Messages can still be produced and consumed from topic1 self.producer = VerifiableProducer( 
self.test_context, self.num_producers, self.kafka, self.topic1, throughput=self.producer_throughput, offline_nodes=offline_nodes) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic1, group_id="test-consumer-group-2", consumer_timeout_ms=90000, message_validator=is_int) self.consumer_start_timeout_sec = 90 self.start_producer_and_consumer() assert self.kafka.isr_idx_list(self.topic1) == [broker_idx], \ "In-sync replicas of topic %s and partition 0 should be %s" % (self.topic1, str([broker_idx])) self.stop_producer_and_consumer() self.validate() except BaseException as e: for s in self.test_context.services: self.mark_for_collect(s) raise
class ConnectStandaloneFileTest(Test): """ Simple test of Kafka Connect that produces data from a file in one standalone process and consumes it on another, validating the output is identical to the input. """ FILE_SOURCE_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSourceConnector' FILE_SINK_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSinkConnector' INPUT_FILE = "/mnt/connect.input" OUTPUT_FILE = "/mnt/connect.output" OFFSETS_FILE = "/mnt/connect.offsets" TOPIC = "${file:%s:topic.external}" % ConnectServiceBase.EXTERNAL_CONFIGS_FILE TOPIC_TEST = "test" FIRST_INPUT_LIST = ["foo", "bar", "baz"] FIRST_INPUT = "\n".join(FIRST_INPUT_LIST) + "\n" SECOND_INPUT_LIST = ["razz", "ma", "tazz"] SECOND_INPUT = "\n".join(SECOND_INPUT_LIST) + "\n" SCHEMA = { "type": "string", "optional": False } def __init__(self, test_context): super(ConnectStandaloneFileTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.topics = { 'test' : { 'partitions': 1, 'replication-factor': 1 } } self.zk = ZookeeperService(test_context, self.num_zk) @cluster(num_nodes=5) @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=True) @parametrize(converter="org.apache.kafka.connect.json.JsonConverter", schemas=False) @parametrize(converter="org.apache.kafka.connect.storage.StringConverter", schemas=None) @parametrize(security_protocol=SecurityConfig.PLAINTEXT) @cluster(num_nodes=6) @parametrize(security_protocol=SecurityConfig.SASL_SSL) def test_file_source_and_sink(self, converter="org.apache.kafka.connect.json.JsonConverter", schemas=True, security_protocol='PLAINTEXT'): """ Validates basic end-to-end functionality of Connect standalone using the file source and sink converters. Includes parameterizations to test different converters (which also test per-connector converter overrides), schema/schemaless modes, and security support. """ assert converter != None, "converter type must be set" # Template parameters. Note that we don't set key/value.converter. These default to JsonConverter and we validate # converter overrides via the connector configuration. 
if converter != "org.apache.kafka.connect.json.JsonConverter": self.override_key_converter = converter self.override_value_converter = converter self.schemas = schemas self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=security_protocol, topics=self.topics) self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE]) self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE]) self.consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC_TEST, consumer_timeout_ms=10000) self.zk.start() self.kafka.start() self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")]) self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")]) self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node)) self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node)) self.source.start() self.sink.start() # Generating data on the source node should generate new records and create new output on the sink node self.source.node.account.ssh("echo -e -n " + repr(self.FIRST_INPUT) + " >> " + self.INPUT_FILE) wait_until(lambda: self.validate_output(self.FIRST_INPUT), timeout_sec=60, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.") # Restarting both should result in them picking up where they left off, # only processing new data. self.source.restart() self.sink.restart() self.source.node.account.ssh("echo -e -n " + repr(self.SECOND_INPUT) + " >> " + self.INPUT_FILE) wait_until(lambda: self.validate_output(self.FIRST_INPUT + self.SECOND_INPUT), timeout_sec=60, err_msg="Sink output file never converged to the same state as the input file") # Validate the format of the data in the Kafka topic self.consumer_validator.run() expected = json.dumps([line if not self.schemas else { "schema": self.SCHEMA, "payload": line } for line in self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST]) decoder = (json.loads if converter.endswith("JsonConverter") else str) actual = json.dumps([decoder(x) for x in self.consumer_validator.messages_consumed[1]]) assert expected == actual, "Expected %s but saw %s in Kafka" % (expected, actual) def validate_output(self, value): try: output_hash = list(self.sink.node.account.ssh_capture("md5sum " + self.OUTPUT_FILE))[0].strip().split()[0] return output_hash == hashlib.md5(value).hexdigest() except RemoteCommandError: return False @cluster(num_nodes=5) @parametrize(error_tolerance=ErrorTolerance.ALL) @parametrize(error_tolerance=ErrorTolerance.NONE) def test_skip_and_log_to_dlq(self, error_tolerance): self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, topics=self.topics) # set config props self.override_error_tolerance_props = error_tolerance self.enable_deadletterqueue = True successful_records = [] faulty_records = [] records = [] for i in range(0, 1000): if i % 2 == 0: records.append('{"some_key":' + str(i) + '}') successful_records.append('{some_key=' + str(i) + '}') else: # badly formatted json records (missing a quote after the key) records.append('{"some_key:' + str(i) + '}') faulty_records.append('{"some_key:' + str(i) + '}') records = "\n".join(records) + "\n" successful_records = 
"\n".join(successful_records) + "\n" if error_tolerance == ErrorTolerance.ALL: faulty_records = ",".join(faulty_records) else: faulty_records = faulty_records[0] self.source = ConnectStandaloneService(self.test_context, self.kafka, [self.INPUT_FILE, self.OFFSETS_FILE]) self.sink = ConnectStandaloneService(self.test_context, self.kafka, [self.OUTPUT_FILE, self.OFFSETS_FILE]) self.zk.start() self.kafka.start() self.override_key_converter = "org.apache.kafka.connect.storage.StringConverter" self.override_value_converter = "org.apache.kafka.connect.storage.StringConverter" self.source.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-source.properties")]) self.override_key_converter = "org.apache.kafka.connect.json.JsonConverter" self.override_value_converter = "org.apache.kafka.connect.json.JsonConverter" self.override_key_converter_schemas_enable = False self.override_value_converter_schemas_enable = False self.sink.set_configs(lambda node: self.render("connect-standalone.properties", node=node), [self.render("connect-file-sink.properties")]) self.source.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node)) self.sink.set_external_configs(lambda node: self.render("connect-file-external.properties", node=node)) self.source.start() self.sink.start() # Generating data on the source node should generate new records and create new output on the sink node self.source.node.account.ssh("echo -e -n " + repr(records) + " >> " + self.INPUT_FILE) if error_tolerance == ErrorTolerance.NONE: try: wait_until(lambda: self.validate_output(successful_records), timeout_sec=15, err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.") raise Exception("Expected to not find any results in this file.") except TimeoutError: self.logger.info("Caught expected exception") else: wait_until(lambda: self.validate_output(successful_records), timeout_sec=15, err_msg="Clean records added to input file were not seen in the output file in a reasonable amount of time.") if self.enable_deadletterqueue: self.logger.info("Reading records from deadletterqueue") consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, "my-connector-errors", consumer_timeout_ms=10000) consumer_validator.run() actual = ",".join(consumer_validator.messages_consumed[1]) assert faulty_records == actual, "Expected %s but saw %s in dead letter queue" % (faulty_records, actual)
class LogDirFailureTest(ProduceConsumeValidateTest): """ Note that consuming is a bit tricky, at least with console consumer. The goal is to consume all messages (foreach partition) in the topic. In this case, waiting for the last message may cause the consumer to stop too soon since console consumer is consuming multiple partitions from a single thread and therefore we lose ordering guarantees. Waiting on a count of consumed messages can be unreliable: if we stop consuming when num_consumed == num_acked, we might exit early if some messages are duplicated (though not an issue here since producer retries==0) Therefore rely here on the consumer.timeout.ms setting which times out on the interval between successively consumed messages. Since we run the producer to completion before running the consumer, this is a reliable indicator that nothing is left to consume. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(LogDirFailureTest, self).__init__(test_context=test_context) self.topic1 = "test_topic_1" self.topic2 = "test_topic_2" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=3, zk=self.zk, topics={ self.topic1: {"partitions": 1, "replication-factor": 3, "configs": {"min.insync.replicas": 1}}, self.topic2: {"partitions": 1, "replication-factor": 3, "configs": {"min.insync.replicas": 2}} }, # Set log.roll.ms to 3 seconds so that broker will detect disk error sooner when it creates log segment # Otherwise broker will still be able to read/write the log file even if the log directory is inaccessible. server_prop_overides=[ [config_property.OFFSETS_TOPIC_NUM_PARTITIONS, "1"], [config_property.LOG_FLUSH_INTERVAL_MESSAGE, "5"], [config_property.REPLICA_HIGHWATERMARK_CHECKPOINT_INTERVAL_MS, "60000"], [config_property.LOG_ROLL_TIME_MS, "3000"] ]) self.producer_throughput = 1000 self.num_producers = 1 self.num_consumers = 1 def setUp(self): self.zk.start() def min_cluster_size(self): """Override this since we're adding services outside of the constructor""" return super(LogDirFailureTest, self).min_cluster_size() + self.num_producers * 2 + self.num_consumers * 2 @cluster(num_nodes=9) @matrix(bounce_broker=[False, True], broker_type=["leader", "follower"], security_protocol=["PLAINTEXT"]) def test_replication_with_disk_failure(self, bounce_broker, security_protocol, broker_type): """Replication tests. These tests verify that replication provides simple durability guarantees by checking that data acked by brokers is still available for consumption in the face of various failure scenarios. 
Setup: 1 zk, 3 kafka nodes, 1 topic with partitions=3, replication-factor=3, and min.insync.replicas=2 and another topic with partitions=3, replication-factor=3, and min.insync.replicas=1 - Produce messages in the background - Consume messages in the background - Drive broker failures (shutdown, or bounce repeatedly with kill -15 or kill -9) - When done driving failures, stop producing, and finish consuming - Validate that every acked message was consumed """ self.kafka.security_protocol = security_protocol self.kafka.interbroker_security_protocol = security_protocol self.kafka.start() try: # Initialize producer/consumer for topic2 self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic2, throughput=self.producer_throughput) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic2, group_id="test-consumer-group-1", consumer_timeout_ms=60000, message_validator=is_int) self.start_producer_and_consumer() # Get a replica of the partition of topic2 and make its log directory offline by changing the log dir's permission. # We assume that partition of topic2 is created in the second log directory of respective brokers. broker_node = select_node(self, broker_type, self.topic2) broker_idx = self.kafka.idx(broker_node) assert broker_idx in self.kafka.isr_idx_list(self.topic2), \ "Broker %d should be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic2))) # Verify that topic1 and the consumer offset topic is in the first log directory and topic2 is in the second log directory topic_1_partition_0 = KafkaService.DATA_LOG_DIR_1 + "/test_topic_1-0" topic_2_partition_0 = KafkaService.DATA_LOG_DIR_2 + "/test_topic_2-0" offset_topic_partition_0 = KafkaService.DATA_LOG_DIR_1 + "/__consumer_offsets-0" for path in [topic_1_partition_0, topic_2_partition_0, offset_topic_partition_0]: assert path_exists(broker_node, path), "%s should exist" % path self.logger.debug("Making log dir %s inaccessible" % (KafkaService.DATA_LOG_DIR_2)) cmd = "chmod a-w %s -R" % (KafkaService.DATA_LOG_DIR_2) broker_node.account.ssh(cmd, allow_fail=False) if bounce_broker: self.kafka.restart_node(broker_node, clean_shutdown=True) # Verify the following: # 1) The broker with offline log directory is not the leader of the partition of topic2 # 2) The broker with offline log directory is not in the ISR # 3) The broker with offline log directory is still online # 4) Messages can still be produced and consumed from topic2 wait_until(lambda: self.kafka.leader(self.topic2, partition=0) != broker_node, timeout_sec=60, err_msg="Broker %d should not be leader of topic %s and partition 0" % (broker_idx, self.topic2)) assert self.kafka.alive(broker_node), "Broker %d should be still online" % (broker_idx) wait_until(lambda: broker_idx not in self.kafka.isr_idx_list(self.topic2), timeout_sec=60, err_msg="Broker %d should not be in isr set %s" % (broker_idx, str(self.kafka.isr_idx_list(self.topic2)))) self.stop_producer_and_consumer() self.validate() # Shutdown all other brokers so that the broker with offline log dir is the only online broker offline_nodes = [] for node in self.kafka.nodes: if broker_node != node: offline_nodes.append(node) self.logger.debug("Hard shutdown broker %d" % (self.kafka.idx(node))) self.kafka.stop_node(node) # Verify the following: # 1) The broker with offline directory is the only in-sync broker of the partition of topic1 # 2) Messages can still be produced and consumed from topic1 self.producer = 
VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic1, throughput=self.producer_throughput, offline_nodes=offline_nodes) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic1, group_id="test-consumer-group-2", consumer_timeout_ms=90000, message_validator=is_int) self.consumer_start_timeout_sec = 90 self.start_producer_and_consumer() assert self.kafka.isr_idx_list(self.topic1) == [broker_idx], \ "In-sync replicas of topic %s and partition 0 should be %s" % (self.topic1, str([broker_idx])) self.stop_producer_and_consumer() self.validate() except BaseException as e: for s in self.test_context.services: self.mark_for_collect(s) raise
class ConsoleConsumerTest(Test): """Sanity checks on console consumer service class.""" def __init__(self, test_context): super(ConsoleConsumerTest, self).__init__(test_context) self.topic = "topic" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService( self.test_context, num_nodes=1, zk=self.zk, zk_chroot="/kafka", topics={self.topic: { "partitions": 1, "replication-factor": 1 }}) self.consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic) def setUp(self): self.zk.start() @cluster(num_nodes=3) @matrix(security_protocol=['PLAINTEXT', 'SSL']) @cluster(num_nodes=4) @matrix(security_protocol=['SASL_SSL'], sasl_mechanism=['PLAIN', 'SCRAM-SHA-256', 'SCRAM-SHA-512']) @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL']) def test_lifecycle(self, security_protocol, sasl_mechanism='GSSAPI'): """Check that console consumer starts/stops properly, and that we are capturing log output.""" self.kafka.security_protocol = security_protocol self.kafka.client_sasl_mechanism = sasl_mechanism self.kafka.interbroker_sasl_mechanism = sasl_mechanism self.kafka.start() self.consumer.security_protocol = security_protocol t0 = time.time() self.consumer.start() node = self.consumer.nodes[0] wait_until(lambda: self.consumer.alive(node), timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start") self.logger.info("consumer started in %s seconds " % str(time.time() - t0)) # Verify that log output is happening wait_until(lambda: file_exists(node, ConsoleConsumer.LOG_FILE), timeout_sec=10, err_msg="Timed out waiting for consumer log file to exist.") wait_until(lambda: line_count(node, ConsoleConsumer.LOG_FILE) > 0, timeout_sec=1, backoff_sec=.25, err_msg="Timed out waiting for log entries to start.") # Verify no consumed messages assert line_count(node, ConsoleConsumer.STDOUT_CAPTURE) == 0 self.consumer.stop_node(node) @cluster(num_nodes=4) def test_version(self): """Check that console consumer v0.8.2.X successfully starts and consumes messages.""" self.kafka.start() num_messages = 1000 self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=num_messages, throughput=1000) self.producer.start() self.producer.wait() self.consumer.nodes[0].version = LATEST_0_8_2 self.consumer.new_consumer = False self.consumer.consumer_timeout_ms = 1000 self.consumer.start() self.consumer.wait() num_consumed = len(self.consumer.messages_consumed[1]) num_produced = self.producer.num_acked assert num_produced == num_consumed, "num_produced: %d, num_consumed: %d" % ( num_produced, num_consumed)
class StreamsBrokerBounceTest(Test): """ Simple test of Kafka Streams with brokers failing """ def __init__(self, test_context): super(StreamsBrokerBounceTest, self).__init__(test_context) self.replication = 3 self.partitions = 3 self.topics = { 'echo' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2}}, 'data' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'min' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'max' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'sum' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'dif' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'cnt' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'avg' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'wcnt' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, 'tagg' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} }, '__consumer_offsets' : { 'partitions': 50, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": 2} } } def fail_broker_type(self, failure_mode, broker_type): # Pick a random topic and bounce it's leader topic_index = randint(0, len(self.topics.keys()) - 1) topic = self.topics.keys()[topic_index] failures[failure_mode](self, topic, broker_type) def fail_many_brokers(self, failure_mode, num_failures): sig = signal.SIGTERM if (failure_mode == "clean_shutdown"): sig = signal.SIGTERM else: sig = signal.SIGKILL for num in range(0, num_failures - 1): signal_node(self, self.kafka.nodes[num], sig) def confirm_topics_on_all_brokers(self, expected_topic_set): for node in self.kafka.nodes: match_count = 0 # need to iterate over topic_list_generator as kafka.list_topics() # returns a python generator so values are fetched lazily # so we can't just compare directly we must iterate over what's returned topic_list_generator = self.kafka.list_topics(node=node) for topic in topic_list_generator: if topic in expected_topic_set: match_count += 1 if len(expected_topic_set) != match_count: return False return True def setup_system(self, start_processor=True, num_threads=3): # Setup phase self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=self.replication, zk=self.zk, topics=self.topics) self.kafka.start() # allow some time for topics to be created wait_until(lambda: self.confirm_topics_on_all_brokers(set(self.topics.keys())), timeout_sec=60, err_msg="Broker did not create all topics in 60 seconds ") # Start test harness self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.processor1 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka, num_threads) self.driver.start() if (start_processor): self.processor1.start() def collect_results(self, sleep_time_secs): data = {} # End test self.driver.wait() self.driver.stop() self.processor1.stop() node = self.driver.node # Success is declared if streams does not crash when sleep time > 0 # It 
should give an exception when sleep time is 0 since we kill the brokers immediately # and the topic manager cannot create internal topics with the desired replication factor if (sleep_time_secs == 0): output_streams = self.processor1.node.account.ssh_capture("grep SMOKE-TEST-CLIENT-EXCEPTION %s" % self.processor1.STDOUT_FILE, allow_fail=False) else: output_streams = self.processor1.node.account.ssh_capture("grep SMOKE-TEST-CLIENT-CLOSED %s" % self.processor1.STDOUT_FILE, allow_fail=False) for line in output_streams: data["Client closed"] = line # Currently it is hard to guarantee anything about Kafka since we don't have exactly once. # With exactly once in place, success will be defined as ALL-RECORDS-DELIVERED and SUCCESS output = node.account.ssh_capture("grep -E 'ALL-RECORDS-DELIVERED|PROCESSED-MORE-THAN-GENERATED|PROCESSED-LESS-THAN-GENERATED' %s" % self.driver.STDOUT_FILE, allow_fail=False) for line in output: data["Records Delivered"] = line output = node.account.ssh_capture("grep -E 'SUCCESS|FAILURE' %s" % self.driver.STDOUT_FILE, allow_fail=False) for line in output: data["Logic Success/Failure"] = line return data @cluster(num_nodes=7) @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"], broker_type=["leader", "controller"], num_threads=[1, 3], sleep_time_secs=[120]) def test_broker_type_bounce(self, failure_mode, broker_type, sleep_time_secs, num_threads): """ Start a smoke test client, then kill one particular broker and ensure data is still received. Record whether records are delivered. We also add a single-threaded streams client to make sure all partitions get reassigned in the next generation, so that we can verify the partition-lost handling is correctly triggered. """ self.setup_system(num_threads=num_threads) # Sleep to allow test to run for a bit time.sleep(sleep_time_secs) # Fail brokers self.fail_broker_type(failure_mode, broker_type) return self.collect_results(sleep_time_secs) @ignore @cluster(num_nodes=7) @matrix(failure_mode=["clean_shutdown"], broker_type=["controller"], sleep_time_secs=[0]) def test_broker_type_bounce_at_start(self, failure_mode, broker_type, sleep_time_secs): """ Start a smoke test client, then kill one particular broker immediately before streams starts. Streams should throw an exception since it cannot create topics with the desired replication factor of 3 """ self.setup_system(start_processor=False) # Sleep to allow test to run for a bit time.sleep(sleep_time_secs) # Fail brokers self.fail_broker_type(failure_mode, broker_type) self.processor1.start() return self.collect_results(sleep_time_secs) @cluster(num_nodes=7) @matrix(failure_mode=["clean_shutdown", "hard_shutdown", "clean_bounce", "hard_bounce"], num_failures=[2]) def test_many_brokers_bounce(self, failure_mode, num_failures): """ Start a smoke test client, then kill a few brokers and ensure data is still received. Record whether records are delivered """ self.setup_system() # Sleep to allow test to run for a bit time.sleep(120) # Fail brokers self.fail_many_brokers(failure_mode, num_failures) return self.collect_results(120) @cluster(num_nodes=7) @matrix(failure_mode=["clean_bounce", "hard_bounce"], num_failures=[3]) def test_all_brokers_bounce(self, failure_mode, num_failures): """ Start a smoke test client, then kill a few brokers and ensure data is still received. Record whether records are delivered """ self.setup_system() # Sleep to allow test to run for a bit time.sleep(120) # Fail brokers self.fail_many_brokers(failure_mode, num_failures) return self.collect_results(120)
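# Illustrative sketch: confirm_topics_on_all_brokers() above counts matches while iterating
# the generator returned by kafka.list_topics(node=node). An essentially equivalent,
# hypothetical helper that drains the generator into a set and checks containment directly:
def topics_present_on_node_sketch(kafka, node, expected_topic_set):
    """Return True if every expected topic appears in the node's topic listing."""
    listed = set(kafka.list_topics(node=node))  # materialize the lazy generator once
    return expected_topic_set.issubset(listed)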
class ConnectDistributedTest(Test): """ Simple test of Kafka Connect in distributed mode, producing data from files on one cluster and consuming it on another, validating the total output is identical to the input. """ FILE_SOURCE_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSourceConnector' FILE_SINK_CONNECTOR = 'org.apache.kafka.connect.file.FileStreamSinkConnector' INPUT_FILE = "/mnt/connect.input" OUTPUT_FILE = "/mnt/connect.output" TOPIC = "test" OFFSETS_TOPIC = "connect-offsets" OFFSETS_REPLICATION_FACTOR = "1" OFFSETS_PARTITIONS = "1" CONFIG_TOPIC = "connect-configs" CONFIG_REPLICATION_FACTOR = "1" STATUS_TOPIC = "connect-status" STATUS_REPLICATION_FACTOR = "1" STATUS_PARTITIONS = "1" SCHEDULED_REBALANCE_MAX_DELAY_MS = "60000" CONNECT_PROTOCOL = "sessioned" # Since tasks can be assigned to any node and we're testing with files, we need to make sure the content is the same # across all nodes. FIRST_INPUT_LIST = ["foo", "bar", "baz"] FIRST_INPUTS = "\n".join(FIRST_INPUT_LIST) + "\n" SECOND_INPUT_LIST = ["razz", "ma", "tazz"] SECOND_INPUTS = "\n".join(SECOND_INPUT_LIST) + "\n" SCHEMA = {"type": "string", "optional": False} def __init__(self, test_context): super(ConnectDistributedTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.topics = {self.TOPIC: {'partitions': 1, 'replication-factor': 1}} self.zk = ZookeeperService(test_context, self.num_zk) self.key_converter = "org.apache.kafka.connect.json.JsonConverter" self.value_converter = "org.apache.kafka.connect.json.JsonConverter" self.schemas = True def setup_services(self, security_protocol=SecurityConfig.PLAINTEXT, timestamp_type=None, broker_version=DEV_BRANCH, auto_create_topics=False): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=security_protocol, topics=self.topics, version=broker_version, server_prop_overides=[[ "auto.create.topics.enable", str(auto_create_topics) ]]) if timestamp_type is not None: for node in self.kafka.nodes: node.config[ config_property.MESSAGE_TIMESTAMP_TYPE] = timestamp_type self.cc = ConnectDistributedService( self.test_context, 3, self.kafka, [self.INPUT_FILE, self.OUTPUT_FILE]) self.cc.log_level = "DEBUG" self.zk.start() self.kafka.start() def _start_connector(self, config_file): connector_props = self.render(config_file) connector_config = dict([ line.strip().split('=', 1) for line in connector_props.split('\n') if line.strip() and not line.strip().startswith('#') ]) self.cc.create_connector(connector_config) def _connector_status(self, connector, node=None): try: return self.cc.get_connector_status(connector, node) except ConnectRestError: return None def _connector_has_state(self, status, state): return status is not None and status['connector']['state'] == state def _task_has_state(self, task_id, status, state): if not status: return False tasks = status['tasks'] if not tasks: return False for task in tasks: if task['id'] == task_id: return task['state'] == state return False def _all_tasks_have_state(self, status, task_count, state): if status is None: return False tasks = status['tasks'] if len(tasks) != task_count: return False return reduce(operator.and_, [task['state'] == state for task in tasks], True) def is_running(self, connector, node=None): status = self._connector_status(connector.name, node) return self._connector_has_state( status, 'RUNNING') and self._all_tasks_have_state( status, connector.tasks, 'RUNNING') def is_paused(self, connector, node=None): status 
= self._connector_status(connector.name, node) return self._connector_has_state( status, 'PAUSED') and self._all_tasks_have_state( status, connector.tasks, 'PAUSED') def connector_is_running(self, connector, node=None): status = self._connector_status(connector.name, node) return self._connector_has_state(status, 'RUNNING') def connector_is_failed(self, connector, node=None): status = self._connector_status(connector.name, node) return self._connector_has_state(status, 'FAILED') def task_is_failed(self, connector, task_id, node=None): status = self._connector_status(connector.name, node) return self._task_has_state(task_id, status, 'FAILED') def task_is_running(self, connector, task_id, node=None): status = self._connector_status(connector.name, node) return self._task_has_state(task_id, status, 'RUNNING') @cluster(num_nodes=5) @matrix(connect_protocol=['sessioned', 'compatible', 'eager']) def test_restart_failed_connector(self, connect_protocol): self.CONNECT_PROTOCOL = connect_protocol self.setup_services() self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() self.sink = MockSink(self.cc, self.topics.keys(), mode='connector-failure', delay_sec=5) self.sink.start() wait_until( lambda: self.connector_is_failed(self.sink), timeout_sec=15, err_msg="Failed to see connector transition to the FAILED state") self.cc.restart_connector(self.sink.name) wait_until( lambda: self.connector_is_running(self.sink), timeout_sec=10, err_msg="Failed to see connector transition to the RUNNING state") @cluster(num_nodes=5) @matrix(connector_type=['source', 'sink'], connect_protocol=['sessioned', 'compatible', 'eager']) def test_restart_failed_task(self, connector_type, connect_protocol): self.CONNECT_PROTOCOL = connect_protocol self.setup_services() self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() connector = None if connector_type == "sink": connector = MockSink(self.cc, self.topics.keys(), mode='task-failure', delay_sec=5) else: connector = MockSource(self.cc, mode='task-failure', delay_sec=5) connector.start() task_id = 0 wait_until(lambda: self.task_is_failed(connector, task_id), timeout_sec=20, err_msg="Failed to see task transition to the FAILED state") self.cc.restart_task(connector.name, task_id) wait_until( lambda: self.task_is_running(connector, task_id), timeout_sec=10, err_msg="Failed to see task transition to the RUNNING state") @cluster(num_nodes=5) @matrix(connect_protocol=['sessioned', 'compatible', 'eager']) def test_pause_and_resume_source(self, connect_protocol): """ Verify that source connectors stop producing records when paused and begin again after being resumed. 
""" self.CONNECT_PROTOCOL = connect_protocol self.setup_services() self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() self.source = VerifiableSource(self.cc, topic=self.TOPIC) self.source.start() wait_until( lambda: self.is_running(self.source), timeout_sec=30, err_msg="Failed to see connector transition to the RUNNING state") self.cc.pause_connector(self.source.name) # wait until all nodes report the paused transition for node in self.cc.nodes: wait_until( lambda: self.is_paused(self.source, node), timeout_sec=30, err_msg="Failed to see connector transition to the PAUSED state" ) # verify that we do not produce new messages while paused num_messages = len(self.source.sent_messages()) time.sleep(10) assert num_messages == len(self.source.sent_messages( )), "Paused source connector should not produce any messages" self.cc.resume_connector(self.source.name) for node in self.cc.nodes: wait_until( lambda: self.is_running(self.source, node), timeout_sec=30, err_msg= "Failed to see connector transition to the RUNNING state") # after resuming, we should see records produced again wait_until( lambda: len(self.source.sent_messages()) > num_messages, timeout_sec=30, err_msg="Failed to produce messages after resuming source connector" ) @cluster(num_nodes=5) @matrix(connect_protocol=['sessioned', 'compatible', 'eager']) def test_pause_and_resume_sink(self, connect_protocol): """ Verify that sink connectors stop consuming records when paused and begin again after being resumed. """ self.CONNECT_PROTOCOL = connect_protocol self.setup_services() self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() # use the verifiable source to produce a steady stream of messages self.source = VerifiableSource(self.cc, topic=self.TOPIC) self.source.start() wait_until( lambda: len(self.source.committed_messages()) > 0, timeout_sec=30, err_msg= "Timeout expired waiting for source task to produce a message") self.sink = VerifiableSink(self.cc, topics=[self.TOPIC]) self.sink.start() wait_until( lambda: self.is_running(self.sink), timeout_sec=30, err_msg="Failed to see connector transition to the RUNNING state") self.cc.pause_connector(self.sink.name) # wait until all nodes report the paused transition for node in self.cc.nodes: wait_until( lambda: self.is_paused(self.sink, node), timeout_sec=30, err_msg="Failed to see connector transition to the PAUSED state" ) # verify that we do not consume new messages while paused num_messages = len(self.sink.received_messages()) time.sleep(10) assert num_messages == len(self.sink.received_messages( )), "Paused sink connector should not consume any messages" self.cc.resume_connector(self.sink.name) for node in self.cc.nodes: wait_until( lambda: self.is_running(self.sink, node), timeout_sec=30, err_msg= "Failed to see connector transition to the RUNNING state") # after resuming, we should see records consumed again wait_until( lambda: len(self.sink.received_messages()) > num_messages, timeout_sec=30, err_msg="Failed to consume messages after resuming sink connector") @cluster(num_nodes=5) @matrix(connect_protocol=['sessioned', 'compatible', 'eager']) def test_pause_state_persistent(self, connect_protocol): """ Verify that paused state is preserved after a cluster restart. 
""" self.CONNECT_PROTOCOL = connect_protocol self.setup_services() self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() self.source = VerifiableSource(self.cc, topic=self.TOPIC) self.source.start() wait_until( lambda: self.is_running(self.source), timeout_sec=30, err_msg="Failed to see connector transition to the RUNNING state") self.cc.pause_connector(self.source.name) self.cc.restart() if connect_protocol == 'compatible': timeout_sec = 120 else: timeout_sec = 70 # we should still be paused after restarting for node in self.cc.nodes: wait_until( lambda: self.is_paused(self.source, node), timeout_sec=timeout_sec, err_msg="Failed to see connector startup in PAUSED state") @cluster(num_nodes=6) @matrix( security_protocol=[SecurityConfig.PLAINTEXT, SecurityConfig.SASL_SSL], connect_protocol=['sessioned', 'compatible', 'eager']) def test_file_source_and_sink(self, security_protocol, connect_protocol): """ Tests that a basic file connector works across clean rolling bounces. This validates that the connector is correctly created, tasks instantiated, and as nodes restart the work is rebalanced across nodes. """ self.CONNECT_PROTOCOL = connect_protocol self.setup_services(security_protocol=security_protocol) self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() self.logger.info("Creating connectors") self._start_connector("connect-file-source.properties") self._start_connector("connect-file-sink.properties") # Generating data on the source node should generate new records and create new output on the sink node. Timeouts # here need to be more generous than they are for standalone mode because a) it takes longer to write configs, # do rebalancing of the group, etc, and b) without explicit leave group support, rebalancing takes awhile for node in self.cc.nodes: node.account.ssh("echo -e -n " + repr(self.FIRST_INPUTS) + " >> " + self.INPUT_FILE) wait_until( lambda: self._validate_file_output(self.FIRST_INPUT_LIST), timeout_sec=70, err_msg= "Data added to input file was not seen in the output file in a reasonable amount of time." ) # Restarting both should result in them picking up where they left off, # only processing new data. self.cc.restart() if connect_protocol == 'compatible': timeout_sec = 150 else: timeout_sec = 70 for node in self.cc.nodes: node.account.ssh("echo -e -n " + repr(self.SECOND_INPUTS) + " >> " + self.INPUT_FILE) wait_until( lambda: self._validate_file_output(self.FIRST_INPUT_LIST + self. SECOND_INPUT_LIST), timeout_sec=timeout_sec, err_msg= "Sink output file never converged to the same state as the input file" ) @cluster(num_nodes=6) @matrix(clean=[True, False], connect_protocol=['sessioned', 'compatible', 'eager']) def test_bounce(self, clean, connect_protocol): """ Validates that source and sink tasks that run continuously and produce a predictable sequence of messages run correctly and deliver messages exactly once when Kafka Connect workers undergo clean rolling bounces. 
""" num_tasks = 3 self.CONNECT_PROTOCOL = connect_protocol self.setup_services() self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() self.source = VerifiableSource(self.cc, topic=self.TOPIC, tasks=num_tasks, throughput=100) self.source.start() self.sink = VerifiableSink(self.cc, tasks=num_tasks, topics=[self.TOPIC]) self.sink.start() for _ in range(3): for node in self.cc.nodes: started = time.time() self.logger.info("%s bouncing Kafka Connect on %s", clean and "Clean" or "Hard", str(node.account)) self.cc.stop_node(node, clean_shutdown=clean) with node.account.monitor_log(self.cc.LOG_FILE) as monitor: self.cc.start_node(node) monitor.wait_until( "Starting connectors and tasks using config offset", timeout_sec=90, err_msg= "Kafka Connect worker didn't successfully join group and start work" ) self.logger.info( "Bounced Kafka Connect on %s and rejoined in %f seconds", node.account, time.time() - started) # Give additional time for the consumer groups to recover. Even if it is not a hard bounce, there are # some cases where a restart can cause a rebalance to take the full length of the session timeout # (e.g. if the client shuts down before it has received the memberId from its initial JoinGroup). # If we don't give enough time for the group to stabilize, the next bounce may cause consumers to # be shut down before they have any time to process data and we can end up with zero data making it # through the test. time.sleep(15) self.source.stop() self.sink.stop() self.cc.stop() # Validate at least once delivery of everything that was reported as written since we should have flushed and # cleanly exited. Currently this only tests at least once delivery because the sink task may not have consumed # all the messages generated by the source task. This needs to be done per-task since seqnos are not unique across # tasks. success = True errors = [] allow_dups = not clean src_messages = self.source.committed_messages() sink_messages = self.sink.flushed_messages() for task in range(num_tasks): # Validate source messages src_seqnos = [ msg['seqno'] for msg in src_messages if msg['task'] == task ] # Every seqno up to the largest one we ever saw should appear. Each seqno should only appear once because clean # bouncing should commit on rebalance. src_seqno_max = max(src_seqnos) self.logger.debug("Max source seqno: %d", src_seqno_max) src_seqno_counts = Counter(src_seqnos) missing_src_seqnos = sorted( set(range(src_seqno_max)).difference(set(src_seqnos))) duplicate_src_seqnos = sorted([ seqno for seqno, count in src_seqno_counts.iteritems() if count > 1 ]) if missing_src_seqnos: self.logger.error("Missing source sequence numbers for task " + str(task)) errors.append( "Found missing source sequence numbers for task %d: %s" % (task, missing_src_seqnos)) success = False if not allow_dups and duplicate_src_seqnos: self.logger.error( "Duplicate source sequence numbers for task " + str(task)) errors.append( "Found duplicate source sequence numbers for task %d: %s" % (task, duplicate_src_seqnos)) success = False # Validate sink messages sink_seqnos = [ msg['seqno'] for msg in sink_messages if msg['task'] == task ] # Every seqno up to the largest one we ever saw should appear. Each seqno should only appear once because # clean bouncing should commit on rebalance. 
sink_seqno_max = max(sink_seqnos) self.logger.debug("Max sink seqno: %d", sink_seqno_max) sink_seqno_counts = Counter(sink_seqnos) missing_sink_seqnos = sorted( set(range(sink_seqno_max)).difference(set(sink_seqnos))) duplicate_sink_seqnos = sorted([ seqno for seqno, count in sink_seqno_counts.iteritems() if count > 1 ]) if missing_sink_seqnos: self.logger.error("Missing sink sequence numbers for task " + str(task)) errors.append( "Found missing sink sequence numbers for task %d: %s" % (task, missing_sink_seqnos)) success = False if not allow_dups and duplicate_sink_seqnos: self.logger.error("Duplicate sink sequence numbers for task " + str(task)) errors.append( "Found duplicate sink sequence numbers for task %d: %s" % (task, duplicate_sink_seqnos)) success = False # Validate source and sink match if sink_seqno_max > src_seqno_max: self.logger.error( "Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d", task, sink_seqno_max, src_seqno_max) errors.append( "Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d" % (task, sink_seqno_max, src_seqno_max)) success = False if src_seqno_max < 1000 or sink_seqno_max < 1000: errors.append( "Not enough messages were processed: source:%d sink:%d" % (src_seqno_max, sink_seqno_max)) success = False if not success: self.mark_for_collect(self.cc) # Also collect the data in the topic to aid in debugging consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.source.topic, consumer_timeout_ms=1000, print_key=True) consumer_validator.run() self.mark_for_collect(consumer_validator, "consumer_stdout") assert success, "Found validation errors:\n" + "\n ".join(errors) @cluster(num_nodes=6) @matrix(connect_protocol=['sessioned', 'compatible', 'eager']) def test_transformations(self, connect_protocol): self.CONNECT_PROTOCOL = connect_protocol self.setup_services(timestamp_type='CreateTime') self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() ts_fieldname = 'the_timestamp' NamedConnector = namedtuple('Connector', ['name']) source_connector = NamedConnector(name='file-src') self.cc.create_connector({ 'name': source_connector.name, 'connector.class': 'org.apache.kafka.connect.file.FileStreamSourceConnector', 'tasks.max': 1, 'file': self.INPUT_FILE, 'topic': self.TOPIC, 'transforms': 'hoistToStruct,insertTimestampField', 'transforms.hoistToStruct.type': 'org.apache.kafka.connect.transforms.HoistField$Value', 'transforms.hoistToStruct.field': 'content', 'transforms.insertTimestampField.type': 'org.apache.kafka.connect.transforms.InsertField$Value', 'transforms.insertTimestampField.timestamp.field': ts_fieldname, }) wait_until( lambda: self.connector_is_running(source_connector), timeout_sec=30, err_msg='Failed to see connector transition to the RUNNING state') for node in self.cc.nodes: node.account.ssh("echo -e -n " + repr(self.FIRST_INPUTS) + " >> " + self.INPUT_FILE) consumer = ConsoleConsumer(self.test_context, 1, self.kafka, self.TOPIC, consumer_timeout_ms=15000, print_timestamp=True) consumer.run() assert len(consumer.messages_consumed[1]) == len(self.FIRST_INPUT_LIST) expected_schema = { 'type': 'struct', 'fields': [ { 'field': 'content', 'type': 'string', 'optional': False }, { 'field': ts_fieldname, 'name': 'org.apache.kafka.connect.data.Timestamp', 'type': 'int64', 'version': 1, 'optional': True }, ], 'optional': False } for msg in consumer.messages_consumed[1]: (ts_info, value) = msg.split('\t') 
assert ts_info.startswith('CreateTime:') ts = int(ts_info[len('CreateTime:'):]) obj = json.loads(value) assert obj['schema'] == expected_schema assert obj['payload']['content'] in self.FIRST_INPUT_LIST assert obj['payload'][ts_fieldname] == ts @cluster(num_nodes=5) @parametrize(broker_version=str(DEV_BRANCH), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='sessioned') @parametrize(broker_version=str(LATEST_0_11_0), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='sessioned') @parametrize(broker_version=str(LATEST_0_10_2), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='sessioned') @parametrize(broker_version=str(LATEST_0_10_1), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='sessioned') @parametrize(broker_version=str(LATEST_0_10_0), auto_create_topics=True, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='sessioned') @parametrize(broker_version=str(DEV_BRANCH), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_2_3), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_2_2), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_2_1), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_2_0), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_1_1), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_1_0), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_0_11_0), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_0_10_2), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_0_10_1), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(LATEST_0_10_0), auto_create_topics=True, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='compatible') @parametrize(broker_version=str(DEV_BRANCH), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_2_3), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_2_2), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_2_1), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_2_0), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_1_1), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_1_0), auto_create_topics=False, 
security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_0_11_0), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_0_10_2), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_0_10_1), auto_create_topics=False, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') @parametrize(broker_version=str(LATEST_0_10_0), auto_create_topics=True, security_protocol=SecurityConfig.PLAINTEXT, connect_protocol='eager') def test_broker_compatibility(self, broker_version, auto_create_topics, security_protocol, connect_protocol): """ Verify that Connect will start up with various broker versions with various configurations. When Connect distributed starts up, it either creates internal topics (v0.10.1.0 and after) or relies upon the broker to auto-create the topics (v0.10.0.x and before). """ self.CONNECT_PROTOCOL = connect_protocol self.setup_services(broker_version=KafkaVersion(broker_version), auto_create_topics=auto_create_topics, security_protocol=security_protocol) self.cc.set_configs(lambda node: self.render( "connect-distributed.properties", node=node)) self.cc.start() self.logger.info("Creating connectors") self._start_connector("connect-file-source.properties") self._start_connector("connect-file-sink.properties") # Generating data on the source node should generate new records and create new output on the sink node. Timeouts # here need to be more generous than they are for standalone mode because a) it takes longer to write configs, # do rebalancing of the group, etc, and b) without explicit leave group support, rebalancing takes awhile for node in self.cc.nodes: node.account.ssh("echo -e -n " + repr(self.FIRST_INPUTS) + " >> " + self.INPUT_FILE) wait_until( lambda: self._validate_file_output(self.FIRST_INPUT_LIST), timeout_sec=70, err_msg= "Data added to input file was not seen in the output file in a reasonable amount of time." ) def _validate_file_output(self, input): input_set = set(input) # Output needs to be collected from all nodes because we can't be sure where the tasks will be scheduled. # Between the first and second rounds, we might even end up with half the data on each node. output_set = set( itertools.chain(*[[ line.strip() for line in self._file_contents(node, self.OUTPUT_FILE) ] for node in self.cc.nodes])) return input_set == output_set def _file_contents(self, node, file): try: # Convert to a list here or the RemoteCommandError may be returned during a call to the generator instead of # immediately return list(node.account.ssh_capture("cat " + file)) except RemoteCommandError: return []
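# Illustrative sketch: _start_connector() above turns a rendered Java-style .properties
# payload into the dict handed to create_connector(). The parsing step in isolation
# (hypothetical helper name; same split-on-first-'=' rule, blank lines and '#' comments skipped):
def parse_connector_props_sketch(props_text):
    """Parse 'key=value' lines from a properties-style string into a dict."""
    return dict(
        line.strip().split('=', 1)
        for line in props_text.split('\n')
        if line.strip() and not line.strip().startswith('#')
    )

# Example (values are placeholders, not taken from this file):
# parse_connector_props_sketch("name=local-file-source\n# a comment\nfile=/mnt/connect.input")
# returns {'name': 'local-file-source', 'file': '/mnt/connect.input'}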
class StreamsCooperativeRebalanceUpgradeTest(Test): """ Test of a rolling upgrade from eager rebalance to cooperative rebalance """ source_topic = "source" sink_topic = "sink" task_delimiter = "#" report_interval = "1000" processing_message = "Processed [0-9]* records so far" stopped_message = "COOPERATIVE-REBALANCE-TEST-CLIENT-CLOSED" running_state_msg = "STREAMS in a RUNNING State" cooperative_turned_off_msg = "Eager rebalancing enabled now for upgrade from %s" cooperative_enabled_msg = "Cooperative rebalancing enabled now" first_bounce_phase = "first_bounce_phase-" second_bounce_phase = "second_bounce_phase-" # !!CAUTION!!: THIS LIST OF VERSIONS IS FIXED, NO VERSIONS MUST BE ADDED streams_eager_rebalance_upgrade_versions = [str(LATEST_0_10_0), str(LATEST_0_10_1), str(LATEST_0_10_2), str(LATEST_0_11_0), str(LATEST_1_0), str(LATEST_1_1), str(LATEST_2_0), str(LATEST_2_1), str(LATEST_2_2), str(LATEST_2_3)] def __init__(self, test_context): super(StreamsCooperativeRebalanceUpgradeTest, self).__init__(test_context) self.topics = { self.source_topic: {'partitions': 9}, self.sink_topic: {'partitions': 9} } self.zookeeper = ZookeeperService(self.test_context, num_nodes=1) self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zookeeper, topics=self.topics) self.producer = VerifiableProducer(self.test_context, 1, self.kafka, self.source_topic, throughput=1000, acks=1) @matrix(upgrade_from_version=streams_eager_rebalance_upgrade_versions) def test_upgrade_to_cooperative_rebalance(self, upgrade_from_version): self.zookeeper.start() self.kafka.start() processor1 = CooperativeRebalanceUpgradeService(self.test_context, self.kafka) processor2 = CooperativeRebalanceUpgradeService(self.test_context, self.kafka) processor3 = CooperativeRebalanceUpgradeService(self.test_context, self.kafka) processors = [processor1, processor2, processor3] # produce records continually during the test self.producer.start() # start all processors without upgrade_from config; normal operations mode self.logger.info("Starting all streams clients in normal running mode") for processor in processors: processor.set_version(upgrade_from_version) self.set_props(processor) processor.CLEAN_NODE_ENABLED = False # can't use state as older version don't have state listener # so just verify up and running verify_running(processor, self.processing_message) # all running rebalancing has ceased for processor in processors: self.verify_processing(processor, self.processing_message) # first rolling bounce with "upgrade.from" config set previous_phase = "" self.maybe_upgrade_rolling_bounce_and_verify(processors, previous_phase, self.first_bounce_phase, upgrade_from_version) # All nodes processing, rebalancing has ceased for processor in processors: self.verify_processing(processor, self.first_bounce_phase + self.processing_message) # second rolling bounce without "upgrade.from" config self.maybe_upgrade_rolling_bounce_and_verify(processors, self.first_bounce_phase, self.second_bounce_phase) # All nodes processing, rebalancing has ceased for processor in processors: self.verify_processing(processor, self.second_bounce_phase + self.processing_message) # now verify tasks are unique for processor in processors: self.get_tasks_for_processor(processor) self.logger.info("Active tasks %s" % processor.active_tasks) overlapping_tasks = processor1.active_tasks.intersection(processor2.active_tasks) assert len(overlapping_tasks) == int(0), \ "Final task assignments are not unique %s %s" % (processor1.active_tasks, processor2.active_tasks) 
overlapping_tasks = processor1.active_tasks.intersection(processor3.active_tasks) assert len(overlapping_tasks) == int(0), \ "Final task assignments are not unique %s %s" % (processor1.active_tasks, processor3.active_tasks) overlapping_tasks = processor2.active_tasks.intersection(processor3.active_tasks) assert len(overlapping_tasks) == int(0), \ "Final task assignments are not unique %s %s" % (processor2.active_tasks, processor3.active_tasks) # test done close all down stop_processors(processors, self.second_bounce_phase + self.stopped_message) self.producer.stop() self.kafka.stop() self.zookeeper.stop() def maybe_upgrade_rolling_bounce_and_verify(self, processors, previous_phase, current_phase, upgrade_from_version=None): for processor in processors: # stop the processor in prep for setting "update.from" or removing "update.from" verify_stopped(processor, previous_phase + self.stopped_message) # upgrade to version with cooperative rebalance processor.set_version("") processor.set_upgrade_phase(current_phase) if upgrade_from_version is not None: # need to remove minor version numbers for check of valid upgrade from numbers upgrade_version = upgrade_from_version[:upgrade_from_version.rfind('.')] rebalance_mode_msg = self.cooperative_turned_off_msg % upgrade_version else: upgrade_version = None rebalance_mode_msg = self.cooperative_enabled_msg self.set_props(processor, upgrade_version) node = processor.node with node.account.monitor_log(processor.STDOUT_FILE) as stdout_monitor: with node.account.monitor_log(processor.LOG_FILE) as log_monitor: processor.start() # verify correct rebalance mode either turned off for upgrade or enabled after upgrade log_monitor.wait_until(rebalance_mode_msg, timeout_sec=60, err_msg="Never saw '%s' message " % rebalance_mode_msg + str(processor.node.account)) # verify rebalanced into a running state rebalance_msg = current_phase + self.running_state_msg stdout_monitor.wait_until(rebalance_msg, timeout_sec=60, err_msg="Never saw '%s' message " % rebalance_msg + str( processor.node.account)) # verify processing verify_processing_msg = current_phase + self.processing_message stdout_monitor.wait_until(verify_processing_msg, timeout_sec=60, err_msg="Never saw '%s' message " % verify_processing_msg + str( processor.node.account)) def verify_processing(self, processor, pattern): self.logger.info("Verifying %s processing pattern in STDOUT_FILE" % pattern) with processor.node.account.monitor_log(processor.STDOUT_FILE) as monitor: monitor.wait_until(pattern, timeout_sec=60, err_msg="Never saw processing of %s " % pattern + str(processor.node.account)) def get_tasks_for_processor(self, processor): retries = 0 while retries < 5: found_tasks = list(processor.node.account.ssh_capture("grep TASK-ASSIGNMENTS %s | tail -n 1" % processor.STDOUT_FILE, allow_fail=True)) self.logger.info("Returned %s from assigned task check" % found_tasks) if len(found_tasks) > 0: task_string = str(found_tasks[0]).strip() self.logger.info("Converted %s from assigned task check" % task_string) processor.set_tasks(task_string) return retries += 1 time.sleep(1) return def set_props(self, processor, upgrade_from=None): processor.SOURCE_TOPIC = self.source_topic processor.SINK_TOPIC = self.sink_topic processor.REPORT_INTERVAL = self.report_interval processor.UPGRADE_FROM = upgrade_from
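# Illustrative sketch: the upgrade test above asserts pairwise-disjoint active task sets with
# three explicit intersection checks. A hypothetical helper that generalizes the same
# assertion to any number of processors:
from itertools import combinations

def assert_tasks_disjoint_sketch(processors):
    """Fail if any two processors report overlapping active task assignments."""
    for p1, p2 in combinations(processors, 2):
        overlapping_tasks = p1.active_tasks.intersection(p2.active_tasks)
        assert len(overlapping_tasks) == 0, \
            "Final task assignments are not unique %s %s" % (p1.active_tasks, p2.active_tasks)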
class TestBounce(Test): """Sanity checks on verifiable producer service class with cluster roll.""" def __init__(self, test_context): super(TestBounce, self).__init__(test_context) quorum_size_arg_name = 'quorum_size' default_quorum_size = 1 quorum_size = default_quorum_size if not test_context.injected_args else test_context.injected_args.get( quorum_size_arg_name, default_quorum_size) if quorum_size < 1: raise Exception("Illegal %s value provided for the test: %s" % (quorum_size_arg_name, quorum_size)) self.topic = "topic" self.zk = ZookeeperService(test_context, num_nodes=quorum_size) if quorum.for_test( test_context) == quorum.zk else None num_kafka_nodes = quorum_size if quorum.for_test( test_context) == quorum.colocated_kraft else 1 self.kafka = KafkaService( test_context, num_nodes=num_kafka_nodes, zk=self.zk, topics={self.topic: { "partitions": 1, "replication-factor": 1 }}, controller_num_nodes_override=quorum_size) self.num_messages = 1000 def create_producer(self): # This will produce to source kafka cluster self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, max_messages=self.num_messages, throughput=self.num_messages // 10) def setUp(self): if self.zk: self.zk.start() # ZooKeeper and KRaft, quorum size = 1 @cluster(num_nodes=4) @matrix(metadata_quorum=quorum.all, quorum_size=[1]) # Remote and Co-located KRaft, quorum size = 3 @cluster(num_nodes=6) @matrix(metadata_quorum=quorum.all_kraft, quorum_size=[3]) def test_simple_run(self, metadata_quorum, quorum_size): """ Test that we can start VerifiableProducer on the current branch snapshot version, and verify that we can produce a small number of messages both before and after a subsequent roll. """ self.kafka.start() for first_time in [True, False]: self.create_producer() self.producer.start() wait_until( lambda: self.producer.num_acked > 5, timeout_sec=15, err_msg= "Producer failed to start in a reasonable amount of time.") self.producer.wait() num_produced = self.producer.num_acked assert num_produced == self.num_messages, "num_produced: %d, num_messages: %d" % ( num_produced, self.num_messages) if first_time: self.producer.stop() if self.kafka.quorum_info.using_kraft and self.kafka.remote_controller_quorum: self.kafka.remote_controller_quorum.restart_cluster() self.kafka.restart_cluster()
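# Illustrative sketch: TestBounce reads quorum_size from the ducktape injected_args, falling
# back to a default when the test is run without that matrix parameter. The lookup pattern
# on its own (hypothetical helper, same attributes as used in __init__ above):
def get_injected_arg_sketch(test_context, name, default):
    """Return an injected test argument, or `default` when none was injected."""
    if not test_context.injected_args:
        return default
    return test_context.injected_args.get(name, default)

# e.g. quorum_size = get_injected_arg_sketch(test_context, 'quorum_size', 1)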
class ConsumerGroupCommandTest(Test): """ Tests ConsumerGroupCommand """ # Root directory for persistent output PERSISTENT_ROOT = "/mnt/consumer_group_command" COMMAND_CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "command.properties") def __init__(self, test_context): super(ConsumerGroupCommandTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.topics = {TOPIC: {'partitions': 1, 'replication-factor': 1}} self.zk = ZookeeperService(test_context, self.num_zk) def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, topics=self.topics) self.kafka.start() def start_consumer(self, security_protocol): enable_new_consumer = security_protocol == SecurityConfig.SSL self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC, consumer_timeout_ms=None, new_consumer=enable_new_consumer) self.consumer.start() def setup_and_verify(self, security_protocol, group=None): self.start_kafka(security_protocol, security_protocol) self.start_consumer(security_protocol) consumer_node = self.consumer.nodes[0] wait_until(lambda: self.consumer.alive(consumer_node), timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start") kafka_node = self.kafka.nodes[0] if security_protocol is not SecurityConfig.PLAINTEXT: prop_file = str(self.kafka.security_config.client_config()) self.logger.debug(prop_file) kafka_node.account.ssh("mkdir -p %s" % self.PERSISTENT_ROOT, allow_fail=False) kafka_node.account.create_file(self.COMMAND_CONFIG_FILE, prop_file) # Verify ConsumerGroupCommand lists expected consumer groups enable_new_consumer = security_protocol != SecurityConfig.PLAINTEXT command_config_file = None if enable_new_consumer: command_config_file = self.COMMAND_CONFIG_FILE if group: wait_until( lambda: re.search( "topic-consumer-group-command", self.kafka.describe_consumer_group( group=group, node=kafka_node, new_consumer=enable_new_consumer, command_config=command_config_file)), timeout_sec=10, err_msg="Timed out waiting to list expected consumer groups.") else: wait_until( lambda: "test-consumer-group" in self.kafka. list_consumer_groups(node=kafka_node, new_consumer=enable_new_consumer, command_config=command_config_file), timeout_sec=10, err_msg="Timed out waiting to list expected consumer groups.") self.consumer.stop() @matrix(security_protocol=['PLAINTEXT', 'SSL']) def test_list_consumer_groups(self, security_protocol='PLAINTEXT'): """ Tests if ConsumerGroupCommand is listing correct consumer groups :return: None """ self.setup_and_verify(security_protocol) @matrix(security_protocol=['PLAINTEXT', 'SSL']) def test_describe_consumer_group(self, security_protocol='PLAINTEXT'): """ Tests if ConsumerGroupCommand is describing a consumer group correctly :return: None """ self.setup_and_verify(security_protocol, group="test-consumer-group")
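# Illustrative sketch: setup_and_verify() above drives list_consumer_groups() and
# describe_consumer_group() through the KafkaService wrappers, passing a command-config
# properties file when a secured port is in use. A hedged sketch of the shape of the
# underlying kafka-consumer-groups.sh invocation (server address and paths below are
# placeholders, not taken from this file):
def describe_group_cmd_sketch(bootstrap_server, group, command_config=None):
    """Build a kafka-consumer-groups.sh --describe command line."""
    cmd = "kafka-consumer-groups.sh --bootstrap-server %s --describe --group %s" % (
        bootstrap_server, group)
    if command_config is not None:
        cmd += " --command-config %s" % command_config
    return cmd

# e.g. describe_group_cmd_sketch("localhost:9092", "test-consumer-group",
#                                "/mnt/consumer_group_command/command.properties")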
class StreamsBrokerBounceTest(Test): """ Simple test of Kafka Streams with brokers failing """ def __init__(self, test_context): super(StreamsBrokerBounceTest, self).__init__(test_context) self.replication = 3 self.partitions = 3 self.topics = { 'echo': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'data': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'min': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'max': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'sum': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'dif': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'cnt': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'avg': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'wcnt': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } }, 'tagg': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": 2 } } } def fail_broker_type(self, failure_mode, broker_type): # Pick a random topic and bounce it's leader topic_index = randint(0, len(self.topics.keys()) - 1) topic = self.topics.keys()[topic_index] failures[failure_mode](self, topic, broker_type) def fail_many_brokers(self, failure_mode, num_failures): sig = signal.SIGTERM if (failure_mode == "clean_shutdown"): sig = signal.SIGTERM else: sig = signal.SIGKILL for num in range(0, num_failures - 1): signal_node(self, self.kafka.nodes[num], sig) def setup_system(self): # Setup phase self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=self.replication, zk=self.zk, topics=self.topics) self.kafka.start() # Start test harness self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.processor1 = StreamsSmokeTestJobRunnerService( self.test_context, self.kafka) self.driver.start() self.processor1.start() def collect_results(self, sleep_time_secs): data = {} # End test self.driver.wait() self.driver.stop() self.processor1.stop() node = self.driver.node # Success is declared if streams does not crash when sleep time > 0 # It should give an exception when sleep time is 0 since we kill the brokers immediately # and the topic manager cannot create internal topics with the desired replication factor if (sleep_time_secs == 0): output_streams = self.processor1.node.account.ssh_capture( "grep SMOKE-TEST-CLIENT-EXCEPTION %s" % self.processor1.STDOUT_FILE, allow_fail=False) else: output_streams = self.processor1.node.account.ssh_capture( "grep SMOKE-TEST-CLIENT-CLOSED %s" % self.processor1.STDOUT_FILE, allow_fail=False) for line in output_streams: data["Client closed"] = line # Currently it is hard to guarantee anything about Kafka since we don't have exactly once. 
# With exactly once in place, success will be defined as ALL-RECORDS-DELIVERED and SUCCESS output = node.account.ssh_capture( "grep -E 'ALL-RECORDS-DELIVERED|PROCESSED-MORE-THAN-GENERATED|PROCESSED-LESS-THAN-GENERATED' %s" % self.driver.STDOUT_FILE, allow_fail=False) for line in output: data["Records Delivered"] = line output = node.account.ssh_capture("grep -E 'SUCCESS|FAILURE' %s" % self.driver.STDOUT_FILE, allow_fail=False) for line in output: data["Logic Success/Failure"] = line return data @cluster(num_nodes=7) @matrix(failure_mode=["clean_shutdown", "hard_shutdown"], broker_type=["leader", "controller"], sleep_time_secs=[120]) def test_broker_type_bounce(self, failure_mode, broker_type, sleep_time_secs): """ Start a smoke test client, then kill one particular broker and ensure data is still received. Record whether records are delivered. """ self.setup_system() # Sleep to allow test to run for a bit time.sleep(sleep_time_secs) # Fail brokers self.fail_broker_type(failure_mode, broker_type) return self.collect_results(sleep_time_secs) @cluster(num_nodes=7) @matrix(failure_mode=["clean_shutdown"], broker_type=["controller"], sleep_time_secs=[0]) def test_broker_type_bounce_at_start(self, failure_mode, broker_type, sleep_time_secs): """ Start a smoke test client, then kill one particular broker immediately before streams starts. Streams should throw an exception since it cannot create topics with the desired replication factor of 3 """ self.setup_system() # Sleep to allow test to run for a bit time.sleep(sleep_time_secs) # Fail brokers self.fail_broker_type(failure_mode, broker_type) return self.collect_results(sleep_time_secs) @cluster(num_nodes=7) @matrix(failure_mode=["clean_shutdown", "hard_shutdown"], num_failures=[2]) def test_many_brokers_bounce(self, failure_mode, num_failures): """ Start a smoke test client, then kill a few brokers and ensure data is still received. Record whether records are delivered """ self.setup_system() # Sleep to allow test to run for a bit time.sleep(120) # Fail brokers self.fail_many_brokers(failure_mode, num_failures) return self.collect_results(120)
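# Illustrative sketch: collect_results() above greps the driver and processor stdout for
# well-known status lines and records the last match under a descriptive key. The same
# pattern pulled out into a hypothetical helper (node.account.ssh_capture is used exactly
# as in the test above):
def last_matching_line_sketch(node, pattern, path):
    """Return the final line in `path` matching the extended regex `pattern`, or None."""
    result = None
    for line in node.account.ssh_capture("grep -E '%s' %s" % (pattern, path), allow_fail=True):
        result = line
    return result

# e.g. data["Logic Success/Failure"] = last_matching_line_sketch(node, 'SUCCESS|FAILURE', driver_stdout_path)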
class TestSecurityRollingUpgrade(ProduceConsumeValidateTest): """Tests a rolling upgrade from PLAINTEXT to a secured cluster """ def __init__(self, test_context): super(TestSecurityRollingUpgrade, self).__init__(test_context=test_context) def setUp(self): self.topic = "test_topic" self.producer_throughput = 100 self.num_producers = 1 self.num_consumers = 1 self.zk = ZookeeperService(self.test_context, num_nodes=1) self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, topics={self.topic: { "partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) self.zk.start() #reduce replica.lag.time.max.ms due to KAFKA-2827 self.kafka.replica_lag = 2000 def create_producer_and_consumer(self): self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput) self.consumer = ConsoleConsumer( self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int, new_consumer=True) self.consumer.group_id = "unique-test-group-" + str(random.random()) def bounce(self): #Sleeps reduce the intermittent failures reported in KAFKA-2891. Should be removed once resolved. for node in self.kafka.nodes: self.kafka.stop_node(node) time.sleep(10) self.kafka.start_node(node) time.sleep(10) def roll_in_secured_settings(self, upgrade_protocol): self.kafka.interbroker_security_protocol = upgrade_protocol # Roll cluster to include inter broker security protocol. self.kafka.open_port(upgrade_protocol) self.bounce() # Roll cluster to disable PLAINTEXT port self.kafka.close_port('PLAINTEXT') self.bounce() def open_secured_port(self, upgrade_protocol): self.kafka.security_protocol = upgrade_protocol self.kafka.open_port(upgrade_protocol) self.kafka.start_minikdc() self.bounce() @matrix(upgrade_protocol=["SSL", "SASL_PLAINTEXT", "SASL_SSL"]) def test_rolling_upgrade_phase_one(self, upgrade_protocol): """ Start with a PLAINTEXT cluster, open a SECURED port, via a rolling upgrade, ensuring we could produce and consume throughout over PLAINTEXT. Finally check we can produce and consume the new secured port. """ self.kafka.interbroker_security_protocol = "PLAINTEXT" self.kafka.security_protocol = "PLAINTEXT" self.kafka.start() #Create PLAINTEXT producer and consumer self.create_producer_and_consumer() # Rolling upgrade, opening a secure protocol, ensuring the Plaintext producer/consumer continues to run self.run_produce_consume_validate(self.open_secured_port, upgrade_protocol) # Now we can produce and consume via the secured port self.kafka.security_protocol = upgrade_protocol self.create_producer_and_consumer() self.run_produce_consume_validate(lambda: time.sleep(1)) @matrix(upgrade_protocol=["SSL", "SASL_PLAINTEXT", "SASL_SSL"]) def test_rolling_upgrade_phase_two(self, upgrade_protocol): """ Start with a PLAINTEXT cluster with a second Secured port open (i.e. result of phase one). Start an Producer and Consumer via the SECURED port Rolling upgrade to add inter-broker be the secure protocol Rolling upgrade again to disable PLAINTEXT Ensure the producer and consumer ran throughout """ #Given we have a broker that has both secure and PLAINTEXT ports open self.kafka.security_protocol = upgrade_protocol self.kafka.interbroker_security_protocol = "PLAINTEXT" self.kafka.start() #Create Secured Producer and Consumer self.create_producer_and_consumer() #Roll in the security protocol. Disable Plaintext. 
Ensure we can produce and consume throughout self.run_produce_consume_validate(self.roll_in_secured_settings, upgrade_protocol)
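# Illustrative sketch: both secured-upgrade phases above rely on restarting brokers one at a
# time so that produce/consume traffic keeps flowing while the listener configuration
# changes. The rolling-restart step in isolation (hypothetical helper built from the same
# stop_node/start_node calls used in bounce(); the sleeps mirror the KAFKA-2891 workaround
# noted above):
import time

def rolling_bounce_sketch(kafka, settle_secs=10):
    """Restart each broker in turn, pausing between stop and start."""
    for node in kafka.nodes:
        kafka.stop_node(node)
        time.sleep(settle_secs)
        kafka.start_node(node)
        time.sleep(settle_secs)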
class TestSecurityRollingUpgrade(ProduceConsumeValidateTest): """Tests a rolling upgrade from PLAINTEXT to a secured cluster """ def __init__(self, test_context): super(TestSecurityRollingUpgrade, self).__init__(test_context=test_context) def setUp(self): self.acls = ACLs() self.topic = "test_topic" self.group = "group" self.producer_throughput = 100 self.num_producers = 1 self.num_consumers = 1 self.zk = ZookeeperService(self.test_context, num_nodes=1) self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, topics={self.topic: { "partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) self.zk.start() def create_producer_and_consumer(self): self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput) self.consumer = ConsoleConsumer( self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int, new_consumer=True) self.consumer.group_id = "group" def bounce(self): self.kafka.start_minikdc() for node in self.kafka.nodes: self.kafka.stop_node(node) self.kafka.start_node(node) time.sleep(10) def roll_in_secured_settings(self, client_protocol, broker_protocol): # Roll cluster to include inter broker security protocol. self.kafka.interbroker_security_protocol = broker_protocol self.kafka.open_port(client_protocol) self.kafka.open_port(broker_protocol) self.bounce() # Roll cluster to disable PLAINTEXT port self.kafka.close_port('PLAINTEXT') self.kafka.authorizer_class_name = KafkaService.SIMPLE_AUTHORIZER self.acls.set_acls(client_protocol, self.kafka, self.zk, self.topic, self.group) self.acls.set_acls(broker_protocol, self.kafka, self.zk, self.topic, self.group) self.bounce() def open_secured_port(self, client_protocol): self.kafka.security_protocol = client_protocol self.kafka.open_port(client_protocol) self.kafka.start_minikdc() self.bounce() @matrix(client_protocol=["SSL", "SASL_PLAINTEXT", "SASL_SSL"]) def test_rolling_upgrade_phase_one(self, client_protocol): """ Start with a PLAINTEXT cluster, open a SECURED port, via a rolling upgrade, ensuring we could produce and consume throughout over PLAINTEXT. Finally check we can produce and consume the new secured port. """ self.kafka.interbroker_security_protocol = "PLAINTEXT" self.kafka.security_protocol = "PLAINTEXT" self.kafka.start() # Create PLAINTEXT producer and consumer self.create_producer_and_consumer() # Rolling upgrade, opening a secure protocol, ensuring the Plaintext producer/consumer continues to run self.run_produce_consume_validate(self.open_secured_port, client_protocol) # Now we can produce and consume via the secured port self.kafka.security_protocol = client_protocol self.create_producer_and_consumer() self.run_produce_consume_validate(lambda: time.sleep(1)) @matrix(client_protocol=["SASL_SSL", "SSL", "SASL_PLAINTEXT"], broker_protocol=["SASL_SSL", "SSL", "SASL_PLAINTEXT"]) def test_rolling_upgrade_phase_two(self, client_protocol, broker_protocol): """ Start with a PLAINTEXT cluster with a second Secured port open (i.e. result of phase one). 
Start a Producer and Consumer via the SECURED port Incrementally upgrade so that inter-broker communication uses the secure protocol Incrementally upgrade again to add ACLs as well as disabling the PLAINTEXT port Ensure the producer and consumer ran throughout """ #Given we have a broker that has both secure and PLAINTEXT ports open self.kafka.security_protocol = client_protocol self.kafka.interbroker_security_protocol = "PLAINTEXT" self.kafka.start() #Create Secured Producer and Consumer self.create_producer_and_consumer() #Roll in the security protocol. Disable Plaintext. Ensure we can produce and consume throughout self.run_produce_consume_validate(self.roll_in_secured_settings, client_protocol, broker_protocol)
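# Illustrative sketch: the rolling-upgrade tests above delegate validation to
# ProduceConsumeValidateTest.run_produce_consume_validate(). A simple count-based version of
# the invariant it protects, sketched with the num_acked / messages_consumed attributes used
# elsewhere in this file (the real base class does more, e.g. identifying which values went
# missing):
def produced_equals_consumed_sketch(producer, consumer):
    """Assert that the number of consumed messages matches the number acked by the producer."""
    num_produced = producer.num_acked
    num_consumed = len(consumer.messages_consumed[1])
    assert num_produced == num_consumed, \
        "num_produced: %d, num_consumed: %d" % (num_produced, num_consumed)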
class TestMirrorMakerService(ProduceConsumeValidateTest): """Sanity checks on mirror maker service class.""" def __init__(self, test_context): super(TestMirrorMakerService, self).__init__(test_context) self.topic = "topic" self.source_zk = ZookeeperService(test_context, num_nodes=1) self.target_zk = ZookeeperService(test_context, num_nodes=1) self.source_kafka = KafkaService(test_context, num_nodes=1, zk=self.source_zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) self.target_kafka = KafkaService(test_context, num_nodes=1, zk=self.target_zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) # This will produce to source kafka cluster self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.source_kafka, topic=self.topic, throughput=1000) self.mirror_maker = MirrorMaker(test_context, num_nodes=1, source=self.source_kafka, target=self.target_kafka, whitelist=self.topic, offset_commit_interval_ms=1000) # This will consume from target kafka cluster self.consumer = ConsoleConsumer(test_context, num_nodes=1, kafka=self.target_kafka, topic=self.topic, message_validator=is_int, consumer_timeout_ms=60000) def setUp(self): # Source cluster self.source_zk.start() # Target cluster self.target_zk.start() def start_kafka(self, security_protocol): self.source_kafka.security_protocol = security_protocol self.source_kafka.interbroker_security_protocol = security_protocol self.target_kafka.security_protocol = security_protocol self.target_kafka.interbroker_security_protocol = security_protocol if self.source_kafka.security_config.has_sasl_kerberos: minikdc = MiniKdc(self.source_kafka.context, self.source_kafka.nodes + self.target_kafka.nodes) self.source_kafka.minikdc = minikdc self.target_kafka.minikdc = minikdc minikdc.start() self.source_kafka.start() self.target_kafka.start() def bounce(self, clean_shutdown=True): """Bounce mirror maker with a clean (kill -15) or hard (kill -9) shutdown""" # Wait until messages start appearing in the target cluster wait_until(lambda: len(self.consumer.messages_consumed[1]) > 0, timeout_sec=15) # Wait for at least one offset to be committed. # # This step is necessary to prevent data loss with default mirror maker settings: # currently, if we don't have at least one committed offset, # and we bounce mirror maker, the consumer internals will throw OffsetOutOfRangeException, and the default # auto.offset.reset policy ("largest") will kick in, causing mirrormaker to start consuming from the largest # offset. As a result, any messages produced to the source cluster while mirrormaker was dead won't get # mirrored to the target cluster. # (see https://issues.apache.org/jira/browse/KAFKA-2759) # # This isn't necessary with kill -15 because mirror maker commits its offsets during graceful # shutdown. 
if not clean_shutdown: time.sleep(self.mirror_maker.offset_commit_interval_ms / 1000.0 + .5) for i in range(3): self.logger.info("Bringing mirror maker nodes down...") for node in self.mirror_maker.nodes: self.mirror_maker.stop_node(node, clean_shutdown=clean_shutdown) num_consumed = len(self.consumer.messages_consumed[1]) self.logger.info("Bringing mirror maker nodes back up...") for node in self.mirror_maker.nodes: self.mirror_maker.start_node(node) # Ensure new messages are once again showing up on the target cluster # new consumer requires higher timeout here wait_until(lambda: len(self.consumer.messages_consumed[1]) > num_consumed + 100, timeout_sec=60) def wait_for_n_messages(self, n_messages=100): """Wait for a minimum number of messages to be successfully produced.""" wait_until(lambda: self.producer.num_acked > n_messages, timeout_sec=10, err_msg="Producer failed to produce %d messages in a reasonable amount of time." % n_messages) @parametrize(security_protocol='PLAINTEXT', new_consumer=False) @matrix(security_protocol=['PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL'], new_consumer=[True]) def test_simple_end_to_end(self, security_protocol, new_consumer): """ Test end-to-end behavior under non-failure conditions. Setup: two single node Kafka clusters, each connected to its own single node zookeeper cluster. One is source, and the other is target. Single-node mirror maker mirrors from source to target. - Start mirror maker. - Produce a small number of messages to the source cluster. - Consume messages from target. - Verify that number of consumed messages matches the number produced. """ self.start_kafka(security_protocol) self.consumer.new_consumer = new_consumer self.mirror_maker.new_consumer = new_consumer self.mirror_maker.start() mm_node = self.mirror_maker.nodes[0] with mm_node.account.monitor_log(self.mirror_maker.LOG_FILE) as monitor: if new_consumer: monitor.wait_until("Resetting offset for partition", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") else: monitor.wait_until("reset fetch offset", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") self.run_produce_consume_validate(core_test_action=self.wait_for_n_messages) self.mirror_maker.stop() @matrix(offsets_storage=["kafka", "zookeeper"], new_consumer=[False], clean_shutdown=[True, False]) @matrix(new_consumer=[True], clean_shutdown=[True, False], security_protocol=['PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL']) def test_bounce(self, offsets_storage="kafka", new_consumer=True, clean_shutdown=True, security_protocol='PLAINTEXT'): """ Test end-to-end behavior under failure conditions. Setup: two single node Kafka clusters, each connected to its own single node zookeeper cluster. One is source, and the other is target. Single-node mirror maker mirrors from source to target. - Start mirror maker. - Produce to source cluster, and consume from target cluster in the background. - Bounce MM process - Verify every message acknowledged by the source producer is consumed by the target consumer """ if new_consumer and not clean_shutdown: # Increase timeout on downstream console consumer; mirror maker with new consumer takes extra time # during hard bounce. 
This is because the restarted mirror maker consumer won't be able to rejoin # the group until the previous session times out self.consumer.consumer_timeout_ms = 60000 self.start_kafka(security_protocol) self.consumer.new_consumer = new_consumer self.mirror_maker.offsets_storage = offsets_storage self.mirror_maker.new_consumer = new_consumer self.mirror_maker.start() # Wait until mirror maker has reset fetch offset at least once before continuing with the rest of the test mm_node = self.mirror_maker.nodes[0] with mm_node.account.monitor_log(self.mirror_maker.LOG_FILE) as monitor: if new_consumer: monitor.wait_until("Resetting offset for partition", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") else: monitor.wait_until("reset fetch offset", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") self.run_produce_consume_validate(core_test_action=lambda: self.bounce(clean_shutdown=clean_shutdown)) self.mirror_maker.stop()
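The hard-bounce path above sleeps slightly longer than one offset-commit interval before killing mirror maker, so at least one commit has happened and the KAFKA-2759 data-loss scenario is avoided. A tiny standalone sketch of that timing rule, assuming only the 1000 ms commit interval configured above:

import time

# Seconds to wait before a hard (kill -9) bounce so at least one offset commit
# has happened; mirrors the sleep in bounce() above.
def hard_bounce_delay_secs(offset_commit_interval_ms, margin_secs=0.5):
    return offset_commit_interval_ms / 1000.0 + margin_secs

if __name__ == "__main__":
    delay = hard_bounce_delay_secs(1000)  # 1.5 with the interval used in this test
    print(delay)
    time.sleep(delay)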
class GroupModeTransactionsTest(Test): """Essentially testing the same functionality as TransactionsTest by transactionally copying data from a source topic to a destination topic and killing the copy process as well as the broker randomly through the process. The major difference is that we choose to work as a collaborated group with same topic subscription instead of individual copiers. In the end we verify that the final output topic contains exactly one committed copy of each message from the original producer. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(GroupModeTransactionsTest, self).__init__(test_context=test_context) self.input_topic = "input-topic" self.output_topic = "output-topic" self.num_brokers = 3 # Test parameters self.num_input_partitions = 9 self.num_output_partitions = 9 self.num_copiers = 3 self.num_seed_messages = 100000 self.transaction_size = 750 # The transaction timeout should be lower than the progress timeout, but at # least as high as the request timeout (which is 30s by default). When the # client is hard-bounced, progress may depend on the previous transaction # being aborted. When the broker is hard-bounced, we may have to wait as # long as the request timeout to get a `Produce` response and we do not # want the coordinator timing out the transaction. self.transaction_timeout = 40000 self.progress_timeout_sec = 60 self.consumer_group = "grouped-transactions-test-consumer-group" self.zk = ZookeeperService(test_context, num_nodes=1) if quorum.for_test(test_context) == quorum.zk else None self.kafka = KafkaService(test_context, num_nodes=self.num_brokers, zk=self.zk, controller_num_nodes_override=1) def setUp(self): if self.zk: self.zk.start() def seed_messages(self, topic, num_seed_messages): seed_timeout_sec = 10000 seed_producer = VerifiableProducer(context=self.test_context, num_nodes=1, kafka=self.kafka, topic=topic, message_validator=is_int, max_messages=num_seed_messages, enable_idempotence=True, repeating_keys=self.num_input_partitions) seed_producer.start() wait_until(lambda: seed_producer.num_acked >= num_seed_messages, timeout_sec=seed_timeout_sec, err_msg="Producer failed to produce messages %d in %ds." 
% \ (self.num_seed_messages, seed_timeout_sec)) return seed_producer.acked_by_partition def get_messages_from_topic(self, topic, num_messages): consumer = self.start_consumer(topic, group_id="verifying_consumer") return self.drain_consumer(consumer, num_messages) def bounce_brokers(self, clean_shutdown): for node in self.kafka.nodes: if clean_shutdown: self.kafka.restart_node(node, clean_shutdown = True) else: self.kafka.stop_node(node, clean_shutdown = False) gracePeriodSecs = 5 if self.zk: wait_until(lambda: len(self.kafka.pids(node)) == 0 and not self.kafka.is_registered(node), timeout_sec=self.kafka.zk_session_timeout + gracePeriodSecs, err_msg="Failed to see timely deregistration of hard-killed broker %s" % str(node.account)) else: brokerSessionTimeoutSecs = 18 wait_until(lambda: len(self.kafka.pids(node)) == 0, timeout_sec=brokerSessionTimeoutSecs + gracePeriodSecs, err_msg="Failed to see timely disappearance of process for hard-killed broker %s" % str(node.account)) time.sleep(brokerSessionTimeoutSecs + gracePeriodSecs) self.kafka.start_node(node) self.kafka.await_no_under_replicated_partitions() def create_and_start_message_copier(self, input_topic, output_topic, transactional_id): message_copier = TransactionalMessageCopier( context=self.test_context, num_nodes=1, kafka=self.kafka, transactional_id=transactional_id, consumer_group=self.consumer_group, input_topic=input_topic, input_partition=-1, output_topic=output_topic, max_messages=-1, transaction_size=self.transaction_size, transaction_timeout=self.transaction_timeout, use_group_metadata=True, group_mode=True ) message_copier.start() wait_until(lambda: message_copier.alive(message_copier.nodes[0]), timeout_sec=10, err_msg="Message copier failed to start after 10 s") return message_copier def bounce_copiers(self, copiers, clean_shutdown, timeout_sec=240): for _ in range(3): for copier in copiers: wait_until(lambda: copier.progress_percent() >= 20.0, timeout_sec=self.progress_timeout_sec, err_msg="%s : Message copier didn't make enough progress in %ds. Current progress: %s" \ % (copier.transactional_id, self.progress_timeout_sec, str(copier.progress_percent()))) self.logger.info("%s - progress: %s" % (copier.transactional_id, str(copier.progress_percent()))) copier.restart(clean_shutdown) def create_and_start_copiers(self, input_topic, output_topic, num_copiers): copiers = [] for i in range(0, num_copiers): copiers.append(self.create_and_start_message_copier( input_topic=input_topic, output_topic=output_topic, transactional_id="copier-" + str(i) )) return copiers @staticmethod def valid_value_and_partition(msg): """Method used to check whether the given message is a valid tab separated value + partition return value and partition as a size-two array represented tuple: [value, partition] """ try: splitted_msg = msg.split('\t') value = int(splitted_msg[1]) partition = int(splitted_msg[0].split(":")[1]) return [value, partition] except ValueError: raise Exception("Unexpected message format (expected a tab separated [value, partition] tuple). Message: %s" % (msg)) def start_consumer(self, topic_to_read, group_id): consumer = ConsoleConsumer(context=self.test_context, num_nodes=1, kafka=self.kafka, topic=topic_to_read, group_id=group_id, message_validator=self.valid_value_and_partition, from_beginning=True, print_partition=True, isolation_level="read_committed") consumer.start() # ensure that the consumer is up. 
wait_until(lambda: (len(consumer.messages_consumed[1]) > 0) == True, timeout_sec=60, err_msg="Consumer failed to consume any messages for %ds" % \ 60) return consumer @staticmethod def split_by_partition(messages_consumed): messages_by_partition = {} for msg in messages_consumed: partition = msg[1] if partition not in messages_by_partition: messages_by_partition[partition] = [] messages_by_partition[partition].append(msg[0]) return messages_by_partition def drain_consumer(self, consumer, num_messages): # wait until we read at least the expected number of messages. # This is a safe check because both failure modes will be caught: # 1. If we have 'num_seed_messages' but there are duplicates, then # this is checked for later. # # 2. If we never reach 'num_seed_messages', then this will cause the # test to fail. wait_until(lambda: len(consumer.messages_consumed[1]) >= num_messages, timeout_sec=90, err_msg="Consumer consumed only %d out of %d messages in %ds" % \ (len(consumer.messages_consumed[1]), num_messages, 90)) consumer.stop() return self.split_by_partition(consumer.messages_consumed[1]) def copy_messages_transactionally(self, failure_mode, bounce_target, input_topic, output_topic, num_copiers, num_messages_to_copy): """Copies messages transactionally from the seeded input topic to the output topic, either bouncing brokers or clients in a hard and soft way as it goes. This method also consumes messages in read_committed mode from the output topic while the bounces and copy is going on. It returns the concurrently consumed messages. """ copiers = self.create_and_start_copiers(input_topic=input_topic, output_topic=output_topic, num_copiers=num_copiers) concurrent_consumer = self.start_consumer(output_topic, group_id="concurrent_consumer") clean_shutdown = False if failure_mode == "clean_bounce": clean_shutdown = True if bounce_target == "brokers": self.bounce_brokers(clean_shutdown) elif bounce_target == "clients": self.bounce_copiers(copiers, clean_shutdown) copier_timeout_sec = 240 for copier in copiers: wait_until(lambda: copier.is_done, timeout_sec=copier_timeout_sec, err_msg="%s - Failed to copy all messages in %ds." 
% \ (copier.transactional_id, copier_timeout_sec)) self.logger.info("finished copying messages") return self.drain_consumer(concurrent_consumer, num_messages_to_copy) def setup_topics(self): self.kafka.topics = { self.input_topic: { "partitions": self.num_input_partitions, "replication-factor": 3, "configs": { "min.insync.replicas": 2 } }, self.output_topic: { "partitions": self.num_output_partitions, "replication-factor": 3, "configs": { "min.insync.replicas": 2 } } } @cluster(num_nodes=10) @matrix(failure_mode=["hard_bounce", "clean_bounce"], bounce_target=["brokers", "clients"], metadata_quorum=quorum.all_non_upgrade) def test_transactions(self, failure_mode, bounce_target, metadata_quorum): security_protocol = 'PLAINTEXT' self.kafka.security_protocol = security_protocol self.kafka.interbroker_security_protocol = security_protocol self.kafka.logs["kafka_data_1"]["collect_default"] = True self.kafka.logs["kafka_data_2"]["collect_default"] = True self.kafka.logs["kafka_operational_logs_debug"]["collect_default"] = True self.setup_topics() self.kafka.start() input_messages_by_partition = self.seed_messages(self.input_topic, self.num_seed_messages) concurrently_consumed_message_by_partition = self.copy_messages_transactionally( failure_mode, bounce_target, input_topic=self.input_topic, output_topic=self.output_topic, num_copiers=self.num_copiers, num_messages_to_copy=self.num_seed_messages) output_messages_by_partition = self.get_messages_from_topic(self.output_topic, self.num_seed_messages) assert len(input_messages_by_partition) == \ len(concurrently_consumed_message_by_partition), "The partition counts do not match: " \ "input partitions count %d, " \ "concurrently consumed partitions count %d" % \ (len(input_messages_by_partition), len(concurrently_consumed_message_by_partition)) assert len(input_messages_by_partition) == \ len(output_messages_by_partition), "The partition counts do not match: " \ "input partitions count %d, " \ "output partitions count %d" % \ (len(input_messages_by_partition), len(output_messages_by_partition)) for p in range(self.num_input_partitions): if p not in input_messages_by_partition: continue assert p in output_messages_by_partition, "Partition %d not in output messages" % p assert p in concurrently_consumed_message_by_partition, "Partition %d not in concurrently consumed messages" % p input_messages = input_messages_by_partition[p] output_messages = output_messages_by_partition[p] concurrently_consumed_messages = concurrently_consumed_message_by_partition[p] output_message_set = set(output_messages) input_message_set = set(input_messages) concurrently_consumed_message_set = set(concurrently_consumed_messages) num_dups = abs(len(output_messages) - len(output_message_set)) num_dups_in_concurrent_consumer = abs(len(concurrently_consumed_messages) - len(concurrently_consumed_message_set)) assert num_dups == 0, "Detected %d duplicates in the output stream" % num_dups assert input_message_set == output_message_set, "Input and output message sets are not equal. Num input messages %d. Num output messages %d" % \ (len(input_message_set), len(output_message_set)) assert num_dups_in_concurrent_consumer == 0, "Detected %d dups in concurrently consumed messages" % num_dups_in_concurrent_consumer assert input_message_set == concurrently_consumed_message_set, \ "Input and concurrently consumed output message sets are not equal. Num input messages: %d. 
Num concurrently_consumed_messages: %d" % \ (len(input_message_set), len(concurrently_consumed_message_set)) assert input_messages == sorted(input_messages), "The seed messages themselves were not in order" assert output_messages == input_messages, "Output messages are not in order" assert concurrently_consumed_messages == output_messages, "Concurrently consumed messages are not in order"
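A standalone sketch of the per-partition exactly-once verification performed at the end of test_transactions above: parse consumed records, group them by partition, and require no duplicates plus set equality with the seeded input. The sample record format is inferred from valid_value_and_partition and is illustrative only.

# Illustrative verification helper (not the test's own code).
def parse(msg):
    # Records are assumed to look like "<something>:<partition>\t<value>".
    prefix, value = msg.split('\t')
    return int(prefix.split(':')[1]), int(value)

def by_partition(msgs):
    out = {}
    for partition, value in map(parse, msgs):
        out.setdefault(partition, []).append(value)
    return out

def verify_exactly_once(input_by_partition, output_msgs):
    output_by_partition = by_partition(output_msgs)
    assert set(input_by_partition) == set(output_by_partition), "partition sets differ"
    for p, values in output_by_partition.items():
        assert len(values) == len(set(values)), "duplicates in partition %d" % p
        assert set(values) == set(input_by_partition[p]), "partition %d content differs" % p

if __name__ == "__main__":
    seeded = {0: [1, 3], 1: [2, 4]}
    consumed = ["p:0\t1", "p:0\t3", "p:1\t2", "p:1\t4"]
    verify_exactly_once(seeded, consumed)
    print("exactly-once copy verified")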
class Benchmark(Test): """A benchmark of Kafka producer/consumer performance. This replicates the test run here: https://engineering.linkedin.com/kafka/benchmarking-apache-kafka-2-million-writes-second-three-cheap-machines """ def __init__(self, test_context): super(Benchmark, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 3 self.topics = { TOPIC_REP_ONE: {'partitions': 6, 'replication-factor': 1}, TOPIC_REP_THREE: {'partitions': 6, 'replication-factor': 3} } self.zk = ZookeeperService(test_context, self.num_zk) self.msgs_large = 10000000 self.batch_size = 8*1024 self.buffer_memory = 64*1024*1024 self.msg_sizes = [10, 100, 1000, 10000, 100000] self.target_data_size = 128*1024*1024 self.target_data_size_gb = self.target_data_size/float(1024*1024*1024) def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, topics=self.topics) self.kafka.log_level = "INFO" # We don't DEBUG logging here self.kafka.start() @parametrize(acks=1, topic=TOPIC_REP_ONE) @parametrize(acks=1, topic=TOPIC_REP_THREE) @parametrize(acks=-1, topic=TOPIC_REP_THREE) @parametrize(acks=1, topic=TOPIC_REP_THREE, num_producers=3) @matrix(acks=[1], topic=[TOPIC_REP_THREE], message_size=[10, 100, 1000, 10000, 100000], security_protocol=['PLAINTEXT', 'SSL']) def test_producer_throughput(self, acks, topic, num_producers=1, message_size=DEFAULT_RECORD_SIZE, security_protocol='PLAINTEXT'): """ Setup: 1 node zk + 3 node kafka cluster Produce ~128MB worth of messages to a topic with 6 partitions. Required acks, topic replication factor, security protocol and message size are varied depending on arguments injected into this test. Collect and return aggregate throughput statistics after all messages have been acknowledged. (This runs ProducerPerformance.java under the hood) """ self.start_kafka(security_protocol, security_protocol) # Always generate the same total amount of data nrecords = int(self.target_data_size / message_size) self.producer = ProducerPerformanceService( self.test_context, num_producers, self.kafka, topic=topic, num_records=nrecords, record_size=message_size, throughput=-1, settings={ 'acks': acks, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory}) self.producer.run() return compute_aggregate_throughput(self.producer) @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL']) def test_long_term_producer_throughput(self, security_protocol, interbroker_security_protocol=None): """ Setup: 1 node zk + 3 node kafka cluster Produce 10e6 100 byte messages to a topic with 6 partitions, replication-factor 3, and acks=1. Collect and return aggregate throughput statistics after all messages have been acknowledged. 
(This runs ProducerPerformance.java under the hood) """ if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol) self.producer = ProducerPerformanceService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=self.msgs_large, record_size=DEFAULT_RECORD_SIZE, throughput=-1, settings={'acks': 1, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory}, intermediate_stats=True ) self.producer.run() summary = ["Throughput over long run, data > memory:"] data = {} # FIXME we should be generating a graph too # Try to break it into 5 blocks, but fall back to a smaller number if # there aren't even 5 elements block_size = max(len(self.producer.stats[0]) / 5, 1) nblocks = len(self.producer.stats[0]) / block_size for i in range(nblocks): subset = self.producer.stats[0][i*block_size:min((i+1)*block_size, len(self.producer.stats[0]))] if len(subset) == 0: summary.append(" Time block %d: (empty)" % i) data[i] = None else: records_per_sec = sum([stat['records_per_sec'] for stat in subset])/float(len(subset)) mb_per_sec = sum([stat['mbps'] for stat in subset])/float(len(subset)) summary.append(" Time block %d: %f rec/sec (%f MB/s)" % (i, records_per_sec, mb_per_sec)) data[i] = throughput(records_per_sec, mb_per_sec) self.logger.info("\n".join(summary)) return data @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL', 'SASL_PLAINTEXT', 'SASL_SSL']) def test_end_to_end_latency(self, security_protocol, interbroker_security_protocol=None): """ Setup: 1 node zk + 3 node kafka cluster Produce (acks = 1) and consume 10e3 messages to a topic with 6 partitions and replication-factor 3, measuring the latency between production and consumption of each message. Return aggregate latency statistics. (Under the hood, this simply runs EndToEndLatency.scala) """ if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol) self.logger.info("BENCHMARK: End to end latency") self.perf = EndToEndLatencyService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=10000 ) self.perf.run() return latency(self.perf.results[0]['latency_50th_ms'], self.perf.results[0]['latency_99th_ms'], self.perf.results[0]['latency_999th_ms']) @parametrize(security_protocol='PLAINTEXT', new_consumer=False) @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL']) def test_producer_and_consumer(self, security_protocol, interbroker_security_protocol=None, new_consumer=True): """ Setup: 1 node zk + 3 node kafka cluster Concurrently produce and consume 10e6 messages with a single producer and a single consumer, using new consumer if new_consumer == True Return aggregate throughput statistics for both producer and consumer. 
(Under the hood, this runs ProducerPerformance.java, and ConsumerPerformance.scala) """ if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol) num_records = 10 * 1000 * 1000 # 10e6 self.producer = ProducerPerformanceService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1, settings={'acks': 1, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory} ) self.consumer = ConsumerPerformanceService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, new_consumer=new_consumer, messages=num_records) Service.run_parallel(self.producer, self.consumer) data = { "producer": compute_aggregate_throughput(self.producer), "consumer": compute_aggregate_throughput(self.consumer) } summary = [ "Producer + consumer:", str(data)] self.logger.info("\n".join(summary)) return data @parametrize(security_protocol='PLAINTEXT', new_consumer=False) @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL']) def test_consumer_throughput(self, security_protocol, interbroker_security_protocol=None, new_consumer=True, num_consumers=1): """ Consume 10e6 100-byte messages with 1 or more consumers from a topic with 6 partitions (using new consumer iff new_consumer == True), and report throughput. """ if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol) num_records = 10 * 1000 * 1000 # 10e6 # seed kafka w/messages self.producer = ProducerPerformanceService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1, settings={'acks': 1, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory} ) self.producer.run() # consume self.consumer = ConsumerPerformanceService( self.test_context, num_consumers, self.kafka, topic=TOPIC_REP_THREE, new_consumer=new_consumer, messages=num_records) self.consumer.group = "test-consumer-group" self.consumer.run() return compute_aggregate_throughput(self.consumer)
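test_long_term_producer_throughput averages the producer's per-interval stats over up to five contiguous blocks. A minimal standalone sketch of that aggregation follows, using integer division so the block count stays an int on Python 3; the stats list here is made up.

def blocked_averages(stats, max_blocks=5):
    """Average records/sec and MB/s over up to max_blocks contiguous blocks."""
    block_size = max(len(stats) // max_blocks, 1)  # // keeps this an int on Python 3
    nblocks = len(stats) // block_size
    data = {}
    for i in range(nblocks):
        subset = stats[i * block_size:min((i + 1) * block_size, len(stats))]
        if not subset:
            data[i] = None
            continue
        data[i] = (sum(s['records_per_sec'] for s in subset) / float(len(subset)),
                   sum(s['mbps'] for s in subset) / float(len(subset)))
    return data

if __name__ == "__main__":
    fake_stats = [{'records_per_sec': 1000 + i, 'mbps': 1.0} for i in range(12)]
    print(blocked_averages(fake_stats))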
class ConsumeBenchTest(Test): def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(ConsumeBenchTest, self).__init__(test_context) self.zk = ZookeeperService(test_context, num_nodes=3) self.kafka = KafkaService(test_context, num_nodes=3, zk=self.zk) self.producer_workload_service = ProduceBenchWorkloadService( test_context, self.kafka) self.consumer_workload_service = ConsumeBenchWorkloadService( test_context, self.kafka) self.consumer_workload_service_2 = ConsumeBenchWorkloadService( test_context, self.kafka) self.active_topics = { "consume_bench_topic[0-5]": { "numPartitions": 5, "replicationFactor": 3 } } self.trogdor = TrogdorService(context=self.test_context, client_services=[ self.kafka, self.producer_workload_service, self.consumer_workload_service, self.consumer_workload_service_2 ]) def setUp(self): self.trogdor.start() self.zk.start() self.kafka.start() def teardown(self): self.trogdor.stop() self.kafka.stop() self.zk.stop() def produce_messages(self, topics, max_messages=10000): produce_spec = ProduceBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.producer_workload_service.producer_node, self.producer_workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=max_messages, producer_conf={}, admin_client_conf={}, common_client_conf={}, inactive_topics={}, active_topics=topics) produce_workload = self.trogdor.create_task("produce_workload", produce_spec) produce_workload.wait_for_done(timeout_sec=180) self.logger.debug("Produce workload finished") @parametrize(topics=["consume_bench_topic[0-5]"]) # topic subscription @parametrize(topics=["consume_bench_topic[0-5]:[0-4]"] ) # manual topic assignment def test_consume_bench(self, topics): """ Runs a ConsumeBench workload to consume messages """ self.produce_messages(self.active_topics) consume_spec = ConsumeBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.consumer_workload_service.consumer_node, self.consumer_workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=10000, consumer_conf={}, admin_client_conf={}, common_client_conf={}, active_topics=topics) consume_workload = self.trogdor.create_task("consume_workload", consume_spec) consume_workload.wait_for_done(timeout_sec=360) self.logger.debug("Consume workload finished") tasks = self.trogdor.tasks() self.logger.info("TASKS: %s\n" % json.dumps(tasks, sort_keys=True, indent=2)) def test_single_partition(self): """ Run a ConsumeBench against a single partition """ active_topics = { "consume_bench_topic": { "numPartitions": 2, "replicationFactor": 3 } } self.produce_messages(active_topics, 5000) consume_spec = ConsumeBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.consumer_workload_service.consumer_node, self.consumer_workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=2500, consumer_conf={}, admin_client_conf={}, common_client_conf={}, active_topics=["consume_bench_topic:1"]) consume_workload = self.trogdor.create_task("consume_workload", consume_spec) consume_workload.wait_for_done(timeout_sec=180) self.logger.debug("Consume workload finished") tasks = self.trogdor.tasks() self.logger.info("TASKS: %s\n" % json.dumps(tasks, sort_keys=True, indent=2)) def test_multiple_consumers_random_group_topics(self): """ Runs multiple consumers group to read messages from topics. 
Since a consumerGroup isn't specified, each consumer should read from all topics independently """ self.produce_messages(self.active_topics, max_messages=5000) consume_spec = ConsumeBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.consumer_workload_service.consumer_node, self.consumer_workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=5000, # all should read exactly 5k messages consumer_conf={}, admin_client_conf={}, common_client_conf={}, threads_per_worker=5, active_topics=["consume_bench_topic[0-5]"]) consume_workload = self.trogdor.create_task("consume_workload", consume_spec) consume_workload.wait_for_done(timeout_sec=360) self.logger.debug("Consume workload finished") tasks = self.trogdor.tasks() self.logger.info("TASKS: %s\n" % json.dumps(tasks, sort_keys=True, indent=2)) def test_two_consumers_specified_group_topics(self): """ Runs two consumers in the same consumer group to read messages from topics. Since a consumerGroup is specified, each consumer should dynamically get assigned partitions from the group """ self.produce_messages(self.active_topics) consume_spec = ConsumeBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.consumer_workload_service.consumer_node, self.consumer_workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=2000, # both should read at least 2k messages consumer_conf={}, admin_client_conf={}, common_client_conf={}, threads_per_worker=2, consumer_group="testGroup", active_topics=["consume_bench_topic[0-5]"]) consume_workload = self.trogdor.create_task("consume_workload", consume_spec) consume_workload.wait_for_done(timeout_sec=360) self.logger.debug("Consume workload finished") tasks = self.trogdor.tasks() self.logger.info("TASKS: %s\n" % json.dumps(tasks, sort_keys=True, indent=2)) def test_multiple_consumers_random_group_partitions(self): """ Runs multiple consumers to read messages from specific partitions. Since a consumerGroup isn't specified, each consumer will be assigned a random group and consume from all partitions """ self.produce_messages(self.active_topics, max_messages=20000) consume_spec = ConsumeBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.consumer_workload_service.consumer_node, self.consumer_workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=2000, consumer_conf={}, admin_client_conf={}, common_client_conf={}, threads_per_worker=4, active_topics=["consume_bench_topic1:[0-4]"]) consume_workload = self.trogdor.create_task("consume_workload", consume_spec) consume_workload.wait_for_done(timeout_sec=360) self.logger.debug("Consume workload finished") tasks = self.trogdor.tasks() self.logger.info("TASKS: %s\n" % json.dumps(tasks, sort_keys=True, indent=2)) def test_multiple_consumers_specified_group_partitions_should_raise(self): """ Runs multiple consumers in the same group to read messages from specific partitions. It is an invalid configuration to provide a consumer group and specific partitions. 
""" expected_error_msg = 'explicit partition assignment' self.produce_messages(self.active_topics, max_messages=20000) consume_spec = ConsumeBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.consumer_workload_service.consumer_node, self.consumer_workload_service.bootstrap_servers, target_messages_per_sec=1000, max_messages=2000, consumer_conf={}, admin_client_conf={}, common_client_conf={}, threads_per_worker=4, consumer_group="fail_group", active_topics=["consume_bench_topic1:[0-4]"]) consume_workload = self.trogdor.create_task("consume_workload", consume_spec) try: consume_workload.wait_for_done(timeout_sec=360) raise Exception( "Should have raised an exception due to an invalid configuration" ) except RuntimeError as e: if expected_error_msg not in str(e): raise RuntimeError("Unexpected Exception - " + str(e)) self.logger.info(e)
class QuotaTest(Test): """ These tests verify that quota provides expected functionality -- they run producer, broker, and consumer with different clientId and quota configuration and check that the observed throughput is close to the value we expect. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(QuotaTest, self).__init__(test_context=test_context) self.topic = 'test_topic' self.logger.info('use topic ' + self.topic) # quota related parameters self.quota_config = {'quota_producer_default': 2500000, 'quota_consumer_default': 2000000, 'quota_producer_bytes_per_second_overrides': 'overridden_id=3750000', 'quota_consumer_bytes_per_second_overrides': 'overridden_id=3000000'} self.maximum_client_deviation_percentage = 100.0 self.maximum_broker_deviation_percentage = 5.0 self.num_records = 100000 self.record_size = 3000 self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, security_protocol='PLAINTEXT', interbroker_security_protocol='PLAINTEXT', topics={self.topic: {'partitions': 6, 'replication-factor': 1, 'configs': {'min.insync.replicas': 1}}}, quota_config=self.quota_config, jmx_object_names=['kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec', 'kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec'], jmx_attributes=['OneMinuteRate']) self.num_producers = 1 self.num_consumers = 2 def setUp(self): self.zk.start() self.kafka.start() def min_cluster_size(self): """Override this since we're adding services outside of the constructor""" return super(QuotaTest, self).min_cluster_size() + self.num_producers + self.num_consumers @parametrize(producer_id='default_id', producer_num=1, consumer_id='default_id', consumer_num=1) @parametrize(producer_id='overridden_id', producer_num=1, consumer_id='overridden_id', consumer_num=1) @parametrize(producer_id='overridden_id', producer_num=1, consumer_id='overridden_id', consumer_num=2) def test_quota(self, producer_id='default_id', producer_num=1, consumer_id='default_id', consumer_num=1): # Produce all messages producer = ProducerPerformanceService( self.test_context, producer_num, self.kafka, topic=self.topic, num_records=self.num_records, record_size=self.record_size, throughput=-1, client_id=producer_id, jmx_object_names=['kafka.producer:type=producer-metrics,client-id=%s' % producer_id], jmx_attributes=['outgoing-byte-rate']) producer.run() # Consume all messages consumer = ConsoleConsumer(self.test_context, consumer_num, self.kafka, self.topic, new_consumer=False, consumer_timeout_ms=60000, client_id=consumer_id, jmx_object_names=['kafka.consumer:type=ConsumerTopicMetrics,name=BytesPerSec,clientId=%s' % consumer_id], jmx_attributes=['OneMinuteRate']) consumer.run() for idx, messages in consumer.messages_consumed.iteritems(): assert len(messages) > 0, "consumer %d didn't consume any message before timeout" % idx success, msg = self.validate(self.kafka, producer, consumer) assert success, msg def validate(self, broker, producer, consumer): """ For each client_id we validate that: 1) number of consumed messages equals number of produced messages 2) maximum_producer_throughput <= producer_quota * (1 + maximum_client_deviation_percentage/100) 3) maximum_broker_byte_in_rate <= producer_quota * (1 + maximum_broker_deviation_percentage/100) 4) maximum_consumer_throughput <= consumer_quota * (1 + maximum_client_deviation_percentage/100) 5) maximum_broker_byte_out_rate <= consumer_quota * (1 + maximum_broker_deviation_percentage/100) 
""" success = True msg = '' self.kafka.read_jmx_output_all_nodes() # validate that number of consumed messages equals number of produced messages produced_num = sum([value['records'] for value in producer.results]) consumed_num = sum([len(value) for value in consumer.messages_consumed.values()]) self.logger.info('producer produced %d messages' % produced_num) self.logger.info('consumer consumed %d messages' % consumed_num) if produced_num != consumed_num: success = False msg += "number of produced messages %d doesn't equal number of consumed messages %d" % (produced_num, consumed_num) # validate that maximum_producer_throughput <= producer_quota * (1 + maximum_client_deviation_percentage/100) producer_attribute_name = 'kafka.producer:type=producer-metrics,client-id=%s:outgoing-byte-rate' % producer.client_id producer_maximum_bps = producer.maximum_jmx_value[producer_attribute_name] producer_quota_bps = self.get_producer_quota(producer.client_id) self.logger.info('producer has maximum throughput %.2f bps with producer quota %.2f bps' % (producer_maximum_bps, producer_quota_bps)) if producer_maximum_bps > producer_quota_bps*(self.maximum_client_deviation_percentage/100+1): success = False msg += 'maximum producer throughput %.2f bps exceeded producer quota %.2f bps by more than %.1f%%' % \ (producer_maximum_bps, producer_quota_bps, self.maximum_client_deviation_percentage) # validate that maximum_broker_byte_in_rate <= producer_quota * (1 + maximum_broker_deviation_percentage/100) broker_byte_in_attribute_name = 'kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec:OneMinuteRate' broker_maximum_byte_in_bps = broker.maximum_jmx_value[broker_byte_in_attribute_name] self.logger.info('broker has maximum byte-in rate %.2f bps with producer quota %.2f bps' % (broker_maximum_byte_in_bps, producer_quota_bps)) if broker_maximum_byte_in_bps > producer_quota_bps*(self.maximum_broker_deviation_percentage/100+1): success = False msg += 'maximum broker byte-in rate %.2f bps exceeded producer quota %.2f bps by more than %.1f%%' % \ (broker_maximum_byte_in_bps, producer_quota_bps, self.maximum_broker_deviation_percentage) # validate that maximum_consumer_throughput <= consumer_quota * (1 + maximum_client_deviation_percentage/100) consumer_attribute_name = 'kafka.consumer:type=ConsumerTopicMetrics,name=BytesPerSec,clientId=%s:OneMinuteRate' % consumer.client_id consumer_maximum_bps = consumer.maximum_jmx_value[consumer_attribute_name] consumer_quota_bps = self.get_consumer_quota(consumer.client_id) self.logger.info('consumer has maximum throughput %.2f bps with consumer quota %.2f bps' % (consumer_maximum_bps, consumer_quota_bps)) if consumer_maximum_bps > consumer_quota_bps*(self.maximum_client_deviation_percentage/100+1): success = False msg += 'maximum consumer throughput %.2f bps exceeded consumer quota %.2f bps by more than %.1f%%' % \ (consumer_maximum_bps, consumer_quota_bps, self.maximum_client_deviation_percentage) # validate that maximum_broker_byte_out_rate <= consumer_quota * (1 + maximum_broker_deviation_percentage/100) broker_byte_out_attribute_name = 'kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec:OneMinuteRate' broker_maximum_byte_out_bps = broker.maximum_jmx_value[broker_byte_out_attribute_name] self.logger.info('broker has maximum byte-out rate %.2f bps with consumer quota %.2f bps' % (broker_maximum_byte_out_bps, consumer_quota_bps)) if broker_maximum_byte_out_bps > consumer_quota_bps*(self.maximum_broker_deviation_percentage/100+1): success = False msg += 'maximum broker 
byte-out rate %.2f bps exceeded consumer quota %.2f bps by more than %.1f%%' % \ (broker_maximum_byte_out_bps, consumer_quota_bps, self.maximum_broker_deviation_percentage) return success, msg def get_producer_quota(self, client_id): overridden_quotas = {value.split('=')[0]:value.split('=')[1] for value in self.quota_config['quota_producer_bytes_per_second_overrides'].split(',')} if client_id in overridden_quotas: return float(overridden_quotas[client_id]) return self.quota_config['quota_producer_default'] def get_consumer_quota(self, client_id): overridden_quotas = {value.split('=')[0]:value.split('=')[1] for value in self.quota_config['quota_consumer_bytes_per_second_overrides'].split(',')} if client_id in overridden_quotas: return float(overridden_quotas[client_id]) return self.quota_config['quota_consumer_default']
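Two pieces of logic recur in validate() and the get_*_quota helpers above: resolving a client's quota from the "clientId=bytesPerSec" override string, and bounding observed throughput by quota * (1 + allowed_deviation/100). A minimal standalone sketch, using the quota values configured in this test:

# Illustrative helpers mirroring the quota lookup and deviation check above.
def quota_for(client_id, default_bps, overrides):
    table = dict(entry.split('=') for entry in overrides.split(',')) if overrides else {}
    return float(table.get(client_id, default_bps))

def within_quota(observed_bps, quota_bps, allowed_deviation_percent):
    return observed_bps <= quota_bps * (1 + allowed_deviation_percent / 100.0)

if __name__ == "__main__":
    producer_quota = quota_for('overridden_id', 2500000,
                               'overridden_id=3750000')   # -> 3750000.0
    print(producer_quota)
    # Broker byte-in rate may exceed the producer quota by at most 5%:
    print(within_quota(3800000, producer_quota, 5.0))     # True
    print(within_quota(4100000, producer_quota, 5.0))     # False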
class PerformanceServiceTest(Test): def __init__(self, test_context): super(PerformanceServiceTest, self).__init__(test_context) self.record_size = 100 self.num_records = 10000 self.topic = "topic" self.zk = ZookeeperService(test_context, 1) def setUp(self): self.zk.start() @cluster(num_nodes=5) # We are keeping 0.8.2 here so that we don't inadvertently break support for it. Since this is just a sanity check, # the overhead should be manageable. @parametrize(version=str(LATEST_0_8_2), new_consumer=False) @parametrize(version=str(LATEST_0_9), new_consumer=False) @parametrize(version=str(LATEST_0_9)) @parametrize(version=str(DEV_BRANCH), new_consumer=False) @parametrize(version=str(DEV_BRANCH)) def test_version(self, version=str(LATEST_0_9), new_consumer=True): """ Sanity check out producer performance service - verify that we can run the service with a small number of messages. The actual stats here are pretty meaningless since the number of messages is quite small. """ version = KafkaVersion(version) self.kafka = KafkaService( self.test_context, 1, self.zk, topics={self.topic: { 'partitions': 1, 'replication-factor': 1 }}, version=version) self.kafka.start() # check basic run of producer performance self.producer_perf = ProducerPerformanceService( self.test_context, 1, self.kafka, topic=self.topic, num_records=self.num_records, record_size=self.record_size, throughput= 1000000000, # Set impossibly for no throttling for equivalent behavior between 0.8.X and 0.9.X version=version, settings={ 'acks': 1, 'batch.size': 8 * 1024, 'buffer.memory': 64 * 1024 * 1024 }) self.producer_perf.run() producer_perf_data = compute_aggregate_throughput(self.producer_perf) # check basic run of end to end latency self.end_to_end = EndToEndLatencyService(self.test_context, 1, self.kafka, topic=self.topic, num_records=self.num_records, version=version) self.end_to_end.run() end_to_end_data = latency( self.end_to_end.results[0]['latency_50th_ms'], self.end_to_end.results[0]['latency_99th_ms'], self.end_to_end.results[0]['latency_999th_ms']) # check basic run of consumer performance service self.consumer_perf = ConsumerPerformanceService( self.test_context, 1, self.kafka, new_consumer=new_consumer, topic=self.topic, version=version, messages=self.num_records) self.consumer_perf.group = "test-consumer-group" self.consumer_perf.run() consumer_perf_data = compute_aggregate_throughput(self.consumer_perf) return { "producer_performance": producer_perf_data, "end_to_end_latency": end_to_end_data, "consumer_performance": consumer_perf_data }
class ThrottlingTest(ProduceConsumeValidateTest): """Tests throttled partition reassignment. This is essentially similar to the reassign_partitions_test, except that we throttle the reassignment and verify that it takes a sensible amount of time given the throttle and the amount of data being moved. Since the correctness is time dependent, this test also simplifies the cluster topology. In particular, we fix the number of brokers, the replication-factor, the number of partitions, the partition size, and the number of partitions being moved so that we can accurately predict the time throttled reassignment should take. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(ThrottlingTest, self).__init__(test_context=test_context) self.topic = "test_topic" self.zk = ZookeeperService(test_context, num_nodes=1) # Because we are starting the producer/consumer/validate cycle _after_ # seeding the cluster with big data (to test throttling), we need to # Start the consumer from the end of the stream. further, we need to # ensure that the consumer is fully started before the producer starts # so that we don't miss any messages. This timeout ensures the sufficient # condition. self.consumer_init_timeout_sec = 20 self.num_brokers = 6 self.num_partitions = 3 self.kafka = KafkaService(test_context, num_nodes=self.num_brokers, zk=self.zk, topics={ self.topic: { "partitions": self.num_partitions, "replication-factor": 2, "configs": { "segment.bytes": 64 * 1024 * 1024 } } }) self.producer_throughput = 1000 self.timeout_sec = 400 self.num_records = 2000 self.record_size = 4096 * 100 # 400 KB # 1 MB per partition on average. self.partition_size = (self.num_records * self.record_size) / self.num_partitions self.num_producers = 2 self.num_consumers = 1 self.throttle = 4 * 1024 * 1024 # 4 MB/s def setUp(self): self.zk.start() def min_cluster_size(self): # Override this since we're adding services outside of the constructor return super(ThrottlingTest, self).min_cluster_size() +\ self.num_producers + self.num_consumers def clean_bounce_some_brokers(self): """Bounce every other broker""" for node in self.kafka.nodes[::2]: self.kafka.restart_node(node, clean_shutdown=True) def reassign_partitions(self, bounce_brokers, throttle): """This method reassigns partitions using a throttle. It makes an assertion about the minimum amount of time the reassignment should take given the value of the throttle, the number of partitions being moved, and the size of each partition. 
""" partition_info = self.kafka.parse_describe_topic( self.kafka.describe_topic(self.topic)) self.logger.debug("Partitions before reassignment:" + str(partition_info)) max_num_moves = 0 for i in range(0, self.num_partitions): old_replicas = set(partition_info["partitions"][i]["replicas"]) new_part = (i + 1) % self.num_partitions new_replicas = set( partition_info["partitions"][new_part]["replicas"]) max_num_moves = max(len(new_replicas - old_replicas), max_num_moves) partition_info["partitions"][i]["partition"] = new_part self.logger.debug("Jumbled partitions: " + str(partition_info)) self.kafka.execute_reassign_partitions(partition_info, throttle=throttle) start = time.time() if bounce_brokers: # bounce a few brokers at the same time self.clean_bounce_some_brokers() # Wait until finished or timeout size_per_broker = max_num_moves * self.partition_size self.logger.debug("Max amount of data transfer per broker: %fb", size_per_broker) estimated_throttled_time = math.ceil( float(size_per_broker) / self.throttle) estimated_time_with_buffer = estimated_throttled_time * 2 self.logger.debug("Waiting %ds for the reassignment to complete", estimated_time_with_buffer) wait_until( lambda: self.kafka.verify_reassign_partitions(partition_info), timeout_sec=estimated_time_with_buffer, backoff_sec=.5) stop = time.time() time_taken = stop - start self.logger.debug("Transfer took %d second. Estimated time : %ds", time_taken, estimated_throttled_time) assert time_taken >= estimated_throttled_time * 0.9, \ ("Expected rebalance to take at least %ds, but it took %ds" % ( estimated_throttled_time, time_taken)) @cluster(num_nodes=10) @parametrize(bounce_brokers=True) @parametrize(bounce_brokers=False) def test_throttled_reassignment(self, bounce_brokers): security_protocol = 'PLAINTEXT' self.kafka.security_protocol = security_protocol self.kafka.interbroker_security_protocol = security_protocol producer_id = 'bulk_producer' bulk_producer = ProducerPerformanceService( context=self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, num_records=self.num_records, record_size=self.record_size, throughput=-1, client_id=producer_id) self.producer = VerifiableProducer(context=self.test_context, num_nodes=1, kafka=self.kafka, topic=self.topic, message_validator=is_int, throughput=self.producer_throughput) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int, from_beginning=False) self.kafka.start() bulk_producer.run() self.run_produce_consume_validate( core_test_action=lambda: self.reassign_partitions( bounce_brokers, self.throttle)) self.logger.debug( "Bulk producer outgoing-byte-rates: %s", (metric.value for k, metrics in bulk_producer.metrics(group='producer-metrics', name='outgoing-byte-rate', client_id=producer_id) for metric in metrics))
class StreamsUpgradeTest(Test): """ Test upgrading Kafka Streams (all version combination) If metadata was changes, upgrade is more difficult Metadata version was bumped in 0.10.1.0 and subsequently bumped in 2.0.0 """ def __init__(self, test_context): super(StreamsUpgradeTest, self).__init__(test_context) self.topics = { 'echo': { 'partitions': 5 }, 'data': { 'partitions': 5 }, } processed_msg = "processed [0-9]* records" base_version_number = str(DEV_VERSION).split("-")[0] def perform_broker_upgrade(self, to_version): self.logger.info("First pass bounce - rolling broker upgrade") for node in self.kafka.nodes: self.kafka.stop_node(node) node.version = KafkaVersion(to_version) self.kafka.start_node(node) @cluster(num_nodes=6) @matrix(from_version=smoke_test_versions, to_version=dev_version, bounce_type=["full"]) def test_app_upgrade(self, from_version, to_version, bounce_type): """ Starts 3 KafkaStreams instances with <old_version>, and upgrades one-by-one to <new_version> """ if from_version == to_version: return self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics={ 'echo': { 'partitions': 5, 'replication-factor': 1 }, 'data': { 'partitions': 5, 'replication-factor': 1 }, 'min': { 'partitions': 5, 'replication-factor': 1 }, 'min-suppressed': { 'partitions': 5, 'replication-factor': 1 }, 'min-raw': { 'partitions': 5, 'replication-factor': 1 }, 'max': { 'partitions': 5, 'replication-factor': 1 }, 'sum': { 'partitions': 5, 'replication-factor': 1 }, 'sws-raw': { 'partitions': 5, 'replication-factor': 1 }, 'sws-suppressed': { 'partitions': 5, 'replication-factor': 1 }, 'dif': { 'partitions': 5, 'replication-factor': 1 }, 'cnt': { 'partitions': 5, 'replication-factor': 1 }, 'avg': { 'partitions': 5, 'replication-factor': 1 }, 'wcnt': { 'partitions': 5, 'replication-factor': 1 }, 'tagg': { 'partitions': 5, 'replication-factor': 1 } }) self.kafka.start() self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.driver.disable_auto_terminate() self.processor1 = StreamsSmokeTestJobRunnerService( self.test_context, self.kafka, processing_guarantee="at_least_once", replication_factor=1) self.processor2 = StreamsSmokeTestJobRunnerService( self.test_context, self.kafka, processing_guarantee="at_least_once", replication_factor=1) self.processor3 = StreamsSmokeTestJobRunnerService( self.test_context, self.kafka, processing_guarantee="at_least_once", replication_factor=1) self.purge_state_dir(self.processor1) self.purge_state_dir(self.processor2) self.purge_state_dir(self.processor3) self.driver.start() self.start_all_nodes_with(from_version) self.processors = [self.processor1, self.processor2, self.processor3] if bounce_type == "rolling": counter = 1 random.seed() # upgrade one-by-one via rolling bounce random.shuffle(self.processors) for p in self.processors: p.CLEAN_NODE_ENABLED = False self.do_stop_start_bounce(p, None, to_version, counter) counter = counter + 1 elif bounce_type == "full": self.restart_all_nodes_with(to_version) else: raise Exception("Unrecognized bounce_type: " + str(bounce_type)) # shutdown self.driver.stop() # Ideally, we would actually verify the expected results. 
# See KAFKA-10202 random.shuffle(self.processors) for p in self.processors: node = p.node with node.account.monitor_log(p.STDOUT_FILE) as monitor: p.stop() monitor.wait_until( "SMOKE-TEST-CLIENT-CLOSED", timeout_sec=60, err_msg="Never saw output 'SMOKE-TEST-CLIENT-CLOSED' on " + str(node.account)) def start_all_nodes_with(self, version): self.set_version(self.processor1, version) self.set_version(self.processor2, version) self.set_version(self.processor3, version) self.processor1.start() self.processor2.start() self.processor3.start() # double-check the version kafka_version_str = self.get_version_string(version) self.wait_for_verification(self.processor1, kafka_version_str, self.processor1.LOG_FILE) self.wait_for_verification(self.processor2, kafka_version_str, self.processor2.LOG_FILE) self.wait_for_verification(self.processor3, kafka_version_str, self.processor3.LOG_FILE) # wait for the members to join self.wait_for_verification(self.processor1, "SMOKE-TEST-CLIENT-STARTED", self.processor1.STDOUT_FILE) self.wait_for_verification(self.processor2, "SMOKE-TEST-CLIENT-STARTED", self.processor2.STDOUT_FILE) self.wait_for_verification(self.processor3, "SMOKE-TEST-CLIENT-STARTED", self.processor3.STDOUT_FILE) # make sure they've processed something self.wait_for_verification(self.processor1, self.processed_msg, self.processor1.STDOUT_FILE) self.wait_for_verification(self.processor2, self.processed_msg, self.processor2.STDOUT_FILE) self.wait_for_verification(self.processor3, self.processed_msg, self.processor3.STDOUT_FILE) def restart_all_nodes_with(self, version): self.processor1.stop_node(self.processor1.node) self.processor2.stop_node(self.processor2.node) self.processor3.stop_node(self.processor3.node) # make sure the members have stopped self.wait_for_verification(self.processor1, "SMOKE-TEST-CLIENT-CLOSED", self.processor1.STDOUT_FILE) self.wait_for_verification(self.processor2, "SMOKE-TEST-CLIENT-CLOSED", self.processor2.STDOUT_FILE) self.wait_for_verification(self.processor3, "SMOKE-TEST-CLIENT-CLOSED", self.processor3.STDOUT_FILE) self.roll_logs(self.processor1, ".1-1") self.roll_logs(self.processor2, ".1-1") self.roll_logs(self.processor3, ".1-1") self.set_version(self.processor1, version) self.set_version(self.processor2, version) self.set_version(self.processor3, version) self.processor1.start_node(self.processor1.node) self.processor2.start_node(self.processor2.node) self.processor3.start_node(self.processor3.node) # double-check the version kafka_version_str = self.get_version_string(version) self.wait_for_verification(self.processor1, kafka_version_str, self.processor1.LOG_FILE) self.wait_for_verification(self.processor2, kafka_version_str, self.processor2.LOG_FILE) self.wait_for_verification(self.processor3, kafka_version_str, self.processor3.LOG_FILE) # wait for the members to join self.wait_for_verification(self.processor1, "SMOKE-TEST-CLIENT-STARTED", self.processor1.STDOUT_FILE) self.wait_for_verification(self.processor2, "SMOKE-TEST-CLIENT-STARTED", self.processor2.STDOUT_FILE) self.wait_for_verification(self.processor3, "SMOKE-TEST-CLIENT-STARTED", self.processor3.STDOUT_FILE) # make sure they've processed something self.wait_for_verification(self.processor1, self.processed_msg, self.processor1.STDOUT_FILE) self.wait_for_verification(self.processor2, self.processed_msg, self.processor2.STDOUT_FILE) self.wait_for_verification(self.processor3, self.processed_msg, self.processor3.STDOUT_FILE) def get_version_string(self, version): if version.startswith("0") or 
version.startswith("1") \ or version.startswith("2.0") or version.startswith("2.1"): return "Kafka version : " + version elif "SNAPSHOT" in version: return "Kafka version.*" + self.base_version_number + ".*SNAPSHOT" else: return "Kafka version: " + version def wait_for_verification(self, processor, message, file, num_lines=1): wait_until(lambda: self.verify_from_file(processor, message, file ) >= num_lines, timeout_sec=60, err_msg="Did expect to read '%s' from %s" % (message, processor.node.account)) def verify_from_file(self, processor, message, file): result = processor.node.account.ssh_output("grep -E '%s' %s | wc -l" % (message, file), allow_fail=False) try: return int(result) except ValueError: self.logger.warn("Command failed with ValueError: " + result) return 0 def set_version(self, processor, version): if version == str(DEV_VERSION): processor.set_version("") # set to TRUNK else: processor.set_version(version) def purge_state_dir(self, processor): processor.node.account.ssh("rm -rf " + processor.PERSISTENT_ROOT, allow_fail=False) def do_stop_start_bounce(self, processor, upgrade_from, new_version, counter): kafka_version_str = self.get_version_string(new_version) first_other_processor = None second_other_processor = None for p in self.processors: if p != processor: if first_other_processor is None: first_other_processor = p else: second_other_processor = p node = processor.node first_other_node = first_other_processor.node second_other_node = second_other_processor.node # stop processor and wait for rebalance of others with first_other_node.account.monitor_log( first_other_processor.STDOUT_FILE) as first_other_monitor: with second_other_node.account.monitor_log( second_other_processor.STDOUT_FILE ) as second_other_monitor: processor.stop_node(processor.node) first_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(first_other_node.account)) second_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(second_other_node.account)) node.account.ssh_capture("grep SMOKE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False) if upgrade_from is None: # upgrade disabled -- second round of rolling bounces roll_counter = ".1-" # second round of rolling bounces else: roll_counter = ".0-" # first round of rolling bounces self.roll_logs(processor, roll_counter + str(counter)) self.set_version(processor, new_version) processor.set_upgrade_from(upgrade_from) grep_metadata_error = "grep \"org.apache.kafka.streams.errors.TaskAssignmentException: unable to decode subscription data: version=2\" " with node.account.monitor_log(processor.STDOUT_FILE) as monitor: with node.account.monitor_log(processor.LOG_FILE) as log_monitor: with first_other_node.account.monitor_log( first_other_processor.STDOUT_FILE ) as first_other_monitor: with second_other_node.account.monitor_log( second_other_processor.STDOUT_FILE ) as second_other_monitor: processor.start_node(processor.node) log_monitor.wait_until( kafka_version_str, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + new_version + " on " + str(node.account)) first_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(first_other_node.account)) found = list( first_other_node.account.ssh_capture( grep_metadata_error + first_other_processor.STDERR_FILE, allow_fail=True)) if len(found) > 0: raise Exception( "Kafka Streams 
failed with 'unable to decode subscription data: version=2'" ) second_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(second_other_node.account)) found = list( second_other_node.account.ssh_capture( grep_metadata_error + second_other_processor.STDERR_FILE, allow_fail=True)) if len(found) > 0: raise Exception( "Kafka Streams failed with 'unable to decode subscription data: version=2'" ) monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node.account)) def roll_logs(self, processor, roll_suffix): processor.node.account.ssh("mv " + processor.STDOUT_FILE + " " + processor.STDOUT_FILE + roll_suffix, allow_fail=False) processor.node.account.ssh("mv " + processor.STDERR_FILE + " " + processor.STDERR_FILE + roll_suffix, allow_fail=False) processor.node.account.ssh("mv " + processor.LOG_FILE + " " + processor.LOG_FILE + roll_suffix, allow_fail=False) processor.node.account.ssh("mv " + processor.CONFIG_FILE + " " + processor.CONFIG_FILE + roll_suffix, allow_fail=False)
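The helpers above check progress by grepping a service log over ssh and counting matching lines (verify_from_file / wait_for_verification). The following is a minimal local-filesystem sketch of that poll-and-grep pattern, using only the standard library; the file path, helper names, and timeouts are illustrative and not part of the test harness.

# Local sketch of the poll-and-grep verification pattern; the real tests run "grep -E ... | wc -l" over ssh.
import re
import time

def count_matches(path, pattern):
    # Stand-in for "grep -E '<pattern>' <file> | wc -l" on the service node.
    regex = re.compile(pattern)
    with open(path) as f:
        return sum(1 for line in f if regex.search(line))

def wait_for_log_line(path, pattern, num_lines=1, timeout_sec=60, backoff_sec=1):
    # Poll until `pattern` has appeared at least `num_lines` times, or fail loudly.
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        if count_matches(path, pattern) >= num_lines:
            return
        time.sleep(backoff_sec)
    raise AssertionError("Expected to read '%s' at least %d time(s) from %s" % (pattern, num_lines, path))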
class GetOffsetShellTest(Test): """ Tests GetOffsetShell tool """ def __init__(self, test_context): super(GetOffsetShellTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.messages_received_count = 0 self.topics = { TOPIC: { 'partitions': NUM_PARTITIONS, 'replication-factor': REPLICATION_FACTOR } } self.zk = ZookeeperService(test_context, self.num_zk) def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, topics=self.topics) self.kafka.start() def start_producer(self): # This will produce to kafka cluster self.producer = VerifiableProducer(self.test_context, num_nodes=1, kafka=self.kafka, topic=TOPIC, throughput=1000, max_messages=MAX_MESSAGES) self.producer.start() current_acked = self.producer.num_acked wait_until( lambda: self.producer.num_acked >= current_acked + MAX_MESSAGES, timeout_sec=10, err_msg="Timeout awaiting messages to be produced and acked") def start_consumer(self, security_protocol): enable_new_consumer = security_protocol != SecurityConfig.PLAINTEXT self.consumer = ConsoleConsumer(self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC, consumer_timeout_ms=1000, new_consumer=enable_new_consumer) self.consumer.start() @cluster(num_nodes=4) def test_get_offset_shell(self, security_protocol='PLAINTEXT'): """ Tests if GetOffsetShell is getting offsets correctly :return: None """ self.start_kafka(security_protocol, security_protocol) self.start_producer() # Assert that offset fetched without any consumers consuming is 0 assert self.kafka.get_offset_shell( TOPIC, None, 1000, 1, -1), "%s:%s:%s" % (TOPIC, NUM_PARTITIONS - 1, 0) self.start_consumer(security_protocol) node = self.consumer.nodes[0] wait_until(lambda: self.consumer.alive(node), timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start") # Assert that offset is correctly indicated by GetOffsetShell tool wait_until(lambda: "%s:%s:%s" % (TOPIC, NUM_PARTITIONS - 1, MAX_MESSAGES ) in self.kafka.get_offset_shell(TOPIC, None, 1000, 1, -1), timeout_sec=10, err_msg="Timed out waiting to reach expected offset.")
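GetOffsetShell reports one topic:partition:offset line per partition, which is exactly what the assertions above search for. Below is a small sketch of parsing that output into a dict; the sample topic name and offsets are made up for illustration.

# Parse GetOffsetShell stdout ("topic:partition:offset" per line) into {(topic, partition): offset}.
def parse_offset_shell_output(output):
    offsets = {}
    for line in output.strip().splitlines():
        topic, partition, offset = line.strip().rsplit(":", 2)
        offsets[(topic, int(partition))] = int(offset)
    return offsets

# Illustrative sample only; not captured from a real run.
sample = "topic_test:0:100\ntopic_test:1:100"
assert parse_offset_shell_output(sample)[("topic_test", 1)] == 100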
class TransactionsTest(Test): """Tests transactions by transactionally copying data from a source topic to a destination topic and killing the copy process as well as the broker randomly through the process. In the end we verify that the final output topic contains exactly one committed copy of each message in the input topic. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(TransactionsTest, self).__init__(test_context=test_context) self.input_topic = "input-topic" self.output_topic = "output-topic" self.num_brokers = 3 # Test parameters self.num_input_partitions = 2 self.num_output_partitions = 3 self.num_seed_messages = 100000 self.transaction_size = 750 # The transaction timeout should be lower than the progress timeout, but at # least as high as the request timeout (which is 30s by default). When the # client is hard-bounced, progress may depend on the previous transaction # being aborted. When the broker is hard-bounced, we may have to wait as # long as the request timeout to get a `Produce` response and we do not # want the coordinator timing out the transaction. self.transaction_timeout = 40000 self.progress_timeout_sec = 60 self.consumer_group = "transactions-test-consumer-group" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=self.num_brokers, zk=self.zk) def setUp(self): self.zk.start() def seed_messages(self, topic, num_seed_messages): seed_timeout_sec = 10000 seed_producer = VerifiableProducer(context=self.test_context, num_nodes=1, kafka=self.kafka, topic=topic, message_validator=is_int, max_messages=num_seed_messages, enable_idempotence=True) seed_producer.start() wait_until(lambda: seed_producer.num_acked >= num_seed_messages, timeout_sec=seed_timeout_sec, err_msg="Producer failed to produce messages %d in %ds." %\ (self.num_seed_messages, seed_timeout_sec)) return seed_producer.acked def get_messages_from_topic(self, topic, num_messages): consumer = self.start_consumer(topic, group_id="verifying_consumer") return self.drain_consumer(consumer, num_messages) def bounce_brokers(self, clean_shutdown): for node in self.kafka.nodes: if clean_shutdown: self.kafka.restart_node(node, clean_shutdown=True) else: self.kafka.stop_node(node, clean_shutdown=False) wait_until(lambda: len(self.kafka.pids(node)) == 0 and not self .kafka.is_registered(node), timeout_sec=self.kafka.zk_session_timeout + 5, err_msg="Failed to see timely deregistration of \ hard-killed broker %s" % str(node.account)) self.kafka.start_node(node) def create_and_start_message_copier(self, input_topic, input_partition, output_topic, transactional_id, use_group_metadata): message_copier = TransactionalMessageCopier( context=self.test_context, num_nodes=1, kafka=self.kafka, transactional_id=transactional_id, consumer_group=self.consumer_group, input_topic=input_topic, input_partition=input_partition, output_topic=output_topic, max_messages=-1, transaction_size=self.transaction_size, transaction_timeout=self.transaction_timeout, use_group_metadata=use_group_metadata) message_copier.start() wait_until(lambda: message_copier.alive(message_copier.nodes[0]), timeout_sec=10, err_msg="Message copier failed to start after 10 s") return message_copier def bounce_copiers(self, copiers, clean_shutdown): for _ in range(3): for copier in copiers: wait_until(lambda: copier.progress_percent() >= 20.0, timeout_sec=self.progress_timeout_sec, err_msg="%s : Message copier didn't make enough progress in %ds. 
Current progress: %s" \ % (copier.transactional_id, self.progress_timeout_sec, str(copier.progress_percent()))) self.logger.info( "%s - progress: %s" % (copier.transactional_id, str(copier.progress_percent()))) copier.restart(clean_shutdown) def create_and_start_copiers(self, input_topic, output_topic, num_copiers, use_group_metadata): copiers = [] for i in range(0, num_copiers): copiers.append( self.create_and_start_message_copier( input_topic=input_topic, output_topic=output_topic, input_partition=i, transactional_id="copier-" + str(i), use_group_metadata=use_group_metadata)) return copiers def start_consumer(self, topic_to_read, group_id): consumer = ConsoleConsumer(context=self.test_context, num_nodes=1, kafka=self.kafka, topic=topic_to_read, group_id=group_id, message_validator=is_int, from_beginning=True, isolation_level="read_committed") consumer.start() # ensure that the consumer is up. wait_until(lambda: (len(consumer.messages_consumed[1]) > 0) == True, timeout_sec=60, err_msg="Consumer failed to consume any messages for %ds" %\ 60) return consumer def drain_consumer(self, consumer, num_messages): # wait until we read at least the expected number of messages. # This is a safe check because both failure modes will be caught: # 1. If we have 'num_seed_messages' but there are duplicates, then # this is checked for later. # # 2. If we never reach 'num_seed_messages', then this will cause the # test to fail. wait_until(lambda: len(consumer.messages_consumed[1]) >= num_messages, timeout_sec=90, err_msg="Consumer consumed only %d out of %d messages in %ds" %\ (len(consumer.messages_consumed[1]), num_messages, 90)) consumer.stop() return consumer.messages_consumed[1] def copy_messages_transactionally(self, failure_mode, bounce_target, input_topic, output_topic, num_copiers, num_messages_to_copy, use_group_metadata): """Copies messages transactionally from the seeded input topic to the output topic, either bouncing brokers or clients in a hard and soft way as it goes. This method also consumes messages in read_committed mode from the output topic while the bounces and copy is going on. It returns the concurrently consumed messages. """ copiers = self.create_and_start_copiers( input_topic=input_topic, output_topic=output_topic, num_copiers=num_copiers, use_group_metadata=use_group_metadata) concurrent_consumer = self.start_consumer( output_topic, group_id="concurrent_consumer") clean_shutdown = False if failure_mode == "clean_bounce": clean_shutdown = True if bounce_target == "brokers": self.bounce_brokers(clean_shutdown) elif bounce_target == "clients": self.bounce_copiers(copiers, clean_shutdown) copier_timeout_sec = 120 for copier in copiers: wait_until(lambda: copier.is_done, timeout_sec=copier_timeout_sec, err_msg="%s - Failed to copy all messages in %ds." 
%\ (copier.transactional_id, copier_timeout_sec)) self.logger.info("finished copying messages") return self.drain_consumer(concurrent_consumer, num_messages_to_copy) def setup_topics(self): self.kafka.topics = { self.input_topic: { "partitions": self.num_input_partitions, "replication-factor": 3, "configs": { "min.insync.replicas": 2 } }, self.output_topic: { "partitions": self.num_output_partitions, "replication-factor": 3, "configs": { "min.insync.replicas": 2 } } } @cluster(num_nodes=9) @matrix(failure_mode=["hard_bounce", "clean_bounce"], bounce_target=["brokers", "clients"], check_order=[True, False], use_group_metadata=[True, False]) def test_transactions(self, failure_mode, bounce_target, check_order, use_group_metadata): security_protocol = 'PLAINTEXT' self.kafka.security_protocol = security_protocol self.kafka.interbroker_security_protocol = security_protocol self.kafka.logs["kafka_data_1"]["collect_default"] = True self.kafka.logs["kafka_data_2"]["collect_default"] = True self.kafka.logs["kafka_operational_logs_debug"][ "collect_default"] = True if check_order: # To check ordering, we simply create input and output topics # with a single partition. # We reduce the number of seed messages to copy to account for the fewer output # partitions, and thus lower parallelism. This helps keep the test # time shorter. self.num_seed_messages = self.num_seed_messages // 3 self.num_input_partitions = 1 self.num_output_partitions = 1 self.setup_topics() self.kafka.start() input_messages = self.seed_messages(self.input_topic, self.num_seed_messages) concurrently_consumed_messages = self.copy_messages_transactionally( failure_mode, bounce_target, input_topic=self.input_topic, output_topic=self.output_topic, num_copiers=self.num_input_partitions, num_messages_to_copy=self.num_seed_messages, use_group_metadata=use_group_metadata) output_messages = self.get_messages_from_topic(self.output_topic, self.num_seed_messages) concurrently_consumed_message_set = set(concurrently_consumed_messages) output_message_set = set(output_messages) input_message_set = set(input_messages) num_dups = abs(len(output_messages) - len(output_message_set)) num_dups_in_concurrent_consumer = abs( len(concurrently_consumed_messages) - len(concurrently_consumed_message_set)) assert num_dups == 0, "Detected %d duplicates in the output stream" % num_dups assert input_message_set == output_message_set, "Input and output message sets are not equal. Num input messages %d. Num output messages %d" %\ (len(input_message_set), len(output_message_set)) assert num_dups_in_concurrent_consumer == 0, "Detected %d dups in concurrently consumed messages" % num_dups_in_concurrent_consumer assert input_message_set == concurrently_consumed_message_set, \ "Input and concurrently consumed output message sets are not equal. Num input messages: %d. Num concurrently_consumed_messages: %d" %\ (len(input_message_set), len(concurrently_consumed_message_set)) if check_order: assert input_messages == sorted( input_messages ), "The seed messages themselves were not in order" assert output_messages == input_messages, "Output messages are not in order" assert concurrently_consumed_messages == output_messages, "Concurrently consumed messages are not in order"
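The exactly-once verification at the end of test_transactions boils down to two set checks: no duplicates in the output, and the output message set equal to the input message set. A condensed, standalone sketch of those checks over toy data:

# Exactly-once check: every input message appears in the output exactly once.
def verify_exactly_one_copy(input_messages, output_messages):
    input_set, output_set = set(input_messages), set(output_messages)
    num_dups = len(output_messages) - len(output_set)
    assert num_dups == 0, "Detected %d duplicates in the output stream" % num_dups
    assert output_set == input_set, (
        "Input and output message sets are not equal. Num input: %d, num output: %d"
        % (len(input_set), len(output_set)))

verify_exactly_one_copy([1, 2, 3, 4], [4, 3, 2, 1])  # passes: same set, no duplicates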
class TestMirrorMakerService(ProduceConsumeValidateTest): """Sanity checks on mirror maker service class.""" def __init__(self, test_context): super(TestMirrorMakerService, self).__init__(test_context) self.topic = "topic" self.source_zk = ZookeeperService(test_context, num_nodes=1) self.target_zk = ZookeeperService(test_context, num_nodes=1) self.source_kafka = KafkaService(test_context, num_nodes=1, zk=self.source_zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) self.target_kafka = KafkaService(test_context, num_nodes=1, zk=self.target_zk, topics={self.topic: {"partitions": 1, "replication-factor": 1}}) # This will produce to source kafka cluster self.producer = VerifiableProducer(test_context, num_nodes=1, kafka=self.source_kafka, topic=self.topic, throughput=1000) self.mirror_maker = MirrorMaker(test_context, num_nodes=1, source=self.source_kafka, target=self.target_kafka, whitelist=self.topic, offset_commit_interval_ms=1000) # This will consume from target kafka cluster self.consumer = ConsoleConsumer(test_context, num_nodes=1, kafka=self.target_kafka, topic=self.topic, message_validator=is_int, consumer_timeout_ms=60000) def setUp(self): # Source cluster self.source_zk.start() # Target cluster self.target_zk.start() def start_kafka(self, security_protocol): self.source_kafka.security_protocol = security_protocol self.source_kafka.interbroker_security_protocol = security_protocol self.target_kafka.security_protocol = security_protocol self.target_kafka.interbroker_security_protocol = security_protocol if self.source_kafka.security_config.has_sasl_kerberos: minikdc = MiniKdc(self.source_kafka.context, self.source_kafka.nodes + self.target_kafka.nodes) self.source_kafka.minikdc = minikdc self.target_kafka.minikdc = minikdc minikdc.start() self.source_kafka.start() self.target_kafka.start() def bounce(self, clean_shutdown=True): """Bounce mirror maker with a clean (kill -15) or hard (kill -9) shutdown""" # Wait until messages start appearing in the target cluster wait_until(lambda: len(self.consumer.messages_consumed[1]) > 0, timeout_sec=15) # Wait for at least one offset to be committed. # # This step is necessary to prevent data loss with default mirror maker settings: # currently, if we don't have at least one committed offset, # and we bounce mirror maker, the consumer internals will throw OffsetOutOfRangeException, and the default # auto.offset.reset policy ("largest") will kick in, causing mirrormaker to start consuming from the largest # offset. As a result, any messages produced to the source cluster while mirrormaker was dead won't get # mirrored to the target cluster. # (see https://issues.apache.org/jira/browse/KAFKA-2759) # # This isn't necessary with kill -15 because mirror maker commits its offsets during graceful # shutdown. 
if not clean_shutdown: time.sleep(self.mirror_maker.offset_commit_interval_ms / 1000.0 + .5) for i in range(3): self.logger.info("Bringing mirror maker nodes down...") for node in self.mirror_maker.nodes: self.mirror_maker.stop_node(node, clean_shutdown=clean_shutdown) num_consumed = len(self.consumer.messages_consumed[1]) self.logger.info("Bringing mirror maker nodes back up...") for node in self.mirror_maker.nodes: self.mirror_maker.start_node(node) # Ensure new messages are once again showing up on the target cluster # new consumer requires higher timeout here wait_until(lambda: len(self.consumer.messages_consumed[1]) > num_consumed + 100, timeout_sec=60) def wait_for_n_messages(self, n_messages=100): """Wait for a minimum number of messages to be successfully produced.""" wait_until(lambda: self.producer.num_acked > n_messages, timeout_sec=10, err_msg="Producer failed to produce %d messages in a reasonable amount of time." % n_messages) @cluster(num_nodes=7) @parametrize(security_protocol='PLAINTEXT', new_consumer=False) @matrix(security_protocol=['PLAINTEXT', 'SSL'], new_consumer=[True]) @cluster(num_nodes=8) @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL'], new_consumer=[True]) def test_simple_end_to_end(self, security_protocol, new_consumer): """ Test end-to-end behavior under non-failure conditions. Setup: two single node Kafka clusters, each connected to its own single node zookeeper cluster. One is source, and the other is target. Single-node mirror maker mirrors from source to target. - Start mirror maker. - Produce a small number of messages to the source cluster. - Consume messages from target. - Verify that number of consumed messages matches the number produced. """ self.start_kafka(security_protocol) self.consumer.new_consumer = new_consumer self.mirror_maker.new_consumer = new_consumer self.mirror_maker.start() mm_node = self.mirror_maker.nodes[0] with mm_node.account.monitor_log(self.mirror_maker.LOG_FILE) as monitor: if new_consumer: monitor.wait_until("Resetting offset for partition", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") else: monitor.wait_until("reset fetch offset", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") self.run_produce_consume_validate(core_test_action=self.wait_for_n_messages) self.mirror_maker.stop() @cluster(num_nodes=7) @matrix(offsets_storage=["kafka", "zookeeper"], new_consumer=[False], clean_shutdown=[True, False]) @matrix(new_consumer=[True], clean_shutdown=[True, False], security_protocol=['PLAINTEXT', 'SSL']) @cluster(num_nodes=8) @matrix(new_consumer=[True], clean_shutdown=[True, False], security_protocol=['SASL_PLAINTEXT', 'SASL_SSL']) def test_bounce(self, offsets_storage="kafka", new_consumer=True, clean_shutdown=True, security_protocol='PLAINTEXT'): """ Test end-to-end behavior under failure conditions. Setup: two single node Kafka clusters, each connected to its own single node zookeeper cluster. One is source, and the other is target. Single-node mirror maker mirrors from source to target. - Start mirror maker. - Produce to source cluster, and consume from target cluster in the background. - Bounce MM process - Verify every message acknowledged by the source producer is consumed by the target consumer """ if new_consumer and not clean_shutdown: # Increase timeout on downstream console consumer; mirror maker with new consumer takes extra time # during hard bounce. 
This is because the restarted mirror maker consumer won't be able to rejoin # the group until the previous session times out self.consumer.consumer_timeout_ms = 60000 self.start_kafka(security_protocol) self.consumer.new_consumer = new_consumer self.mirror_maker.offsets_storage = offsets_storage self.mirror_maker.new_consumer = new_consumer self.mirror_maker.start() # Wait until mirror maker has reset fetch offset at least once before continuing with the rest of the test mm_node = self.mirror_maker.nodes[0] with mm_node.account.monitor_log(self.mirror_maker.LOG_FILE) as monitor: if new_consumer: monitor.wait_until("Resetting offset for partition", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") else: monitor.wait_until("reset fetch offset", timeout_sec=30, err_msg="Mirrormaker did not reset fetch offset in a reasonable amount of time.") self.run_produce_consume_validate(core_test_action=lambda: self.bounce(clean_shutdown=clean_shutdown)) self.mirror_maker.stop()
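The bounce() loop above treats mirroring as healthy only if the target-cluster consumer makes forward progress after every restart: record how many messages have been consumed, bounce mirror maker, then wait for the count to grow. A generic local sketch of that pattern; restart and current_count are illustrative callables, and the toy counter below only demonstrates the flow.

# Generic "record progress, bounce, wait for growth" helper (illustrative, not part of the test harness).
import time

def bounce_and_verify_progress(restart, current_count, min_new_messages=100, timeout_sec=60):
    before = current_count()
    restart()
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        if current_count() >= before + min_new_messages:
            return
        time.sleep(1)
    raise AssertionError("No forward progress after restart: still at %d messages" % current_count())

counter = {"n": 0}
def fake_restart():
    counter["n"] += 150  # stand-in: 150 new mirrored messages show up after the bounce
bounce_and_verify_progress(fake_restart, lambda: counter["n"], timeout_sec=5)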
class StreamsUpgradeTest(Test): """ Test upgrading Kafka Streams (all version combination) If metadata was changes, upgrade is more difficult Metadata version was bumped in 0.10.1.0 and subsequently bumped in 2.0.0 """ def __init__(self, test_context): super(StreamsUpgradeTest, self).__init__(test_context) self.topics = { 'echo': { 'partitions': 5 }, 'data': { 'partitions': 5 }, } processed_msg = "processed [0-9]* records" base_version_number = str(DEV_VERSION).split("-")[0] def perform_broker_upgrade(self, to_version): self.logger.info("First pass bounce - rolling broker upgrade") for node in self.kafka.nodes: self.kafka.stop_node(node) node.version = KafkaVersion(to_version) self.kafka.start_node(node) @ignore @cluster(num_nodes=6) @matrix(from_version=broker_upgrade_versions, to_version=broker_upgrade_versions) def test_upgrade_downgrade_brokers(self, from_version, to_version): """ Start a smoke test client then perform rolling upgrades on the broker. """ if from_version == to_version: return self.replication = 3 self.num_kafka_nodes = 3 self.partitions = 1 self.isr = 2 self.topics = { 'echo': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'data': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'min': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'max': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'sum': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'dif': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'cnt': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'avg': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'wcnt': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } }, 'tagg': { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': { "min.insync.replicas": self.isr } } } # Setup phase self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() # number of nodes needs to be >= 3 for the smoke test self.kafka = KafkaService(self.test_context, num_nodes=self.num_kafka_nodes, zk=self.zk, version=KafkaVersion(from_version), topics=self.topics) self.kafka.start() # allow some time for topics to be created wait_until(lambda: self.confirm_topics_on_all_brokers( set(self.topics.keys())), timeout_sec=60, err_msg="Broker did not create all topics in 60 seconds ") self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) processor = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka, "at_least_once") with self.driver.node.account.monitor_log( self.driver.STDOUT_FILE) as driver_monitor: self.driver.start() with processor.node.account.monitor_log( processor.STDOUT_FILE) as monitor: processor.start() monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(processor.node)) connected_message = "Discovered group coordinator" with processor.node.account.monitor_log( processor.LOG_FILE) as log_monitor: with 
processor.node.account.monitor_log( processor.STDOUT_FILE) as stdout_monitor: self.perform_broker_upgrade(to_version) log_monitor.wait_until( connected_message, timeout_sec=120, err_msg=("Never saw output '%s' on " % connected_message) + str(processor.node.account)) stdout_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on" % self.processed_msg + str(processor.node.account)) # SmokeTestDriver allows up to 6 minutes to consume all # records for the verification step so this timeout is set to # 6 minutes (360 seconds) for consuming of verification records # and a very conservative additional 2 minutes (120 seconds) to process # the records in the verification step driver_monitor.wait_until( 'ALL-RECORDS-DELIVERED\|PROCESSED-MORE-THAN-GENERATED', timeout_sec=480, err_msg="Never saw output '%s' on" % 'ALL-RECORDS-DELIVERED|PROCESSED-MORE-THAN-GENERATED' + str(self.driver.node.account)) self.driver.stop() processor.stop() processor.node.account.ssh_capture("grep SMOKE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False) @matrix(from_version=metadata_2_versions, to_version=metadata_2_versions) def test_simple_upgrade_downgrade(self, from_version, to_version): """ Starts 3 KafkaStreams instances with <old_version>, and upgrades one-by-one to <new_version> """ if from_version == to_version: return self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics=self.topics) self.kafka.start() self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.driver.disable_auto_terminate() self.processor1 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.processor2 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.processor3 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.driver.start() self.start_all_nodes_with(from_version) self.processors = [self.processor1, self.processor2, self.processor3] counter = 1 random.seed() # upgrade one-by-one via rolling bounce random.shuffle(self.processors) for p in self.processors: p.CLEAN_NODE_ENABLED = False self.do_stop_start_bounce(p, None, to_version, counter) counter = counter + 1 # shutdown self.driver.stop() random.shuffle(self.processors) for p in self.processors: node = p.node with node.account.monitor_log(p.STDOUT_FILE) as monitor: p.stop() monitor.wait_until( "UPGRADE-TEST-CLIENT-CLOSED", timeout_sec=60, err_msg="Never saw output 'UPGRADE-TEST-CLIENT-CLOSED' on" + str(node.account)) @matrix(from_version=metadata_1_versions, to_version=backward_compatible_metadata_2_versions) @matrix(from_version=metadata_1_versions, to_version=metadata_3_or_higher_versions) @matrix(from_version=metadata_2_versions, to_version=metadata_3_or_higher_versions) def test_metadata_upgrade(self, from_version, to_version): """ Starts 3 KafkaStreams instances with version <from_version> and upgrades one-by-one to <to_version> """ self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics=self.topics) self.kafka.start() self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.driver.disable_auto_terminate() self.processor1 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.processor2 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.processor3 = StreamsUpgradeTestJobRunnerService( 
self.test_context, self.kafka) self.driver.start() self.start_all_nodes_with(from_version) self.processors = [self.processor1, self.processor2, self.processor3] counter = 1 random.seed() # first rolling bounce random.shuffle(self.processors) for p in self.processors: p.CLEAN_NODE_ENABLED = False self.do_stop_start_bounce(p, from_version[:-2], to_version, counter) counter = counter + 1 # second rolling bounce random.shuffle(self.processors) for p in self.processors: self.do_stop_start_bounce(p, None, to_version, counter) counter = counter + 1 # shutdown self.driver.stop() random.shuffle(self.processors) for p in self.processors: node = p.node with node.account.monitor_log(p.STDOUT_FILE) as monitor: p.stop() monitor.wait_until( "UPGRADE-TEST-CLIENT-CLOSED", timeout_sec=60, err_msg="Never saw output 'UPGRADE-TEST-CLIENT-CLOSED' on" + str(node.account)) def test_version_probing_upgrade(self): """ Starts 3 KafkaStreams instances, and upgrades one-by-one to "future version" """ self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics=self.topics) self.kafka.start() self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.driver.disable_auto_terminate() # TODO KIP-441: consider rewriting the test for HighAvailabilityTaskAssignor self.processor1 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.processor1.set_config( "internal.task.assignor.class", "org.apache.kafka.streams.processor.internals.assignment.StickyTaskAssignor" ) self.processor2 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.processor2.set_config( "internal.task.assignor.class", "org.apache.kafka.streams.processor.internals.assignment.StickyTaskAssignor" ) self.processor3 = StreamsUpgradeTestJobRunnerService( self.test_context, self.kafka) self.processor3.set_config( "internal.task.assignor.class", "org.apache.kafka.streams.processor.internals.assignment.StickyTaskAssignor" ) self.driver.start() self.start_all_nodes_with("") # run with TRUNK self.processors = [self.processor1, self.processor2, self.processor3] self.old_processors = [ self.processor1, self.processor2, self.processor3 ] self.upgraded_processors = [] counter = 1 current_generation = 3 random.seed() random.shuffle(self.processors) for p in self.processors: p.CLEAN_NODE_ENABLED = False current_generation = self.do_rolling_bounce( p, counter, current_generation) counter = counter + 1 # shutdown self.driver.stop() random.shuffle(self.processors) for p in self.processors: node = p.node with node.account.monitor_log(p.STDOUT_FILE) as monitor: p.stop() monitor.wait_until( "UPGRADE-TEST-CLIENT-CLOSED", timeout_sec=60, err_msg="Never saw output 'UPGRADE-TEST-CLIENT-CLOSED' on" + str(node.account)) def get_version_string(self, version): if version.startswith("0") or version.startswith("1") \ or version.startswith("2.0") or version.startswith("2.1"): return "Kafka version : " + version elif "SNAPSHOT" in version: return "Kafka version.*" + self.base_version_number + ".*SNAPSHOT" else: return "Kafka version: " + version def start_all_nodes_with(self, version): kafka_version_str = self.get_version_string(version) # start first with <version> self.prepare_for(self.processor1, version) node1 = self.processor1.node with node1.account.monitor_log(self.processor1.STDOUT_FILE) as monitor: with node1.account.monitor_log( self.processor1.LOG_FILE) as log_monitor: self.processor1.start() log_monitor.wait_until( 
kafka_version_str, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + version + " " + str(node1.account)) monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node1.account)) # start second with <version> self.prepare_for(self.processor2, version) node2 = self.processor2.node with node1.account.monitor_log( self.processor1.STDOUT_FILE) as first_monitor: with node2.account.monitor_log( self.processor2.STDOUT_FILE) as second_monitor: with node2.account.monitor_log( self.processor2.LOG_FILE) as log_monitor: self.processor2.start() log_monitor.wait_until( kafka_version_str, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + version + " on " + str(node2.account)) first_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node1.account)) second_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node2.account)) # start third with <version> self.prepare_for(self.processor3, version) node3 = self.processor3.node with node1.account.monitor_log( self.processor1.STDOUT_FILE) as first_monitor: with node2.account.monitor_log( self.processor2.STDOUT_FILE) as second_monitor: with node3.account.monitor_log( self.processor3.STDOUT_FILE) as third_monitor: with node3.account.monitor_log( self.processor3.LOG_FILE) as log_monitor: self.processor3.start() log_monitor.wait_until( kafka_version_str, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + version + " on " + str(node3.account)) first_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node1.account)) second_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node2.account)) third_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node3.account)) @staticmethod def prepare_for(processor, version): processor.node.account.ssh("rm -rf " + processor.PERSISTENT_ROOT, allow_fail=False) if version == str(DEV_VERSION): processor.set_version("") # set to TRUNK else: processor.set_version(version) def do_stop_start_bounce(self, processor, upgrade_from, new_version, counter): kafka_version_str = self.get_version_string(new_version) first_other_processor = None second_other_processor = None for p in self.processors: if p != processor: if first_other_processor is None: first_other_processor = p else: second_other_processor = p node = processor.node first_other_node = first_other_processor.node second_other_node = second_other_processor.node # stop processor and wait for rebalance of others with first_other_node.account.monitor_log( first_other_processor.STDOUT_FILE) as first_other_monitor: with second_other_node.account.monitor_log( second_other_processor.STDOUT_FILE ) as second_other_monitor: processor.stop() first_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(first_other_node.account)) second_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(second_other_node.account)) node.account.ssh_capture("grep UPGRADE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False) if upgrade_from is None: # upgrade disabled -- second round of rolling bounces roll_counter = ".1-" # 
second round of rolling bounces else: roll_counter = ".0-" # first round of rolling bounces node.account.ssh("mv " + processor.STDOUT_FILE + " " + processor.STDOUT_FILE + roll_counter + str(counter), allow_fail=False) node.account.ssh("mv " + processor.STDERR_FILE + " " + processor.STDERR_FILE + roll_counter + str(counter), allow_fail=False) node.account.ssh("mv " + processor.LOG_FILE + " " + processor.LOG_FILE + roll_counter + str(counter), allow_fail=False) if new_version == str(DEV_VERSION): processor.set_version("") # set to TRUNK else: processor.set_version(new_version) processor.set_upgrade_from(upgrade_from) grep_metadata_error = "grep \"org.apache.kafka.streams.errors.TaskAssignmentException: unable to decode subscription data: version=2\" " with node.account.monitor_log(processor.STDOUT_FILE) as monitor: with node.account.monitor_log(processor.LOG_FILE) as log_monitor: with first_other_node.account.monitor_log( first_other_processor.STDOUT_FILE ) as first_other_monitor: with second_other_node.account.monitor_log( second_other_processor.STDOUT_FILE ) as second_other_monitor: processor.start() log_monitor.wait_until( kafka_version_str, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + new_version + " on " + str(node.account)) first_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(first_other_node.account)) found = list( first_other_node.account.ssh_capture( grep_metadata_error + first_other_processor.STDERR_FILE, allow_fail=True)) if len(found) > 0: raise Exception( "Kafka Streams failed with 'unable to decode subscription data: version=2'" ) second_other_monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(second_other_node.account)) found = list( second_other_node.account.ssh_capture( grep_metadata_error + second_other_processor.STDERR_FILE, allow_fail=True)) if len(found) > 0: raise Exception( "Kafka Streams failed with 'unable to decode subscription data: version=2'" ) monitor.wait_until( self.processed_msg, timeout_sec=60, err_msg="Never saw output '%s' on " % self.processed_msg + str(node.account)) def do_rolling_bounce(self, processor, counter, current_generation): first_other_processor = None second_other_processor = None for p in self.processors: if p != processor: if first_other_processor is None: first_other_processor = p else: second_other_processor = p node = processor.node first_other_node = first_other_processor.node second_other_node = second_other_processor.node with first_other_node.account.monitor_log( first_other_processor.LOG_FILE) as first_other_monitor: with second_other_node.account.monitor_log( second_other_processor.LOG_FILE) as second_other_monitor: # stop processor processor.stop() node.account.ssh_capture("grep UPGRADE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False) node.account.ssh("mv " + processor.STDOUT_FILE + " " + processor.STDOUT_FILE + "." + str(counter), allow_fail=False) node.account.ssh("mv " + processor.STDERR_FILE + " " + processor.STDERR_FILE + "." + str(counter), allow_fail=False) node.account.ssh("mv " + processor.LOG_FILE + " " + processor.LOG_FILE + "." 
+ str(counter), allow_fail=False) with node.account.monitor_log( processor.LOG_FILE) as log_monitor: processor.set_upgrade_to("future_version") processor.start() self.old_processors.remove(processor) self.upgraded_processors.append(processor) # checking for the dev version which should be the only SNAPSHOT log_monitor.wait_until( "Kafka version.*" + self.base_version_number + ".*SNAPSHOT", timeout_sec=60, err_msg="Could not detect Kafka Streams version " + str(DEV_VERSION) + " in " + str(node.account)) log_monitor.offset = 5 log_monitor.wait_until( "partition\.assignment\.strategy = \[org\.apache\.kafka\.streams\.tests\.StreamsUpgradeTest$FutureStreamsPartitionAssignor\]", timeout_sec=60, err_msg= "Could not detect FutureStreamsPartitionAssignor in " + str(node.account)) monitors = {} monitors[processor] = log_monitor monitors[first_other_processor] = first_other_monitor monitors[second_other_processor] = second_other_monitor if len(self.old_processors) > 0: log_monitor.wait_until( "Sent a version 8 subscription and got version 7 assignment back (successful version probing). Downgrade subscription metadata to commonly supported version 7 and trigger new rebalance.", timeout_sec=60, err_msg= "Could not detect 'successful version probing' at upgrading node " + str(node.account)) else: log_monitor.wait_until( "Sent a version 8 subscription and got version 7 assignment back (successful version probing). Downgrade subscription metadata to commonly supported version 8 and trigger new rebalance.", timeout_sec=60, err_msg= "Could not detect 'successful version probing with upgraded leader' at upgrading node " + str(node.account)) first_other_monitor.wait_until( "Sent a version 7 subscription and group.s latest commonly supported version is 8 (successful version probing and end of rolling upgrade). Upgrading subscription metadata version to 8 for next rebalance.", timeout_sec=60, err_msg= "Never saw output 'Upgrade metadata to version 8' on" + str(first_other_node.account)) second_other_monitor.wait_until( "Sent a version 7 subscription and group.s latest commonly supported version is 8 (successful version probing and end of rolling upgrade). Upgrading subscription metadata version to 8 for next rebalance.", timeout_sec=60, err_msg= "Never saw output 'Upgrade metadata to version 8' on" + str(second_other_node.account)) log_monitor.wait_until( "Detected that the assignor requested a rebalance. 
Rejoining the consumer group to trigger a new rebalance.", timeout_sec=60, err_msg= "Could not detect 'Triggering new rebalance' at upgrading node " + str(node.account)) # version probing should trigger second rebalance # now we check that after consecutive rebalances we have synchronized generation generation_synchronized = False retries = 0 while retries < 10: processor_found = extract_generation_from_logs( processor) first_other_processor_found = extract_generation_from_logs( first_other_processor) second_other_processor_found = extract_generation_from_logs( second_other_processor) if len(processor_found) > 0 and len( first_other_processor_found) > 0 and len( second_other_processor_found) > 0: self.logger.info("processor: " + str(processor_found)) self.logger.info("first other processor: " + str(first_other_processor_found)) self.logger.info("second other processor: " + str(second_other_processor_found)) processor_generation = self.extract_highest_generation( processor_found) first_other_processor_generation = self.extract_highest_generation( first_other_processor_found) second_other_processor_generation = self.extract_highest_generation( second_other_processor_found) if processor_generation == first_other_processor_generation and processor_generation == second_other_processor_generation: current_generation = processor_generation generation_synchronized = True break time.sleep(5) retries = retries + 1 if generation_synchronized == False: raise Exception( "Never saw all three processors have the synchronized generation number" ) if len(self.old_processors) > 0: self.verify_metadata_no_upgraded_yet() return current_generation def extract_highest_generation(self, found_generations): return int(found_generations[-1]) def verify_metadata_no_upgraded_yet(self): for p in self.processors: found = list( p.node.account.ssh_capture( "grep \"Sent a version 6 subscription and group.s latest commonly supported version is 7 (successful version probing and end of rolling upgrade). Upgrading subscription metadata version to 7 for next rebalance.\" " + p.LOG_FILE, allow_fail=True)) if len(found) > 0: raise Exception( "Kafka Streams failed with 'group member upgraded to metadata 7 too early'" ) def confirm_topics_on_all_brokers(self, expected_topic_set): for node in self.kafka.nodes: match_count = 0 # need to iterate over topic_list_generator as kafka.list_topics() # returns a python generator so values are fetched lazily # so we can't just compare directly we must iterate over what's returned topic_list_generator = self.kafka.list_topics(node=node) for topic in topic_list_generator: if topic in expected_topic_set: match_count += 1 if len(expected_topic_set) != match_count: return False return True
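do_rolling_bounce() finishes by polling all three Streams processors until their latest rebalance generations agree. A stripped-down sketch of that convergence check; get_generations stands in for extract_generation_from_logs and returns the generations observed so far per member.

# Wait until every member's highest observed generation is the same, then return it.
import time

def wait_for_synchronized_generation(get_generations, num_members=3, retries=10, backoff_sec=5):
    for _ in range(retries):
        per_member = get_generations()  # e.g. {"p1": [2, 3], "p2": [3], "p3": [1, 3]}
        if len(per_member) == num_members and all(per_member.values()):
            highest = {m: max(gens) for m, gens in per_member.items()}
            if len(set(highest.values())) == 1:
                return next(iter(highest.values()))
        time.sleep(backoff_sec)
    raise AssertionError("Never saw all %d members on the same generation" % num_members)

assert wait_for_synchronized_generation(lambda: {"p1": [2, 3], "p2": [3], "p3": [1, 3]}) == 3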
class RoundTripFaultTest(Test): topic_name_index = 0 def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(RoundTripFaultTest, self).__init__(test_context) self.zk = ZookeeperService(test_context, num_nodes=3) self.kafka = KafkaService(test_context, num_nodes=4, zk=self.zk) self.workload_service = RoundTripWorkloadService( test_context, self.kafka) self.trogdor = TrogdorService( context=self.test_context, client_services=[self.zk, self.kafka, self.workload_service]) topic_name = "round_trip_topic%d" % RoundTripFaultTest.topic_name_index RoundTripFaultTest.topic_name_index = RoundTripFaultTest.topic_name_index + 1 active_topics = { topic_name: { "partitionAssignments": { "0": [0, 1, 2] } } } self.round_trip_spec = RoundTripWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, self.workload_service.client_node, self.workload_service.bootstrap_servers, target_messages_per_sec=10000, max_messages=100000, active_topics=active_topics) def setUp(self): self.zk.start() self.kafka.start() self.trogdor.start() def teardown(self): self.trogdor.stop() self.kafka.stop() self.zk.stop() def test_round_trip_workload(self): workload1 = self.trogdor.create_task("workload1", self.round_trip_spec) workload1.wait_for_done(timeout_sec=600) def test_round_trip_workload_with_broker_partition(self): workload1 = self.trogdor.create_task("workload1", self.round_trip_spec) time.sleep(2) part1 = [self.kafka.nodes[0]] part2 = self.kafka.nodes[1:] + [self.workload_service.nodes[0] ] + self.zk.nodes partition1_spec = NetworkPartitionFaultSpec(0, TaskSpec.MAX_DURATION_MS, [part1, part2]) partition1 = self.trogdor.create_task("partition1", partition1_spec) workload1.wait_for_done(timeout_sec=600) partition1.stop() partition1.wait_for_done() def test_produce_consume_with_broker_pause(self): workload1 = self.trogdor.create_task("workload1", self.round_trip_spec) time.sleep(2) stop1_spec = ProcessStopFaultSpec(0, TaskSpec.MAX_DURATION_MS, [self.kafka.nodes[0]], self.kafka.java_class_name()) stop1 = self.trogdor.create_task("stop1", stop1_spec) workload1.wait_for_done(timeout_sec=600) stop1.stop() stop1.wait_for_done() self.kafka.stop_node(self.kafka.nodes[0], False) def test_produce_consume_with_client_partition(self): workload1 = self.trogdor.create_task("workload1", self.round_trip_spec) time.sleep(2) part1 = [self.workload_service.nodes[0]] part2 = self.kafka.nodes + self.zk.nodes partition1_spec = NetworkPartitionFaultSpec(0, 60000, [part1, part2]) stop1 = self.trogdor.create_task("stop1", partition1_spec) workload1.wait_for_done(timeout_sec=600) stop1.stop() stop1.wait_for_done() def test_produce_consume_with_latency(self): workload1 = self.trogdor.create_task("workload1", self.round_trip_spec) time.sleep(2) node_specs = {} for node in self.kafka.nodes + self.zk.nodes: node_specs[node.name] = {"latencyMs": 500, "networkDevice": "eth0"} spec = DegradedNetworkFaultSpec(0, 60000, node_specs) slow1 = self.trogdor.create_task("slow1", spec) workload1.wait_for_done(timeout_sec=600) slow1.stop() slow1.wait_for_done()
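test_round_trip_workload_with_broker_partition isolates the first broker from every other node (the remaining brokers, the workload client, and ZooKeeper). A small sketch of how the two partition groups handed to NetworkPartitionFaultSpec are composed; the node names here are placeholders, not real cluster nodes.

# Split the cluster into [isolated broker] vs. [everything else] for a network-partition fault.
def broker_isolation_partitions(kafka_nodes, workload_nodes, zk_nodes):
    isolated = [kafka_nodes[0]]
    rest = kafka_nodes[1:] + workload_nodes[:1] + zk_nodes
    return [isolated, rest]

groups = broker_isolation_partitions(["kafka1", "kafka2", "kafka3", "kafka4"], ["client1"], ["zk1", "zk2", "zk3"])
assert groups == [["kafka1"], ["kafka2", "kafka3", "kafka4", "client1", "zk1", "zk2", "zk3"]]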
class TestSecurityRollingUpgrade(ProduceConsumeValidateTest): """Tests a rolling upgrade from PLAINTEXT to a secured cluster """ def __init__(self, test_context): super(TestSecurityRollingUpgrade, self).__init__(test_context=test_context) def setUp(self): self.acls = ACLs(self.test_context) self.topic = "test_topic" self.group = "group" self.producer_throughput = 100 self.num_producers = 1 self.num_consumers = 1 self.zk = ZookeeperService(self.test_context, num_nodes=1) self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, topics={self.topic: { "partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) self.zk.start() def create_producer_and_consumer(self): self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput) self.consumer = ConsoleConsumer( self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int) self.consumer.group_id = "group" def bounce(self): self.kafka.start_minikdc_if_necessary() self.kafka.restart_cluster(after_each_broker_restart = lambda: time.sleep(10)) def roll_in_secured_settings(self, client_protocol, broker_protocol): # Roll cluster to include inter broker security protocol. self.kafka.setup_interbroker_listener(broker_protocol) self.bounce() # Roll cluster to disable PLAINTEXT port self.kafka.close_port(SecurityConfig.PLAINTEXT) self.set_authorizer_and_bounce(client_protocol, broker_protocol) def set_authorizer_and_bounce(self, client_protocol, broker_protocol): self.kafka.authorizer_class_name = KafkaService.ACL_AUTHORIZER # Force use of direct ZooKeeper access due to SecurityDisabledException: No Authorizer is configured on the broker. self.acls.set_acls(client_protocol, self.kafka, self.topic, self.group, force_use_zk_connection=True) self.acls.set_acls(broker_protocol, self.kafka, self.topic, self.group, force_use_zk_connection=True) self.bounce() # enables the authorizer def open_secured_port(self, client_protocol): self.kafka.security_protocol = client_protocol self.kafka.open_port(client_protocol) self.kafka.start_minikdc_if_necessary() self.bounce() def add_sasl_mechanism(self, new_client_sasl_mechanism): self.kafka.client_sasl_mechanism = new_client_sasl_mechanism self.kafka.start_minikdc_if_necessary() self.bounce() def roll_in_sasl_mechanism(self, security_protocol, new_sasl_mechanism): # Roll cluster to update inter-broker SASL mechanism. # We need the inter-broker SASL mechanism to still be enabled through this roll. self.kafka.client_sasl_mechanism = "%s,%s" % (self.kafka.interbroker_sasl_mechanism, new_sasl_mechanism) self.kafka.interbroker_sasl_mechanism = new_sasl_mechanism self.bounce() # Bounce again with ACLs for new mechanism. 
self.kafka.client_sasl_mechanism = new_sasl_mechanism # Removes old SASL mechanism completely self.set_authorizer_and_bounce(security_protocol, security_protocol) def add_separate_broker_listener(self, broker_security_protocol, broker_sasl_mechanism): # Enable the new internal listener on all brokers first self.kafka.open_port(self.kafka.INTERBROKER_LISTENER_NAME) self.kafka.port_mappings[self.kafka.INTERBROKER_LISTENER_NAME].security_protocol = broker_security_protocol self.kafka.port_mappings[self.kafka.INTERBROKER_LISTENER_NAME].sasl_mechanism = broker_sasl_mechanism self.bounce() # Update inter-broker listener after all brokers have been updated to enable the new listener self.kafka.setup_interbroker_listener(broker_security_protocol, True) self.kafka.interbroker_sasl_mechanism = broker_sasl_mechanism self.bounce() def remove_separate_broker_listener(self, client_security_protocol, client_sasl_mechanism): # separate interbroker listener port will be closed automatically in setup_interbroker_listener # if not using separate interbroker listener self.kafka.setup_interbroker_listener(client_security_protocol, False) self.kafka.interbroker_sasl_mechanism = client_sasl_mechanism self.bounce() @cluster(num_nodes=8) @matrix(client_protocol=[SecurityConfig.SSL]) @cluster(num_nodes=9) @matrix(client_protocol=[SecurityConfig.SASL_PLAINTEXT, SecurityConfig.SASL_SSL]) def test_rolling_upgrade_phase_one(self, client_protocol): """ Start with a PLAINTEXT cluster, open a SECURED port, via a rolling upgrade, ensuring we could produce and consume throughout over PLAINTEXT. Finally check we can produce and consume the new secured port. """ self.kafka.setup_interbroker_listener(SecurityConfig.PLAINTEXT) self.kafka.security_protocol = SecurityConfig.PLAINTEXT self.kafka.start() # Create PLAINTEXT producer and consumer self.create_producer_and_consumer() # Rolling upgrade, opening a secure protocol, ensuring the Plaintext producer/consumer continues to run self.run_produce_consume_validate(self.open_secured_port, client_protocol) # Now we can produce and consume via the secured port self.kafka.security_protocol = client_protocol self.create_producer_and_consumer() self.run_produce_consume_validate(lambda: time.sleep(1)) @cluster(num_nodes=8) @matrix(client_protocol=[SecurityConfig.SASL_SSL, SecurityConfig.SSL, SecurityConfig.SASL_PLAINTEXT], broker_protocol=[SecurityConfig.SASL_SSL, SecurityConfig.SSL, SecurityConfig.SASL_PLAINTEXT]) def test_rolling_upgrade_phase_two(self, client_protocol, broker_protocol): """ Start with a PLAINTEXT cluster with a second Secured port open (i.e. result of phase one). A third secure port is also open if inter-broker and client protocols are different. Start a Producer and Consumer via the SECURED client port Incrementally upgrade to add inter-broker be the secure broker protocol Incrementally upgrade again to add ACLs as well as disabling the PLAINTEXT port Ensure the producer and consumer ran throughout """ #Given we have a broker that has both secure and PLAINTEXT ports open self.kafka.security_protocol = client_protocol self.kafka.setup_interbroker_listener(SecurityConfig.PLAINTEXT, use_separate_listener=False) self.kafka.open_port(broker_protocol) self.kafka.start() #Create Secured Producer and Consumer self.create_producer_and_consumer() #Roll in the security protocol. Disable Plaintext. 
Ensure we can produce and Consume throughout self.run_produce_consume_validate(self.roll_in_secured_settings, client_protocol, broker_protocol) @cluster(num_nodes=9) @matrix(new_client_sasl_mechanism=[SecurityConfig.SASL_MECHANISM_PLAIN]) def test_rolling_upgrade_sasl_mechanism_phase_one(self, new_client_sasl_mechanism): """ Start with a SASL/GSSAPI cluster, add new SASL mechanism, via a rolling upgrade, ensuring we could produce and consume throughout over SASL/GSSAPI. Finally check we can produce and consume using new mechanism. """ self.kafka.setup_interbroker_listener(SecurityConfig.SASL_SSL, use_separate_listener=False) self.kafka.security_protocol = SecurityConfig.SASL_SSL self.kafka.client_sasl_mechanism = SecurityConfig.SASL_MECHANISM_GSSAPI self.kafka.interbroker_sasl_mechanism = SecurityConfig.SASL_MECHANISM_GSSAPI self.kafka.start() # Create SASL/GSSAPI producer and consumer self.create_producer_and_consumer() # Rolling upgrade, adding new SASL mechanism, ensuring the GSSAPI producer/consumer continues to run self.run_produce_consume_validate(self.add_sasl_mechanism, new_client_sasl_mechanism) # Now we can produce and consume using the new SASL mechanism self.kafka.client_sasl_mechanism = new_client_sasl_mechanism self.create_producer_and_consumer() self.run_produce_consume_validate(lambda: time.sleep(1)) @cluster(num_nodes=8) @matrix(new_sasl_mechanism=[SecurityConfig.SASL_MECHANISM_PLAIN]) def test_rolling_upgrade_sasl_mechanism_phase_two(self, new_sasl_mechanism): """ Start with a SASL cluster with GSSAPI for inter-broker and a second mechanism for clients (i.e. result of phase one). Start Producer and Consumer using the second mechanism Incrementally upgrade to set inter-broker to the second mechanism and disable GSSAPI Incrementally upgrade again to add ACLs Ensure the producer and consumer run throughout """ #Start with a broker that has GSSAPI for inter-broker and a second mechanism for clients self.kafka.security_protocol = SecurityConfig.SASL_SSL self.kafka.setup_interbroker_listener(SecurityConfig.SASL_SSL, use_separate_listener=False) self.kafka.client_sasl_mechanism = new_sasl_mechanism self.kafka.interbroker_sasl_mechanism = SecurityConfig.SASL_MECHANISM_GSSAPI self.kafka.start() #Create Producer and Consumer using second mechanism self.create_producer_and_consumer() #Roll in the second SASL mechanism for inter-broker, disabling first mechanism. Ensure we can produce and consume throughout self.run_produce_consume_validate(self.roll_in_sasl_mechanism, self.kafka.security_protocol, new_sasl_mechanism) @cluster(num_nodes=9) def test_enable_separate_interbroker_listener(self): """ Start with a cluster that has a single PLAINTEXT listener. Start producing/consuming on PLAINTEXT port. While doing that, do a rolling restart to enable separate secured interbroker port """ self.kafka.security_protocol = SecurityConfig.PLAINTEXT self.kafka.setup_interbroker_listener(SecurityConfig.PLAINTEXT, use_separate_listener=False) self.kafka.start() self.create_producer_and_consumer() self.run_produce_consume_validate(self.add_separate_broker_listener, SecurityConfig.SASL_SSL, SecurityConfig.SASL_MECHANISM_PLAIN) @cluster(num_nodes=9) def test_disable_separate_interbroker_listener(self): """ Start with a cluster that has two listeners, one on SSL (clients), another on SASL_SSL (broker-to-broker). Start producer and consumer on SSL listener. Close dedicated interbroker listener via rolling restart. Ensure we can produce and consume via SSL listener throughout. 
""" client_protocol = SecurityConfig.SSL client_sasl_mechanism = SecurityConfig.SASL_MECHANISM_GSSAPI self.kafka.security_protocol = client_protocol self.kafka.client_sasl_mechanism = client_sasl_mechanism self.kafka.setup_interbroker_listener(SecurityConfig.SASL_SSL, use_separate_listener=True) self.kafka.interbroker_sasl_mechanism = SecurityConfig.SASL_MECHANISM_GSSAPI self.kafka.start() # create producer and consumer via client security protocol self.create_producer_and_consumer() # run produce/consume/validate loop while disabling a separate interbroker listener via rolling restart self.run_produce_consume_validate( self.remove_separate_broker_listener, client_protocol, client_sasl_mechanism)
class ConnectDistributedTest(Test): """ Simple test of Kafka Connect in distributed mode, producing data from files on one cluster and consuming it on another, validating the total output is identical to the input. """ INPUT_FILE = "/mnt/connect.input" OUTPUT_FILE = "/mnt/connect.output" TOPIC = "test" OFFSETS_TOPIC = "connect-offsets" CONFIG_TOPIC = "connect-configs" STATUS_TOPIC = "connect-status" # Since tasks can be assigned to any node and we're testing with files, we need to make sure the content is the same # across all nodes. FIRST_INPUT_LIST = ["foo", "bar", "baz"] FIRST_INPUTS = "\n".join(FIRST_INPUT_LIST) + "\n" SECOND_INPUT_LIST = ["razz", "ma", "tazz"] SECOND_INPUTS = "\n".join(SECOND_INPUT_LIST) + "\n" SCHEMA = { "type": "string", "optional": False } def __init__(self, test_context): super(ConnectDistributedTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.topics = { 'test' : { 'partitions': 1, 'replication-factor': 1 } } self.zk = ZookeeperService(test_context, self.num_zk) self.key_converter = "org.apache.kafka.connect.json.JsonConverter" self.value_converter = "org.apache.kafka.connect.json.JsonConverter" self.schemas = True def setup_services(self, security_protocol=SecurityConfig.PLAINTEXT): self.kafka = KafkaService(self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=security_protocol, topics=self.topics) self.cc = ConnectDistributedService(self.test_context, 3, self.kafka, [self.INPUT_FILE, self.OUTPUT_FILE]) self.cc.log_level = "DEBUG" self.zk.start() self.kafka.start() @matrix(security_protocol=[SecurityConfig.PLAINTEXT, SecurityConfig.SASL_SSL]) def test_file_source_and_sink(self, security_protocol): """ Tests that a basic file connector works across clean rolling bounces. This validates that the connector is correctly created, tasks instantiated, and as nodes restart the work is rebalanced across nodes. """ self.setup_services(security_protocol=security_protocol) self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) self.cc.start() self.logger.info("Creating connectors") for connector_props in [self.render("connect-file-source.properties"), self.render("connect-file-sink.properties")]: connector_config = dict([line.strip().split('=', 1) for line in connector_props.split('\n') if line.strip() and not line.strip().startswith('#')]) self.cc.create_connector(connector_config) # Generating data on the source node should generate new records and create new output on the sink node. Timeouts # here need to be more generous than they are for standalone mode because a) it takes longer to write configs, # do rebalancing of the group, etc, and b) without explicit leave group support, rebalancing takes awhile for node in self.cc.nodes: node.account.ssh("echo -e -n " + repr(self.FIRST_INPUTS) + " >> " + self.INPUT_FILE) wait_until(lambda: self._validate_file_output(self.FIRST_INPUT_LIST), timeout_sec=70, err_msg="Data added to input file was not seen in the output file in a reasonable amount of time.") # Restarting both should result in them picking up where they left off, # only processing new data. 
self.cc.restart() for node in self.cc.nodes: node.account.ssh("echo -e -n " + repr(self.SECOND_INPUTS) + " >> " + self.INPUT_FILE) wait_until(lambda: self._validate_file_output(self.FIRST_INPUT_LIST + self.SECOND_INPUT_LIST), timeout_sec=70, err_msg="Sink output file never converged to the same state as the input file") @matrix(clean=[True, False]) def test_bounce(self, clean): """ Validates that source and sink tasks that run continuously and produce a predictable sequence of messages run correctly and deliver messages exactly once when Kafka Connect workers undergo clean rolling bounces. """ num_tasks = 3 self.setup_services() self.cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node)) self.cc.start() self.source = VerifiableSource(self.cc, tasks=num_tasks) self.source.start() self.sink = VerifiableSink(self.cc, tasks=num_tasks) self.sink.start() for _ in range(3): for node in self.cc.nodes: started = time.time() self.logger.info("%s bouncing Kafka Connect on %s", clean and "Clean" or "Hard", str(node.account)) self.cc.stop_node(node, clean_shutdown=clean) with node.account.monitor_log(self.cc.LOG_FILE) as monitor: self.cc.start_node(node) monitor.wait_until("Starting connectors and tasks using config offset", timeout_sec=90, err_msg="Kafka Connect worker didn't successfully join group and start work") self.logger.info("Bounced Kafka Connect on %s and rejoined in %f seconds", node.account, time.time() - started) # If this is a hard bounce, give additional time for the consumer groups to recover. If we don't give # some time here, the next bounce may cause consumers to be shut down before they have any time to process # data and we can end up with zero data making it through the test. if not clean: time.sleep(15) self.source.stop() self.sink.stop() self.cc.stop() # Validate at least once delivery of everything that was reported as written since we should have flushed and # cleanly exited. Currently this only tests at least once delivery because the sink task may not have consumed # all the messages generated by the source task. This needs to be done per-task since seqnos are not unique across # tasks. success = True errors = [] allow_dups = not clean src_messages = self.source.messages() sink_messages = self.sink.messages() for task in range(num_tasks): # Validate source messages src_seqnos = [msg['seqno'] for msg in src_messages if msg['task'] == task] # Every seqno up to the largest one we ever saw should appear. Each seqno should only appear once because clean # bouncing should commit on rebalance. src_seqno_max = max(src_seqnos) self.logger.debug("Max source seqno: %d", src_seqno_max) src_seqno_counts = Counter(src_seqnos) missing_src_seqnos = sorted(set(range(src_seqno_max)).difference(set(src_seqnos))) duplicate_src_seqnos = sorted([seqno for seqno,count in src_seqno_counts.iteritems() if count > 1]) if missing_src_seqnos: self.logger.error("Missing source sequence numbers for task " + str(task)) errors.append("Found missing source sequence numbers for task %d: %s" % (task, missing_src_seqnos)) success = False if not allow_dups and duplicate_src_seqnos: self.logger.error("Duplicate source sequence numbers for task " + str(task)) errors.append("Found duplicate source sequence numbers for task %d: %s" % (task, duplicate_src_seqnos)) success = False # Validate sink messages sink_seqnos = [msg['seqno'] for msg in sink_messages if msg['task'] == task and 'flushed' in msg] # Every seqno up to the largest one we ever saw should appear. 
Each seqno should only appear once because # clean bouncing should commit on rebalance. sink_seqno_max = max(sink_seqnos) self.logger.debug("Max sink seqno: %d", sink_seqno_max) sink_seqno_counts = Counter(sink_seqnos) missing_sink_seqnos = sorted(set(range(sink_seqno_max)).difference(set(sink_seqnos))) duplicate_sink_seqnos = sorted([seqno for seqno,count in sink_seqno_counts.iteritems() if count > 1]) if missing_sink_seqnos: self.logger.error("Missing sink sequence numbers for task " + str(task)) errors.append("Found missing sink sequence numbers for task %d: %s" % (task, missing_sink_seqnos)) success = False if not allow_dups and duplicate_sink_seqnos: self.logger.error("Duplicate sink sequence numbers for task " + str(task)) errors.append("Found duplicate sink sequence numbers for task %d: %s" % (task, duplicate_sink_seqnos)) success = False # Validate source and sink match if sink_seqno_max > src_seqno_max: self.logger.error("Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d", task, sink_seqno_max, src_seqno_max) errors.append("Found sink sequence number greater than any generated sink sequence number for task %d: %d > %d" % (task, sink_seqno_max, src_seqno_max)) success = False if src_seqno_max < 1000 or sink_seqno_max < 1000: errors.append("Not enough messages were processed: source:%d sink:%d" % (src_seqno_max, sink_seqno_max)) success = False if not success: self.mark_for_collect(self.cc) # Also collect the data in the topic to aid in debugging consumer_validator = ConsoleConsumer(self.test_context, 1, self.kafka, self.source.topic, consumer_timeout_ms=1000, print_key=True) consumer_validator.run() self.mark_for_collect(consumer_validator, "consumer_stdout") assert success, "Found validation errors:\n" + "\n ".join(errors) def _validate_file_output(self, input): input_set = set(input) # Output needs to be collected from all nodes because we can't be sure where the tasks will be scheduled. # Between the first and second rounds, we might even end up with half the data on each node. output_set = set(itertools.chain(*[ [line.strip() for line in self._file_contents(node, self.OUTPUT_FILE)] for node in self.cc.nodes ])) return input_set == output_set def _file_contents(self, node, file): try: # Convert to a list here or the CalledProcessError may be returned during a call to the generator instead of # immediately return list(node.account.ssh_capture("cat " + file)) except subprocess.CalledProcessError: return []
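# A standalone sketch (not part of ConnectDistributedTest above) of the per-task seqno check that
# test_bounce performs: every seqno up to the largest one observed must be present, and for clean
# bounces no seqno may appear more than once. The helper name and example data are illustrative only.
from collections import Counter

def check_seqnos(seqnos, allow_dups=False):
    """Return (missing, duplicates) for a list of integer seqnos that should cover 0..max."""
    counts = Counter(seqnos)
    seqno_max = max(seqnos)
    missing = sorted(set(range(seqno_max)) - set(seqnos))
    duplicates = [] if allow_dups else sorted(s for s, c in counts.items() if c > 1)
    return missing, duplicates

# Example: seqno 2 was never seen (a lost record) and seqno 4 was seen twice (an at-least-once redelivery).
assert check_seqnos([0, 1, 3, 4, 4, 5]) == ([2], [4])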
class StreamsBrokerDownResilience(Test): """ This test validates that Streams is resilient to a broker being down longer than specified timeouts in configs """ inputTopic = "streamsResilienceSource" outputTopic = "streamsResilienceSink" num_messages = 5 def __init__(self, test_context): super(StreamsBrokerDownResilience, self).__init__(test_context=test_context) self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, topics={ self.inputTopic: { 'partitions': 1, 'replication-factor': 1 }, self.outputTopic: { 'partitions': 1, 'replication-factor': 1 } }) def get_consumer(self): return VerifiableConsumer(self.test_context, 1, self.kafka, self.outputTopic, "stream-broker-resilience-verify-consumer", max_messages=self.num_messages) def get_producer(self): return VerifiableProducer(self.test_context, 1, self.kafka, self.inputTopic, max_messages=self.num_messages, acks=1) def assert_produce_consume(self, test_state): producer = self.get_producer() producer.start() wait_until(lambda: producer.num_acked > 0, timeout_sec=30, err_msg="At %s failed to send messages " % test_state) consumer = self.get_consumer() consumer.start() wait_until( lambda: consumer.total_consumed() > 0, timeout_sec=60, err_msg="At %s streams did not process messages in 60 seconds " % test_state) def setUp(self): self.zk.start() def test_streams_resilient_to_broker_down(self): self.kafka.start() # Consumer max.poll.interval > min(max.block.ms, ((retries + 1) * request.timeout) consumer_poll_ms = "consumer.max.poll.interval.ms=50000" retries_config = "producer.retries=2" request_timeout = "producer.request.timeout.ms=15000" max_block_ms = "producer.max.block.ms=30000" # Broker should be down over 2x of retries * timeout ms # So with (2 * 15000) = 30 seconds, we'll set downtime to 70 seconds broker_down_time_in_seconds = 70 # java code expects configs in key=value,key=value format updated_configs = consumer_poll_ms + "," + retries_config + "," + request_timeout + "," + max_block_ms processor = StreamsBrokerDownResilienceService(self.test_context, self.kafka, updated_configs) processor.start() # until KIP-91 is merged we'll only send 5 messages to assert Kafka Streams is running before taking the broker down # After KIP-91 is merged we'll continue to send messages the duration of the test self.assert_produce_consume("before_broker_stop") node = self.kafka.leader(self.inputTopic) self.kafka.stop_node(node) time.sleep(broker_down_time_in_seconds) self.kafka.start_node(node) self.assert_produce_consume("after_broker_stop") self.kafka.stop()
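# Worked version of the timing comments in test_streams_resilient_to_broker_down above
# (standalone and illustrative only): the producer gives up after roughly
# min(max.block.ms, (retries + 1) * request.timeout.ms), so the consumer poll interval must exceed
# that bound and the broker outage must last well beyond it for the test to be meaningful.
retries = 2
request_timeout_ms = 15000
max_block_ms = 30000
consumer_max_poll_interval_ms = 50000

producer_give_up_ms = min(max_block_ms, (retries + 1) * request_timeout_ms)  # min(30000, 45000) = 30000
assert consumer_max_poll_interval_ms > producer_give_up_ms

# The test keeps the broker down for 70 seconds, more than 2x the ~30 second give-up window.
broker_down_time_in_seconds = 70
assert broker_down_time_in_seconds * 1000 > 2 * producer_give_up_ms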
class TestUpgrade(ProduceConsumeValidateTest): def __init__(self, test_context): super(TestUpgrade, self).__init__(test_context=test_context) def setUp(self): self.topic = "test_topic" self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() # Producer and consumer self.producer_throughput = 10000 self.num_producers = 1 self.num_consumers = 1 def perform_upgrade(self, from_kafka_version, to_message_format_version=None): self.logger.info("First pass bounce - rolling upgrade") for node in self.kafka.nodes: self.kafka.stop_node(node) node.version = TRUNK node.config[config_property.INTER_BROKER_PROTOCOL_VERSION] = from_kafka_version node.config[config_property.MESSAGE_FORMAT_VERSION] = from_kafka_version self.kafka.start_node(node) self.logger.info("Second pass bounce - remove inter.broker.protocol.version config") for node in self.kafka.nodes: self.kafka.stop_node(node) del node.config[config_property.INTER_BROKER_PROTOCOL_VERSION] if to_message_format_version is None: del node.config[config_property.MESSAGE_FORMAT_VERSION] else: node.config[config_property.MESSAGE_FORMAT_VERSION] = to_message_format_version self.kafka.start_node(node) @parametrize(from_kafka_version=str(LATEST_0_10_0), to_message_format_version=None, compression_types=["snappy"], new_consumer=False) @parametrize(from_kafka_version=str(LATEST_0_10_0), to_message_format_version=None, compression_types=["snappy"]) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["none"], new_consumer=False) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["none"], security_protocol="SASL_SSL") @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["snappy"]) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["lz4"], new_consumer=False) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=None, compression_types=["lz4"]) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["none"], new_consumer=False) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["snappy"]) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["lz4"], new_consumer=False) @parametrize(from_kafka_version=str(LATEST_0_9), to_message_format_version=str(LATEST_0_9), compression_types=["lz4"]) @parametrize(from_kafka_version=str(LATEST_0_8_2), to_message_format_version=None, compression_types=["none"], new_consumer=False) @parametrize(from_kafka_version=str(LATEST_0_8_2), to_message_format_version=None, compression_types=["snappy"], new_consumer=False) def test_upgrade(self, from_kafka_version, to_message_format_version, compression_types, new_consumer=True, security_protocol="PLAINTEXT"): """Test upgrade of Kafka broker cluster from 0.8.2, 0.9.0 or 0.10.0 to the current version from_kafka_version is a Kafka version to upgrade from: either 0.8.2.X, 0.9.0.x or 0.10.0.x If to_message_format_version is None, it means that we will upgrade to default (latest) message format version. 
It is possible to upgrade to 0.10 brokers but still use message format version 0.9 - Start 3 node broker cluster on version 'from_kafka_version' - Start producer and consumer in the background - Perform two-phase rolling upgrade - First phase: upgrade brokers to 0.10 with inter.broker.protocol.version set to from_kafka_version and log.message.format.version set to from_kafka_version - Second phase: remove inter.broker.protocol.version config with rolling bounce; if to_message_format_version is set to 0.9, set log.message.format.version to to_message_format_version, otherwise remove log.message.format.version config - Finally, validate that every message acked by the producer was consumed by the consumer """ self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=KafkaVersion(from_kafka_version), topics={self.topic: {"partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) self.kafka.security_protocol = security_protocol self.kafka.interbroker_security_protocol = security_protocol self.kafka.start() self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, message_validator=is_int, compression_types=compression_types, version=KafkaVersion(from_kafka_version)) assert self.zk.query("/cluster/id") is None # TODO - reduce the timeout self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=200000, new_consumer=new_consumer, message_validator=is_int, version=KafkaVersion(from_kafka_version)) self.run_produce_consume_validate(core_test_action=lambda: self.perform_upgrade(from_kafka_version, to_message_format_version)) cluster_id_json = self.zk.query("/cluster/id") assert cluster_id_json is not None try: cluster_id = json.loads(cluster_id_json) except Exception: self.logger.debug("Data in /cluster/id znode could not be parsed. Data = %s" % cluster_id_json) self.logger.debug("Cluster id [%s]", cluster_id) assert len(cluster_id["id"]) == 22
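# Illustrative sketch (standalone, not used by TestUpgrade above) of the per-broker config overrides
# that the two rolling bounces in perform_upgrade() apply. The helper names are hypothetical; the
# broker config keys are the standard inter.broker.protocol.version and log.message.format.version.
def first_pass_overrides(from_kafka_version):
    # New binaries, but still speaking the old protocol and writing the old message format.
    return {
        "inter.broker.protocol.version": from_kafka_version,
        "log.message.format.version": from_kafka_version,
    }

def second_pass_overrides(to_message_format_version=None):
    # Drop the protocol pin; either drop the message format pin too (use the broker default) or
    # keep it pinned, e.g. to 0.9, for old consumers that cannot read the newer format.
    return {} if to_message_format_version is None else {
        "log.message.format.version": to_message_format_version
    }

print(first_pass_overrides("0.9.0.1"))
print(second_pass_overrides())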
class CompressionTest(ProduceConsumeValidateTest): """ These tests validate produce / consume for compressed topics. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(CompressionTest, self).__init__(test_context=test_context) self.topic = "test_topic" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService( test_context, num_nodes=1, zk=self.zk, topics={self.topic: { "partitions": 10, "replication-factor": 1 }}) self.num_partitions = 10 self.timeout_sec = 60 self.producer_throughput = 1000 self.num_producers = 4 self.messages_per_producer = 1000 self.num_consumers = 1 def setUp(self): self.zk.start() def min_cluster_size(self): # Override this since we're adding services outside of the constructor return super( CompressionTest, self).min_cluster_size() + self.num_producers + self.num_consumers @cluster(num_nodes=7) @parametrize(compression_types=["snappy", "gzip", "lz4", "none"]) def test_compressed_topic(self, compression_types): """Test produce => consume => validate for compressed topics Setup: 1 zk, 1 kafka node, 1 topic with partitions=10, replication-factor=1 compression_types parameter gives a list of compression types (or no compression if "none"). Each producer in a VerifiableProducer group (num_producers = 4) will use a compression type from the list based on producer's index in the group. - Produce messages in the background - Consume messages in the background - Stop producing, and finish consuming - Validate that every acked message was consumed """ self.kafka.security_protocol = "PLAINTEXT" self.kafka.interbroker_security_protocol = self.kafka.security_protocol self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, message_validator=is_int_with_prefix, compression_types=compression_types) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int_with_prefix) self.kafka.start() self.run_produce_consume_validate(lambda: wait_until( lambda: self.producer.each_produced_at_least( self.messages_per_producer) == True, timeout_sec=120, backoff_sec=1, err_msg= "Producer did not produce all messages in reasonable amount of time" ))
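# Sketch of the per-producer compression assignment described in the docstring above. How
# VerifiableProducer actually maps compression_types onto its nodes is assumed here (index into
# the list); this standalone snippet just illustrates the idea for the 4-producer case.
def compression_for_producer(index, compression_types):
    return compression_types[index % len(compression_types)]

compression_types = ["snappy", "gzip", "lz4", "none"]
for i in range(4):
    print("producer %d -> %s" % (i, compression_for_producer(i, compression_types)))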
class SecurityTest(ProduceConsumeValidateTest): """ These tests validate security features. """ def __init__(self, test_context): """:type test_context: ducktape.tests.test.TestContext""" super(SecurityTest, self).__init__(test_context=test_context) self.topic = "test_topic" self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, topics={self.topic: { "partitions": 2, "replication-factor": 1} }) self.num_partitions = 2 self.timeout_sec = 10000 self.producer_throughput = 1000 self.num_producers = 1 self.num_consumers = 1 def setUp(self): self.zk.start() @parametrize(security_protocol='PLAINTEXT', interbroker_security_protocol='SSL') @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') def test_client_ssl_endpoint_validation_failure(self, security_protocol, interbroker_security_protocol): """ Test that invalid hostname in certificate results in connection failures. When security_protocol=SSL, client SSL handshakes are expected to fail due to hostname verification failure. When security_protocol=PLAINTEXT and interbroker_security_protocol=SSL, controller connections fail with hostname verification failure. Hence clients are expected to fail with LEADER_NOT_AVAILABLE. """ self.kafka.security_protocol = security_protocol self.kafka.interbroker_security_protocol = interbroker_security_protocol SecurityConfig.ssl_stores = TestSslStores() SecurityConfig.ssl_stores.invalid_hostname = True self.kafka.start() self.create_producer_and_consumer() self.producer.log_level = "TRACE" self.producer.start() self.consumer.start() time.sleep(10) assert self.producer.num_acked == 0, "Messages published successfully, endpoint validation did not fail with invalid hostname" error = 'SSLHandshakeException' if security_protocol == 'SSL' else 'LEADER_NOT_AVAILABLE' for node in self.producer.nodes: node.account.ssh("grep %s %s" % (error, self.producer.LOG_FILE)) for node in self.consumer.nodes: node.account.ssh("grep %s %s" % (error, self.consumer.LOG_FILE)) self.producer.stop() self.consumer.stop() self.producer.log_level = "INFO" SecurityConfig.ssl_stores.invalid_hostname = False for node in self.kafka.nodes: self.kafka.restart_node(node, clean_shutdown=True) self.create_producer_and_consumer() self.run_produce_consume_validate() def create_producer_and_consumer(self): self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=10000, message_validator=is_int)
class Benchmark(Test): """A benchmark of Kafka producer/consumer performance. This replicates the test run here: https://engineering.linkedin.com/kafka/benchmarking-apache-kafka-2-million-writes-second-three-cheap-machines """ def __init__(self, test_context): super(Benchmark, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 3 self.topics = { TOPIC_REP_ONE: { 'partitions': 6, 'replication-factor': 1 }, TOPIC_REP_THREE: { 'partitions': 6, 'replication-factor': 3 } } self.zk = ZookeeperService(test_context, self.num_zk) self.msgs_large = 10000000 self.batch_size = 8 * 1024 self.buffer_memory = 64 * 1024 * 1024 self.msg_sizes = [10, 100, 1000, 10000, 100000] self.target_data_size = 128 * 1024 * 1024 self.target_data_size_gb = self.target_data_size / float( 1024 * 1024 * 1024) def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol, version): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, topics=self.topics, version=version) self.kafka.log_level = "INFO" # We don't DEBUG logging here self.kafka.start() @cluster(num_nodes=5) @parametrize(acks=1, topic=TOPIC_REP_ONE) @parametrize(acks=1, topic=TOPIC_REP_THREE) @parametrize(acks=-1, topic=TOPIC_REP_THREE) @matrix(acks=[1], topic=[TOPIC_REP_THREE], message_size=[10, 100, 1000, 10000, 100000], compression_type=["none", "snappy"], security_protocol=['PLAINTEXT', 'SSL']) @cluster(num_nodes=7) @parametrize(acks=1, topic=TOPIC_REP_THREE, num_producers=3) def test_producer_throughput(self, acks, topic, num_producers=1, message_size=DEFAULT_RECORD_SIZE, compression_type="none", security_protocol='PLAINTEXT', client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)): """ Setup: 1 node zk + 3 node kafka cluster Produce ~128MB worth of messages to a topic with 6 partitions. Required acks, topic replication factor, security protocol and message size are varied depending on arguments injected into this test. Collect and return aggregate throughput statistics after all messages have been acknowledged. (This runs ProducerPerformance.java under the hood) """ client_version = KafkaVersion(client_version) broker_version = KafkaVersion(broker_version) self.validate_versions(client_version, broker_version) self.start_kafka(security_protocol, security_protocol, broker_version) # Always generate the same total amount of data nrecords = int(self.target_data_size / message_size) self.producer = ProducerPerformanceService(self.test_context, num_producers, self.kafka, topic=topic, num_records=nrecords, record_size=message_size, throughput=-1, version=client_version, settings={ 'acks': acks, 'compression.type': compression_type, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory }) self.producer.run() return compute_aggregate_throughput(self.producer) @cluster(num_nodes=5) @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL'], compression_type=["none", "snappy"]) def test_long_term_producer_throughput(self, compression_type="none", security_protocol='PLAINTEXT', interbroker_security_protocol=None, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)): """ Setup: 1 node zk + 3 node kafka cluster Produce 10e6 100 byte messages to a topic with 6 partitions, replication-factor 3, and acks=1. Collect and return aggregate throughput statistics after all messages have been acknowledged. 
(This runs ProducerPerformance.java under the hood) """ client_version = KafkaVersion(client_version) broker_version = KafkaVersion(broker_version) self.validate_versions(client_version, broker_version) if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol, broker_version) self.producer = ProducerPerformanceService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=self.msgs_large, record_size=DEFAULT_RECORD_SIZE, throughput=-1, version=client_version, settings={ 'acks': 1, 'compression.type': compression_type, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory }, intermediate_stats=True) self.producer.run() summary = ["Throughput over long run, data > memory:"] data = {} # FIXME we should be generating a graph too # Try to break it into 5 blocks, but fall back to a smaller number if # there aren't even 5 elements block_size = max(len(self.producer.stats[0]) / 5, 1) nblocks = len(self.producer.stats[0]) / block_size for i in range(nblocks): subset = self.producer.stats[0][i * block_size:min( (i + 1) * block_size, len(self.producer.stats[0]))] if len(subset) == 0: summary.append(" Time block %d: (empty)" % i) data[i] = None else: records_per_sec = sum( [stat['records_per_sec'] for stat in subset]) / float(len(subset)) mb_per_sec = sum([stat['mbps'] for stat in subset]) / float(len(subset)) summary.append(" Time block %d: %f rec/sec (%f MB/s)" % (i, records_per_sec, mb_per_sec)) data[i] = throughput(records_per_sec, mb_per_sec) self.logger.info("\n".join(summary)) return data @cluster(num_nodes=5) @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL'], compression_type=["none", "snappy"]) @cluster(num_nodes=6) @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL'], compression_type=["none", "snappy"]) def test_end_to_end_latency(self, compression_type="none", security_protocol="PLAINTEXT", interbroker_security_protocol=None, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)): """ Setup: 1 node zk + 3 node kafka cluster Produce (acks = 1) and consume 10e3 messages to a topic with 6 partitions and replication-factor 3, measuring the latency between production and consumption of each message. Return aggregate latency statistics. 
(Under the hood, this simply runs EndToEndLatency.scala) """ client_version = KafkaVersion(client_version) broker_version = KafkaVersion(broker_version) self.validate_versions(client_version, broker_version) if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol, broker_version) self.logger.info("BENCHMARK: End to end latency") self.perf = EndToEndLatencyService(self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=10000, compression_type=compression_type, version=client_version) self.perf.run() return latency(self.perf.results[0]['latency_50th_ms'], self.perf.results[0]['latency_99th_ms'], self.perf.results[0]['latency_999th_ms']) @cluster(num_nodes=6) @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL'], compression_type=["none", "snappy"]) def test_producer_and_consumer(self, compression_type="none", security_protocol="PLAINTEXT", interbroker_security_protocol=None, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)): """ Setup: 1 node zk + 3 node kafka cluster Concurrently produce and consume 10e6 messages with a single producer and a single consumer, Return aggregate throughput statistics for both producer and consumer. (Under the hood, this runs ProducerPerformance.java, and ConsumerPerformance.scala) """ client_version = KafkaVersion(client_version) broker_version = KafkaVersion(broker_version) self.validate_versions(client_version, broker_version) if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol, broker_version) num_records = 10 * 1000 * 1000 # 10e6 self.producer = ProducerPerformanceService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1, version=client_version, settings={ 'acks': 1, 'compression.type': compression_type, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory }) self.consumer = ConsumerPerformanceService(self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, messages=num_records) Service.run_parallel(self.producer, self.consumer) data = { "producer": compute_aggregate_throughput(self.producer), "consumer": compute_aggregate_throughput(self.consumer) } summary = ["Producer + consumer:", str(data)] self.logger.info("\n".join(summary)) return data @cluster(num_nodes=6) @parametrize(security_protocol='SSL', interbroker_security_protocol='PLAINTEXT') @matrix(security_protocol=['PLAINTEXT', 'SSL'], compression_type=["none", "snappy"]) def test_consumer_throughput(self, compression_type="none", security_protocol="PLAINTEXT", interbroker_security_protocol=None, num_consumers=1, client_version=str(DEV_BRANCH), broker_version=str(DEV_BRANCH)): """ Consume 10e6 100-byte messages with 1 or more consumers from a topic with 6 partitions and report throughput. 
""" client_version = KafkaVersion(client_version) broker_version = KafkaVersion(broker_version) self.validate_versions(client_version, broker_version) if interbroker_security_protocol is None: interbroker_security_protocol = security_protocol self.start_kafka(security_protocol, interbroker_security_protocol, broker_version) num_records = 10 * 1000 * 1000 # 10e6 # seed kafka w/messages self.producer = ProducerPerformanceService( self.test_context, 1, self.kafka, topic=TOPIC_REP_THREE, num_records=num_records, record_size=DEFAULT_RECORD_SIZE, throughput=-1, version=client_version, settings={ 'acks': 1, 'compression.type': compression_type, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory }) self.producer.run() # consume self.consumer = ConsumerPerformanceService(self.test_context, num_consumers, self.kafka, topic=TOPIC_REP_THREE, messages=num_records) self.consumer.group = "test-consumer-group" self.consumer.run() return compute_aggregate_throughput(self.consumer) def validate_versions(self, client_version, broker_version): assert client_version <= broker_version, "Client version %s should be <= than broker version %s" ( client_version, broker_version)
class StreamsUpgradeTest(Test): """ Test upgrading Kafka Streams (all version combination) If metadata was changes, upgrade is more difficult Metadata version was bumped in 0.10.1.0 """ def __init__(self, test_context): super(StreamsUpgradeTest, self).__init__(test_context) self.topics = { 'echo' : { 'partitions': 5 }, 'data' : { 'partitions': 5 }, } self.leader = None def perform_broker_upgrade(self, to_version): self.logger.info("First pass bounce - rolling broker upgrade") for node in self.kafka.nodes: self.kafka.stop_node(node) node.version = KafkaVersion(to_version) self.kafka.start_node(node) @ignore @cluster(num_nodes=6) @matrix(from_version=broker_upgrade_versions, to_version=broker_upgrade_versions) def test_upgrade_downgrade_brokers(self, from_version, to_version): """ Start a smoke test client then perform rolling upgrades on the broker. """ if from_version == to_version: return self.replication = 3 self.partitions = 1 self.isr = 2 self.topics = { 'echo' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr}}, 'data' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'min' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'max' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'sum' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'dif' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'cnt' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'avg' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'wcnt' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} }, 'tagg' : { 'partitions': self.partitions, 'replication-factor': self.replication, 'configs': {"min.insync.replicas": self.isr} } } # Setup phase self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() # number of nodes needs to be >= 3 for the smoke test self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=KafkaVersion(from_version), topics=self.topics) self.kafka.start() # allow some time for topics to be created time.sleep(10) self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.processor1 = StreamsSmokeTestJobRunnerService(self.test_context, self.kafka) self.driver.start() self.processor1.start() time.sleep(15) self.perform_broker_upgrade(to_version) time.sleep(15) self.driver.wait() self.driver.stop() self.processor1.stop() node = self.driver.node node.account.ssh("grep ALL-RECORDS-DELIVERED %s" % self.driver.STDOUT_FILE, allow_fail=False) self.processor1.node.account.ssh_capture("grep SMOKE-TEST-CLIENT-CLOSED %s" % self.processor1.STDOUT_FILE, allow_fail=False) @ignore @matrix(from_version=metadata_2_versions, to_version=metadata_2_versions) def test_simple_upgrade_downgrade(self, from_version, to_version): """ Starts 3 KafkaStreams instances with <old_version>, and upgrades one-by-one to <new_version> """ if from_version == to_version: return self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() 
self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics=self.topics) self.kafka.start() self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.driver.disable_auto_terminate() self.processor1 = StreamsUpgradeTestJobRunnerService(self.test_context, self.kafka) self.processor2 = StreamsUpgradeTestJobRunnerService(self.test_context, self.kafka) self.processor3 = StreamsUpgradeTestJobRunnerService(self.test_context, self.kafka) self.driver.start() self.start_all_nodes_with(from_version) self.processors = [self.processor1, self.processor2, self.processor3] counter = 1 random.seed() # upgrade one-by-one via rolling bounce random.shuffle(self.processors) for p in self.processors: p.CLEAN_NODE_ENABLED = False self.do_rolling_bounce(p, None, to_version, counter) counter = counter + 1 # shutdown self.driver.stop() self.driver.wait() random.shuffle(self.processors) for p in self.processors: node = p.node with node.account.monitor_log(p.STDOUT_FILE) as monitor: p.stop() monitor.wait_until("UPGRADE-TEST-CLIENT-CLOSED", timeout_sec=60, err_msg="Never saw output 'UPGRADE-TEST-CLIENT-CLOSED' on" + str(node.account)) self.driver.stop() #@matrix(from_version=metadata_1_versions, to_version=backward_compatible_metadata_2_versions) @ignore @matrix(from_version=metadata_1_versions, to_version=metadata_3_versions) @matrix(from_version=metadata_2_versions, to_version=metadata_3_versions) def test_metadata_upgrade(self, from_version, to_version): """ Starts 3 KafkaStreams instances with version <from_version> and upgrades one-by-one to <to_version> """ self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=1, zk=self.zk, topics=self.topics) self.kafka.start() self.driver = StreamsSmokeTestDriverService(self.test_context, self.kafka) self.driver.disable_auto_terminate() self.processor1 = StreamsUpgradeTestJobRunnerService(self.test_context, self.kafka) self.processor2 = StreamsUpgradeTestJobRunnerService(self.test_context, self.kafka) self.processor3 = StreamsUpgradeTestJobRunnerService(self.test_context, self.kafka) self.driver.start() self.start_all_nodes_with(from_version) self.processors = [self.processor1, self.processor2, self.processor3] counter = 1 random.seed() # first rolling bounce random.shuffle(self.processors) for p in self.processors: p.CLEAN_NODE_ENABLED = False self.do_rolling_bounce(p, from_version[:-2], to_version, counter) counter = counter + 1 # second rolling bounce random.shuffle(self.processors) for p in self.processors: self.do_rolling_bounce(p, None, to_version, counter) counter = counter + 1 # shutdown self.driver.stop() self.driver.wait() random.shuffle(self.processors) for p in self.processors: node = p.node with node.account.monitor_log(p.STDOUT_FILE) as monitor: p.stop() monitor.wait_until("UPGRADE-TEST-CLIENT-CLOSED", timeout_sec=60, err_msg="Never saw output 'UPGRADE-TEST-CLIENT-CLOSED' on" + str(node.account)) self.driver.stop() def start_all_nodes_with(self, version): # start first with <version> self.prepare_for(self.processor1, version) node1 = self.processor1.node with node1.account.monitor_log(self.processor1.STDOUT_FILE) as monitor: with node1.account.monitor_log(self.processor1.LOG_FILE) as log_monitor: self.processor1.start() log_monitor.wait_until("Kafka version : " + version, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + version + " " + str(node1.account)) monitor.wait_until("processed 100 records from topic", timeout_sec=60, 
err_msg="Never saw output 'processed 100 records from topic' on" + str(node1.account)) # start second with <version> self.prepare_for(self.processor2, version) node2 = self.processor2.node with node1.account.monitor_log(self.processor1.STDOUT_FILE) as first_monitor: with node2.account.monitor_log(self.processor2.STDOUT_FILE) as second_monitor: with node2.account.monitor_log(self.processor2.LOG_FILE) as log_monitor: self.processor2.start() log_monitor.wait_until("Kafka version : " + version, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + version + " " + str(node2.account)) first_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(node1.account)) second_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(node2.account)) # start third with <version> self.prepare_for(self.processor3, version) node3 = self.processor3.node with node1.account.monitor_log(self.processor1.STDOUT_FILE) as first_monitor: with node2.account.monitor_log(self.processor2.STDOUT_FILE) as second_monitor: with node3.account.monitor_log(self.processor3.STDOUT_FILE) as third_monitor: with node3.account.monitor_log(self.processor3.LOG_FILE) as log_monitor: self.processor3.start() log_monitor.wait_until("Kafka version : " + version, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + version + " " + str(node3.account)) first_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(node1.account)) second_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(node2.account)) third_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(node3.account)) @staticmethod def prepare_for(processor, version): processor.node.account.ssh("rm -rf " + processor.PERSISTENT_ROOT, allow_fail=False) if version == str(DEV_VERSION): processor.set_version("") # set to TRUNK else: processor.set_version(version) def do_rolling_bounce(self, processor, upgrade_from, new_version, counter): first_other_processor = None second_other_processor = None for p in self.processors: if p != processor: if first_other_processor is None: first_other_processor = p else: second_other_processor = p node = processor.node first_other_node = first_other_processor.node second_other_node = second_other_processor.node # stop processor and wait for rebalance of others with first_other_node.account.monitor_log(first_other_processor.STDOUT_FILE) as first_other_monitor: with second_other_node.account.monitor_log(second_other_processor.STDOUT_FILE) as second_other_monitor: processor.stop() first_other_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(first_other_node.account)) second_other_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(second_other_node.account)) node.account.ssh_capture("grep UPGRADE-TEST-CLIENT-CLOSED %s" % processor.STDOUT_FILE, allow_fail=False) if upgrade_from is None: # upgrade disabled -- second round of rolling bounces roll_counter = ".1-" # second round of rolling bounces else: roll_counter = ".0-" # 
first round of rolling boundes node.account.ssh("mv " + processor.STDOUT_FILE + " " + processor.STDOUT_FILE + roll_counter + str(counter), allow_fail=False) node.account.ssh("mv " + processor.STDERR_FILE + " " + processor.STDERR_FILE + roll_counter + str(counter), allow_fail=False) node.account.ssh("mv " + processor.LOG_FILE + " " + processor.LOG_FILE + roll_counter + str(counter), allow_fail=False) if new_version == str(DEV_VERSION): processor.set_version("") # set to TRUNK else: processor.set_version(new_version) processor.set_upgrade_from(upgrade_from) grep_metadata_error = "grep \"org.apache.kafka.streams.errors.TaskAssignmentException: unable to decode subscription data: version=2\" " with node.account.monitor_log(processor.STDOUT_FILE) as monitor: with node.account.monitor_log(processor.LOG_FILE) as log_monitor: with first_other_node.account.monitor_log(first_other_processor.STDOUT_FILE) as first_other_monitor: with second_other_node.account.monitor_log(second_other_processor.STDOUT_FILE) as second_other_monitor: processor.start() log_monitor.wait_until("Kafka version : " + new_version, timeout_sec=60, err_msg="Could not detect Kafka Streams version " + new_version + " " + str(node.account)) first_other_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(first_other_node.account)) found = list(first_other_node.account.ssh_capture(grep_metadata_error + first_other_processor.STDERR_FILE, allow_fail=True)) if len(found) > 0: raise Exception("Kafka Streams failed with 'unable to decode subscription data: version=2'") second_other_monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(second_other_node.account)) found = list(second_other_node.account.ssh_capture(grep_metadata_error + second_other_processor.STDERR_FILE, allow_fail=True)) if len(found) > 0: raise Exception("Kafka Streams failed with 'unable to decode subscription data: version=2'") monitor.wait_until("processed 100 records from topic", timeout_sec=60, err_msg="Never saw output 'processed 100 records from topic' on" + str(node.account))
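# Condensed sketch of the double rolling bounce performed by test_metadata_upgrade above: the first
# pass restarts each instance on the new version with upgrade.from pointing at the old metadata
# version, the second pass restarts them again with the override removed. bounce_instance() is a
# hypothetical stand-in for the per-node stop/configure/start logic in do_rolling_bounce().
def double_rolling_bounce(instances, from_version, to_version, bounce_instance):
    for instance in instances:   # first pass: old and new subscription metadata must coexist
        bounce_instance(instance, version=to_version, upgrade_from=from_version)
    for instance in instances:   # second pass: drop upgrade.from once everyone runs the new version
        bounce_instance(instance, version=to_version, upgrade_from=None)

# Example with a stand-in bounce function that only records what it would do:
actions = []
double_rolling_bounce(["p1", "p2", "p3"], "0.10.1", "2.0",
                      lambda inst, version, upgrade_from: actions.append((inst, version, upgrade_from)))
print(actions)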
class ZookeeperTlsEncryptOnlyTest(ProduceConsumeValidateTest): """Tests TLS encryption-only (ssl.clientAuth=none) connectivity to zookeeper. """ def __init__(self, test_context): super(ZookeeperTlsEncryptOnlyTest, self).__init__(test_context=test_context) def setUp(self): self.topic = "test_topic" self.group = "group" self.producer_throughput = 100 self.num_producers = 1 self.num_consumers = 1 self.zk = ZookeeperService(self.test_context, num_nodes=3, zk_client_port=False, zk_client_secure_port=True, zk_tls_encrypt_only=True) self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, zk_client_secure=True, topics={ self.topic: { "partitions": 3, "replication-factor": 3, 'configs': { "min.insync.replicas": 2 } } }) def create_producer_and_consumer(self): self.producer = VerifiableProducer(self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int) self.consumer.group_id = self.group def perform_produce_consume_validation(self): self.create_producer_and_consumer() self.run_produce_consume_validate() self.producer.free() self.consumer.free() @cluster(num_nodes=9) def test_zk_tls_encrypt_only(self): self.zk.start() self.kafka.security_protocol = self.kafka.interbroker_security_protocol = "PLAINTEXT" self.kafka.start() self.perform_produce_consume_validation() # Make sure the ZooKeeper command line is able to talk to a TLS-enabled, encrypt-only ZooKeeper quorum # Test both create() and query(), each of which leverages the ZooKeeper command line # This tests the code in org.apache.zookeeper.ZooKeeperMainWithTlsSupportForKafka path = "/foo" value = "{\"bar\": 0}" self.zk.create(path, value=value) if self.zk.query(path) != value: raise Exception( "Error creating and then querying a znode using the CLI with a TLS-enabled ZooKeeper quorum" ) # Make sure the ConfigCommand CLI is able to talk to a TLS-enabled, encrypt-only ZooKeeper quorum # This is necessary for the bootstrap use case despite direct ZooKeeper connectivity being deprecated self.zk.describe(self.topic) # Make sure the AclCommand CLI is able to talk to a TLS-enabled, encrypt-only ZooKeeper quorum # This is necessary for the bootstrap use case despite direct ZooKeeper connectivity being deprecated self.zk.list_acls(self.topic)
class StreamsBrokerCompatibility(Test): """ These tests validate that Streams v0.10.2+ can connect to older brokers v0.10+ and that Streams fails fast for pre-0.10 brokers """ input = "brokerCompatibilitySourceTopic" output = "brokerCompatibilitySinkTopic" def __init__(self, test_context): super(StreamsBrokerCompatibility, self).__init__(test_context=test_context) self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(test_context, num_nodes=1, zk=self.zk, topics={ self.input: {'partitions': 1, 'replication-factor': 1}, self.output: {'partitions': 1, 'replication-factor': 1} }) self.processor = StreamsBrokerCompatibilityService(self.test_context, self.kafka) self.consumer = VerifiableConsumer(test_context, 1, self.kafka, self.output, "stream-broker-compatibility-verify-consumer") def setUp(self): self.zk.start() @parametrize(broker_version=str(DEV_BRANCH)) @parametrize(broker_version=str(LATEST_0_10_1)) def test_compatible_brokers(self, broker_version): self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.start() self.processor.start() self.consumer.start() self.processor.wait() num_consumed_mgs = self.consumer.total_consumed() self.consumer.stop() self.kafka.stop() assert num_consumed_mgs == 1, \ "Did expect to read exactly one message but got %d" % num_consumed_mgs @parametrize(broker_version=str(LATEST_0_10_0)) def test_fail_fast_on_incompatible_brokers(self, broker_version): self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.start() self.processor.start() self.processor.node.account.ssh(self.processor.start_cmd(self.processor.node)) with self.processor.node.account.monitor_log(self.processor.STDERR_FILE) as monitor: monitor.wait_until('Exception in thread "main" org.apache.kafka.streams.errors.StreamsException: Kafka Streams requires broker version 0.10.1.x or higher.', timeout_sec=60, err_msg="Never saw 'incompatible broker' error message " + str(self.processor.node.account)) self.kafka.stop()
class StreamsBrokerCompatibility(Test): """ These tests validates that - Streams works for older brokers 0.11 (or newer) - Streams w/ EOS-alpha works for older brokers 0.11 (or newer) - Streams w/ EOS-beta works for older brokers 2.5 (or newer) - Streams fails fast for older brokers 0.10.0, 0.10.2, and 0.10.1 - Streams w/ EOS-beta fails fast for older brokers 2.4 or older """ input = "brokerCompatibilitySourceTopic" output = "brokerCompatibilitySinkTopic" def __init__(self, test_context): super(StreamsBrokerCompatibility, self).__init__(test_context=test_context) self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService( test_context, num_nodes=1, zk=self.zk, topics={ self.input: { 'partitions': 1, 'replication-factor': 1 }, self.output: { 'partitions': 1, 'replication-factor': 1 } }, server_prop_overides=[[ "transaction.state.log.replication.factor", "1" ], ["transaction.state.log.min.isr", "1"]]) self.consumer = VerifiableConsumer( test_context, 1, self.kafka, self.output, "stream-broker-compatibility-verify-consumer") def setUp(self): self.zk.start() @parametrize(broker_version=str(LATEST_2_4)) @parametrize(broker_version=str(LATEST_2_3)) @parametrize(broker_version=str(LATEST_2_2)) @parametrize(broker_version=str(LATEST_2_1)) @parametrize(broker_version=str(LATEST_2_0)) @parametrize(broker_version=str(LATEST_1_1)) @parametrize(broker_version=str(LATEST_1_0)) @parametrize(broker_version=str(LATEST_0_11_0)) def test_compatible_brokers_eos_disabled(self, broker_version): self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.start() processor = StreamsBrokerCompatibilityService(self.test_context, self.kafka, "at_least_once") processor.start() self.consumer.start() processor.wait() wait_until( lambda: self.consumer.total_consumed() > 0, timeout_sec=30, err_msg= "Did expect to read a message but got none within 30 seconds.") self.consumer.stop() self.kafka.stop() @parametrize(broker_version=str(LATEST_2_4)) @parametrize(broker_version=str(LATEST_2_3)) @parametrize(broker_version=str(LATEST_2_2)) @parametrize(broker_version=str(LATEST_2_1)) @parametrize(broker_version=str(LATEST_2_0)) @parametrize(broker_version=str(LATEST_1_1)) @parametrize(broker_version=str(LATEST_1_0)) @parametrize(broker_version=str(LATEST_0_11_0)) def test_compatible_brokers_eos_alpha_enabled(self, broker_version): self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.start() processor = StreamsBrokerCompatibilityService(self.test_context, self.kafka, "exactly_once") processor.start() self.consumer.start() processor.wait() wait_until( lambda: self.consumer.total_consumed() > 0, timeout_sec=30, err_msg= "Did expect to read a message but got none within 30 seconds.") self.consumer.stop() self.kafka.stop() # TODO enable after 2.5 is released # @parametrize(broker_version=str(LATEST_2_5)) # def test_compatible_brokers_eos_beta_enabled(self, broker_version): # self.kafka.set_version(KafkaVersion(broker_version)) # self.kafka.start() # # processor = StreamsBrokerCompatibilityService(self.test_context, self.kafka, "exactly_once_beta") # processor.start() # # self.consumer.start() # # processor.wait() # # wait_until(lambda: self.consumer.total_consumed() > 0, timeout_sec=30, err_msg="Did expect to read a message but got none within 30 seconds.") # # self.consumer.stop() # self.kafka.stop() @parametrize(broker_version=str(LATEST_0_10_2)) @parametrize(broker_version=str(LATEST_0_10_1)) @parametrize(broker_version=str(LATEST_0_10_0)) def test_fail_fast_on_incompatible_brokers(self, 
broker_version): self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.start() processor = StreamsBrokerCompatibilityService(self.test_context, self.kafka, "at_least_once") with processor.node.account.monitor_log( processor.STDERR_FILE) as monitor: processor.start() monitor.wait_until( 'FATAL: An unexpected exception org.apache.kafka.common.errors.UnsupportedVersionException', timeout_sec=60, err_msg= "Never saw 'FATAL: An unexpected exception org.apache.kafka.common.errors.UnsupportedVersionException " + str(processor.node.account)) self.kafka.stop() @parametrize(broker_version=str(LATEST_2_4)) @parametrize(broker_version=str(LATEST_2_3)) @parametrize(broker_version=str(LATEST_2_2)) @parametrize(broker_version=str(LATEST_2_1)) @parametrize(broker_version=str(LATEST_2_0)) @parametrize(broker_version=str(LATEST_1_1)) @parametrize(broker_version=str(LATEST_1_0)) @parametrize(broker_version=str(LATEST_0_11_0)) def test_fail_fast_on_incompatible_brokers_if_eos_beta_enabled( self, broker_version): self.kafka.set_version(KafkaVersion(broker_version)) self.kafka.start() processor = StreamsBrokerCompatibilityService(self.test_context, self.kafka, "exactly_once_beta") with processor.node.account.monitor_log( processor.STDERR_FILE) as monitor: with processor.node.account.monitor_log(processor.LOG_FILE) as log: processor.start() log.wait_until( 'Shutting down because the Kafka cluster seems to be on a too old version. Setting processing\.guarantee="exactly_once_beta" requires broker version 2\.5 or higher\.', timeout_sec=60, err_msg= "Never saw 'Shutting down, because the Kafka cluster seems to be on a too old version. Setting `processing.guarantee=\"exaclty_once_beta\"` requires broker version 2.5 or higher.' log message " + str(processor.node.account)) monitor.wait_until( 'FATAL: An unexpected exception org.apache.kafka.common.errors.UnsupportedVersionException', timeout_sec=60, err_msg= "Never saw 'FATAL: An unexpected exception org.apache.kafka.common.errors.UnsupportedVersionException' error message " + str(processor.node.account)) self.kafka.stop()
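# Quick reference (standalone sketch) for the compatibility matrix these StreamsBrokerCompatibility
# tests exercise, taken from the class docstring above: the minimum broker version required for
# each processing.guarantee setting of this Streams version.
MIN_BROKER_FOR_GUARANTEE = {
    "at_least_once": "0.11.0",       # pre-0.11 brokers fail fast
    "exactly_once": "0.11.0",        # EOS-alpha needs broker-side transaction support
    "exactly_once_beta": "2.5",      # EOS-beta needs brokers on 2.5 or newer
}
for guarantee, version in sorted(MIN_BROKER_FOR_GUARANTEE.items()):
    print("%s -> brokers %s or newer" % (guarantee, version))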
class TestSecurityRollingUpgrade(ProduceConsumeValidateTest): """Tests a rolling upgrade from PLAINTEXT to a secured cluster """ def __init__(self, test_context): super(TestSecurityRollingUpgrade, self).__init__(test_context=test_context) def setUp(self): self.acls = ACLs(self.test_context) self.topic = "test_topic" self.group = "group" self.producer_throughput = 100 self.num_producers = 1 self.num_consumers = 1 self.zk = ZookeeperService(self.test_context, num_nodes=1) self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, topics={self.topic: { "partitions": 3, "replication-factor": 3, 'configs': {"min.insync.replicas": 2}}}) self.zk.start() def create_producer_and_consumer(self): self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput) self.consumer = ConsoleConsumer( self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=60000, message_validator=is_int, new_consumer=True) self.consumer.group_id = "group" def bounce(self): self.kafka.start_minikdc() for node in self.kafka.nodes: self.kafka.stop_node(node) self.kafka.start_node(node) time.sleep(10) def roll_in_secured_settings(self, client_protocol, broker_protocol): # Roll cluster to include inter broker security protocol. self.kafka.interbroker_security_protocol = broker_protocol self.kafka.open_port(client_protocol) self.kafka.open_port(broker_protocol) self.bounce() # Roll cluster to disable PLAINTEXT port self.kafka.close_port('PLAINTEXT') self.set_authorizer_and_bounce(client_protocol, broker_protocol) def set_authorizer_and_bounce(self, client_protocol, broker_protocol): self.kafka.authorizer_class_name = KafkaService.SIMPLE_AUTHORIZER self.acls.set_acls(client_protocol, self.kafka, self.zk, self.topic, self.group) self.acls.set_acls(broker_protocol, self.kafka, self.zk, self.topic, self.group) self.bounce() def open_secured_port(self, client_protocol): self.kafka.security_protocol = client_protocol self.kafka.open_port(client_protocol) self.kafka.start_minikdc() self.bounce() def add_sasl_mechanism(self, new_client_sasl_mechanism): self.kafka.client_sasl_mechanism = new_client_sasl_mechanism self.kafka.start_minikdc() self.bounce() def roll_in_sasl_mechanism(self, security_protocol, new_sasl_mechanism): # Roll cluster to update inter-broker SASL mechanism. This disables the old mechanism. self.kafka.interbroker_sasl_mechanism = new_sasl_mechanism self.bounce() # Bounce again with ACLs for new mechanism self.set_authorizer_and_bounce(security_protocol, security_protocol) @matrix(client_protocol=["SSL", "SASL_PLAINTEXT", "SASL_SSL"]) def test_rolling_upgrade_phase_one(self, client_protocol): """ Start with a PLAINTEXT cluster, open a SECURED port, via a rolling upgrade, ensuring we could produce and consume throughout over PLAINTEXT. Finally check we can produce and consume the new secured port. 
""" self.kafka.interbroker_security_protocol = "PLAINTEXT" self.kafka.security_protocol = "PLAINTEXT" self.kafka.start() # Create PLAINTEXT producer and consumer self.create_producer_and_consumer() # Rolling upgrade, opening a secure protocol, ensuring the Plaintext producer/consumer continues to run self.run_produce_consume_validate(self.open_secured_port, client_protocol) # Now we can produce and consume via the secured port self.kafka.security_protocol = client_protocol self.create_producer_and_consumer() self.run_produce_consume_validate(lambda: time.sleep(1)) @matrix(client_protocol=["SASL_SSL", "SSL", "SASL_PLAINTEXT"], broker_protocol=["SASL_SSL", "SSL", "SASL_PLAINTEXT"]) def test_rolling_upgrade_phase_two(self, client_protocol, broker_protocol): """ Start with a PLAINTEXT cluster with a second Secured port open (i.e. result of phase one). Start an Producer and Consumer via the SECURED port Incrementally upgrade to add inter-broker be the secure protocol Incrementally upgrade again to add ACLs as well as disabling the PLAINTEXT port Ensure the producer and consumer ran throughout """ #Given we have a broker that has both secure and PLAINTEXT ports open self.kafka.security_protocol = client_protocol self.kafka.interbroker_security_protocol = "PLAINTEXT" self.kafka.start() #Create Secured Producer and Consumer self.create_producer_and_consumer() #Roll in the security protocol. Disable Plaintext. Ensure we can produce and Consume throughout self.run_produce_consume_validate(self.roll_in_secured_settings, client_protocol, broker_protocol) @parametrize(new_client_sasl_mechanism='PLAIN') def test_rolling_upgrade_sasl_mechanism_phase_one(self, new_client_sasl_mechanism): """ Start with a SASL/GSSAPI cluster, add new SASL mechanism, via a rolling upgrade, ensuring we could produce and consume throughout over SASL/GSSAPI. Finally check we can produce and consume using new mechanism. """ self.kafka.interbroker_security_protocol = "SASL_SSL" self.kafka.security_protocol = "SASL_SSL" self.kafka.client_sasl_mechanism = "GSSAPI" self.kafka.interbroker_sasl_mechanism = "GSSAPI" self.kafka.start() # Create SASL/GSSAPI producer and consumer self.create_producer_and_consumer() # Rolling upgrade, adding new SASL mechanism, ensuring the GSSAPI producer/consumer continues to run self.run_produce_consume_validate(self.add_sasl_mechanism, new_client_sasl_mechanism) # Now we can produce and consume using the new SASL mechanism self.kafka.client_sasl_mechanism = new_client_sasl_mechanism self.create_producer_and_consumer() self.run_produce_consume_validate(lambda: time.sleep(1)) @parametrize(new_sasl_mechanism='PLAIN') def test_rolling_upgrade_sasl_mechanism_phase_two(self, new_sasl_mechanism): """ Start with a SASL cluster with GSSAPI for inter-broker and a second mechanism for clients (i.e. result of phase one). 
Start a Producer and Consumer using the second mechanism Incrementally upgrade to set inter-broker to the second mechanism and disable GSSAPI Incrementally upgrade again to add ACLs Ensure the producer and consumer run throughout """ # Start with a broker that has GSSAPI for inter-broker and a second mechanism for clients self.kafka.security_protocol = "SASL_SSL" self.kafka.interbroker_security_protocol = "SASL_SSL" self.kafka.client_sasl_mechanism = new_sasl_mechanism self.kafka.interbroker_sasl_mechanism = "GSSAPI" self.kafka.start() # Create a Producer and Consumer using the second mechanism self.create_producer_and_consumer() # Roll in the second SASL mechanism for inter-broker, disabling the first mechanism. Ensure we can produce and consume throughout self.run_produce_consume_validate(self.roll_in_sasl_mechanism, self.kafka.security_protocol, new_sasl_mechanism)
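# A minimal, self-contained sketch of the rolling-bounce pattern used by bounce() above, with the
# fixed sleep replaced by an explicit readiness poll. The broker objects and the is_registered
# callback are hypothetical stand-ins, not part of KafkaService.
import time

def rolling_bounce(brokers, is_registered, settle_timeout_sec=60, poll_sec=2):
    """Restart brokers one at a time, waiting for each to rejoin before moving on."""
    for broker in brokers:
        broker.stop()
        broker.start()
        deadline = time.time() + settle_timeout_sec
        while not is_registered(broker):
            if time.time() > deadline:
                raise RuntimeError("Broker %s did not rejoin within %d seconds" % (broker, settle_timeout_sec))
            time.sleep(poll_sec)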
class ReplicaScaleTest(Test): def __init__(self, test_context): super(ReplicaScaleTest, self).__init__(test_context=test_context) self.test_context = test_context self.zk = ZookeeperService(test_context, num_nodes=1) self.kafka = KafkaService(self.test_context, num_nodes=8, zk=self.zk) def setUp(self): self.zk.start() self.kafka.start() def teardown(self): # Need to increase the timeout due to partition count for node in self.kafka.nodes: self.kafka.stop_node(node, clean_shutdown=False, timeout_sec=60) self.kafka.stop() self.zk.stop() @cluster(num_nodes=12) @parametrize(topic_count=50, partition_count=34, replication_factor=3) def test_produce_consume(self, topic_count, partition_count, replication_factor): topics_create_start_time = time.time() for i in range(topic_count): topic = "replicas_produce_consume_%d" % i print("Creating topic %s" % topic) # Force some stdout for Jenkins topic_cfg = { "topic": topic, "partitions": partition_count, "replication-factor": replication_factor, "configs": { "min.insync.replicas": 2 } } self.kafka.create_topic(topic_cfg) topics_create_end_time = time.time() self.logger.info("Time to create topics: %d" % (topics_create_end_time - topics_create_start_time)) producer_workload_service = ProduceBenchWorkloadService( self.test_context, self.kafka) consumer_workload_service = ConsumeBenchWorkloadService( self.test_context, self.kafka) trogdor = TrogdorService(context=self.test_context, client_services=[ self.kafka, producer_workload_service, consumer_workload_service ]) trogdor.start() produce_spec = ProduceBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, producer_workload_service.producer_node, producer_workload_service.bootstrap_servers, target_messages_per_sec=10000, max_messages=3400000, producer_conf={}, admin_client_conf={}, common_client_conf={}, inactive_topics={}, active_topics={ "replicas_produce_consume_[0-2]": { "numPartitions": partition_count, "replicationFactor": replication_factor } }) produce_workload = trogdor.create_task("replicas-produce-workload", produce_spec) produce_workload.wait_for_done(timeout_sec=600) self.logger.info("Completed produce bench") consume_spec = ConsumeBenchWorkloadSpec( 0, TaskSpec.MAX_DURATION_MS, consumer_workload_service.consumer_node, consumer_workload_service.bootstrap_servers, target_messages_per_sec=10000, max_messages=3400000, consumer_conf={}, admin_client_conf={}, common_client_conf={}, active_topics=["replicas_produce_consume_[0-2]"]) consume_workload = trogdor.create_task("replicas-consume-workload", consume_spec) consume_workload.wait_for_done(timeout_sec=600) self.logger.info("Completed consume bench") trogdor.stop() @cluster(num_nodes=12) @parametrize(topic_count=50, partition_count=34, replication_factor=3) def test_clean_bounce(self, topic_count, partition_count, replication_factor): topics_create_start_time = time.time() for i in range(topic_count): topic = "topic-%04d" % i print("Creating topic %s" % topic) # Force some stdout for Jenkins topic_cfg = { "topic": topic, "partitions": partition_count, "replication-factor": replication_factor, "configs": { "min.insync.replicas": 2 } } self.kafka.create_topic(topic_cfg) topics_create_end_time = time.time() self.logger.info("Time to create topics: %d" % (topics_create_end_time - topics_create_start_time)) restart_times = [] for node in self.kafka.nodes: broker_bounce_start_time = time.time() self.kafka.stop_node(node, clean_shutdown=True, timeout_sec=600) self.kafka.start_node(node, timeout_sec=600) broker_bounce_end_time = time.time() 
restart_times.append(broker_bounce_end_time - broker_bounce_start_time) self.logger.info( "Time to restart %s: %d" % (node.name, broker_bounce_end_time - broker_bounce_start_time)) self.logger.info("Total time to restart: %s" % sum(restart_times)) delete_start_time = time.time() for i in range(topic_count): topic = "topic-%04d" % i self.logger.info("Deleting topic %s" % topic) self.kafka.delete_topic(topic) delete_end_time = time.time() self.logger.info("Time to delete topics: %d" % (delete_end_time - delete_start_time))
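# The scale test above brackets each phase (topic creation, broker restarts, topic deletion) with
# time.time() pairs. A small context manager, sketched here as an assumption rather than an
# existing helper, expresses the same measurement in one place.
import time
from contextlib import contextmanager

@contextmanager
def timed_phase(label, logger):
    """Log how long the enclosed block took, in seconds."""
    start = time.time()
    yield
    logger.info("Time for %s: %.2f s" % (label, time.time() - start))

# Hypothetical usage:
#     with timed_phase("topic creation", self.logger):
#         for cfg in topic_configs:
#             self.kafka.create_topic(cfg)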
class StreamsSimpleBenchmarkTest(Test): """ Simple benchmark of Kafka Streams. """ def __init__(self, test_context): super(StreamsSimpleBenchmarkTest, self).__init__(test_context) # these values could be updated in ad-hoc benchmarks self.key_skew = 0 self.value_size = 1024 self.num_records = 10000000L self.num_threads = 1 self.replication = 1 @cluster(num_nodes=12) @matrix(test=["consume", "consumeproduce", "streams-simple", "streams-count", "streams-join"], scale=[1]) def test_simple_benchmark(self, test, scale): """ Run simple Kafka Streams benchmark """ self.driver = [None] * (scale + 1) self.final = {} ############# # SETUP PHASE ############# self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={ 'simpleBenchmarkSourceTopic1' : { 'partitions': scale, 'replication-factor': self.replication }, 'simpleBenchmarkSourceTopic2' : { 'partitions': scale, 'replication-factor': self.replication }, 'simpleBenchmarkSinkTopic' : { 'partitions': scale, 'replication-factor': self.replication }, 'yahooCampaigns' : { 'partitions': 20, 'replication-factor': self.replication }, 'yahooEvents' : { 'partitions': 20, 'replication-factor': self.replication } }) self.kafka.log_level = "INFO" self.kafka.start() load_test = "" if test == ALL_TEST: load_test = "load-two" if test in STREAMS_JOIN_TESTS or test == STREAMS_JOIN_TEST: load_test = "load-two" if test in STREAMS_COUNT_TESTS or test == STREAMS_COUNT_TEST: load_test = "load-one" if test in STREAMS_SIMPLE_TESTS or test == STREAMS_SIMPLE_TEST: load_test = "load-one" if test in NON_STREAMS_TESTS: load_test = "load-one" ################ # LOAD PHASE ################ self.load_driver = StreamsSimpleBenchmarkService(self.test_context, self.kafka, load_test, self.num_threads, self.num_records, self.key_skew, self.value_size) self.load_driver.start() self.load_driver.wait(3600) # wait at most 1 hour self.load_driver.stop() if test == ALL_TEST: for single_test in STREAMS_SIMPLE_TESTS + STREAMS_COUNT_TESTS + STREAMS_JOIN_TESTS: self.execute(single_test, scale) elif test == STREAMS_SIMPLE_TEST: for single_test in STREAMS_SIMPLE_TESTS: self.execute(single_test, scale) elif test == STREAMS_COUNT_TEST: for single_test in STREAMS_COUNT_TESTS: self.execute(single_test, scale) elif test == STREAMS_JOIN_TEST: for single_test in STREAMS_JOIN_TESTS: self.execute(single_test, scale) else: self.execute(test, scale) return self.final def execute(self, test, scale): ################ # RUN PHASE ################ for num in range(0, scale): self.driver[num] = StreamsSimpleBenchmarkService(self.test_context, self.kafka, test, self.num_threads, self.num_records, self.key_skew, self.value_size) self.driver[num].start() ####################### # STOP + COLLECT PHASE ####################### data = [None] * (scale) for num in range(0, scale): self.driver[num].wait() self.driver[num].stop() self.driver[num].node.account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False) data[num] = self.driver[num].collect_data(self.driver[num].node, "") self.driver[num].read_jmx_output_all_nodes() for num in range(0, scale): for key in data[num]: self.final[key + "-" + str(num)] = data[num][key] for key in sorted(self.driver[num].jmx_stats[0]): self.logger.info("%s: %s" % (key, self.driver[num].jmx_stats[0][key])) self.final[test + "-jmx-avg-" + str(num)] = self.driver[num].average_jmx_value self.final[test + "-jmx-max-" + str(num)] = 
self.driver[num].maximum_jmx_value
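# The collect phase above flattens every driver's metrics into self.final, suffixing each key with
# the driver index. A standalone sketch of that merge step (illustrative only; the metric names and
# values come straight from collect_data()):
def merge_driver_results(per_driver_data):
    """per_driver_data: list of dicts, one per benchmark driver instance."""
    merged = {}
    for idx, metrics in enumerate(per_driver_data):
        for key, value in metrics.items():
            merged["%s-%d" % (key, idx)] = value
    return merged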
class Log4jAppenderTest(Test): """ Tests KafkaLog4jAppender using VerifiableKafkaLog4jAppender that appends increasing ints to a Kafka topic """ def __init__(self, test_context): super(Log4jAppenderTest, self).__init__(test_context) self.num_zk = 1 self.num_brokers = 1 self.messages_received_count = 0 self.topics = {TOPIC: {'partitions': 1, 'replication-factor': 1}} self.zk = ZookeeperService(test_context, self.num_zk) def setUp(self): self.zk.start() def start_kafka(self, security_protocol, interbroker_security_protocol): self.kafka = KafkaService( self.test_context, self.num_brokers, self.zk, security_protocol=security_protocol, interbroker_security_protocol=interbroker_security_protocol, topics=self.topics) self.kafka.start() def start_appender(self, security_protocol): self.appender = KafkaLog4jAppender(self.test_context, self.num_brokers, self.kafka, TOPIC, MAX_MESSAGES, security_protocol=security_protocol) self.appender.start() def custom_message_validator(self, msg): if msg and "INFO : org.apache.kafka.tools.VerifiableLog4jAppender" in msg: self.logger.debug("Received message: %s" % msg) self.messages_received_count += 1 def start_consumer(self, security_protocol): enable_new_consumer = security_protocol != SecurityConfig.PLAINTEXT self.consumer = ConsoleConsumer( self.test_context, num_nodes=self.num_brokers, kafka=self.kafka, topic=TOPIC, consumer_timeout_ms=1000, new_consumer=enable_new_consumer, message_validator=self.custom_message_validator) self.consumer.start() @cluster(num_nodes=4) @matrix(security_protocol=['PLAINTEXT', 'SSL']) @cluster(num_nodes=5) @matrix(security_protocol=['SASL_PLAINTEXT', 'SASL_SSL']) def test_log4j_appender(self, security_protocol='PLAINTEXT'): """ Tests if KafkaLog4jAppender is producing to Kafka topic :return: None """ self.start_kafka(security_protocol, security_protocol) self.start_appender(security_protocol) self.appender.wait() self.start_consumer(security_protocol) node = self.consumer.nodes[0] wait_until(lambda: self.consumer.alive(node), timeout_sec=10, backoff_sec=.2, err_msg="Consumer was too slow to start") # Verify consumed messages count wait_until( lambda: self.messages_received_count == MAX_MESSAGES, timeout_sec=10, err_msg="Timed out waiting to consume expected number of messages." ) self.consumer.stop()
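# The message counting done by custom_message_validator above can also be packaged as a small
# callable object, sketched here as an assumption (the marker string is copied from the test; the
# class itself is not part of the framework):
class AppenderMessageCounter(object):
    MARKER = "INFO : org.apache.kafka.tools.VerifiableLog4jAppender"

    def __init__(self):
        self.count = 0

    def __call__(self, msg):
        # Count only lines emitted by the verifiable appender, then pass the message through.
        if msg and self.MARKER in msg:
            self.count += 1
        return msg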
class ClientCompatibilityTestNewBroker(ProduceConsumeValidateTest): def __init__(self, test_context): super(ClientCompatibilityTestNewBroker, self).__init__(test_context=test_context) def setUp(self): self.topic = "test_topic" self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() # Producer and consumer self.producer_throughput = 10000 self.num_producers = 1 self.num_consumers = 1 self.messages_per_producer = 1000 @cluster(num_nodes=6) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(DEV_BRANCH), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(DEV_BRANCH), compression_types=["none"], new_consumer=False, timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(LATEST_0_9), compression_types=["none"], new_consumer=False, timestamp_type=None) @parametrize(producer_version=str(DEV_BRANCH), consumer_version=str(LATEST_0_9), compression_types=["snappy"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_0_11_0), consumer_version=str(LATEST_0_11_0), compression_types=["gzip"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_0_10_2), consumer_version=str(LATEST_0_10_2), compression_types=["lz4"], timestamp_type=str("CreateTime")) @parametrize(producer_version=str(LATEST_0_10_1), consumer_version=str(LATEST_0_10_1), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(LATEST_0_10_0), consumer_version=str(LATEST_0_10_0), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(LATEST_0_9), consumer_version=str(DEV_BRANCH), compression_types=["none"], new_consumer=False, timestamp_type=None) @parametrize(producer_version=str(LATEST_0_9), consumer_version=str(DEV_BRANCH), compression_types=["snappy"], timestamp_type=None) @parametrize(producer_version=str(LATEST_0_9), consumer_version=str(LATEST_0_9), compression_types=["snappy"], timestamp_type=str("LogAppendTime")) @parametrize(producer_version=str(LATEST_0_8_2), consumer_version=str(LATEST_0_8_2), compression_types=["none"], new_consumer=False, timestamp_type=None) def test_compatibility(self, producer_version, consumer_version, compression_types, new_consumer=True, timestamp_type=None): self.kafka = KafkaService(self.test_context, num_nodes=3, zk=self.zk, version=DEV_BRANCH, topics={ self.topic: { "partitions": 3, "replication-factor": 3, 'configs': { "min.insync.replicas": 2 } } }) for node in self.kafka.nodes: if timestamp_type is not None: node.config[ config_property.MESSAGE_TIMESTAMP_TYPE] = timestamp_type self.kafka.start() self.producer = VerifiableProducer( self.test_context, self.num_producers, self.kafka, self.topic, throughput=self.producer_throughput, message_validator=is_int, compression_types=compression_types, version=KafkaVersion(producer_version)) self.consumer = ConsoleConsumer(self.test_context, self.num_consumers, self.kafka, self.topic, consumer_timeout_ms=30000, new_consumer=new_consumer, message_validator=is_int, version=KafkaVersion(consumer_version)) self.run_produce_consume_validate(lambda: wait_until( lambda: self.producer.each_produced_at_least( self.messages_per_producer) == True, timeout_sec=120, backoff_sec=1, err_msg= "Producer did not produce all messages in reasonable amount of time" ))
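# These tests lean heavily on ducktape's wait_until to poll conditions such as "each producer has
# produced at least N messages". A minimal stand-in with the same shape, shown only to illustrate
# the polling contract (the real implementation lives in ducktape.utils.util):
import time

def wait_until_condition(condition, timeout_sec, backoff_sec=1, err_msg=""):
    """Poll condition() until it returns True or timeout_sec elapses."""
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        if condition():
            return
        time.sleep(backoff_sec)
    raise RuntimeError(err_msg or "Condition not met within %d seconds" % timeout_sec)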
class StreamsSimpleBenchmarkTest(Test): """ Simple benchmark of Kafka Streams. """ def __init__(self, test_context): super(StreamsSimpleBenchmarkTest, self).__init__(test_context) self.num_records = 2000000L self.replication = 1 @cluster(num_nodes=9) @matrix(test=["produce", "consume", "count", "processstream", "processstreamwithsink", "processstreamwithstatestore", "processstreamwithcachedstatestore", "kstreamktablejoin", "kstreamkstreamjoin", "ktablektablejoin"], scale=[1, 2, 3]) def test_simple_benchmark(self, test, scale): """ Run simple Kafka Streams benchmark """ self.driver = [None] * (scale + 1) node = [None] * (scale) data = [None] * (scale) ############# # SETUP PHASE ############# self.zk = ZookeeperService(self.test_context, num_nodes=1) self.zk.start() self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={ 'simpleBenchmarkSourceTopic' : { 'partitions': scale, 'replication-factor': self.replication }, 'countTopic' : { 'partitions': scale, 'replication-factor': self.replication }, 'simpleBenchmarkSinkTopic' : { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic1KStreamKStream' : { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic2KStreamKStream' : { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic1KStreamKTable' : { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic2KStreamKTable' : { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic1KTableKTable' : { 'partitions': scale, 'replication-factor': self.replication }, 'joinSourceTopic2KTableKTable' : { 'partitions': scale, 'replication-factor': self.replication } }) self.kafka.start() ################ # LOAD PHASE ################ self.load_driver = StreamsSimpleBenchmarkService(self.test_context, self.kafka, self.num_records * scale, "true", test) self.load_driver.start() self.load_driver.wait() self.load_driver.stop() ################ # RUN PHASE ################ for num in range(0, scale): self.driver[num] = StreamsSimpleBenchmarkService(self.test_context, self.kafka, self.num_records/(scale), "false", test) self.driver[num].start() ####################### # STOP + COLLECT PHASE ####################### for num in range(0, scale): self.driver[num].wait() self.driver[num].stop() node[num] = self.driver[num].node node[num].account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False) data[num] = self.driver[num].collect_data(node[num], "" ) final = {} for num in range(0, scale): for key in data[num]: final[key + str(num)] = data[num][key] return final
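# The run phase above gives each of the `scale` driver instances num_records/scale records,
# truncating with integer division. A sketch of an alternative split that preserves the total by
# assigning the remainder to the last instance (an assumption, not what the test does):
def split_records(total_records, scale):
    base = total_records // scale
    counts = [base] * scale
    counts[-1] += total_records - base * scale
    return counts

# e.g. split_records(2000000, 3) -> [666666, 666666, 666668]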