def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    """Check the RecordMetadata returned for explicit and generated timestamps.

    A first record is sent with an explicit ``timestamp_ms`` (and with headers
    when the broker is 0.11.0 or newer) and every field of the returned
    metadata is verified.  A second record is then sent with
    ``timestamp_ms=None`` and its reported timestamp must be within one second
    of the local send time.

    Args:
        kafka_broker: broker fixture exposing ``host`` and ``port``.
        compression: compression type handed to ``KafkaProducer``.
    """
    if compression == 'zstd' and env_kafka_version() < (2, 1, 0):
        pytest.skip('zstd requires 2.1.0 or more')
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    # Release the producer on every exit path (pass, failed assertion, or the
    # mid-test skip below) so it does not outlive this test.
    try:
        magic = producer._max_usable_produce_magic()

        # record headers are supported in 0.11.0
        if env_kafka_version() < (0, 11, 0):
            headers = None
        else:
            headers = [("Header Key", b"Header Value")]

        topic = random_string(5)
        future = producer.send(
            topic,
            value=b"Simple value", key=b"Simple key", headers=headers,
            timestamp_ms=9999999,
            partition=0)
        record = future.get(timeout=5)
        assert record is not None
        assert record.topic == topic
        assert record.partition == 0
        assert record.topic_partition == TopicPartition(topic, 0)
        assert record.offset == 0
        if magic >= 1:
            assert record.timestamp == 9999999
        else:
            assert record.timestamp == -1  # NO_TIMESTAMP

        # The expected checksum depends on the message format in use; no
        # checksum is reported for magic >= 2.
        if magic >= 2:
            assert record.checksum is None
        elif magic == 1:
            assert record.checksum == 1370034956
        else:
            assert record.checksum == 3296137851

        assert record.serialized_key_size == 10
        assert record.serialized_value_size == 12
        if headers:
            assert record.serialized_header_size == 22

        if magic == 0:
            pytest.skip('generated timestamp case is skipped for broker 0.9 and below')
        send_time = time.time() * 1000
        future = producer.send(
            topic,
            value=b"Simple value", key=b"Simple key", timestamp_ms=None,
            partition=0)
        record = future.get(timeout=5)
        assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
    finally:
        producer.close()
Пример #2
0
    def _create_topic(self,
                      topic_name,
                      num_partitions,
                      replication_factor,
                      timeout_ms=10000):
        """Create ``topic_name``, trying the fastest available method first.

        Three strategies are used, in order:

        1. a metadata request, when ``self.auto_create_topic`` is set and the
           requested layout equals the fixture defaults;
        2. a ``CreateTopicsRequest`` when the broker is 0.10.1.0 or newer;
        3. the ``kafka.admin.TopicCommand`` command-line tool otherwise.

        Args:
            topic_name: name of the topic to create.
            num_partitions: partition count; ``None`` means ``self.partitions``.
            replication_factor: replica count; ``None`` means ``self.replicas``.
            timeout_ms: timeout passed to the CreateTopics request.

        Raises:
            RuntimeError: the command-line tool exited non-zero for a reason
                other than the topic already existing.
            Exception: whatever ``errors.for_code`` maps a non-zero
                CreateTopics error code to.
        """
        if num_partitions is None:
            num_partitions = self.partitions
        if replication_factor is None:
            replication_factor = self.replicas

        # Try different methods to create a topic, from the fastest to the slowest
        if self.auto_create_topic and \
           num_partitions == self.partitions and \
           replication_factor == self.replicas:
            self._send_request(MetadataRequest[0]([topic_name]))
        elif env_kafka_version() >= (0, 10, 1, 0):
            request = CreateTopicsRequest[0](
                [(topic_name, num_partitions, replication_factor, [], [])],
                timeout_ms)
            result = self._send_request(request, timeout=timeout_ms)
            for topic_result in result[0].topic_error_codes:
                error_code = topic_result[1]
                if error_code != 0:
                    raise errors.for_code(error_code)
        else:
            # num_partitions / replication_factor were already defaulted
            # above, so they can be passed straight through here.
            args = self.kafka_run_class_args('kafka.admin.TopicCommand',
                                             '--zookeeper', '%s:%s/%s' % (self.zookeeper.host,
                                                                          self.zookeeper.port,
                                                                          self.zk_chroot),
                                             '--create',
                                             '--topic', topic_name,
                                             '--partitions', num_partitions,
                                             '--replication-factor', replication_factor)
            if env_kafka_version() >= (0, 10):
                args.append('--if-not-exists')
            env = self.kafka_run_class_env()
            proc = subprocess.Popen(args,
                                    env=env,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            stdout, stderr = proc.communicate()
            if proc.returncode != 0:
                # communicate() returns bytes for binary pipes, so the marker
                # must be bytes too; a str marker raises TypeError on Python 3.
                if b'kafka.common.TopicExistsException' not in stdout:
                    self.out("Failed to create topic %s" % (topic_name, ))
                    self.out(stdout)
                    self.out(stderr)
                    raise RuntimeError("Failed to create topic %s" %
                                       (topic_name, ))
    def _create_topic(self,
                      topic_name,
                      num_partitions=None,
                      replication_factor=None,
                      timeout_ms=10000):
        """Create ``topic_name`` via the quickest mechanism that applies.

        ``num_partitions`` and ``replication_factor`` fall back to
        ``self.partitions`` and ``self.replicas`` when left as ``None``.
        The metadata route is taken when ``self.auto_create_topic`` is set and
        the layout equals those defaults; otherwise the admin API is used on
        brokers 0.10.1.0 and newer, and the command-line tool on older ones.
        """
        num_partitions = (self.partitions
                          if num_partitions is None else num_partitions)
        replication_factor = (self.replicas
                              if replication_factor is None
                              else replication_factor)

        # Fastest route first, slowest last.
        if (self.auto_create_topic
                and num_partitions == self.partitions
                and replication_factor == self.replicas):
            self._create_topic_via_metadata(topic_name, timeout_ms)
            return

        if env_kafka_version() >= (0, 10, 1, 0):
            admin_args = (topic_name, num_partitions, replication_factor,
                          timeout_ms)
            try:
                self._create_topic_via_admin_api(*admin_args)
            except InvalidReplicationFactorError:
                # On travis the brokers sometimes take a while to find
                # themselves: pause briefly and make one more attempt.
                time.sleep(0.5)
                self._create_topic_via_admin_api(*admin_args)
            return

        self._create_topic_via_cli(topic_name, num_partitions,
                                   replication_factor)
 def _create_topic_via_cli(self, topic_name, num_partitions,
                           replication_factor):
     """Create ``topic_name`` by running ``kafka.admin.TopicCommand``.

     Args:
         topic_name: name of the topic to create.
         num_partitions: partition count; ``None`` means ``self.partitions``.
         replication_factor: replica count; ``None`` means ``self.replicas``.

     Raises:
         RuntimeError: the command exited non-zero and its stdout does not
             mention ``kafka.common.TopicExistsException``.
     """
     if num_partitions is None:
         num_partitions = self.partitions
     if replication_factor is None:
         replication_factor = self.replicas

     args = self.kafka_run_class_args('kafka.admin.TopicCommand',
                                      '--zookeeper', '%s:%s/%s' % (self.zookeeper.host,
                                                                   self.zookeeper.port,
                                                                   self.zk_chroot),
                                      '--create',
                                      '--topic', topic_name,
                                      '--partitions', num_partitions,
                                      '--replication-factor', replication_factor)
     if env_kafka_version() >= (0, 10):
         args.append('--if-not-exists')
     env = self.kafka_run_class_env()
     proc = subprocess.Popen(args,
                             env=env,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
     stdout, stderr = proc.communicate()
     if proc.returncode != 0:
         # communicate() returns bytes for binary pipes, so the marker must
         # be bytes too; a str marker raises TypeError on Python 3.
         if b'kafka.common.TopicExistsException' not in stdout:
             self.out("Failed to create topic %s" % (topic_name, ))
             self.out(stdout)
             self.out(stderr)
             raise RuntimeError("Failed to create topic %s" %
                                (topic_name, ))
def test_kafka_version_infer(kafka_consumer_factory):
    """Check that a connection infers the configured major.minor version.

    The first connection held by the consumer's client is asked to
    ``check_version()``; only the first two components are compared with
    ``env_kafka_version()``.
    """
    consumer = kafka_consumer_factory()
    expected = env_kafka_version()[:2]
    connections = list(consumer._client._conns.values())
    inferred = connections[0].check_version()[:2]
    assert expected == inferred, (
        "Was expecting inferred broker version to be %s but was %s"
        % (expected, inferred))
def test_end_to_end(kafka_broker, compression):
    """Produce 100 messages and read them all back with a consumer.

    The test is skipped for compression types the environment cannot handle
    (LZ4 on brokers older than 0.8.2 or on PyPy, zstd on brokers older than
    2.1.0).

    Args:
        kafka_broker: broker fixture exposing ``host`` and ``port``.
        compression: compression type handed to ``KafkaProducer``.
    """
    if compression == 'lz4':
        if env_kafka_version() < (0, 8, 2):
            pytest.skip('LZ4 requires 0.8.2')
        elif platform.python_implementation() == 'PyPy':
            pytest.skip('python-lz4 crashes on older versions of pypy')

    if compression == 'zstd' and env_kafka_version() < (2, 1, 0):
        pytest.skip('zstd requires kafka 2.1.0 or newer')

    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    topic = random_string(5)
    messages = 100

    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression,
                             value_serializer=str.encode)
    # Close the producer even when a send or assertion fails, so a failing
    # test does not leave it open.
    try:
        futures = [producer.send(topic, 'msg %d' % i)
                   for i in range(messages)]
        ret = [f.get(timeout=30) for f in futures]
        assert len(ret) == messages
    finally:
        producer.close()

    # auto_offset_reset='earliest' with no group lets the consumer be created
    # after producing and still read from the start of the topic.
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=30000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)
    try:
        consumer.subscribe([topic])
        msgs = set()
        for _ in range(messages):
            try:
                msgs.add(next(consumer).value)
            except StopIteration:
                # consumer_timeout_ms elapsed; the assertion below reports
                # which messages are missing.
                break

        assert msgs == {'msg %d' % (i,) for i in range(messages)}
    finally:
        consumer.close()
Пример #7
0
def kafka_broker_factory(zookeeper):
    """Yield a factory that starts Kafka brokers, then close them all.

    The yielded ``factory(**broker_params)`` accepts an optional
    ``num_brokers`` (default 1) and defaults ``partitions`` to 4; every other
    keyword is forwarded to ``KafkaFixture.instance``.  It returns a tuple of
    the brokers it started.  Once the generator is resumed, every broker
    created through the factory is closed.
    """
    version = env_kafka_version()
    assert version, 'KAFKA_VERSION must be specified to run integration tests'

    started = []

    def factory(**broker_params):
        # Work on a copy so the pop() below never touches the caller's dict.
        options = dict(broker_params)
        options.setdefault('partitions', 4)
        count = options.pop('num_brokers', 1)
        brokers = tuple([
            KafkaFixture.instance(node_id, zookeeper, **options)
            for node_id in range(count)
        ])
        started.extend(brokers)
        return brokers

    yield factory

    for running in started:
        running.close()
Пример #8
0
    def setUpClass(cls):  # noqa
        """Start the ZooKeeper and Kafka fixtures unless the suite is disabled.

        Nothing is started when the broker version is below 0.11 or when
        ``DISABLED`` is truthy.
        """
        skip = env_kafka_version() < (0, 11) or DISABLED
        if not skip:
            cls.zk = ZookeeperFixture.instance()
            cls.server = KafkaFixture.instance(0, cls.zk)
class TestKafkaProducerIntegration(KafkaIntegrationTestCase):
    """Integration tests for the producer APIs.

    Covers raw produce requests, SimpleProducer, KeyedProducer and the
    Producer acknowledgement modes.  The ZooKeeper and Kafka fixtures are
    only started when the KAFKA_VERSION environment variable is set.
    """

    @classmethod
    def setUpClass(cls):  # noqa
        """Start one ZooKeeper and one Kafka fixture if KAFKA_VERSION is set."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        cls.server = KafkaFixture.instance(0, cls.zk)

    @classmethod
    def tearDownClass(cls):  # noqa
        """Close the fixtures started by setUpClass, broker first."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.server.close()
        cls.zk.close()

    def test_produce_10k_simple(self):
        start_offset = self.current_offset(self.topic, 0)

        self.assert_produce_request(
            [
                create_message(("Test message %d" % i).encode('utf-8'))
                for i in range(10000)
            ],
            start_offset,
            10000,
        )

    def test_produce_many_gzip(self):
        start_offset = self.current_offset(self.topic, 0)

        message1 = create_gzip_message([
            (("Gzipped 1 %d" % i).encode('utf-8'), None) for i in range(100)
        ])
        message2 = create_gzip_message([
            (("Gzipped 2 %d" % i).encode('utf-8'), None) for i in range(100)
        ])

        self.assert_produce_request(
            [message1, message2],
            start_offset,
            200,
        )

    def test_produce_many_snappy(self):
        # Deliberately disabled: everything after skipTest is kept only as a
        # reference for when snappy works again.
        self.skipTest("All snappy integration tests fail with nosnappyjava")
        start_offset = self.current_offset(self.topic, 0)

        self.assert_produce_request(
            [
                create_snappy_message([("Snappy 1 %d" % i, None)
                                       for i in range(100)]),
                create_snappy_message([("Snappy 2 %d" % i, None)
                                       for i in range(100)]),
            ],
            start_offset,
            200,
        )

    def test_produce_mixed(self):
        start_offset = self.current_offset(self.topic, 0)

        msg_count = 1 + 100
        messages = [
            create_message(b"Just a plain message"),
            create_gzip_message([(("Gzipped %d" % i).encode('utf-8'), None)
                                 for i in range(100)]),
        ]

        # All snappy integration tests fail with nosnappyjava, so this branch
        # is switched off on purpose with the leading `False`.
        if False and has_snappy():
            msg_count += 100
            messages.append(
                create_snappy_message([("Snappy %d" % i, None)
                                       for i in range(100)]))

        self.assert_produce_request(messages, start_offset, msg_count)

    def test_produce_100k_gzipped(self):
        start_offset = self.current_offset(self.topic, 0)

        self.assert_produce_request(
            [
                create_gzip_message([
                    (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
                    for i in range(50000)
                ])
            ],
            start_offset,
            50000,
        )

        self.assert_produce_request(
            [
                create_gzip_message([
                    (("Gzipped batch 1, message %d" % i).encode('utf-8'), None)
                    for i in range(50000)
                ])
            ],
            start_offset + 50000,
            50000,
        )

    ############################
    #   SimpleProducer Tests   #
    ############################

    def test_simple_producer_new_topic(self):
        producer = SimpleProducer(self.client)
        resp = producer.send_messages('new_topic', self.msg('foobar'))
        self.assert_produce_response(resp, 0)
        producer.stop()

    def test_simple_producer(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = SimpleProducer(self.client, random_start=False)

        # With random_start=False the first call goes to the first partition.
        resp = producer.send_messages(self.topic, self.msg("one"),
                                      self.msg("two"))
        self.assert_produce_response(resp, start_offsets[0])

        # The next call moves on to the second partition.
        resp = producer.send_messages(self.topic, self.msg("three"))
        self.assert_produce_response(resp, start_offsets[1])

        self.assert_fetch_offset(
            partitions[0], start_offsets[0],
            [self.msg("one"), self.msg("two")])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 [self.msg("three")])

        # Goes back to the first partition because there's only two partitions
        resp = producer.send_messages(self.topic, self.msg("four"),
                                      self.msg("five"))
        self.assert_produce_response(resp, start_offsets[0] + 2)
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("four"),
            self.msg("five")
        ])

        producer.stop()

    def test_producer_random_order(self):
        producer = SimpleProducer(self.client, random_start=True)
        resp1 = producer.send_messages(self.topic, self.msg("one"),
                                       self.msg("two"))
        resp2 = producer.send_messages(self.topic, self.msg("three"))
        resp3 = producer.send_messages(self.topic, self.msg("four"),
                                       self.msg("five"))

        self.assertEqual(resp1[0].partition, resp3[0].partition)
        self.assertNotEqual(resp1[0].partition, resp2[0].partition)

        # Stop the producer like the sibling tests do.
        producer.stop()

    def test_producer_ordered_start(self):
        producer = SimpleProducer(self.client, random_start=False)
        resp1 = producer.send_messages(self.topic, self.msg("one"),
                                       self.msg("two"))
        resp2 = producer.send_messages(self.topic, self.msg("three"))
        resp3 = producer.send_messages(self.topic, self.msg("four"),
                                       self.msg("five"))

        self.assertEqual(resp1[0].partition, 0)
        self.assertEqual(resp2[0].partition, 1)
        self.assertEqual(resp3[0].partition, 0)

        # Stop the producer like the sibling tests do.
        producer.stop()

    def test_async_simple_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = SimpleProducer(self.client,
                                  async_send=True,
                                  random_start=False)
        resp = producer.send_messages(self.topic, self.msg("one"))
        self.assertEqual(len(resp), 0)

        # flush messages
        producer.stop()

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

    def test_batched_simple_producer__triggers_by_message(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        # Configure batch producer
        batch_messages = 5
        batch_interval = 5
        producer = SimpleProducer(self.client,
                                  async_send=True,
                                  batch_send_every_n=batch_messages,
                                  batch_send_every_t=batch_interval,
                                  random_start=False)

        # Send 4 messages -- should not trigger a batch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        # send 3 more messages -- should trigger batch on first 5
        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait until producer has pulled all messages from internal queue
        # this should signal that the first batch was sent, and the producer
        # is now waiting for enough messages to batch again (or a timeout)
        timeout = 5
        start = time.time()
        while not producer.queue.empty():
            if time.time() - start > timeout:
                self.fail('timeout waiting for producer queue to empty')
            time.sleep(0.1)

        # send messages groups all *msgs in a single call to the same partition
        # so we should see all messages from the first call in one partition
        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        # Because we are batching every 5 messages, we should only see one
        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
        ])

        producer.stop()

    def test_batched_simple_producer__triggers_by_time(self):
        self.skipTest("Flakey test -- should be refactored or removed")
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        batch_interval = 5
        producer = SimpleProducer(self.client,
                                  async_send=True,
                                  batch_send_every_n=100,
                                  batch_send_every_t=batch_interval,
                                  random_start=False)

        # Send 4 messages and do a fetch
        resp = producer.send_messages(
            self.topic,
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # It hasn't sent yet
        self.assert_fetch_offset(partitions[0], start_offsets[0], [])
        self.assert_fetch_offset(partitions[1], start_offsets[1], [])

        resp = producer.send_messages(
            self.topic,
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        )

        # Batch mode is async. No ack
        self.assertEqual(len(resp), 0)

        # Wait the timeout out
        time.sleep(batch_interval)

        self.assert_fetch_offset(partitions[0], start_offsets[0], [
            self.msg("one"),
            self.msg("two"),
            self.msg("three"),
            self.msg("four"),
        ])

        self.assert_fetch_offset(partitions[1], start_offsets[1], [
            self.msg("five"),
            self.msg("six"),
            self.msg("seven"),
        ])

        producer.stop()

    ############################
    #   KeyedProducer Tests    #
    ############################

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_keyedproducer_null_payload(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner)

        resp = producer.send_messages(self.topic, self.key("key1"),
                                      self.msg("one"))
        self.assert_produce_response(resp, start_offsets[0])
        resp = producer.send_messages(self.topic, self.key("key2"), None)
        self.assert_produce_response(resp, start_offsets[1])
        resp = producer.send_messages(self.topic, self.key("key3"), None)
        self.assert_produce_response(resp, start_offsets[0] + 1)
        resp = producer.send_messages(self.topic, self.key("key4"),
                                      self.msg("four"))
        self.assert_produce_response(resp, start_offsets[1] + 1)

        self.assert_fetch_offset(partitions[0], start_offsets[0],
                                 [self.msg("one"), None])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 [None, self.msg("four")])

        producer.stop()

    def test_round_robin_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner)
        resp1 = producer.send_messages(self.topic, self.key("key1"),
                                       self.msg("one"))
        resp2 = producer.send_messages(self.topic, self.key("key2"),
                                       self.msg("two"))
        resp3 = producer.send_messages(self.topic, self.key("key3"),
                                       self.msg("three"))
        resp4 = producer.send_messages(self.topic, self.key("key4"),
                                       self.msg("four"))

        self.assert_produce_response(resp1, start_offsets[0] + 0)
        self.assert_produce_response(resp2, start_offsets[1] + 0)
        self.assert_produce_response(resp3, start_offsets[0] + 1)
        self.assert_produce_response(resp4, start_offsets[1] + 1)

        self.assert_fetch_offset(
            partitions[0], start_offsets[0],
            [self.msg("one"), self.msg("three")])
        self.assert_fetch_offset(
            partitions[1], start_offsets[1],
            [self.msg("two"), self.msg("four")])

        producer.stop()

    def test_hashed_partitioner(self):
        partitions = self.client.get_partition_ids_for_topic(self.topic)
        start_offsets = [
            self.current_offset(self.topic, p) for p in partitions
        ]

        producer = KeyedProducer(self.client, partitioner=HashedPartitioner)
        resp1 = producer.send_messages(self.topic, self.key("1"),
                                       self.msg("one"))
        resp2 = producer.send_messages(self.topic, self.key("2"),
                                       self.msg("two"))
        resp3 = producer.send_messages(self.topic, self.key("3"),
                                       self.msg("three"))
        resp4 = producer.send_messages(self.topic, self.key("3"),
                                       self.msg("four"))
        resp5 = producer.send_messages(self.topic, self.key("4"),
                                       self.msg("five"))

        offsets = {
            partitions[0]: start_offsets[0],
            partitions[1]: start_offsets[1]
        }
        messages = {partitions[0]: [], partitions[1]: []}

        keys = [self.key(k) for k in ["1", "2", "3", "3", "4"]]
        resps = [resp1, resp2, resp3, resp4, resp5]
        msgs = [self.msg(m) for m in ["one", "two", "three", "four", "five"]]

        # Recompute the expected partition for each key; the modulus 2 matches
        # the two partitions tracked in `offsets` / `messages` above.
        for key, resp, msg in zip(keys, resps, msgs):
            k = hash(key) % 2
            partition = partitions[k]
            offset = offsets[partition]
            self.assert_produce_response(resp, offset)
            offsets[partition] += 1
            messages[partition].append(msg)

        self.assert_fetch_offset(partitions[0], start_offsets[0],
                                 messages[partitions[0]])
        self.assert_fetch_offset(partitions[1], start_offsets[1],
                                 messages[partitions[1]])

        producer.stop()

    def test_async_keyed_producer(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = KeyedProducer(self.client,
                                 partitioner=RoundRobinPartitioner,
                                 async_send=True,
                                 batch_send_every_t=1)

        resp = producer.send_messages(self.topic, self.key("key1"),
                                      self.msg("one"))
        self.assertEqual(len(resp), 0)

        # wait for the server to report a new highwatermark, but give up
        # after a bounded time so a lost message fails this test instead of
        # hanging the whole run
        timeout = 10
        start = time.time()
        while self.current_offset(self.topic, partition) == start_offset:
            if time.time() - start > timeout:
                self.fail('timeout waiting for a new highwatermark')
            time.sleep(0.1)

        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

        producer.stop()

    ############################
    #   Producer ACK Tests     #
    ############################

    def test_acks_none(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = Producer(
            self.client,
            req_acks=Producer.ACK_NOT_REQUIRED,
        )
        resp = producer.send_messages(self.topic, partition, self.msg("one"))

        # No response from produce request with no acks required
        self.assertEqual(len(resp), 0)

        # But the message should still have been delivered
        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])
        producer.stop()

    def test_acks_local_write(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = Producer(
            self.client,
            req_acks=Producer.ACK_AFTER_LOCAL_WRITE,
        )
        resp = producer.send_messages(self.topic, partition, self.msg("one"))

        self.assert_produce_response(resp, start_offset)
        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

        producer.stop()

    def test_acks_cluster_commit(self):
        partition = self.client.get_partition_ids_for_topic(self.topic)[0]
        start_offset = self.current_offset(self.topic, partition)

        producer = Producer(
            self.client,
            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
        )

        resp = producer.send_messages(self.topic, partition, self.msg("one"))
        self.assert_produce_response(resp, start_offset)
        self.assert_fetch_offset(partition, start_offset, [self.msg("one")])

        producer.stop()

    def assert_produce_request(self,
                               messages,
                               initial_offset,
                               message_ct,
                               partition=0):
        """Send ``messages`` in one produce request and check the offsets.

        The response must report ``initial_offset`` and the partition's
        current offset must afterwards be ``initial_offset + message_ct``.
        """
        produce = ProduceRequestPayload(self.topic,
                                        partition,
                                        messages=messages)

        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.
        resp = self.client.send_produce_request([produce])
        self.assert_produce_response(resp, initial_offset)

        self.assertEqual(self.current_offset(self.topic, partition),
                         initial_offset + message_ct)

    def assert_produce_response(self, resp, initial_offset):
        """Assert ``resp`` is one error-free response at ``initial_offset``."""
        self.assertEqual(len(resp), 1)
        self.assertEqual(resp[0].error, 0)
        self.assertEqual(resp[0].offset, initial_offset)

    def assert_fetch_offset(self, partition, start_offset, expected_messages):
        """Fetch from ``start_offset`` and compare with ``expected_messages``.

        Also checks that the reported high-water mark equals
        ``start_offset + len(expected_messages)``.
        """
        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.

        resp, = self.client.send_fetch_request(
            [FetchRequestPayload(self.topic, partition, start_offset, 1024)])

        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.partition, partition)
        messages = [x.message.value for x in resp.messages]

        self.assertEqual(messages, expected_messages)
        self.assertEqual(resp.highwaterMark,
                         start_offset + len(expected_messages))
Пример #10
0
    def tearDownClass(cls):  # noqa
        """Close the Kafka and ZooKeeper fixtures unless the suite is disabled.

        Nothing is closed when the broker version is below 0.11 or when
        ``DISABLED`` is truthy.
        """
        skip = env_kafka_version() < (0, 11) or DISABLED
        if not skip:
            cls.server.close()
            cls.zk.close()
Пример #11
0
class TestKafkaClientIntegration(KafkaIntegrationTestCase):
    """Integration tests that exercise ``self.client`` against broker fixtures.

    The Zookeeper and Kafka fixtures are only started (and closed) when the
    ``KAFKA_VERSION`` environment variable is set.
    """

    @classmethod
    def setUpClass(cls):  # noqa
        """Start the Zookeeper and Kafka fixtures if KAFKA_VERSION is set."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        cls.server = KafkaFixture.instance(0, cls.zk)

    @classmethod
    def tearDownClass(cls):  # noqa
        """Close the fixtures started by ``setUpClass``."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.server.close()
        cls.zk.close()

    def test_consume_none(self):
        """Fetching from offset 0 of partition 0 returns no messages."""
        fetch = FetchRequestPayload(self.topic, 0, 0, 1024)

        fetch_resp, = self.client.send_fetch_request([fetch])
        self.assertEqual(fetch_resp.error, 0)
        self.assertEqual(fetch_resp.topic, self.topic)
        self.assertEqual(fetch_resp.partition, 0)

        messages = list(fetch_resp.messages)
        self.assertEqual(len(messages), 0)

    def test_ensure_topic_exists(self):
        """``ensure_topic_exists`` passes for ``self.topic`` and times out otherwise."""

        # assume that self.topic was created by setUp
        # if so, this should succeed
        self.client.ensure_topic_exists(self.topic, timeout=1)

        # ensure_topic_exists should fail with KafkaTimeoutError
        with self.assertRaises(KafkaTimeoutError):
            self.client.ensure_topic_exists('this_topic_doesnt_exist',
                                            timeout=0)

    def test_send_produce_request_maintains_request_response_order(self):
        """Responses come back in the same order as the produce payloads."""

        self.client.ensure_topic_exists('foo')
        self.client.ensure_topic_exists('bar')

        requests = [
            ProduceRequestPayload('foo', 0,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
            ProduceRequestPayload('bar', 1,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
            ProduceRequestPayload('foo', 1,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
            ProduceRequestPayload('bar', 0,
                                  [create_message(b'a'),
                                   create_message(b'b')]),
        ]

        responses = self.client.send_produce_request(requests)

        # Every payload must be answered. Without this check a short (or
        # empty) response list would let the test pass without comparing
        # anything at all.
        self.assertEqual(len(responses), len(requests))
        for request, response in zip(requests, responses):
            self.assertEqual(request.topic, response.topic)
            self.assertEqual(request.partition, response.partition)

    ####################
    #   Offset Tests   #
    ####################

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_commit_fetch_offsets(self):
        """Commit offset 42 for a group, then fetch it back."""
        req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
        (resp, ) = self.client.send_offset_commit_request('group', [req])
        self.assertEqual(resp.error, 0)

        req = OffsetFetchRequestPayload(self.topic, 0)
        (resp, ) = self.client.send_offset_fetch_request('group', [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        self.assertEqual(resp.metadata, '')  # Metadata isn't stored for now

    @pytest.mark.skipif(env_kafka_version() < (0, 9),
                        reason='Unsupported Kafka Version')
    def test_commit_fetch_offsets_dual(self):
        """Commit and fetch offset 42 through the ``*_kafka`` request variants."""
        req = OffsetCommitRequestPayload(self.topic, 0, 42, 'metadata')
        (resp, ) = self.client.send_offset_commit_request_kafka('group', [req])
        self.assertEqual(resp.error, 0)

        (resp, ) = self.client.send_offset_fetch_request_kafka('group', [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, 42)
        # Metadata is stored in kafka
        self.assertEqual(resp.metadata, 'metadata')
    resp = client.send_produce_request([produce])
    assert_produce_response(resp, initial_offset)

    assert current_offset(client, topic,
                          partition) == initial_offset + message_ct


def assert_produce_response(resp, initial_offset):
    """Check that ``resp`` is a single error-free response at ``initial_offset``.

    Raises AssertionError when ``resp`` does not contain exactly one entry,
    when that entry's ``error`` is not 0, or when its ``offset`` differs from
    ``initial_offset``.
    """
    assert len(resp) == 1
    only_response = resp[0]
    assert only_response.error == 0
    assert only_response.offset == initial_offset


@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
def test_produce_many_simple(simple_client, topic):
    """Produce 100 messages in a single request using the SimpleClient fixture."""
    message_count = 100
    start_offset = current_offset(simple_client, topic, 0)

    payloads = [
        create_message(("Test message %d" % index).encode('utf-8'))
        for index in range(message_count)
    ]

    assert_produce_request(simple_client, topic, payloads, start_offset,
                           message_count)
# Example #13
# 0
 def setUp(self):
     """Skip the test on brokers older than 0.11 or when ``DISABLED`` is set."""
     unsupported = env_kafka_version() < (0, 11) or DISABLED
     if unsupported:
         # skipTest raises, so the parent setUp below is not reached.
         self.skipTest(
             'Admin ACL Integration test requires KAFKA_VERSION >= 0.11')
     super(TestAdminClientIntegration, self).setUp()
# Example #14
# 0
from . import unittest
from kafka import (KafkaConsumer, MultiProcessConsumer, SimpleConsumer,
                   create_message, create_gzip_message, KafkaProducer)
import kafka.codec
from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES
from kafka.errors import (ConsumerFetchSizeTooSmall, OffsetOutOfRangeError,
                          UnsupportedVersionError, KafkaTimeoutError,
                          UnsupportedCodecError)
from kafka.structs import (ProduceRequestPayload, TopicPartition,
                           OffsetAndTimestamp)

from test.fixtures import ZookeeperFixture, KafkaFixture
from test.testutil import KafkaIntegrationTestCase, Timer, assert_message_count, env_kafka_version, random_string


@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
def test_kafka_consumer(kafka_consumer_factory, send_messages):
    """Test KafkaConsumer

    Sends 100 messages to each of partitions 0 and 1, consumes until 200
    messages have been seen, and checks the count collected for partition 0.
    """
    consumer = kafka_consumer_factory(auto_offset_reset='earliest')
    for partition in (0, 1):
        send_messages(range(0, 100), partition=partition)

    by_partition = {0: [], 1: []}
    for consumed, message in enumerate(consumer, 1):
        logging.debug("Consumed message %s", repr(message))
        by_partition[message.partition].append(message)
        # Stop as soon as the 200th message has been recorded.
        if consumed >= 200:
            break

    assert_message_count(by_partition[0], 100)
class TestKafkaAdminClientIntegration(KafkaIntegrationTestCase):
    """Integration tests for ``AdminClient`` topic and partition operations.

    The Zookeeper and Kafka fixtures are only started (and closed) when the
    ``KAFKA_VERSION`` environment variable is set.
    """

    @classmethod
    def setUpClass(cls):
        """Start the Zookeeper and Kafka fixtures if KAFKA_VERSION is set."""
        if os.environ.get('KAFKA_VERSION'):
            cls.zk = ZookeeperFixture.instance()
            cls.server = KafkaFixture.instance(0, cls.zk)

    @classmethod
    def tearDownClass(cls):
        """Close the fixtures started by ``setUpClass``."""
        if os.environ.get('KAFKA_VERSION'):
            cls.server.close()
            cls.zk.close()

    @pytest.mark.skipif(env_kafka_version() < (0, 10, 1), reason='Unsupported Kafka Version')
    def test_create_delete_topics(self):
        """Create a topic through the admin client, then delete it."""
        admin = AdminClient(self.client_async)
        new_topic = NewTopic(name='topic', num_partitions=1, replication_factor=1)
        metadata_request = MetadataRequest[1]()
        response = admin.create_topics(topics=[new_topic],
                                       timeout=KAFKA_ADMIN_TIMEOUT_SECONDS)

        # Error code 7 means that RequestTimedOut but we can safely assume
        # that topic is created or will be created eventually.
        # see this https://cwiki.apache.org/confluence/display/KAFKA/
        # KIP-4+-+Command+line+and+centralized+administrative+operations
        create_error = response[0].topic_errors[0][1]
        self.assertTrue(create_error in (0, 7))

        time.sleep(1)  # allows the topic to be created
        delete_response = admin.delete_topics(['topic'], timeout=1)

        # NOTE(review): this re-checks the *create* response; delete_response
        # is never inspected. Looks like a copy-paste slip -- confirm the
        # shape of the delete response before asserting on it.
        recheck_error = response[0].topic_errors[0][1]
        self.assertTrue(recheck_error in (0, 7))

    @pytest.mark.skipif(env_kafka_version() < (1, 0, 0), reason='Unsupported Kafka Version')
    def test_create_partitions(self):
        """Create a one-partition topic, then request a second partition."""
        admin = AdminClient(self.client_async)
        new_topic = NewTopic(name='topic', num_partitions=1, replication_factor=1)
        metadata_request = MetadataRequest[1]()
        admin.create_topics(topics=[new_topic],
                            timeout=KAFKA_ADMIN_TIMEOUT_SECONDS)

        time.sleep(1)  # allows the topic to be created

        partitions_update = NewPartitionsInfo('topic', 2, [[0]])
        response = admin.create_partitions([partitions_update], timeout=1,
                                           validate_only=False)

        # Accept success (0) or error code 7, as for topic creation above.
        partition_error = response[0].topic_errors[0][1]
        self.assertTrue(partition_error in (0, 7))
import kafka.codec
from kafka.consumer.base import MAX_FETCH_BUFFER_SIZE_BYTES
from kafka.errors import (
    ConsumerFetchSizeTooSmall, OffsetOutOfRangeError, UnsupportedVersionError,
    KafkaTimeoutError, UnsupportedCodecError, ConsumerTimeout
)
from kafka.protocol.message import PartialMessage
from kafka.structs import (
    ProduceRequestPayload, TopicPartition, OffsetAndTimestamp
)

from test.fixtures import ZookeeperFixture, KafkaFixture
from test.testutil import KafkaIntegrationTestCase, Timer, assert_message_count, env_kafka_version, random_string


@pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
def test_kafka_consumer(kafka_consumer_factory, send_messages):
    """Test KafkaConsumer

    Sends 100 messages to each of partitions 0 and 1, consumes until 200
    messages have been seen, and checks the count collected for partition 0.
    """
    consumer = kafka_consumer_factory(auto_offset_reset='earliest')
    for partition in (0, 1):
        send_messages(range(0, 100), partition=partition)

    by_partition = {0: [], 1: []}
    for consumed, message in enumerate(consumer, 1):
        logging.debug("Consumed message %s", repr(message))
        by_partition[message.partition].append(message)
        # Stop as soon as the 200th message has been recorded.
        if consumed >= 200:
            break

    assert_message_count(by_partition[0], 100)
import pytest

from logging import info
from test.testutil import env_kafka_version, random_string
from threading import Event, Thread
from time import time, sleep

from kafka.admin import (
    ACLFilter, ACLOperation, ACLPermissionType, ResourcePattern, ResourceType, ACL, ConfigResource, ConfigResourceType)
from kafka.errors import (NoError, GroupCoordinatorNotAvailableError)


@pytest.mark.skipif(env_kafka_version() < (0, 11), reason="ACL features require broker >=0.11")
def test_create_describe_delete_acls(kafka_admin_client):
    """Tests that we can add, list and remove ACLs
    """

    # Check that we don't have any ACLs in the cluster
    topic_pattern = ResourcePattern(ResourceType.TOPIC, "topic")
    match_any_acl = ACLFilter(
        principal=None,
        host="*",
        operation=ACLOperation.ANY,
        permission_type=ACLPermissionType.ANY,
        resource_pattern=topic_pattern,
    )
    acls, error = kafka_admin_client.describe_acls(match_any_acl)

    assert error is NoError
    assert len(acls) == 0
# Example #18
# 0
import pytest

from logging import info
from test.testutil import env_kafka_version, random_string
from threading import Event, Thread
from time import time, sleep

from kafka.admin import (ACLFilter, ACLOperation, ACLPermissionType,
                         ResourcePattern, ResourceType, ACL, ConfigResource,
                         ConfigResourceType)
from kafka.errors import (NoError, GroupCoordinatorNotAvailableError,
                          NonEmptyGroupError, GroupIdNotFoundError)


@pytest.mark.skipif(env_kafka_version() < (0, 11),
                    reason="ACL features require broker >=0.11")
def test_create_describe_delete_acls(kafka_admin_client):
    """Tests that we can add, list and remove ACLs
    """

    # Check that we don't have any ACLs in the cluster
    topic_pattern = ResourcePattern(ResourceType.TOPIC, "topic")
    match_any_acl = ACLFilter(
        principal=None,
        host="*",
        operation=ACLOperation.ANY,
        permission_type=ACLPermissionType.ANY,
        resource_pattern=topic_pattern,
    )
    acls, error = kafka_admin_client.describe_acls(match_any_acl)

    assert error is NoError
    assert len(acls) == 0
# Example #19
# 0
 def tearDown(self):
     """Run the parent tearDown unless the broker is older than 0.11 or ``DISABLED`` is set."""
     if not (env_kafka_version() < (0, 11) or DISABLED):
         super(TestAdminClientIntegration, self).tearDown()
class TestConsumerIntegration(KafkaIntegrationTestCase):
    """Integration tests for the consumer classes against a two-broker fixture.

    The Zookeeper and Kafka fixtures are only started (and closed) when the
    ``KAFKA_VERSION`` environment variable is set.
    """
    maxDiff = None

    @classmethod
    def setUpClass(cls):
        """Start Zookeeper plus two Kafka brokers sharing one random chroot."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        chroot = random_string(10)
        cls.server1 = KafkaFixture.instance(0, cls.zk,
                                            zk_chroot=chroot)
        cls.server2 = KafkaFixture.instance(1, cls.zk,
                                            zk_chroot=chroot)

        cls.server = cls.server1  # Bootstrapping server

    @classmethod
    def tearDownClass(cls):
        """Close both brokers and Zookeeper started by ``setUpClass``."""
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.server1.close()
        cls.server2.close()
        cls.zk.close()

    def send_messages(self, partition, messages):
        """Produce ``messages`` to ``partition`` of ``self.topic``.

        Each item is stringified, passed through ``self.msg`` and wrapped with
        ``create_message``. Returns the list of values of the sent messages.
        """
        messages = [create_message(self.msg(str(msg))) for msg in messages]
        produce = ProduceRequestPayload(self.topic, partition, messages=messages)
        resp, = self.client.send_produce_request([produce])
        self.assertEqual(resp.error, 0)

        return [x.value for x in messages]

    def send_gzip_message(self, partition, messages):
        """Produce ``messages`` to ``partition`` as a single gzip message."""
        message = create_gzip_message([(self.msg(str(msg)), None) for msg in messages])
        produce = ProduceRequestPayload(self.topic, partition, messages=[message])
        resp, = self.client.send_produce_request([produce])
        self.assertEqual(resp.error, 0)

    def assert_message_count(self, messages, num_messages):
        """Assert ``messages`` has ``num_messages`` entries, all distinct."""
        # Make sure we got them all
        self.assertEqual(len(messages), num_messages)

        # Make sure there are no duplicates
        self.assertEqual(len(set(messages)), num_messages)

    def consumer(self, **kwargs):
        """Build a consumer on ``self.client``.

        Recognised keyword arguments: ``consumer`` (class to instantiate,
        default ``SimpleConsumer``), ``group`` (default ``None``) and ``topic``
        (default ``self.topic``); ``auto_commit`` defaults to ``False``. All
        remaining keyword arguments are forwarded to the consumer class.
        """
        if os.environ['KAFKA_VERSION'] == "0.8.0":
            # Kafka 0.8.0 simply doesn't support offset requests, so hard code it being off
            kwargs['group'] = None
            kwargs['auto_commit'] = False
        else:
            kwargs.setdefault('group', None)
            kwargs.setdefault('auto_commit', False)

        consumer_class = kwargs.pop('consumer', SimpleConsumer)
        group = kwargs.pop('group', None)
        topic = kwargs.pop('topic', self.topic)

        if consumer_class in [SimpleConsumer, MultiProcessConsumer]:
            kwargs.setdefault('iter_timeout', 0)

        return consumer_class(self.client, group, topic, **kwargs)

    def kafka_consumer(self, **configs):
        """Return a ``KafkaConsumer`` for ``self.topic`` bootstrapped from ``self.server``."""
        brokers = '%s:%d' % (self.server.host, self.server.port)
        consumer = KafkaConsumer(self.topic,
                                 bootstrap_servers=brokers,
                                 **configs)
        return consumer

    def kafka_producer(self, **configs):
        """Return a ``KafkaProducer`` bootstrapped from ``self.server``."""
        brokers = '%s:%d' % (self.server.host, self.server.port)
        producer = KafkaProducer(
            bootstrap_servers=brokers, **configs)
        return producer

    def test_simple_consumer(self):
        """A default consumer reads all 200 messages sent to two partitions."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer = self.consumer()

        self.assert_message_count(list(consumer), 200)

        consumer.stop()

    def test_simple_consumer_gzip(self):
        """A default consumer reads all 200 messages sent as gzip messages."""
        self.send_gzip_message(0, range(0, 100))
        self.send_gzip_message(1, range(100, 200))

        # Start a consumer
        consumer = self.consumer()

        self.assert_message_count(list(consumer), 200)

        consumer.stop()

    def test_simple_consumer_smallest_offset_reset(self):
        """With auto_offset_reset='smallest', an out-of-range seek rereads everything."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        consumer = self.consumer(auto_offset_reset='smallest')
        # Move fetch offset ahead of 300 message (out of range)
        consumer.seek(300, 2)
        # Since auto_offset_reset is set to smallest we should read all 200
        # messages from beginning.
        self.assert_message_count(list(consumer), 200)

    def test_simple_consumer_largest_offset_reset(self):
        """With the default reset policy, an out-of-range seek yields only new messages."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Default largest
        consumer = self.consumer()
        # Move fetch offset ahead of 300 message (out of range)
        consumer.seek(300, 2)
        # Since auto_offset_reset is set to largest we should not read any
        # messages.
        self.assert_message_count(list(consumer), 0)
        # Send 200 new messages to the queue
        self.send_messages(0, range(200, 300))
        self.send_messages(1, range(300, 400))
        # Since the offset is set to largest we should read all the new messages.
        self.assert_message_count(list(consumer), 200)

    def test_simple_consumer_no_reset(self):
        """With auto_offset_reset=None, an out-of-range seek raises on read."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Automatic offset reset is explicitly disabled for this consumer
        consumer = self.consumer(auto_offset_reset=None)
        # Move fetch offset ahead of 300 message (out of range)
        consumer.seek(300, 2)
        with self.assertRaises(OffsetOutOfRangeError):
            consumer.get_message()

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_simple_consumer_load_initial_offsets(self):
        """Offsets committed by one consumer are loaded by the next one in the group."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Create 1st consumer and change offsets
        consumer = self.consumer(group='test_simple_consumer_load_initial_offsets')
        self.assertEqual(consumer.offsets, {0: 0, 1: 0})
        consumer.offsets.update({0: 51, 1: 101})
        # Update counter after manual offsets update
        consumer.count_since_commit += 1
        consumer.commit()

        # Create 2nd consumer and check initial offsets
        consumer = self.consumer(group='test_simple_consumer_load_initial_offsets',
                                 auto_commit=False)
        self.assertEqual(consumer.offsets, {0: 51, 1: 101})

    def test_simple_consumer__seek(self):
        """Relative and absolute seeks yield the expected number of messages."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        consumer = self.consumer()

        # Rewind 10 messages from the end
        consumer.seek(-10, 2)
        self.assert_message_count(list(consumer), 10)

        # Rewind 13 messages from the end
        consumer.seek(-13, 2)
        self.assert_message_count(list(consumer), 13)

        # Set absolute offset
        consumer.seek(100)
        self.assert_message_count(list(consumer), 0)
        consumer.seek(100, partition=0)
        self.assert_message_count(list(consumer), 0)
        consumer.seek(101, partition=1)
        self.assert_message_count(list(consumer), 0)
        consumer.seek(90, partition=0)
        self.assert_message_count(list(consumer), 10)
        consumer.seek(20, partition=1)
        self.assert_message_count(list(consumer), 80)
        consumer.seek(0, partition=1)
        self.assert_message_count(list(consumer), 100)

        consumer.stop()

    @pytest.mark.skipif(env_kafka_version() >= (2, 0),
                        reason="SimpleConsumer blocking does not handle PartialMessage change in kafka 2.0+")
    def test_simple_consumer_blocking(self):
        """``get_messages`` honours ``block``/``timeout`` with and without data queued."""
        consumer = self.consumer()

        # Nothing in queue: a blocking call with a 1 second timeout returns
        # no messages after waiting at least 1 second
        with Timer() as t:
            messages = consumer.get_messages(block=True, timeout=1)
            self.assert_message_count(messages, 0)
        self.assertGreaterEqual(t.interval, 1)

        self.send_messages(0, range(0, 5))
        self.send_messages(1, range(5, 10))

        # Ask for 5 messages, 10 in queue. Get 5 back, no blocking
        with Timer() as t:
            messages = consumer.get_messages(count=5, block=True, timeout=3)
            self.assert_message_count(messages, 5)
        self.assertLess(t.interval, 3)

        # Ask for 10 messages, get 5 back, block 1 second
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=True, timeout=1)
            self.assert_message_count(messages, 5)
        self.assertGreaterEqual(t.interval, 1)

        # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
        # second, get 5 back, no blocking
        self.send_messages(0, range(0, 3))
        self.send_messages(1, range(3, 5))
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=1, timeout=1)
            self.assert_message_count(messages, 5)
        self.assertLessEqual(t.interval, 1)

        consumer.stop()

    def test_simple_consumer_pending(self):
        """``pending()`` reports unread message counts overall and per partition."""
        # make sure that we start with no pending messages
        consumer = self.consumer()
        # assertEqual, not the assertEquals alias: the alias was removed in
        # Python 3.12.
        self.assertEqual(consumer.pending(), 0)
        self.assertEqual(consumer.pending(partitions=[0]), 0)
        self.assertEqual(consumer.pending(partitions=[1]), 0)

        # Produce 10 messages to partitions 0 and 1
        self.send_messages(0, range(0, 10))
        self.send_messages(1, range(10, 20))

        consumer = self.consumer()

        self.assertEqual(consumer.pending(), 20)
        self.assertEqual(consumer.pending(partitions=[0]), 10)
        self.assertEqual(consumer.pending(partitions=[1]), 10)

        # move to last message, so one partition should have 1 pending
        # message and other 0
        consumer.seek(-1, 2)
        self.assertEqual(consumer.pending(), 1)

        pending_part1 = consumer.pending(partitions=[0])
        pending_part2 = consumer.pending(partitions=[1])
        self.assertEqual({0, 1}, {pending_part1, pending_part2})
        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    def test_multi_process_consumer(self):
        """A MultiProcessConsumer reads all 200 messages (skipped)."""
        # Produce 100 messages to partitions 0 and 1
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        consumer = self.consumer(consumer=MultiProcessConsumer)

        self.assert_message_count(list(consumer), 200)

        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    def test_multi_process_consumer_blocking(self):
        """Blocking ``get_messages`` behaviour of MultiProcessConsumer (skipped)."""
        consumer = self.consumer(consumer=MultiProcessConsumer)

        # No messages in queue: a blocking call with a 1 second timeout
        # returns nothing after waiting at least 1 second
        with Timer() as t:
            messages = consumer.get_messages(block=True, timeout=1)
            self.assert_message_count(messages, 0)

        self.assertGreaterEqual(t.interval, 1)

        # Send 10 messages
        self.send_messages(0, range(0, 10))

        # Ask for 5 messages, 10 messages in queue, block 0 seconds
        with Timer() as t:
            messages = consumer.get_messages(count=5, block=True, timeout=5)
            self.assert_message_count(messages, 5)
        self.assertLessEqual(t.interval, 1)

        # Ask for 10 messages, 5 in queue, block 1 second
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=True, timeout=1)
            self.assert_message_count(messages, 5)
        self.assertGreaterEqual(t.interval, 1)

        # Ask for 10 messages, 5 in queue, ask to block for 1 message or 1
        # second, get at least one back, no blocking
        self.send_messages(0, range(0, 5))
        with Timer() as t:
            messages = consumer.get_messages(count=10, block=1, timeout=1)
            received_message_count = len(messages)
            self.assertGreaterEqual(received_message_count, 1)
            self.assert_message_count(messages, received_message_count)
        self.assertLessEqual(t.interval, 1)

        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    def test_multi_proc_pending(self):
        """``pending()`` counts for a MultiProcessConsumer (skipped)."""
        self.send_messages(0, range(0, 10))
        self.send_messages(1, range(10, 20))

        # set group to None and auto_commit to False to avoid interactions w/
        # offset commit/fetch apis
        consumer = MultiProcessConsumer(self.client, None, self.topic,
                                        auto_commit=False, iter_timeout=0)

        self.assertEqual(consumer.pending(), 20)
        self.assertEqual(consumer.pending(partitions=[0]), 10)
        self.assertEqual(consumer.pending(partitions=[1]), 10)

        consumer.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_multi_process_consumer_load_initial_offsets(self):
        """A MultiProcessConsumer loads offsets committed by its group (skipped)."""
        self.send_messages(0, range(0, 10))
        self.send_messages(1, range(10, 20))

        # Create 1st consumer and change offsets
        consumer = self.consumer(group='test_multi_process_consumer_load_initial_offsets')
        self.assertEqual(consumer.offsets, {0: 0, 1: 0})
        consumer.offsets.update({0: 5, 1: 15})
        # Update counter after manual offsets update
        consumer.count_since_commit += 1
        consumer.commit()

        # Create 2nd consumer and check initial offsets
        consumer = self.consumer(consumer=MultiProcessConsumer,
                                 group='test_multi_process_consumer_load_initial_offsets',
                                 auto_commit=False)
        self.assertEqual(consumer.offsets, {0: 5, 1: 15})

    def test_large_messages(self):
        """Small and large messages are all consumed with a 60000-byte max buffer."""
        # Produce 10 "normal" size messages
        small_messages = self.send_messages(0, [str(x) for x in range(10)])

        # Produce 10 messages that are large (bigger than default fetch size)
        large_messages = self.send_messages(0, [random_string(5000) for x in range(10)])

        # Brokers prior to 0.11 will return the next message
        # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
        # Brokers 0.11 and later that store messages in v2 format
        # internally will return the next message only if the
        # full MessageSet is smaller than max_bytes.
        # For that reason, we set the max buffer size to a little more
        # than the size of all large messages combined
        consumer = self.consumer(max_buffer_size=60000)

        expected_messages = set(small_messages + large_messages)
        actual_messages = {x.message.value for x in consumer
                           if not isinstance(x.message, PartialMessage)}
        self.assertEqual(expected_messages, actual_messages)

        consumer.stop()

    def test_huge_messages(self):
        """A message above the max fetch buffer needs an unlimited buffer to be read."""
        huge_message, = self.send_messages(0, [
            create_message(random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10)),
        ])

        # Create a consumer with the default buffer size
        consumer = self.consumer()

        # This consumer fails to get the message
        with self.assertRaises(ConsumerFetchSizeTooSmall):
            consumer.get_message(False, 0.1)

        consumer.stop()

        # Create a consumer with no fetch size limit
        big_consumer = self.consumer(
            max_buffer_size=None,
            partitions=[0],
        )

        # Seek to the last message
        big_consumer.seek(-1, 2)

        # Consume giant message successfully
        message = big_consumer.get_message(block=False, timeout=10)
        self.assertIsNotNone(message)
        self.assertEqual(message.message.value, huge_message)

        big_consumer.stop()

    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_offset_behavior__resuming_behavior(self):
        """A second consumer in the group resumes from the last auto-commit."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer1 = self.consumer(
            group='test_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # Grab the first 195 messages
        output_msgs1 = [consumer1.get_message().message.value for _ in range(195)]
        self.assert_message_count(output_msgs1, 195)

        # The total offset across both partitions should be at 180
        consumer2 = self.consumer(
            group='test_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # 181-200
        self.assert_message_count(list(consumer2), 20)

        consumer1.stop()
        consumer2.stop()

    @unittest.skip('MultiProcessConsumer deprecated and these tests are flaky')
    @pytest.mark.skipif(not env_kafka_version(), reason="No KAFKA_VERSION set")
    def test_multi_process_offset_behavior__resuming_behavior(self):
        """Resume-from-commit behaviour for MultiProcessConsumer (skipped)."""
        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer1 = self.consumer(
            consumer=MultiProcessConsumer,
            group='test_multi_process_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # Grab the first 195 messages
        output_msgs1 = []
        idx = 0
        for message in consumer1:
            output_msgs1.append(message.message.value)
            idx += 1
            if idx >= 195:
                break
        self.assert_message_count(output_msgs1, 195)

        # The total offset across both partitions should be at 180
        consumer2 = self.consumer(
            consumer=MultiProcessConsumer,
            group='test_multi_process_offset_behavior__resuming_behavior',
            auto_commit=True,
            auto_commit_every_t=None,
            auto_commit_every_n=20,
        )

        # 181-200
        self.assert_message_count(list(consumer2), 20)

        consumer1.stop()
        consumer2.stop()

    # TODO: Make this a unit test -- should not require integration
    def test_fetch_buffer_size(self):
        """Messages larger than the initial buffer are read once the buffer may grow."""

        # Test parameters (see issue 135 / PR 136)
        TEST_MESSAGE_SIZE = 1048
        INIT_BUFFER_SIZE = 1024
        MAX_BUFFER_SIZE = 2048
        assert TEST_MESSAGE_SIZE > INIT_BUFFER_SIZE
        assert TEST_MESSAGE_SIZE < MAX_BUFFER_SIZE
        assert MAX_BUFFER_SIZE == 2 * INIT_BUFFER_SIZE

        # Use the named parameters so the sanity checks above actually
        # constrain the values exercised below.
        self.send_messages(0, ["x" * TEST_MESSAGE_SIZE])
        self.send_messages(1, ["x" * TEST_MESSAGE_SIZE])

        consumer = self.consumer(buffer_size=INIT_BUFFER_SIZE,
                                 max_buffer_size=MAX_BUFFER_SIZE)
        messages = list(consumer)
        self.assertEqual(len(messages), 2)
import logging
import uuid

import pytest

from kafka.admin import NewTopic
from kafka.protocol.metadata import MetadataRequest_v1
from test.testutil import assert_message_count, env_kafka_version, random_string, special_to_underscore


@pytest.fixture(
    params=[
        pytest.param(
            mechanism,
            marks=pytest.mark.skipif(env_kafka_version() < minimum_version, reason=skip_reason),
        )
        for mechanism, minimum_version, skip_reason in (
            ("PLAIN", (0, 10), "Requires KAFKA_VERSION >= 0.10"),
            ("SCRAM-SHA-256", (0, 10, 2), "Requires KAFKA_VERSION >= 0.10.2"),
            ("SCRAM-SHA-512", (0, 10, 2), "Requires KAFKA_VERSION >= 0.10.2"),
        )
    ]
)
def sasl_kafka(request, kafka_broker_factory):
    """Yield a SASL_PLAINTEXT broker for the parametrized SASL mechanism.

    After the yield, the broker's child process logs are dumped.
    """
    broker = kafka_broker_factory(transport="SASL_PLAINTEXT", sasl_mechanism=request.param)[0]
    yield broker
    broker.child.dump_logs()