    def test_huge_messages(self):
        huge_message, = self.send_messages(0, [
            create_message(random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10)),
        ])

        # Create a consumer with the default buffer size
        consumer = self.consumer()

        # This consumer fails to get the message
        with self.assertRaises(ConsumerFetchSizeTooSmall):
            consumer.get_message(False, 0.1)

        consumer.stop()

        # Create a consumer with no fetch size limit
        big_consumer = self.consumer(
            max_buffer_size=None,
            partitions=[0],
        )

        # Seek to the last message
        big_consumer.seek(-1, 2)

        # Consume giant message successfully
        message = big_consumer.get_message(block=False, timeout=10)
        self.assertIsNotNone(message)
        self.assertEqual(message.message.value, huge_message)

        big_consumer.stop()
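
Every example on this page leans on a random_string test helper that the page
itself never shows. A minimal sketch, assuming it only needs to produce unique
ASCII payloads (several examples call .encode('utf-8') on the result, so this
returns str):

import random
import string

def random_string(length):
    # Random ASCII letters; used for topic names, group ids and payloads.
    return ''.join(random.choice(string.ascii_letters) for _ in range(length))
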
def test_produce_and_consume(request, sasl_kafka):
    topic_name = special_to_underscore(request.node.name + random_string(4))
    sasl_kafka.create_topics([topic_name], num_partitions=2)
    producer, = sasl_kafka.get_producers(1)

    messages_and_futures = []  # [(message, produce_future),]
    for i in range(100):
        encoded_msg = "{}-{}-{}".format(i, request.node.name, uuid.uuid4()).encode("utf-8")
        future = producer.send(topic_name, value=encoded_msg, partition=i % 2)
        messages_and_futures.append((encoded_msg, future))
    producer.flush()

    for (msg, f) in messages_and_futures:
        assert f.succeeded()

    consumer, = sasl_kafka.get_consumers(1, [topic_name])
    messages = {0: [], 1: []}
    for i, message in enumerate(consumer, 1):
        logging.debug("Consumed message %s", repr(message))
        messages[message.partition].append(message)
        if i >= 100:
            break

    assert_message_count(messages[0], 50)
    assert_message_count(messages[1], 50)
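
This test also uses two helpers that are not shown on this page. Plausible
sketches, assuming the conventional kafka-python test utilities:

import re

def special_to_underscore(name, _matcher=re.compile(r'[^a-zA-Z0-9_]+')):
    # Topic names only allow a restricted character set, so replace
    # anything else (e.g. brackets in parametrized pytest node names).
    return _matcher.sub('_', name)

def assert_message_count(messages, num_messages):
    # Check that we received the expected number of distinct messages.
    unique = {(m.key, m.value) for m in messages}
    assert len(unique) == num_messages
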
Example #4
    def assert_message_count(self,
                             topic,
                             check_count,
                             timeout=10,
                             partitions=None):
        hosts = ','.join(
            ['%s:%d' % (broker.host, broker.port) for broker in self.brokers])

        client = KafkaClient(hosts)
        group = random_string(10)
        consumer = SimpleConsumer(client,
                                  group,
                                  topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = consumer.pending(partitions)

        # Keep polling until the pending count matches, subject to the timeout
        while pending != check_count and (time.time() - started_at < timeout):
            pending = consumer.pending(partitions)

        consumer.stop()
        client.close()

        self.assertEqual(pending, check_count)
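
A hypothetical call site, to show the intent (the values are illustrative, not
from the original tests):

self.assert_message_count(self.topic, check_count=200, timeout=10, partitions=[0, 1])
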
Example #5
    def get_producers(self, cnt, **params):
        params.setdefault('client_id', 'producer')
        params['bootstrap_servers'] = self.bootstrap_server()
        client_id = params['client_id']
        for x in range(cnt):
            params['client_id'] = '%s_%s' % (client_id, random_string(4))
            yield KafkaProducer(**params)
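
Note that get_producers (like get_consumers and get_admin_clients below) is a
generator, so callers iterate it or unpack it, as the SASL test near the top of
this page does:

producer, = sasl_kafka.get_producers(1)           # exactly one producer
p1, p2 = sasl_kafka.get_producers(2, acks='all')  # extra kwargs pass through
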
Example #6
    def get_admin_clients(self, cnt=1, **params):
        params.setdefault('client_id', 'admin_client')
        params['bootstrap_servers'] = self.bootstrap_server()
        client_id = params['client_id']
        for x in range(cnt):
            params['client_id'] = '%s_%s' % (client_id, random_string(4))
            yield KafkaAdminClient(**params)
Example #7
    def test_large_messages(self):
        # Produce 10 "normal" size messages
        small_messages = self.send_messages(0, [str(x) for x in range(10)])

        # Produce 10 messages that are large (bigger than default fetch size)
        large_messages = self.send_messages(
            0, [random_string(5000) for x in range(10)])

        # Brokers prior to 0.11 will return the next message
        # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
        # Brokers 0.11 and later that store messages in v2 format
        # internally will return the next message only if the
        # full MessageSet is smaller than max_bytes.
        # For that reason, we set the max buffer size to a little more
        # than the size of all large messages combined (10 x 5000 =
        # 50000 bytes, so 60000 leaves headroom for record overhead)
        consumer = self.consumer(max_buffer_size=60000)

        expected_messages = set(small_messages + large_messages)
        actual_messages = set([
            x.message.value for x in consumer
            if not isinstance(x.message, PartialMessage)
        ])
        self.assertEqual(expected_messages, actual_messages)

        consumer.stop()
Example #8
    def get_clients(self, cnt=1, client_id=None):
        if client_id is None:
            client_id = 'client'
        return tuple(
            KafkaClient(client_id='%s_%s' % (client_id, random_string(4)),
                        bootstrap_servers=self.bootstrap_server())
            for x in range(cnt))
Example #9
    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            logging.debug('_send_random_message to %s:%d -- try %d', topic, partition, j)
            resp = producer.send_messages(topic, partition, random_string(10))
            if len(resp) > 0:
                self.assertEqual(resp[0].error, 0)
            logging.debug('_send_random_message to %s:%d -- try %d success', topic, partition, j)
Example #10
def test_heartbeat_thread(kafka_broker, topic):
    group_id = 'test-group-' + random_string(6)
    consumer = KafkaConsumer(topic,
                             bootstrap_servers=get_connect_str(kafka_broker),
                             group_id=group_id,
                             heartbeat_interval_ms=500)

    # poll until we have joined group / have assignment
    while not consumer.assignment():
        consumer.poll(timeout_ms=100)

    assert consumer._coordinator.state is MemberState.STABLE
    last_poll = consumer._coordinator.heartbeat.last_poll
    last_beat = consumer._coordinator.heartbeat.last_send

    timeout = time.time() + 30
    while True:
        if time.time() > timeout:
            raise RuntimeError('timeout waiting for heartbeat')
        if consumer._coordinator.heartbeat.last_send > last_beat:
            break
        time.sleep(0.5)

    assert consumer._coordinator.heartbeat.last_poll == last_poll
    consumer.poll(timeout_ms=100)
    assert consumer._coordinator.heartbeat.last_poll > last_poll
    consumer.close()
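
The heartbeat example relies on a get_connect_str helper that is not shown
here; presumably it just formats the fixture's address, along the lines of:

def get_connect_str(kafka_broker):
    return 'localhost:' + str(kafka_broker.port)
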
Example #11
def test_end_to_end(kafka_broker):
    connect_str = 'localhost:' + str(kafka_broker.port)
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             max_block_ms=10000,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=10000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    for i in range(1000):
        producer.send(topic, 'msg %d' % i)
    producer.flush()
    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for i in range(1000):
        try:
            msgs.add(next(consumer).value)
        except StopIteration:
            break

    assert msgs == set(['msg %d' % i for i in range(1000)])
Example #14
def test_lz4_incremental():
    for i in xrange(1000):
        # lz4's max single block size is 4MB; 100 bytes * 50000 is ~5MB,
        # so this exercises the multiple-block path
        b1 = random_string(100).encode('utf-8') * 50000
        b2 = lz4_decode(lz4_encode(b1))
        assert len(b1) == len(b2)
        assert b1 == b2
Example #15
    def get_consumers(self, cnt, topics, **params):
        params.setdefault('client_id', 'consumer')
        params.setdefault('heartbeat_interval_ms', 500)
        params['bootstrap_servers'] = self.bootstrap_server()
        client_id = params['client_id']
        for x in range(cnt):
            params['client_id'] = '%s_%s' % (client_id, random_string(4))
            yield KafkaConsumer(*topics, **params)
Example #17
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    magic = producer._max_usable_produce_magic()

    # record headers are supported in 0.11.0
    if env_kafka_version() < (0, 11, 0):
        headers = None
    else:
        headers = [("Header Key", b"Header Value")]

    topic = random_string(5)
    future = producer.send(topic,
                           value=b"Simple value",
                           key=b"Simple key",
                           headers=headers,
                           timestamp_ms=9999999,
                           partition=0)
    record = future.get(timeout=5)
    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0
    if magic >= 1:
        assert record.timestamp == 9999999
    else:
        assert record.timestamp == -1  # NO_TIMESTAMP

    if magic >= 2:
        assert record.checksum is None
    elif magic == 1:
        assert record.checksum == 1370034956
    else:
        assert record.checksum == 3296137851

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12
    if headers:
        assert record.serialized_header_size == 22

    # generated timestamp case is skipped for broker 0.9 and below
    if magic == 0:
        return

    send_time = time.time() * 1000
    future = producer.send(topic,
                           value=b"Simple value",
                           key=b"Simple key",
                           timestamp_ms=None,
                           partition=0)
    record = future.get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
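
Background for the branching on magic above (Kafka message-format history, not
part of the test itself):

# magic 0: brokers < 0.10  -- no timestamps, per-message CRC32 checksum
# magic 1: brokers 0.10.x  -- adds timestamps, still a per-message checksum
# magic 2: brokers >= 0.11 -- record batches with headers; per-record
#                             checksums are gone, so record.checksum is None
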
Example #18
    def setUpClass(cls):
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        chroot = random_string(10)
        cls.server1 = KafkaFixture.instance(0, cls.zk, zk_chroot=chroot)
        cls.server2 = KafkaFixture.instance(1, cls.zk, zk_chroot=chroot)

        cls.server = cls.server1  # Bootstrapping server
    def setUpClass(cls):
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        chroot = random_string(10)
        cls.server1 = KafkaFixture.instance(0, cls.zk.host, cls.zk.port, chroot)
        cls.server2 = KafkaFixture.instance(1, cls.zk.host, cls.zk.port, chroot)

        cls.server = cls.server1 # Bootstrapping server
    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
            while True:
                try:
                    producer.send_messages(topic, partition, msg.encode('utf-8'))
                except Exception:
                    log.exception('failure in _send_random_messages - retrying')
                    continue
                else:
                    break
Example #22
    def test_large_messages(self):
        # Produce 10 "normal" size messages
        small_messages = self.send_messages(0, [ str(x) for x in range(10) ])

        # Produce 10 messages that are large (bigger than default fetch size)
        large_messages = self.send_messages(0, [ random_string(5000) for x in range(10) ])

        # Consumer should still get all of them
        consumer = self.consumer()

        expected_messages = set(small_messages + large_messages)
        actual_messages = set([ x.message.value for x in consumer ])
        self.assertEqual(expected_messages, actual_messages)

        consumer.stop()
def test_client(request, sasl_kafka):
    topic_name = special_to_underscore(request.node.name + random_string(4))
    sasl_kafka.create_topics([topic_name], num_partitions=1)

    client, = sasl_kafka.get_clients(1)
    metadata_request = MetadataRequest_v1(None)  # renamed to avoid shadowing the `request` fixture
    client.send(0, metadata_request)
    for _ in range(10):
        result = client.poll(timeout_ms=10000)
        if len(result) > 0:
            break
    else:
        raise RuntimeError("Couldn't fetch topic response from Broker.")
    result = result[0]
    assert topic_name in [t[1] for t in result.topics]
    def setUpClass(cls):  # noqa
        if not os.environ.get('KAFKA_VERSION'):
            return

        zk_chroot = random_string(10)
        replicas = 2
        partitions = 2

        # mini zookeeper, 2 kafka brokers
        cls.zk = ZookeeperFixture.instance()
        kk_args = [cls.zk.host, cls.zk.port, zk_chroot, replicas, partitions]
        cls.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in cls.brokers]
        cls.client = KafkaClient(hosts)
Example #25
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=10000,
                             compression_type=compression)
    if producer.config['api_version'] >= (0, 10):
        magic = 1
    else:
        magic = 0

    topic = random_string(5)
    future = producer.send(topic,
                           value=b"Simple value",
                           key=b"Simple key",
                           timestamp_ms=9999999,
                           partition=0)
    record = future.get(timeout=5)
    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0
    if magic >= 1:
        assert record.timestamp == 9999999
    else:
        assert record.timestamp == -1  # NO_TIMESTAMP

    if magic == 1:
        assert record.checksum == 1370034956
    else:
        assert record.checksum == 3296137851

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12

    # generated timestamp case is skipped for broker 0.9 and below
    if magic == 0:
        return

    send_time = time.time() * 1000
    future = producer.send(topic,
                           value=b"Simple value",
                           key=b"Simple key",
                           timestamp_ms=None,
                           partition=0)
    record = future.get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
        self.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()
Example #27
    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            return

        zk_chroot = random_string(10)
        replicas = 2
        partitions = 2

        # mini zookeeper, 2 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
        self.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = KafkaClient(hosts)
        super(TestFailover, self).setUp()
Example #29
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    magic = producer._max_usable_produce_magic()

    topic = random_string(5)
    future = producer.send(
        topic,
        value=b"Simple value", key=b"Simple key", timestamp_ms=9999999,
        partition=0)
    record = future.get(timeout=5)
    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0
    if magic >= 1:
        assert record.timestamp == 9999999
    else:
        assert record.timestamp == -1  # NO_TIMESTAMP

    if magic >= 2:
        assert record.checksum is None
    elif magic == 1:
        assert record.checksum == 1370034956
    else:
        assert record.checksum == 3296137851

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12

    # generated timestamp case is skipped for broker 0.9 and below
    if magic == 0:
        return

    send_time = time.time() * 1000
    future = producer.send(
        topic,
        value=b"Simple value", key=b"Simple key", timestamp_ms=None,
        partition=0)
    record = future.get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
Example #32
def test_delete_consumergroups(kafka_admin_client, kafka_consumer_factory,
                               send_messages):
    random_group_id = 'test-group-' + random_string(6)
    group1 = random_group_id + "_1"
    group2 = random_group_id + "_2"
    group3 = random_group_id + "_3"

    send_messages(range(0, 100), partition=0)
    consumer1 = kafka_consumer_factory(group_id=group1)
    next(consumer1)
    consumer1.close()

    consumer2 = kafka_consumer_factory(group_id=group2)
    next(consumer2)
    consumer2.close()

    consumer3 = kafka_consumer_factory(group_id=group3)
    next(consumer3)
    consumer3.close()

    consumergroups = {
        group_id
        for group_id, _ in kafka_admin_client.list_consumer_groups()
    }
    assert group1 in consumergroups
    assert group2 in consumergroups
    assert group3 in consumergroups

    delete_results = {
        group_id: error
        for group_id, error in kafka_admin_client.delete_consumer_groups(
            [group1, group2])
    }
    assert delete_results[group1] == NoError
    assert delete_results[group2] == NoError
    assert group3 not in delete_results

    consumergroups = {
        group_id
        for group_id, _ in kafka_admin_client.list_consumer_groups()
    }
    assert group1 not in consumergroups
    assert group2 not in consumergroups
    assert group3 in consumergroups
Example #34
    def test_kafka_consumer__offset_commit_resume_dual(self):
        GROUP_ID = random_string(10).encode('utf-8')

        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer1 = self.kafka_consumer(
            group_id=GROUP_ID,
            auto_commit_enable=True,
            auto_commit_interval_ms=None,
            auto_commit_interval_messages=20,
            auto_offset_reset='smallest',
            offset_storage='kafka',
        )

        # Grab the first 195 messages
        output_msgs1 = []
        for _ in xrange(195):
            m = consumer1.next()
            output_msgs1.append(m)
            consumer1.task_done(m)
        self.assert_message_count(output_msgs1, 195)

        # The total offset across both partitions should be at 180
        consumer2 = self.kafka_consumer(
            group_id=GROUP_ID,
            auto_commit_enable=True,
            auto_commit_interval_ms=None,
            auto_commit_interval_messages=20,
            consumer_timeout_ms=100,
            auto_offset_reset='smallest',
            offset_storage='dual',
        )

        # 181-200
        output_msgs2 = []
        with self.assertRaises(ConsumerTimeout):
            while True:
                m = consumer2.next()
                output_msgs2.append(m)
        self.assert_message_count(output_msgs2, 20)
        self.assertEqual(len(set(output_msgs1) & set(output_msgs2)), 15)
Example #35
def test_end_to_end(kafka_broker, compression):

    if compression == 'lz4':
        # LZ4 requires 0.8.2
        if version() < (0, 8, 2):
            return
        # LZ4 python libs don't work on python2.6
        elif sys.version_info < (2, 7):
            return

    connect_str = 'localhost:' + str(kafka_broker.port)
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=10000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=10000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    messages = 100
    futures = []
    for i in range(messages):
        futures.append(producer.send(topic, 'msg %d' % i))
    ret = [f.get(timeout=30) for f in futures]
    assert len(ret) == messages

    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for i in range(messages):
        try:
            msgs.add(next(consumer).value)
        except StopIteration:
            break

    assert msgs == set(['msg %d' % i for i in range(messages)])
Example #36
def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory,
                                              send_messages):
    GROUP_ID = random_string(10)

    send_messages(range(0, 100), partition=0)
    send_messages(range(100, 200), partition=1)

    # Start a consumer and grab the first 180 messages
    consumer1 = kafka_consumer_factory(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )
    output_msgs1 = []
    for _ in range(180):
        m = next(consumer1)
        output_msgs1.append(m)
    assert_message_count(output_msgs1, 180)

    # Normally we let the pytest fixture `kafka_consumer_factory` handle
    # closing as part of its teardown. Here we manually call close() to force
    # auto-commit to occur before the second consumer starts. That way the
    # second consumer only consumes previously unconsumed messages.
    consumer1.close()

    # Start a second consumer to grab 181-200
    consumer2 = kafka_consumer_factory(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )
    output_msgs2 = []
    for _ in range(20):
        m = next(consumer2)
        output_msgs2.append(m)
    assert_message_count(output_msgs2, 20)

    # Verify the second consumer wasn't reconsuming messages that the first
    # consumer already saw
    assert_message_count(output_msgs1 + output_msgs2, 200)
Example #37
def test_end_to_end(kafka_broker, compression):

    if compression == 'lz4':
        # LZ4 requires 0.8.2
        if version() < (0, 8, 2):
            return
        # python-lz4 crashes on older versions of pypy
        elif platform.python_implementation() == 'PyPy':
            return

    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=30000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    messages = 100
    futures = []
    for i in range(messages):
        futures.append(producer.send(topic, 'msg %d' % i))
    ret = [f.get(timeout=30) for f in futures]
    assert len(ret) == messages
    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for i in range(messages):
        try:
            msgs.add(next(consumer).value)
        except StopIteration:
            break

    assert msgs == set(['msg %d' % i for i in range(messages)])
    consumer.close()
    def test_kafka_consumer_max_bytes_one_msg(self):
        # We send to only 1 partition so we don't have parallel requests to 2
        # nodes for data.
        self.send_messages(0, range(100, 200))

        # Start a consumer. FetchResponse_v3 should always include at least 1
        # full msg, so by setting fetch_max_bytes=1 we must get 1 msg at a time
        group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
        consumer = self.kafka_consumer(group_id=group,
                                       auto_offset_reset='earliest',
                                       fetch_max_bytes=1)
        fetched_msgs = []
        # A bit hacky, but we need this in order for message count to be exact
        consumer._coordinator.ensure_active_group()
        for i in range(10):
            poll_res = consumer.poll(timeout_ms=2000)
            print(poll_res)
            for partition, msgs in six.iteritems(poll_res):
                for msg in msgs:
                    fetched_msgs.append(msg)

        self.assertEqual(len(fetched_msgs), 10)
Example #40
    def test_kafka_consumer_max_bytes_one_msg(self):
        # We send to only 1 partition so we don't have parallel requests to 2
        # nodes for data.
        self.send_messages(0, range(100, 200))

        # Start a consumer. FetchResponse_v3 should always include at least 1
        # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time
        # But 0.11.0.0 returns 1 MessageSet at a time when the messages are
        # stored in the new v2 format by the broker.
        #
        # DP Note: This is a strange test. The consumer shouldn't care
        # how many messages are included in a FetchResponse, as long as it is
        # non-zero. I would not mind if we deleted this test. It caused
        # a minor headache when testing 0.11.0.0.
        group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
        consumer = self.kafka_consumer(group_id=group,
                                       auto_offset_reset='earliest',
                                       consumer_timeout_ms=5000,
                                       fetch_max_bytes=1)

        fetched_msgs = [next(consumer) for i in range(10)]
        self.assertEqual(len(fetched_msgs), 10)
Example #41
    def test_kafka_consumer__offset_commit_resume(self):
        GROUP_ID = random_string(10)

        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer1 = self.kafka_consumer(
            group_id=GROUP_ID,
            enable_auto_commit=True,
            auto_commit_interval_ms=100,
            auto_offset_reset='earliest',
        )

        # Grab the first 180 messages
        output_msgs1 = []
        for _ in xrange(180):
            m = next(consumer1)
            output_msgs1.append(m)
        self.assert_message_count(output_msgs1, 180)
        consumer1.close()

        # The total offset across both partitions should be at 180
        consumer2 = self.kafka_consumer(
            group_id=GROUP_ID,
            enable_auto_commit=True,
            auto_commit_interval_ms=100,
            auto_offset_reset='earliest',
        )

        # 181-200
        output_msgs2 = []
        for _ in xrange(20):
            m = next(consumer2)
            output_msgs2.append(m)
        self.assert_message_count(output_msgs2, 20)
        self.assertEqual(len(set(output_msgs1) | set(output_msgs2)), 200)
        consumer2.close()
Example #45
    def test_switch_leader_keyed_producer(self):
        topic = self.topic

        producer = KeyedProducer(self.client, async=False)  # 'async' is py2-era; it became a keyword in Python 3.7 and newer kafka-python spells it async_send

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                if producer.partitioners[kafka_bytestring(topic)].partition(
                        key) == 0:
                    recovered = True
            except (FailedPayloadsError, ConnectionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we eventually sent a keyed message that landed on
        # partition 0, proving the partition recovered under a new leader
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
    def test_switch_leader_keyed_producer(self):
        topic = self.topic

        producer = KeyedProducer(self.client, async=False)

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, ConnectionError, RequestTimedOutError,
                    NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
Example #47
    def test_snappy(self):
        for i in xrange(1000):
            b1 = random_string(100).encode('utf-8')
            b2 = snappy_decode(snappy_encode(b1))
            self.assertEqual(b1, b2)
Example #48
def topic(kafka_broker, request):
    """Return a topic fixture"""
    topic_name = '%s_%s' % (request.node.name, random_string(10))
    kafka_broker.create_topics([topic_name])
    return topic_name
Example #49
def topic(simple_client):
    topic = random_string(5)
    simple_client.ensure_topic_exists(topic)
    return topic
Example #50
def test_snappy():
    for i in xrange(1000):
        b1 = random_string(100).encode('utf-8')
        b2 = snappy_decode(snappy_encode(b1))
        assert b1 == b2
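
The snappy_encode/snappy_decode pair under test wraps python-snappy. A minimal
sketch of the round-trip being verified (the real codec in kafka.codec
additionally handles Kafka's xerial block framing):

import snappy  # python-snappy

def snappy_encode(payload):
    return snappy.compress(payload)

def snappy_decode(payload):
    return snappy.decompress(payload)
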
Example #51
def test_group(kafka_broker, topic):
    num_partitions = 4
    connect_str = get_connect_str(kafka_broker)
    consumers = {}
    stop = {}
    threads = {}
    messages = collections.defaultdict(lambda: collections.defaultdict(list))
    group_id = 'test-group-' + random_string(6)
    def consumer_thread(i):
        assert i not in consumers
        assert i not in stop
        stop[i] = threading.Event()
        consumers[i] = KafkaConsumer(topic,
                                     bootstrap_servers=connect_str,
                                     group_id=group_id,
                                     heartbeat_interval_ms=500)
        while not stop[i].is_set():
            for tp, records in six.iteritems(consumers[i].poll(100)):
                messages[i][tp].extend(records)
        consumers[i].close()
        del consumers[i]
        del stop[i]

    num_consumers = 4
    for i in range(num_consumers):
        t = threading.Thread(target=consumer_thread, args=(i,))
        t.start()
        threads[i] = t

    try:
        timeout = time.time() + 35
        while True:
            for c in range(num_consumers):

                # Verify all consumers have been created
                if c not in consumers:
                    break

                # Verify all consumers have an assignment
                elif not consumers[c].assignment():
                    break

            # If all consumers exist and have an assignment
            else:

                # Verify all consumers are in the same generation
                # then log state and break while loop
                generations = set([consumer._coordinator.generation
                                   for consumer in list(consumers.values())])

                # New generation assignment is not complete until
                # coordinator.rejoining = False
                rejoining = any([consumer._coordinator.rejoining
                                 for consumer in list(consumers.values())])

                if not rejoining and len(generations) == 1:
                    for c, consumer in list(consumers.items()):
                        logging.info("[%s] %s %s: %s", c,
                                     consumer._coordinator.generation,
                                     consumer._coordinator.member_id,
                                     consumer.assignment())
                    break
            assert time.time() < timeout, "timeout waiting for assignments"

        group_assignment = set()
        for c in range(num_consumers):
            assert len(consumers[c].assignment()) != 0
            assert set.isdisjoint(consumers[c].assignment(), group_assignment)
            group_assignment.update(consumers[c].assignment())

        assert group_assignment == set([
            TopicPartition(topic, partition)
            for partition in range(num_partitions)])

    finally:
        for c in range(num_consumers):
            stop[c].set()
            threads[c].join()
Example #52
def test_gzip():
    for i in xrange(1000):
        b1 = random_string(100).encode('utf-8')
        b2 = gzip_decode(gzip_encode(b1))
        assert b1 == b2
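
A minimal sketch of what gzip_encode/gzip_decode are assumed to do, using only
the standard library:

import gzip
import io

def gzip_encode(payload):
    buf = io.BytesIO()
    with gzip.GzipFile(fileobj=buf, mode='wb') as f:
        f.write(payload)
    return buf.getvalue()

def gzip_decode(payload):
    with gzip.GzipFile(fileobj=io.BytesIO(payload), mode='rb') as f:
        return f.read()
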
Example #54
    def test_snappy(self):
        for i in xrange(1000):
            s1 = random_string(100)
            s2 = snappy_decode(snappy_encode(s1))
            self.assertEqual(s1, s2)
Example #55
    def test_gzip(self):
        for i in xrange(1000):
            s1 = random_string(100)
            s2 = gzip_decode(gzip_encode(s1))
            self.assertEqual(s1, s2)
Example #56
def test_lz4():
    for i in xrange(1000):
        b1 = random_string(100).encode('utf-8')
        b2 = lz4_decode(lz4_encode(b1))
        assert b1 == b2
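
Likewise, lz4_encode/lz4_decode are assumed to wrap the python-lz4 frame API; a
minimal sketch of the round-trip these tests verify:

import lz4.frame  # python-lz4

def lz4_encode(payload):
    return lz4.frame.compress(payload)

def lz4_decode(payload):
    return lz4.frame.decompress(payload)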