def test_huge_messages(self):
        huge_message, = self.send_messages(0, [
            create_message(random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10)),
        ])

        # Create a consumer with the default buffer size
        consumer = self.consumer()

        # This consumer fails to get the message
        with self.assertRaises(ConsumerFetchSizeTooSmall):
            consumer.get_message(block=False, timeout=0.1)

        consumer.stop()

        # Create a consumer with no fetch size limit
        big_consumer = self.consumer(
            max_buffer_size=None,
            partitions=[0],
        )

        # Seek to the last message
        big_consumer.seek(-1, 2)

        # Consume giant message successfully
        message = big_consumer.get_message(block=False, timeout=10)
        self.assertIsNotNone(message)
        self.assertEqual(message.message.value, huge_message)

        big_consumer.stop()
Example #3
def test_heartbeat_thread(kafka_broker, topic):
    group_id = 'test-group-' + random_string(6)
    consumer = KafkaConsumer(topic,
                             bootstrap_servers=get_connect_str(kafka_broker),
                             group_id=group_id,
                             heartbeat_interval_ms=500)

    # poll until we have joined group / have assignment
    while not consumer.assignment():
        consumer.poll(timeout_ms=100)

    assert consumer._coordinator.state is MemberState.STABLE
    last_poll = consumer._coordinator.heartbeat.last_poll
    last_beat = consumer._coordinator.heartbeat.last_send

    timeout = time.time() + 30
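    # wait (up to 30 seconds) for the background heartbeat thread to send another heartbeat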
    while True:
        if time.time() > timeout:
            raise RuntimeError('timeout waiting for heartbeat')
        if consumer._coordinator.heartbeat.last_send > last_beat:
            break
        time.sleep(0.5)

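    # a background heartbeat alone must not advance last_poll; only an application poll() does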
    assert consumer._coordinator.heartbeat.last_poll == last_poll
    consumer.poll(timeout_ms=100)
    assert consumer._coordinator.heartbeat.last_poll > last_poll
    consumer.close()
Example #5
def test_lz4_incremental():
    for i in range(1000):
        # lz4 max single block size is 4MB
        # make sure we test with multiple blocks
        b1 = random_string(100).encode('utf-8') * 50000
        b2 = lz4_decode(lz4_encode(b1))
        assert len(b1) == len(b2)
        assert b1 == b2
Example #7
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    magic = producer._max_usable_produce_magic()

    # record headers are supported in 0.11.0
    if version() < (0, 11, 0):
        headers = None
    else:
        headers = [("Header Key", b"Header Value")]

    topic = random_string(5)
    future = producer.send(topic,
                           value=b"Simple value",
                           key=b"Simple key",
                           headers=headers,
                           timestamp_ms=9999999,
                           partition=0)
    record = future.get(timeout=5)
    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0
    if magic >= 1:
        assert record.timestamp == 9999999
    else:
        assert record.timestamp == -1  # NO_TIMESTAMP

    if magic >= 2:
        assert record.checksum is None
    elif magic == 1:
        assert record.checksum == 1370034956
    else:
        assert record.checksum == 3296137851

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12
    if headers:
        assert record.serialized_header_size == 22

    # generated timestamp case is skipped for broker 0.9 and below
    if magic == 0:
        return

    send_time = time.time() * 1000
    future = producer.send(topic,
                           value=b"Simple value",
                           key=b"Simple key",
                           timestamp_ms=None,
                           partition=0)
    record = future.get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
Example #8
    @classmethod
    def setUpClass(cls):
        if not os.environ.get('KAFKA_VERSION'):
            return

        cls.zk = ZookeeperFixture.instance()
        chroot = random_string(10)
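        # both brokers share the ZooKeeper fixture and chroot, forming a single two-broker cluster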
        cls.server1 = KafkaFixture.instance(0, cls.zk, zk_chroot=chroot)
        cls.server2 = KafkaFixture.instance(1, cls.zk, zk_chroot=chroot)

        cls.server = cls.server1  # Bootstrapping server
Example #10
    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
            while True:
                try:
                    producer.send_messages(topic, partition, msg.encode('utf-8'))
                except Exception:
                    log.exception('failure in _send_random_messages - retrying')
                    continue
                else:
                    break
Example #13
    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))
            self.client_async = KafkaClient(
                bootstrap_servers='%s:%d' %
                (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(
                    self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(self.topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        FailedPayloadsError) as e:
                    if time.time() > timeout:
                        raise KafkaTimeoutError(
                            'Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}
Example #14
    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_kwargs = {'zk_chroot': zk_chroot, 'replicas': replicas,
                     'partitions': partitions}
        self.brokers = [KafkaFixture.instance(i, self.zk, **kk_kwargs)
                        for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()
Example #15
def test_end_to_end(kafka_broker, compression):

    if compression == 'lz4':
        # LZ4 requires 0.8.2
        if version() < (0, 8, 2):
            return
        # python-lz4 crashes on older versions of pypy
        elif platform.python_implementation() == 'PyPy':
            return

    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=30000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    messages = 100
    futures = []
    for i in range(messages):
        futures.append(producer.send(topic, 'msg %d' % i))
    ret = [f.get(timeout=30) for f in futures]
    assert len(ret) == messages
    producer.close()

    consumer.subscribe([topic])
    msgs = set()
    for i in range(messages):
        try:
            msgs.add(next(consumer).value)
        except StopIteration:
            break

    assert msgs == set(['msg %d' % (i,) for i in range(messages)])
    consumer.close()
Example #17
    def test_large_messages(self):
        # Produce 10 "normal" size messages
        small_messages = self.send_messages(0, [str(x) for x in range(10)])

        # Produce 10 messages that are large (bigger than default fetch size)
        large_messages = self.send_messages(0, [random_string(5000) for x in range(10)])

        # Brokers prior to 0.11 will return the next message
        # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
        # Brokers 0.11 and later that store messages in v2 format
        # internally will return the next message only if the
        # full MessageSet is smaller than max_bytes.
        # For that reason, we set the max buffer size to a little more
        # than the size of all large messages combined
        consumer = self.consumer(max_buffer_size=60000)

        expected_messages = set(small_messages + large_messages)
        actual_messages = set([x.message.value for x in consumer])
        self.assertEqual(expected_messages, actual_messages)

        consumer.stop()
Example #19
    def test_kafka_consumer__offset_commit_resume(self):
        GROUP_ID = random_string(10)

        self.send_messages(0, range(0, 100))
        self.send_messages(1, range(100, 200))

        # Start a consumer
        consumer1 = self.kafka_consumer(
            group_id=GROUP_ID,
            enable_auto_commit=True,
            auto_commit_interval_ms=100,
            auto_offset_reset='earliest',
        )

        # Grab the first 180 messages
        output_msgs1 = []
        for _ in range(180):
            m = next(consumer1)
            output_msgs1.append(m)
        self.assert_message_count(output_msgs1, 180)
        consumer1.close()

        # The total offset across both partitions should be at 180
        consumer2 = self.kafka_consumer(
            group_id=GROUP_ID,
            enable_auto_commit=True,
            auto_commit_interval_ms=100,
            auto_offset_reset='earliest',
        )

        # 181-200
        output_msgs2 = []
        for _ in range(20):
            m = next(consumer2)
            output_msgs2.append(m)
        self.assert_message_count(output_msgs2, 20)
        self.assertEqual(len(set(output_msgs1) | set(output_msgs2)), 200)
        consumer2.close()
Example #20
    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(self.topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')


        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError) as e:
                    if time.time() > timeout:
                        raise KafkaTimeoutError('Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}
Example #23
    def test_kafka_consumer_max_bytes_one_msg(self):
        # We send to only 1 partition so we don't have parallel requests to 2
        # nodes for data.
        self.send_messages(0, range(100, 200))

        # Start a consumer. FetchResponse_v3 should always include at least 1
        # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time
        # But 0.11.0.0 returns 1 MessageSet at a time when the messages are
        # stored in the new v2 format by the broker.
        #
        # DP Note: This is a strange test. The consumer shouldn't care
        # how many messages are included in a FetchResponse, as long as it is
        # non-zero. I would not mind if we deleted this test. It caused
        # a minor headache when testing 0.11.0.0.
        group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
        consumer = self.kafka_consumer(group_id=group,
                                       auto_offset_reset='earliest',
                                       consumer_timeout_ms=5000,
                                       fetch_max_bytes=1)

        fetched_msgs = [next(consumer) for i in range(10)]
        self.assertEqual(len(fetched_msgs), 10)
        consumer.close()
Example #25
    def test_switch_leader_keyed_producer(self):
        topic = self.topic

        producer = KeyedProducer(self.client, async_send=False)

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, KafkaConnectionError,
                    RequestTimedOutError, NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
Example #27
def test_group(kafka_broker, topic):
    num_partitions = 4
    connect_str = get_connect_str(kafka_broker)
    consumers = {}
    stop = {}
    threads = {}
    messages = collections.defaultdict(lambda: collections.defaultdict(list))  # {consumer index: {TopicPartition: [records]}}
    group_id = 'test-group-' + random_string(6)

    def consumer_thread(i):
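        # each worker thread owns one consumer in the shared group and collects the records it is assigned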
        assert i not in consumers
        assert i not in stop
        stop[i] = threading.Event()
        consumers[i] = KafkaConsumer(topic,
                                     bootstrap_servers=connect_str,
                                     group_id=group_id,
                                     heartbeat_interval_ms=500)
        while not stop[i].is_set():
            for tp, records in six.iteritems(consumers[i].poll(100)):
                messages[i][tp].extend(records)
        consumers[i].close()
        consumers[i] = None
        stop[i] = None

    num_consumers = 4
    for i in range(num_consumers):
        t = threading.Thread(target=consumer_thread, args=(i, ))
        t.start()
        threads[i] = t

    try:
        timeout = time.time() + 35
        while True:
            for c in range(num_consumers):

                # Verify all consumers have been created
                if c not in consumers:
                    break

                # Verify all consumers have an assignment
                elif not consumers[c].assignment():
                    break

            # If all consumers exist and have an assignment
            else:

                logging.info(
                    'All consumers have assignment... checking for stable group'
                )
                # Verify all consumers are in the same generation
                # then log state and break while loop
                generations = set([
                    consumer._coordinator._generation.generation_id
                    for consumer in list(consumers.values())
                ])

                # New generation assignment is not complete until
                # coordinator.rejoining = False
                rejoining = any([
                    consumer._coordinator.rejoining
                    for consumer in list(consumers.values())
                ])

                if not rejoining and len(generations) == 1:
                    for c, consumer in list(consumers.items()):
                        logging.info(
                            "[%s] %s %s: %s", c,
                            consumer._coordinator._generation.generation_id,
                            consumer._coordinator._generation.member_id,
                            consumer.assignment())
                    break
                else:
                    logging.info('Rejoining: %s, generations: %s', rejoining,
                                 generations)
                    time.sleep(1)
            assert time.time() < timeout, "timeout waiting for assignments"

        logging.info('Group stabilized; verifying assignment')
        group_assignment = set()
        for c in range(num_consumers):
            assert len(consumers[c].assignment()) != 0
            assert set.isdisjoint(consumers[c].assignment(), group_assignment)
            group_assignment.update(consumers[c].assignment())

        assert group_assignment == set([
            TopicPartition(topic, partition)
            for partition in range(num_partitions)
        ])
        logging.info('Assignment looks good!')

    finally:
        logging.info('Shutting down %s consumers', num_consumers)
        for c in range(num_consumers):
            logging.info('Stopping consumer %s', c)
            stop[c].set()
            threads[c].join()
            threads[c] = None
Example #28
def test_gzip():
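    # gzip round-trip: decoding the encoded payload must reproduce the original bytes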
    for i in range(1000):
        b1 = random_string(100).encode('utf-8')
        b2 = gzip_decode(gzip_encode(b1))
        assert b1 == b2
Example #29
def test_snappy():
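    # snappy round-trip: decoding the encoded payload must reproduce the original bytes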
    for i in range(1000):
        b1 = random_string(100).encode('utf-8')
        b2 = snappy_decode(snappy_encode(b1))
        assert b1 == b2
Example #30
def test_lz4_old():
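    # round-trip through the legacy lz4 framing used for older Kafka message formats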
    for i in range(1000):
        b1 = random_string(100).encode('utf-8')
        b2 = lz4_decode_old_kafka(lz4_encode_old_kafka(b1))
        assert len(b1) == len(b2)
        assert b1 == b2
Example #33
def topic(simple_client):
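    # make sure the topic exists before handing it to the test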
    topic = random_string(5)
    simple_client.ensure_topic_exists(topic)
    return topic
Example #34
def topic(kafka_broker, request):
    """Return a topic fixture"""
    topic_name = '%s_%s' % (request.node.name, random_string(10))
    kafka_broker.create_topics([topic_name])
    return topic_name
Example #35
def test_lz4():
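    # lz4 round-trip: decoding the encoded payload must reproduce the original bytes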
    for i in range(1000):
        b1 = random_string(100).encode('utf-8')
        b2 = lz4_decode(lz4_encode(b1))
        assert len(b1) == len(b2)
        assert b1 == b2