def test_huge_messages(self):
    """Check that an oversized message needs an unbounded fetch buffer.

    One message slightly larger than ``MAX_FETCH_BUFFER_SIZE_BYTES`` is sent
    to partition 0.  A consumer with default settings must raise
    ``ConsumerFetchSizeTooSmall``; a consumer created with
    ``max_buffer_size=None`` must return the same value that was sent.
    """
    payload = create_message(random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10))
    # Exactly one message goes out, so exactly one value must come back.
    (huge_message,) = self.send_messages(0, [payload])

    # The default buffer size is too small for this message.
    small_consumer = self.consumer()
    with self.assertRaises(ConsumerFetchSizeTooSmall):
        small_consumer.get_message(False, 0.1)
    small_consumer.stop()

    # No fetch size limit, restricted to partition 0.
    big_consumer = self.consumer(max_buffer_size=None, partitions=[0])

    # Seek to the last message
    big_consumer.seek(-1, 2)

    # The giant message is now readable.
    received = big_consumer.get_message(block=False, timeout=10)
    self.assertIsNotNone(received)
    self.assertEqual(received.message.value, huge_message)

    big_consumer.stop()
def test_produce_and_consume(request, sasl_kafka):
    """Produce 100 messages over two partitions and consume them back.

    Messages alternate between partition 0 and 1, every produce future must
    have succeeded after ``flush()``, and each partition must yield 50
    consumed messages.
    """
    topic_name = special_to_underscore(request.node.name + random_string(4))
    sasl_kafka.create_topics([topic_name], num_partitions=2)
    (producer,) = sasl_kafka.get_producers(1)

    sent = []  # [(message, produce_future),]
    for index in range(100):
        text = "{}-{}-{}".format(index, request.node.name, uuid.uuid4())
        payload = text.encode("utf-8")
        produce_future = producer.send(topic_name, value=payload, partition=index % 2)
        sent.append((payload, produce_future))
    producer.flush()

    for _payload, produce_future in sent:
        assert produce_future.succeeded()

    (consumer,) = sasl_kafka.get_consumers(1, [topic_name])
    by_partition = {0: [], 1: []}
    consumed = 0
    for record in consumer:
        consumed += 1
        logging.debug("Consumed message %s", repr(record))
        by_partition[record.partition].append(record)
        # Stop after the 100th message instead of waiting for more.
        if consumed >= 100:
            break

    assert_message_count(by_partition[0], 50)
    assert_message_count(by_partition[1], 50)
def test_heartbeat_thread(kafka_broker, topic):
    """Check that heartbeats are sent between ``poll()`` calls.

    Once the consumer has an assignment, the test waits (up to 30 seconds)
    for ``heartbeat.last_send`` to advance without polling, asserts that
    ``heartbeat.last_poll`` did not move in the meantime, and then confirms
    that an explicit ``poll()`` advances ``last_poll``.

    Raises:
        RuntimeError: if no newer heartbeat is observed within 30 seconds.
    """
    group_id = 'test-group-' + random_string(6)
    consumer = KafkaConsumer(topic,
                             bootstrap_servers=get_connect_str(kafka_broker),
                             group_id=group_id,
                             heartbeat_interval_ms=500)
    try:
        # poll until we have joined group / have assignment
        while not consumer.assignment():
            consumer.poll(timeout_ms=100)

        assert consumer._coordinator.state is MemberState.STABLE
        last_poll = consumer._coordinator.heartbeat.last_poll
        last_beat = consumer._coordinator.heartbeat.last_send

        timeout = time.time() + 30
        while True:
            if time.time() > timeout:
                raise RuntimeError('timeout waiting for heartbeat')
            if consumer._coordinator.heartbeat.last_send > last_beat:
                break
            time.sleep(0.5)

        # No poll() happened while waiting, so last_poll must be unchanged.
        assert consumer._coordinator.heartbeat.last_poll == last_poll
        consumer.poll(timeout_ms=100)
        assert consumer._coordinator.heartbeat.last_poll > last_poll
    finally:
        # Close the consumer even when an assertion fails so that it does
        # not outlive the test.
        consumer.close()
def assert_message_count(self, topic, check_count, timeout=10, partitions=None):
    """Assert that ``topic`` has ``check_count`` pending messages.

    A throwaway consumer in a random group repeatedly queries
    ``pending(partitions)`` until the value equals ``check_count`` or
    ``timeout`` seconds have elapsed; the last observed value is asserted.

    Args:
        topic: topic to inspect.
        check_count: expected number of pending messages.
        timeout: seconds to keep re-checking; also passed as ``iter_timeout``.
        partitions: partitions to restrict the check to (``None`` is passed
            through unchanged).
    """
    hosts = ','.join(
        ['%s:%d' % (broker.host, broker.port) for broker in self.brokers])

    client = KafkaClient(hosts)
    try:
        group = random_string(10)
        consumer = SimpleConsumer(client, group, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)
        try:
            started_at = time.time()
            pending = consumer.pending(partitions)

            # Keep checking if it isn't immediately correct, subject to timeout
            while pending != check_count and (time.time() - started_at < timeout):
                pending = consumer.pending(partitions)
        finally:
            # Release the consumer even if pending() raised.
            consumer.stop()
    finally:
        client.close()

    self.assertEqual(pending, check_count)
def get_producers(self, cnt, **params):
    """Lazily yield ``cnt`` ``KafkaProducer`` objects for this fixture.

    ``params`` are forwarded to ``KafkaProducer``.  ``bootstrap_servers`` is
    always overridden with ``self.bootstrap_server()``, and each producer's
    ``client_id`` is the supplied id (default ``'producer'``) followed by
    ``_`` and ``random_string(4)``.
    """
    params.setdefault('client_id', 'producer')
    params['bootstrap_servers'] = self.bootstrap_server()
    base_id = params['client_id']
    for _ in range(cnt):
        unique_id = '%s_%s' % (base_id, random_string(4))
        yield KafkaProducer(**dict(params, client_id=unique_id))
def get_admin_clients(self, cnt=1, **params):
    """Lazily yield ``cnt`` ``KafkaAdminClient`` objects for this fixture.

    ``params`` are forwarded to ``KafkaAdminClient``.  ``bootstrap_servers``
    is always overridden with ``self.bootstrap_server()``, and each client's
    ``client_id`` is the supplied id (default ``'admin_client'``) followed by
    ``_`` and ``random_string(4)``.
    """
    params.setdefault('client_id', 'admin_client')
    params['bootstrap_servers'] = self.bootstrap_server()
    base_id = params['client_id']
    for _ in range(cnt):
        unique_id = '%s_%s' % (base_id, random_string(4))
        yield KafkaAdminClient(**dict(params, client_id=unique_id))
def test_large_messages(self):
    """Consume a mix of small and large messages, ignoring partial ones."""
    # Ten "normal" size messages ...
    small_messages = self.send_messages(0, [str(x) for x in range(10)])

    # ... and ten large ones (bigger than the default fetch size).
    large_messages = self.send_messages(
        0, [random_string(5000) for x in range(10)])

    # Brokers prior to 0.11 will return the next message
    # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
    # Brokers 0.11 and later that store messages in v2 format
    # internally will return the next message only if the
    # full MessageSet is smaller than max_bytes.
    # For that reason, we set the max buffer size to a little more
    # than the size of all large messages combined
    consumer = self.consumer(max_buffer_size=60000)

    expected_messages = set(small_messages + large_messages)

    actual_messages = set()
    for fetched in consumer:
        # Partial messages carry no complete value; skip them.
        if isinstance(fetched.message, PartialMessage):
            continue
        actual_messages.add(fetched.message.value)

    self.assertEqual(expected_messages, actual_messages)

    consumer.stop()
def get_clients(self, cnt=1, client_id=None):
    """Return a tuple of ``cnt`` ``KafkaClient`` objects for this fixture.

    Every client id is ``client_id`` (``'client'`` when ``None``) followed by
    ``_`` and ``random_string(4)``; all clients bootstrap from
    ``self.bootstrap_server()``.
    """
    prefix = 'client' if client_id is None else client_id
    clients = []
    for _ in range(cnt):
        unique_id = '%s_%s' % (prefix, random_string(4))
        clients.append(
            KafkaClient(client_id=unique_id,
                        bootstrap_servers=self.bootstrap_server()))
    return tuple(clients)
def _send_random_messages(self, producer, topic, partition, n):
    """Send ``n`` random 10-argument-length messages and check each response.

    For every message, a non-empty response must report error code 0 in its
    first element.  Progress is logged at debug level before and after each
    send.
    """
    for attempt in range(n):
        where = (topic, partition, attempt)
        logging.debug('_send_random_message to %s:%d -- try %d', *where)
        responses = producer.send_messages(topic, partition, random_string(10))
        if len(responses) > 0:
            first = responses[0]
            self.assertEqual(first.error, 0)
        logging.debug('_send_random_message to %s:%d -- try %d success', *where)
def test_heartbeat_thread(kafka_broker, topic):
    """Check that a heartbeat is sent while the test is not polling.

    Waits up to 30 seconds for ``heartbeat.last_send`` to advance, verifies
    ``heartbeat.last_poll`` stayed put, then verifies that a ``poll()`` call
    advances ``last_poll``.  The consumer is closed at the end.

    Raises:
        RuntimeError: if no newer heartbeat is observed within 30 seconds.
    """
    consumer = KafkaConsumer(
        topic,
        bootstrap_servers=get_connect_str(kafka_broker),
        group_id='test-group-' + random_string(6),
        heartbeat_interval_ms=500,
    )

    # Keep polling until the group has been joined and partitions assigned.
    while not consumer.assignment():
        consumer.poll(timeout_ms=100)

    assert consumer._coordinator.state is MemberState.STABLE
    poll_before = consumer._coordinator.heartbeat.last_poll
    beat_before = consumer._coordinator.heartbeat.last_send

    deadline = time.time() + 30
    while time.time() <= deadline:
        if consumer._coordinator.heartbeat.last_send > beat_before:
            break
        time.sleep(0.5)
    else:
        # The deadline passed without a newer heartbeat.
        raise RuntimeError('timeout waiting for heartbeat')

    assert consumer._coordinator.heartbeat.last_poll == poll_before
    consumer.poll(timeout_ms=100)
    assert consumer._coordinator.heartbeat.last_poll > poll_before
    consumer.close()
def test_end_to_end(kafka_broker):
    """Produce 1000 string messages and consume them all back.

    The producer serialises with ``str.encode`` and the consumer deserialises
    with ``bytes.decode``; the set of consumed values must equal the set of
    produced values.
    """
    connect_str = 'localhost:' + str(kafka_broker.port)
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             max_block_ms=10000,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=10000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)
    try:
        topic = random_string(5)

        for i in range(1000):
            producer.send(topic, 'msg %d' % i)
        producer.flush()
        producer.close()

        consumer.subscribe([topic])
        msgs = set()
        for i in range(1000):
            try:
                msgs.add(next(consumer).value)
            except StopIteration:
                # The consumer timed out before all messages arrived; the
                # final comparison reports what is missing.
                break

        assert msgs == set(['msg %d' % i for i in range(1000)])
    finally:
        # Close the consumer on every path so it does not leak past the test.
        consumer.close()
def test_huge_messages(self):
    """Verify that only an unlimited-buffer consumer can read a huge message.

    Sends a single message ten characters longer than
    ``MAX_FETCH_BUFFER_SIZE_BYTES`` to partition 0, expects a default
    consumer to raise ``ConsumerFetchSizeTooSmall``, and expects a consumer
    with ``max_buffer_size=None`` to return the sent value.
    """
    oversized = random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10)
    sent_values = self.send_messages(0, [create_message(oversized)])
    huge_message, = sent_values

    # Default buffer size: this consumer fails to get the message.
    default_consumer = self.consumer()
    with self.assertRaises(ConsumerFetchSizeTooSmall):
        default_consumer.get_message(False, 0.1)
    default_consumer.stop()

    # Consumer with no fetch size limit.
    unlimited = self.consumer(
        max_buffer_size=None,
        partitions=[0],
    )

    # Seek to the last message
    unlimited.seek(-1, 2)

    # The giant message must now be consumed successfully.
    fetched = unlimited.get_message(block=False, timeout=10)
    self.assertIsNotNone(fetched)
    self.assertEqual(fetched.message.value, huge_message)

    unlimited.stop()
def test_lz4_incremental():
    """Round-trip large payloads through ``lz4_encode``/``lz4_decode``."""
    for _ in xrange(1000):
        # lz4 max single block size is 4MB
        # make sure we test with multiple-blocks
        original = random_string(100).encode('utf-8') * 50000
        round_tripped = lz4_decode(lz4_encode(original))
        # Compare lengths first, then the contents themselves.
        assert len(original) == len(round_tripped)
        assert original == round_tripped
def get_consumers(self, cnt, topics, **params):
    """Lazily yield ``cnt`` ``KafkaConsumer`` objects subscribed to ``topics``.

    ``params`` are forwarded to ``KafkaConsumer``.  Defaults applied when not
    supplied: ``client_id='consumer'`` and ``heartbeat_interval_ms=500``.
    ``bootstrap_servers`` is always overridden with
    ``self.bootstrap_server()``, and every consumer's ``client_id`` gets a
    ``_`` plus ``random_string(4)`` suffix.
    """
    params.setdefault('client_id', 'consumer')
    params.setdefault('heartbeat_interval_ms', 500)
    params['bootstrap_servers'] = self.bootstrap_server()
    base_id = params['client_id']
    for _ in range(cnt):
        unique_id = '%s_%s' % (base_id, random_string(4))
        yield KafkaConsumer(*topics, **dict(params, client_id=unique_id))
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    """Check the ``RecordMetadata`` fields returned for a produced record.

    Expected timestamp and checksum depend on the message format ``magic``
    reported by the producer; headers are only sent when the broker version
    is at least 0.11.0.  A second send without an explicit timestamp must be
    stamped within one second of the local clock (skipped for magic 0).
    """
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    magic = producer._max_usable_produce_magic()

    # record headers are supported in 0.11.0
    supports_headers = not (env_kafka_version() < (0, 11, 0))
    headers = [("Header Key", b"Header Value")] if supports_headers else None

    topic = random_string(5)
    record = producer.send(
        topic,
        value=b"Simple value", key=b"Simple key", headers=headers,
        timestamp_ms=9999999, partition=0,
    ).get(timeout=5)

    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0

    expected_timestamp = 9999999 if magic >= 1 else -1  # -1 is NO_TIMESTAMP
    assert record.timestamp == expected_timestamp

    if magic >= 2:
        assert record.checksum is None
    else:
        expected_checksum = 1370034956 if magic == 1 else 3296137851
        assert record.checksum == expected_checksum

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12
    if headers:
        assert record.serialized_header_size == 22

    # generated timestamp case is skipped for broker 0.9 and below
    if magic == 0:
        return

    send_time = time.time() * 1000
    record = producer.send(
        topic,
        value=b"Simple value", key=b"Simple key", timestamp_ms=None,
        partition=0,
    ).get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
def setUpClass(cls):
    """Start a Zookeeper fixture and two Kafka brokers under one chroot.

    Does nothing when the ``KAFKA_VERSION`` environment variable is unset or
    empty.
    """
    kafka_version = os.environ.get('KAFKA_VERSION')
    if not kafka_version:
        return

    cls.zk = ZookeeperFixture.instance()
    # Both brokers share the same randomly named chroot.
    zk_chroot = random_string(10)
    cls.server1 = KafkaFixture.instance(0, cls.zk, zk_chroot=zk_chroot)
    cls.server2 = KafkaFixture.instance(1, cls.zk, zk_chroot=zk_chroot)

    # Bootstrapping server
    cls.server = cls.server1
def setUpClass(cls):
    """Start a Zookeeper fixture and two Kafka brokers under one chroot.

    Does nothing when the ``KAFKA_VERSION`` environment variable is unset or
    empty.
    """
    kafka_version = os.environ.get('KAFKA_VERSION')
    if not kafka_version:
        return

    cls.zk = ZookeeperFixture.instance()
    # Both brokers share the same randomly named chroot.
    zk_chroot = random_string(10)
    cls.server1 = KafkaFixture.instance(0, cls.zk.host, cls.zk.port, zk_chroot)
    cls.server2 = KafkaFixture.instance(1, cls.zk.host, cls.zk.port, zk_chroot)

    # Bootstrapping server
    cls.server = cls.server1
def _send_random_messages(self, producer, topic, partition, n):
    """Send ``n`` random messages to ``topic``/``partition``, retrying each.

    Each message is re-sent until ``producer.send_messages`` stops raising,
    so the helper only returns after all ``n`` sends completed without an
    exception.  Failures are logged with their traceback.
    """
    for j in range(n):
        msg = 'msg {0}: {1}'.format(j, random_string(10))
        log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
        while True:
            try:
                producer.send_messages(topic, partition, msg.encode('utf-8'))
            except Exception:
                # Only ordinary errors are retried.  A bare ``except`` would
                # also trap KeyboardInterrupt/SystemExit and make this
                # unbounded retry loop impossible to interrupt.
                log.exception('failure in _send_random_messages - retrying')
                continue
            else:
                break
def test_large_messages(self):
    """Consume ten small and ten large messages with a default consumer."""
    # Ten "normal" size messages.
    small_payloads = [str(x) for x in range(10)]
    small_messages = self.send_messages(0, small_payloads)

    # Ten large messages (bigger than the default fetch size).
    large_payloads = [random_string(5000) for x in range(10)]
    large_messages = self.send_messages(0, large_payloads)

    # The consumer should still receive every one of them.
    consumer = self.consumer()

    expected_messages = set(small_messages + large_messages)
    actual_messages = set()
    for fetched in consumer:
        actual_messages.add(fetched.message.value)
    self.assertEqual(expected_messages, actual_messages)

    consumer.stop()
def test_client(request, sasl_kafka):
    """Send a metadata request through a SASL client and find the new topic.

    Raises:
        RuntimeError: if ten polls of up to ten seconds each return nothing.
    """
    topic_name = special_to_underscore(request.node.name + random_string(4))
    sasl_kafka.create_topics([topic_name], num_partitions=1)

    (client,) = sasl_kafka.get_clients(1)
    metadata_request = MetadataRequest_v1(None)
    client.send(0, metadata_request)

    responses = None
    received = False
    for _ in range(10):
        responses = client.poll(timeout_ms=10000)
        if len(responses) > 0:
            received = True
            break
    if not received:
        raise RuntimeError("Couldn't fetch topic response from Broker.")

    metadata = responses[0]
    # Index 1 of each topic entry is compared against the topic name.
    known_topics = [entry[1] for entry in metadata.topics]
    assert topic_name in known_topics
def setUpClass(cls):  # noqa
    """Start Zookeeper plus two Kafka brokers and connect a ``KafkaClient``.

    Does nothing when the ``KAFKA_VERSION`` environment variable is unset or
    empty.
    """
    if not os.environ.get('KAFKA_VERSION'):
        return

    replicas = 2
    partitions = 2
    zk_chroot = random_string(10)

    # mini zookeeper, 2 kafka brokers
    cls.zk = ZookeeperFixture.instance()
    fixture_args = [cls.zk.host, cls.zk.port, zk_chroot, replicas, partitions]
    brokers = []
    for broker_id in range(replicas):
        brokers.append(KafkaFixture.instance(broker_id, *fixture_args))
    cls.brokers = brokers

    hosts = ['%s:%d' % (broker.host, broker.port) for broker in cls.brokers]
    cls.client = KafkaClient(hosts)
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    """Check the ``RecordMetadata`` fields returned for a produced record.

    ``magic`` is 1 when the producer's configured ``api_version`` is at least
    (0, 10) and 0 otherwise; the expected timestamp and checksum depend on
    it.  A second send without an explicit timestamp must be stamped within
    one second of the local clock (skipped for magic 0).
    """
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=10000,
                             compression_type=compression)
    magic = 1 if producer.config['api_version'] >= (0, 10) else 0

    topic = random_string(5)
    record = producer.send(
        topic,
        value=b"Simple value", key=b"Simple key", timestamp_ms=9999999,
        partition=0,
    ).get(timeout=5)

    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0

    expected_timestamp = 9999999 if magic >= 1 else -1  # -1 is NO_TIMESTAMP
    assert record.timestamp == expected_timestamp

    expected_checksum = 1370034956 if magic == 1 else 3296137851
    assert record.checksum == expected_checksum

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12

    # generated timestamp case is skipped for broker 0.9 and below
    if magic == 0:
        return

    send_time = time.time() * 1000
    record = producer.send(
        topic,
        value=b"Simple value", key=b"Simple key", timestamp_ms=None,
        partition=0,
    ).get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
def setUp(self):
    """Start Zookeeper plus three Kafka brokers and connect a ``SimpleClient``.

    The test is skipped when the ``KAFKA_VERSION`` environment variable is
    unset or empty.
    """
    if not os.environ.get('KAFKA_VERSION'):
        self.skipTest('integration test requires KAFKA_VERSION')

    replicas = 3
    partitions = 3
    zk_chroot = random_string(10)

    # mini zookeeper, 3 kafka brokers
    self.zk = ZookeeperFixture.instance()
    fixture_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
    brokers = []
    for broker_id in range(replicas):
        brokers.append(KafkaFixture.instance(broker_id, *fixture_args))
    self.brokers = brokers

    hosts = ['%s:%d' % (broker.host, broker.port) for broker in self.brokers]
    self.client = SimpleClient(hosts, timeout=2)
    super(TestFailover, self).setUp()
def setUp(self):
    """Start Zookeeper plus two Kafka brokers and connect a ``KafkaClient``.

    The test is skipped when the ``KAFKA_VERSION`` environment variable is
    unset or empty.  Returning silently instead would leave ``self.brokers``
    and ``self.client`` undefined and bypass the parent ``setUp``, so a test
    that still ran would fail on a missing attribute rather than be skipped.
    """
    if not os.environ.get('KAFKA_VERSION'):
        self.skipTest('integration test requires KAFKA_VERSION')

    zk_chroot = random_string(10)
    replicas = 2
    partitions = 2

    # mini zookeeper, 2 kafka brokers
    self.zk = ZookeeperFixture.instance()
    kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
    self.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]

    hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
    self.client = KafkaClient(hosts)
    super(TestFailover, self).setUp()
def setUpClass(cls):  # noqa
    """Bring up the class-wide cluster: Zookeeper, two brokers, one client.

    Returns immediately, leaving the class untouched, when the
    ``KAFKA_VERSION`` environment variable is unset or empty.
    """
    if not os.environ.get('KAFKA_VERSION'):
        return

    replicas, partitions = 2, 2
    zk_chroot = random_string(10)

    # mini zookeeper, 2 kafka brokers
    cls.zk = ZookeeperFixture.instance()
    shared_args = [cls.zk.host, cls.zk.port, zk_chroot, replicas, partitions]
    started = []
    for index in range(replicas):
        started.append(KafkaFixture.instance(index, *shared_args))
    cls.brokers = started

    hosts = ['%s:%d' % (node.host, node.port) for node in cls.brokers]
    cls.client = KafkaClient(hosts)
def test_kafka_producer_proper_record_metadata(kafka_broker, compression):
    """Check the ``RecordMetadata`` fields returned for a produced record.

    Expected timestamp and checksum depend on the message format ``magic``
    reported by the producer.  A second send without an explicit timestamp
    must be stamped within one second of the local clock (skipped for
    magic 0).
    """
    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression)
    magic = producer._max_usable_produce_magic()

    topic = random_string(5)
    first_future = producer.send(topic,
                                 value=b"Simple value",
                                 key=b"Simple key",
                                 timestamp_ms=9999999,
                                 partition=0)
    record = first_future.get(timeout=5)

    assert record is not None
    assert record.topic == topic
    assert record.partition == 0
    assert record.topic_partition == TopicPartition(topic, 0)
    assert record.offset == 0

    expected_timestamp = 9999999 if magic >= 1 else -1  # -1 is NO_TIMESTAMP
    assert record.timestamp == expected_timestamp

    if magic >= 2:
        assert record.checksum is None
    else:
        expected_checksum = 1370034956 if magic == 1 else 3296137851
        assert record.checksum == expected_checksum

    assert record.serialized_key_size == 10
    assert record.serialized_value_size == 12

    # generated timestamp case is skipped for broker 0.9 and below
    if magic == 0:
        return

    send_time = time.time() * 1000
    second_future = producer.send(topic,
                                  value=b"Simple value",
                                  key=b"Simple key",
                                  timestamp_ms=None,
                                  partition=0)
    record = second_future.get(timeout=5)
    assert abs(record.timestamp - send_time) <= 1000  # Allow 1s deviation
def setUp(self):
    """Bring up the per-test cluster: Zookeeper, three brokers, one client.

    Skips the test when the ``KAFKA_VERSION`` environment variable is unset
    or empty.
    """
    if not os.environ.get('KAFKA_VERSION'):
        self.skipTest('integration test requires KAFKA_VERSION')

    replicas, partitions = 3, 3
    zk_chroot = random_string(10)

    # mini zookeeper, 3 kafka brokers
    self.zk = ZookeeperFixture.instance()
    shared_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
    started = []
    for index in range(replicas):
        started.append(KafkaFixture.instance(index, *shared_args))
    self.brokers = started

    hosts = ['%s:%d' % (node.host, node.port) for node in self.brokers]
    self.client = SimpleClient(hosts, timeout=2)
    super(TestFailover, self).setUp()
def assert_message_count(self, topic, check_count, timeout=10, partitions=None):
    """Assert that ``topic`` has ``check_count`` pending messages.

    A throwaway consumer in a random group repeatedly queries
    ``pending(partitions)`` until the value equals ``check_count`` or
    ``timeout`` seconds have elapsed; the last observed value is asserted.
    """
    addresses = []
    for broker in self.brokers:
        addresses.append("%s:%d" % (broker.host, broker.port))
    hosts = ",".join(addresses)

    client = KafkaClient(hosts)
    group = random_string(10)
    consumer = SimpleConsumer(
        client, group, topic,
        partitions=partitions,
        auto_commit=False,
        iter_timeout=timeout,
    )

    started_at = time.time()
    while True:
        pending = consumer.pending(partitions)
        # Stop as soon as the count matches or the time budget is used up.
        if pending == check_count or time.time() - started_at >= timeout:
            break

    consumer.stop()
    client.close()

    self.assertEqual(pending, check_count)
def test_delete_consumergroups(kafka_admin_client, kafka_consumer_factory, send_messages):
    """Delete two of three consumer groups and check only those disappear.

    Three groups are created by consuming one message in each.  After
    deleting the first two, both must report ``NoError``, the third must not
    appear in the result, and only the third must still be listed.
    """
    random_group_id = 'test-group-' + random_string(6)
    group1 = random_group_id + "_1"
    group2 = random_group_id + "_2"
    group3 = random_group_id + "_3"

    send_messages(range(0, 100), partition=0)

    # Consuming one message in each group is what makes the group exist.
    for group in (group1, group2, group3):
        member = kafka_consumer_factory(group_id=group)
        next(member)
        member.close()

    listed_before = set(
        group_id for group_id, _ in kafka_admin_client.list_consumer_groups()
    )
    assert group1 in listed_before
    assert group2 in listed_before
    assert group3 in listed_before

    delete_results = dict(
        kafka_admin_client.delete_consumer_groups([group1, group2])
    )
    assert delete_results[group1] == NoError
    assert delete_results[group2] == NoError
    assert group3 not in delete_results

    listed_after = set(
        group_id for group_id, _ in kafka_admin_client.list_consumer_groups()
    )
    assert group1 not in listed_after
    assert group2 not in listed_after
    assert group3 in listed_after
def test_kafka_consumer__offset_commit_resume_dual(self):
    """Resume from offsets committed with ``kafka`` storage using ``dual``.

    The first consumer reads 195 of 200 messages while auto-committing every
    20 messages.  The second consumer, in the same group, must then receive
    20 messages, 15 of which overlap with what the first one already read.
    """
    GROUP_ID = random_string(10).encode('utf-8')

    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Settings shared by both consumers.
    shared = dict(
        group_id=GROUP_ID,
        auto_commit_enable=True,
        auto_commit_interval_ms=None,
        auto_commit_interval_messages=20,
        auto_offset_reset='smallest',
    )

    # Start a consumer
    consumer1 = self.kafka_consumer(offset_storage='kafka', **shared)

    # Grab the first 195 messages
    output_msgs1 = []
    for _ in xrange(195):
        message = consumer1.next()
        output_msgs1.append(message)
        consumer1.task_done(message)
    self.assert_message_count(output_msgs1, 195)

    # The total offset across both partitions should be at 180
    consumer2 = self.kafka_consumer(
        consumer_timeout_ms=100, offset_storage='dual', **shared)

    # 181-200
    output_msgs2 = []
    with self.assertRaises(ConsumerTimeout):
        while True:
            output_msgs2.append(consumer2.next())
    self.assert_message_count(output_msgs2, 20)

    overlap = set(output_msgs1).intersection(output_msgs2)
    self.assertEqual(len(overlap), 15)
def test_kafka_consumer__offset_commit_resume_dual(self):
    """Check that a ``dual``-storage consumer resumes from committed offsets.

    Consumer one (``kafka`` offset storage) reads 195 of the 200 messages,
    committing every 20.  Consumer two joins the same group and must read
    20 messages before timing out, sharing 15 with consumer one's output.
    """
    GROUP_ID = random_string(10).encode('utf-8')

    for partition, values in ((0, range(0, 100)), (1, range(100, 200))):
        self.send_messages(partition, values)

    # Start a consumer
    first = self.kafka_consumer(
        group_id=GROUP_ID,
        auto_commit_enable=True,
        auto_commit_interval_ms=None,
        auto_commit_interval_messages=20,
        auto_offset_reset='smallest',
        offset_storage='kafka',
    )

    # Grab the first 195 messages, marking each one done as it arrives.
    seen_by_first = []
    for _ in xrange(195):
        item = first.next()
        seen_by_first.append(item)
        first.task_done(item)
    self.assert_message_count(seen_by_first, 195)

    # The total offset across both partitions should be at 180
    second = self.kafka_consumer(
        group_id=GROUP_ID,
        auto_commit_enable=True,
        auto_commit_interval_ms=None,
        auto_commit_interval_messages=20,
        consumer_timeout_ms=100,
        auto_offset_reset='smallest',
        offset_storage='dual',
    )

    # 181-200
    seen_by_second = []
    with self.assertRaises(ConsumerTimeout):
        while True:
            item = second.next()
            seen_by_second.append(item)
    self.assert_message_count(seen_by_second, 20)

    shared_messages = set(seen_by_first) & set(seen_by_second)
    self.assertEqual(len(shared_messages), 15)
def test_end_to_end(kafka_broker, compression):
    """Produce 100 messages with ``compression`` and consume them all back.

    The lz4 case returns early (without testing) on brokers older than 0.8.2
    and on Python older than 2.7.  The set of consumed values must equal the
    set of produced values.
    """
    if compression == 'lz4':
        # LZ4 requires 0.8.2
        if version() < (0, 8, 2):
            return
        # LZ4 python libs dont work on python2.6
        elif sys.version_info < (2, 7):
            return

    connect_str = 'localhost:' + str(kafka_broker.port)
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=10000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=10000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)
    try:
        topic = random_string(5)

        messages = 100
        futures = []
        for i in range(messages):
            futures.append(producer.send(topic, 'msg %d' % i))
        # Waiting on every future surfaces produce errors before consuming.
        ret = [f.get(timeout=30) for f in futures]
        assert len(ret) == messages

        producer.close()

        consumer.subscribe([topic])
        msgs = set()
        for i in range(messages):
            try:
                msgs.add(next(consumer).value)
            except StopIteration:
                # The consumer timed out before all messages arrived; the
                # final comparison reports what is missing.
                break

        assert msgs == set(['msg %d' % i for i in range(messages)])
    finally:
        # Close the consumer on every path so it does not leak past the test.
        consumer.close()
def test_kafka_consumer__offset_commit_resume(kafka_consumer_factory, send_messages):
    """Check that a second consumer resumes where the first one stopped.

    The first consumer reads 180 of 200 messages and is closed; the second,
    in the same group, reads 20 more.  The combined 200 messages are then
    passed to ``assert_message_count``.
    """
    GROUP_ID = random_string(10)

    send_messages(range(0, 100), partition=0)
    send_messages(range(100, 200), partition=1)

    # Both consumers are created with identical settings.
    consumer_settings = dict(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )

    # Start a consumer and grab the first 180 messages
    consumer1 = kafka_consumer_factory(**consumer_settings)
    output_msgs1 = [next(consumer1) for _ in range(180)]
    assert_message_count(output_msgs1, 180)

    # Normally we let the pytest fixture `kafka_consumer_factory` handle
    # closing as part of its teardown. Here we manually call close() to force
    # auto-commit to occur before the second consumer starts. That way the
    # second consumer only consumes previously unconsumed messages.
    consumer1.close()

    # Start a second consumer to grab 181-200
    consumer2 = kafka_consumer_factory(**consumer_settings)
    output_msgs2 = [next(consumer2) for _ in range(20)]
    assert_message_count(output_msgs2, 20)

    # Verify the second consumer wasn't reconsuming messages that the first
    # consumer already saw
    assert_message_count(output_msgs1 + output_msgs2, 200)
def test_end_to_end(kafka_broker, compression):
    """Produce 100 messages with ``compression`` and consume them all back.

    The lz4 case returns early (without testing) on brokers older than 0.8.2
    and on PyPy.  The set of consumed values must equal the set of produced
    values; the consumer is closed at the end.
    """
    # LZ4 requires 0.8.2, and python-lz4 crashes on older versions of pypy.
    if compression == 'lz4' and (
            version() < (0, 8, 2)
            or platform.python_implementation() == 'PyPy'):
        return

    connect_str = ':'.join([kafka_broker.host, str(kafka_broker.port)])
    producer = KafkaProducer(bootstrap_servers=connect_str,
                             retries=5,
                             max_block_ms=30000,
                             compression_type=compression,
                             value_serializer=str.encode)
    consumer = KafkaConsumer(bootstrap_servers=connect_str,
                             group_id=None,
                             consumer_timeout_ms=30000,
                             auto_offset_reset='earliest',
                             value_deserializer=bytes.decode)

    topic = random_string(5)

    messages = 100
    futures = [producer.send(topic, 'msg %d' % n) for n in range(messages)]
    # Wait for every send to complete before closing the producer.
    results = [future.get(timeout=30) for future in futures]
    assert len(results) == messages

    producer.close()

    consumer.subscribe([topic])
    received = set()
    for _ in range(messages):
        try:
            record = next(consumer)
        except StopIteration:
            break
        received.add(record.value)

    assert received == set('msg %d' % n for n in range(messages))
    consumer.close()
def test_large_messages(self):
    """Consume ten small and ten large messages with a 60000-byte buffer."""
    # Ten "normal" size messages.
    small_payloads = [str(x) for x in range(10)]
    small_messages = self.send_messages(0, small_payloads)

    # Ten large messages (bigger than the default fetch size).
    large_payloads = [random_string(5000) for x in range(10)]
    large_messages = self.send_messages(0, large_payloads)

    # Brokers prior to 0.11 will return the next message
    # if it is smaller than max_bytes (called buffer_size in SimpleConsumer)
    # Brokers 0.11 and later that store messages in v2 format
    # internally will return the next message only if the
    # full MessageSet is smaller than max_bytes.
    # For that reason, we set the max buffer size to a little more
    # than the size of all large messages combined
    consumer = self.consumer(max_buffer_size=60000)

    expected_messages = set(small_messages + large_messages)
    actual_messages = set()
    for fetched in consumer:
        actual_messages.add(fetched.message.value)
    self.assertEqual(expected_messages, actual_messages)

    consumer.stop()
def test_kafka_consumer_max_bytes_one_msg(self):
    """Expect exactly one message per poll when ``fetch_max_bytes`` is 1.

    Ten polls of up to two seconds each must yield ten messages in total.
    """
    # We send to only 1 partition so we don't have parallel requests to 2
    # nodes for data.
    self.send_messages(0, range(100, 200))

    # Start a consumer. FetchResponse_v3 should always include at least 1
    # full msg, so by setting fetch_max_bytes=1 we must get 1 msg at a time
    group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
    consumer = self.kafka_consumer(
        group_id=group,
        auto_offset_reset='earliest',
        fetch_max_bytes=1,
    )

    # A bit hacky, but we need this in order for message count to be exact
    consumer._coordinator.ensure_active_group()

    fetched_msgs = []
    for _ in range(10):
        poll_res = consumer.poll(timeout_ms=2000)
        print(poll_res)
        # Only the record lists matter here, not the keys they belong to.
        for batch in six.itervalues(poll_res):
            fetched_msgs.extend(batch)

    self.assertEqual(len(fetched_msgs), 10)
def test_kafka_consumer_max_bytes_one_msg(self):
    """Read ten messages from a consumer created with ``fetch_max_bytes=1``."""
    # We send to only 1 partition so we don't have parallel requests to 2
    # nodes for data.
    self.send_messages(0, range(100, 200))

    # Start a consumer. FetchResponse_v3 should always include at least 1
    # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time
    # But 0.11.0.0 returns 1 MessageSet at a time when the messages are
    # stored in the new v2 format by the broker.
    #
    # DP Note: This is a strange test. The consumer shouldn't care
    # how many messages are included in a FetchResponse, as long as it is
    # non-zero. I would not mind if we deleted this test. It caused
    # a minor headache when testing 0.11.0.0.
    group = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
    consumer = self.kafka_consumer(
        group_id=group,
        auto_offset_reset='earliest',
        consumer_timeout_ms=5000,
        fetch_max_bytes=1,
    )

    fetched_msgs = []
    for _ in range(10):
        fetched_msgs.append(next(consumer))
    self.assertEqual(len(fetched_msgs), 10)
def test_kafka_consumer__offset_commit_resume(self):
    """Check that a second consumer resumes where the first one stopped.

    The first consumer reads 180 of 200 messages and is closed; the second,
    in the same group, reads the remaining 20.  The union of both outputs
    must contain 200 distinct messages.
    """
    GROUP_ID = random_string(10)

    self.send_messages(0, range(0, 100))
    self.send_messages(1, range(100, 200))

    # Both consumers are created with identical settings.
    consumer_settings = dict(
        group_id=GROUP_ID,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        auto_offset_reset='earliest',
    )

    # Start a consumer
    consumer1 = self.kafka_consumer(**consumer_settings)

    # Grab the first 180 messages
    output_msgs1 = [next(consumer1) for _ in xrange(180)]
    self.assert_message_count(output_msgs1, 180)
    consumer1.close()

    # The total offset across both partitions should be at 180
    consumer2 = self.kafka_consumer(**consumer_settings)

    # 181-200
    output_msgs2 = [next(consumer2) for _ in xrange(20)]
    self.assert_message_count(output_msgs2, 20)

    combined = set(output_msgs1).union(output_msgs2)
    self.assertEqual(len(combined), 200)
    consumer2.close()
def test_kafka_consumer_max_bytes_one_msg(self):
    """Pull ten messages through a consumer limited to ``fetch_max_bytes=1``."""
    # We send to only 1 partition so we don't have parallel requests to 2
    # nodes for data.
    self.send_messages(0, range(100, 200))

    # Start a consumer. FetchResponse_v3 should always include at least 1
    # full msg, so by setting fetch_max_bytes=1 we should get 1 msg at a time
    # But 0.11.0.0 returns 1 MessageSet at a time when the messages are
    # stored in the new v2 format by the broker.
    #
    # DP Note: This is a strange test. The consumer shouldn't care
    # how many messages are included in a FetchResponse, as long as it is
    # non-zero. I would not mind if we deleted this test. It caused
    # a minor headache when testing 0.11.0.0.
    group_name = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
    consumer = self.kafka_consumer(group_id=group_name,
                                   auto_offset_reset='earliest',
                                   consumer_timeout_ms=5000,
                                   fetch_max_bytes=1)

    fetched_msgs = []
    remaining = 10
    while remaining > 0:
        fetched_msgs.append(next(consumer))
        remaining -= 1
    self.assertEqual(len(fetched_msgs), 10)
def test_kafka_consumer_max_bytes_one_msg(self):
    """Poll ten times with ``fetch_max_bytes=1`` and expect ten messages."""
    # We send to only 1 partition so we don't have parallel requests to 2
    # nodes for data.
    self.send_messages(0, range(100, 200))

    # Start a consumer. FetchResponse_v3 should always include at least 1
    # full msg, so by setting fetch_max_bytes=1 we must get 1 msg at a time
    group_name = 'test-kafka-consumer-max-bytes-one-msg-' + random_string(5)
    consumer = self.kafka_consumer(group_id=group_name,
                                   auto_offset_reset='earliest',
                                   fetch_max_bytes=1)

    # A bit hacky, but we need this in order for message count to be exact
    consumer._coordinator.ensure_active_group()

    fetched_msgs = []
    polls_left = 10
    while polls_left > 0:
        poll_res = consumer.poll(timeout_ms=2000)
        print(poll_res)
        # Collect every record from every returned batch.
        for records in six.itervalues(poll_res):
            for record in records:
                fetched_msgs.append(record)
        polls_left -= 1

    self.assertEqual(len(fetched_msgs), 10)
def test_switch_leader_keyed_producer(self):
    """Verify a synchronous ``KeyedProducer`` recovers from a leader change.

    Ten keyed messages are sent, the leader of partition 0 is killed, and
    sending is retried for up to 60 seconds until a message whose key maps
    to partition 0 goes through.  Ten more messages are then sent to make
    sure no further exceptions occur.
    """
    topic = self.topic
    # ``async`` is a reserved word from Python 3.7 on, so it can no longer be
    # written as a literal keyword argument.  Passing it through a mapping
    # hands the producer the very same keyword on every Python version.
    producer = KeyedProducer(self.client, **{'async': False})

    # Send 10 random messages
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)

    # kill leader for partition 0
    self._kill_leader(topic, 0)

    recovered = False
    started = time.time()
    timeout = 60
    while not recovered and (time.time() - started) < timeout:
        try:
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
            # Only a successful send to partition 0 proves the recovery.
            if producer.partitioners[kafka_bytestring(topic)].partition(key) == 0:
                recovered = True
        except (FailedPayloadsError, ConnectionError):
            log.debug("caught exception sending message -- will retry")
            continue

    # Verify we successfully sent the message
    self.assertTrue(recovered)

    # send some more messages just to make sure no more exceptions
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)
def test_switch_leader_keyed_producer(self):
    """Verify a synchronous ``KeyedProducer`` recovers from a leader change.

    Ten keyed messages are sent, the leader of partition 0 is killed, and
    sending is retried for up to 60 seconds until a message whose key maps
    to partition 0 goes through.  Ten more messages are then sent to make
    sure no further exceptions occur.
    """
    topic = self.topic
    # ``async`` is a reserved word from Python 3.7 on, so it can no longer be
    # written as a literal keyword argument.  Passing it through a mapping
    # hands the producer the very same keyword on every Python version.
    producer = KeyedProducer(self.client, **{'async': False})

    # Send 10 random messages
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)

    # kill leader for partition 0
    self._kill_leader(topic, 0)

    recovered = False
    started = time.time()
    timeout = 60
    while not recovered and (time.time() - started) < timeout:
        try:
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)
            # Only a successful send to partition 0 proves the recovery.
            if producer.partitioners[topic].partition(key) == 0:
                recovered = True
        except (FailedPayloadsError, ConnectionError, RequestTimedOutError,
                NotLeaderForPartitionError):
            log.debug("caught exception sending message -- will retry")
            continue

    # Verify we successfully sent the message
    self.assertTrue(recovered)

    # send some more messages just to make sure no more exceptions
    for _ in range(10):
        key = random_string(3).encode('utf-8')
        msg = random_string(10).encode('utf-8')
        producer.send_messages(topic, key, msg)
def test_snappy(self):
    """Round-trip 1000 random payloads through the snappy codec helpers."""
    for _ in xrange(1000):
        original = random_string(100).encode('utf-8')
        compressed = snappy_encode(original)
        restored = snappy_decode(compressed)
        self.assertEqual(original, restored)
def topic(kafka_broker, request):
    """Create a topic on the broker and return its name.

    The name is the requesting test node's name followed by ``_`` and
    ``random_string(10)``.
    """
    name_parts = (request.node.name, random_string(10))
    unique_name = '%s_%s' % name_parts
    kafka_broker.create_topics([unique_name])
    return unique_name
def topic(simple_client):
    """Return a ``random_string(5)`` topic name after ensuring it exists."""
    name = random_string(5)
    # Have the client make sure the topic exists before handing it out.
    simple_client.ensure_topic_exists(name)
    return name
def test_snappy():
    """Round-trip 1000 random payloads through the snappy codec helpers."""
    for _ in xrange(1000):
        original = random_string(100).encode('utf-8')
        compressed = snappy_encode(original)
        restored = snappy_decode(compressed)
        assert original == restored
def test_group(kafka_broker, topic):
    """Check that four consumers in one group split the topic's partitions.

    Each consumer runs in its own thread and polls until told to stop.  The
    main thread waits (up to 35 seconds) until every consumer exists, has an
    assignment, shares a single generation and is not rejoining.  It then
    asserts that the assignments are non-empty, pairwise disjoint and
    together cover partitions 0..3 of ``topic``.
    """
    num_partitions = 4
    connect_str = get_connect_str(kafka_broker)
    consumers = {}
    stop = {}
    threads = {}
    # consumer index -> partition -> records.  The inner mapping is required
    # because records are stored per partition in ``consumer_thread``.
    messages = collections.defaultdict(lambda: collections.defaultdict(list))
    group_id = 'test-group-' + random_string(6)

    def consumer_thread(i):
        """Run consumer ``i`` until its stop event is set, then close it."""
        assert i not in consumers
        assert i not in stop
        stop[i] = threading.Event()
        consumers[i] = KafkaConsumer(topic,
                                     bootstrap_servers=connect_str,
                                     group_id=group_id,
                                     heartbeat_interval_ms=500)
        while not stop[i].is_set():
            # The loop unpacks (partition, records) pairs, so it has to walk
            # the items of the poll result rather than only its values.
            for tp, records in six.iteritems(consumers[i].poll(100)):
                messages[i][tp].extend(records)
        consumers[i].close()
        del consumers[i]
        del stop[i]

    num_consumers = 4
    for i in range(num_consumers):
        t = threading.Thread(target=consumer_thread, args=(i,))
        t.start()
        threads[i] = t

    try:
        timeout = time.time() + 35
        while True:
            for c in range(num_consumers):

                # Verify all consumers have been created
                if c not in consumers:
                    break

                # Verify all consumers have an assignment
                elif not consumers[c].assignment():
                    break

            # If all consumers exist and have an assignment
            else:

                # Verify all consumers are in the same generation
                # then log state and break while loop
                generations = set([consumer._coordinator.generation
                                   for consumer in list(consumers.values())])

                # New generation assignment is not complete until
                # coordinator.rejoining = False
                rejoining = any([consumer._coordinator.rejoining
                                 for consumer in list(consumers.values())])

                if not rejoining and len(generations) == 1:
                    for c, consumer in list(consumers.items()):
                        logging.info("[%s] %s %s: %s", c,
                                     consumer._coordinator.generation,
                                     consumer._coordinator.member_id,
                                     consumer.assignment())
                    break
            assert time.time() < timeout, "timeout waiting for assignments"

        group_assignment = set()
        for c in range(num_consumers):
            assert len(consumers[c].assignment()) != 0
            assert set.isdisjoint(consumers[c].assignment(), group_assignment)
            group_assignment.update(consumers[c].assignment())

        assert group_assignment == set([
            TopicPartition(topic, partition)
            for partition in range(num_partitions)])

    finally:
        # Ask every consumer thread to stop and wait for it to finish.
        for c in range(num_consumers):
            stop[c].set()
            threads[c].join()
def test_gzip():
    """Round-trip 1000 random payloads through the gzip codec helpers."""
    for _ in xrange(1000):
        original = random_string(100).encode('utf-8')
        compressed = gzip_encode(original)
        restored = gzip_decode(compressed)
        assert original == restored
def test_snappy(self):
    """Round-trip 1000 random payloads through the snappy codec helpers.

    The payload is encoded to bytes before compression, matching the other
    codec round-trip tests, and compared with ``assertEqual`` because the
    ``assertEquals`` alias is deprecated and removed in Python 3.12.
    """
    for _ in xrange(1000):
        s1 = random_string(100).encode('utf-8')
        s2 = snappy_decode(snappy_encode(s1))
        self.assertEqual(s1, s2)
def test_gzip(self):
    """Round-trip 1000 random payloads through the gzip codec helpers.

    The payload is encoded to bytes before compression, matching the other
    codec round-trip tests, and compared with ``assertEqual`` because the
    ``assertEquals`` alias is deprecated and removed in Python 3.12.
    """
    for _ in xrange(1000):
        s1 = random_string(100).encode('utf-8')
        s2 = gzip_decode(gzip_encode(s1))
        self.assertEqual(s1, s2)
def test_lz4():
    """Round-trip 1000 random payloads through the lz4 codec helpers."""
    for _ in xrange(1000):
        original = random_string(100).encode('utf-8')
        compressed = lz4_encode(original)
        restored = lz4_decode(compressed)
        assert original == restored