def setUpClass(cls):
    """Bring up the fixtures, client and reactor shared by this class.

    Returns immediately when ``KAFKA_VERSION`` is not set in the
    environment. Otherwise stores on ``cls``: ``zk``, ``kafka_brokers``,
    ``client``, ``reactor`` and ``thread``.
    """
    if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
        return

    # Single zookeeper, 3 kafka brokers
    chroot = random_string(10)
    broker_count = 3
    partition_count = 2
    max_bytes = 12 * 1048576  # 12 MB

    cls.zk = ZookeeperFixture.instance()

    # Positional arguments passed to every broker fixture after its index
    fixture_args = (
        cls.zk.host,
        cls.zk.port,
        chroot,
        broker_count,
        partition_count,
        max_bytes,
    )
    brokers = []
    for broker_id in range(broker_count):
        brokers.append(KafkaFixture.instance(broker_id, *fixture_args))
    cls.kafka_brokers = brokers

    # The client is given the "host:port" address of every broker
    addresses = [
        '%s:%d' % (broker.host, broker.port) for broker in cls.kafka_brokers
    ]
    cls.client = KafkaClient(addresses, timeout=2500, clientId=__name__)

    # Startup the twisted reactor in a thread. We need this before the
    # KafkaClient can work, since KafkaBrokerClient relies on the
    # reactor for its TCP connection
    cls.reactor, cls.thread = threaded_reactor()
def test_large_messages(self):
    """test_large_messages

    Send 10 small and 10 large messages to partition 0 and check that a
    consumer started at the earliest offset receives all 20 of them.
    """
    # Produce 10 "normal" size messages
    small_payloads = [str(n) for n in range(10)]
    small_messages = yield self.send_messages(0, small_payloads)

    # Produce 10 messages that are large (bigger than default fetch size)
    large_payloads = [
        random_string(FETCH_BUFFER_SIZE_BYTES * 3) for _ in range(10)
    ]
    large_messages = yield self.send_messages(0, large_payloads)

    # Consumer should still get all of them
    consumer = self.consumer()
    # Start the consumer from the beginning
    start_d = consumer.start(OFFSET_EARLIEST)

    # Poll until all 20 messages have shown up
    while len(consumer.processor._messages) < 20:
        yield async_delay()

    expected_messages = set(small_messages + large_messages)
    actual_messages = {
        received.message.value for received in consumer.processor._messages
    }
    self.assertEqual(expected_messages, actual_messages)

    # Clean up: stopping the consumer should fire the start deferred
    consumer.stop()
    self.successResultOf(start_d)
def setUpClass(cls):
    """Bring up the fixtures, client and reactor shared by this class.

    Returns immediately when ``KAFKA_VERSION`` is not set in the
    environment. Otherwise enables debugging and stores on ``cls``:
    ``zk``, ``kafka_brokers``, ``client``, ``reactor`` and ``thread``.
    """
    if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
        return

    # Debugging is unconditionally switched on for this test class
    setDebugging(True)
    DelayedCall.debug = True

    chroot = random_string(10)
    broker_count = 2
    partition_count = 7

    # mini zookeeper, 2 kafka brokers
    cls.zk = ZookeeperFixture.instance()
    fixture_args = (
        cls.zk.host, cls.zk.port, chroot, broker_count, partition_count)
    cls.kafka_brokers = [
        KafkaFixture.instance(broker_id, *fixture_args)
        for broker_id in range(broker_count)
    ]

    addresses = [
        '%s:%d' % (broker.host, broker.port) for broker in cls.kafka_brokers
    ]
    # We want a short timeout on message sending for this test, since
    # we are expecting failures when we take down the brokers
    cls.client = KafkaClient(addresses, timeout=1000, clientId=__name__)

    # Startup the twisted reactor in a thread. We need this before the
    # KafkaClient can work, since KafkaBrokerClient relies on the
    # reactor for its TCP connection
    cls.reactor, cls.thread = threaded_reactor()
def setUpClass(cls):
    """Create the class-wide Zookeeper/Kafka fixtures and Kafka client.

    Skipped (plain return) when the ``KAFKA_VERSION`` environment
    variable is unset. On success ``cls`` carries ``zk``,
    ``kafka_brokers``, ``client``, ``reactor`` and ``thread``.
    """
    if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
        return

    debug_enabled = True
    setDebugging(debug_enabled)
    DelayedCall.debug = debug_enabled

    chroot = random_string(10)
    n_brokers = 2
    n_partitions = 7

    # mini zookeeper, 2 kafka brokers
    cls.zk = ZookeeperFixture.instance()
    shared_args = [cls.zk.host, cls.zk.port, chroot, n_brokers, n_partitions]
    started = []
    for index in range(n_brokers):
        started.append(KafkaFixture.instance(index, *shared_args))
    cls.kafka_brokers = started

    broker_addrs = []
    for fixture in cls.kafka_brokers:
        broker_addrs.append('%s:%d' % (fixture.host, fixture.port))

    # We want a short timeout on message sending for this test, since
    # we are expecting failures when we take down the brokers
    cls.client = KafkaClient(broker_addrs, timeout=1000, clientId=__name__)

    # Startup the twisted reactor in a thread. We need this before the
    # KafkaClient can work, since KafkaBrokerClient relies on the
    # reactor for its TCP connection
    cls.reactor, cls.thread = threaded_reactor()
def setUpClass(cls):
    """Bring up the fixtures and reactor shared by this class.

    Logs a warning and returns when ``KAFKA_VERSION`` is not set in the
    environment. Otherwise enables debugging and stores on ``cls``:
    ``zk``, ``kafka_brokers``, ``server``, ``reactor`` and ``thread``.
    """
    if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
        log.warning("WARNING: KAFKA_VERSION not found in environment")
        return

    # Debugging is unconditionally switched on for this test class
    setDebugging(True)
    DelayedCall.debug = True

    # Single zookeeper, 3 kafka brokers
    chroot = random_string(10)
    broker_count = 3
    partition_count = 2

    cls.zk = ZookeeperFixture.instance()
    fixture_args = (
        cls.zk.host, cls.zk.port, chroot, broker_count, partition_count)
    cls.kafka_brokers = [
        KafkaFixture.instance(broker_id, *fixture_args)
        for broker_id in range(broker_count)
    ]

    # server is used by our superclass when creating the client...
    cls.server = cls.kafka_brokers[0]

    # Startup the twisted reactor in a thread. We need this before the
    # KafkaClient can work, since KafkaBrokerClient relies on the
    # reactor for its TCP connection
    cls.reactor, cls.thread = threaded_reactor()
def test_large_messages(self):
    """test_large_messages

    Produce a batch of small messages followed by a batch of large ones
    on partition 0, then verify a consumer reading from the earliest
    offset sees exactly that set of message values.
    """
    batch = 10
    large_size = FETCH_BUFFER_SIZE_BYTES * 3

    # Produce 10 "normal" size messages
    small_messages = yield self.send_messages(
        0, [str(i) for i in range(batch)])

    # Produce 10 messages that are large (bigger than default fetch size)
    large_messages = yield self.send_messages(
        0, [random_string(large_size) for _ in range(batch)])

    # Consumer should still get all of them; start from the beginning
    consumer = self.consumer()
    d = consumer.start(OFFSET_EARLIEST)

    # Loop waiting for all the messages to show up
    received = consumer.processor._messages
    while len(consumer.processor._messages) < 20:
        # Wait a bit for them to arrive
        yield async_delay()
    received = consumer.processor._messages

    expected_messages = set(small_messages + large_messages)
    actual_messages = set(item.message.value for item in received)
    self.assertEqual(expected_messages, actual_messages)

    # Clean up
    consumer.stop()
    self.successResultOf(d)
def setUpClass(cls):
    """Create the class-wide Zookeeper/Kafka fixtures and the reactor.

    When ``KAFKA_VERSION`` is missing from the environment a warning is
    logged and nothing is set up. On success ``cls`` carries ``zk``,
    ``kafka_brokers``, ``server``, ``reactor`` and ``thread``.
    """
    if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
        log.warning("WARNING: KAFKA_VERSION not found in environment")
        return

    debug_enabled = True
    setDebugging(debug_enabled)
    DelayedCall.debug = debug_enabled

    # Single zookeeper, 3 kafka brokers
    chroot = random_string(10)
    n_brokers = 3
    n_partitions = PARTITION_COUNT

    cls.zk = ZookeeperFixture.instance()
    shared_args = [cls.zk.host, cls.zk.port, chroot, n_brokers, n_partitions]
    started = []
    for index in range(n_brokers):
        started.append(KafkaFixture.instance(index, *shared_args))
    cls.kafka_brokers = started

    # server is used by our superclass when creating the client...
    cls.server = cls.kafka_brokers[0]

    # Startup the twisted reactor in a thread. We need this before the
    # KafkaClient can work, since KafkaBrokerClient relies on the
    # reactor for its TCP connection
    cls.reactor, cls.thread = threaded_reactor()
def _send_random_messages(self, producer, topic, n):
    """Send ``n`` requests, each holding one random 10-character message.

    Each response must not be an Exception instance and, when truthy,
    must carry ``error == 0``.
    """
    for _ in range(n):
        resp = yield producer.send_messages(
            topic, msgs=[random_string(10)])
        self.assertFalse(isinstance(resp, Exception))
        if not resp:
            # Falsy responses carry nothing further to check
            continue
        self.assertEqual(resp.error, 0)
def _send_random_messages(self, producer, topic, n):
    """Produce ``n`` random single-message batches to ``topic``.

    Asserts after every send that the response is not an Exception and,
    if the response is truthy, that its ``error`` attribute is 0.
    """
    remaining = n
    while remaining > 0:
        remaining -= 1
        resp = yield producer.send_messages(topic, msgs=[random_string(10)])
        self.assertFalse(isinstance(resp, Exception))
        if resp:
            self.assertEqual(resp.error, 0)
def test_huge_messages(self):
    """test_huge_messages

    A consumer whose max buffer size is smaller than a stored message
    should deliver the earlier, smaller messages and then errback its
    start deferred with ConsumerFetchSizeTooSmall. A second consumer
    without that limit, started just past the last delivered offset,
    should then receive the huge message.
    """
    # Produce 10 "normal" size messages
    yield self.send_messages(0, [str(n) for n in range(10)])

    # Setup a max buffer size for the consumer, and put a message in
    # Kafka that's bigger than that
    buffer_limit = (256 * 1024) - 10
    [huge_message] = yield self.send_messages(
        0, [random_string(buffer_limit + 10)])

    # Create a consumer with the (smallish) max buffer size
    small_consumer = self.consumer(max_buffer_size=buffer_limit)

    # This consumer fails to get the message, and errbacks the start
    # deferred
    start_d = small_consumer.start(OFFSET_EARLIEST)

    # Poll until the errback has been called
    while not start_d.called:
        yield async_delay()

    # Make sure the failure is as expected
    self.failureResultOf(start_d, ConsumerFetchSizeTooSmall)

    # Make sure the smaller, earlier messages were delivered
    delivered = small_consumer.processor._messages
    self.assert_message_count(delivered, 10)

    # last offset seen
    last_offset = small_consumer.processor._messages[-1].offset

    # Stop the consumer: d already errbacked, but stop still must be called
    small_consumer.stop()

    # Create a consumer with no fetch size limit
    big_consumer = self.consumer()
    # Start just past the last message processed
    start_d = big_consumer.start(last_offset + 1)

    # Consume giant message successfully; poll until it arrives
    while not big_consumer.processor._messages:
        yield async_delay()

    first_received = big_consumer.processor._messages[0]
    self.assertEqual(first_received.message.value, huge_message)

    # Clean up
    big_consumer.stop()
    self.successResultOf(start_d)
def test_commit_fetch_offsets(self):
    """test_commit_fetch_offsets

    Commit a random offset plus metadata for partition 0 of the test
    topic under consumer group "CG_1", then fetch it back and check that
    the error code, offset and metadata all match.

    RANT: https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol
    implies that the metadata supplied with the commit will be returned by
    the fetch, but under 0.8.2.1 with a API_version of 0, it's not. Switch
    to using the V1 API and it works.
    """  # noqa
    # Stays a plain dict (no 'error' attribute) if every attempt fails,
    # which makes the assertion after the loop fail
    resp = {}
    group = "CG_1"
    metadata = "My_Metadata_{}".format(random_string(10))
    offset = random.randint(0, 1024)
    log.debug("Commiting offset: %d metadata: %s for topic: %s part: 0",
              offset, metadata, self.topic)
    commit_req = OffsetCommitRequest(self.topic, 0, offset, -1, metadata)

    # We have to retry, since the client doesn't, and Kafka will
    # create the topic on the fly, but the first request will fail
    # NOTE(review): time.sleep blocks the calling thread; if this
    # generator runs on the reactor thread the reactor stalls for the
    # duration of each sleep -- confirm that is acceptable here.
    for attempt in range(20):
        log.debug("test_commit_fetch_offsets: Commit Attempt: %d", attempt)
        try:
            [resp] = yield self.client.send_offset_commit_request(
                group, [commit_req])
        except ConsumerCoordinatorNotAvailableError:
            log.info(
                "No Coordinator for Consumer Group: %s Attempt: %d of 20",
                group, attempt)
            time.sleep(0.5)
            continue
        except NotCoordinatorForConsumerError:  # pragma: no cover
            # Kafka seems to have a timing issue: If we ask broker 'A' who
            # the ConsumerCoordinator is for a auto-created, not extant
            # topic, the assigned broker may not realize it's been so
            # designated by the time we find out and make our request.
            log.info(
                "Coordinator is not coordinator!!: %s Attempt: %d of 20",
                group, attempt)
            time.sleep(0.5)
            continue
        break

    self.assertEqual(getattr(resp, 'error', -1), 0)

    fetch_req = OffsetFetchRequest(self.topic, 0)
    [resp] = yield self.client.send_offset_fetch_request(group, [fetch_req])
    self.assertEqual(resp.error, 0)
    self.assertEqual(resp.offset, offset)
    # Check we received the proper metadata in the response
    self.assertEqual(resp.metadata, metadata)
    log.debug("test_commit_fetch_offsets: Test Complete.")
def setUpClass(cls):
    """Bring up the fixtures, client and reactor shared by this class.

    Returns immediately when ``KAFKA_VERSION`` is not set in the
    environment. Otherwise stores on ``cls``: ``zk``, ``kafka_brokers``,
    ``client``, ``reactor`` and ``thread``.
    """
    if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
        return

    # Single zookeeper, 3 kafka brokers
    chroot = random_string(10)
    broker_count = 3
    partition_count = 2

    cls.zk = ZookeeperFixture.instance()
    fixture_args = (
        cls.zk.host, cls.zk.port, chroot, broker_count, partition_count)
    cls.kafka_brokers = [
        KafkaFixture.instance(broker_id, *fixture_args)
        for broker_id in range(broker_count)
    ]

    # The client is given the "host:port" address of every broker
    addresses = [
        '%s:%d' % (broker.host, broker.port) for broker in cls.kafka_brokers
    ]
    cls.client = KafkaClient(addresses, timeout=1500, clientId=__name__)

    # Startup the twisted reactor in a thread. We need this before the
    # KafkaClient can work, since KafkaBrokerClient relies on the
    # reactor for its TCP connection
    cls.reactor, cls.thread = threaded_reactor()
def test_write_nonextant_topic(self):
    """test_write_nonextant_topic

    Test we can write to a non-extant topic (which will be auto-created)
    simply by calling producer.send_messages with a long enough timeout.
    Ten randomly named topics are written to and read back in turn.
    """
    test_name = self.id().split('.')[-1]
    test_topics = []
    for i in range(10):
        test_topics.append(
            "{}-{}-{}".format(test_name, i, random_string(10)))

    producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)

    for topic in test_topics:
        resp = yield producer.send_messages(topic, msgs=[self.msg(topic)])
        # Make sure the send went ok
        self.assert_produce_response(resp, 0)

        # Make sure we can get the message back
        yield self.assert_fetch_offset(
            0, 0, [self.msg(topic)], topic=topic)

    yield producer.stop()
def test_commit_fetch_offsets(self):
    """test_commit_fetch_offsets

    Commit a random offset plus metadata for partition 0 of the test
    topic under consumer group "CG_1", then fetch it back and check that
    the error code, offset and metadata all match.

    RANT: https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol
    implies that the metadata supplied with the commit will be returned by
    the fetch, but under 0.8.2.1 with a API_version of 0, it's not. Switch
    to using the V1 API and it works.
    """  # noqa
    # Stays a plain dict (no 'error' attribute) if every attempt fails,
    # which makes the assertion after the loop fail
    resp = {}
    group = "CG_1"
    metadata = "My_Metadata_{}".format(random_string(10))
    offset = random.randint(0, 1024)
    log.debug("Commiting offset: %d metadata: %s for topic: %s part: 0",
              offset, metadata, self.topic)
    commit_req = OffsetCommitRequest(self.topic, 0, offset, -1, metadata)

    # We have to retry, since the client doesn't, and Kafka will
    # create the topic on the fly, but the first request will fail
    # NOTE(review): time.sleep blocks the calling thread; if this
    # generator runs on the reactor thread the reactor stalls for the
    # duration of each sleep -- confirm that is acceptable here.
    for attempt in range(20):
        log.debug("test_commit_fetch_offsets: Commit Attempt: %d", attempt)
        try:
            [resp] = yield self.client.send_offset_commit_request(
                group, [commit_req])
        except ConsumerCoordinatorNotAvailableError:
            log.info(
                "No Coordinator for Consumer Group: %s Attempt: %d of 20",
                group, attempt)
            time.sleep(0.5)
            continue
        break

    self.assertEqual(getattr(resp, 'error', -1), 0)

    fetch_req = OffsetFetchRequest(self.topic, 0)
    [resp] = yield self.client.send_offset_fetch_request(group, [fetch_req])
    self.assertEqual(resp.error, 0)
    self.assertEqual(resp.offset, offset)
    # The fetched metadata must equal what was committed
    self.assertEqual(resp.metadata, metadata)
    log.debug("test_commit_fetch_offsets: Test Complete.")
def test_write_nonextant_topic(self):
    """test_write_nonextant_topic

    Test we can write to a non-extant topic (which will be auto-created)
    simply by calling producer.send_messages with a long enough timeout.
    Each of ten randomly named topics gets one message, which is then
    fetched back from partition 0, offset 0.
    """
    method_name = self.id().split('.')[-1]
    topic_names = [
        "{}-{}-{}".format(method_name, index, random_string(10))
        for index in range(10)
    ]

    producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)

    for name in topic_names:
        outgoing = [self.msg(name)]
        resp = yield producer.send_messages(name, msgs=outgoing)
        # Make sure the send went ok
        self.assert_produce_response(resp, 0)

        # Make sure we can get the message back
        expected = [self.msg(name)]
        yield self.assert_fetch_offset(0, 0, expected, topic=name)

    yield producer.stop()
def test_throughput(self):
    """test_throughput

    Produce blocks of messages to the test topic for PRODUCE_TIME
    seconds while one consumer per partition reads from the earliest
    offset, then wait for the consumers to catch up, report throughput
    statistics and assert every sent message was consumed.
    """
    yield async_delay(3)  # 0.8.1.1 fails otherwise

    # Flag to shutdown; read by the send_msgs closure below and cleared
    # by this function once PRODUCE_TIME has elapsed
    keep_running = True
    # Count of messages sent, and their total size. Single-element lists
    # so the nested callbacks can update them in place.
    sent_msgs_count = [0]
    total_messages_size = [0]

    # setup MESSAGE_BLOCK_SIZEx1024-ish byte messages to send over and over
    constant_messages = [
        self.msg(s)
        for s in [random_string(1024) for x in range(MESSAGE_BLOCK_SIZE)]
    ]
    # ...and a block of messages three times the fetch buffer size
    large_messages = [
        self.msg(s) for s in [
            random_string(FETCH_BUFFER_SIZE_BYTES * 3)
            for x in range(MESSAGE_BLOCK_SIZE)
        ]
    ]

    # Block sizes are estimated from the first message of each block
    constant_messages_size = len(constant_messages[0]) * MESSAGE_BLOCK_SIZE
    large_messages_size = len(large_messages[0]) * MESSAGE_BLOCK_SIZE

    # Create a producer and send some messages
    producer = Producer(self.client)

    # Create consumers (1/partition)
    consumers = [
        self.consumer(partition=p, fetch_max_wait_time=50)
        for p in range(PARTITION_COUNT)
    ]

    def log_error(failure):
        # Errback for the send chain: just record the failure
        log.exception("Failure sending messages: %r",
                      failure)  # pragma: no cover

    def sent_msgs(resps):
        # Callback for a completed send: count the block, pass the
        # responses on unchanged
        log.info("Messages Sent: %r", resps)
        sent_msgs_count[0] += MESSAGE_BLOCK_SIZE
        return resps

    def send_msgs():
        # randomly, 1/20 of the time, send large messages
        # (randint(0, 19) is falsy only when it returns 0)
        if randint(0, 19):
            ## if True:
            messages = constant_messages
            large = ''
            total_messages_size[0] += constant_messages_size
        else:
            messages = large_messages
            large = ' large'
            total_messages_size[0] += large_messages_size

        log.info("Sending: %d%s messages", len(messages), large)
        d = producer.send_messages(self.topic, msgs=messages)
        # As soon as we get a response from the broker, count them
        # and if we're still supposed to, send more
        d.addCallback(sent_msgs)
        # keep_running is sampled when the send is issued, not when the
        # response arrives
        if keep_running:
            d.addCallback(lambda _: self.reactor.callLater(0, send_msgs))
            ## d.addCallback(lambda _: send_msgs())
        d.addErrback(log_error)

    # Start sending messages, MESSAGE_BLOCK_SIZE at a time, 1K or 384K each
    send_msgs()

    # Start the consumers from the beginning
    fetch_start = time.time()
    start_ds = [consumer.start(OFFSET_EARLIEST) for consumer in consumers]

    # Let them all run for awhile...
    log.info("Waiting %d seconds...", PRODUCE_TIME)
    yield async_delay(PRODUCE_TIME)

    # Tell the producer to stop
    keep_running = False

    # Poll once a second until the consumers have caught up. Note the
    # deadline is 2 * PRODUCE_TIME from now, although the log message
    # below reports PRODUCE_TIME.
    log.info(
        "Waiting up to %d seconds for "
        "consumers to finish consuming...", PRODUCE_TIME)
    deadline = time.time() + PRODUCE_TIME * 2
    while time.time() < deadline:
        consumed = sum(
            [len(consumer.processor._messages) for consumer in consumers])
        log.debug("Consumed %d messages.", consumed)
        if sent_msgs_count[0] == consumed:
            break
        yield async_delay(1)
    fetch_time = time.time() - fetch_start
    consumed_bytes = sum(
        [c.processor._messages_bytes[0] for c in consumers])

    result_msg = ("Sent: {} messages ({:,} total bytes) in ~{} seconds"
                  " ({}/sec), Consumed: {} in {:.2f} seconds.".format(
                      sent_msgs_count[0], total_messages_size[0],
                      PRODUCE_TIME, sent_msgs_count[0] / PRODUCE_TIME,
                      consumed, fetch_time))
    # Log the result, and print to stderr to get around nose capture
    log.info(result_msg)
    print("\n\t Performance Data: " + result_msg, file=sys.stderr)
    # And print data as stats
    stat('Production_Time', PRODUCE_TIME)
    stat('Consumption_Time', fetch_time)
    stat('Messages_Produced', sent_msgs_count[0])
    stat('Messages_Consumed', consumed)
    stat('Messages_Bytes_Produced', total_messages_size[0])
    stat('Messages_Bytes_Consumed', consumed_bytes)
    stat('Messages_Produced_Per_Second',
         sent_msgs_count[0] / PRODUCE_TIME)
    stat('Messages_Consumed_Per_Second', consumed / fetch_time)
    stat('Message_Bytes_Produced_Per_Second',
         total_messages_size[0] / PRODUCE_TIME)
    stat('Message_Bytes_Consumed_Per_Second', consumed_bytes / fetch_time)

    # Clean up
    log.debug('Stopping producer: %r', producer)
    yield producer.stop()
    log.debug('Stopping consumers: %r', consumers)
    for consumer in consumers:
        consumer.stop()
    # Every consumer's start deferred should have fired successfully
    [self.successResultOf(start_d) for start_d in start_ds]
    # make sure we got all the messages we sent
    self.assertEqual(
        sent_msgs_count[0],
        sum([len(consumer.processor._messages) for consumer in consumers]))
def test_gzip(self):
    """Round-trip 100 random 100-character strings through gzip.

    Each string must come back unchanged from
    ``gzip_decode(gzip_encode(...))``.
    """
    # range() rather than xrange(): xrange does not exist on Python 3,
    # and the rest of the tests already use range()
    for _ in range(100):
        original = random_string(100)
        round_tripped = gzip_decode(gzip_encode(original))
        self.assertEqual(original, round_tripped)
def test_snappy(self):
    """Round-trip 100 random 120-character strings through snappy.

    Each string must come back unchanged from
    ``snappy_decode(snappy_encode(...))``.
    """
    # range() rather than xrange(): xrange does not exist on Python 3,
    # and the rest of the tests already use range()
    for _ in range(100):
        original = random_string(120)
        round_tripped = snappy_decode(snappy_encode(original))
        self.assertEqual(original, round_tripped)
def setUp(self):
    """Run the inherited setUp and default ``self.topic`` if it is unset.

    The default topic is the last dotted component of ``self.id()``
    followed by a dash and a random 10-character string.
    """
    # NOTE(review): super() is anchored at unittest.TestCase rather than
    # at the enclosing class, so lookup starts *after* TestCase in the
    # MRO -- confirm this is intentional.
    super(unittest.TestCase, self).setUp()
    if self.topic:
        return
    test_id = self.id()
    method_name = test_id[test_id.rindex(".") + 1:]
    self.topic = "%s-%s" % (method_name, random_string(10))
def setUp(self):
    """Call the inherited setUp, then assign a topic name when none is set.

    A falsy ``self.topic`` is replaced by "<test method name>-<random
    10-character string>", where the method name is whatever follows the
    final "." in ``self.id()``.
    """
    # NOTE(review): super() is anchored at unittest.TestCase rather than
    # at the enclosing class, so lookup starts *after* TestCase in the
    # MRO -- confirm this is intentional.
    super(unittest.TestCase, self).setUp()
    if not self.topic:
        full_id = self.id()
        last_dot = full_id.rindex(".")
        suffix = random_string(10)
        self.topic = "%s-%s" % (full_id[last_dot + 1:], suffix)