class TestTopic(unittest.TestCase):
    # Contents of self.dogs_queue after setUp:
    # 0.6
    #   [(0, 'Rusty'), (14, 'Patty'), (28, 'Jack'), (41, 'Clyde')]
    # 0.7
    #   [(0, 'Rusty'), (15, 'Patty'), (30, 'Jack'), (44, 'Clyde')]

    def setUp(self):
        self.k = Kafka(version_0_7=True)
        self.topic_name = get_unique_topic('test-kafka-topic')
        input_messages = ['Rusty', 'Patty', 'Jack', 'Clyde']
        self.k.produce(self.topic_name, input_messages)
        # Without this sleep, a fetch issued immediately after the produce can
        # leave the broker in a state where the produce is duplicated (the
        # duplicate really does end up in Kafka).
        time.sleep(MESSAGE_DELAY_SECS)
        self.dogs_queue = self.k.topic(self.topic_name)
        # print list(self.k.fetch(self.topic_name, 0))
        # print self.topic_name

    def test_offset_queries(self):
        # 0.6
        # self.assertEqual(self.dogs_queue.earliest_offset(), 0)
        # self.assertEqual(self.dogs_queue.latest_offset(), 55)
        # self.assertRaises(OffsetOutOfRange, self.dogs_queue.poll(100).next)
        # self.assertRaises(InvalidOffset, self.dogs_queue.poll(22).next)

        # 0.7
        self.assertEqual(self.dogs_queue.earliest_offset(), 0)
        self.assertEqual(self.dogs_queue.latest_offset(), 59)
        self.assertRaises(OffsetOutOfRange, self.dogs_queue.poll(100).next)
        self.assertRaises(InvalidOffset, self.dogs_queue.poll(22).next)

    def test_end_offset_iteration(self):
        # 0.6
        # dogs = self.dogs_queue.poll(0, end_offset=28, poll_interval=None)
        # status, messages = dogs.next()
        # self.assertEqual(status.start_offset, 0)
        # self.assertEqual(status.next_offset, 41)
        # self.assertEqual(status.last_offset_read, 28)
        # self.assertEqual(status.messages_read, 3)
        # self.assertEqual(status.bytes_read, 14)
        # self.assertEqual(status.num_fetches, 1)
        # self.assertEqual(messages, ['Rusty', 'Patty', 'Jack'])
        # self.assertRaises(StopIteration, dogs.next)

        # 0.7
        dogs = self.dogs_queue.poll(0, end_offset=30, poll_interval=None)
        status, messages = dogs.next()
        self.assertEqual(status.start_offset, 0)
        self.assertEqual(status.next_offset, 44)
        self.assertEqual(status.last_offset_read, 30)
        self.assertEqual(status.messages_read, 3)
        self.assertEqual(status.bytes_read, 14)
        self.assertEqual(status.num_fetches, 1)
        self.assertEqual(messages, ['Rusty', 'Patty', 'Jack'])
        self.assertRaises(StopIteration, dogs.next)
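# A standalone sketch (not part of the test suite) of where the expected
# offsets in TestTopic come from. The per-message overhead implied by the
# numbers above is 9 bytes for the 0.6 wire format and 10 bytes for 0.7
# (0.7 adds a one-byte attributes/compression field); treat those header
# sizes as values inferred from the test data rather than protocol facts.
def expected_offsets(messages, header_len):
    """Return ([(offset, message), ...], latest_offset) for a fresh topic."""
    pairs, pos = [], 0
    for msg in messages:
        pairs.append((pos, msg))
        pos += header_len + len(msg)
    return pairs, pos

# expected_offsets(['Rusty', 'Patty', 'Jack', 'Clyde'], 9)
#   -> ([(0, 'Rusty'), (14, 'Patty'), (28, 'Jack'), (41, 'Clyde')], 55)
# expected_offsets(['Rusty', 'Patty', 'Jack', 'Clyde'], 10)
#   -> ([(0, 'Rusty'), (15, 'Patty'), (30, 'Jack'), (44, 'Clyde')], 59)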
def test_3x5_consumer_rebalancing():
    """Consumer rebalancing, with auto rebalancing."""
    log_break("test_3x5_consumer_rebalancing")
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for topic in ["t1", "t2", "t3"]:
            k.produce(topic, ["bootstrap"], 0)
    time.sleep(MESSAGE_DELAY_SECS)

    producer = ZKProducer(ZK_CONNECT_STR, "t1")
    assert_equals(len(producer.broker_partitions),
                  topology_3x5.total_partitions,
                  "We should be sending to all broker_partitions.")

    c1 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c1.broker_partitions),
                  topology_3x5.total_partitions,
                  "Only one consumer, it should have all partitions.")

    c2 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c2.broker_partitions),
                  topology_3x5.total_partitions / 2)
    time.sleep(MESSAGE_DELAY_SECS)
    assert_equals(len(set(c1.broker_partitions + c2.broker_partitions)),
                  topology_3x5.total_partitions,
                  "We should have all broker partitions covered.")

    c3 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c3.broker_partitions),
                  topology_3x5.total_partitions / 3)
    time.sleep(MESSAGE_DELAY_SECS)
    assert_equals(sum(len(c.broker_partitions) for c in [c1, c2, c3]),
                  topology_3x5.total_partitions,
                  "All BrokerPartitions should be accounted for.")
    assert_equals(len(set(c1.broker_partitions +
                          c2.broker_partitions +
                          c3.broker_partitions)),
                  topology_3x5.total_partitions,
                  "There should be no overlaps")
def setUp(self):
    self.k = Kafka()
    self.topic_name = get_unique_topic('test-kafka-topic')
    input_messages = ['Rusty', 'Patty', 'Jack', 'Clyde']
    self.k.produce(self.topic_name, input_messages)
    # Without this sleep, a fetch issued immediately after the produce can
    # leave the broker in a state where the produce is duplicated (the
    # duplicate really does end up in Kafka).
    time.sleep(MESSAGE_DELAY_SECS)
    self.dogs_queue = self.k.topic(self.topic_name)
def __init__(self, topic, broker_partitions, end_broker_partitions=None):
    """If broker_partitions is a list of BrokerPartitions, we assume that
    we'll start at the latest offset. If broker_partitions is a mapping of
    BrokerPartitions to offsets, we'll start at those offsets."""
    self._topic = topic
    self._broker_partitions = sorted(broker_partitions)
    self._stats = defaultdict(
        lambda: ConsumerStats(fetches=0, bytes=0, messages=0, max_fetch=0))
    self._bps_to_next_offsets = broker_partitions

    # This will collapse duplicates so we only have one conn per host/port
    broker_conn_info = frozenset((bp.broker_id, bp.host, bp.port)
                                 for bp in self._broker_partitions)
    self._connections = dict((broker_id, Kafka(host, port))
                             for broker_id, host, port in broker_conn_info)

    # Figure out where we're going to start from...
    if isinstance(broker_partitions, Mapping):
        self._bps_to_next_offsets = broker_partitions
    else:
        # self._connections is keyed by broker_id
        self._bps_to_next_offsets = dict(
            (bp, self._connections[bp.broker_id].latest_offset(bp.topic,
                                                               bp.partition))
            for bp in broker_partitions)

    self._end_broker_partitions = end_broker_partitions or {}
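# Usage sketch for the constructor above (illustrative only; the owning class
# is not shown here, so `FetchConsumer` and the BrokerPartition fields below
# are placeholders rather than names confirmed by this codebase).
#
#   bps = [BrokerPartition(broker_id=0, host="localhost", port=9092,
#                          topic="t1", partition=0)]
#
#   # List form: every partition starts at its latest offset.
#   consumer = FetchConsumer("t1", bps)
#
#   # Mapping form: each partition starts at the offset you give it.
#   consumer = FetchConsumer("t1", {bps[0]: 0})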
def test_kafka(self):
    kafka = Kafka()
    topic = get_unique_topic('test-kafka')
    start_offset = 0
    input_messages = ['message0', 'message1', 'message2']
    kafka.produce(topic, input_messages)
    time.sleep(MESSAGE_DELAY_SECS)

    fetch_results = kafka.fetch(topic, start_offset)
    output_messages = []
    offsets = []
    for offset, output_message in fetch_results:
        output_messages.append(output_message)
        offsets.append(offset)
    self.assertEquals(input_messages, output_messages)

    actual_latest_offsets = kafka.offsets(topic, LATEST_OFFSET, max_offsets=1)
    self.assertEquals(len(actual_latest_offsets), 1)
    expected_latest_offset = offsets[-1] + Lengths.MESSAGE_HEADER \
                             + len(output_messages[-1])
    self.assertEquals(expected_latest_offset, actual_latest_offsets[0])

    actual_earliest_offsets = kafka.offsets(topic, EARLIEST_OFFSET,
                                            max_offsets=1)
    self.assertEquals(len(actual_earliest_offsets), 1)
    self.assertEquals(0, actual_earliest_offsets[0])
def setUp(self):
    self.k = Kafka(version_0_7=True)
    self.topic_name = get_unique_topic('test-kafka-topic')
    input_messages = ['Rusty', 'Patty', 'Jack', 'Clyde']
    self.k.produce(self.topic_name, input_messages)
    # Without this sleep, a fetch issued immediately after the produce can
    # leave the broker in a state where the produce is duplicated (the
    # duplicate really does end up in Kafka).
    time.sleep(MESSAGE_DELAY_SECS)
    self.dogs_queue = self.k.topic(self.topic_name)
def test_kafka(self):
    kafka = Kafka()
    topic = get_unique_topic('test-kafka')
    start_offset = 0
    input_messages = ['message0', 'message1', 'message2']
    kafka.produce(topic, input_messages)
    time.sleep(MESSAGE_DELAY_SECS)

    messageSet = kafka.fetch(topic, start_offset)
    fetch_results = messageSet._offsets_msgs
    output_messages = []
    offsets = []
    for offset, output_message in fetch_results:
        output_messages.append(output_message)
        offsets.append(offset)
    self.assertEquals(input_messages, output_messages)

    actual_latest_offsets = kafka.offsets(topic, LATEST_OFFSET, max_offsets=1)
    self.assertEquals(len(actual_latest_offsets), 1)
    expected_latest_offset = offsets[-1] + Lengths.MESSAGE_HEADER_07 \
                             + len(output_messages[-1])
    self.assertEquals(expected_latest_offset, actual_latest_offsets[0])

    actual_earliest_offsets = kafka.offsets(topic, EARLIEST_OFFSET,
                                            max_offsets=1)
    self.assertEquals(len(actual_earliest_offsets), 1)
    self.assertEquals(0, actual_earliest_offsets[0])
def test_3x5_consumer_rebalancing():
    """Consumer rebalancing, with auto rebalancing."""
    log_break("test_3x5_consumer_rebalancing")
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for topic in ["t1", "t2", "t3"]:
            k.produce(topic, ["bootstrap"], 0)
    delay()

    producer = ZKProducer(ZK_CONNECT_STR, "t1")
    assert_equals(len(producer.broker_partitions),
                  topology_3x5.total_partitions,
                  "We should be sending to all broker_partitions.")

    c1 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c1.broker_partitions),
                  topology_3x5.total_partitions,
                  "Only one consumer, it should have all partitions.")

    c2 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c2.broker_partitions),
                  topology_3x5.total_partitions / 2)
    delay()
    assert_equals(len(set(c1.broker_partitions + c2.broker_partitions)),
                  topology_3x5.total_partitions,
                  "We should have all broker partitions covered.")

    c3 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c3.broker_partitions),
                  topology_3x5.total_partitions / 3)
    delay()
    assert_equals(sum(len(c.broker_partitions) for c in [c1, c2, c3]),
                  topology_3x5.total_partitions,
                  "All BrokerPartitions should be accounted for.")
    assert_equals(len(set(c1.broker_partitions +
                          c2.broker_partitions +
                          c3.broker_partitions)),
                  topology_3x5.total_partitions,
                  "There should be no overlaps")
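# Partition arithmetic behind the assertions in test_3x5_consumer_rebalancing
# (an illustrative sketch, assuming "3x5" means 3 brokers with 5 partitions
# each, so topology_3x5.total_partitions == 15). The tests rely on Python 2
# integer division; `//` below makes that explicit.
TOTAL_PARTITIONS_3X5 = 3 * 5
assert TOTAL_PARTITIONS_3X5 // 2 == 7  # newest of two consumers gets 7,
                                       # the other keeps 8
assert TOTAL_PARTITIONS_3X5 // 3 == 5  # three consumers split 5/5/5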
class TestTopic(unittest.TestCase):
    # Contents of self.dogs_queue after setUp:
    # [(0, 'Rusty'), (14, 'Patty'), (28, 'Jack'), (41, 'Clyde')]

    def setUp(self):
        self.k = Kafka()
        self.topic_name = get_unique_topic('test-kafka-topic')
        input_messages = ['Rusty', 'Patty', 'Jack', 'Clyde']
        self.k.produce(self.topic_name, input_messages)
        # Without this sleep, a fetch issued immediately after the produce can
        # leave the broker in a state where the produce is duplicated (the
        # duplicate really does end up in Kafka).
        time.sleep(MESSAGE_DELAY_SECS)
        self.dogs_queue = self.k.topic(self.topic_name)
        # print list(self.k.fetch(self.topic_name, 0))
        # print self.topic_name

    def test_offset_queries(self):
        self.assertEqual(self.dogs_queue.earliest_offset(), 0)
        self.assertEqual(self.dogs_queue.latest_offset(), 55)
        self.assertRaises(OffsetOutOfRange, self.dogs_queue.poll(100).next)
        self.assertRaises(InvalidOffset, self.dogs_queue.poll(22).next)

    def test_end_offset_iteration(self):
        dogs = self.dogs_queue.poll(0, end_offset=28, poll_interval=None)
        status, messages = dogs.next()
        self.assertEqual(status.start_offset, 0)
        self.assertEqual(status.next_offset, 41)
        self.assertEqual(status.last_offset_read, 28)
        self.assertEqual(status.messages_read, 3)
        self.assertEqual(status.bytes_read, 14)
        self.assertEqual(status.num_fetches, 1)
        self.assertEqual(messages, ['Rusty', 'Patty', 'Jack'])
        self.assertRaises(StopIteration, dogs.next)
def test_kafka(self):
    kafka = Kafka()
    topic = get_unique_topic('test-kafka')
    start_offset = 0
    input_messages = ['message0', 'message1', 'message2']
    kafka.produce(topic, input_messages, compression=COMPRESSION_GZIP)
    time.sleep(MESSAGE_DELAY_SECS)

    messageSet = kafka.fetch(topic, start_offset)
    fetch_results = messageSet._offsets_msgs
    output_messages = []
    offsets = []
    for message in fetch_results:
        offset, output_message = message
        output_messages.append(output_message)
        offsets.append(offset)

    actual_latest_offsets = kafka.offsets(topic, LATEST_OFFSET, max_offsets=1)
    self.assertEquals(len(actual_latest_offsets), 1)
    expected_latest_offset = offsets[-1] + Lengths.MESSAGE_HEADER_07 \
                             + len(output_messages[-1])
    self.assertEquals(expected_latest_offset, actual_latest_offsets[0])

    actual_earliest_offsets = kafka.offsets(topic, EARLIEST_OFFSET,
                                            max_offsets=1)
    self.assertEquals(len(actual_earliest_offsets), 1)
    self.assertEquals(0, actual_earliest_offsets[0])

    # Decompress any compressed messages, then check the payloads.
    output_messages = []
    offsets = []
    for message in fetch_results:
        if type(message) == CompressedMessage:
            cMessageSet = message.messageSet()
            for cMessage in cMessageSet._offsets_msgs:
                offset, output_message = cMessage
                output_messages.append(output_message)
                offsets.append(offset)
        else:
            offset, output_message = message
            output_messages.append(output_message)
            offsets.append(offset)

    self.assertEquals(input_messages, output_messages)
def test_cant_connect(self):
    kafka = Kafka(host=str(time.time()))
    topic = get_unique_topic('test-cant-connect')
    self.assertRaises(ConnectionFailure, kafka.produce, topic, 'wont appear')
def send_to_all_partitions(topic, messages):
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for partition in range(topology_3x5.partitions_per_broker):
            k.produce(topic, messages, partition)
def send_to_all_partitions(partitions_per_broker, topic, messages):
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for partition in range(partitions_per_broker):
            k.produce(topic, messages, partition)