Пример #1
0
class TestTopic(unittest.TestCase):
    # Expected (offset, message) pairs in self.dogs_queue after setUp:
    #   0.6 -> [(0, 'Rusty'), (14, 'Patty'), (28, 'Jack'), (41, 'Clyde')]
    #   0.7 -> [(0, 'Rusty'), (15, 'Patty'), (30, 'Jack'), (44, 'Clyde')]
    # The assertions below use the 0.7 numbers; the 0.6 equivalents are
    # noted next to each test.

    def setUp(self):
        """Produce four messages to a uniquely named topic and open it."""
        self.k = client = Kafka(version_0_7=True)
        self.topic_name = topic = get_unique_topic('test-kafka-topic')
        client.produce(topic, ['Rusty', 'Patty', 'Jack', 'Clyde'])

        # Without this pause, a fetch issued right after the produce can
        # leave Kafka in a state where the produce is duplicated (it really
        # ends up that way on the broker).
        time.sleep(MESSAGE_DELAY_SECS)
        self.dogs_queue = client.topic(topic)

        # Debugging aids:
        #   print list(self.k.fetch(self.topic_name, 0))
        #   print self.topic_name

    def test_offset_queries(self):
        # 0.6 would report a latest offset of 55 rather than 59; the other
        # three expectations are the same for both versions.
        queue = self.dogs_queue
        self.assertEqual(queue.earliest_offset(), 0)
        self.assertEqual(queue.latest_offset(), 59)

        # Offset 100 is beyond the latest offset; offset 22 falls between
        # two message boundaries. Both errors are expected from the first
        # next() call on the poll result.
        beyond_end = queue.poll(100)
        self.assertRaises(OffsetOutOfRange, beyond_end.next)
        inside_message = queue.poll(22)
        self.assertRaises(InvalidOffset, inside_message.next)

    def test_end_offset_iteration(self):
        # 0.6 equivalent: poll with end_offset=28 and expect
        # next_offset == 41 and last_offset_read == 28; everything else is
        # identical.
        batches = self.dogs_queue.poll(0, end_offset=30, poll_interval=None)
        status, messages = batches.next()

        expected_status = (
            ('start_offset', 0),
            ('next_offset', 44),
            ('last_offset_read', 30),
            ('messages_read', 3),
            ('bytes_read', 14),
            ('num_fetches', 1),
        )
        for field, value in expected_status:
            self.assertEqual(getattr(status, field), value)

        self.assertEqual(messages, ['Rusty', 'Patty', 'Jack'])
        # With poll_interval=None the iteration ends after the first batch.
        self.assertRaises(StopIteration, batches.next)
Пример #2
0
def test_3x5_consumer_rebalancing():
    """Consumer rebalancing, with auto rebalancing.

    Seeds topics t1-t3 on partition 0 of every configured broker, then adds
    three consumers to the same group one at a time and checks how the
    broker partitions of topic "t1" are divided among them.
    """
    log_break("test_3x5_consumer_rebalancing")
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for topic in ["t1", "t2", "t3"]:
            k.produce(topic, ["bootstrap"], 0)
    time.sleep(MESSAGE_DELAY_SECS)

    producer = ZKProducer(ZK_CONNECT_STR, "t1")
    assert_equals(len(producer.broker_partitions), topology_3x5.total_partitions,
                  "We should be sending to all broker_partitions.")

    c1 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c1.broker_partitions), topology_3x5.total_partitions,
                  "Only one consumer, it should have all partitions.")

    # Floor division is spelled out so the expected share stays an integer
    # regardless of which division semantics are in effect.
    c2 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c2.broker_partitions), topology_3x5.total_partitions // 2)

    # Presumably gives the existing consumer time to pick up the rebalance
    # before the combined coverage is checked.
    time.sleep(MESSAGE_DELAY_SECS)
    assert_equals(len(set(c1.broker_partitions + c2.broker_partitions)),
                  topology_3x5.total_partitions,
                  "We should have all broker partitions covered.")

    c3 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c3.broker_partitions), topology_3x5.total_partitions // 3)

    time.sleep(MESSAGE_DELAY_SECS)
    # The sum of the per-consumer counts and the size of the union must both
    # equal the total: every partition is owned, and by exactly one consumer.
    assert_equals(sum(len(c.broker_partitions) for c in [c1, c2, c3]),
                  topology_3x5.total_partitions,
                  "All BrokerPartitions should be accounted for.")
    assert_equals(len(set(c1.broker_partitions + c2.broker_partitions +
                          c3.broker_partitions)),
                  topology_3x5.total_partitions,
                  "There should be no overlaps")
Пример #3
0
    def setUp(self):
        """Produce four messages to a uniquely named topic and open it."""
        self.k = client = Kafka()
        self.topic_name = topic = get_unique_topic('test-kafka-topic')
        client.produce(topic, ['Rusty', 'Patty', 'Jack', 'Clyde'])

        # Pause before the first fetch: fetching immediately after a produce
        # can leave Kafka in a state where the produce is duplicated (it
        # really ends up that way on the broker).
        time.sleep(MESSAGE_DELAY_SECS)
        self.dogs_queue = client.topic(topic)
    def __init__(self, topic, broker_partitions, end_broker_partitions=None):
        """Open one connection per broker and work out the starting offsets.

        If broker_partitions is a list of BrokerPartitions, we assume that
        we'll start at the latest offset. If broker_partitions is a mapping of
        BrokerPartitions to offsets, we'll start at those offsets.

        end_broker_partitions is stored as given; a falsy value (including
        the default None) is replaced by an empty dict.
        """
        self._topic = topic
        # Sorting a mapping yields its keys, so this is a sorted list of
        # BrokerPartitions in both the list and the mapping case.
        self._broker_partitions = sorted(broker_partitions)
        self._stats = defaultdict(
            lambda: ConsumerStats(fetches=0, bytes=0, messages=0, max_fetch=0))

        # This will collapse duplicates so we only have one connection per
        # distinct (broker_id, host, port).
        broker_conn_info = frozenset(
            (bp.broker_id, bp.host, bp.port) for bp in self._broker_partitions)
        self._connections = dict((broker_id, Kafka(host, port))
                                 for broker_id, host, port in broker_conn_info)

        # Figure out where we're going to start from...
        if isinstance(broker_partitions, Mapping):
            self._bps_to_next_offsets = broker_partitions
        else:
            # self._connections is keyed by broker_id, so the connection has
            # to be looked up by bp.broker_id (indexing with bp itself raises
            # KeyError). Iterating the sorted list rather than the argument
            # also keeps this working when a one-shot iterable was passed.
            self._bps_to_next_offsets = dict(
                (bp,
                 self._connections[bp.broker_id].latest_offset(bp.topic,
                                                               bp.partition))
                for bp in self._broker_partitions)

        self._end_broker_partitions = end_broker_partitions or {}
Пример #5
0
    def test_kafka(self):
        # Round trip: produce three messages, fetch them back from offset 0,
        # then check the latest/earliest offsets reported for the topic.
        kafka = Kafka()
        topic = get_unique_topic('test-kafka')
        start_offset = 0

        input_messages = ['message0', 'message1', 'message2']

        kafka.produce(topic, input_messages)
        # Wait MESSAGE_DELAY_SECS between the produce and the fetch.
        time.sleep(MESSAGE_DELAY_SECS)
        fetch_results = kafka.fetch(topic, start_offset)

        output_messages = []
        offsets = []
        for offset, output_message in fetch_results:
            output_messages.append(output_message)
            offsets.append(offset)

        self.assertEqual(input_messages, output_messages)

        actual_latest_offsets = kafka.offsets(topic,
                                              LATEST_OFFSET,
                                              max_offsets=1)

        self.assertEqual(len(actual_latest_offsets), 1)
        # The latest offset should sit just past the last message: its own
        # offset plus the message header length plus its payload length.
        expected_latest_offset = offsets[-1] + Lengths.MESSAGE_HEADER \
            + len(output_messages[-1])
        self.assertEqual(expected_latest_offset, actual_latest_offsets[0])

        actual_earliest_offsets = kafka.offsets(topic,
                                                EARLIEST_OFFSET,
                                                max_offsets=1)

        self.assertEqual(len(actual_earliest_offsets), 1)
        self.assertEqual(0, actual_earliest_offsets[0])
Пример #6
0
 def setUp(self):
     """Produce four messages to a uniquely named topic and open it."""
     self.k = client = Kafka(version_0_7=True)
     self.topic_name = topic = get_unique_topic('test-kafka-topic')
     client.produce(topic, ['Rusty', 'Patty', 'Jack', 'Clyde'])

     # Pause before the first fetch: fetching immediately after a produce
     # can leave Kafka in a state where the produce is duplicated (it
     # really ends up that way on the broker).
     time.sleep(MESSAGE_DELAY_SECS)
     self.dogs_queue = client.topic(topic)
Пример #7
0
    def test_kafka(self):
        # Round trip: produce three messages, fetch them back from offset 0,
        # then check the latest/earliest offsets reported for the topic.
        kafka = Kafka()
        topic = get_unique_topic('test-kafka')
        start_offset = 0

        input_messages = ['message0', 'message1', 'message2']

        kafka.produce(topic, input_messages)
        # Wait MESSAGE_DELAY_SECS between the produce and the fetch.
        time.sleep(MESSAGE_DELAY_SECS)
        message_set = kafka.fetch(topic, start_offset)
        # The (offset, message) pairs are read straight from the message
        # set's private _offsets_msgs attribute.
        fetch_results = message_set._offsets_msgs

        output_messages = []
        offsets = []
        for offset, output_message in fetch_results:
            output_messages.append(output_message)
            offsets.append(offset)

        self.assertEqual(input_messages, output_messages)

        actual_latest_offsets = kafka.offsets(topic, LATEST_OFFSET,
                                              max_offsets=1)

        self.assertEqual(len(actual_latest_offsets), 1)
        # The latest offset should sit just past the last message: its own
        # offset plus the 0.7 message header length plus its payload length.
        expected_latest_offset = offsets[-1] + Lengths.MESSAGE_HEADER_07 \
            + len(output_messages[-1])
        self.assertEqual(expected_latest_offset, actual_latest_offsets[0])

        actual_earliest_offsets = kafka.offsets(topic, EARLIEST_OFFSET,
                                                max_offsets=1)

        self.assertEqual(len(actual_earliest_offsets), 1)
        self.assertEqual(0, actual_earliest_offsets[0])
Пример #8
0
def test_3x5_consumer_rebalancing():
    """Consumer rebalancing, with auto rebalancing.

    Seeds topics t1-t3 on partition 0 of every configured broker, then adds
    three consumers to the same group one at a time and checks how the
    broker partitions of topic "t1" are divided among them.
    """
    log_break("test_3x5_consumer_rebalancing")
    for kafka_server in RunConfig.kafka_servers:
        k = Kafka("localhost", kafka_server.kafka_config.port)
        for topic in ["t1", "t2", "t3"]:
            k.produce(topic, ["bootstrap"], 0)
    delay()

    producer = ZKProducer(ZK_CONNECT_STR, "t1")
    assert_equals(len(producer.broker_partitions),
                  topology_3x5.total_partitions,
                  "We should be sending to all broker_partitions.")

    c1 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c1.broker_partitions), topology_3x5.total_partitions,
                  "Only one consumer, it should have all partitions.")

    # Floor division is spelled out so the expected share stays an integer
    # regardless of which division semantics are in effect.
    c2 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c2.broker_partitions),
                  topology_3x5.total_partitions // 2)

    # Presumably gives the existing consumer time to pick up the rebalance
    # before the combined coverage is checked.
    delay()
    assert_equals(len(set(c1.broker_partitions + c2.broker_partitions)),
                  topology_3x5.total_partitions,
                  "We should have all broker partitions covered.")

    c3 = ZKConsumer(ZK_CONNECT_STR, "group_3x5", "t1")
    assert_equals(len(c3.broker_partitions),
                  topology_3x5.total_partitions // 3)

    delay()
    # The sum of the per-consumer counts and the size of the union must both
    # equal the total: every partition is owned, and by exactly one consumer.
    assert_equals(sum(len(c.broker_partitions) for c in [c1, c2, c3]),
                  topology_3x5.total_partitions,
                  "All BrokerPartitions should be accounted for.")
    assert_equals(
        len(
            set(c1.broker_partitions + c2.broker_partitions +
                c3.broker_partitions)), topology_3x5.total_partitions,
        "There should be no overlaps")
Пример #9
0
class TestTopic(unittest.TestCase):
    # Expected (offset, message) pairs in self.dogs_queue after setUp:
    #   [(0, 'Rusty'), (14, 'Patty'), (28, 'Jack'), (41, 'Clyde')]

    def setUp(self):
        """Produce four messages to a uniquely named topic and open it."""
        self.k = client = Kafka()
        self.topic_name = topic = get_unique_topic('test-kafka-topic')
        client.produce(topic, ['Rusty', 'Patty', 'Jack', 'Clyde'])

        # Without this pause, a fetch issued right after the produce can
        # leave Kafka in a state where the produce is duplicated (it really
        # ends up that way on the broker).
        time.sleep(MESSAGE_DELAY_SECS)
        self.dogs_queue = client.topic(topic)

        # Debugging aids:
        #   print list(self.k.fetch(self.topic_name, 0))
        #   print self.topic_name

    def test_offset_queries(self):
        queue = self.dogs_queue
        self.assertEqual(queue.earliest_offset(), 0)
        self.assertEqual(queue.latest_offset(), 55)

        # Offset 100 is beyond the latest offset; offset 22 falls between
        # two message boundaries. Both errors are expected from the first
        # next() call on the poll result.
        beyond_end = queue.poll(100)
        self.assertRaises(OffsetOutOfRange, beyond_end.next)
        inside_message = queue.poll(22)
        self.assertRaises(InvalidOffset, inside_message.next)

    def test_end_offset_iteration(self):
        # Poll from the start up to the message at offset 28 ('Jack').
        batches = self.dogs_queue.poll(0, end_offset=28, poll_interval=None)
        status, messages = batches.next()

        expected_status = (
            ('start_offset', 0),
            ('next_offset', 41),
            ('last_offset_read', 28),
            ('messages_read', 3),
            ('bytes_read', 14),
            ('num_fetches', 1),
        )
        for field, value in expected_status:
            self.assertEqual(getattr(status, field), value)

        self.assertEqual(messages, ['Rusty', 'Patty', 'Jack'])
        # With poll_interval=None the iteration ends after the first batch.
        self.assertRaises(StopIteration, batches.next)
Пример #10
0
    def test_kafka(self):
        # Round trip with gzip compression: produce three messages, fetch
        # from offset 0, check the reported latest/earliest offsets, then
        # expand any compressed entries and compare with the input.
        kafka = Kafka()
        topic = get_unique_topic('test-kafka')
        start_offset = 0

        input_messages = ['message0', 'message1', 'message2']

        kafka.produce(topic, input_messages, compression=COMPRESSION_GZIP)
        # Wait MESSAGE_DELAY_SECS between the produce and the fetch.
        time.sleep(MESSAGE_DELAY_SECS)
        message_set = kafka.fetch(topic, start_offset)
        # The entries are read straight from the message set's private
        # _offsets_msgs attribute; it is iterated twice below.
        fetch_results = message_set._offsets_msgs

        # First pass: take the entries as fetched (not decompressed) so the
        # offset arithmetic is done on what the broker actually stored.
        output_messages = []
        offsets = []
        for message in fetch_results:
            offset, output_message = message
            output_messages.append(output_message)
            offsets.append(offset)

        actual_latest_offsets = kafka.offsets(topic, LATEST_OFFSET,
                                              max_offsets=1)

        self.assertEqual(len(actual_latest_offsets), 1)
        # The latest offset should sit just past the last entry: its own
        # offset plus the 0.7 message header length plus its payload length.
        expected_latest_offset = (offsets[-1] + Lengths.MESSAGE_HEADER_07
                                  + len(output_messages[-1]))
        self.assertEqual(expected_latest_offset, actual_latest_offsets[0])

        actual_earliest_offsets = kafka.offsets(topic, EARLIEST_OFFSET,
                                                max_offsets=1)

        self.assertEqual(len(actual_earliest_offsets), 1)
        self.assertEqual(0, actual_earliest_offsets[0])

        # Second pass: decompress any compressed messages, then check the
        # payloads against what was produced.
        output_messages = []
        offsets = []
        for message in fetch_results:
            if isinstance(message, CompressedMessage):
                inner_set = message.messageSet()
                for inner_message in inner_set._offsets_msgs:
                    offset, output_message = inner_message
                    output_messages.append(output_message)
                    offsets.append(offset)
            else:
                offset, output_message = message
                output_messages.append(output_message)
                offsets.append(offset)

        self.assertEqual(input_messages, output_messages)
Пример #11
0
    def test_cant_connect(self):
        # The current timestamp, as a string, is used as the host name;
        # producing through that client is expected to raise
        # ConnectionFailure.
        bogus_host = str(time.time())
        unreachable = Kafka(host=bogus_host)
        topic_name = get_unique_topic('test-cant-connect')

        self.assertRaises(
            ConnectionFailure, unreachable.produce, topic_name, 'wont appear')
Пример #12
0
def send_to_all_partitions(topic, messages):
    """Produce `messages` to `topic` on every partition of every broker.

    One Kafka connection to localhost is opened per entry in
    RunConfig.kafka_servers; the partition ids run from 0 up to
    topology_3x5.partitions_per_broker - 1.
    """
    for server in RunConfig.kafka_servers:
        port = server.kafka_config.port
        client = Kafka("localhost", port)
        for partition_id in range(topology_3x5.partitions_per_broker):
            client.produce(topic, messages, partition_id)
Пример #13
0
def send_to_all_partitions(partitions_per_broker, topic, messages):
    """Produce `messages` to `topic` on every partition of every broker.

    One Kafka connection to localhost is opened per entry in
    RunConfig.kafka_servers; the partition ids run from 0 up to
    partitions_per_broker - 1.
    """
    for server in RunConfig.kafka_servers:
        port = server.kafka_config.port
        client = Kafka("localhost", port)
        for partition_id in range(partitions_per_broker):
            client.produce(topic, messages, partition_id)