def setUpClass(cls):
        """Start the fixtures shared by every test in the class.

        One ZooKeeper fixture and three Kafka broker fixtures are created,
        a KafkaClient is pointed at all three brokers, and the Twisted
        reactor is started in a thread. Nothing is started when the
        KAFKA_VERSION environment variable is unset or empty.
        """
        if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
            return

        # Single zookeeper, 3 kafka brokers
        chroot = random_string(10)
        broker_count = 3
        partition_count = 2
        # 12 MB, handed to KafkaFixture.instance as its last argument
        max_bytes = 12 * 1024 * 1024

        cls.zk = ZookeeperFixture.instance()
        fixture_args = (
            cls.zk.host, cls.zk.port, chroot, broker_count, partition_count,
            max_bytes,
        )
        brokers = []
        for broker_id in range(broker_count):
            brokers.append(KafkaFixture.instance(broker_id, *fixture_args))
        cls.kafka_brokers = brokers

        # The client is given every broker as a "host:port" string
        hosts = [
            '%s:%d' % (broker.host, broker.port)
            for broker in cls.kafka_brokers
        ]
        cls.client = KafkaClient(hosts, timeout=2500, clientId=__name__)

        # The KafkaClient cannot do any work until the reactor is running,
        # because KafkaBrokerClient makes its TCP connection through it.
        cls.reactor, cls.thread = threaded_reactor()
Пример #2
0
    def test_large_messages(self):
        """Check the consumer delivers both small and oversized messages.

        Ten short messages are produced, followed by ten built from
        ``random_string(FETCH_BUFFER_SIZE_BYTES * 3)``. A consumer started
        at OFFSET_EARLIEST must end up with exactly that set of values.
        """
        # Ten "normal" size messages
        small_payloads = [str(n) for n in range(10)]
        small_messages = yield self.send_messages(0, small_payloads)

        # Ten large messages (bigger than the default fetch size)
        large_payloads = [
            random_string(FETCH_BUFFER_SIZE_BYTES * 3) for _ in range(10)
        ]
        large_messages = yield self.send_messages(0, large_payloads)

        # The consumer reads from the very beginning and should see all 20
        consumer = self.consumer()
        start_d = consumer.start(OFFSET_EARLIEST)

        # Poll until every message has been handed to the processor
        while len(consumer.processor._messages) < 20:
            yield async_delay()

        expected = set(small_messages + large_messages)
        received = {
            item.message.value for item in consumer.processor._messages
        }
        self.assertEqual(expected, received)

        # Clean up
        consumer.stop()
        self.successResultOf(start_d)
    def setUpClass(cls):
        """Start one ZooKeeper and two Kafka brokers, then build a client.

        Returns immediately, starting nothing, when the KAFKA_VERSION
        environment variable is unset or empty. Debugging is switched on
        through ``setDebugging`` and ``DelayedCall.debug`` before any
        fixture is created.
        """
        if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
            return

        setDebugging(True)
        DelayedCall.debug = True

        chroot = random_string(10)
        broker_count = 2
        partition_count = 7

        # mini zookeeper, 2 kafka brokers
        cls.zk = ZookeeperFixture.instance()
        fixture_args = (
            cls.zk.host, cls.zk.port, chroot, broker_count, partition_count)
        brokers = []
        for broker_id in range(broker_count):
            brokers.append(KafkaFixture.instance(broker_id, *fixture_args))
        cls.kafka_brokers = brokers

        hosts = [
            '%s:%d' % (broker.host, broker.port)
            for broker in cls.kafka_brokers
        ]
        # Keep the send timeout short: these tests take brokers down and
        # expect sends to fail.
        cls.client = KafkaClient(hosts, timeout=1000, clientId=__name__)

        # The KafkaClient cannot do any work until the reactor is running,
        # because KafkaBrokerClient makes its TCP connection through it.
        cls.reactor, cls.thread = threaded_reactor()
Пример #4
0
    def setUpClass(cls):
        """Create the class-wide ZooKeeper, Kafka and client fixtures.

        Two Kafka brokers are started against a single ZooKeeper fixture
        and a KafkaClient with a 1000 timeout is connected to both. When
        KAFKA_VERSION is missing or empty in the environment the method
        returns without starting anything.
        """
        if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
            return

        # Same flag value goes to both debugging switches
        debug_enabled = True
        setDebugging(debug_enabled)
        DelayedCall.debug = debug_enabled

        zk_root = random_string(10)
        n_brokers = 2
        n_partitions = 7

        # mini zookeeper, 2 kafka brokers
        cls.zk = ZookeeperFixture.instance()
        common_args = (
            cls.zk.host, cls.zk.port, zk_root, n_brokers, n_partitions)
        cls.kafka_brokers = [
            KafkaFixture.instance(index, *common_args)
            for index in range(n_brokers)
        ]

        # Short send timeout on purpose: failures are expected once the
        # brokers are taken down.
        cls.client = KafkaClient(
            ['%s:%d' % (kb.host, kb.port) for kb in cls.kafka_brokers],
            timeout=1000,
            clientId=__name__,
        )

        # Start the twisted reactor in a thread; KafkaBrokerClient needs it
        # for its TCP connection before the client can do anything.
        cls.reactor, cls.thread = threaded_reactor()
    def setUpClass(cls):
        """Start one ZooKeeper and three Kafka brokers for the test class.

        The first broker is stored as ``cls.server``. When KAFKA_VERSION is
        unset or empty in the environment, a warning is logged and nothing
        is started.
        """
        kafka_version = os.environ.get('KAFKA_VERSION')
        if not kafka_version:  # pragma: no cover
            log.warning("WARNING: KAFKA_VERSION not found in environment")
            return

        setDebugging(True)
        DelayedCall.debug = True

        # Single zookeeper, 3 kafka brokers
        chroot = random_string(10)
        broker_count = 3
        partition_count = 2

        cls.zk = ZookeeperFixture.instance()
        fixture_args = (
            cls.zk.host, cls.zk.port, chroot, broker_count, partition_count)
        brokers = []
        for broker_id in range(broker_count):
            brokers.append(KafkaFixture.instance(broker_id, *fixture_args))
        cls.kafka_brokers = brokers
        # server is used by our superclass when creating the client...
        cls.server = cls.kafka_brokers[0]

        # The KafkaClient cannot do any work until the reactor is running,
        # because KafkaBrokerClient makes its TCP connection through it.
        cls.reactor, cls.thread = threaded_reactor()
    def test_large_messages(self):
        """Verify a default consumer copes with messages above fetch size.

        Produces ten short messages and ten messages built from
        ``random_string(FETCH_BUFFER_SIZE_BYTES * 3)``, then waits for a
        consumer started at OFFSET_EARLIEST to collect twenty messages and
        compares the consumed values with what was sent.
        """
        # Produce 10 "normal" size messages
        normal_batch = [str(i) for i in range(10)]
        small_messages = yield self.send_messages(0, normal_batch)

        # Produce 10 messages bigger than the default fetch size
        oversized_batch = [
            random_string(FETCH_BUFFER_SIZE_BYTES * 3) for _ in range(10)
        ]
        large_messages = yield self.send_messages(0, oversized_batch)

        # Start a consumer at the beginning; it should still get them all
        consumer = self.consumer()
        started = consumer.start(OFFSET_EARLIEST)

        # Give the messages time to arrive, checking after every delay
        while len(consumer.processor._messages) < 20:
            yield async_delay()

        sent_values = set(small_messages + large_messages)
        consumed_values = {
            entry.message.value for entry in consumer.processor._messages
        }
        self.assertEqual(sent_values, consumed_values)

        # Clean up
        consumer.stop()
        self.successResultOf(started)
    def setUpClass(cls):
        """Bring up ZooKeeper plus three Kafka brokers for the test class.

        Brokers are created with PARTITION_COUNT partitions and the first
        one is kept as ``cls.server``. A warning is logged and nothing is
        started when KAFKA_VERSION is unset or empty in the environment.
        """
        if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
            log.warning("WARNING: KAFKA_VERSION not found in environment")
            return

        # Same flag value goes to both debugging switches
        debug_enabled = True
        setDebugging(debug_enabled)
        DelayedCall.debug = debug_enabled

        # Single zookeeper, 3 kafka brokers
        zk_root = random_string(10)
        n_brokers = 3
        n_partitions = PARTITION_COUNT

        cls.zk = ZookeeperFixture.instance()
        common_args = (
            cls.zk.host, cls.zk.port, zk_root, n_brokers, n_partitions)
        cls.kafka_brokers = [
            KafkaFixture.instance(index, *common_args)
            for index in range(n_brokers)
        ]
        # server is used by our superclass when creating the client...
        cls.server = cls.kafka_brokers[0]

        # Start the twisted reactor in a thread; KafkaBrokerClient needs it
        # for its TCP connection before the KafkaClient can do anything.
        cls.reactor, cls.thread = threaded_reactor()
    def _send_random_messages(self, producer, topic, n):
        """Send ``n`` one-message batches of ``random_string(10)``.

        Each batch goes to ``topic`` through ``producer.send_messages`` and
        is awaited before the next one is sent. A response must never be an
        Exception instance, and a truthy response must have ``error == 0``.
        """
        for _ in range(n):
            resp = yield producer.send_messages(
                topic, msgs=[random_string(10)])

            self.assertFalse(isinstance(resp, Exception))

            # A falsy response carries no error code to inspect
            if not resp:
                continue
            self.assertEqual(resp.error, 0)
Пример #9
0
    def _send_random_messages(self, producer, topic, n):
        """Produce ``n`` random 10-argument ``random_string`` messages.

        Messages are sent one per request to ``topic``; every response is
        checked not to be an Exception and, when truthy, to report
        ``error == 0``.
        """
        sent = 0
        while sent < n:
            resp = yield producer.send_messages(
                topic, msgs=[random_string(10)])
            sent += 1

            is_exception = isinstance(resp, Exception)
            self.assertFalse(is_exception)

            # Only truthy responses have an error code worth checking
            if resp:
                self.assertEqual(resp.error, 0)
Пример #10
0
    def test_huge_messages(self):
        """Check a size-limited consumer fails on an oversized message.

        Ten small messages are produced, then one built from
        ``random_string`` with a length argument 10 above the buffer limit.
        A consumer created with ``max_buffer_size`` set to that limit must
        deliver the ten small messages and fail its start deferred with
        ConsumerFetchSizeTooSmall. A second, unlimited consumer started just
        past the last delivered offset must then deliver the huge message.
        """
        # Ten "normal" size messages first
        yield self.send_messages(0, [str(n) for n in range(10)])

        # Buffer limit for the first consumer; the next message is produced
        # with a size argument larger than it
        buffer_limit = (256 * 1024) - 10
        (huge_message,) = yield self.send_messages(
            0, [random_string(buffer_limit + 10)])

        # Consumer restricted to the (smallish) max buffer size
        consumer = self.consumer(max_buffer_size=buffer_limit)

        # It cannot fetch the huge message, so its start deferred errbacks
        limited_d = consumer.start(OFFSET_EARLIEST)

        # Poll until the errback has fired
        while not limited_d.called:
            yield async_delay()
        # The failure must be the expected one
        self.failureResultOf(limited_d, ConsumerFetchSizeTooSmall)

        # The smaller, earlier messages must have been delivered
        delivered = consumer.processor._messages
        self.assert_message_count(delivered, 10)

        # Offset of the last message the limited consumer saw
        last_offset = consumer.processor._messages[-1].offset

        # The deferred already errbacked, but stop() must still be called
        consumer.stop()

        # A consumer with no fetch size limit, started just past the last
        # message processed
        big_consumer = self.consumer()
        unlimited_d = big_consumer.start(last_offset + 1)
        # Poll until the giant message arrives
        while not big_consumer.processor._messages:
            yield async_delay()

        first_received = big_consumer.processor._messages[0]
        self.assertEqual(first_received.message.value, huge_message)

        # Clean up
        big_consumer.stop()
        self.successResultOf(unlimited_d)
    def test_commit_fetch_offsets(self):
        """test_commit_fetch_offsets

        Commit a random offset and metadata string for ``self.topic``,
        retrying up to 20 times while the consumer coordinator is not
        ready, then fetch the offset back and check both values.

        RANT: https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol
        implies that the metadata supplied with the commit will be returned by
        the fetch, but under 0.8.2.1 with a API_version of 0, it's not. Switch
        to using the V1 API and it works.
        """  # noqa
        # Local import keeps this method self-contained.
        from twisted.internet.task import deferLater

        resp = {}
        c_group = "CG_1"
        metadata = "My_Metadata_{}".format(random_string(10))
        offset = random.randint(0, 1024)
        log.debug("Commiting offset: %d metadata: %s for topic: %s part: 0",
                  offset, metadata, self.topic)
        req = OffsetCommitRequest(self.topic, 0, offset, -1, metadata)
        # We have to retry, since the client doesn't, and Kafka will
        # create the topic on the fly, but the first request will fail
        for attempt in range(20):
            log.debug("test_commit_fetch_offsets: Commit Attempt: %d", attempt)
            try:
                (resp, ) = yield self.client.send_offset_commit_request(
                    c_group, [req])
            except ConsumerCoordinatorNotAvailableError:
                log.info(
                    "No Coordinator for Consumer Group: %s Attempt: %d of 20",
                    c_group, attempt)
                # Wait without blocking: time.sleep() here would stall the
                # reactor, and with it the client's own network I/O.
                yield deferLater(self.reactor, 0.5, lambda: None)
                continue
            except NotCoordinatorForConsumerError:  # pragma: no cover
                # Kafka seems to have a timing issue: If we ask broker 'A' who
                # the ConsumerCoordinator is for a auto-created, not extant
                # topic, the assigned broker may not realize it's been so
                # designated by the time we find out and make our request.
                log.info(
                    "Coordinator is not coordinator!!: %s Attempt: %d of 20",
                    c_group, attempt)
                # Same non-blocking pause as above
                yield deferLater(self.reactor, 0.5, lambda: None)
                continue
            break
        # If every attempt failed, resp is still the initial {} and the
        # getattr default (-1) makes this assertion fail.
        self.assertEqual(getattr(resp, 'error', -1), 0)

        req = OffsetFetchRequest(self.topic, 0)
        (resp, ) = yield self.client.send_offset_fetch_request(c_group, [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, offset)
        # Check we received the proper metadata in the response
        self.assertEqual(resp.metadata, metadata)
        log.debug("test_commit_fetch_offsets: Test Complete.")
Пример #12
0
    def test_huge_messages(self):
        """Exercise the consumer's ``max_buffer_size`` limit.

        After ten small messages and one oversized message are produced, a
        consumer limited to ``(256 * 1024) - 10`` must hand over the ten
        small messages and then fail its start deferred with
        ConsumerFetchSizeTooSmall. An unlimited consumer started at the
        following offset must receive the oversized message.
        """
        # Produce 10 "normal" size messages
        small_payloads = [str(i) for i in range(10)]
        yield self.send_messages(0, small_payloads)

        # Pick the consumer's max buffer size, then put a message in Kafka
        # whose requested size is above it
        max_buffer = (256 * 1024) - 10
        oversized_payloads = [random_string(max_buffer + 10)]
        [huge_message] = yield self.send_messages(0, oversized_payloads)

        # Create a consumer with the (smallish) max buffer size
        small_consumer = self.consumer(max_buffer_size=max_buffer)

        # This consumer fails to get the message, and errbacks the start
        # deferred
        small_d = small_consumer.start(OFFSET_EARLIEST)

        # Wait for the errback, checking after every delay
        while not small_d.called:
            yield async_delay()
        # Make sure the failure is as expected
        self.failureResultOf(small_d, ConsumerFetchSizeTooSmall)

        # Make sure the smaller, earlier messages were delivered
        self.assert_message_count(small_consumer.processor._messages, 10)

        # Remember where the limited consumer got to
        last_seen = small_consumer.processor._messages[-1]
        resume_offset = last_seen.offset + 1

        # small_d already errbacked, but stop still must be called
        small_consumer.stop()

        # A consumer with no fetch size limit picks up just past the last
        # message processed
        big_consumer = self.consumer()
        big_d = big_consumer.start(resume_offset)
        # Wait for the giant message to arrive
        while not big_consumer.processor._messages:
            yield async_delay()

        self.assertEqual(
            big_consumer.processor._messages[0].message.value,
            huge_message)

        # Clean up
        big_consumer.stop()
        self.successResultOf(big_d)
Пример #13
0
    def setUpClass(cls):
        """Start ZooKeeper, three Kafka brokers and a client for the class.

        The KafkaClient is pointed at all three brokers with a timeout of
        1500. Nothing is started when the KAFKA_VERSION environment
        variable is unset or empty.
        """
        if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
            return

        # Single zookeeper, 3 kafka brokers
        chroot = random_string(10)
        broker_count = 3
        partition_count = 2

        cls.zk = ZookeeperFixture.instance()
        fixture_args = (
            cls.zk.host, cls.zk.port, chroot, broker_count, partition_count)
        brokers = []
        for broker_id in range(broker_count):
            brokers.append(KafkaFixture.instance(broker_id, *fixture_args))
        cls.kafka_brokers = brokers

        # The client is given every broker as a "host:port" string
        hosts = [
            '%s:%d' % (broker.host, broker.port)
            for broker in cls.kafka_brokers
        ]
        cls.client = KafkaClient(hosts, timeout=1500, clientId=__name__)

        # The KafkaClient cannot do any work until the reactor is running,
        # because KafkaBrokerClient makes its TCP connection through it.
        cls.reactor, cls.thread = threaded_reactor()
Пример #14
0
    def test_write_nonextant_topic(self):
        """test_write_nonextant_topic

        Write one message to each of ten freshly named topics (which Kafka
        is expected to auto-create) using nothing but
        producer.send_messages with a long enough timeout, and read each
        message back.
        """
        # Topic names are "<test method name>-<index>-<random suffix>"
        test_name = self.id().split('.')[-1]
        topic_names = []
        for index in range(10):
            topic_names.append(
                "{}-{}-{}".format(test_name, index, random_string(10)))

        producer = Producer(
            self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)

        for topic in topic_names:
            send_result = yield producer.send_messages(
                topic, msgs=[self.msg(topic)])
            # The send must have succeeded...
            self.assert_produce_response(send_result, 0)
            # ...and the message must be fetchable again
            yield self.assert_fetch_offset(
                0, 0, [self.msg(topic)], topic=topic)

        yield producer.stop()
Пример #15
0
    def test_commit_fetch_offsets(self):
        """test_commit_fetch_offsets

        Commit a random offset and metadata string for ``self.topic``,
        retrying up to 20 times while no consumer coordinator is available,
        then fetch the offset back and check both values.

        RANT: https://cwiki.apache.org/confluence/display/KAFKA/A+Guide+To+The+Kafka+Protocol
        implies that the metadata supplied with the commit will be returned by
        the fetch, but under 0.8.2.1 with a API_version of 0, it's not. Switch
        to using the V1 API and it works.
        """  # noqa
        # Local import keeps this method self-contained.
        from twisted.internet.task import deferLater

        resp = {}
        c_group = "CG_1"
        metadata = "My_Metadata_{}".format(random_string(10))
        offset = random.randint(0, 1024)
        log.debug("Commiting offset: %d metadata: %s for topic: %s part: 0",
                  offset, metadata, self.topic)
        req = OffsetCommitRequest(self.topic, 0, offset, -1, metadata)
        # We have to retry, since the client doesn't, and Kafka will
        # create the topic on the fly, but the first request will fail
        for attempt in range(20):
            log.debug("test_commit_fetch_offsets: Commit Attempt: %d", attempt)
            try:
                (resp,) = yield self.client.send_offset_commit_request(
                    c_group, [req])
            except ConsumerCoordinatorNotAvailableError:
                log.info(
                    "No Coordinator for Consumer Group: %s Attempt: %d of 20",
                    c_group, attempt)
                # Wait without blocking: time.sleep() here would stall the
                # reactor, and with it the client's own network I/O.
                yield deferLater(self.reactor, 0.5, lambda: None)
                continue
            break
        # If every attempt failed, resp is still the initial {} and the
        # getattr default (-1) makes this assertion fail.
        self.assertEqual(getattr(resp, 'error', -1), 0)

        req = OffsetFetchRequest(self.topic, 0)
        (resp,) = yield self.client.send_offset_fetch_request(c_group, [req])
        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.offset, offset)
        # Check we received the metadata that was committed
        self.assertEqual(resp.metadata, metadata)
        log.debug("test_commit_fetch_offsets: Test Complete.")
    def test_write_nonextant_topic(self):
        """test_write_nonextant_topic

        Check that producing to a non-extant topic works (the topic is
        expected to be auto-created) just by calling producer.send_messages
        with a long enough timeout. Ten such topics are tried, and each
        message is fetched back after it is sent.
        """
        method_name = self.id().split('.')[-1]
        # One name per index: "<test method name>-<index>-<random suffix>"
        test_topics = [
            "{}-{}-{}".format(method_name, number, random_string(10))
            for number in range(10)
        ]

        producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)

        for topic_name in test_topics:
            outgoing = [self.msg(topic_name)]
            resp = yield producer.send_messages(topic_name, msgs=outgoing)
            # Make sure the send went ok
            self.assert_produce_response(resp, 0)
            # Make sure we can get the message back
            expected = [self.msg(topic_name)]
            yield self.assert_fetch_offset(0, 0, expected, topic=topic_name)

        yield producer.stop()
    def test_throughput(self):
        """Measure produce/consume throughput and check nothing was lost.

        A producer sends blocks of MESSAGE_BLOCK_SIZE messages back to back
        for PRODUCE_TIME seconds while one consumer per partition reads
        from OFFSET_EARLIEST. Once the producer is told to stop, the
        consumers get up to ``PRODUCE_TIME * 2`` more seconds to catch up.
        The figures are logged, printed to stderr and reported through
        ``stat``; the test passes when the number of consumed messages
        equals the number sent.
        """
        yield async_delay(3)  # 0.8.1.1 fails otherwise

        # Flag to shutdown
        keep_running = True
        # Counters live in one-element lists so the nested callbacks below
        # can update them in place.
        sent_msgs_count = [0]
        total_messages_size = [0]
        # setup MESSAGE_BLOCK_SIZEx1024-ish byte messages to send over and over
        constant_messages = [
            self.msg(s)
            for s in [random_string(1024) for x in range(MESSAGE_BLOCK_SIZE)]
        ]
        large_messages = [
            self.msg(s) for s in [
                random_string(FETCH_BUFFER_SIZE_BYTES * 3)
                for x in range(MESSAGE_BLOCK_SIZE)
            ]
        ]

        constant_messages_size = len(constant_messages[0]) * MESSAGE_BLOCK_SIZE
        large_messages_size = len(large_messages[0]) * MESSAGE_BLOCK_SIZE

        # Create a producer and send some messages
        producer = Producer(self.client)

        # Create consumers (1/partition)
        consumers = [
            self.consumer(partition=p, fetch_max_wait_time=50)
            for p in range(PARTITION_COUNT)
        ]

        def log_error(failure):
            # Not running inside an exception handler, so log.exception()
            # would attach an empty traceback; log the failure itself.
            log.error("Failure sending messages: %r",
                      failure)  # pragma: no cover

        def sent_msgs(resps):
            log.info("Messages Sent: %r", resps)
            sent_msgs_count[0] += MESSAGE_BLOCK_SIZE
            return resps

        def send_msgs():
            # randomly, 1/20 of the time, send large messages
            if randint(0, 19):
                messages = constant_messages
                large = ''
                total_messages_size[0] += constant_messages_size
            else:
                messages = large_messages
                large = ' large'
                total_messages_size[0] += large_messages_size

            log.info("Sending: %d%s messages", len(messages), large)
            d = producer.send_messages(self.topic, msgs=messages)
            # As soon as we get a response from the broker, count them
            # and if we're still supposed to, send more
            d.addCallback(sent_msgs)
            if keep_running:
                d.addCallback(lambda _: self.reactor.callLater(0, send_msgs))
            d.addErrback(log_error)

        # Start sending messages, MESSAGE_BLOCK_SIZE at a time, 1K or 384K each
        send_msgs()

        # Start the consumers from the beginning
        fetch_start = time.time()
        start_ds = [consumer.start(OFFSET_EARLIEST) for consumer in consumers]

        # Let them all run for awhile...
        log.info("Waiting %d seconds...", PRODUCE_TIME)
        yield async_delay(PRODUCE_TIME)
        # Tell the producer to stop
        keep_running = False
        # Wait up to twice PRODUCE_TIME for the consumers to catch up; the
        # log line reports the limit that is actually applied.
        catch_up_limit = PRODUCE_TIME * 2
        log.info(
            "Waiting up to %d seconds for "
            "consumers to finish consuming...", catch_up_limit)
        deadline = time.time() + catch_up_limit
        # Bound up front so the report below cannot hit an unbound name
        # should the loop body never run.
        consumed = 0
        while time.time() < deadline:
            consumed = sum(
                len(consumer.processor._messages) for consumer in consumers)
            log.debug("Consumed %d messages.", consumed)
            if sent_msgs_count[0] == consumed:
                break
            yield async_delay(1)
        fetch_time = time.time() - fetch_start
        consumed_bytes = sum(
            c.processor._messages_bytes[0] for c in consumers)

        result_msg = ("Sent: {} messages ({:,} total bytes) in ~{} seconds"
                      " ({}/sec), Consumed: {} in {:.2f} seconds.".format(
                          sent_msgs_count[0], total_messages_size[0],
                          PRODUCE_TIME, sent_msgs_count[0] / PRODUCE_TIME,
                          consumed, fetch_time))
        # Log the result, and print to stderr to get around nose capture
        log.info(result_msg)
        print("\n\t Performance Data: " + result_msg, file=sys.stderr)
        # And print data as stats
        stat('Production_Time', PRODUCE_TIME)
        stat('Consumption_Time', fetch_time)
        stat('Messages_Produced', sent_msgs_count[0])
        stat('Messages_Consumed', consumed)
        stat('Messages_Bytes_Produced', total_messages_size[0])
        stat('Messages_Bytes_Consumed', consumed_bytes)
        stat('Messages_Produced_Per_Second', sent_msgs_count[0] / PRODUCE_TIME)
        stat('Messages_Consumed_Per_Second', consumed / fetch_time)
        stat('Message_Bytes_Produced_Per_Second',
             total_messages_size[0] / PRODUCE_TIME)
        stat('Message_Bytes_Consumed_Per_Second', consumed_bytes / fetch_time)

        # Clean up
        log.debug('Stopping producer: %r', producer)
        yield producer.stop()
        log.debug('Stopping consumers: %r', consumers)
        for consumer in consumers:
            consumer.stop()
        # Every consumer's start deferred must have completed successfully
        for start_d in start_ds:
            self.successResultOf(start_d)
        # make sure we got all the messages we sent
        self.assertEqual(
            sent_msgs_count[0],
            sum(len(consumer.processor._messages) for consumer in consumers))
Пример #18
0
 def test_gzip(self):
     """Round-trip 100 ``random_string(100)`` values through gzip.

     Each value is passed through gzip_encode and then gzip_decode and
     must come back equal to the original.
     """
     # range() rather than xrange(): xrange does not exist on Python 3,
     # and for 100 iterations the two are equivalent on Python 2.
     for _ in range(100):
         s1 = random_string(100)
         s2 = gzip_decode(gzip_encode(s1))
         self.assertEqual(s1, s2)
Пример #19
0
 def test_snappy(self):
     """Round-trip 100 ``random_string(120)`` values through snappy.

     Each value is passed through snappy_encode and then snappy_decode
     and must come back equal to the original.
     """
     # range() rather than xrange(): xrange does not exist on Python 3,
     # and for 100 iterations the two are equivalent on Python 2.
     for _ in range(100):
         s1 = random_string(120)
         s2 = snappy_decode(snappy_encode(s1))
         self.assertEqual(s1, s2)
Пример #20
0
 def setUp(self):
     """Run the inherited setUp and make sure the test has a topic.

     When ``self.topic`` is falsy it is set to the part of ``self.id()``
     after the last "." followed by "-" and ``random_string(10)``.
     """
     # NOTE(review): super(unittest.TestCase, ...) starts the lookup
     # *after* unittest.TestCase in the MRO, so TestCase's own setUp is
     # skipped - confirm this is intended rather than naming this class.
     super(unittest.TestCase, self).setUp()
     if self.topic:
         return
     test_id = self.id()
     method_name = test_id[test_id.rindex(".") + 1:]
     self.topic = "%s-%s" % (method_name, random_string(10))
Пример #21
0
 def setUp(self):
     """Call the inherited setUp, then default ``self.topic`` if unset.

     A falsy ``self.topic`` is replaced by "<name>-<suffix>", where
     <name> is whatever follows the last "." in ``self.id()`` and
     <suffix> is ``random_string(10)``.
     """
     # NOTE(review): the lookup begins after unittest.TestCase in the MRO,
     # which bypasses TestCase.setUp itself - verify that is deliberate.
     super(unittest.TestCase, self).setUp()
     if not self.topic:
         full_id = self.id()
         last_dot = full_id.rindex(".")
         short_name = full_id[last_dot + 1:]
         self.topic = "%s-%s" % (short_name, random_string(10))