def test_produce_mixed(self):
        """
        Produce one plain message together with a gzip-compressed message
        set (and, when the snappy codec is available, a snappy-compressed
        set as well), then fetch everything back and verify the payloads.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        # One plain message plus 100 gzipped ones.
        msg_count = 1+100
        messages = [
            create_message("Just a plain message"),
            create_message_set(
                make_send_requests(
                    ["Gzipped %d" % i for i in range(100)]), CODEC_GZIP)[0]
        ]

        # Can't produce snappy messages without snappy...
        if has_snappy():
            msg_count += 100
            messages.append(
                create_message_set(
                    make_send_requests(["Snappy %d" % i for i in range(100)]),
                    CODEC_SNAPPY)[0])

        yield self.assert_produce_request(messages, start_offset, msg_count)
        # The fetch should return the decompressed payloads, in order.
        ptMsgs = ['Just a plain message']
        ptMsgs.extend(["Gzipped %d" % i for i in range(100)])
        if has_snappy():
            ptMsgs.extend(["Snappy %d" % i for i in range(100)])
        yield self.assert_fetch_offset(0, start_offset, ptMsgs,
                                       fetch_size=10240)
    def test_produce_mixed(self):
        """
        Produce one plain message together with a gzip-compressed message
        set (and, when the snappy codec is available, a snappy-compressed
        set as well), then fetch everything back and verify the payloads.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        # One plain message plus 100 gzipped ones.
        msg_count = 1 + 100
        messages = [
            create_message("Just a plain message"),
            create_message_set(
                make_send_requests(["Gzipped %d" % i for i in range(100)]),
                CODEC_GZIP)[0]
        ]

        # Can't produce snappy messages without snappy...
        if has_snappy():
            msg_count += 100
            messages.append(
                create_message_set(
                    make_send_requests(["Snappy %d" % i for i in range(100)]),
                    CODEC_SNAPPY)[0])

        yield self.assert_produce_request(messages, start_offset, msg_count)
        # The fetch should return the decompressed payloads, in order.
        ptMsgs = ['Just a plain message']
        ptMsgs.extend(["Gzipped %d" % i for i in range(100)])
        if has_snappy():
            ptMsgs.extend(["Snappy %d" % i for i in range(100)])
        yield self.assert_fetch_offset(0,
                                       start_offset,
                                       ptMsgs,
                                       fetch_size=10240)
Example #3
0
    def test_create_snappy(self):
        """
        A snappy-compressed wrapper message has magic 0, the snappy codec
        bit set in its attributes, a null key, and a value that decompresses
        to the wire-format concatenation of the two inner messages.
        """
        if not has_snappy():
            raise SkipTest("Snappy not available")  # pragma: no cover
        message_list = [create_message(b"v3", key=b'84'),
                        create_message(b"v4", None)]
        msg = create_snappy_message(message_list)
        self.assertEqual(msg.magic, 0)
        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
        self.assertEqual(msg.key, None)
        decoded = snappy_decode(msg.value)
        # Expected byte layout of the inner message set (per entry:
        # offset, size, CRC, magic/flags, key, value).
        expect = b"".join([
            struct.pack(">q", 0),           # MsgSet offset
            struct.pack(">i", 18),          # MsgSet size
            struct.pack(">i", 813233088),   # CRC
            struct.pack(">bb", 0, 0),       # Magic, flags
            struct.pack(">i2s", 2, b'84'),  # Key is 2 bytes long, '84'
            struct.pack(">i", 2),           # Msg length (bytes)
            b"v3",                          # Message contents

            struct.pack(">q", 0),           # MsgSet offset
            struct.pack(">i", 16),          # MsgSet size
            struct.pack(">i", 1022734157),  # CRC
            struct.pack(">bb", 0, 0),       # Magic, flags
            struct.pack(">i", -1),          # -1 indicates a null key
            struct.pack(">i", 2),           # Msg length (bytes)
            b"v4",                          # Message contents
        ])

        self.assertEqual(decoded, expect)
Example #4
0
    def test_create_snappy(self):
        """
        A snappy-compressed wrapper message has magic 0, the snappy codec
        bit set in its attributes, a null key, and a value that decompresses
        to the wire-format concatenation of the two inner messages.
        """
        if not has_snappy():
            raise SkipTest("Snappy not available")  # pragma: no cover
        message_list = [
            create_message(b"v3", key=b'84'),
            create_message(b"v4", None)
        ]
        msg = create_snappy_message(message_list)
        self.assertEqual(msg.magic, 0)
        self.assertEqual(msg.attributes, ATTRIBUTE_CODEC_MASK & CODEC_SNAPPY)
        self.assertEqual(msg.key, None)
        decoded = snappy_decode(msg.value)
        # Expected byte layout of the inner message set (per entry:
        # offset, size, CRC, magic/flags, key, value).
        expect = b"".join([
            struct.pack(">q", 0),  # MsgSet offset
            struct.pack(">i", 18),  # MsgSet size
            struct.pack(">i", 813233088),  # CRC
            struct.pack(">bb", 0, 0),  # Magic, flags
            struct.pack(">i2s", 2, b'84'),  # Key is 2 bytes long, '84'
            struct.pack(">i", 2),  # Msg length (bytes)
            b"v3",  # Message contents
            struct.pack(">q", 0),  # MsgSet offset
            struct.pack(">i", 16),  # MsgSet size
            struct.pack(">i", 1022734157),  # CRC
            struct.pack(">bb", 0, 0),  # Magic, flags
            struct.pack(">i", -1),  # -1 indicates a null key
            struct.pack(">i", 2),  # Msg length (bytes)
            b"v4",  # Message contents
        ])

        self.assertEqual(decoded, expect)
Example #5
0
    def test_decode_message_snappy(self):
        """
        Decoding a snappy-compressed wrapper yields the two inner messages
        (b"v1" and b"v2").  The ``offset`` passed for the wrapper does not
        replace the inner messages' own offsets, which decode as 0 here.
        """
        if not has_snappy():
            raise SkipTest("Snappy not available")  # pragma: no cover
        # Raw compressed bytes of a message set containing "v1" and "v2".
        snappy_encoded = (b'\xec\x80\xa1\x95\x00\x02\xff\xff\xff\xff\x00\x00'
                          b'\x00,8\x00\x00\x19\x01@\x10L\x9f[\xc2\x00\x00\xff'
                          b'\xff\xff\xff\x00\x00\x00\x02v1\x19\x1bD\x00\x10\xd5'
                          b'\x96\nx\x00\x00\xff\xff\xff\xff\x00\x00\x00\x02v2')
        offset = 11
        messages = list(KafkaCodec._decode_message(snappy_encoded, offset))
        self.assertEqual(len(messages), 2)

        msg1, msg2 = messages

        returned_offset1, decoded_message1 = msg1
        self.assertEqual(returned_offset1, 0)
        self.assertEqual(decoded_message1, create_message(b"v1"))

        returned_offset2, decoded_message2 = msg2
        self.assertEqual(returned_offset2, 0)
        self.assertEqual(decoded_message2, create_message(b"v2"))
Example #6
0
    def test_decode_message_snappy(self):
        """
        Decoding a snappy-compressed wrapper yields the two inner messages
        (b"v1" and b"v2").  The ``offset`` passed for the wrapper does not
        replace the inner messages' own offsets, which decode as 0 here.
        """
        if not has_snappy():
            raise SkipTest("Snappy not available")  # pragma: no cover
        # Raw compressed bytes of a message set containing "v1" and "v2".
        # This must be a bytes literal (snappy operates on bytes); the
        # previous str literal here diverged from the bytes-based sibling
        # test and breaks under Python 3.
        snappy_encoded = (b'\xec\x80\xa1\x95\x00\x02\xff\xff\xff\xff\x00\x00'
                          b'\x00,8\x00\x00\x19\x01@\x10L\x9f[\xc2\x00\x00\xff'
                          b'\xff\xff\xff\x00\x00\x00\x02v1\x19\x1bD\x00\x10\xd5'
                          b'\x96\nx\x00\x00\xff\xff\xff\xff\x00\x00\x00\x02v2')
        offset = 11
        messages = list(KafkaCodec._decode_message(snappy_encoded, offset))
        self.assertEqual(len(messages), 2)

        msg1, msg2 = messages

        returned_offset1, decoded_message1 = msg1
        self.assertEqual(returned_offset1, 0)
        self.assertEqual(decoded_message1, create_message(b"v1"))

        returned_offset2, decoded_message2 = msg2
        self.assertEqual(returned_offset2, 0)
        self.assertEqual(decoded_message2, create_message(b"v2"))
Example #7
0
class TestCodec(unittest.TestCase):
    """Unit tests for the gzip/snappy codec helpers in ``afkak.codec``."""

    @unittest.skipUnless(has_gzip(), "Gzip not available")
    def test_gzip(self):
        """Gzip encode/decode round-trips arbitrary binary data."""
        for _ in range(100):
            s1 = os.urandom(100)
            s2 = gzip_decode(gzip_encode(s1))
            self.assertEqual(s1, s2)

    @unittest.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy(self):
        """Snappy encode/decode round-trips arbitrary binary data."""
        for _ in range(100):
            s1 = os.urandom(120)
            s2 = snappy_decode(snappy_encode(s1))
            self.assertEqual(s1, s2)

    @unittest.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_decode_xerial(self):
        """Decode a xerial-framed stream of two snappy-compressed blocks."""
        header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
        random_snappy = snappy_encode(b'SNAPPY' * 50)
        block_len = len(random_snappy)
        random_snappy2 = snappy_encode(b'XERIAL' * 50)
        block_len2 = len(random_snappy2)

        # Each compressed block is preceded by its big-endian 32-bit length.
        # (Parenthesized instead of the previous trailing-backslash
        # continuation, which dangled onto a blank line.)
        to_test = (
            header
            + struct.pack('!i', block_len) + random_snappy
            + struct.pack('!i', block_len2) + random_snappy2
        )

        self.assertEqual(snappy_decode(to_test),
                         (b'SNAPPY' * 50) + (b'XERIAL' * 50))

    @unittest.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_encode_xerial(self):
        """Encode with xerial framing at a 300-byte block size and compare
        against the known-good framed output."""
        to_ensure = (b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
                     b'\x00\x00\x00\x18'
                     b'\xac\x02\x14SNAPPY\xfe\x06\x00\xfe\x06'
                     b'\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
                     b'\x00\x00\x00\x18'
                     b'\xac\x02\x14XERIAL\xfe\x06\x00\xfe\x06\x00'
                     b'\xfe\x06\x00\xfe\x06\x00\x96\x06\x00')

        to_test = (b'SNAPPY' * 50) + (b'XERIAL' * 50)

        compressed = snappy_encode(to_test,
                                   xerial_compatible=True,
                                   xerial_blocksize=300)
        self.assertEqual(compressed, to_ensure)

    # NOTE(review): the skipUnless looks unnecessary since has_snappy is
    # patched to False inside — confirm whether afkak.codec needs the real
    # library importable for encode/decode to reach the NotImplementedError.
    @unittest.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_raises_when_not_present(self):
        """snappy_encode/snappy_decode raise NotImplementedError when
        has_snappy() reports the library as missing."""
        with patch.object(afkak.codec, 'has_snappy', return_value=False):
            with self.assertRaises(NotImplementedError):
                snappy_encode(b"Snappy not available")
            with self.assertRaises(NotImplementedError):
                snappy_decode(b"Snappy not available")

    def test_snappy_import_fails(self):
        """has_snappy() returns False when importing snappy fails."""
        import sys
        with patch.dict(sys.modules, values={'snappy': None}):
            reload_module(afkak.codec)
            self.assertFalse(afkak.codec.has_snappy())
        # Reload again so later tests see the real module state.
        reload_module(afkak.codec)
class TestAfkakProducerIntegration(KafkaIntegrationTestCase,
                                   unittest.TestCase):
    topic = 'produce_topic'

    @classmethod
    def setUpClass(cls):
        """
        Start Zookeeper and Kafka fixtures plus a threaded Twisted reactor
        for the whole test class.  Skipped (with a warning) when
        KAFKA_VERSION is not set in the environment.
        """
        if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
            log.warning("WARNING: KAFKA_VERSION not found in environment")
            return

        # Enable deferred/delayed-call debugging for better failure output.
        DEBUGGING = True
        setDebugging(DEBUGGING)
        DelayedCall.debug = DEBUGGING

        cls.zk = ZookeeperFixture.instance()
        cls.server = KafkaFixture.instance(0, cls.zk.host, cls.zk.port)

        # Startup the twisted reactor in a thread. We need this before the
        # KafkaClient can work, since KafkaBrokerClient relies on the
        # reactor for its TCP connection
        cls.reactor, cls.thread = threaded_reactor()

    @classmethod
    def tearDownClass(cls):
        """Shut down the Kafka broker and Zookeeper fixtures."""
        # Nothing was started when KAFKA_VERSION is unset; warn and bail.
        if not os.environ.get('KAFKA_VERSION'):  # pragma: no cover
            log.warning("WARNING: KAFKA_VERSION not found in environment")
            return

        # Close the broker first, then Zookeeper.
        for fixture in (cls.server, cls.zk):
            fixture.close()

    ###########################################################################
    #   client production Tests  - Server setup is 1 replica, 2 partitions    #
    ###########################################################################

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_produce_many_simple(self):
        """
        Two back-to-back batches of 100 plain messages are accepted; the
        second batch lands 100 offsets after the first.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        # Produce the identical 100-message batch twice, checking the
        # expected base offset each time.
        for base_offset in (start_offset, start_offset + 100):
            batch = [create_message("Test message %d" % i)
                     for i in range(100)]
            yield self.assert_produce_request(batch, base_offset, 100)

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_produce_100_keyed(self):
        """
        Produce 100 keyed messages and fetch them back, verifying both the
        payloads and the associated keys.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        # Build the payload/key lists once and derive the messages from them.
        payloads = ["Test message %d" % i for i in range(100)]
        keys = ["Key:%d" % i for i in range(100)]
        keyed_messages = [create_message(payload, key=key)
                          for payload, key in zip(payloads, keys)]

        yield self.assert_produce_request(keyed_messages, start_offset, 100)
        yield self.assert_fetch_offset(0,
                                       start_offset,
                                       payloads,
                                       expected_keys=keys,
                                       fetch_size=10240)

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_produce_100_keyed_gzipped(self):
        """
        Test that gzipping the batch doesn't affect the partition to which
        the messages are assigned, or the order of the messages in the topics
        """
        start_offset = yield self.current_offset(self.topic, 0)

        # Build one gzip-compressed message set from 100 keyed send requests.
        msg_set = create_message_set([
            SendRequest(self.topic, "Key:%d" % i, ["Test msg %d" % i], None)
            for i in range(100)
        ], CODEC_GZIP)
        yield self.assert_produce_request(
            msg_set,
            start_offset,
            100,
        )
        # Fetch back and verify both payloads and keys survived compression.
        msgs = ["Test msg %d" % i for i in range(100)]
        keys = ["Key:%d" % i for i in range(100)]
        yield self.assert_fetch_offset(0,
                                       start_offset,
                                       msgs,
                                       expected_keys=keys,
                                       fetch_size=10240)

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_produce_10k_simple(self):
        """A single large batch of 10,000 plain messages is accepted."""
        start_offset = yield self.current_offset(self.topic, 0)

        big_batch = [create_message("Test message %d" % n)
                     for n in range(10000)]
        yield self.assert_produce_request(big_batch, start_offset, 10000)

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_produce_many_gzip(self):
        """
        Two gzip-compressed message sets of 100 messages each are produced
        in one request and advance the offset by 200.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        # Build one compressed wrapper message per labelled batch.
        wrappers = []
        for label in ("Gzipped 1", "Gzipped 2"):
            requests = make_send_requests(
                ["%s %d" % (label, i) for i in range(100)])
            wrappers.append(create_message_set(requests, CODEC_GZIP)[0])

        yield self.assert_produce_request(wrappers, start_offset, 200)

    # NOTE(review): this uses unittest2.skipUnless where TestCodec above
    # uses unittest.skipUnless — confirm both names are imported.
    @unittest2.skipUnless(has_snappy(), "Snappy not available")
    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1", "0.8.2.2", "0.9.0.1")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_produce_many_snappy(self):
        """
        Two snappy-compressed message sets of 100 messages each are
        produced in one request and advance the offset by 200.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        message1 = create_message_set(
            make_send_requests(["Snappy 1 %d" % i for i in range(100)]),
            CODEC_SNAPPY)[0]
        message2 = create_message_set(
            make_send_requests(["Snappy 2 %d" % i for i in range(100)]),
            CODEC_SNAPPY)[0]
        yield self.assert_produce_request(
            [message1, message2],
            start_offset,
            200,
        )

    @kafka_versions("0.8.1", "0.8.1.1", "0.8.2.1", "0.8.2.2", "0.9.0.1")
    @deferred(timeout=20)
    @inlineCallbacks
    def test_produce_mixed(self):
        """
        Produce one plain message together with a gzip-compressed message
        set (and, when the snappy codec is available, a snappy-compressed
        set as well), then fetch everything back and verify the payloads.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        # One plain message plus 100 gzipped ones.
        msg_count = 1 + 100
        messages = [
            create_message("Just a plain message"),
            create_message_set(
                make_send_requests(["Gzipped %d" % i for i in range(100)]),
                CODEC_GZIP)[0]
        ]

        # Can't produce snappy messages without snappy...
        if has_snappy():
            msg_count += 100
            messages.append(
                create_message_set(
                    make_send_requests(["Snappy %d" % i for i in range(100)]),
                    CODEC_SNAPPY)[0])

        yield self.assert_produce_request(messages, start_offset, msg_count)
        # The fetch should return the decompressed payloads, in order.
        ptMsgs = ['Just a plain message']
        ptMsgs.extend(["Gzipped %d" % i for i in range(100)])
        if has_snappy():
            ptMsgs.extend(["Snappy %d" % i for i in range(100)])
        yield self.assert_fetch_offset(0,
                                       start_offset,
                                       ptMsgs,
                                       fetch_size=10240)

    @kafka_versions("all")
    @deferred(timeout=240)
    @inlineCallbacks
    def test_produce_10k_gzipped(self):
        """
        Two gzip-compressed batches of 5,000 messages each are produced,
        the second starting 5,000 offsets after the first.
        """
        start_offset = yield self.current_offset(self.topic, 0)

        for batch_num in (1, 2):
            texts = ["Gzipped batch %d, message %d" % (batch_num, i)
                     for i in range(5000)]
            msgs = create_message_set(make_send_requests(texts), CODEC_GZIP)
            base_offset = start_offset + (batch_num - 1) * 5000
            yield self.assert_produce_request(msgs, base_offset, 5000)

    ###################################################################
    #   Producer Tests  - Server setup is 1 replica, 2 partitions     #
    ###################################################################

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_producer_send_messages(self):
        """
        A default Producer alternates sends between the two partitions and
        wraps back to the first; fetched messages match what was sent.
        """
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)
        producer = Producer(self.client)

        # Goes to first partition
        resp = yield producer.send_messages(
            self.topic, msgs=[self.msg("one"),
                              self.msg("two")])
        self.assert_produce_response(resp, start_offset0)

        # Goes to the 2nd partition
        resp = yield producer.send_messages(self.topic,
                                            msgs=[self.msg("three")])
        self.assert_produce_response(resp, start_offset1)

        # fetch the messages back and make sure they are as expected
        yield self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("one"), self.msg("two")])
        yield self.assert_fetch_offset(1, start_offset1, [self.msg("three")])
        # Goes back to the first partition because there's only two partitions
        resp = yield producer.send_messages(
            self.topic, msgs=[self.msg("four"),
                              self.msg("five")])
        self.assert_produce_response(resp, start_offset0 + 2)
        yield self.assert_fetch_offset(0, start_offset0, [
            self.msg("one"),
            self.msg("two"),
            self.msg("four"),
            self.msg("five")
        ])

        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_producer_round_robin_partitioner(self):
        """
        With the RoundRobinPartitioner, four keyed sends alternate strictly
        between partition 0 and partition 1 regardless of key.
        """
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client,
                            partitioner_class=RoundRobinPartitioner)
        resp1 = yield producer.send_messages(self.topic, "key1",
                                             [self.msg("one")])
        resp2 = yield producer.send_messages(self.topic, "key2",
                                             [self.msg("two")])
        resp3 = yield producer.send_messages(self.topic, "key3",
                                             [self.msg("three")])
        resp4 = yield producer.send_messages(self.topic, "key4",
                                             [self.msg("four")])

        # Sends 1/3 hit partition 0, sends 2/4 hit partition 1.
        self.assert_produce_response(resp1, start_offset0 + 0)
        self.assert_produce_response(resp2, start_offset1 + 0)
        self.assert_produce_response(resp3, start_offset0 + 1)
        self.assert_produce_response(resp4, start_offset1 + 1)

        yield self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("one"), self.msg("three")])
        yield self.assert_fetch_offset(
            1, start_offset1,
            [self.msg("two"), self.msg("four")])

        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_producer_round_robin_partitioner_random_start(self):
        """
        With random-start round-robin, requests still alternate partitions:
        the 1st and 3rd sends share a partition, the 2nd uses the other.
        """
        try:
            RoundRobinPartitioner.set_random_start(True)
            producer = Producer(self.client,
                                partitioner_class=RoundRobinPartitioner)
            # Two partitions, so 1st and 3rd reqs should go to same part, but
            # 2nd should go to different. Other than that, without statistical
            # test, we can't say much... Partitioner tests should ensure that
            # we really aren't always starting on a non-random partition...
            resp1 = yield producer.send_messages(
                self.topic, msgs=[self.msg("one"),
                                  self.msg("two")])
            resp2 = yield producer.send_messages(self.topic,
                                                 msgs=[self.msg("three")])
            resp3 = yield producer.send_messages(
                self.topic, msgs=[self.msg("four"),
                                  self.msg("five")])

            self.assertEqual(resp1.partition, resp3.partition)
            self.assertNotEqual(resp1.partition, resp2.partition)

            yield producer.stop()
        finally:
            # Always restore the class-level default for other tests.
            RoundRobinPartitioner.set_random_start(False)

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_producer_hashed_partitioner(self):
        """
        With the HashedPartitioner, messages sharing a key always land on
        the same partition (here key '2' hashes to partition 0 and key '1'
        to partition 1), preserving per-key ordering.
        """
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, partitioner_class=HashedPartitioner)

        resp1 = yield producer.send_messages(self.topic, '1',
                                             [self.msg("one")])
        resp2 = yield producer.send_messages(self.topic, '2',
                                             [self.msg("two")])
        resp3 = yield producer.send_messages(self.topic, '1',
                                             [self.msg("three")])
        resp4 = yield producer.send_messages(self.topic, '1',
                                             [self.msg("four")])
        resp5 = yield producer.send_messages(self.topic, '2',
                                             [self.msg("five")])

        # Key '2' messages on partition 0, in send order.
        self.assert_produce_response(resp2, start_offset0 + 0)
        self.assert_produce_response(resp5, start_offset0 + 1)

        # Key '1' messages on partition 1, in send order.
        self.assert_produce_response(resp1, start_offset1 + 0)
        self.assert_produce_response(resp3, start_offset1 + 1)
        self.assert_produce_response(resp4, start_offset1 + 2)

        yield self.assert_fetch_offset(0, start_offset0, [
            self.msg("two"),
            self.msg("five"),
        ])
        yield self.assert_fetch_offset(1, start_offset1, [
            self.msg("one"),
            self.msg("three"),
            self.msg("four"),
        ])

        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_producer_batched_gzipped_hashed_partitioner(self):
        """
        A batching, gzip-compressing producer with the HashedPartitioner
        holds all sends until the 100-message batch threshold, then routes
        each keyed group to the partition its key hashes to.
        """
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)
        offsets = (start_offset0, start_offset1)

        requests = []
        msgs_by_partition = ([], [])
        keys_by_partition = ([], [])

        # A local partitioner instance predicts where each key will go.
        partitioner = HashedPartitioner(self.topic, [0, 1])
        producer = Producer(self.client,
                            codec=CODEC_GZIP,
                            batch_send=True,
                            batch_every_n=100,
                            batch_every_t=None,
                            partitioner_class=HashedPartitioner)

        # Send ten groups of messages, each with a different key
        for i in range(10):
            msg_group = []
            key = 'Key: {}'.format(i)
            part = partitioner.partition(key, [0, 1])
            for j in range(10):
                msg = self.msg('Group:{} Msg:{}'.format(i, j))
                msg_group.append(msg)
                msgs_by_partition[part].append(msg)
                keys_by_partition[part].append(key)
            request = producer.send_messages(self.topic,
                                             key=key,
                                             msgs=msg_group)
            requests.append(request)
            yield async_delay(.5)  # Make the NoResult test have teeth...
            if i < 9:
                # This is to ensure we really are batching all the requests
                self.assertNoResult(request)

        # Now ensure we can retrieve the right messages from each partition
        for part in [0, 1]:
            yield self.assert_fetch_offset(part,
                                           offsets[part],
                                           msgs_by_partition[part],
                                           keys_by_partition[part],
                                           fetch_size=20480)

        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=5)
    @inlineCallbacks
    def test_acks_none(self):
        """
        With PRODUCER_ACK_NOT_REQUIRED the send yields None (no broker
        response), yet the message is still written to partition 0.
        """
        partition0_offset = yield self.current_offset(self.topic, 0)
        # Query partition 1's offset too, discarding the result.
        yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, req_acks=PRODUCER_ACK_NOT_REQUIRED)
        response = yield producer.send_messages(self.topic,
                                                msgs=[self.msg("one")])
        # No acks were requested, so there is no response to return.
        self.assertEqual(response, None)

        yield self.assert_fetch_offset(0, partition0_offset,
                                       [self.msg("one")])
        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_acks_local_write(self):
        """
        With PRODUCER_ACK_LOCAL_WRITE the send yields a produce response
        carrying the assigned offset, and the message is fetchable.
        """
        start_offset0 = yield self.current_offset(self.topic, 0)
        # Query partition 1's offset too, discarding the result.
        yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)
        resp = yield producer.send_messages(self.topic, msgs=[self.msg("one")])

        self.assert_produce_response(resp, start_offset0)
        yield self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_acks_all_replicas(self):
        """
        With PRODUCER_ACK_ALL_REPLICAS the send is acknowledged with its
        offset; the message lands on partition 0 and partition 1 is empty.
        """
        offset0 = yield self.current_offset(self.topic, 0)
        offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, req_acks=PRODUCER_ACK_ALL_REPLICAS)

        response = yield producer.send_messages(self.topic,
                                                msgs=[self.msg("one")])
        self.assert_produce_response(response, offset0)
        # Partition 0 got the message; partition 1 is unchanged.
        yield self.assert_fetch_offset(0, offset0, [self.msg("one")])
        yield self.assert_fetch_offset(1, offset1, [])

        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_producer_batched_by_messages(self):
        """
        A producer batching every 10 messages holds sends (no result, no
        messages on the broker) until the count threshold is crossed, then
        flushes; batches alternate partitions 0, 1, 0.
        """
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client,
                            batch_send=True,
                            batch_every_n=10,
                            batch_every_b=0,
                            batch_every_t=0)
        # Send 4 messages and do a fetch. Fetch should timeout, and send
        # deferred shouldn't have a result yet...
        send1D = producer.send_messages(self.topic,
                                        msgs=[
                                            self.msg("one"),
                                            self.msg("two"),
                                            self.msg("three"),
                                            self.msg("four")
                                        ])
        # by default the assert_fetch_offset() waits for 0.5 secs on the server
        # side before returning no result. So, these two calls should take 1sec
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Messages shouldn't have sent out yet, so we shouldn't have
        # response from server yet on having received/responded to the request
        self.assertNoResult(send1D)
        # Sending 3 more messages won't trigger the send either (7 total)
        send2D = producer.send_messages(
            self.topic,
            msgs=[self.msg("five"),
                  self.msg("six"),
                  self.msg("seven")])
        # make sure no messages on server...
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Still no result on send
        self.assertNoResult(send2D)
        # send final batch which will be on partition 0 again...
        send3D = producer.send_messages(self.topic,
                                        msgs=[
                                            self.msg("eight"),
                                            self.msg("nine"),
                                            self.msg("ten"),
                                            self.msg("eleven")
                                        ])
        # Now do a fetch, again waiting for up to 0.5 seconds for the response
        # All four messages sent in first batch (to partition 0, given default
        # R-R, start-at-zero partitioner), and 4 in 3rd batch
        yield self.assert_fetch_offset(0,
                                       start_offset0, [
                                           self.msg("one"),
                                           self.msg("two"),
                                           self.msg("three"),
                                           self.msg("four"),
                                           self.msg("eight"),
                                           self.msg("nine"),
                                           self.msg("ten"),
                                           self.msg("eleven")
                                       ],
                                       fetch_size=2048)
        # Fetch from partition:1 should have all messages in 2nd batch
        yield self.assert_fetch_offset(
            1, start_offset1,
            [self.msg("five"),
             self.msg("six"),
             self.msg("seven")])
        # make sure the deferreds fired with the proper result
        resp1 = self.successResultOf(send1D)
        resp2 = self.successResultOf(send2D)
        resp3 = self.successResultOf(send3D)
        self.assert_produce_response(resp1, start_offset0)
        self.assert_produce_response(resp2, start_offset1)
        self.assert_produce_response(resp3, start_offset0)
        # cleanup
        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_producer_batched_by_bytes(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client,
                            batch_send=True,
                            batch_every_b=4096,
                            batch_every_n=0,
                            batch_every_t=0)
        # Send 4 messages and do a fetch. Fetch should timeout, and send
        # deferred shouldn't have a result yet...
        send1D = producer.send_messages(self.topic,
                                        msgs=[
                                            self.msg("one"),
                                            self.msg("two"),
                                            self.msg("three"),
                                            self.msg("four")
                                        ])
        # by default the assert_fetch_offset() waits for 0.5 secs on the server
        # side before returning no result. So, these two calls should take 1sec
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Messages shouldn't have sent out yet, so we shouldn't have
        # response from server yet on having received/responded to the request
        self.assertNoResult(send1D)
        # Sending 3 more messages should trigger the send, but we don't yield
        # here, so we shouldn't have a response immediately after, and so
        # send2D should still have no result.
        send2D = producer.send_messages(
            self.topic,
            msgs=[self.msg("five"),
                  self.msg("six"),
                  self.msg("seven")])
        # make sure no messages on server...
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Still no result on send
        self.assertNoResult(send2D)
        # send 3rd batch which will be on partition 0 again...
        send3D = producer.send_messages(self.topic,
                                        msgs=[
                                            self.msg("eight"),
                                            self.msg("nine"),
                                            self.msg("ten"),
                                            self.msg("eleven")
                                        ])
        # make sure no messages on server...
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Still no result on send
        self.assertNoResult(send3D)
        # Finally, send a big message to trigger send
        send4D = producer.send_messages(self.topic,
                                        msgs=[self.msg("1234" * 1024)])
        # Now do a fetch, again waiting for up to 0.5 seconds for the response
        # All four messages sent in first batch (to partition 0, given default
        # R-R, start-at-zero partitioner), and 4 in 3rd batch
        yield self.assert_fetch_offset(0,
                                       start_offset0, [
                                           self.msg("one"),
                                           self.msg("two"),
                                           self.msg("three"),
                                           self.msg("four"),
                                           self.msg("eight"),
                                           self.msg("nine"),
                                           self.msg("ten"),
                                           self.msg("eleven")
                                       ],
                                       fetch_size=2048)
        # Fetch from partition:1 should have all messages in 2nd batch, as
        # send_messages() treats calls as groups and all/none are sent...
        yield self.assert_fetch_offset(1,
                                       start_offset1, [
                                           self.msg("five"),
                                           self.msg("six"),
                                           self.msg("seven"),
                                           self.msg("1234" * 1024)
                                       ],
                                       fetch_size=5 * 1024)
        # make sure the deferreds fired with the proper result
        resp1 = self.successResultOf(send1D)
        resp2 = self.successResultOf(send2D)
        resp3 = self.successResultOf(send3D)
        resp4 = self.successResultOf(send4D)
        self.assert_produce_response(resp1, start_offset0)
        self.assert_produce_response(resp2, start_offset1)
        self.assert_produce_response(resp3, start_offset0)
        self.assert_produce_response(resp4, start_offset1)
        # cleanup
        yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=20)
    @inlineCallbacks
    def test_producer_batched_by_time(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        # This needs to be big enough that the operations between starting the
        # producer and the time.sleep() call take less time than this... I made
        # it large enough that the test would still pass even with my Macbook's
        # cores all pegged by a load generator.
        batchtime = 5

        try:
            producer = Producer(self.client,
                                batch_send=True,
                                batch_every_n=0,
                                batch_every_t=batchtime)
            startTime = time.time()
            # Send 4 messages and do a fetch
            send1D = producer.send_messages(self.topic,
                                            msgs=[
                                                self.msg("one"),
                                                self.msg("two"),
                                                self.msg("three"),
                                                self.msg("four")
                                            ])
            # set assert_fetch_offset() to wait for 0.1 secs on the server-side
            # before returning no result. So, these calls should take 0.2sec
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Messages shouldn't have sent out yet, so we shouldn't have
            # response from server yet on having received/responded to request
            self.assertNoResult(send1D)
            # Sending 3 more messages should NOT trigger the send, as less than
            # 1 sec. elapsed by here, so send2D should still have no result.
            send2D = producer.send_messages(
                self.topic,
                msgs=[self.msg("five"),
                      self.msg("six"),
                      self.msg("seven")])
            # still no messages...
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Still no result on send, and send should NOT have gone out.
            self.assertNoResult(send2D)
            # Wait the timeout out. It'd be nicer to be able to just 'advance'
            # the reactor, but since we need the network so...
            time.sleep(batchtime - (time.time() - startTime) + 0.05)
            # We need to yield to the reactor to have it process the response
            # from the broker. Both send1D and send2D should then have results.
            resp1 = yield send1D
            resp2 = yield send2D
            # ensure the 2 batches went into the proper partitions...
            self.assert_produce_response(resp1, start_offset0)
            self.assert_produce_response(resp2, start_offset1)
            # Should be able to get messages now
            yield self.assert_fetch_offset(0, start_offset0, [
                self.msg("one"),
                self.msg("two"),
                self.msg("three"),
                self.msg("four")
            ])
            yield self.assert_fetch_offset(
                1, start_offset1,
                [self.msg("five"),
                 self.msg("six"),
                 self.msg("seven")])
        except IOError:  # pragma: no cover
            msg = "IOError: Probably time.sleep with negative value due to " \
                "'too slow' system taking more than {0} seconds to do " \
                "something that should take ~0.4 seconds.".format(batchtime)
            log.exception(msg)
            # In case of the IOError, we want to eat the CancelledError
            send1D.addErrback(lambda _: None)  # Eat any uncaught errors
            send2D.addErrback(lambda _: None)  # Eat any uncaught errors
            self.fail(msg)
        finally:
            yield producer.stop()

    @kafka_versions("all")
    @deferred(timeout=15)
    @inlineCallbacks
    def test_write_nonextant_topic(self):
        """test_write_nonextant_topic

        Test we can write to a non-extant topic (which will be auto-created)
        simply by calling producer.send_messages with a long enough timeout.
        """
        test_topics = [
            "{}-{}-{}".format(self.id().split('.')[-1], i, random_string(10))
            for i in range(10)
        ]

        producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)

        for topic in test_topics:
            resp = yield producer.send_messages(topic, msgs=[self.msg(topic)])
            # Make sure the send went ok
            self.assert_produce_response(resp, 0)
            # Make sure we can get the message back
            yield self.assert_fetch_offset(0,
                                           0, [self.msg(topic)],
                                           topic=topic)

        yield producer.stop()

    # ###########  Utility Functions  ############

    @inlineCallbacks
    def assert_produce_request(self, messages, initial_offset, message_ct):
        """
        Produce `messages` to partition 0 of self.topic, then verify both
        the broker's produce response and that the partition's offset
        advanced by exactly `message_ct`.
        """
        request = ProduceRequest(self.topic, 0, messages=messages)

        # A single produce request must yield exactly one response;
        # the single-element unpack raises if the broker returns more.
        response, = yield self.client.send_produce_request([request])
        self.assert_produce_response(response, initial_offset)

        new_offset = yield self.current_offset(self.topic, 0)
        self.assertEqual(new_offset, initial_offset + message_ct)

    def assert_produce_response(self, resp, initial_offset):
        """
        Assert a produce response `resp` reports no error and that the
        messages landed at `initial_offset`.
        """
        expectations = ((resp.error, 0), (resp.offset, initial_offset))
        for actual, expected in expectations:
            self.assertEqual(actual, expected)

    @inlineCallbacks
    def assert_fetch_offset(self,
                            partition,
                            start_offset,
                            expected_messages,
                            expected_keys=None,
                            max_wait=0.5,
                            fetch_size=1024,
                            topic=None):
        """
        Fetch from `partition` of `topic` (default self.topic) starting at
        `start_offset` and assert the fetched message values equal
        `expected_messages` (and, if given, that their keys equal
        `expected_keys`), and that the high-water mark advanced accordingly.

        `max_wait` is in seconds; the broker is given it in milliseconds.

        NOTE: `expected_keys` previously defaulted to a mutable `[]`;
        `None` is used now (same falsy behavior, no shared-default hazard).
        """
        if topic is None:
            topic = self.topic
        # There should only be one response message from the server.
        # This will throw an exception if there's more than one.
        resp, = yield self.client.send_fetch_request(
            [FetchRequest(topic, partition, start_offset, fetch_size)],
            max_wait_time=int(max_wait * 1000))

        self.assertEqual(resp.error, 0)
        self.assertEqual(resp.partition, partition)
        # Convert generator to list so we can traverse it more than once
        resp_messages = list(resp.messages)
        messages = [x.message.value for x in resp_messages]
        self.assertEqual(messages, expected_messages)
        if expected_keys:
            keys = [x.message.key for x in resp_messages]
            self.assertEqual(keys, expected_keys)
        # High-water mark must reflect every message we expected to land
        self.assertEqual(resp.highwaterMark,
                         start_offset + len(expected_messages))
# Example #9
class TestCodec(unittest2.TestCase):
    @unittest2.skipUnless(has_gzip(), "Gzip not available")
    def test_gzip(self):
        """Round-trip 100 random strings through gzip encode/decode."""
        for _ in xrange(100):
            original = random_string(100)
            round_tripped = gzip_decode(gzip_encode(original))
            self.assertEqual(original, round_tripped)

    @unittest2.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy(self):
        """Round-trip 100 random strings through snappy encode/decode."""
        for _ in xrange(100):
            original = random_string(120)
            self.assertEqual(original, snappy_decode(snappy_encode(original)))

    @unittest2.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_detect_xerial(self):
        """
        _detect_xerial_stream() must accept only data that begins with the
        exact xerial magic header, and reject empty, short, or plain-snappy
        data.
        """
        import afkak as afkak1
        _detect_xerial_stream = afkak1.codec._detect_xerial_stream

        # Full xerial magic header followed by arbitrary payload bytes.
        # FIX: the second literal was a native-str ('...') implicitly
        # concatenated with a bytes literal -- accidental on Python 2 and a
        # syntax error on Python 3; both halves are now bytes.
        header = (b'\x82SNAPPY\x00\x00\x00\x00'
                  b'\x01\x00\x00\x00\x01Some extra bytes')
        # Wrong leading byte: must not be detected as a xerial stream.
        false_header = b'\x01SNAPPY\x00\x00\x00\x01\x00\x00\x00\x01'
        # Plain (non-xerial) snappy data that happens to contain 'SNAPPY'.
        random_snappy = snappy_encode('SNAPPY' * 50)
        # Shorter than the magic header.
        short_data = b'\x01\x02\x03\x04'

        self.assertTrue(_detect_xerial_stream(header))
        self.assertFalse(_detect_xerial_stream(b''))
        self.assertFalse(_detect_xerial_stream(b'\x00'))
        self.assertFalse(_detect_xerial_stream(false_header))
        self.assertFalse(_detect_xerial_stream(random_snappy))
        self.assertFalse(_detect_xerial_stream(short_data))

    @unittest2.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_decode_xerial(self):
        """
        snappy_decode() must reassemble a xerial-framed stream of multiple
        length-prefixed snappy blocks into the original concatenated data.
        """
        header = b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
        random_snappy = snappy_encode('SNAPPY' * 50)
        block_len = len(random_snappy)
        random_snappy2 = snappy_encode('XERIAL' * 50)
        block_len2 = len(random_snappy2)

        # xerial framing: magic header, then (big-endian length, block)
        # pairs. FIX: the original expression ended with a dangling '\'
        # line-continuation before a blank line; parenthesized instead.
        to_test = (header +
                   struct.pack('!i', block_len) + random_snappy +
                   struct.pack('!i', block_len2) + random_snappy2)

        self.assertEqual(
            snappy_decode(to_test), ('SNAPPY' * 50) + ('XERIAL' * 50))

    @unittest2.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_encode_xerial(self):
        """
        snappy_encode(xerial_compatible=True) must emit the xerial magic
        header followed by one length-prefixed snappy block per
        xerial_blocksize chunk of input.
        """
        # Expected wire format: magic header, then for each block a
        # 4-byte big-endian length followed by the compressed block.
        # FIX: the continuation literals were native-str ('...') joined to
        # a bytes literal -- accidental on Python 2 and broken on
        # Python 3; every piece is now a bytes literal.
        to_ensure = (
            b'\x82SNAPPY\x00\x00\x00\x00\x01\x00\x00\x00\x01'
            b'\x00\x00\x00\x18'
            b'\xac\x02\x14SNAPPY\xfe\x06\x00\xfe\x06'
            b'\x00\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
            b'\x00\x00\x00\x18'
            b'\xac\x02\x14XERIAL\xfe\x06\x00\xfe\x06\x00'
            b'\xfe\x06\x00\xfe\x06\x00\x96\x06\x00'
        )

        to_test = ('SNAPPY' * 50) + ('XERIAL' * 50)

        # 300-byte blocks: each 300-char input chunk becomes one block.
        compressed = snappy_encode(
            to_test, xerial_compatible=True, xerial_blocksize=300)
        self.assertEqual(compressed, to_ensure)

    @unittest2.skipUnless(has_snappy(), "Snappy not available")
    def test_snappy_raises_when_not_present(self):
        """
        Both snappy codec entry points must raise NotImplementedError when
        has_snappy() reports the library is unavailable.
        """
        with patch.object(afkak.codec, 'has_snappy',
                          return_value=False):
            for codec_op in (snappy_encode, snappy_decode):
                with self.assertRaises(NotImplementedError):
                    codec_op("Snappy not available")

    def test_snappy_import_fails(self):
        """
        has_snappy() must report False when the snappy module cannot be
        imported.
        """
        import sys
        try:
            # Masking the module makes the import inside afkak.codec fail
            # when it is reloaded.
            with patch.dict(sys.modules, values={'snappy': None}):
                reload(afkak.codec)
                self.assertFalse(afkak.codec.has_snappy())
        finally:
            # FIX: restore the real codec state unconditionally. Previously
            # a failure above skipped this reload, leaving afkak.codec in
            # the snappy-disabled state and poisoning later tests.
            reload(afkak.codec)