示例#1
0
    def test_producer_send_messages(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)
        producer = Producer(self.client)

        # Goes to first partition
        resp = yield producer.send_messages(
            self.topic, msgs=[self.msg("one"), self.msg("two")])
        self.assert_produce_response(resp, start_offset0)

        # Goes to the 2nd partition
        resp = yield producer.send_messages(self.topic,
                                            msgs=[self.msg("three")])
        self.assert_produce_response(resp, start_offset1)

        # fetch the messages back and make sure they are as expected
        yield self.assert_fetch_offset(
            0, start_offset0, [self.msg("one"), self.msg("two")])
        yield self.assert_fetch_offset(
            1, start_offset1, [self.msg("three")])
        # Goes back to the first partition because there's only two partitions
        resp = yield producer.send_messages(
            self.topic, msgs=[self.msg("four"), self.msg("five")])
        self.assert_produce_response(resp, start_offset0+2)
        yield self.assert_fetch_offset(
            0, start_offset0, [self.msg("one"),
                               self.msg("two"),
                               self.msg("four"),
                               self.msg("five")])

        yield producer.stop()
    def test_switch_leader(self):
        producer = Producer(self.client)
        topic = self.topic
        try:
            for index in range(1, 3):
                # cause the client to establish connections to all the brokers
                log.debug("Pass: %d. Sending 10 random messages", index)
                yield self._send_random_messages(producer, topic, 10)

                # Ensure that the follower is in sync
                log.debug("Ensuring topic/partition is replicated.")
                part_meta = self.client.partition_meta[TopicAndPartition(
                    self.topic, 0)]
                # Ensure the all the replicas are in-sync before proceeding
                while len(part_meta.isr) != 2:  # pragma: no cover
                    log.debug("Waiting for Kafka replica to become synced")
                    if len(part_meta.replicas) != 2:
                        log.error(
                            "Kafka replica 'disappeared'!"
                            "Partitition Meta: %r", part_meta)
                    yield async_delay(1.0)
                    yield self.client.load_metadata_for_topics(self.topic)
                    part_meta = self.client.partition_meta[TopicAndPartition(
                        self.topic, 0)]

                # kill leader for partition 0
                log.debug("Killing leader of partition 0")
                broker, kill_time = self._kill_leader(topic, 0)

                log.debug("Sending 1 more message: 'part 1'")
                yield producer.send_messages(topic, msgs=['part 1'])
                log.debug("Sending 1 more message: 'part 2'")
                yield producer.send_messages(topic, msgs=['part 2'])

                # send to new leader
                log.debug("Sending 10 more messages")
                yield self._send_random_messages(producer, topic, 10)

                # Make sure the ZK ephemeral time (~6 seconds) has elapsed
                wait_time = (kill_time + 6.5) - time.time()
                if wait_time > 0:
                    log.debug("Waiting: %4.2f for ZK timeout", wait_time)
                    yield async_delay(wait_time)
                # restart the kafka broker
                log.debug("Restarting the broker")
                broker.restart()

                # count number of messages
                log.debug("Getting message count")
                count = yield self._count_messages(topic)
                self.assertEqual(count, 22 * index)
        finally:
            log.debug("Stopping the producer")
            yield producer.stop()
            log.debug("Producer stopped")

        log.debug("Test complete.")
    def test_switch_leader(self):
        producer = Producer(self.client)
        topic = self.topic
        try:
            for index in range(1, 3):
                # cause the client to establish connections to all the brokers
                log.debug("Pass: %d. Sending 10 random messages", index)
                yield self._send_random_messages(producer, topic, 10)

                # Ensure that the follower is in sync
                log.debug("Ensuring topic/partition is replicated.")
                part_meta = self.client.partition_meta[TopicAndPartition(
                    self.topic, 0)]
                # Ensure the all the replicas are in-sync before proceeding
                while len(part_meta.isr) != 2:  # pragma: no cover
                    log.debug("Waiting for Kafka replica to become synced")
                    if len(part_meta.replicas) != 2:
                        log.error("Kafka replica 'disappeared'!"
                                  "Partitition Meta: %r", part_meta)
                    yield async_delay(1.0)
                    yield self.client.load_metadata_for_topics(self.topic)
                    part_meta = self.client.partition_meta[TopicAndPartition(
                        self.topic, 0)]

                # kill leader for partition 0
                log.debug("Killing leader of partition 0")
                broker, kill_time = self._kill_leader(topic, 0)

                log.debug("Sending 1 more message: 'part 1'")
                yield producer.send_messages(topic, msgs=['part 1'])
                log.debug("Sending 1 more message: 'part 2'")
                yield producer.send_messages(topic, msgs=['part 2'])

                # send to new leader
                log.debug("Sending 10 more messages")
                yield self._send_random_messages(producer, topic, 10)

                # Make sure the ZK ephemeral time (~6 seconds) has elapsed
                wait_time = (kill_time + 6.5) - time.time()
                if wait_time > 0:
                    log.debug("Waiting: %4.2f for ZK timeout", wait_time)
                    yield async_delay(wait_time)
                # restart the kafka broker
                log.debug("Restarting the broker")
                broker.restart()

                # count number of messages
                log.debug("Getting message count")
                count = yield self._count_messages(topic)
                self.assertEqual(count, 22 * index)
        finally:
            log.debug("Stopping the producer")
            yield producer.stop()
            log.debug("Producer stopped")

        log.debug("Test complete.")
    def test_acks_none(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, req_acks=PRODUCER_ACK_NOT_REQUIRED)
        resp = yield producer.send_messages(self.topic, msgs=[self.msg("one")])
        self.assertEqual(resp, None)

        yield self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
        yield producer.stop()
示例#5
0
    def test_producer_batched_by_time(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        # This needs to be big enough that the operations between starting the
        # producer and the sleep take less time than this... I made
        # it large enough that the test would still pass even with my Macbook's
        # cores all pegged by a load generator.
        batchtime = 5

        try:
            producer = Producer(self.client, batch_send=True,
                                batch_every_n=0, batch_every_t=batchtime)
            startTime = time.time()
            # Send 4 messages and do a fetch
            send1D = producer.send_messages(
                self.topic, msgs=[self.msg("one"), self.msg("two"),
                                  self.msg("three"), self.msg("four")])
            # set assert_fetch_offset() to wait for 0.1 secs on the server-side
            # before returning no result. So, these calls should take 0.2sec
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Messages shouldn't have sent out yet, so we shouldn't have
            # response from server yet on having received/responded to request
            self.assertNoResult(send1D)
            # Sending 3 more messages should NOT trigger the send, as less than
            # 1 sec. elapsed by here, so send2D should still have no result.
            send2D = producer.send_messages(
                self.topic, msgs=[self.msg("five"), self.msg("six"),
                                  self.msg("seven")])
            # still no messages...
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Still no result on send, and send should NOT have gone out.
            self.assertNoResult(send2D)
            # Wait the timeout out. It'd be nicer to be able to just 'advance'
            # the reactor, but since we need the network so...
            yield async_delay(batchtime - (time.time() - startTime) + 0.05, clock=self.reactor)
            # We need to yield to the reactor to have it process the response
            # from the broker. Both send1D and send2D should then have results.
            resp1 = yield send1D
            resp2 = yield send2D
            # ensure the 2 batches went into the proper partitions...
            self.assert_produce_response(resp1, start_offset0)
            self.assert_produce_response(resp2, start_offset1)
            # Should be able to get messages now
            yield self.assert_fetch_offset(
                0, start_offset0, [self.msg("one"), self.msg("two"),
                                   self.msg("three"), self.msg("four")])
            yield self.assert_fetch_offset(
                1, start_offset1, [self.msg("five"), self.msg("six"),
                                   self.msg("seven")])
        finally:
            yield producer.stop()
示例#6
0
    def test_acks_none(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        yield self.current_offset(self.topic, 1)

        producer = Producer(
            self.client, req_acks=PRODUCER_ACK_NOT_REQUIRED)
        resp = yield producer.send_messages(self.topic, msgs=[self.msg("one")])
        self.assertEqual(resp, None)

        yield self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
        yield producer.stop()
    def test_acks_local_write(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)
        resp = yield producer.send_messages(self.topic, msgs=[self.msg("one")])

        self.assert_produce_response(resp, start_offset0)
        yield self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

        yield producer.stop()
示例#8
0
    def test_producer_batched_by_messages(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, batch_send=True, batch_every_n=10,
                            batch_every_b=0, batch_every_t=0)
        # Send 4 messages and do a fetch. Fetch should timeout, and send
        # deferred shouldn't have a result yet...
        send1D = producer.send_messages(
            self.topic, msgs=[self.msg("one"), self.msg("two"),
                              self.msg("three"), self.msg("four")])
        # by default the assert_fetch_offset() waits for 0.5 secs on the server
        # side before returning no result. So, these two calls should take 1sec
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Messages shouldn't have sent out yet, so we shouldn't have
        # response from server yet on having received/responded to the request
        self.assertNoResult(send1D)
        # Sending 3 more messages won't trigger the send either (7 total)
        send2D = producer.send_messages(
            self.topic, msgs=[self.msg("five"), self.msg("six"),
                              self.msg("seven")])
        # make sure no messages on server...
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Still no result on send
        self.assertNoResult(send2D)
        # send final batch which will be on partition 0 again...
        send3D = producer.send_messages(
            self.topic, msgs=[self.msg("eight"), self.msg("nine"),
                              self.msg("ten"), self.msg("eleven")])
        # Now do a fetch, again waiting for up to 0.5 seconds for the response
        # All four messages sent in first batch (to partition 0, given default
        # R-R, start-at-zero partitioner), and 4 in 3rd batch
        yield self.assert_fetch_offset(
            0, start_offset0, [self.msg("one"), self.msg("two"),
                               self.msg("three"), self.msg("four"),
                               self.msg("eight"), self.msg("nine"),
                               self.msg("ten"), self.msg("eleven")],
            fetch_size=2048)
        # Fetch from partition:1 should have all messages in 2nd batch
        yield self.assert_fetch_offset(
            1, start_offset1, [self.msg("five"), self.msg("six"),
                               self.msg("seven")])
        # make sure the deferreds fired with the proper result
        resp1 = self.successResultOf(send1D)
        resp2 = self.successResultOf(send2D)
        resp3 = self.successResultOf(send3D)
        self.assert_produce_response(resp1, start_offset0)
        self.assert_produce_response(resp2, start_offset1)
        self.assert_produce_response(resp3, start_offset0)
        # cleanup
        yield producer.stop()
示例#9
0
    def test_acks_local_write(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        yield self.current_offset(self.topic, 1)

        producer = Producer(
            self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)
        resp = yield producer.send_messages(self.topic, msgs=[self.msg("one")])

        self.assert_produce_response(resp, start_offset0)
        yield self.assert_fetch_offset(0, start_offset0, [self.msg("one")])

        yield producer.stop()
    def test_acks_all_replicas(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, req_acks=PRODUCER_ACK_ALL_REPLICAS)

        resp = yield producer.send_messages(self.topic, msgs=[self.msg("one")])
        self.assert_produce_response(resp, start_offset0)
        yield self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
        yield self.assert_fetch_offset(1, start_offset1, [])

        yield producer.stop()
    def test_switch_leader(self):
        """
        Produce messages while killing the coordinator broker.

        Note that in order to avoid loss of acknowledged writes the producer
        must request acks of -1 (`afkak.common.PRODUCER_ACK_ALL_REPLICAS`).
        """
        producer = Producer(
            self.client,
            req_acks=PRODUCER_ACK_ALL_REPLICAS,
            max_req_attempts=100,
        )
        topic = self.topic
        try:
            for index in range(1, 3):
                # cause the client to establish connections to all the brokers
                log.debug("Pass: %d. Sending 10 random messages", index)
                yield self._send_random_messages(producer, topic, 10)

                # kill leader for partition 0
                log.debug("Killing leader of partition 0")
                broker, kill_time = self._kill_leader(topic, 0)

                log.debug("Sending 1 more message: 'part 1'")
                yield producer.send_messages(topic, msgs=[b'part 1'])
                log.debug("Sending 1 more message: 'part 2'")
                yield producer.send_messages(topic, msgs=[b'part 2'])

                # send to new leader
                log.debug("Sending 10 more messages")
                yield self._send_random_messages(producer, topic, 10)

                # Make sure the ZK ephemeral time (~6 seconds) has elapsed
                wait_time = (kill_time + 6.5) - time.time()
                if wait_time > 0:
                    log.debug("Waiting: %4.2f for ZK timeout", wait_time)
                    yield async_delay(wait_time)
                # restart the kafka broker
                log.debug("Restarting leader broker %r", broker)
                broker.restart()

                # count number of messages
                log.debug("Getting message count")
                count = yield self._count_messages(topic)
                self.assertGreaterEqual(count, 22 * index)
        finally:
            log.debug("Stopping the producer")
            yield producer.stop()
            log.debug("Producer stopped")

        log.debug("Test complete.")
    def test_producer_hashed_partitioner(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, partitioner_class=HashedPartitioner)

        resp1 = yield producer.send_messages(self.topic, '1',
                                             [self.msg("one")])
        resp2 = yield producer.send_messages(self.topic, '2',
                                             [self.msg("two")])
        resp3 = yield producer.send_messages(self.topic, '1',
                                             [self.msg("three")])
        resp4 = yield producer.send_messages(self.topic, '1',
                                             [self.msg("four")])
        resp5 = yield producer.send_messages(self.topic, '2',
                                             [self.msg("five")])

        self.assert_produce_response(resp2, start_offset0 + 0)
        self.assert_produce_response(resp5, start_offset0 + 1)

        self.assert_produce_response(resp1, start_offset1 + 0)
        self.assert_produce_response(resp3, start_offset1 + 1)
        self.assert_produce_response(resp4, start_offset1 + 2)

        yield self.assert_fetch_offset(0, start_offset0, [
            self.msg("two"),
            self.msg("five"),
        ])
        yield self.assert_fetch_offset(1, start_offset1, [
            self.msg("one"),
            self.msg("three"),
            self.msg("four"),
        ])

        yield producer.stop()
示例#13
0
    def test_acks_all_replicas(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(
            self.client,
            req_acks=PRODUCER_ACK_ALL_REPLICAS)

        resp = yield producer.send_messages(self.topic, msgs=[self.msg("one")])
        self.assert_produce_response(resp, start_offset0)
        yield self.assert_fetch_offset(0, start_offset0, [self.msg("one")])
        yield self.assert_fetch_offset(1, start_offset1, [])

        yield producer.stop()
    def test_producer_batched_gzipped_hashed_partitioner(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)
        offsets = (start_offset0, start_offset1)

        requests = []
        msgs_by_partition = ([], [])
        keys_by_partition = ([], [])

        partitioner = HashedPartitioner(self.topic, [0, 1])
        producer = Producer(self.client,
                            codec=CODEC_GZIP,
                            batch_send=True,
                            batch_every_n=100,
                            batch_every_t=None,
                            partitioner_class=HashedPartitioner)

        # Send ten groups of messages, each with a different key
        for i in range(10):
            msg_group = []
            key = 'Key: {}'.format(i)
            part = partitioner.partition(key, [0, 1])
            for j in range(10):
                msg = self.msg('Group:{} Msg:{}'.format(i, j))
                msg_group.append(msg)
                msgs_by_partition[part].append(msg)
                keys_by_partition[part].append(key)
            request = producer.send_messages(self.topic,
                                             key=key,
                                             msgs=msg_group)
            requests.append(request)
            yield async_delay(.5)  # Make the NoResult test have teeth...
            if i < 9:
                # This is to ensure we really are batching all the requests
                self.assertNoResult(request)

        # Now ensure we can retrieve the right messages from each partition
        for part in [0, 1]:
            yield self.assert_fetch_offset(part,
                                           offsets[part],
                                           msgs_by_partition[part],
                                           keys_by_partition[part],
                                           fetch_size=20480)

        yield producer.stop()
    def test_producer_round_robin_partitioner(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client,
                            partitioner_class=RoundRobinPartitioner)
        resp1 = yield producer.send_messages(self.topic, "key1",
                                             [self.msg("one")])
        resp2 = yield producer.send_messages(self.topic, "key2",
                                             [self.msg("two")])
        resp3 = yield producer.send_messages(self.topic, "key3",
                                             [self.msg("three")])
        resp4 = yield producer.send_messages(self.topic, "key4",
                                             [self.msg("four")])

        self.assert_produce_response(resp1, start_offset0 + 0)
        self.assert_produce_response(resp2, start_offset1 + 0)
        self.assert_produce_response(resp3, start_offset0 + 1)
        self.assert_produce_response(resp4, start_offset1 + 1)

        yield self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("one"), self.msg("three")])
        yield self.assert_fetch_offset(
            1, start_offset1,
            [self.msg("two"), self.msg("four")])

        yield producer.stop()
示例#16
0
    def test_write_nonextant_topic(self):
        """
        Test we can write to a non-extant topic (which will be auto-created)
        simply by calling producer.send_messages with a long enough timeout.
        """
        test_topics = ["{}-{}-{}".format(
            self.id().split('.')[-1], i, random_string(10)) for i in range(10)]

        producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)

        for topic in test_topics:
            resp = yield producer.send_messages(topic, msgs=[self.msg(topic)])
            # Make sure the send went ok
            self.assert_produce_response(resp, 0)
            # Make sure we can get the message back
            yield self.assert_fetch_offset(
                0, 0, [self.msg(topic)], topic=topic)

        yield producer.stop()
示例#17
0
    def test_write_nonextant_topic(self):
        """
        Test we can write to a non-extant topic (which will be auto-created)
        simply by calling producer.send_messages with a long enough timeout.
        """
        test_topics = ["{}-{}-{}".format(
            self.id().split('.')[-1], i, random_string(10)) for i in range(10)]

        producer = Producer(self.client, req_acks=PRODUCER_ACK_LOCAL_WRITE)

        for topic in test_topics:
            resp = yield producer.send_messages(topic, msgs=[self.msg(topic)])
            # Make sure the send went ok
            self.assert_produce_response(resp, 0)
            # Make sure we can get the message back
            yield self.assert_fetch_offset(
                0, 0, [self.msg(topic)], topic=topic)

        yield producer.stop()
    def test_producer_send_messages(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)
        producer = Producer(self.client)

        # Goes to first partition
        resp = yield producer.send_messages(
            self.topic, msgs=[self.msg("one"),
                              self.msg("two")])
        self.assert_produce_response(resp, start_offset0)

        # Goes to the 2nd partition
        resp = yield producer.send_messages(self.topic,
                                            msgs=[self.msg("three")])
        self.assert_produce_response(resp, start_offset1)

        # fetch the messages back and make sure they are as expected
        yield self.assert_fetch_offset(
            0, start_offset0,
            [self.msg("one"), self.msg("two")])
        yield self.assert_fetch_offset(1, start_offset1, [self.msg("three")])
        # Goes back to the first partition because there's only two partitions
        resp = yield producer.send_messages(
            self.topic, msgs=[self.msg("four"),
                              self.msg("five")])
        self.assert_produce_response(resp, start_offset0 + 2)
        yield self.assert_fetch_offset(0, start_offset0, [
            self.msg("one"),
            self.msg("two"),
            self.msg("four"),
            self.msg("five")
        ])

        yield producer.stop()
示例#19
0
    def test_producer_round_robin_partitioner(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client,
                            partitioner_class=RoundRobinPartitioner)
        resp1 = yield producer.send_messages(
            self.topic, b"key1", [self.msg("one")])
        resp2 = yield producer.send_messages(
            self.topic, b"key2", [self.msg("two")])
        resp3 = yield producer.send_messages(
            self.topic, b"key3", [self.msg("three")])
        resp4 = yield producer.send_messages(
            self.topic, b"key4", [self.msg("four")])

        self.assert_produce_response(resp1, start_offset0+0)
        self.assert_produce_response(resp2, start_offset1+0)
        self.assert_produce_response(resp3, start_offset0+1)
        self.assert_produce_response(resp4, start_offset1+1)

        yield self.assert_fetch_offset(
            0, start_offset0, [self.msg("one"), self.msg("three")])
        yield self.assert_fetch_offset(
            1, start_offset1, [self.msg("two"), self.msg("four")])

        yield producer.stop()
示例#20
0
    def test_producer_round_robin_partitioner_random_start(self):
        try:
            RoundRobinPartitioner.set_random_start(True)
            producer = Producer(self.client,
                                partitioner_class=RoundRobinPartitioner)
            # Two partitions, so 1st and 3rd reqs should go to same part, but
            # 2nd should go to different. Other than that, without statistical
            # test, we can't say much... Partitioner tests should ensure that
            # we really aren't always starting on a non-random partition...
            resp1 = yield producer.send_messages(
                self.topic, msgs=[self.msg("one"), self.msg("two")])
            resp2 = yield producer.send_messages(
                self.topic, msgs=[self.msg("three")])
            resp3 = yield producer.send_messages(
                self.topic, msgs=[self.msg("four"), self.msg("five")])

            self.assertEqual(resp1.partition, resp3.partition)
            self.assertNotEqual(resp1.partition, resp2.partition)

            yield producer.stop()
        finally:
            RoundRobinPartitioner.set_random_start(False)
示例#21
0
    def test_producer_batched_gzipped_hashed_partitioner(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)
        offsets = (start_offset0, start_offset1)

        requests = []
        msgs_by_partition = ([], [])
        keys_by_partition = ([], [])

        partitioner = HashedPartitioner(self.topic, [0, 1])
        producer = Producer(
            self.client, codec=CODEC_GZIP, batch_send=True, batch_every_n=100,
            batch_every_t=None, partitioner_class=HashedPartitioner)

        # Send ten groups of messages, each with a different key
        for i in range(10):
            msg_group = []
            key = 'Key: {}'.format(i).encode()
            part = partitioner.partition(key, [0, 1])
            for j in range(10):
                msg = self.msg('Group:{} Msg:{}'.format(i, j))
                msg_group.append(msg)
                msgs_by_partition[part].append(msg)
                keys_by_partition[part].append(key)
            request = producer.send_messages(
                self.topic, key=key, msgs=msg_group)
            requests.append(request)
            yield async_delay(.5)  # Make the NoResult test have teeth...
            if i < 9:
                # This is to ensure we really are batching all the requests
                self.assertNoResult(request)

        # Now ensure we can retrieve the right messages from each partition
        for part in [0, 1]:
            yield self.assert_fetch_offset(
                part, offsets[part], msgs_by_partition[part],
                keys_by_partition[part], fetch_size=20480)

        yield producer.stop()
示例#22
0
    def test_producer_batched_by_messages(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client, batch_send=True, batch_every_n=10,
                            batch_every_b=0, batch_every_t=0)
        # Send 4 messages and do a fetch. Fetch should timeout, and send
        # deferred shouldn't have a result yet...
        send1D = producer.send_messages(
            self.topic, msgs=[self.msg("one"), self.msg("two"),
                              self.msg("three"), self.msg("four")])
        # by default the assert_fetch_offset() waits for 0.5 secs on the server
        # side before returning no result. So, these two calls should take 1sec
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Messages shouldn't have sent out yet, so we shouldn't have
        # response from server yet on having received/responded to the request
        self.assertNoResult(send1D)
        # Sending 3 more messages won't trigger the send either (7 total)
        send2D = producer.send_messages(
            self.topic, msgs=[self.msg("five"), self.msg("six"),
                              self.msg("seven")])
        # make sure no messages on server...
        yield self.assert_fetch_offset(0, start_offset0, [])
        yield self.assert_fetch_offset(1, start_offset1, [])
        # Still no result on send
        self.assertNoResult(send2D)
        # send final batch which will be on partition 0 again...
        send3D = producer.send_messages(
            self.topic, msgs=[self.msg("eight"), self.msg("nine"),
                              self.msg("ten"), self.msg("eleven")])
        # Now do a fetch, again waiting for up to 0.5 seconds for the response
        # All four messages sent in first batch (to partition 0, given default
        # R-R, start-at-zero partitioner), and 4 in 3rd batch
        yield self.assert_fetch_offset(
            0, start_offset0, [self.msg("one"), self.msg("two"),
                               self.msg("three"), self.msg("four"),
                               self.msg("eight"), self.msg("nine"),
                               self.msg("ten"), self.msg("eleven")],
            fetch_size=2048)
        # Fetch from partition:1 should have all messages in 2nd batch
        yield self.assert_fetch_offset(
            1, start_offset1, [self.msg("five"), self.msg("six"),
                               self.msg("seven")])
        # make sure the deferreds fired with the proper result
        resp1 = self.successResultOf(send1D)
        resp2 = self.successResultOf(send2D)
        resp3 = self.successResultOf(send3D)
        self.assert_produce_response(resp1, start_offset0)
        self.assert_produce_response(resp2, start_offset1)
        self.assert_produce_response(resp3, start_offset0)
        # cleanup
        yield producer.stop()
示例#23
0
    def test_producer_hashed_partitioner(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        producer = Producer(self.client,
                            partitioner_class=HashedPartitioner)

        resp1 = yield producer.send_messages(
            self.topic, b'1', [self.msg("one")])
        resp2 = yield producer.send_messages(
            self.topic, b'2', [self.msg("two")])
        resp3 = yield producer.send_messages(
            self.topic, b'1', [self.msg("three")])
        resp4 = yield producer.send_messages(
            self.topic, b'1', [self.msg("four")])
        resp5 = yield producer.send_messages(
            self.topic, b'2', [self.msg("five")])

        self.assert_produce_response(resp2, start_offset0+0)
        self.assert_produce_response(resp5, start_offset0+1)

        self.assert_produce_response(resp1, start_offset1+0)
        self.assert_produce_response(resp3, start_offset1+1)
        self.assert_produce_response(resp4, start_offset1+2)

        yield self.assert_fetch_offset(
            0, start_offset0, [
                self.msg("two"),
                self.msg("five"),
                ])
        yield self.assert_fetch_offset(
            1, start_offset1, [
                self.msg("one"),
                self.msg("three"),
                self.msg("four"),
                ])

        yield producer.stop()
示例#24
0
    def test_producer_round_robin_partitioner_random_start(self):
        try:
            RoundRobinPartitioner.set_random_start(True)
            producer = Producer(self.client,
                                partitioner_class=RoundRobinPartitioner)
            # Two partitions, so 1st and 3rd reqs should go to same part, but
            # 2nd should go to different. Other than that, without statistical
            # test, we can't say much... Partitioner tests should ensure that
            # we really aren't always starting on a non-random partition...
            resp1 = yield producer.send_messages(
                self.topic, msgs=[self.msg("one"), self.msg("two")])
            resp2 = yield producer.send_messages(
                self.topic, msgs=[self.msg("three")])
            resp3 = yield producer.send_messages(
                self.topic, msgs=[self.msg("four"), self.msg("five")])

            self.assertEqual(resp1.partition, resp3.partition)
            self.assertNotEqual(resp1.partition, resp2.partition)

            yield producer.stop()
        finally:
            RoundRobinPartitioner.set_random_start(False)
    def test_producer_batched_by_time(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        # This needs to be big enough that the operations between starting the
        # producer and the time.sleep() call take less time than this... I made
        # it large enough that the test would still pass even with my Macbook's
        # cores all pegged by a load generator.
        batchtime = 5

        try:
            producer = Producer(self.client,
                                batch_send=True,
                                batch_every_n=0,
                                batch_every_t=batchtime)
            startTime = time.time()
            # Send 4 messages and do a fetch
            send1D = producer.send_messages(self.topic,
                                            msgs=[
                                                self.msg("one"),
                                                self.msg("two"),
                                                self.msg("three"),
                                                self.msg("four")
                                            ])
            # set assert_fetch_offset() to wait for 0.1 secs on the server-side
            # before returning no result. So, these calls should take 0.2sec
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Messages shouldn't have sent out yet, so we shouldn't have
            # response from server yet on having received/responded to request
            self.assertNoResult(send1D)
            # Sending 3 more messages should NOT trigger the send, as less than
            # 1 sec. elapsed by here, so send2D should still have no result.
            send2D = producer.send_messages(
                self.topic,
                msgs=[self.msg("five"),
                      self.msg("six"),
                      self.msg("seven")])
            # still no messages...
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Still no result on send, and send should NOT have gone out.
            self.assertNoResult(send2D)
            # Wait the timeout out. It'd be nicer to be able to just 'advance'
            # the reactor, but since we need the network so...
            time.sleep(batchtime - (time.time() - startTime) + 0.05)
            # We need to yield to the reactor to have it process the response
            # from the broker. Both send1D and send2D should then have results.
            resp1 = yield send1D
            resp2 = yield send2D
            # ensure the 2 batches went into the proper partitions...
            self.assert_produce_response(resp1, start_offset0)
            self.assert_produce_response(resp2, start_offset1)
            # Should be able to get messages now
            yield self.assert_fetch_offset(0, start_offset0, [
                self.msg("one"),
                self.msg("two"),
                self.msg("three"),
                self.msg("four")
            ])
            yield self.assert_fetch_offset(
                1, start_offset1,
                [self.msg("five"),
                 self.msg("six"),
                 self.msg("seven")])
        except IOError:  # pragma: no cover
            msg = "IOError: Probably time.sleep with negative value due to " \
                "'too slow' system taking more than {0} seconds to do " \
                "something that should take ~0.4 seconds.".format(batchtime)
            log.exception(msg)
            # In case of the IOError, we want to eat the CancelledError
            send1D.addErrback(lambda _: None)  # Eat any uncaught errors
            send2D.addErrback(lambda _: None)  # Eat any uncaught errors
            self.fail(msg)
        finally:
            yield producer.stop()
    def test_throughput(self):
        yield async_delay(3)  # 0.8.1.1 fails otherwise

        # Flag to shutdown
        keep_running = True
        # Count of messages sent
        sent_msgs_count = [0]
        total_messages_size = [0]
        # setup MESSAGE_BLOCK_SIZEx1024-ish byte messages to send over and over
        constant_messages = [
            self.msg(s)
            for s in [random_string(1024) for x in range(MESSAGE_BLOCK_SIZE)]
        ]
        large_messages = [
            self.msg(s) for s in [
                random_string(FETCH_BUFFER_SIZE_BYTES * 3)
                for x in range(MESSAGE_BLOCK_SIZE)
            ]
        ]

        constant_messages_size = len(constant_messages[0]) * MESSAGE_BLOCK_SIZE
        large_messages_size = len(large_messages[0]) * MESSAGE_BLOCK_SIZE

        # Create a producer and send some messages
        producer = Producer(self.client)

        # Create consumers (1/partition)
        consumers = [
            self.consumer(partition=p, fetch_max_wait_time=50)
            for p in range(PARTITION_COUNT)
        ]

        def log_error(failure):
            log.exception("Failure sending messages: %r",
                          failure)  # pragma: no cover

        def sent_msgs(resps):
            log.info("Messages Sent: %r", resps)
            sent_msgs_count[0] += MESSAGE_BLOCK_SIZE
            return resps

        def send_msgs():
            # randomly, 1/20 of the time, send large messages
            if randint(0, 19):
                ## if True:
                messages = constant_messages
                large = ''
                total_messages_size[0] += constant_messages_size
            else:
                messages = large_messages
                large = ' large'
                total_messages_size[0] += large_messages_size

            log.info("Sending: %d%s messages", len(messages), large)
            d = producer.send_messages(self.topic, msgs=messages)
            # As soon as we get a response from the broker, count them
            # and if we're still supposed to, send more
            d.addCallback(sent_msgs)
            if keep_running:
                d.addCallback(lambda _: self.reactor.callLater(0, send_msgs))
                ## d.addCallback(lambda _: send_msgs())
            d.addErrback(log_error)

        # Start sending messages, MESSAGE_BLOCK_SIZE at a time, 1K or 384K each
        send_msgs()

        # Start the consumers from the beginning
        fetch_start = time.time()
        start_ds = [consumer.start(OFFSET_EARLIEST) for consumer in consumers]

        # Let them all run for awhile...
        log.info("Waiting %d seconds...", PRODUCE_TIME)
        yield async_delay(PRODUCE_TIME)
        # Tell the producer to stop
        keep_running = False
        # Wait up to PRODUCE_TIME for the consumers to catch up
        log.info(
            "Waiting up to %d seconds for "
            "consumers to finish consuming...", PRODUCE_TIME)
        deadline = time.time() + PRODUCE_TIME * 2
        while time.time() < deadline:
            consumed = sum(
                [len(consumer.processor._messages) for consumer in consumers])
            log.debug("Consumed %d messages.", consumed)
            if sent_msgs_count[0] == consumed:
                break
            yield async_delay(1)
        fetch_time = time.time() - fetch_start
        consumed_bytes = sum(
            [c.processor._messages_bytes[0] for c in consumers])

        result_msg = ("Sent: {} messages ({:,} total bytes) in ~{} seconds"
                      " ({}/sec), Consumed: {} in {:.2f} seconds.".format(
                          sent_msgs_count[0], total_messages_size[0],
                          PRODUCE_TIME, sent_msgs_count[0] / PRODUCE_TIME,
                          consumed, fetch_time))
        # Log the result, and print to stderr to get around nose capture
        log.info(result_msg)
        print("\n\t Performance Data: " + result_msg, file=sys.stderr)
        # And print data as stats
        stat('Production_Time', PRODUCE_TIME)
        stat('Consumption_Time', fetch_time)
        stat('Messages_Produced', sent_msgs_count[0])
        stat('Messages_Consumed', consumed)
        stat('Messages_Bytes_Produced', total_messages_size[0])
        stat('Messages_Bytes_Consumed', consumed_bytes)
        stat('Messages_Produced_Per_Second', sent_msgs_count[0] / PRODUCE_TIME)
        stat('Messages_Consumed_Per_Second', consumed / fetch_time)
        stat('Message_Bytes_Produced_Per_Second',
             total_messages_size[0] / PRODUCE_TIME)
        stat('Message_Bytes_Consumed_Per_Second', consumed_bytes / fetch_time)

        # Clean up
        log.debug('Stopping producer: %r', producer)
        yield producer.stop()
        log.debug('Stopping consumers: %r', consumers)
        for consumer in consumers:
            consumer.stop()
        [self.successResultOf(start_d) for start_d in start_ds]
        # make sure we got all the messages we sent
        self.assertEqual(
            sent_msgs_count[0],
            sum([len(consumer.processor._messages) for consumer in consumers]))
    def test_producer_batched_by_time(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        # This needs to be big enough that the operations between starting the
        # producer and the time.sleep() call take less time than this... I made
        # it large enough that the test would still pass even with my Macbook's
        # cores all pegged by a load generator.
        batchtime = 5

        try:
            producer = Producer(self.client, batch_send=True,
                                batch_every_n=0, batch_every_t=batchtime)
            startTime = time.time()
            # Send 4 messages and do a fetch
            send1D = producer.send_messages(
                self.topic, msgs=[self.msg("one"), self.msg("two"),
                                  self.msg("three"), self.msg("four")])
            # set assert_fetch_offset() to wait for 0.1 secs on the server-side
            # before returning no result. So, these calls should take 0.2sec
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Messages shouldn't have sent out yet, so we shouldn't have
            # response from server yet on having received/responded to request
            self.assertNoResult(send1D)
            # Sending 3 more messages should NOT trigger the send, as less than
            # 1 sec. elapsed by here, so send2D should still have no result.
            send2D = producer.send_messages(
                self.topic, msgs=[self.msg("five"), self.msg("six"),
                                  self.msg("seven")])
            # still no messages...
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Still no result on send, and send should NOT have gone out.
            self.assertNoResult(send2D)
            # Wait the timeout out. It'd be nicer to be able to just 'advance'
            # the reactor, but since we need the network so...
            time.sleep(batchtime - (time.time() - startTime) + 0.05)
            # We need to yield to the reactor to have it process the response
            # from the broker. Both send1D and send2D should then have results.
            resp1 = yield send1D
            resp2 = yield send2D
            # ensure the 2 batches went into the proper partitions...
            self.assert_produce_response(resp1, start_offset0)
            self.assert_produce_response(resp2, start_offset1)
            # Should be able to get messages now
            yield self.assert_fetch_offset(
                0, start_offset0, [self.msg("one"), self.msg("two"),
                                   self.msg("three"), self.msg("four")])
            yield self.assert_fetch_offset(
                1, start_offset1, [self.msg("five"), self.msg("six"),
                                   self.msg("seven")])
        except IOError:  # pragma: no cover
            msg = "IOError: Probably time.sleep with negative value due to " \
                "'too slow' system taking more than {0} seconds to do " \
                "something that should take ~0.4 seconds.".format(batchtime)
            log.exception(msg)
            # In case of the IOError, we want to eat the CancelledError
            send1D.addErrback(lambda _: None)  # Eat any uncaught errors
            send2D.addErrback(lambda _: None)  # Eat any uncaught errors
            self.fail(msg)
        finally:
            yield producer.stop()
    def test_producer_batched_by_time(self):
        start_offset0 = yield self.current_offset(self.topic, 0)
        start_offset1 = yield self.current_offset(self.topic, 1)

        # This needs to be big enough that the operations between starting the
        # producer and the sleep take less time than this... I made
        # it large enough that the test would still pass even with my Macbook's
        # cores all pegged by a load generator.
        batchtime = 5

        try:
            producer = Producer(self.client,
                                batch_send=True,
                                batch_every_n=0,
                                batch_every_t=batchtime)
            startTime = time.time()
            # Send 4 messages and do a fetch
            send1D = producer.send_messages(self.topic,
                                            msgs=[
                                                self.msg("one"),
                                                self.msg("two"),
                                                self.msg("three"),
                                                self.msg("four")
                                            ])
            # set assert_fetch_offset() to wait for 0.1 secs on the server-side
            # before returning no result. So, these calls should take 0.2sec
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Messages shouldn't have sent out yet, so we shouldn't have
            # response from server yet on having received/responded to request
            self.assertNoResult(send1D)
            # Sending 3 more messages should NOT trigger the send, as less than
            # 1 sec. elapsed by here, so send2D should still have no result.
            send2D = producer.send_messages(
                self.topic,
                msgs=[self.msg("five"),
                      self.msg("six"),
                      self.msg("seven")])
            # still no messages...
            yield self.assert_fetch_offset(0, start_offset0, [], max_wait=0.1)
            yield self.assert_fetch_offset(1, start_offset1, [], max_wait=0.1)
            # Still no result on send, and send should NOT have gone out.
            self.assertNoResult(send2D)
            # Wait the timeout out. It'd be nicer to be able to just 'advance'
            # the reactor, but since we need the network so...
            yield async_delay(batchtime - (time.time() - startTime) + 0.05,
                              clock=self.reactor)
            # We need to yield to the reactor to have it process the response
            # from the broker. Both send1D and send2D should then have results.
            resp1 = yield send1D
            resp2 = yield send2D
            # ensure the 2 batches went into the proper partitions...
            self.assert_produce_response(resp1, start_offset0)
            self.assert_produce_response(resp2, start_offset1)
            # Should be able to get messages now
            yield self.assert_fetch_offset(0, start_offset0, [
                self.msg("one"),
                self.msg("two"),
                self.msg("three"),
                self.msg("four")
            ])
            yield self.assert_fetch_offset(
                1, start_offset1,
                [self.msg("five"),
                 self.msg("six"),
                 self.msg("seven")])
        finally:
            yield producer.stop()