示例#1
0
 def test_three_consumers(self):
     """Run the consumer benchmark labelled "Three consumers" and log its throughput.

     The service is created against topic "test-rep-three" and asked for
     ``self.msgs_default`` records. The positional ``3`` is presumably the
     number of consumer instances -- confirm against
     ConsumerPerformanceService, which is defined outside this snippet.
     """
     self.logger.info("BENCHMARK: Three consumers")
     consumer_options = {
         'topic': "test-rep-three",
         'num_records': self.msgs_default,
         'throughput': -1,
         'threads': 1
     }
     self.perf = ConsumerPerformanceService(self.test_context, 3, self.kafka,
                                            **consumer_options)
     self.perf.run()
     self.logger.info("Three consumers: %s", throughput(self.perf))
示例#2
0
    def __init__(self, test_context):
        """Configure the test cluster and build the three performance services.

        Creates one producer service and two consumer services (native and
        REST), all pointed at topic "test". The producer is asked for 1000
        more records than either consumer reads.
        """
        topic_configs = {
            'test-rep-one': {
                'partitions': 6,
                'replication-factor': 1
            }
        }
        super(NativeVsRestConsumerPerformance, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=1,
            num_rest=1,
            topics=topic_configs)

        # The small count works on both aws and local. The large count can be
        # used locally on Vagrant VMs, but may use too much memory for aws.
        use_small_dataset = True
        record_count = 1000000 if use_small_dataset else 50000000

        record_bytes = 100
        producer_batch = 8196
        ack_mode = 1  # default for REST proxy, which isn't yet configurable
        consumer_threads = 1  # not configurable for REST proxy

        producer_settings = {'batch.size': producer_batch, 'acks': ack_mode}
        self.producer = ProducerPerformanceService(
            test_context, 1, self.kafka,
            topic="test",
            num_records=record_count + 1000,
            record_size=record_bytes,
            throughput=-1,
            settings=producer_settings)

        self.consumer_perf = ConsumerPerformanceService(
            test_context, 1, self.kafka,
            topic="test",
            num_records=record_count,
            throughput=-1,
            threads=consumer_threads)

        self.rest_consumer_perf = RestConsumerPerformanceService(
            test_context, 1, self.rest,
            topic="test",
            num_records=record_count,
            throughput=-1)
示例#3
0
 def test_single_consumer(self):
     """Run the "Single consumer" benchmark on "test-rep-three" and log its throughput."""
     # All consumer tests use the messages from the first benchmark, so
     # they'll get messages of the default message size
     self.logger.info("BENCHMARK: Single consumer")
     consumer = ConsumerPerformanceService(
         self.test_context, 1, self.kafka,
         topic="test-rep-three",
         num_records=self.msgs_default,
         throughput=-1,
         threads=1)
     self.perf = consumer
     consumer.run()
     # The message is formatted eagerly, exactly as before, then logged as-is.
     report = "Single consumer: %s" % throughput(consumer)
     self.logger.info(report)
    def __init__(self, test_context):
        """Configure the test cluster and build the producer and consumer services.

        The parent initializer receives a single 6-partition topic,
        "test-rep-one". The three services created here all use topic "test";
        the producer writes 1000 more records than the consumers are asked
        to read.
        """
        rep_one_topic = {'partitions': 6, 'replication-factor': 1}
        super(NativeVsRestConsumerPerformance, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=1,
            num_rest=1,
            topics={'test-rep-one': rep_one_topic})

        # 1000000 works on both aws and local; 50000000 can be used locally on
        # Vagrant VMs, but may use too much memory for aws.
        aws_safe = True
        total_msgs = 1000000 if aws_safe else 50000000

        payload_size = 100
        batch_bytes = 8196
        required_acks = 1  # default for REST proxy, which isn't yet configurable
        thread_count = 1  # not configurable for REST proxy

        self.producer = ProducerPerformanceService(
            test_context,
            1,
            self.kafka,
            topic="test",
            num_records=total_msgs + 1000,
            record_size=payload_size,
            throughput=-1,
            settings={
                'batch.size': batch_bytes,
                'acks': required_acks
            })

        self.consumer_perf = ConsumerPerformanceService(
            test_context,
            1,
            self.kafka,
            topic="test",
            num_records=total_msgs,
            throughput=-1,
            threads=thread_count)

        self.rest_consumer_perf = RestConsumerPerformanceService(
            test_context,
            1,
            self.rest,
            topic="test",
            num_records=total_msgs,
            throughput=-1)
    def test_long_term_throughput(self):
        """Run a long producer benchmark and log average throughput per time block.

        A producer service is run against "test-rep-three" with
        ``self.msgs_large`` records and ``intermediate_stats=True``. The
        entries of ``self.perf.stats[0]`` are then grouped into consecutive
        blocks, and the mean of each block's 'records_per_sec' and 'mbps'
        values is logged as one multi-line message.
        """
        self.logger.info("BENCHMARK: Long production")
        self.perf = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=self.msgs_large, record_size=self.msg_size_default, throughput=-1,
            settings={'acks':1, 'batch.size':self.batch_size, 'buffer.memory':self.buffer_memory},
            intermediate_stats=True
        )
        self.perf.run()

        summary = ["Throughput over long run, data > memory:"]

        # FIXME we should be generating a graph too
        stats = self.perf.stats[0]
        # Try to break it into 5 blocks, but fall back to a smaller number if
        # there aren't even 5 elements.
        # Floor division keeps both values integers on Python 3 as well as
        # Python 2; with true division range() and the slice below would
        # raise TypeError on a float.
        block_size = max(len(stats) // 5, 1)
        nblocks = len(stats) // block_size
        for i in range(nblocks):
            # (i + 1) * block_size never exceeds len(stats) because
            # nblocks * block_size <= len(stats), so no clamping is needed.
            subset = stats[i * block_size:(i + 1) * block_size]
            if len(subset) == 0:
                # Defensive only: guards the divisions below against an empty block.
                summary.append(" Time block %d: (empty)" % i)
            else:
                summary.append(" Time block %d: %f rec/sec (%f MB/s)" % (i,
                                 sum([stat['records_per_sec'] for stat in subset])/float(len(subset)),
                                 sum([stat['mbps'] for stat in subset])/float(len(subset))))

        self.logger.info("\n".join(summary))
示例#6
0
    def test_producer_and_consumer(self):
        """Run one producer and one consumer together and log both throughputs.

        Both services target "test-rep-three" with ``self.msgs_default``
        records and are started through ``Service.run_parallel``.
        """
        self.logger.info("BENCHMARK: Producer + Consumer")

        producer_settings = {
            'acks': 1,
            'batch.size': self.batch_size,
            'buffer.memory': self.buffer_memory
        }
        self.producer = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three",
            num_records=self.msgs_default,
            record_size=self.msg_size_default,
            throughput=-1,
            settings=producer_settings)

        self.consumer = ConsumerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three",
            num_records=self.msgs_default,
            throughput=-1,
            threads=1)

        Service.run_parallel(self.producer, self.consumer)

        # Build the report line by line; producer first, then consumer.
        report_lines = ["Producer + consumer:"]
        report_lines.append(" Producer: %s" % throughput(self.producer))
        report_lines.append(" Consumer: %s" % throughput(self.consumer))
        self.logger.info("\n".join(report_lines))
 def test_three_consumers(self):
     """Run the "Three consumers" benchmark on "test-rep-three" and log its throughput.

     The positional ``3`` is presumably the consumer instance count --
     confirm against ConsumerPerformanceService (not visible here).
     """
     self.logger.info("BENCHMARK: Three consumers")
     consumers = ConsumerPerformanceService(
         self.test_context,
         3,
         self.kafka,
         topic="test-rep-three",
         num_records=self.msgs_default,
         throughput=-1,
         threads=1)
     self.perf = consumers
     consumers.run()
     self.logger.info("Three consumers: %s", throughput(consumers))
class NativeVsRestConsumerPerformance(RestProxyTest):
    """Compare a native consumer against a REST consumer on the same topic.

    One producer service seeds topic "test"; a ConsumerPerformanceService and
    a RestConsumerPerformanceService then each read from it and their
    throughput figures are logged.
    """

    def __init__(self, test_context):
        """Configure the test cluster and build the three performance services."""
        topic_configs = {
            'test-rep-one': {'partitions': 6, 'replication-factor': 1}
        }
        super(NativeVsRestConsumerPerformance, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=1,
            num_rest=1,
            topics=topic_configs)

        # The small count works on both aws and local. The large count can be
        # used locally on Vagrant VMs, but may use too much memory for aws.
        use_small_dataset = True
        record_count = 1000000 if use_small_dataset else 50000000

        record_bytes = 100
        producer_batch = 8196
        ack_mode = 1  # default for REST proxy, which isn't yet configurable
        consumer_threads = 1  # not configurable for REST proxy

        # The producer writes 1000 records more than the consumers read; see
        # the FIXME in test().
        self.producer = ProducerPerformanceService(
            test_context, 1, self.kafka,
            topic="test",
            num_records=record_count + 1000,
            record_size=record_bytes,
            throughput=-1,
            settings={'batch.size': producer_batch, 'acks': ack_mode})

        self.consumer_perf = ConsumerPerformanceService(
            test_context, 1, self.kafka,
            topic="test",
            num_records=record_count,
            throughput=-1,
            threads=consumer_threads)

        self.rest_consumer_perf = RestConsumerPerformanceService(
            test_context, 1, self.rest,
            topic="test",
            num_records=record_count,
            throughput=-1)

    def test(self):
        """Seed the topic, run both consumers in turn, and log their results."""
        # Seed data. FIXME currently the REST consumer isn't properly finishing
        # unless we have some extra messages -- the last set isn't getting
        # properly returned for some reason.
        self.producer.run()

        self.consumer_perf.run()
        self.rest_consumer_perf.run()

        native_result = self.consumer_perf.results[0]
        self.logger.info("Consumer performance: %f MB/s, %f msg/sec",
                         native_result['mbps'],
                         native_result['records_per_sec'])

        rest_result = self.rest_consumer_perf.results[0]
        self.logger.info("REST Consumer performance: %f MB/s, %f msg/sec",
                         rest_result['mbps'],
                         rest_result['records_per_sec'])
 def test_three_producers_async(self):
     """Run the "Three producers, async 3x replication" benchmark and log its throughput.

     The producer service targets "test-rep-three" with 'acks' set to 1. The
     positional ``3`` is presumably the producer instance count -- confirm
     against ProducerPerformanceService (not visible here).
     """
     self.logger.info("BENCHMARK: Three producers, async 3x replication")
     producer_settings = {
         'acks': 1,
         'batch.size': self.batch_size,
         'buffer.memory': self.buffer_memory
     }
     self.perf = ProducerPerformanceService(
         self.test_context,
         3,
         self.kafka,
         topic="test-rep-three",
         num_records=self.msgs_default,
         record_size=self.msg_size_default,
         throughput=-1,
         settings=producer_settings)
     self.perf.run()
     result_line = "Three producers, async 3x replication: %s" % throughput(self.perf)
     self.logger.info(result_line)
 def test_single_producer_no_replication(self):
     """Run the "Single producer, no replication" benchmark and log its throughput.

     The producer service targets topic "test-rep-one" with 'acks' set to 1.
     """
     self.logger.info("BENCHMARK: Single producer, no replication")
     producer_settings = {
         'acks': 1,
         'batch.size': self.batch_size,
         'buffer.memory': self.buffer_memory
     }
     producer = ProducerPerformanceService(
         self.test_context,
         1,
         self.kafka,
         topic="test-rep-one",
         num_records=self.msgs_default,
         record_size=self.msg_size_default,
         throughput=-1,
         settings=producer_settings)
     self.perf = producer
     producer.run()
     self.logger.info("Single producer, no replication: %s", throughput(producer))
 def test_single_consumer(self):
     """Run the "Single consumer" benchmark on "test-rep-three" and log its throughput."""
     # All consumer tests use the messages from the first benchmark, so
     # they'll get messages of the default message size
     self.logger.info("BENCHMARK: Single consumer")
     consumer_options = {
         'topic': "test-rep-three",
         'num_records': self.msgs_default,
         'throughput': -1,
         'threads': 1
     }
     self.perf = ConsumerPerformanceService(self.test_context, 1, self.kafka,
                                            **consumer_options)
     self.perf.run()
     # Formatted eagerly with %, as in the original, then logged without args.
     message = "Single consumer: %s" % throughput(self.perf)
     self.logger.info(message)
    def test_end_to_end_latency(self):
        """Run the end-to-end latency service and log three latency figures.

        Reads 'latency_50th_ms', 'latency_99th_ms' and 'latency_999th_ms'
        from the first entry of the service's ``results``.
        """
        self.logger.info("BENCHMARK: End to end latency")
        self.perf = EndToEndLatencyService(self.test_context,
                                           1,
                                           self.kafka,
                                           topic="test-rep-three",
                                           num_records=10000)
        self.perf.run()

        first_result = self.perf.results[0]
        latencies = (first_result['latency_50th_ms'],
                     first_result['latency_99th_ms'],
                     first_result['latency_999th_ms'])
        # The message is fully formatted before it reaches the logger.
        message = "End-to-end latency: median %f ms, 99%% %f ms, 99.9%% %f ms" % latencies
        self.logger.info(message)
示例#13
0
    def test_end_to_end_latency(self):
        """Measure end-to-end latency on "test-rep-three" and log the percentiles.

        The median, 99% and 99.9% values are taken from the
        'latency_50th_ms', 'latency_99th_ms' and 'latency_999th_ms' keys of
        ``self.perf.results[0]``.
        """
        self.logger.info("BENCHMARK: End to end latency")
        latency_service = EndToEndLatencyService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=10000
        )
        self.perf = latency_service
        latency_service.run()

        latency_keys = ('latency_50th_ms', 'latency_99th_ms', 'latency_999th_ms')
        values = tuple([self.perf.results[0][key] for key in latency_keys])
        # Eager %-formatting is kept so the logger receives a finished string.
        self.logger.info("End-to-end latency: median %f ms, 99%% %f ms, 99.9%% %f ms" % values)
示例#14
0
 def test_single_producer_sync(self):
     """Run the "Single producer, sync 3x replication" benchmark and log its throughput.

     The producer service targets "test-rep-three" with 'acks' set to -1.
     """
     self.logger.info("BENCHMARK: Single producer, sync 3x replication")
     sync_settings = {
         'acks': -1,
         'batch.size': self.batch_size,
         'buffer.memory': self.buffer_memory
     }
     producer = ProducerPerformanceService(
         self.test_context, 1, self.kafka,
         topic="test-rep-three",
         num_records=self.msgs_default,
         record_size=self.msg_size_default,
         throughput=-1,
         settings=sync_settings)
     self.perf = producer
     producer.run()
     outcome = "Single producer, sync 3x replication: %s" % throughput(producer)
     self.logger.info(outcome)
示例#15
0
    def test_long_term_throughput(self):
        """Run a long producer benchmark and log average throughput per time block.

        A producer service is run against "test-rep-three" with
        ``self.msgs_large`` records and ``intermediate_stats=True``. The
        entries of ``self.perf.stats[0]`` are then grouped into consecutive
        blocks, and the mean of each block's 'records_per_sec' and 'mbps'
        values is logged as one multi-line message.
        """
        self.logger.info("BENCHMARK: Long production")
        self.perf = ProducerPerformanceService(
            self.test_context,
            1,
            self.kafka,
            topic="test-rep-three",
            num_records=self.msgs_large,
            record_size=self.msg_size_default,
            throughput=-1,
            settings={
                'acks': 1,
                'batch.size': self.batch_size,
                'buffer.memory': self.buffer_memory
            },
            intermediate_stats=True)
        self.perf.run()

        summary = ["Throughput over long run, data > memory:"]

        # FIXME we should be generating a graph too
        stats = self.perf.stats[0]
        # Try to break it into 5 blocks, but fall back to a smaller number if
        # there aren't even 5 elements.
        # Floor division keeps both values integers on Python 3 as well as
        # Python 2; with true division range() and the slice below would
        # raise TypeError on a float.
        block_size = max(len(stats) // 5, 1)
        nblocks = len(stats) // block_size
        for i in range(nblocks):
            # (i + 1) * block_size never exceeds len(stats) because
            # nblocks * block_size <= len(stats), so no clamping is needed.
            subset = stats[i * block_size:(i + 1) * block_size]
            if len(subset) == 0:
                # Defensive only: guards the divisions below against an empty block.
                summary.append(" Time block %d: (empty)" % i)
            else:
                count = float(len(subset))
                summary.append(
                    " Time block %d: %f rec/sec (%f MB/s)" %
                    (i,
                     sum([stat['records_per_sec'] for stat in subset]) / count,
                     sum([stat['mbps'] for stat in subset]) / count))

        self.logger.info("\n".join(summary))
示例#16
0
class NativeVsRestConsumerPerformance(RestProxyTest):
    """Run a native consumer and a REST consumer over the same seeded topic.

    A producer service writes to topic "test"; the two consumer services
    then read from it one after the other and their 'mbps' and
    'records_per_sec' results are logged.
    """

    def __init__(self, test_context):
        """Configure the test cluster and build the producer and consumer services."""
        rep_one_topic = {'partitions': 6, 'replication-factor': 1}
        super(NativeVsRestConsumerPerformance, self).__init__(
            test_context, num_zk=1, num_brokers=1, num_rest=1,
            topics={'test-rep-one': rep_one_topic})

        # 1000000 works on both aws and local; 50000000 can be used locally on
        # Vagrant VMs, but may use too much memory for aws.
        aws_safe = True
        total_msgs = 1000000 if aws_safe else 50000000

        payload_size = 100
        batch_bytes = 8196
        required_acks = 1  # default for REST proxy, which isn't yet configurable
        thread_count = 1  # not configurable for REST proxy

        # The producer is given 1000 extra records; see the FIXME in test().
        producer_settings = {'batch.size': batch_bytes, 'acks': required_acks}
        self.producer = ProducerPerformanceService(
            test_context, 1, self.kafka,
            topic="test", num_records=total_msgs + 1000,
            record_size=payload_size, throughput=-1,
            settings=producer_settings)

        self.consumer_perf = ConsumerPerformanceService(
            test_context, 1, self.kafka,
            topic="test", num_records=total_msgs, throughput=-1,
            threads=thread_count)

        self.rest_consumer_perf = RestConsumerPerformanceService(
            test_context, 1, self.rest,
            topic="test", num_records=total_msgs, throughput=-1)

    def test(self):
        """Seed the topic, run both consumers in turn, and log their results."""
        # Seed data. FIXME currently the REST consumer isn't properly finishing
        # unless we have some extra messages -- the last set isn't getting
        # properly returned for some reason.
        self.producer.run()

        self.consumer_perf.run()
        self.rest_consumer_perf.run()

        native = self.consumer_perf.results[0]
        self.logger.info("Consumer performance: %f MB/s, %f msg/sec", native['mbps'], native['records_per_sec'])

        via_rest = self.rest_consumer_perf.results[0]
        self.logger.info("REST Consumer performance: %f MB/s, %f msg/sec", via_rest['mbps'], via_rest['records_per_sec'])
class KafkaBenchmark(KafkaTest):
    """A benchmark of Kafka producer/consumer performance. This replicates the test
    run here:
    https://engineering.linkedin.com/kafka/benchmarking-apache-kafka-2-million-writes-second-three-cheap-machines
    """
    def __init__(self, test_context):
        """Request the cluster layout and record the sizing constants the tests use.

        The parent initializer is called with ``num_zk=1``, ``num_brokers=3``
        and two 6-partition topics: "test-rep-one" (replication factor 1) and
        "test-rep-three" (replication factor 3).
        """
        super(KafkaBenchmark, self).__init__(test_context, num_zk=1, num_brokers=3, topics={
            'test-rep-one' : { 'partitions': 6, 'replication-factor': 1 },
            'test-rep-three' : { 'partitions': 6, 'replication-factor': 3 }
        })

        if True:
            # Works on both aws and local
            self.msgs = 1000000
            self.msgs_default = 1000000
        else:
            # Can use locally on Vagrant VMs, but may use too much memory for aws
            self.msgs = 50000000
            self.msgs_default = 50000000

        self.msgs_large = 10000000
        self.msg_size_default = 100
        self.batch_size = 8*1024
        self.buffer_memory = 64*1024*1024
        self.msg_sizes = [10, 100, 1000, 10000, 100000]
        self.target_data_size = 128*1024*1024
        # Same quantity expressed in GB; only used in a log message.
        self.target_data_size_gb = self.target_data_size/float(1024*1024*1024)

    def test_single_producer_no_replication(self):
        """Run one producer service against "test-rep-one" and log its throughput."""
        self.logger.info("BENCHMARK: Single producer, no replication")
        self.perf = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-one", num_records=self.msgs_default, record_size=self.msg_size_default, throughput=-1,
            settings={'acks':1, 'batch.size':self.batch_size, 'buffer.memory':self.buffer_memory}
        )
        self.perf.run()
        self.logger.info("Single producer, no replication: %s", throughput(self.perf))

    def test_single_producer_replication(self):
        """Run one producer service against "test-rep-three" with 'acks' 1 and log its throughput."""
        self.logger.info("BENCHMARK: Single producer, async 3x replication")
        self.perf = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=self.msgs_default, record_size=self.msg_size_default, throughput=-1,
            settings={'acks':1, 'batch.size':self.batch_size, 'buffer.memory':self.buffer_memory}
        )
        self.perf.run()
        self.logger.info("Single producer, async 3x replication: %s" % throughput(self.perf))

    def test_single_producer_sync(self):
        """Run one producer service against "test-rep-three" with 'acks' -1 and log its throughput."""
        self.logger.info("BENCHMARK: Single producer, sync 3x replication")
        self.perf = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=self.msgs_default, record_size=self.msg_size_default, throughput=-1,
            settings={'acks':-1, 'batch.size':self.batch_size, 'buffer.memory':self.buffer_memory}
        )
        self.perf.run()
        self.logger.info("Single producer, sync 3x replication: %s" % throughput(self.perf))

    def test_three_producers_async(self):
        """Run the "Three producers" benchmark against "test-rep-three" and log its throughput.

        The positional ``3`` is presumably the producer instance count --
        confirm against ProducerPerformanceService.
        """
        self.logger.info("BENCHMARK: Three producers, async 3x replication")
        self.perf = ProducerPerformanceService(
            self.test_context, 3, self.kafka,
            topic="test-rep-three", num_records=self.msgs_default, record_size=self.msg_size_default, throughput=-1,
            settings={'acks':1, 'batch.size':self.batch_size, 'buffer.memory':self.buffer_memory}
        )
        self.perf.run()
        self.logger.info("Three producers, async 3x replication: %s" % throughput(self.perf))

    def test_multiple_message_size(self):
        """Run one producer benchmark per entry in ``self.msg_sizes`` and log a summary.

        All services are constructed first and then run one after another.
        The record count for each size is ``self.target_data_size`` divided
        by the message size, so every run is asked for roughly the same
        total amount of data.
        """
        # TODO this would be a great place to use parametrization
        self.perfs = {}
        for msg_size in self.msg_sizes:
            self.logger.info("BENCHMARK: Message size %d (%f GB total, single producer, async 3x replication)", msg_size, self.target_data_size_gb)
            # Always generate the same total amount of data
            nrecords = int(self.target_data_size / msg_size)
            self.perfs["perf-" + str(msg_size)] = ProducerPerformanceService(
                self.test_context, 1, self.kafka,
                topic="test-rep-three", num_records=nrecords, record_size=msg_size, throughput=-1,
                settings={'acks': 1, 'batch.size': self.batch_size, 'buffer.memory': self.buffer_memory}
            )
        self.msg_size_perf = {}

        for msg_size in self.msg_sizes:
            perf = self.perfs["perf-" + str(msg_size)]
            perf.run()
            self.msg_size_perf[msg_size] = perf

        summary = ["Message size:"]
        for msg_size in self.msg_sizes:
            summary.append(" %d: %s" % (msg_size, throughput(self.msg_size_perf[msg_size])))
        self.logger.info("\n".join(summary))

    def test_long_term_throughput(self):
        """Run a long producer benchmark and log average throughput per time block.

        The producer is run with ``self.msgs_large`` records and
        ``intermediate_stats=True``. The entries of ``self.perf.stats[0]``
        are grouped into consecutive blocks and the mean of each block's
        'records_per_sec' and 'mbps' values is logged.
        """
        self.logger.info("BENCHMARK: Long production")
        self.perf = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=self.msgs_large, record_size=self.msg_size_default, throughput=-1,
            settings={'acks':1, 'batch.size':self.batch_size, 'buffer.memory':self.buffer_memory},
            intermediate_stats=True
        )
        self.perf.run()

        summary = ["Throughput over long run, data > memory:"]

        # FIXME we should be generating a graph too
        stats = self.perf.stats[0]
        # Try to break it into 5 blocks, but fall back to a smaller number if
        # there aren't even 5 elements.
        # Floor division keeps both values integers on Python 3 as well as
        # Python 2; with true division range() and the slice below would
        # raise TypeError on a float.
        block_size = max(len(stats) // 5, 1)
        nblocks = len(stats) // block_size
        for i in range(nblocks):
            # (i + 1) * block_size never exceeds len(stats) because
            # nblocks * block_size <= len(stats), so no clamping is needed.
            subset = stats[i * block_size:(i + 1) * block_size]
            if len(subset) == 0:
                # Defensive only: guards the divisions below against an empty block.
                summary.append(" Time block %d: (empty)" % i)
            else:
                summary.append(" Time block %d: %f rec/sec (%f MB/s)" % (i,
                                 sum([stat['records_per_sec'] for stat in subset])/float(len(subset)),
                                 sum([stat['mbps'] for stat in subset])/float(len(subset))))

        self.logger.info("\n".join(summary))

    def test_end_to_end_latency(self):
        """Run the end-to-end latency service and log the median, 99% and 99.9% figures.

        Values come from the 'latency_50th_ms', 'latency_99th_ms' and
        'latency_999th_ms' keys of ``self.perf.results[0]``.
        """
        self.logger.info("BENCHMARK: End to end latency")
        self.perf = EndToEndLatencyService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=10000
        )
        self.perf.run()

        self.logger.info("End-to-end latency: median %f ms, 99%% %f ms, 99.9%% %f ms" % \
               (self.perf.results[0]['latency_50th_ms'],
                self.perf.results[0]['latency_99th_ms'],
                self.perf.results[0]['latency_999th_ms']))

    def test_producer_and_consumer(self):
        """Run one producer and one consumer via ``Service.run_parallel`` and log both throughputs."""
        self.logger.info("BENCHMARK: Producer + Consumer")
        self.producer = ProducerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=self.msgs_default, record_size=self.msg_size_default, throughput=-1,
            settings={'acks':1, 'batch.size':self.batch_size, 'buffer.memory':self.buffer_memory}
        )

        self.consumer = ConsumerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=self.msgs_default, throughput=-1, threads=1
        )

        Service.run_parallel(self.producer, self.consumer)

        summary = [
            "Producer + consumer:",
            " Producer: %s" % throughput(self.producer),
            " Consumer: %s" % throughput(self.consumer)]
        self.logger.info("\n".join(summary))

    def test_single_consumer(self):
        """Run one consumer service against "test-rep-three" and log its throughput."""
        # All consumer tests use the messages from the first benchmark, so
        # they'll get messages of the default message size
        self.logger.info("BENCHMARK: Single consumer")
        self.perf = ConsumerPerformanceService(
            self.test_context, 1, self.kafka,
            topic="test-rep-three", num_records=self.msgs_default, throughput=-1, threads=1
        )
        self.perf.run()
        self.logger.info("Single consumer: %s" % throughput(self.perf))

    def test_three_consumers(self):
        """Run the "Three consumers" benchmark against "test-rep-three" and log its throughput.

        The positional ``3`` is presumably the consumer instance count --
        confirm against ConsumerPerformanceService.
        """
        self.logger.info("BENCHMARK: Three consumers")
        self.perf = ConsumerPerformanceService(
            self.test_context, 3, self.kafka,
            topic="test-rep-three", num_records=self.msgs_default, throughput=-1, threads=1
        )
        self.perf.run()
        self.logger.info("Three consumers: %s", throughput(self.perf))
示例#18
0
class KafkaBenchmark(KafkaTest):
    '''A benchmark of Kafka producer/consumer performance. This replicates the test
    run here:
    https://engineering.linkedin.com/kafka/benchmarking-apache-kafka-2-million-writes-second-three-cheap-machines

    Every ``test_*`` method builds one or more performance services, runs
    them and writes the measured figures to ``self.logger``.
    '''

    def __init__(self, test_context):
        '''Declare the cluster layout and the parameters shared by all
        benchmarks.

        :param test_context: forwarded unchanged to ``KafkaTest.__init__``;
            the benchmarks later pass ``self.test_context`` to each service.
        '''
        super(KafkaBenchmark, self).__init__(
            test_context,
            num_zk=1,
            num_brokers=3,
            topics={
                'test-rep-one': {
                    'partitions': 6,
                    'replication-factor': 1
                },
                'test-rep-three': {
                    'partitions': 6,
                    'replication-factor': 3
                }
            })

        # Hand-operated switch between the two dataset sizes below.
        use_small_dataset = True
        if use_small_dataset:
            # Works on both aws and local
            self.msgs = 1000000
            self.msgs_default = 1000000
        else:
            # Can use locally on Vagrant VMs, but may use too much memory for aws
            self.msgs = 50000000
            self.msgs_default = 50000000

        self.msgs_large = 10000000
        self.msg_size_default = 100
        self.batch_size = 8 * 1024
        self.buffer_memory = 64 * 1024 * 1024
        self.msg_sizes = [10, 100, 1000, 10000, 100000]
        # Total payload produced per message size in
        # test_multiple_message_size, in bytes and in (fractional) GB.
        self.target_data_size = 128 * 1024 * 1024
        self.target_data_size_gb = self.target_data_size / float(
            1024 * 1024 * 1024)

    def _make_producer(self, num_nodes, topic, num_records, record_size,
                       acks=1, **extra):
        '''Build an unthrottled ProducerPerformanceService with the shared
        batch/buffer settings.

        :param num_nodes: second positional argument of the service (the
            benchmarks pass 1 or 3 for one or three producers).
        :param topic: topic to produce to.
        :param num_records: number of records to produce.
        :param record_size: size of each record.
        :param acks: value of the producer ``acks`` setting.
        :param extra: further keyword arguments passed straight to the
            service constructor (e.g. ``intermediate_stats=True``).
        :return: the service, not yet run.
        '''
        return ProducerPerformanceService(
            self.test_context,
            num_nodes,
            self.kafka,
            topic=topic,
            num_records=num_records,
            record_size=record_size,
            throughput=-1,
            settings={
                'acks': acks,
                'batch.size': self.batch_size,
                'buffer.memory': self.buffer_memory
            },
            **extra)

    def _make_consumer(self, num_nodes):
        '''Build an unthrottled, single-threaded ConsumerPerformanceService
        that reads ``self.msgs_default`` records from "test-rep-three".

        :param num_nodes: second positional argument of the service (the
            benchmarks pass 1 or 3 for one or three consumers).
        :return: the service, not yet run.
        '''
        return ConsumerPerformanceService(
            self.test_context,
            num_nodes,
            self.kafka,
            topic="test-rep-three",
            num_records=self.msgs_default,
            throughput=-1,
            threads=1)

    def test_single_producer_no_replication(self):
        '''One producer writing to the replication-factor-1 topic.'''
        self.logger.info("BENCHMARK: Single producer, no replication")
        self.perf = self._make_producer(
            1, "test-rep-one", self.msgs_default, self.msg_size_default)
        self.perf.run()
        self.logger.info("Single producer, no replication: %s",
                         throughput(self.perf))

    def test_single_producer_replication(self):
        '''One producer with acks=1 writing to the replication-factor-3
        topic.'''
        self.logger.info("BENCHMARK: Single producer, async 3x replication")
        self.perf = self._make_producer(
            1, "test-rep-three", self.msgs_default, self.msg_size_default)
        self.perf.run()
        self.logger.info("Single producer, async 3x replication: %s",
                         throughput(self.perf))

    def test_single_producer_sync(self):
        '''One producer with acks=-1 writing to the replication-factor-3
        topic.'''
        self.logger.info("BENCHMARK: Single producer, sync 3x replication")
        self.perf = self._make_producer(
            1, "test-rep-three", self.msgs_default, self.msg_size_default,
            acks=-1)
        self.perf.run()
        self.logger.info("Single producer, sync 3x replication: %s",
                         throughput(self.perf))

    def test_three_producers_async(self):
        '''Three producers with acks=1 writing to the replication-factor-3
        topic.'''
        self.logger.info("BENCHMARK: Three producers, async 3x replication")
        self.perf = self._make_producer(
            3, "test-rep-three", self.msgs_default, self.msg_size_default)
        self.perf.run()
        self.logger.info("Three producers, async 3x replication: %s",
                         throughput(self.perf))

    def test_multiple_message_size(self):
        '''Produce the same total amount of data once per entry of
        ``self.msg_sizes`` and log the throughput for each size.

        All producers are created first (kept in ``self.perfs``), then run
        one after another (kept in ``self.msg_size_perf``).
        '''
        # TODO this would be a great place to use parametrization
        self.perfs = {}
        for msg_size in self.msg_sizes:
            self.logger.info(
                "BENCHMARK: Message size %d (%f GB total, single producer, async 3x replication)",
                msg_size, self.target_data_size_gb)
            # Always generate the same total amount of data; floor division
            # keeps the record count an integer on Python 2 and 3 alike.
            nrecords = self.target_data_size // msg_size
            self.perfs["perf-" + str(msg_size)] = self._make_producer(
                1, "test-rep-three", nrecords, msg_size)

        self.msg_size_perf = {}
        for msg_size in self.msg_sizes:
            perf = self.perfs["perf-" + str(msg_size)]
            perf.run()
            self.msg_size_perf[msg_size] = perf

        summary = ["Message size:"]
        summary.extend(
            " %d: %s" % (msg_size, throughput(self.msg_size_perf[msg_size]))
            for msg_size in self.msg_sizes)
        self.logger.info("\n".join(summary))

    def test_long_term_throughput(self):
        '''Produce ``self.msgs_large`` records with intermediate stats
        enabled and log the average throughput of consecutive time blocks.'''
        self.logger.info("BENCHMARK: Long production")
        self.perf = self._make_producer(
            1, "test-rep-three", self.msgs_large, self.msg_size_default,
            intermediate_stats=True)
        self.perf.run()

        summary = ["Throughput over long run, data > memory:"]

        # FIXME we should be generating a graph too
        stats = self.perf.stats[0]
        # A block is a fifth of the samples, but never fewer than one sample.
        # Floor division is required: with "/" Python 3 yields floats, which
        # range() and slicing reject.
        block_size = max(len(stats) // 5, 1)
        nblocks = len(stats) // block_size
        for i in range(nblocks):
            # nblocks * block_size <= len(stats), so each slice holds exactly
            # block_size samples; samples past the last full block are not
            # reported.
            subset = stats[i * block_size:(i + 1) * block_size]
            count = float(len(subset))
            summary.append(
                " Time block %d: %f rec/sec (%f MB/s)" %
                (i,
                 sum(stat['records_per_sec'] for stat in subset) / count,
                 sum(stat['mbps'] for stat in subset) / count))

        self.logger.info("\n".join(summary))

    def test_end_to_end_latency(self):
        '''Run EndToEndLatencyService for 10000 records on "test-rep-three"
        and log the latency percentiles found in the first entry of its
        results.'''
        self.logger.info("BENCHMARK: End to end latency")
        self.perf = EndToEndLatencyService(self.test_context,
                                           1,
                                           self.kafka,
                                           topic="test-rep-three",
                                           num_records=10000)
        self.perf.run()

        latency = self.perf.results[0]
        self.logger.info(
            "End-to-end latency: median %f ms, 99%% %f ms, 99.9%% %f ms" %
            (latency['latency_50th_ms'],
             latency['latency_99th_ms'],
             latency['latency_999th_ms']))

    def test_producer_and_consumer(self):
        '''Hand one producer and one consumer on "test-rep-three" to
        ``Service.run_parallel`` and log the throughput of each.'''
        self.logger.info("BENCHMARK: Producer + Consumer")
        self.producer = self._make_producer(
            1, "test-rep-three", self.msgs_default, self.msg_size_default)
        self.consumer = self._make_consumer(1)

        Service.run_parallel(self.producer, self.consumer)

        summary = [
            "Producer + consumer:",
            " Producer: %s" % throughput(self.producer),
            " Consumer: %s" % throughput(self.consumer)
        ]
        self.logger.info("\n".join(summary))

    def test_single_consumer(self):
        '''One consumer reading ``self.msgs_default`` records from
        "test-rep-three".'''
        # All consumer tests use the messages from the first benchmark, so
        # they'll get messages of the default message size
        self.logger.info("BENCHMARK: Single consumer")
        self.perf = self._make_consumer(1)
        self.perf.run()
        self.logger.info("Single consumer: %s", throughput(self.perf))

    def test_three_consumers(self):
        '''Three consumers reading ``self.msgs_default`` records from
        "test-rep-three".'''
        self.logger.info("BENCHMARK: Three consumers")
        self.perf = self._make_consumer(3)
        self.perf.run()
        self.logger.info("Three consumers: %s", throughput(self.perf))