def testPercentileCalculator(self):
  """Checks percentile and average entries computed for the integers 0..1000."""
  values = list(range(1001))
  stats = sample.PercentileCalculator(values, percentiles=[0, 1, 99.9, 100])
  expected = (
      ('p0', 0),
      ('p1', 10),
      ('p99.9', 999),
      ('p100', 1000),
      ('average', 500),
  )
  for key, want in expected:
    self.assertEqual(stats[key], want)
  # 4 percentiles we requested, plus average and stddev
  self.assertEqual(len(stats), 6)
def testWrongTypePercentile(self):
  """A non-numeric percentile must be rejected with ValueError."""
  self.assertRaises(
      ValueError, sample.PercentileCalculator, [3], percentiles=['a'])
def testOutOfRangePercentile(self):
  """A negative percentile must be rejected with ValueError."""
  self.assertRaises(
      ValueError, sample.PercentileCalculator, [3], percentiles=[-1])
def testNoNumbers(self):
  """An empty list of numbers must be rejected with ValueError."""
  self.assertRaises(
      ValueError, sample.PercentileCalculator, [], percentiles=[0, 1, 99])
def RunNetperf(vm, benchmark_name, server_ip, num_streams):
  """Runs netperf client streams on a VM and turns their output into samples.

  Args:
    vm: The VM on which the netperf client processes are started.
    benchmark_name: The netperf benchmark to run, see the documentation.
    server_ip: A machine that is running netserver.
    num_streams: The number of netperf client threads to run.

  Returns:
    A list of sample.Sample objects. With a single stream this is the
    throughput sample followed by that stream's latency samples. With several
    streams it holds aggregate throughput statistics and, when latency
    histograms are enabled, the merged histogram sample plus the latency
    statistics derived from it.
  """
  # Histograms are forced on for multi-stream runs, but throughput benchmarks
  # don't have latency histograms.
  enable_latency_histograms = (
      (FLAGS.netperf_enable_histograms or num_streams > 1) and
      benchmark_name != 'TCP_STREAM')

  max_iter = FLAGS.netperf_max_iter
  test_length = FLAGS.netperf_test_length

  # Flags:
  # -o specifies keys to include in CSV output.
  # -j keeps additional latency numbers
  # -v sets the verbosity level so that netperf will print out histograms
  # -I specifies the confidence % and width - here 99% confidence that the true
  #    value is within +/- 2.5% of the reported value
  # -i specifies the maximum and minimum number of iterations.
  if max_iter:
    confidence = '-I 99,5 -i {0},3'.format(max_iter)
  else:
    confidence = ''
  verbosity = '-v2 ' if enable_latency_histograms else ''

  # {{command_port}} and {{data_port}} survive formatting as literal
  # placeholders inside the command handed to the remote script.
  cmd_template = (
      '{netperf_path} -p {{command_port}} -j {verbosity} '
      '-t {benchmark_name} -H {server_ip} -l {length} {confidence}'
      ' -- '
      '-P ,{{data_port}} '
      '-o THROUGHPUT,THROUGHPUT_UNITS,P50_LATENCY,P90_LATENCY,'
      'P99_LATENCY,STDDEV_LATENCY,'
      'MIN_LATENCY,MAX_LATENCY,'
      'CONFIDENCE_ITERATION,THROUGHPUT_CONFID')
  netperf_cmd = cmd_template.format(
      netperf_path=netperf.NETPERF_PATH,
      benchmark_name=benchmark_name,
      server_ip=server_ip,
      length=test_length,
      confidence=confidence,
      verbosity=verbosity)

  if FLAGS.netperf_thinktime != 0:
    thinktime_template = (' -X {thinktime},{thinktime_array_size},'
                          '{thinktime_run_length} ')
    netperf_cmd += thinktime_template.format(
        thinktime=FLAGS.netperf_thinktime,
        thinktime_array_size=FLAGS.netperf_thinktime_array_size,
        thinktime_run_length=FLAGS.netperf_thinktime_run_length)

  # Run all of the netperf processes and collect their stdout
  # TODO: Record process start delta of netperf processes on the remote machine

  # Give the remote script the max possible test length plus 5 minutes to
  # complete
  iterations = max_iter or 1
  remote_cmd_timeout = test_length * iterations + 300
  remote_cmd = ('./%s --netperf_cmd="%s" --num_streams=%s --port_start=%s' %
                (REMOTE_SCRIPT, netperf_cmd, num_streams, PORT_START))
  remote_stdout, _ = vm.RemoteCommand(remote_cmd, timeout=remote_cmd_timeout)

  # The first element of the decoded JSON holds one stdout per netperf process.
  stdouts = json.loads(remote_stdout)[0]

  # Metadata to attach to samples
  metadata = {'netperf_test_length': test_length,
              'max_iter': iterations,
              'sending_thread_count': num_streams}

  parsed_output = [
      ParseNetperfOutput(stdout, metadata, benchmark_name,
                         enable_latency_histograms)
      for stdout in stdouts
  ]

  if len(parsed_output) == 1:
    # Only 1 netperf thread: its own throughput and latency samples are valid.
    throughput_sample, latency_samples, _ = parsed_output[0]
    return [throughput_sample] + latency_samples

  # Multiple netperf threads.
  # Unzip the parsed output. The per-thread latency samples are dropped because
  # netperf computes those stats per thread, so they are invalid in aggregate.
  per_stream_throughput, _, per_stream_histograms = map(
      list, zip(*parsed_output))

  # They should all have the same units
  throughput_unit = per_stream_throughput[0].unit
  throughputs = [s.value for s in per_stream_throughput]

  # Compute some stats on the throughput values
  throughput_stats = sample.PercentileCalculator(throughputs, [50, 90, 99])
  throughput_stats['min'] = min(throughputs)
  throughput_stats['max'] = max(throughputs)
  # Calculate aggregate throughput
  throughput_stats['total'] = throughput_stats['average'] * len(throughputs)

  samples = [
      sample.Sample('%s_Throughput_%s' % (benchmark_name, stat),
                    float(value), throughput_unit, metadata)
      for stat, value in throughput_stats.items()
  ]

  if not enable_latency_histograms:
    return samples

  # Combine all of the latency histogram dictionaries
  merged_histogram = Counter()
  for stream_histogram in per_stream_histograms:
    merged_histogram.update(stream_histogram)

  # Create a sample for the aggregate latency histogram
  hist_metadata = {'histogram': json.dumps(merged_histogram)}
  hist_metadata.update(metadata)
  samples.append(sample.Sample(
      '%s_Latency_Histogram' % benchmark_name, 0, 'us', hist_metadata))

  # Calculate stats on the aggregate latency histogram and emit one sample each
  latency_stats = _HistogramStatsCalculator(merged_histogram, [50, 90, 99])
  samples.extend(
      sample.Sample('%s_Latency_%s' % (benchmark_name, stat),
                    float(value), 'us', metadata)
      for stat, value in latency_stats.items())
  return samples
def ParseSysbenchOutput(sysbench_output, results, metadata):
  """Parses sysbench output.

  Extracts the relevant TPS and latency numbers and appends them to the final
  result collection. Specifically, we are interested in the tps numbers
  reported for each reporting interval, and the summary latency numbers
  printed at the end of the run in "General Statistics" -> "Response Time".

  Example Sysbench output:

  sysbench 0.5:  multi-threaded system evaluation benchmark
  <... lots of output we don't care here ...>
  Threads started!

  [   2s] threads: 16, tps: 526.38, reads: 7446.79, writes: 2105.52, response
  time: 210.67ms (99%), errors: 0.00, reconnects:  0.00
  < .... lots of tps output every 2 second, we need all those>

  < ... lots of other output we don't care for now...>
  General statistics:
      total time:                          17.0563s
      total number of events:              10000
      total time taken by event execution: 272.6053s
      response time:
           min:                                 18.31ms
           avg:                                 27.26ms
           max:                                313.50ms
           approx.  99 percentile:              57.15ms

  < We care about the response time section above, these are latency numbers>
  < then there are some outputs after this, we don't care either>

  Args:
    sysbench_output: The output from sysbench, as a single string.
    results: The list that sample.Sample objects are appended to.
    metadata: The metadata to be passed along to the Samples class.

  Raises:
    KeyError: If a token in RESPONSE_TIME_TOKENS was not found in the
      response time section of the output.
  """
  all_tps = []
  seen_general_statistics = False
  seen_response_time = False
  response_times = {}

  # splitlines() works on any str, so no StringIO wrapper is needed. None of
  # the patterns below depend on the trailing newline.
  for line in sysbench_output.splitlines():
    if line.startswith('['):
      # Interval report line. Bracketed lines without a tps field are skipped
      # instead of aborting the whole parse with an IndexError.
      tps_match = re.search(r'tps: (.*?),', line)
      if tps_match:
        all_tps.append(float(tps_match.group(1)))
      continue

    if line.startswith('General statistics:'):
      seen_general_statistics = True
      continue

    if not seen_general_statistics:
      continue

    if re.match(r'^ +response time:.*', line):
      seen_response_time = True
      continue

    if seen_response_time:
      for token in RESPONSE_TIME_TOKENS:
        # Evaluate the pattern once and reuse the match.
        latency_match = re.search(r'.*%s: +(.*)ms' % token, line)
        if latency_match:
          response_times[token] = float(latency_match.group(1))

  tps_line = ', '.join(map(str, all_tps))
  # Print all tps data points in the log for reference. And report
  # percentiles of these tps data in the final result set.
  logging.info('All TPS numbers: \n %s', tps_line)

  tps_percentile = sample.PercentileCalculator(all_tps)
  for percentile in sample.PERCENTILES_LIST:
    percentile_string = 'p%s' % str(percentile)
    logging.info('%s tps %f', percentile_string,
                 tps_percentile[percentile_string])
    metric_name = '%s %s' % (SYSBENCH_RESULT_NAME_TPS, percentile_string)
    results.append(
        sample.Sample(metric_name, tps_percentile[percentile_string],
                      NA_UNIT, metadata))

  # Also report average, stddev, and coefficient of variation
  for token in ['average', 'stddev']:
    logging.info('tps %s %f', token, tps_percentile[token])
    metric_name = '%s %s' % (SYSBENCH_RESULT_NAME_TPS, token)
    results.append(
        sample.Sample(metric_name, tps_percentile[token], NA_UNIT, metadata))

  # The coefficient of variation is only defined for a positive average.
  if tps_percentile['average'] > 0:
    cv = tps_percentile['stddev'] / tps_percentile['average']
    logging.info('tps coefficient of variation %f', cv)
    metric_name = '%s %s' % (SYSBENCH_RESULT_NAME_TPS, 'cv')
    results.append(sample.Sample(metric_name, cv, NA_UNIT, metadata))

  # Now, report the latency numbers.
  for token in RESPONSE_TIME_TOKENS:
    logging.info('%s_response_time is %f', token, response_times[token])
    metric_name = '%s %s' % (SYSBENCH_RESULT_NAME_LATENCY, token)

    # The percentile metric name also records which percentile was requested.
    if token == 'percentile':
      metric_name = '%s %s' % (metric_name, FLAGS.sysbench_latency_percentile)

    results.append(
        sample.Sample(metric_name, response_times[token], MS_UNIT, metadata))
def RunNetperf(primary_vm, secondary_vm, benchmark_name, num_streams,
               iteration, results):
  """Runs netperf streams from one VM to another and stores the samples.

  Args:
    primary_vm: The VM that the netperf benchmark will be run upon.
    secondary_vm: The VM that the netperf server is running on.
    benchmark_name: The netperf benchmark to run, see the documentation.
    num_streams: The number of netperf client threads to run.
    iteration: The iteration to prefix the metrics with, as well as the index
      into the array where the results will be stored.
    results: The results variable shared by all threads. The iteration-th
      element holds a tuple of
      (Samples[], begin_starting_processes, end_starting_processes)
  """
  test_length = FLAGS.bidirectional_network_test_length

  # Flags:
  # -o specifies keys to include in CSV output.
  # -j keeps additional latency numbers
  # {{command_port}} and {{data_port}} survive formatting as literal
  # placeholders inside the command handed to the remote script.
  cmd_template = (
      '{netperf_path} -p {{command_port}} -j '
      '-t {benchmark_name} -H {server_ip} -l {length}'
      ' -- '
      '-P ,{{data_port}} '
      '-o THROUGHPUT,THROUGHPUT_UNITS,P50_LATENCY,P90_LATENCY,'
      'P99_LATENCY,STDDEV_LATENCY,'
      'MIN_LATENCY,MAX_LATENCY,'
      'CONFIDENCE_ITERATION,THROUGHPUT_CONFID')
  netperf_cmd = cmd_template.format(
      netperf_path=netperf.NETPERF_PATH,
      benchmark_name=benchmark_name,
      server_ip=secondary_vm.internal_ip,
      length=test_length)

  # Run all of the netperf processes and collect their stdout.
  # Give the remote script the test length plus 5 minutes to complete.
  remote_cmd = ('./%s --netperf_cmd="%s" --num_streams=%s --port_start=%s' %
                (REMOTE_SCRIPT, netperf_cmd, num_streams, PORT_START))
  remote_stdout, _ = primary_vm.RemoteCommand(remote_cmd,
                                              timeout=test_length + 300)

  # The remote command's stdout is JSON. Index 0 holds the per-stream netperf
  # stdouts; indices 1 and 2 (stderrs and return codes) are unused here;
  # indices 3 and 4 bracket the time spent starting the processes.
  json_out = json.loads(remote_stdout)
  stdouts = json_out[0]
  begin_starting_processes = json_out[3]
  end_starting_processes = json_out[4]

  # Metadata to attach to samples
  metadata = {
      'bidirectional_network_test_length': test_length,
      'bidirectional_stream_num_streams': num_streams,
      'ip_type': 'internal',
      'primary_machine_type': primary_vm.machine_type,
      'primary_zone': primary_vm.zone,
      'secondary_machine_type': secondary_vm.machine_type,
      'secondary_zone': secondary_vm.zone,
  }

  # The first sample records how long it took to start all the streams.
  start_delta_sample = sample.Sample(
      '%s_%s_start_delta' % (iteration, benchmark_name),
      float(end_starting_processes - begin_starting_processes),
      SEC, metadata)
  local_results = [start_delta_sample]

  per_stream_throughput = [
      _ParseNetperfOutput(stdout, metadata, benchmark_name, iteration)
      for stdout in stdouts
  ]

  # They should all have the same units
  throughput_unit = per_stream_throughput[0].unit
  throughputs = [s.value for s in per_stream_throughput]

  # Compute some stats on the throughput values
  throughput_stats = sample.PercentileCalculator(throughputs, [50, 90, 99])
  throughput_stats['min'] = min(throughputs)
  throughput_stats['max'] = max(throughputs)
  # Calculate aggregate throughput
  throughput_stats['total'] = throughput_stats['average'] * len(throughputs)

  # Create samples for throughput stats
  local_results.extend(
      sample.Sample(
          '%s_%s_Throughput_%s' % (iteration, benchmark_name, stat),
          float(value), throughput_unit, metadata)
      for stat, value in throughput_stats.items())

  results[iteration] = (local_results, begin_starting_processes,
                        end_starting_processes)
def testPandasSeries(self):
  """Checks that a pandas Series input yields the expected median."""
  series = pd.Series([1, 2, 3])
  stats = sample.PercentileCalculator(series, percentiles=[50])
  self.assertEqual(stats['p50'], 2)
def RunNetperf(vm, benchmark_name, server_ip, num_streams):
  """Spawns netperf on a remote VM, parses results.

  Args:
    vm: The VM that the netperf benchmark will be run upon.
    benchmark_name: The netperf benchmark to run, see the documentation.
    server_ip: A machine that is running netserver.
    num_streams: The number of netperf client threads to run.

  Returns:
    A list of sample.Sample objects. With a single stream this is the
    throughput sample followed by that stream's latency samples. With several
    streams it holds aggregate throughput statistics and, when latency
    histograms are enabled, the merged histogram sample plus the latency
    statistics derived from it.
  """
  # Histograms are forced on for multi-stream runs, but throughput benchmarks
  # don't have latency histograms.
  # NOTE(review): this membership test is case-sensitive, while the send-size
  # and MSS checks below compare against benchmark_name.upper() - confirm that
  # lower-case benchmark names are never passed in.
  enable_latency_histograms = (
      (FLAGS.netperf_enable_histograms or num_streams > 1) and
      (benchmark_name not in ['TCP_STREAM', 'UDP_STREAM']))

  # Flags:
  # -o specifies keys to include in CSV output.
  # -j keeps additional latency numbers
  # -v sets the verbosity level so that netperf will print out histograms
  # -I specifies the confidence % and width - here 99% confidence that the true
  #    value is within +/- 2.5% of the reported value
  # -i specifies the maximum and minimum number of iterations.
  confidence = (f'-I 99,5 -i {FLAGS.netperf_max_iter},3'
                if FLAGS.netperf_max_iter else '')
  verbosity = '-v2 ' if enable_latency_histograms else ''

  # Metadata to attach to samples; extended below with per-benchmark options.
  metadata = {
      'netperf_test_length': FLAGS.netperf_test_length,
      'sending_thread_count': num_streams,
      'max_iter': FLAGS.netperf_max_iter or 1
  }

  # {{command_port}} and {{data_port}} stay as literal placeholders inside the
  # command handed to the remote script.
  netperf_cmd = (f'{netperf.NETPERF_PATH} '
                 f'-p {{command_port}} '
                 f'-j {verbosity} '
                 f'-t {benchmark_name} '
                 f'-H {server_ip} '
                 f'-l {FLAGS.netperf_test_length} {confidence}'
                 ' -- '
                 f'-P ,{{data_port}} '
                 f'-o {OUTPUT_SELECTOR}')

  upper_name = benchmark_name.upper()
  if upper_name == 'UDP_STREAM':
    send_size = FLAGS.netperf_udp_stream_send_size_in_bytes
    netperf_cmd += f' -R 1 -m {send_size} -M {send_size} '
    metadata['netperf_send_size_in_bytes'] = send_size
  elif upper_name == 'TCP_STREAM':
    send_size = FLAGS.netperf_tcp_stream_send_size_in_bytes
    netperf_cmd += f' -m {send_size} -M {send_size} '
    metadata['netperf_send_size_in_bytes'] = send_size

  if FLAGS.netperf_thinktime != 0:
    netperf_cmd += (' -X '
                    f'{FLAGS.netperf_thinktime},'
                    f'{FLAGS.netperf_thinktime_array_size},'
                    f'{FLAGS.netperf_thinktime_run_length} ')

  if FLAGS.netperf_mss and 'TCP' in upper_name:
    netperf_cmd += f' -G {FLAGS.netperf_mss}b'
    metadata['netperf_mss_requested'] = FLAGS.netperf_mss

  # Run all of the netperf processes and collect their stdout
  # TODO(dlott): Analyze process start delta of netperf processes on the remote
  #              machine

  # Give the remote script the max possible test length plus 5 minutes to
  # complete
  remote_cmd_timeout = (
      FLAGS.netperf_test_length * (FLAGS.netperf_max_iter or 1) + 300)
  remote_cmd = (f'./{REMOTE_SCRIPT} --netperf_cmd="{netperf_cmd}" '
                f'--num_streams={num_streams} --port_start={PORT_START}')
  remote_stdout, _ = vm.RobustRemoteCommand(remote_cmd, should_log=True,
                                            timeout=remote_cmd_timeout)

  # The first element of the decoded JSON holds one stdout per netperf process.
  json_out = json.loads(remote_stdout)
  stdouts = json_out[0]

  parsed_output = [
      ParseNetperfOutput(stdout, metadata, benchmark_name,
                         enable_latency_histograms)
      for stdout in stdouts
  ]

  if len(parsed_output) == 1:
    # Only 1 netperf thread; the per-thread histogram is not reported here.
    throughput_sample, latency_samples, _ = parsed_output[0]
    return [throughput_sample] + latency_samples

  # Multiple netperf threads
  samples = []

  # Unzip parsed output
  # Note that latency_samples are invalid with multiple threads because stats
  # are computed per-thread by netperf, so we don't use them here.
  throughput_samples, _, latency_histograms = [
      list(t) for t in zip(*parsed_output)
  ]

  # They should all have the same units
  throughput_unit = throughput_samples[0].unit
  # Extract the throughput values from the samples
  throughputs = [s.value for s in throughput_samples]

  # Compute some stats on the throughput values
  throughput_stats = sample.PercentileCalculator(throughputs, [50, 90, 99])
  throughput_stats['min'] = min(throughputs)
  throughput_stats['max'] = max(throughputs)
  # Calculate aggregate throughput
  throughput_stats['total'] = throughput_stats['average'] * len(throughputs)

  # Create samples for throughput stats
  for stat, value in throughput_stats.items():
    samples.append(
        sample.Sample(f'{benchmark_name}_Throughput_{stat}', float(value),
                      throughput_unit, metadata))

  if enable_latency_histograms:
    # Combine all of the latency histogram dictionaries
    latency_histogram = collections.Counter()
    for histogram in latency_histograms:
      latency_histogram.update(histogram)

    # Create a sample for the aggregate latency histogram
    hist_metadata = {'histogram': json.dumps(latency_histogram)}
    hist_metadata.update(metadata)
    samples.append(
        sample.Sample(f'{benchmark_name}_Latency_Histogram', 0, 'us',
                      hist_metadata))

    # Calculate stats on aggregate latency histogram
    latency_stats = _HistogramStatsCalculator(latency_histogram, [50, 90, 99])

    # Create samples for the latency stats
    for stat, value in latency_stats.items():
      samples.append(
          sample.Sample(f'{benchmark_name}_Latency_{stat}', float(value),
                        'us', metadata))

  return samples