def _ParseOutputForSamples(output):
  """Parses the output from running Coremark to get performance samples.

  Args:
    output: The output from running Coremark.

  Returns:
    A list of sample.Sample objects.

  Raises:
    Benchmarks.RunError: If correct operation is not validated.
  """
  if 'Correct operation validated' not in output:
    raise errors.Benchmarks.RunError('Correct operation not validated.')
  value = regex_util.ExtractFloat(r'CoreMark 1.0 : ([0-9]*\.[0-9]*)', output)
  metadata = {
      'summary': output.splitlines()[-1],  # Last line of output is a summary.
      'size': regex_util.ExtractInt(r'CoreMark Size\s*:\s*([0-9]*)', output),
      'total_ticks': regex_util.ExtractInt(r'Total ticks\s*:\s*([0-9]*)',
                                           output),
      'total_time_sec': regex_util.ExtractFloat(
          r'Total time \(secs\)\s*:\s*([0-9]*\.[0-9]*)', output),
      'iterations': regex_util.ExtractInt(r'Iterations\s*:\s*([0-9]*)',
                                          output),
      'iterations_per_cpu': ITERATIONS_PER_CPU,
      'parallelism_method': FLAGS.coremark_parallelism_method,
  }
  return [sample.Sample('Coremark Score', value, '', metadata)]

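# A minimal standalone sketch of the extraction helper used above, assuming
# regex_util.ExtractFloat returns the first match's group as a float and
# raises when nothing matches (consistent with the NoMatchError docstrings
# and the group= keyword exercised elsewhere in this codebase). The Coremark
# output below is hypothetical, for illustration only.
import re


def _extract_float_sketch(regex, text, group=1):
  """Hedged stand-in for regex_util.ExtractFloat."""
  match = re.search(regex, text)
  if not match:
    raise ValueError('No match for %r' % regex)  # PKB raises NoMatchError.
  return float(match.group(group))


_FAKE_COREMARK_OUTPUT = """\
CoreMark Size    : 666
Total ticks      : 12000
Total time (secs): 12.000
Iterations       : 400000
CoreMark 1.0 : 33333.333 / GCC ... / Heap
Correct operation validated.
"""

assert _extract_float_sketch(
    r'CoreMark 1.0 : ([0-9]*\.[0-9]*)', _FAKE_COREMARK_OUTPUT) == 33333.333
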
def ParseOutput(hpcc_output, benchmark_spec):
  """Parses the output from HPCC.

  Args:
    hpcc_output: A string containing the text of hpccoutf.txt.
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of samples to be published (in the same format as Run() returns).
  """
  results = []

  metadata = dict()
  match = re.search('HPLMaxProcs=([0-9]*)', hpcc_output)
  metadata['num_cpus'] = match.group(1)
  metadata['num_machines'] = len(benchmark_spec.vms)
  UpdateMetadata(metadata)

  value = regex_util.ExtractFloat('HPL_Tflops=([0-9]*\\.[0-9]*)', hpcc_output)
  results.append(sample.Sample('HPL Throughput', value, 'Tflops', metadata))

  value = regex_util.ExtractFloat('SingleRandomAccess_GUPs=([0-9]*\\.[0-9]*)',
                                  hpcc_output)
  results.append(
      sample.Sample('Random Access Throughput', value, 'GigaUpdates/sec'))

  for metric in STREAM_METRICS:
    regex = 'SingleSTREAM_%s=([0-9]*\\.[0-9]*)' % metric
    value = regex_util.ExtractFloat(regex, hpcc_output)
    results.append(
        sample.Sample('STREAM %s Throughput' % metric, value, 'GB/s'))

  value = regex_util.ExtractFloat(r'PTRANS_GBs=([0-9]*\.[0-9]*)', hpcc_output)
  results.append(sample.Sample('PTRANS Throughput', value, 'GB/s', metadata))

  return results

def Run(benchmark_spec):
  """Run Coremark on the target vm.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  vms = benchmark_spec.vms
  vm = vms[0]
  logging.info('Coremark running on %s', vm)
  num_cpus = vm.num_cpus
  vm.RemoteCommand(
      'cd %s;make PORT_DIR=linux64 ITERATIONS=%s XCFLAGS="-g -O2 '
      '-DMULTITHREAD=%d -DUSE_PTHREAD -DPERFORMANCE_RUN=1"' %
      (COREMARK_DIR, ITERATIONS_PER_CPU, num_cpus))
  logging.info('Coremark Results:')
  stdout, _ = vm.RemoteCommand('cat %s/run1.log' % COREMARK_DIR,
                               should_log=True)
  value = regex_util.ExtractFloat(r'CoreMark 1.0 : ([0-9]*\.[0-9]*)', stdout)
  metadata = {'num_cpus': vm.num_cpus}
  metadata.update(vm.GetMachineTypeDict())
  return [sample.Sample('Coremark Score', value, '', metadata)]

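# For orientation, the RemoteCommand above expands to a single shell line.
# With hypothetical values (COREMARK_DIR, ITERATIONS_PER_CPU, and the vCPU
# count are placeholders here, not PKB defaults) it would render roughly as:
#
#   cd /opt/pkb/coremark;make PORT_DIR=linux64 ITERATIONS=200000 \
#     XCFLAGS="-g -O2 -DMULTITHREAD=4 -DUSE_PTHREAD -DPERFORMANCE_RUN=1"
#
# i.e. Coremark is built with one pthread per vCPU (-DMULTITHREAD,
# -DUSE_PTHREAD) and the given iteration count, then the run1.log results
# file is read back over SSH.
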
def _CollectGpuSamples(
    vm: virtual_machine.BaseVirtualMachine) -> List[sample.Sample]:
  """Run XGBoost on the cluster.

  Args:
    vm: The virtual machine to run the benchmark.

  Returns:
    A list of sample.Sample objects.
  """
  cmd = [
      f'{FLAGS.xgboost_env}',
      'python3',
      f'{linux_packages.INSTALL_DIR}/xgboost/tests/benchmark/benchmark_tree.py',
      f'--tree_method={_TREE_METHOD.value}',
      f'--sparsity={_SPARSITY.value}',
      f'--rows={_ROWS.value}',
      f'--columns={_COLUMNS.value}',
      f'--iterations={_ITERATIONS.value}',
      f'--test_size={_TEST_SIZE.value}',
  ]
  if _PARAMS.value:
    cmd.append(f'--params="{_PARAMS.value}"')
  metadata = _MetadataFromFlags(vm)
  metadata.update(cuda_toolkit.GetMetadata(vm))
  metadata['command'] = ' '.join(cmd)

  stdout, stderr, exit_code = vm.RemoteCommandWithReturnCode(
      metadata['command'], ignore_failure=True)
  if exit_code:
    logging.warning('Error with getting XGBoost stats: %s', stderr)
  training_time = regex_util.ExtractFloat(r'Train Time: ([\d\.]+) seconds',
                                          stdout)
  # Wrap in a list to match the annotated List[sample.Sample] return type.
  return [sample.Sample('training_time', training_time, 'seconds', metadata)]

def ParseOutput(hpcc_output, benchmark_spec):
  """Parses the output from HPCC.

  Args:
    hpcc_output: A string containing the text of hpccoutf.txt.
    benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

  Returns:
    A list of samples to be published (in the same format as Run() returns).
  """
  results = []

  metadata = dict()
  match = re.search('HPLMaxProcs=([0-9]*)', hpcc_output)
  metadata['num_cpus'] = match.group(1)
  metadata['num_machines'] = len(benchmark_spec.vms)
  UpdateMetadata(metadata)

  # Parse all metrics from metric=value lines in the HPCC output.
  metric_values = regex_util.ExtractAllFloatMetrics(hpcc_output)
  for metric, value in metric_values.items():
    results.append(sample.Sample(metric, value, '', metadata))

  # Parse some metrics separately and add units. Although these metrics are
  # parsed above and added to results, this handling is left so that existing
  # uses of these metric names will continue to work.
  value = regex_util.ExtractFloat('HPL_Tflops=([0-9]*\\.[0-9]*)', hpcc_output)
  results.append(sample.Sample('HPL Throughput', value, 'Tflops', metadata))

  value = regex_util.ExtractFloat('SingleRandomAccess_GUPs=([0-9]*\\.[0-9]*)',
                                  hpcc_output)
  results.append(
      sample.Sample('Random Access Throughput', value, 'GigaUpdates/sec',
                    metadata))

  for metric in STREAM_METRICS:
    regex = 'SingleSTREAM_%s=([0-9]*\\.[0-9]*)' % metric
    value = regex_util.ExtractFloat(regex, hpcc_output)
    results.append(
        sample.Sample('STREAM %s Throughput' % metric, value, 'GB/s',
                      metadata))

  value = regex_util.ExtractFloat(r'PTRANS_GBs=([0-9]*\.[0-9]*)', hpcc_output)
  results.append(sample.Sample('PTRANS Throughput', value, 'GB/s', metadata))

  return results

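# The comment above says ExtractAllFloatMetrics pulls every metric=value line
# from the HPCC output. A minimal standalone sketch of that behavior,
# assuming it returns a {metric_name: float_value} dict for all name=float
# pairs in the text (the exact PKB semantics may differ; this is an
# illustration, not the real helper):
import re


def _extract_all_float_metrics_sketch(text):
  """Hedged stand-in: collect every name=float pair into a dict."""
  return {
      name: float(value)
      for name, value in re.findall(r'(\w+)=([0-9]+\.[0-9]+)', text)
  }


assert _extract_all_float_metrics_sketch(
    'HPL_Tflops=0.0331844\nPTRANS_GBs=0.338561\n') == {
        'HPL_Tflops': 0.0331844, 'PTRANS_GBs': 0.338561}
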
def ParseResults(result, metadata):
  """Parse mcperf result into samples.

  Sample Output:
  #type       avg     std     min      p5     p10     p50     p67
  read      106.0    67.7    37.2    80.0    84.3   101.7   108.8
  update      0.0     0.0     0.0     0.0     0.0     0.0     0.0
  op_q       10.0     0.0     1.0     9.4     9.4     9.7     9.8

  Total QPS = 754451.6 (45267112 / 60.0s)

  Total connections = 8

  Misses = 0 (0.0%)
  Skipped TXs = 0 (0.0%)

  RX 11180976417 bytes :  177.7 MB/s
  TX           0 bytes :    0.0 MB/s
  CPU Usage Stats (avg/min/max): 31.85%,30.31%,32.77%

  Args:
    result: Text output of running mcperf benchmark.
    metadata: metadata associated with the results.

  Returns:
    List of sample.Sample objects and actual qps.
  """
  samples = []
  if FLAGS.mcperf_ratio < 1.0:
    # N/A for write only workloads.
    misses = regex_util.ExtractGroup(MISS_REGEX, result)
    metadata['miss_rate'] = float(misses)

  latency_stats = regex_util.ExtractGroup(LATENCY_HEADER_REGEX, result).split()
  # parse latency
  for metric in ('read', 'update', 'op_q'):
    latency_regex = metric + LATENCY_REGEX
    latency_values = regex_util.ExtractGroup(latency_regex, result).split()
    for idx, stat in enumerate(latency_stats):
      if idx == len(latency_values):
        logging.warning('Mcperf does not report %s latency for %s.', stat,
                        metric)
        break
      samples.append(
          sample.Sample(metric + '_' + stat, float(latency_values[idx]),
                        'usec', metadata))
  # parse bandwidth
  for metric in ('TX', 'RX'):
    bw_regex = metric + BANDWIDTH_REGEX
    bw = regex_util.ExtractGroup(bw_regex, result)
    samples.append(sample.Sample(metric, float(bw), 'MB/s', metadata))

  qps = regex_util.ExtractFloat(QPS_REGEX, result)
  samples.append(sample.Sample('qps', qps, 'ops/s', metadata))
  return samples, qps

def ParseResults(result, metadata):
  """Parse mutilate result into samples.

  Sample Output:
  #type       avg     min     1st     5th    10th    90th    95th    99th
  read       52.4    41.0    43.1    45.2    48.1    55.8    56.6    71.5
  update      0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0
  op_q        1.5     1.0     1.0     1.1     1.1     1.9     2.0     2.0

  Total QPS = 18416.6 (92083 / 5.0s)

  Misses = 0 (0.0%)

  RX   22744501 bytes :    4.3 MB/s
  TX    3315024 bytes :    0.6 MB/s

  Args:
    result: Text output of running mutilate benchmark.
    metadata: metadata associated with the results.

  Returns:
    List of sample.Sample objects and actual qps.
  """
  samples = []
  if FLAGS.mutilate_ratio < 1.0:
    # N/A for write only workloads.
    misses = regex_util.ExtractGroup(MISS_REGEX, result)
    metadata['miss_rate'] = float(misses)

  latency_stats = regex_util.ExtractGroup(LATENCY_HEADER_REGEX, result).split()
  # parse latency
  for metric in ('read', 'update', 'op_q'):
    latency_regex = metric + LATENCY_REGEX
    latency_values = regex_util.ExtractGroup(latency_regex, result).split()
    for idx, stat in enumerate(latency_stats):
      if idx == len(latency_values):
        logging.warning('Mutilate does not report %s latency for %s.', stat,
                        metric)
        break
      samples.append(
          sample.Sample(metric + '_' + stat, float(latency_values[idx]),
                        'usec', metadata))
  # parse bandwidth
  for metric in ('TX', 'RX'):
    bw_regex = metric + BANDWIDTH_REGEX
    bw = regex_util.ExtractGroup(bw_regex, result)
    samples.append(sample.Sample(metric, float(bw), 'MB/s', metadata))

  qps = regex_util.ExtractFloat(QPS_REGEX, result)
  samples.append(sample.Sample('qps', qps, 'ops/s', metadata))
  return samples, qps

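# The two ParseResults variants above share one technique: split the latency
# header row once, then zip each metric row's values against those column
# names, stopping early when a row is shorter than the header. A standalone
# sketch with hypothetical regexes and data (the real MISS_REGEX,
# LATENCY_HEADER_REGEX, etc. are module constants not shown in this excerpt):
import re

_SAMPLE = """\
#type       avg     min     1st     5th
read       52.4    41.0    43.1    45.2
update      0.0     0.0     0.0
"""

header = re.search(r'#type(.*)', _SAMPLE).group(1).split()
for metric in ('read', 'update'):
  row = re.search(metric + r'(.*)', _SAMPLE).group(1).split()
  for idx, stat in enumerate(header):
    if idx == len(row):
      print('no %s value for %s' % (stat, metric))  # mirrors logging.warning
      break
    print('%s_%s = %s usec' % (metric, stat, row[idx]))
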
def MakeAccuracySamplesFromOutput(base_metadata: Dict[str, Any],
                                  output: str) -> List[sample.Sample]:
  """Creates accuracy samples containing metrics.

  Args:
    base_metadata: dict containing all the metadata to report.
    output: string, command output

  Returns:
    Samples containing the accuracy metric.
  """
  metadata = {}
  for column_name in _ACCURACY_METADATA:
    metadata[f'mlperf {column_name}'] = regex_util.ExtractExactlyOneMatch(
        fr'{re.escape(column_name)} *: *(.*)', output)
  accuracy = regex_util.ExtractFloat(
      r': Accuracy = (\d+\.\d+), Threshold = \d+\.\d+\. Accuracy test PASSED',
      output)
  metadata['Threshold'] = regex_util.ExtractFloat(
      r': Accuracy = \d+\.\d+, Threshold = (\d+\.\d+)\. Accuracy test PASSED',
      output)
  metadata.update(base_metadata)
  return [sample.Sample('accuracy', float(accuracy), '%', metadata)]

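# The metadata loop above builds one regex per column name, escaping the name
# so characters like '.' or '(' match literally. A standalone sketch of that
# pattern against a hypothetical mlperf summary (plain re.findall stands in
# for regex_util.ExtractExactlyOneMatch, which additionally asserts the
# pattern matches exactly once):
import re

_FAKE_SUMMARY = """\
Scenario : Offline
Mode     : PerformanceOnly
"""

for column_name in ('Scenario', 'Mode'):
  matches = re.findall(fr'{re.escape(column_name)} *: *(.*)', _FAKE_SUMMARY)
  assert len(matches) == 1  # what ExtractExactlyOneMatch enforces
  print(f'mlperf {column_name} = {matches[0]}')
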
def _ExtractThroughput(output):
  """Extract throughput from TensorFlow output.

  Args:
    output: TensorFlow output

  Returns:
    throughput (float)
  """
  regex = r'total images/sec: (\S+)'
  try:
    return regex_util.ExtractFloat(regex, output)
  except regex_util.NoMatchError:
    raise TFParseOutputException('Unable to parse TensorFlow output')

def MakeSamplesFromOutput(metadata: Dict[str, Any],
                          output: str) -> List[sample.Sample]:
  """Creates samples containing metrics.

  Args:
    metadata: dict containing all the metadata to report.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_inference_benchmark_test.py

  Returns:
    Samples containing the throughput metric.
  """
  for column_name in _METADATA_COLUMNS:
    metadata[f'mlperf {column_name}'] = regex_util.ExtractExactlyOneMatch(
        fr'{re.escape(column_name)} *: *(.*)', output)
  throughput = regex_util.ExtractFloat(
      r': result_scheduled_samples_per_sec *: *(.*), Result is VALID', output)
  return [sample.Sample('throughput', float(throughput), 'samples/s',
                        metadata)]

def MakePerformanceSamplesFromOutput(base_metadata: Dict[str, Any],
                                     output: str) -> List[sample.Sample]:
  """Creates performance samples containing metrics.

  Args:
    base_metadata: dict containing all the metadata to report.
    output: string, command output
  Example output:
    perfkitbenchmarker/tests/linux_benchmarks/mlperf_inference_benchmark_test.py

  Returns:
    Samples containing the throughput metric.
  """
  metadata = {}
  for column_name in _PERFORMANCE_METADATA:
    metadata[f'mlperf {column_name}'] = regex_util.ExtractExactlyOneMatch(
        fr'{re.escape(column_name)} *: *(.*)', output)
  metadata.update(base_metadata)
  throughput = regex_util.ExtractFloat(
      r': result_scheduled_samples_per_sec: (\d+\.\d+)', output)
  return [sample.Sample('throughput', float(throughput), 'samples/s',
                        metadata)]

def _CreateSingleSample(sample_name, sample_units, metadata, client_stdout):
  """Creates a sample from the tensorflow_serving_client_workload stdout.

  client_stdout is expected to contain output in the following format:
    key1: int_or_float_value_1
    key2: int_or_float_value_2

  Args:
    sample_name: Name of the sample. Used to create a regex to extract the
      value from client_stdout. Also used as the returned sample's name.
    sample_units: Units to be specified in the returned sample
    metadata: Metadata to be added to the returned sample
    client_stdout: Stdout from tensorflow_serving_client_workload.py

  Returns:
    A single floating point sample.

  Raises:
    regex_util.NoMatchError: when no line beginning with sample_name: is found
      in client_stdout
  """
  regex = sample_name + r'\:\s*(\w+\.?\w*)'
  value = regex_util.ExtractFloat(regex, client_stdout)
  return sample.Sample(sample_name, value, sample_units, metadata)

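# A quick check of the regex built above: for sample_name 'average_latency'
# it matches a line like 'average_latency: 12.94' and captures the numeric
# text. Standalone sketch with hypothetical client output:
import re

_FAKE_CLIENT_STDOUT = """\
num_requests: 5000
average_latency: 12.94
"""

regex = 'average_latency' + r'\:\s*(\w+\.?\w*)'
value = float(re.search(regex, _FAKE_CLIENT_STDOUT).group(1))
assert value == 12.94
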
def _RunIperf(sending_vm, receiving_vm, receiving_ip_address, ip_type):
  """Run iperf using sending 'vm' to connect to 'ip_address'.

  Args:
    sending_vm: The VM sending traffic.
    receiving_vm: The VM receiving traffic.
    receiving_ip_address: The IP address of the iperf server (ie the
      receiver).
    ip_type: The IP type of 'ip_address' (e.g. 'internal', 'external')

  Returns:
    A Sample.

  Raises:
    regex_util.NoMatchError: When iperf results are not found in stdout.
  """
  iperf_cmd = ('iperf --client %s --port %s --format m --time 60' %
               (receiving_ip_address, IPERF_PORT))
  stdout, _ = sending_vm.RemoteCommand(iperf_cmd, should_log=True)

  iperf_pattern = re.compile(r'(\d+\.\d+|\d+) Mbits/sec')
  value = regex_util.ExtractFloat(iperf_pattern, stdout)

  metadata = {
      # TODO(voellm): The server and client terminology is being deprecated.
      # It does not make clear the direction of the flow.
      'server_machine_type': receiving_vm.machine_type,
      'server_zone': receiving_vm.zone,
      'client_machine_type': sending_vm.machine_type,
      'client_zone': sending_vm.zone,

      # The meta data defining the environment
      'receiving_machine_type': receiving_vm.machine_type,
      'receiving_zone': receiving_vm.zone,
      'sending_machine_type': sending_vm.machine_type,
      'sending_zone': sending_vm.zone,
      'ip_type': ip_type
  }
  return sample.Sample('Throughput', float(value), 'Mbits/sec', metadata)

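# The compiled pattern above pulls the Mbits/sec figure out of iperf's client
# report. A standalone sketch against a hypothetical iperf summary line (the
# bracketed prefix and transfer size are illustrative, not captured output):
import re

_FAKE_IPERF_STDOUT = '[  3]  0.0-60.0 sec  6874 MBytes  961 Mbits/sec\n'

iperf_pattern = re.compile(r'(\d+\.\d+|\d+) Mbits/sec')
match = iperf_pattern.search(_FAKE_IPERF_STDOUT)
assert float(match.group(1)) == 961.0
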
def testParsesSuccessfully(self):
  regex = r'test (\d+|\.\d+|\d+\.\d+) string'
  string = 'test 12.435 string'
  self.assertAlmostEqual(12.435,
                         regex_util.ExtractFloat(regex, string, group=1))

def _ParseResult(out, test):
  """Parse blazemark results.

  Sample output:
  https://bitbucket.org/blaze-lib/blaze/wiki/Blazemark#!command-line-parameters
   Dense Vector/Dense Vector Addition:
   C-like implementation [MFlop/s]:
     100         1115.44
     10000000    206.317
   Classic operator overloading [MFlop/s]:
     100         415.703
     10000000    112.557
   Blaze [MFlop/s]:
     100         2602.56
     10000000    292.569
   Boost uBLAS [MFlop/s]:
     100         1056.75
     10000000    208.639
   Blitz++ [MFlop/s]:
     100         1011.1
     10000000    207.855
   GMM++ [MFlop/s]:
     100         1115.42
     10000000    207.699
   Armadillo [MFlop/s]:
     100         1095.86
     10000000    208.658
   MTL [MFlop/s]:
     100         1018.47
     10000000    209.065
   Eigen [MFlop/s]:
     100         2173.48
     10000000    209.899
   N=100, steps=55116257
     C-like      = 2.33322  (4.94123)
     Classic     = 6.26062  (13.2586)
     Blaze       = 1        (2.11777)
     Boost uBLAS = 2.4628   (5.21565)
     Blitz++     = 2.57398  (5.4511)
     GMM++       = 2.33325  (4.94129)
     Armadillo   = 2.3749   (5.0295)
     MTL         = 2.55537  (5.41168)
     Eigen       = 1.19742  (2.53585)
   N=10000000, steps=8
     C-like      = 1.41805  (0.387753)
     Classic     = 2.5993   (0.710753)
     Blaze       = 1        (0.27344)
     Boost uBLAS = 1.40227  (0.383437)
     Blitz++     = 1.40756  (0.384884)
     GMM++       = 1.40862  (0.385172)
     Armadillo   = 1.40215  (0.383403)
     MTL         = 1.39941  (0.382656)
     Eigen       = 1.39386  (0.381136)

  Args:
    out: string. Blazemark output in raw string format.
    test: string. Name of the test that was run.

  Returns:
    A list of samples. Each sample is a 4-tuple of (benchmark_name, value,
    unit, metadata).
  """
  matches = regex_util.ExtractAllMatches(THROUGHPUT_HEADER_REGEX, out)
  results = []
  for m in matches:
    lib = _SimplfyLibName(m[0])
    metadata = {}
    filled = m[1]
    if filled:
      metadata['% filled'] = regex_util.ExtractFloat(FILLED_REGEX, filled)
    unit = m[-2]
    for v in regex_util.ExtractAllMatches(THROUGHPUT_RESULT_REGEX, m[-1]):
      metadata['N'] = int(v[0])
      results.append(
          sample.Sample(
              '_'.join([test, lib, 'Throughput']),  # Metric name
              float(v[1]),  # Value
              unit,  # Unit
              copy.deepcopy(metadata)))  # Metadata
  logging.info('Results for %s:\n %s', test, results)
  return results
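
# THROUGHPUT_RESULT_REGEX is a module constant not shown in this excerpt; the
# inner loop above expects it to yield (N, throughput) pairs from the
# indented result lines. A standalone sketch with an assumed pair-matching
# regex (illustrative only, not the real constant):
import re

_FAKE_SECTION = """\
   100       1115.44
   10000000  206.317
"""

for n, value in re.findall(r'(\d+)\s+([\d.]+)', _FAKE_SECTION):
  print('N=%d -> %s MFlop/s' % (int(n), float(value)))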