def ParseResults(output):
    """Convert raw YCSB command-line output into samples.

    Sample output:
        [OVERALL], RunTime(ms), 723.0
        [OVERALL], Throughput(ops/sec), 1383.1258644536654
        [UPDATE], Operations, 496
        [UPDATE], AverageLatency(us), 5596.689516129032
        [UPDATE], MinLatency(us), 2028
        [UPDATE], MaxLatency(us), 46240
        [UPDATE], 95thPercentileLatency(ms), 10
        [UPDATE], 99thPercentileLatency(ms), 43
        [UPDATE], Return=0, 496

    Args:
        output: String output of the YCSB tool as printed on the command line.

    Returns:
        A list of sample.Sample objects. One sample per RESULT_REGEX match
        comes first (metric, value and unit taken from the match), followed
        by one 'Operations' sample (empty unit) per OPERATIONS_REGEX match.
        Every sample carries a {'stage': ...} metadata dict built from the
        first captured group.
    """
    measured = [
        sample.Sample(hit[1], float(hit[3]), hit[2], {'stage': hit[0]})
        for hit in regex_util.ExtractAllMatches(RESULT_REGEX, output)
    ]
    operation_counts = [
        sample.Sample('Operations', float(hit[1]), '', {'stage': hit[0]})
        for hit in regex_util.ExtractAllMatches(OPERATIONS_REGEX, output)
    ]
    return measured + operation_counts
def ParseResults(results):
    """Extract throughput and latency samples from Silo output.

    This is what a sample output looks like:
        --- table statistics ---
        table customer_0 size 30000 (+0 records)
        table customer_name_idx_0 size 30000 (+0 records)
        table district_0 size 10 (+0 records)
        table history_0 size 792182 (+762182 records)
        table item_0 size 100000 (+0 records)
        table new_order_0 size 122238 (+113238 records)
        table oorder_0 size 829578 (+799578 records)
        table oorder_c_id_idx_0 size 829578 (+799578 records)
        table order_line_0 size 8300509 (+8000949 records)
        table stock_0 size 100000 (+0 records)
        table stock_data_0 size 100000 (+0 records)
        table warehouse_0 size 1 (+0 records)
        --- benchmark statistics ---
        runtime: 30.0007 sec
        memory delta: 768.336 MB
        memory delta rate: 25.6106 MB/sec
        logical memory delta: 112.705 MB
        logical memory delta rate: 3.75673 MB/sec
        agg_nosync_throughput: 59150.1 ops/sec
        avg_nosync_per_core_throughput: 59150.1 ops/sec/core
        agg_throughput: 59150.1 ops/sec
        avg_per_core_throughput: 59150.1 ops/sec/core
        agg_persist_throughput: 59150.1 ops/sec
        avg_per_core_persist_throughput: 59150.1 ops/sec/core
        avg_latency: 0.0168378 ms
        avg_persist_latency: 0 ms
        agg_abort_rate: 0 aborts/sec
        avg_per_core_abort_rate: 0 aborts/sec/core
        txn breakdown: [[Delivery, 70967], [NewOrder, 799578],
            [OrderStatus, 70813], [Payment, 762182], [StockLevel, 71006]]
        --- system counters (for benchmark) ---
        --- perf counters (if enabled, for benchmark) ---
        --- allocator stats ---
        [allocator] ncpus=0
        ---------------------------------------

    Args:
        results: string, the Silo output to parse.

    Returns:
        A list of three sample.Sample objects, built from the first match of
        AGG_THPUT_REGEX, PER_CORE_THPUT_REGEX and LAT_REGEX, in that order.
        Each match supplies (metric, value, unit).
    """
    samples = []
    # Order matters: aggregate throughput, per-core throughput, avg latency.
    for pattern in (AGG_THPUT_REGEX, PER_CORE_THPUT_REGEX, LAT_REGEX):
        first = regex_util.ExtractAllMatches(pattern, results)[0]
        samples.append(sample.Sample(first[0], float(first[1]), first[2]))
    return samples
def MakeSamplesFromOutput(metadata, output, use_tpu=False, model='resnet'):
    """Create samples containing metrics.

    Args:
        metadata: dict contains all the metadata that reports.
        output: string, command output
        use_tpu: bool, whether tpu is in use
        model: string, model name

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/mlperf_benchmark_test.py

    Returns:
        A list of samples: one 'Eval Accuracy' sample (in %) per
        ':::MLL ... eval_accuracy' line, each with a copy of metadata
        extended with 'times' (seconds since the first such line) and
        'epoch'; plus a final 'Time' sample when use_tpu is False.
    """
    samples = []
    results = regex_util.ExtractAllMatches(
        r':::MLL (\d+\.\d+) eval_accuracy: {(.*)}', output)

    start = None
    for wall_time, result in results:
        wall_time = float(wall_time)
        # Compare with None explicitly: a first timestamp of 0.0 is falsy and
        # would otherwise be overwritten by the next line's timestamp.
        if start is None:
            start = wall_time
        metadata_copy = metadata.copy()
        epoch = regex_util.ExtractExactlyOneMatch(r'"epoch_num": (\d+)',
                                                  result)
        if 'transformer' in model and not use_tpu:
            value = regex_util.ExtractExactlyOneMatch(
                r'"value": "(\d+\.\d+)"', result)
        elif 'mask' in model:
            mask_value, mask_metadata = regex_util.ExtractExactlyOneMatch(
                r'^"value": (.*?), "metadata": (.*)$', result)
            # Parse the value payload once; BBOX accuracy is the reported
            # value and the whole accuracy dict goes into the metadata.
            accuracy = json.loads(mask_value)['accuracy']
            value = accuracy['BBOX']
            metadata_copy.update(accuracy)
            metadata_copy.update(json.loads(mask_metadata))
        else:
            value = regex_util.ExtractExactlyOneMatch(r'"value": (\d+\.\d+)',
                                                      result)
        metadata_copy['times'] = wall_time - start
        metadata_copy['epoch'] = int(epoch)
        samples.append(
            sample.Sample('Eval Accuracy', float(value) * 100, '%',
                          metadata_copy))

    if not use_tpu:
        # The RESULT line has one fewer leading field for minigo.
        if 'minigo' in model:
            times = regex_util.ExtractAllMatches(r'RESULT,.*,(\d+),.*,.*',
                                                 output)
        else:
            times = regex_util.ExtractAllMatches(r'RESULT,.*,.*,(\d+),.*,.*',
                                                 output)
        samples.append(
            sample.Sample('Time', int(times[0]), 'seconds', metadata))
    return samples
def MakeSamplesFromOutput(metadata, output, use_tpu=False, model=RESNET):
    """Create samples containing metrics.

    Args:
        metadata: dict contains all the metadata that reports.
        output: string, command output
        use_tpu: bool, whether tpu is in use
        model: string, model name

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/mlperf_benchmark_test.py

    Returns:
        A list of samples: one 'speed' sample (samples/sec) per throughput
        value found for the given model, plus a final 'Time' sample when
        use_tpu is False.
    """
    samples = []
    # Default to "no throughput values" so that a model matching none of the
    # branches below (for example one that only matches MINIGO) skips the
    # speed samples instead of failing on an unbound `results`.
    results = []
    if RESNET in model:
        results = regex_util.ExtractAllMatches(
            f'Speed: ({RE_FLOAT}) samples/sec', output)
        results.extend(
            regex_util.ExtractAllMatches(f'"imgs_sec": ({RE_FLOAT})', output))
        results.extend(
            regex_util.ExtractAllMatches(
                f'"key": "throughput", "value": ({RE_FLOAT})', output))
    elif TRANSFORMER in model:
        results = re.findall(r'wps=(\S+),', output)
    elif GNMT in model:
        results = re.findall(r'Tok/s (\S+)', output)
    elif SSD in model:
        results = re.findall(r'avg. samples / sec: (\S+)', output)
    elif MASK in model:
        results = regex_util.ExtractAllMatches(
            f'"throughput": ({RE_FLOAT})', output)
        results.extend(
            regex_util.ExtractAllMatches(
                f'"key": "throughput", "value": ({RE_FLOAT})', output))
        results.extend(
            regex_util.ExtractAllMatches(
                f'MLPERF METRIC THROUGHPUT=({RE_FLOAT}) iterations / s',
                output))
    elif BERT in model:
        results = regex_util.ExtractAllMatches(
            f"'training_sequences_per_second': ({RE_FLOAT})", output)

    for speed in results:
        samples.append(
            sample.Sample('speed', float(speed), 'samples/sec', metadata))

    if not use_tpu:
        # The RESULT line has one fewer leading field for minigo.
        if MINIGO in model:
            times = regex_util.ExtractAllMatches(r'RESULT,.*,(\d+),.*,.*',
                                                 output)
        else:
            times = regex_util.ExtractAllMatches(r'RESULT,.*,.*,(\d+),.*,.*',
                                                 output)
        samples.append(
            sample.Sample('Time', int(times[0]), 'seconds', metadata))
    return samples
def MakeSamplesFromOutput(metadata, output):
    """Build bandwidth samples from the benchmark's command output.

    Note that `metadata` is updated in place: the named groups matched by
    _SAMPLE_LINE_RE and every 'Rank N' -> device line are added to it.

    Args:
        metadata: dict contains all the metadata that reports.
        output: string, command output

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/nccl_benchmark_test.py

    Returns:
        A tuple (samples, max_out_of_place_algbw). `samples` holds, for each
        result row, one 'GB/s' sample per entry of _SAMPLE_NAMES (in sorted
        order), followed by 'avg_busbw' (GB/s) and 'max_out_of_place_algbw'
        (Gbps). The second element is the largest out-of-place algorithm
        bandwidth seen, as parsed (not multiplied by 8).
    """
    metadata.update(_SAMPLE_LINE_RE.match(output).groupdict())
    metadata.update(
        regex_util.ExtractAllMatches(r'(Rank\s+\d+) (.*)', output))

    # A result row is: two integers, two words, three decimals, a token,
    # three more decimals and a final token, all whitespace separated.
    integer = r'(\d+)\s+'
    word = r'(\w+)\s+'
    decimal = r'(\d+(?:\.\d+)?)\s+'
    token = r'(\S+)'
    row_pattern = (r'^\s*' + integer * 2 + word * 2 + decimal * 3 + token +
                   r'\s+' + decimal * 3 + token)
    rows = regex_util.ExtractAllMatches(row_pattern, output, re.MULTILINE)

    samples = []
    max_out_of_place_algbw = 0
    for row in rows:
        row_metadata = metadata.copy()
        row_metadata.update(zip(_METADATA_COLUMNS, row))
        for metric, column in sorted(_SAMPLE_NAMES.items()):
            samples.append(
                sample.Sample(metric, float(row_metadata[column]), 'GB/s',
                              row_metadata))
        out_of_place = float(row_metadata['out_of_place_algbw'])
        if out_of_place > max_out_of_place_algbw:
            max_out_of_place_algbw = out_of_place

    avg_bus_bandwidth = regex_util.ExtractExactlyOneMatch(
        r'Avg bus bandwidth\s+: ([0-9\.]+)', output)
    samples.append(
        sample.Sample('avg_busbw', float(avg_bus_bandwidth), 'GB/s',
                      metadata))
    # Gbps is gigaBIT per second and GB/s is gigaBYTE per second, hence * 8.
    samples.append(
        sample.Sample('max_out_of_place_algbw', max_out_of_place_algbw * 8,
                      'Gbps', metadata))
    return samples, max_out_of_place_algbw
def MakeSamplesFromTrainOutput(metadata, output, elapsed_seconds):
    """Build training samples from the command output.

    Args:
        metadata: dict contains all the metadata that reports. It must
            provide 'num_examples_per_epoch', and 'train_batch_size' when
            the output contains 'global_step/sec: '.
        output: string, command output
        elapsed_seconds: float, elapsed seconds from saved checkpoint.

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/mnist_benchmark_test.py

    Returns:
        A list of samples: 'Loss' always, plus 'Global Steps Per Second' and
        'Examples Per Second' when the output reports 'global_step/sec: '.
        All of them share one copy of metadata extended with 'step', 'epoch'
        and 'elapsed_seconds'.

    Raises:
        AssertionError: if the logged examples/sec differs from
            global_step/sec * train_batch_size by a relative error of
            EXAMPLES_PER_SECOND_PRECISION or more.
    """

    def _average(values):
        return sum(float(v) for v in values) / len(values)

    metadata_copy = metadata.copy()
    # Prefer the checkpoint-saving log line when present; in both cases the
    # last match in the output determines the step.
    if 'Saving checkpoints' in output:
        step_pattern = r'Saving checkpoints for (\d+) into'
    else:
        step_pattern = r'step = (\d+)'
    step = int(regex_util.ExtractAllMatches(step_pattern, output).pop())
    metadata_copy['step'] = step
    metadata_copy['epoch'] = step / metadata['num_examples_per_epoch']
    metadata_copy['elapsed_seconds'] = elapsed_seconds

    mean_loss = _average(
        regex_util.ExtractAllMatches(r'Loss for final step: (\d+\.\d+)',
                                     output))
    samples = [sample.Sample('Loss', float(mean_loss), '', metadata_copy)]

    if 'global_step/sec: ' not in output:
        return samples

    steps_per_sec = _average(
        regex_util.ExtractAllMatches(r'global_step/sec: (\S+)', output))
    samples.append(
        sample.Sample('Global Steps Per Second', steps_per_sec,
                      'global_steps/sec', metadata_copy))

    # Derive examples/sec from the step rate; when the output also logs it
    # directly, check the two agree and report the logged value.
    examples_per_sec = steps_per_sec * metadata['train_batch_size']
    if 'examples/sec: ' in output:
        logged_examples_per_sec = _average(
            regex_util.ExtractAllMatches(r'examples/sec: (\S+)', output))
        precision = (abs(logged_examples_per_sec - examples_per_sec) /
                     logged_examples_per_sec)
        assert precision < EXAMPLES_PER_SECOND_PRECISION, 'examples/sec is wrong.'
        examples_per_sec = logged_examples_per_sec
    samples.append(
        sample.Sample('Examples Per Second', examples_per_sec, 'examples/sec',
                      metadata_copy))
    return samples
def _RunGpuPingpong(vm: virtual_machine.BaseVirtualMachine,
                    addr: str) -> List[Tuple[float, float]]:
    """Runs the ping-pong test script on `vm` and returns (ping, pong) pairs.

    The script's stdout and stderr are concatenated and searched with
    _TIMELINE_PING and _TIMELINE_PONG; the i-th ping value is paired with the
    i-th pong value.

    Args:
        vm: The virtual machine on which the test script is executed.
        addr: Address passed as the single argument to the test script.

    Returns:
        A list of (ping, pong) float tuples.
    """
    stdout, stderr = vm.RemoteCommand(
        f'{_ENV.value} python {_TEST_SCRIPT} {addr}')
    combined = stdout + stderr
    pings = list(
        map(float, regex_util.ExtractAllMatches(_TIMELINE_PING, combined)))
    pongs = list(
        map(float, regex_util.ExtractAllMatches(_TIMELINE_PONG, combined)))
    return list(zip(pings, pongs))
def ExtractResults(result, benchmark_language):
    """Retrieves (metric, value) data points from the result string.

    Args:
        result: string, the benchmark output to parse.
        benchmark_language: 'C' or 'Java'; selects the regex used.

    Returns:
        A list of (metric name, float value) tuples. The list is empty for
        any other benchmark_language.
    """
    if benchmark_language == 'C':
        datapoints = []
        for hit in regex_util.ExtractAllMatches(result_regex_c, result):
            # The metric name is the first and third groups joined by a space.
            label = ' '.join((hit[0].strip(), hit[2].strip()))
            label = label.strip().strip(':')  # Extra ':' in 'MonteCarlo:'.
            datapoints.append((label, float(hit[1])))
        return datapoints
    if benchmark_language == 'Java':
        return [(hit[0].strip(), float(hit[1]))
                for hit in regex_util.ExtractAllMatches(result_regex_java,
                                                        result)]
    return []
def _ParseOutput(lmbench_output):
    """Parse the output from lmbench.

    Args:
        lmbench_output: A string containing the test results of lmbench.

    Returns:
        A list of samples to be published (in the same format as Run()
        returns).
    """
    results = []
    metadata = {}

    # Update metadata from the output.
    _UpdataMetadata(lmbench_output, metadata)

    # Parse results for "Processor, Processes - times in microseconds -
    # smaller is better".
    # TODO(user): Parse more metric for processor section.
    processor_metric_list = (
        'syscall',
        'read',
        'write',
        'stat',
        'fstat',
        'open/close',
        'Signal handler installation',
        'Signal handler overhead',
        'Protection fault',
        'Pipe latency',
        r'Process fork\+exit',
        r'Process fork\+execve',
        r'Process fork\+/bin/sh -c',
    )
    _AddProcessorMetricSamples(lmbench_output, processor_metric_list,
                               metadata, results)

    # Every context-switching section title found in the output is handled
    # by the same section parser.
    section_titles = regex_util.ExtractAllMatches('"size=.* ovr=.*',
                                                  lmbench_output)
    parse_section_func_dict = dict.fromkeys(section_titles,
                                            _ParseContextSwitching)
    _ParseSections(lmbench_output, parse_section_func_dict, metadata, results)
    return results
def MakeSamplesFromOutput(metadata, output):
    """Create samples containing metrics.

    Args:
        metadata: dict contains all the metadata that reports.
        output: string, command output

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/mlperf_benchmark_test.py

    Returns:
        A list of samples: one 'Eval Accuracy' sample (in %) per
        ':::MLPv... resnet ... eval_accuracy' line, each with a copy of
        metadata extended with 'times' (seconds since the first such line),
        'epoch' and 'version'; followed by one 'Times' sample in seconds.
    """
    samples = []
    results = regex_util.ExtractAllMatches(
        r':::MLPv(\S+) resnet (\d+\.\d+) .* eval_accuracy: {(.*)}', output)

    start = None
    for version, wall_time, result in results:
        wall_time = float(wall_time)
        # Compare with None explicitly: a first timestamp of 0.0 is falsy and
        # would otherwise be overwritten by the next line's timestamp.
        if start is None:
            start = wall_time
        metadata_copy = metadata.copy()
        epoch = regex_util.ExtractExactlyOneMatch(r'"epoch": (\d+)', result)
        value = regex_util.ExtractExactlyOneMatch(r'"value": (0\.\d+)',
                                                  result)
        metadata_copy['times'] = wall_time - start
        metadata_copy['epoch'] = int(epoch)
        metadata_copy['version'] = version
        samples.append(
            sample.Sample('Eval Accuracy', float(value) * 100, '%',
                          metadata_copy))

    times = regex_util.ExtractExactlyOneMatch(r'RESULT,resnet,.*,(\d+),.*,.*',
                                              output)
    samples.append(sample.Sample('Times', int(times), 'seconds', metadata))
    return samples
def MakeSamplesFromEvalOutput(metadata, output, elapsed_seconds):
    """Build evaluation samples from the last eval line in the output.

    Args:
        metadata: dict contains all the metadata that reports. It must
            provide 'num_examples_per_epoch'.
        output: string, command output
        elapsed_seconds: float, elapsed seconds from saved checkpoint.

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/mnist_benchmark_test.py

    Returns:
        A two-element list: an 'Eval Loss' sample and an 'Accuracy' sample
        (in %), sharing a copy of metadata extended with 'step', 'epoch' and
        'elapsed_seconds'.
    """
    # Only the last 'Saving dict for global step' line is reported.
    accuracy_text, step_text, loss_text = regex_util.ExtractAllMatches(
        r'Saving dict for global step \d+: accuracy = (\d+\.\d+), '
        r'global_step = (\d+), loss = (\d+\.\d+)', output).pop()

    eval_metadata = metadata.copy()
    global_step = int(step_text)
    eval_metadata['step'] = global_step
    eval_metadata['epoch'] = global_step / metadata['num_examples_per_epoch']
    eval_metadata['elapsed_seconds'] = elapsed_seconds

    loss_sample = sample.Sample('Eval Loss', float(loss_text), '',
                                eval_metadata)
    accuracy_sample = sample.Sample('Accuracy', float(accuracy_text) * 100,
                                    '%', eval_metadata)
    return [loss_sample, accuracy_sample]
def ParseJobFile(job_file):
    """Parse fio job file as dictionaries of sample metadata.

    Parameters of the first GLOBAL section (if any) are applied to every
    other section; a section's own parameters override the global ones.

    Args:
        job_file: The contents of fio job file.

    Returns:
        A dictionary of dictionaries of sample metadata, using test name as
        keys, dictionaries of sample metadata as value. The GLOBAL section
        itself is not included.
    """
    sections = regex_util.ExtractAllMatches(SECTION_REGEX, job_file)

    global_metadata = {}
    first_global = next((s for s in sections if s[0] == GLOBAL), None)
    if first_global is not None:
        global_metadata = ExtractFioParameters(first_global[1])

    parameter_metadata = {}
    for entry in sections:
        if entry[0] != GLOBAL:
            merged = dict(global_metadata)
            merged.update(ExtractFioParameters(entry[1]))
            parameter_metadata[entry[0]] = merged
    return parameter_metadata
def _CollectGpuSamples(
        vm: virtual_machine.BaseVirtualMachine) -> List[sample.Sample]:
    """Run the CUDA bandwidth test on every GPU of the VM.

    The test is run once per device index and, when the VM has more than one
    GPU, once more with --device=all. A run that exits non-zero is logged
    and skipped.

    Args:
        vm: The virtual machine to run the benchmark.

    Returns:
        A list of sample.Sample objects; empty when the VM has no NVIDIA GPU
        or nvidia-smi is not available.
    """
    if not (nvidia_driver.CheckNvidiaGpuExists(vm) and
            nvidia_driver.CheckNvidiaSmiExists(vm)):
        return []

    global_metadata = _MetadataFromFlags()
    global_metadata.update(cuda_toolkit.GetMetadata(vm))

    global_cmd = [
        BANDWIDTH_TEST_PATH,
        '--csv',
        f'--memory={_MEMORY.value}',
        f'--mode={_MODE.value}',
    ]
    # Optional switches, appended in this order when their flag is set.
    for flag, switch in ((_HTOD, '--htod'), (_DTOH, '--dtoh'),
                         (_DTOD, '--dtod'), (_WC, '--wc')):
        if flag.value:
            global_cmd.append(switch)

    num_gpus = nvidia_driver.QueryNumberOfGpus(vm)
    devices = list(range(num_gpus))
    if num_gpus > 1:
        devices.append('all')

    result_pattern = (r'bandwidthTest-(\S+), '
                      r'Bandwidth = ([\d\.]+) (\S+), '
                      r'Time = ([\d\.]+) s, '
                      r'Size = (\d+) bytes, '
                      r'NumDevsUsed = (\d+)')
    samples = []
    for device in devices:
        cmd = ' '.join(global_cmd + [f'--device={device}'])
        stdout, stderr, exit_code = vm.RemoteCommandWithReturnCode(
            cmd, ignore_failure=True)
        if exit_code:
            logging.warning('Error with getting GPU stats: %s', stderr)
            continue
        for hit in regex_util.ExtractAllMatches(result_pattern, stdout):
            metric, bandwidth, unit, time, size, num_devs_used = hit
            metadata = dict(
                time=float(time),
                size=int(size),
                NumDevsUsed=num_devs_used,
                device=device,
                command=cmd,
            )
            # Flag/toolkit metadata is applied last, so it wins on key clashes.
            metadata.update(global_metadata)
            samples.append(
                sample.Sample(metric, float(bandwidth), unit, metadata))
    return samples
def testParseSuccessfully(self):
    # Two "<digits> <word>" pairs occur in the text; both must be returned,
    # in order, with each group available by index.
    matches = regex_util.ExtractAllMatches(
        r'(\d+) (\w+)', 'test 10 sec 33 Mbps multiple matching')
    expected = [('10', 'sec'), ('33', 'Mbps')]
    self.assertEqual(len(matches), 2)
    for found, wanted in zip(matches, expected):
        self.assertEqual(found[0], wanted[0])
        self.assertEqual(found[1], wanted[1])
def _MakeSamplesFromOutput(metadata, output):
    """Create samples containing the measured tensor2tensor throughput.

    Args:
        metadata: dict contains all the metadata that reports.
        output: tensor2tensor output

    Returns:
        A list of samples: the global-steps-per-second samples, the
        examples-per-second samples when the output has them, and for every
        'Saving dict for global step' line five evaluation samples
        (Eval Loss, Accuracy, Accuracy Per Sequence, Negative Log Perplexity,
        Top 5 Accuracy) tagged with that line's step.
    """
    samples = list(
        mnist_benchmark.ExtractThroughput(r'global_step/sec: (\S+)', output,
                                          metadata, 'Global Steps Per Second',
                                          'global_steps/sec'))

    # TODO(b/115633403) Workaround until t2t can use TPUEstimator on a GPU
    try:
        samples.extend(
            mnist_benchmark.ExtractThroughput(r'examples/sec: (\S+)', output,
                                              metadata, 'Examples Per Second',
                                              'examples/sec'))
    except regex_util.NoMatchError:
        logging.info('examples/sec sample not collected')

    eval_pattern = (r'Saving dict for global step \d+: .*global_step = (\d+), '
                    r'.*loss = (\d+\.\d+), '
                    r'.*accuracy = (\d+\.\d+), '
                    r'.*accuracy_per_sequence = (\d+\.\d+), '
                    r'.*accuracy_top5 = (\d+\.\d+), '
                    r'.*neg_log_perplexity = (-?\d+\.\d+)')
    for hit in regex_util.ExtractAllMatches(eval_pattern, output):
        (step, loss, accuracy, accuracy_per_sequence, accuracy_top5,
         neg_log_perplexity) = hit
        step_metadata = metadata.copy()
        step_metadata['step'] = int(step)
        # Accuracies are fractions in the output and are reported as percent.
        eval_rows = (
            ('Eval Loss', float(loss), ''),
            ('Accuracy', float(accuracy) * 100, '%'),
            ('Accuracy Per Sequence', float(accuracy_per_sequence) * 100,
             '%'),
            ('Negative Log Perplexity', float(neg_log_perplexity),
             'perplexity'),
            ('Top 5 Accuracy', float(accuracy_top5) * 100, '%'),
        )
        for name, value, unit in eval_rows:
            samples.append(sample.Sample(name, value, unit, step_metadata))
    return samples
def ParseLatencyResult(result):
    """Split a latency string into its numeric value and its unit.

    Args:
        result: string. Latency value in string format, contains value and
            unit. eg. 200ms

    Returns:
        A tuple of value (float) and unit (string), taken from the first
        LATENCY_REGEX match.
    """
    first_match = regex_util.ExtractAllMatches(LATENCY_REGEX, result)[0]
    value = float(first_match[0])
    unit = first_match[1]
    return value, unit
def MakeSamplesFromTrainOutput(metadata, output, elapsed_seconds):
    """Build training samples from the command output.

    Args:
        metadata: dict contains all the metadata that reports. It must
            provide 'num_examples_per_epoch'.
        output: string, command output
        elapsed_seconds: float, elapsed seconds from saved checkpoint.

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/mnist_benchmark_test.py

    Returns:
        A list of samples: 'Loss' always, then 'Global Steps Per Second' and
        'Examples Per Second' for whichever of those the output reports.
        Values are means over all matching lines; all samples share one copy
        of metadata extended with 'step', 'epoch' and 'elapsed_seconds'.
    """

    def _average(values):
        return sum(float(v) for v in values) / len(values)

    metadata_copy = metadata.copy()
    # The last 'step = N' occurrence in the output is the current step.
    step = int(regex_util.ExtractAllMatches(r'step = (\d+)', output).pop())
    metadata_copy['step'] = step
    metadata_copy['epoch'] = step / metadata['num_examples_per_epoch']
    metadata_copy['elapsed_seconds'] = elapsed_seconds

    mean_loss = _average(
        regex_util.ExtractAllMatches(r'Loss for final step: (\d+\.\d+)',
                                     output))
    samples = [sample.Sample('Loss', float(mean_loss), '', metadata_copy)]

    # Optional rate metrics: (marker text, regex, metric name, unit).
    optional_rates = (
        ('global_step/sec: ', r'global_step/sec: (\S+)',
         'Global Steps Per Second', 'global_steps/sec'),
        ('examples/sec: ', r'examples/sec: (\S+)', 'Examples Per Second',
         'examples/sec'),
    )
    for marker, pattern, name, unit in optional_rates:
        if marker in output:
            readings = regex_util.ExtractAllMatches(pattern, output)
            samples.append(
                sample.Sample(name, _average(readings), unit, metadata_copy))
    return samples
def _ParseDeviceInfo(test_output):
    """Parses the GPU device info from the CUDA device bandwidth test output.

    Args:
        test_output: The resulting output string from the bandwidth test
            application.

    Returns:
        A dictionary mapping the device number to its name (both as strings),
        with one entry per EXTRACT_DEVICE_INFO_REGEX match.
    """
    devices = {}
    for match in regex_util.ExtractAllMatches(EXTRACT_DEVICE_INFO_REGEX,
                                              test_output, re.MULTILINE):
        device_number, device_name = str(match[0]), str(match[1])
        devices[device_number] = device_name
    return devices
def GetNuma(vm):
    """Get NUMA topology of the VM.

    Runs 'numactl --hardware' on the VM and counts the CPU ids listed for
    each node matched by NUMA_CPUS_REGEX.

    Args:
        vm: VirtualMachine.

    Returns:
        A dictionary, key is the numa node (as captured by the regex), value
        is the number of vCPUs on the node.
    """
    out, _ = vm.RemoteCommand('numactl --hardware')
    matches = regex_util.ExtractAllMatches(NUMA_CPUS_REGEX, out)
    # split() with no separator ignores leading, trailing and repeated
    # whitespace and returns [] for an empty CPU list. split(' ') would count
    # an empty list as one CPU and add phantom entries for extra spaces.
    return {m[0]: len(m[1].split()) for m in matches}
def QueryGpuClockSpeed(vm, device_id):
    """Returns the value of the memory and graphics clock.

    All clock values are in MHz.

    Args:
        vm: Virtual machine to operate on.
        device_id: Id of GPU device to query.

    Returns:
        Tuple of clock speeds in MHz in the form (memory clock, graphics
        clock).
    """
    clock_fields = 'clocks.applications.memory,clocks.applications.graphics'
    command = 'sudo nvidia-smi --query-gpu={0} --format=csv --id={1}'.format(
        clock_fields, device_id)
    stdout, _ = vm.RemoteCommand(command, should_log=True)
    # The values are read from the second output line (the first is
    # presumably the CSV header).
    value_row = stdout.splitlines()[1]
    first_match = regex_util.ExtractAllMatches(EXTRACT_CLOCK_SPEEDS_REGEX,
                                               value_row)[0]
    memory_clock = int(first_match[0])
    graphics_clock = int(first_match[1])
    return (memory_clock, graphics_clock)
def FormatCidrString(cidr_raw):
    """Format CIDR string for use in resource name.

    Keeps only the runs of digits in the CIDR and joins them with '-',
    eg '10.128.0.0/9' -> '10-128-0-0-9'.

    Args:
        cidr_raw: The unformatted CIDR string.

    Returns:
        A CIDR string suitable for use in resource names.

    Raises:
        ValueError: If the input does not contain exactly five integers
            (4 octets plus 1 prefix mask).
    """
    numbers = regex_util.ExtractAllMatches(r'[0-9]+', str(cidr_raw))
    if len(numbers) == 5:
        # '-' is a safe delimiter for most providers.
        return '-'.join(numbers)
    raise ValueError('Invalid CIDR format: "{0}"'.format(cidr_raw))
def ExtractThroughput(regex, output, metadata, metric, unit):
    """Extract throughput samples from MNIST output.

    Args:
        regex: string. Regular expression.
        output: MNIST output
        metadata: dict. Additional metadata to include with the sample.
        metric: string. Name of the metric within the benchmark.
        unit: string. Units for 'value'.

    Returns:
        A list with one sample per regex match. Each sample gets its own deep
        copy of metadata with an added 'index' key holding the match's
        position.
    """

    def _to_sample(index, value):
        indexed_metadata = copy.deepcopy(metadata)
        indexed_metadata['index'] = index
        return sample.Sample(metric, float(value), unit, indexed_metadata)

    matches = regex_util.ExtractAllMatches(regex, output)
    return [_to_sample(index, value) for index, value in enumerate(matches)]
def _ParseOutputFromSingleIteration(test_output):
    """Parses the output of the CUDA device bandwidth test.

    Args:
        test_output: The resulting output string from the bandwidth test
            application.

    Returns:
        A dictionary containing the following values as floats:
            * the device to host bandwidth
            * the host to device bandwidth
            * the device to device bandwidth
        All units are in MB/s, as these are the units guaranteed to be output
        by the test. The i-th entry of BENCHMARK_METRICS is paired with the
        i-th regex match.
    """
    bandwidths = regex_util.ExtractAllMatches(
        EXTRACT_BANDWIDTH_TEST_RESULTS_REGEX, test_output)
    return {
        metric: float(bandwidths[position])
        for position, metric in enumerate(BENCHMARK_METRICS)
    }
def MakeSamplesFromOutput(metadata, output):
    """Build words-per-second samples from per-epoch progress lines.

    Args:
        metadata: dict contains all the metadata that reports. It must
            provide 'num_accelerators'.
        output: string, command output

    Returns:
        A list with two samples per matching '| epoch N: ...' line: 'wps' and
        'wps per accelerator'. Both carry a copy of metadata extended with
        the line's fields, keyed by METADATA_COLUMNS.
    """
    # Fields that appear as 'name=value, ' in this order, before the final
    # 'train_wall=value' that ends the line.
    comma_fields = ('loss', 'nll_loss', 'ppl', 'wps', 'ups', 'wpb', 'bsz',
                    'num_updates', 'lr', 'gnorm', 'clip', 'oom', 'loss_scale',
                    'wall')
    line_pattern = (r'^\| epoch (\d+):\s+(\d+) / (\d+) ' +
                    ''.join(name + r'=(\S+), ' for name in comma_fields) +
                    r'train_wall=(\S+)$')
    rows = regex_util.ExtractAllMatches(line_pattern, output, re.MULTILINE)

    samples = []
    for row in rows:
        row_metadata = metadata.copy()
        row_metadata.update(zip(METADATA_COLUMNS, row))
        words_per_sec = float(row_metadata['wps'])
        per_accelerator = words_per_sec / metadata['num_accelerators']
        samples.append(
            sample.Sample('wps', words_per_sec, 'wps', row_metadata))
        samples.append(
            sample.Sample('wps per accelerator', per_accelerator, 'wps',
                          row_metadata))
    return samples
def MakePerformanceSamplesFromOutput(base_metadata: Dict[str, Any],
                                     output: str) -> List[sample.Sample]:
    """Creates the throughput sample from ':::MLLOG' lines in the output.

    Every ':::MLLOG <json>' line contributes its 'key'/'value' pair to the
    sample metadata; entries of base_metadata are applied afterwards and
    therefore override logged keys of the same name.

    Args:
        base_metadata: dict contains all the metadata that reports.
        output: string, command output

    Example output:
        perfkitbenchmarker/tests/linux_benchmarks/mlperf_inference_benchmark_test.py

    Returns:
        A single-element list holding the 'throughput' sample, whose value is
        the metadata entry named by _PERFORMANCE_METRIC.
    """
    metadata = {}
    for line in regex_util.ExtractAllMatches(r':::MLLOG (.*)', output):
        entry = json.loads(line)
        metadata[entry['key']] = entry['value']
    metadata.update(base_metadata)

    throughput = sample.Sample('throughput', metadata[_PERFORMANCE_METRIC],
                               'samples per second', metadata)
    return [throughput]
def ExtractFioParameters(fio_parameter):
    """Extract fio parameters from raw string.

    Sample parameter_string:
        overwrite=0
        rw=write
        blocksize=512k
        size=10*10*1000*$mb_memory
        iodepth=64
        direct=1
        end_fsync=1

    Args:
        fio_parameter: string. Parameters in string format.

    Returns:
        A dictionary of parameters, built from the first two groups of every
        PARAMETER_REGEX match. A later duplicate overrides an earlier one.
    """
    return {
        pair[0]: pair[1]
        for pair in regex_util.ExtractAllMatches(PARAMETER_REGEX,
                                                 fio_parameter)
    }
def _ParseResult(out, test): """Parse blazemark results. Sample output: https://bitbucket.org/blaze-lib/blaze/wiki/Blazemark#!command-line-parameters Dense Vector/Dense Vector Addition: C-like implementation [MFlop/s]: 100 1115.44 10000000 206.317 Classic operator overloading [MFlop/s]: 100 415.703 10000000 112.557 Blaze [MFlop/s]: 100 2602.56 10000000 292.569 Boost uBLAS [MFlop/s]: 100 1056.75 10000000 208.639 Blitz++ [MFlop/s]: 100 1011.1 10000000 207.855 GMM++ [MFlop/s]: 100 1115.42 10000000 207.699 Armadillo [MFlop/s]: 100 1095.86 10000000 208.658 MTL [MFlop/s]: 100 1018.47 10000000 209.065 Eigen [MFlop/s]: 100 2173.48 10000000 209.899 N=100, steps=55116257 C-like = 2.33322 (4.94123) Classic = 6.26062 (13.2586) Blaze = 1 (2.11777) Boost uBLAS = 2.4628 (5.21565) Blitz++ = 2.57398 (5.4511) GMM++ = 2.33325 (4.94129) Armadillo = 2.3749 (5.0295) MTL = 2.55537 (5.41168) Eigen = 1.19742 (2.53585) N=10000000, steps=8 C-like = 1.41805 (0.387753) Classic = 2.5993 (0.710753) Blaze = 1 (0.27344) Boost uBLAS = 1.40227 (0.383437) Blitz++ = 1.40756 (0.384884) GMM++ = 1.40862 (0.385172) Armadillo = 1.40215 (0.383403) MTL = 1.39941 (0.382656) Eigen = 1.39386 (0.381136) Args: out: string. Blazemark output in raw string format. test: string. Name of the test ran. Returns: A list of samples. Each sample if a 4-tuple of (benchmark_name, value, unit, metadata). """ matches = regex_util.ExtractAllMatches(THROUGHPUT_HEADER_REGEX, out) results = [] for m in matches: lib = _SimplfyLibName(m[0]) metadata = {} filled = m[1] if filled: metadata['% filled'] = regex_util.ExtractFloat( FILLED_REGEX, filled) unit = m[-2] for v in regex_util.ExtractAllMatches(THROUGHPUT_RESULT_REGEX, m[-1]): metadata['N'] = int(v[0]) results.append( sample.Sample( '_'.join([test, lib, 'Throughput']), # Metric name float(v[1]), # Value unit, # Unit copy.deepcopy(metadata))) # Metadata logging.info('Results for %s:\n %s', test, results) return results
def ParseResults(results):
    """Result parser for UnixBench.

    The output is split into sections, each beginning at an occurrence of
    RESULT_START_STRING, and every section is parsed independently.

    Sample Results:
      1 CPUs in system; running 1 parallel copy of tests
      8 CPUs in system; running 8 parallel copies of tests
      Double-Precision Whetstone                   4022.0 MWIPS (9.9 s, 7 samples)
      Execl Throughput                             4735.8 lps   (29.8 s, 2 samples)
      File Copy 1024 bufsize 2000 maxblocks     1294367.0 KBps  (30.0 s, 2 samples)
      File Copy 256 bufsize 500 maxblocks        396912.9 KBps  (30.0 s, 2 samples)
      File Copy 4096 bufsize 8000 maxblocks     2513158.7 KBps  (30.0 s, 2 samples)
      Pipe Throughput                           2221775.6 lps   (10.0 s, 7 samples)
      Pipe-based Context Switching               369000.7 lps   (10.0 s, 7 samples)
      Process Creation                            12587.7 lps   (30.0 s, 2 samples)
      Shell Scripts (1 concurrent)                 8234.3 lpm   (60.0 s, 2 samples)
      Shell Scripts (8 concurrent)                 1064.5 lpm   (60.0 s, 2 samples)
      System Call Overhead                      4439274.5 lps   (10.0 s, 7 samples)

      System Benchmarks Index Values               BASELINE       RESULT    INDEX
      Dhrystone 2 using register variables         116700.0   34872897.7   2988.3
      Double-Precision Whetstone                       55.0       4022.0    731.3
      Execl Throughput                                 43.0       4735.8   1101.4
      File Copy 1024 bufsize 2000 maxblocks          3960.0    1294367.0   3268.6
      File Copy 256 bufsize 500 maxblocks            1655.0     396912.9   2398.3
      File Copy 4096 bufsize 8000 maxblocks          5800.0    2513158.7   4333.0
      Pipe Throughput                               12440.0    2221775.6   1786.0
      Pipe-based Context Switching                   4000.0     369000.7    922.5
      Process Creation                                126.0      12587.7    999.0
      Shell Scripts (1 concurrent)                     42.4       8234.3   1942.1
      Shell Scripts (8 concurrent)                      6.0       1064.5   1774.2
      System Call Overhead                          15000.0    4439274.5   2959.5
                                                                         ========
      System Benchmarks Index Score                                        1825.8

    Args:
        results: UnixBench result.

    Returns:
        A list of sample.Sample objects. For each section: one sample per
        RESULT_REGEX match, one '<name>:score' sample per SCORE_REGEX match,
        and one 'System Benchmarks Index Score' sample. All of them carry the
        section's 'num_parallel_copies' in their metadata.
    """
    samples = []
    start_index = results.find(RESULT_START_STRING)
    while start_index != -1:
        next_start_index = results.find(RESULT_START_STRING, start_index + 1)
        # find() returns -1 when no further section exists. Used directly as
        # a slice end, -1 would cut off the last character of the output, so
        # slice to the end of the string instead.
        end_index = None if next_start_index == -1 else next_start_index
        result = results[start_index:end_index]

        parallel_copies = regex_util.ExtractAllMatches(
            PARALLEL_COPIES_REGEX, result)
        parallel_copy_metadata = {
            'num_parallel_copies': int(parallel_copies[0])
        }

        # Individual test results: name, value, unit, (time, sample count).
        match = regex_util.ExtractAllMatches(RESULT_REGEX, result)
        for groups in match:
            metadata = {
                'samples': int(groups[5]),
                'time': groups[3] + groups[4]
            }
            metadata.update(parallel_copy_metadata)
            samples.append(
                sample.Sample(groups[0].strip(), float(groups[1]), groups[2],
                              metadata))

        # Index table: name, baseline, result, index.
        match = regex_util.ExtractAllMatches(SCORE_REGEX, result)
        for groups in match:
            metadata = {
                'baseline': float(groups[1]),
                'index': float(groups[3])
            }
            metadata.update(parallel_copy_metadata)
            samples.append(
                sample.Sample('%s:score' % groups[0].strip(),
                              value=float(groups[2]),
                              unit='',
                              metadata=metadata))

        match = regex_util.ExtractAllMatches(SYSTEM_SCORE_REGEX, result)
        samples.append(
            sample.Sample('System Benchmarks Index Score',
                          float(match[0]),
                          unit='',
                          metadata=parallel_copy_metadata))
        start_index = next_start_index

    return samples
def _MakeSamplesFromOutput(metadata, output):
    """Create samples from the benchmark output.

    Produces per-checkpoint 'Loss', 'Global Steps Per Second' and (when
    present) 'Examples Per Second' samples, a 'Final Loss' sample and, when
    FLAGS.resnet_mode is 'eval' or 'train_and_eval', evaluation samples.

    Args:
      metadata: dict contains all the metadata that reports. It is deep-copied
        for every checkpoint before 'step' and 'duration' are added.
      output: output

    Example output:
      perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

    Returns:
      A list of samples.
    """
    samples = []

    # The step-0 entry has no throughput or duration in the log, so a
    # synthetic checkpoint with zeroed fields is built from the last
    # "step = 0" loss match.
    pattern = r'loss = (\d+.\d+), step = 0'
    loss = regex_util.ExtractAllMatches(pattern, output)
    checkpoints = [('0', '', loss.pop(), '0', '0')]

    pattern = (r'global_step/sec: (\d+.\d+)\n(.*examples/sec: \d+.\d+\n)?.*'
               r'loss = (\d+.\d+), step = (\d+) \((\d+.\d+) sec\)')
    checkpoints.extend(regex_util.ExtractAllMatches(pattern, output))

    for global_speed, example_speed, loss, step, duration in checkpoints:
        metadata_copy = copy.deepcopy(metadata)
        metadata_copy['step'] = int(step)
        metadata_copy['duration'] = float(duration)
        samples.append(sample.Sample('Loss', float(loss), '', metadata_copy))
        samples.append(
            sample.Sample('Global Steps Per Second', float(global_speed),
                          'global_steps/sec', metadata_copy))
        if example_speed:
            # This benchmark only reports the "Examples Per Second" metric
            # when using TPU.
            # The value is taken from the line captured for THIS checkpoint;
            # searching the whole output here cannot yield a per-checkpoint
            # value once the log holds more than one examples/sec line.
            pattern = r'examples/sec: (\d+.\d+)'
            example_speed = regex_util.ExtractExactlyOneMatch(
                pattern, example_speed)
            samples.append(
                sample.Sample('Examples Per Second', float(example_speed),
                              'examples/sec', metadata_copy))

    pattern = r'Loss for final step: (\d+.\d+)'
    value = regex_util.ExtractExactlyOneMatch(pattern, output)
    samples.append(sample.Sample('Final Loss', float(value), '', metadata))

    if FLAGS.resnet_mode in ('eval', 'train_and_eval'):
        pattern = r'Eval results: {.*\'loss\': (\d+.\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(sample.Sample('Eval Loss', float(value), '', metadata))

        # In the case of top-1 score, the trained model checks if the top
        # class (the one having the highest probability) is the same as the
        # target label. In the case of top-5 score, the trained model checks
        # if the target label is one of your top 5 predictions (the 5 ones
        # with the highest probabilities).
        pattern = r'Eval results: {.*\'top_1_accuracy\': (\d+.\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(
            sample.Sample('Top 1 Accuracy', float(value) * 100, '%', metadata))

        pattern = r'Eval results: {.*\'top_5_accuracy\': (\d+.\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(
            sample.Sample('Top 5 Accuracy', float(value) * 100, '%', metadata))

        # NOTE(review): 'Elapsed Seconds' is emitted only in the eval modes
        # here -- confirm that is the intended scope.
        pattern = r'Elapsed seconds (\d+)'
        value = regex_util.ExtractExactlyOneMatch(pattern, output)
        samples.append(
            sample.Sample('Elapsed Seconds', int(value), 'seconds', metadata))
    return samples
def _MakeSamplesFromOutput(metadata, output):
    """Build samples from the timestamped benchmark log.

    Depending on FLAGS.resnet_mode, training metrics (loss, global steps per
    second, examples per second) and/or evaluation metrics (eval loss, top-1
    and top-5 accuracy, elapsed seconds) are parsed out of the output. Each
    per-checkpoint sample carries a copy of the metadata extended with the
    'step' and the 'duration' since the first timestamp in the output.

    Args:
      metadata: dict contains all the metadata that reports.
      output: output

    Example output:
      perfkitbenchmarker/tests/linux_benchmarks/resnet_benchmark_test.py

    Returns:
      A list of samples.
    """
    samples = []

    # The first timestamp in the log is the reference point for durations.
    first_timestamp = regex_util.ExtractAllMatches(
        r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6})', output)[0]
    start_time = _ParseDateTime(first_timestamp)

    def _tagged_metadata(step, wall_time):
        """Return a metadata copy extended with the step and its duration."""
        tagged = metadata.copy()
        tagged['step'] = int(step)
        # NOTE(review): only `.seconds` of the difference is used; if that
        # difference is a datetime.timedelta, whole days are not included --
        # confirm runs are expected to stay below 24 hours.
        elapsed = _ParseDateTime(wall_time) - start_time
        tagged['duration'] = elapsed.seconds
        return tagged

    if FLAGS.resnet_mode in ('train', 'train_and_eval'):
        # Training modes: parse loss, global_steps_per_second and
        # examples_per_second.

        # Loss logged right after a checkpoint is saved.
        checkpoint_loss_pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
            r'.*loss = (\d+\.\d+), step = \d+\n')
        checkpoint_losses = regex_util.ExtractAllMatches(
            checkpoint_loss_pattern, output)
        for timestamp, step, loss in checkpoint_losses:
            tagged = _tagged_metadata(step, timestamp)
            samples.append(sample.Sample('Loss', float(loss), '', tagged))

        # Loss reported for the final step.
        final_loss_pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
            r'((.*\n){9})?.*Loss for final step: (\d+\.\d+).')
        final_losses = regex_util.ExtractAllMatches(final_loss_pattern, output)
        for timestamp, step, _, _, loss in final_losses:
            tagged = _tagged_metadata(step, timestamp)
            samples.append(sample.Sample('Loss', float(loss), '', tagged))

        # Throughput lines following a checkpoint.
        throughput_pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving checkpoints for (\d+).*\n'
            r'.*global_step/sec: (\d+\.\d+)\n'
            r'(.*examples/sec: (\d+.\d+))?')
        throughputs = regex_util.ExtractAllMatches(throughput_pattern, output)
        for timestamp, step, steps_per_sec, _, examples_per_sec in throughputs:
            tagged = _tagged_metadata(step, timestamp)
            samples.append(
                sample.Sample('Global Steps Per Second', float(steps_per_sec),
                              'global_steps/sec', tagged))
            if not examples_per_sec:
                continue
            # This benchmark only reports the "Examples Per Second" metric
            # when using TPU.
            samples.append(
                sample.Sample('Examples Per Second', float(examples_per_sec),
                              'examples/sec', tagged))

    if FLAGS.resnet_mode in ('eval', 'train_and_eval'):
        # Evaluation modes: parse top_1_accuracy, top_5_accuracy and
        # eval_loss.
        eval_pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Saving dict for global step \d+: '
            r'global_step = (\d+), loss = (\d+\.\d+), top_1_accuracy = (\d+\.\d+), '
            r'top_5_accuracy = (\d+\.\d+)')
        evaluations = regex_util.ExtractAllMatches(eval_pattern, output)
        for timestamp, step, loss, top_1, top_5 in evaluations:
            tagged = _tagged_metadata(step, timestamp)
            samples.append(sample.Sample('Eval Loss', float(loss), '', tagged))
            # In the case of top-1 score, the trained model checks if the top
            # class (the one having the highest probability) is the same as
            # the target label. In the case of top-5 score, the trained model
            # checks if the target label is one of your top 5 predictions
            # (the 5 ones with the highest probabilities).
            samples.append(
                sample.Sample('Top 1 Accuracy', float(top_1) * 100, '%',
                              tagged))
            samples.append(
                sample.Sample('Top 5 Accuracy', float(top_5) * 100, '%',
                              tagged))

        # The timestamp of the elapsed-seconds line is matched but not used.
        elapsed_pattern = (
            r'(\d{4} \d{2}:\d{2}:\d{2}\.\d{6}).*Elapsed seconds (\d+)')
        _, elapsed_seconds = regex_util.ExtractExactlyOneMatch(
            elapsed_pattern, output)
        samples.append(
            sample.Sample('Elapsed Seconds', int(elapsed_seconds), 'seconds',
                          metadata))
    return samples