def ExtractMetrics():
  """Extracts per-repeat run metrics and writes them to a CSV file.

  Relies on names from the enclosing scope: `additional_column_names`, plus
  the `RunBenchmark.path` and `ExtractMetrics.path` attributes, which must be
  set before this function runs.
  """
  run_metrics_list = []
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      RunBenchmark.path):
    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
    logging.info('processing trace: %s', trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
    run_metrics = {
        'url': trace.url,
        'repeat_id': repeat_id,
    }
    run_metrics.update(
        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
            repeat_dir, trace))
    run_metrics_list.append(run_metrics)
  run_metrics_list.sort(key=lambda e: e['repeat_id'])
  with open(ExtractMetrics.path, 'w') as csv_file:
    writer = csv.DictWriter(
        csv_file,
        fieldnames=(additional_column_names +
                    sandwich_metrics.COMMON_CSV_COLUMN_NAMES))
    writer.writeheader()
    for trace_metrics in run_metrics_list:
      writer.writerow(trace_metrics)
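

# A minimal, self-contained sketch of the csv.DictWriter pattern used by
# ExtractMetrics() above: sort the per-repeat dictionaries by 'repeat_id',
# write the header once, then one row per repeat. The column names and values
# below are hypothetical stand-ins, not the benchmark's real columns; it uses
# the module-level csv import, as ExtractMetrics() does.
def _ExampleCsvWriteSketch(csv_path):
  rows = [
      {'repeat_id': 1, 'url': 'https://example.com', 'load_ms': 120},
      {'repeat_id': 0, 'url': 'https://example.com', 'load_ms': 140},
  ]
  rows.sort(key=lambda e: e['repeat_id'])
  with open(csv_path, 'w') as csv_file:
    writer = csv.DictWriter(
        csv_file, fieldnames=['repeat_id', 'url', 'load_ms'])
    writer.writeheader()
    for row in rows:
      writer.writerow(row)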


def _ProcessRunOutputDir(benchmark_setup, runner_output_dir):
  """Processes a benchmark's run output directory.

  Args:
    benchmark_setup: Same as for _RunOutputVerifier.
    runner_output_dir: Same as for SandwichRunner.output_dir.

  Returns:
    List of dictionaries, one per repeated run.
  """
  run_metrics_list = []
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)
    logging.info('processing trace: %s', trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)
    served_from_cache_urls = sandwich_utils.ListUrlRequests(
        trace, sandwich_utils.RequestOutcome.ServedFromCache)
    matching_subresources_used_from_cache = (
        served_from_cache_urls.intersection(
            set(benchmark_setup['urls_to_enable_swr'])))
    run_metrics = {
        'url': trace.url,
        'repeat_id': repeat_id,
        'benchmark_name': benchmark_setup['benchmark_name'],
        'cache_recording.subresource_count':
            len(benchmark_setup['effective_subresource_urls']),
        'cache_recording.matching_subresource_count':
            len(benchmark_setup['urls_to_enable_swr']),
        'benchmark.matching_subresource_count_used_from_cache':
            len(matching_subresources_used_from_cache),
    }
    run_metrics.update(
        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
            repeat_dir, trace))
    run_metrics_list.append(run_metrics)
  return run_metrics_list
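

# Illustrative sketch of the set intersection behind the
# 'benchmark.matching_subresource_count_used_from_cache' metric above. The
# URLs are hypothetical; in the real flow the first set comes from
# sandwich_utils.ListUrlRequests() and the second from benchmark_setup.
def _ExampleSwrIntersectionSketch():
  served_from_cache_urls = {
      'https://example.com/app.js',
      'https://example.com/style.css',
  }
  urls_to_enable_swr = {
      'https://example.com/app.js',
      'https://example.com/logo.png',
  }
  # Keep only SWR-enabled subresources that were actually served from cache.
  used_from_cache = served_from_cache_urls.intersection(urls_to_enable_swr)
  return len(used_from_cache)  # 1 in this example.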


def _ProcessRunOutputDir(cache_validation_result, benchmark_setup,
                         runner_output_dir):
  """Processes a benchmark's run output directory.

  Args:
    cache_validation_result: Same as for _RunOutputVerifier.
    benchmark_setup: Same as for _RunOutputVerifier.
    runner_output_dir: Same as for SandwichRunner.output_dir.

  Returns:
    List of dictionaries, one per repeated run.
  """
  run_metrics_list = []
  run_output_verifier = _RunOutputVerifier(
      cache_validation_result, benchmark_setup)
  cached_encoded_data_lengths = (
      cache_validation_result['effective_encoded_data_lengths'])
  for repeat_id, repeat_dir in sandwich_runner.WalkRepeatedRuns(
      runner_output_dir):
    trace_path = os.path.join(repeat_dir, sandwich_runner.TRACE_FILENAME)

    logging.info('loading trace: %s', trace_path)
    trace = loading_trace.LoadingTrace.FromJsonFile(trace_path)

    logging.info('verifying trace: %s', trace_path)
    run_output_verifier.VerifyTrace(trace)

    logging.info('extracting metrics from trace: %s', trace_path)

    # Gather the response size of each URL.
    response_sizes = {}
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # Ignore requests served from blink's cache.
      if request.served_from_cache:
        continue
      if request.from_disk_cache:
        if request.url in cached_encoded_data_lengths:
          response_size = cached_encoded_data_lengths[request.url]
        else:
          # Some fat web pages may overflow the memory cache, and so some
          # requests might be served from the disk cache a couple of times per
          # page load; reuse the size already recorded for this URL.
          logging.warning(
              'Looks like it could have been served from the memory cache: %s',
              request.url)
          if request.url in response_sizes:
            response_size = response_sizes[request.url]
      else:
        response_size = request.GetResponseTransportLength()
      response_sizes[request.url] = response_size

    # Sum the served-from-cache and served-from-network bytes.
    served_from_network_bytes = 0
    served_from_cache_bytes = 0
    urls_hitting_network = set()
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      # Ignore requests served from blink's cache.
      if request.served_from_cache:
        continue
      urls_hitting_network.add(request.url)
      if request.from_disk_cache:
        served_from_cache_bytes += response_sizes[request.url]
      else:
        served_from_network_bytes += response_sizes[request.url]

    # Make sure that requests served from blink's cache have at least one
    # corresponding request that was not served from blink's cache.
    for request in sandwich_utils.FilterOutDataAndIncompleteRequests(
        trace.request_track.GetEvents()):
      assert (request.url in urls_hitting_network or
              not request.served_from_cache)

    run_metrics = {
        'url': trace.url,
        'repeat_id': repeat_id,
        'subresource_discoverer': benchmark_setup['subresource_discoverer'],
        'cache_recording.subresource_count':
            len(cache_validation_result['effective_encoded_data_lengths']),
        'cache_recording.cached_subresource_count_theoretic':
            len(cache_validation_result['successfully_cached_resources']),
        'cache_recording.cached_subresource_count':
            len(cache_validation_result['expected_cached_resources']),
        'benchmark.subresource_count':
            len(sandwich_utils.ListUrlRequests(
                trace, sandwich_utils.RequestOutcome.All)),
        'benchmark.served_from_cache_count_theoretic':
            len(benchmark_setup['cache_whitelist']),
        'benchmark.served_from_cache_count':
            len(sandwich_utils.ListUrlRequests(
                trace, sandwich_utils.RequestOutcome.ServedFromCache)),
        'benchmark.served_from_network_bytes': served_from_network_bytes,
        'benchmark.served_from_cache_bytes': served_from_cache_bytes,
    }
    run_metrics.update(
        sandwich_metrics.ExtractCommonMetricsFromRepeatDirectory(
            repeat_dir, trace))
    run_metrics_list.append(run_metrics)
  run_metrics_list.sort(key=lambda e: e['repeat_id'])

  wpr_log_path = os.path.join(
      runner_output_dir, sandwich_runner.WPR_LOG_FILENAME)
  logging.info('verifying wpr log: %s', wpr_log_path)
  run_output_verifier.VerifyWprLog(wpr_log_path)
  return run_metrics_list
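

# Illustrative sketch of the two-pass byte accounting performed in
# _ProcessRunOutputDir() above, using a made-up request tuple: the first pass
# records one transport size per URL, the second pass attributes those bytes
# to the disk cache or the network. Real requests come from the trace's
# request track, not from this hypothetical type.
def _ExampleByteAccountingSketch():
  import collections
  FakeRequest = collections.namedtuple(
      'FakeRequest', ['url', 'from_disk_cache', 'transport_length'])
  requests = [
      FakeRequest('https://example.com/a.js', False, 1000),
      FakeRequest('https://example.com/b.css', True, 300),
      # Same URL hit again, this time from the disk cache.
      FakeRequest('https://example.com/a.js', True, 0),
  ]
  # First pass: record one size per URL (keep the first size seen).
  response_sizes = {}
  for request in requests:
    response_sizes.setdefault(request.url, request.transport_length)
  # Second pass: attribute each request's bytes to cache or network.
  served_from_cache_bytes = 0
  served_from_network_bytes = 0
  for request in requests:
    if request.from_disk_cache:
      served_from_cache_bytes += response_sizes[request.url]
    else:
      served_from_network_bytes += response_sizes[request.url]
  return served_from_cache_bytes, served_from_network_bytes  # (1300, 1000)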