def testLogDebugInfo_NoHistograms(self, mock_log):
  """Checks the message reported for a HistogramSet holding no histograms."""
  empty_set = histogram_set.HistogramSet()

  add_histograms._LogDebugInfo(empty_set)

  # Exactly one call is expected on the patched logger.
  mock_log.assert_called_once_with('No histograms in data.')
def __init__(self, output_formatters=None, progress_reporter=None,
             output_dir=None, should_add_value=None, benchmark_name=None,
             benchmark_description=None, benchmark_enabled=True,
             upload_bucket=None, results_label=None):
  """Creates an empty results container for a single benchmark run.

  Args:
    output_formatters: A list of output formatters used to format the test
      results (for example a CsvOutputFormatter, which emits CSV). Defaults
      to an empty list.
    progress_reporter: A progress_reporter.ProgressReporter instance used to
      report test status/results progressively. A default ProgressReporter
      is created when omitted.
    output_dir: A string with the directory in which test artifacts (traces,
      videos, etc.) are stored.
    should_add_value: A callable taking a value name and a boolean (True when
      the value belongs to the first run of its story) and returning True if
      the value should be added to the results. Defaults to accepting
      every value.
    benchmark_name: A string naming the benchmark being run; falls back to
      '(unknown benchmark)'.
    benchmark_description: A string describing the benchmark being run;
      falls back to the empty string.
    benchmark_enabled: A boolean telling whether the benchmark is enabled
      (some output formats emit special output for disabled benchmarks).
    upload_bucket: A string identifying the cloud storage bucket to which
      artifacts are uploaded.
    results_label: A string identifying the current benchmark run.
  """
  super(PageTestResults, self).__init__()

  if progress_reporter is None:
    progress_reporter = reporter_module.ProgressReporter()
  self._progress_reporter = progress_reporter

  if output_formatters is None:
    output_formatters = []
  self._output_formatters = output_formatters

  self._output_dir = output_dir

  if should_add_value is None:
    # No filter supplied: keep every value.
    should_add_value = lambda v, is_first: True
  self._should_add_value = should_add_value

  # Per-run bookkeeping, all starting out empty.
  self._current_page_run = None
  self._all_page_runs = []
  self._all_stories = set()
  self._representative_value_for_each_value_name = {}
  self._all_summary_values = []
  self._histograms = histogram_set.HistogramSet()

  self._benchmark_name = benchmark_name or '(unknown benchmark)'
  self._benchmark_description = benchmark_description or ''
  self._telemetry_info = TelemetryInfo(
      benchmark_name=self._benchmark_name,
      benchmark_description=self._benchmark_description,
      results_label=results_label,
      upload_bucket=upload_bucket,
      output_dir=output_dir)

  # State of the benchmark this set of results represents.
  self._benchmark_enabled = benchmark_enabled

  self._histogram_dicts_to_add = []

  # Maps each story that has run to how many times it ran. Needed on
  # interrupt, when some of the stories may not have run at all.
  self._story_run_count = {}
def testAddReservedDiagnostics_InvalidDiagnostic_Raises(self):
  """An unknown diagnostic name must trigger an AssertionError."""
  single_hist_set = histogram_set.HistogramSet([self._CreateHistogram('foo')])
  bad_diagnostics = {'SOME INVALID DIAGNOSTIC': 'bar'}

  with self.assertRaises(AssertionError):
    add_reserved_diagnostics.AddReservedDiagnostics(
        single_hist_set.AsDicts(), bad_diagnostics)
def AddReservedDiagnostics(histogram_dicts, names_to_values):
  """Merges histograms, attaches reserved diagnostics and returns JSON.

  Args:
    histogram_dicts: Histogram-set dicts; loaded twice via _LoadHistogramSet
      so the with-story and without-story subsets can be filtered
      independently.
    names_to_values: Mapping from diagnostic name to value. Each value is
      wrapped in a single-element GenericSet and added as a shared
      diagnostic.

  Returns:
    A JSON string (json.dumps) of the combined histogram set's dicts.

  Raises:
    AssertionError: If a key of names_to_values is not in ALL_NAMES.
  """
  # We need to generate summary statistics for anything that had a story, so
  # filter out every histogram with no stories, then merge. If you keep the
  # histograms with no story, you end up with duplicates.
  # NOTE(review): the predicates below presumably select the histograms to
  # discard -- confirm against HistogramSet.FilterHistograms.
  hs_with_stories = _LoadHistogramSet(histogram_dicts)
  hs_with_stories.FilterHistograms(
      lambda h: not h.diagnostics.get(reserved_infos.STORIES.name, []))

  hs_with_no_stories = _LoadHistogramSet(histogram_dicts)
  hs_with_no_stories.FilterHistograms(
      lambda h: h.diagnostics.get(reserved_infos.STORIES.name, []))

  # TODO(#3987): Refactor recipes to call merge_histograms separately.
  # This call combines all repetitions of a metric for a given story into a
  # single histogram.
  hs = histogram_set.HistogramSet()
  hs.ImportDicts(hs_with_stories.AsDicts())

  for h in hs:
    h.diagnostics[reserved_infos.TEST_PATH.name] = (
        generic_set.GenericSet([ComputeTestPath(h)]))

  # Return value deliberately ignored here; the call is made on this copy
  # before merging (presumably for its deleting side effect, per its name).
  _GetAndDeleteHadFailures(hs)
  dicts_across_repeats = _MergeHistogramSetByPath(hs)

  had_failures = _GetAndDeleteHadFailures(hs_with_stories)

  # Summary histograms are only produced when no failures were recorded.
  if not had_failures:
    # This call creates summary metrics across each tag set of stories.
    hs = histogram_set.HistogramSet()
    hs.ImportDicts(hs_with_stories.AsDicts())
    hs.FilterHistograms(lambda h: not GetTIRLabelFromHistogram(h))

    for h in hs:
      h.diagnostics[reserved_infos.SUMMARY_KEYS.name] = (
          generic_set.GenericSet(['name', 'storyTags']))
      h.diagnostics[reserved_infos.TEST_PATH.name] = (
          generic_set.GenericSet([ComputeTestPath(h)]))

    dicts_across_stories = _MergeHistogramSetByPath(hs)

    # This call creates summary metrics across the entire story set.
    hs = histogram_set.HistogramSet()
    hs.ImportDicts(hs_with_stories.AsDicts())

    for h in hs:
      h.diagnostics[reserved_infos.SUMMARY_KEYS.name] = (
          generic_set.GenericSet(['name']))
      h.diagnostics[reserved_infos.TEST_PATH.name] = (
          generic_set.GenericSet([ComputeTestPath(h)]))

    dicts_across_names = _MergeHistogramSetByPath(hs)
  else:
    dicts_across_stories = []
    dicts_across_names = []

  # Now load everything into one histogram set. First we load the summary
  # histograms, since we need to mark them with SUMMARY_KEYS.
  # After that we load the rest, and then apply all the diagnostics specified
  # on the command line. Finally, since we end up with a lot of diagnostics
  # that no histograms refer to, we make sure to prune those.
  histograms = histogram_set.HistogramSet()
  histograms.ImportDicts(dicts_across_names)
  histograms.ImportDicts(dicts_across_stories)
  histograms.ImportDicts(dicts_across_repeats)
  histograms.ImportDicts(hs_with_no_stories.AsDicts())

  histograms.DeduplicateDiagnostics()
  for name, value in names_to_values.iteritems():
    assert name in ALL_NAMES
    histograms.AddSharedDiagnostic(name, generic_set.GenericSet([value]))
  histograms.RemoveOrphanedDiagnostics()

  return json.dumps(histograms.AsDicts())
def ProcessHistogramSet(histogram_dicts):
  """Validates an uploaded histogram set and queues it for ingestion.

  Args:
    histogram_dicts: The decoded HistogramSet JSON; must be a list of dicts.

  Raises:
    api_request_handler.BadRequestError: If histogram_dicts is not a list or
      contains no histograms.
  """
  if not isinstance(histogram_dicts, list):
    # Message typo fixed ("much be" -> "must be").
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must be a list of dicts')

  # Start the whitelist fetch early so it overlaps with the parsing below.
  bot_whitelist_future = stored_object.GetAsync(
      add_point_queue.BOT_WHITELIST_KEY)

  histograms = histogram_set.HistogramSet()
  histograms.ImportDicts(histogram_dicts)
  histograms.ResolveRelatedHistograms()
  histograms.DeduplicateDiagnostics()

  if len(histograms) == 0:
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must contain at least one histogram.')

  _LogDebugInfo(histograms)

  InlineDenseSharedDiagnostics(histograms)

  # TODO(eakuefner): Get rid of this.
  # https://github.com/catapult-project/catapult/issues/4242
  _PurgeHistogramBinData(histograms)

  # Suite identity is read from the first histogram's diagnostics.
  master = _GetDiagnosticValue(
      reserved_infos.MASTERS.name, histograms.GetFirstHistogram())
  bot = _GetDiagnosticValue(
      reserved_infos.BOTS.name, histograms.GetFirstHistogram())
  benchmark = _GetDiagnosticValue(
      reserved_infos.BENCHMARKS.name, histograms.GetFirstHistogram())
  benchmark_description = _GetDiagnosticValue(
      reserved_infos.BENCHMARK_DESCRIPTIONS.name,
      histograms.GetFirstHistogram(), optional=True)

  _ValidateMasterBotBenchmarkName(master, bot, benchmark)

  suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))

  logging.info('Suite: %s', suite_key.id())

  revision = ComputeRevision(histograms)

  bot_whitelist = bot_whitelist_future.get_result()
  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)

  # We'll skip the histogram-level sparse diagnostics because we need to
  # handle those with the histograms, below, so that we can properly assign
  # test paths.
  suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
      histograms, suite_key, revision, internal_only)

  # TODO(eakuefner): Refactor master/bot computation to happen above this line
  # so that we can replace with a DiagnosticRef rather than a full diagnostic.
  new_guids_to_old_diagnostics = DeduplicateAndPut(
      suite_level_sparse_diagnostic_entities, suite_key, revision)
  for new_guid, old_diagnostic in new_guids_to_old_diagnostics.iteritems():
    histograms.ReplaceSharedDiagnostic(
        new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

  tasks = _BatchHistogramsIntoTasks(
      suite_key.id(), histograms, revision, benchmark_description)

  _QueueHistogramTasks(tasks)
def _Poll(self):
  """Loads the isolate output and completes with the matching values.

  Caches the trace URLs found on any histogram in self._trace_urls, narrows
  the histograms named self._hist_name by tir_label and (when set) story,
  and hands the collected values to self._Complete as a tuple.

  Raises:
    ReadValueError: If a histogram name is set but no values were found.
  """
  # TODO(simonhatch): Switch this to use the new perf-output flag instead
  # of the chartjson one. They're functionally equivalent, just new name.
  output_dicts = _RetrieveOutputJson(
      self._isolate_hash, 'chartjson-output.json')
  loaded = histogram_set.HistogramSet()
  loaded.ImportDicts(output_dicts)
  loaded.ResolveRelatedHistograms()
  candidates = loaded.GetHistogramsNamed(self._hist_name)

  # Get and cache any trace URLs.
  collected_urls = set()
  for current in loaded:
    urls = current.diagnostics.get(reserved_infos.TRACE_URLS.name)
    # TODO(simonhatch): Remove this sometime after May 2018. We had a
    # brief period where the histograms generated by tests had invalid
    # trace_urls diagnostics. If the diagnostic we get back is just a ref,
    # then skip.
    # https://github.com/catapult-project/catapult/issues/4243
    if not urls or isinstance(urls, diagnostic_ref.DiagnosticRef):
      continue
    collected_urls.update(urls)

  url_entries = []
  for url in sorted(collected_urls):
    url_entries.append({'name': url.split('/')[-1], 'url': url})
  self._trace_urls = url_entries

  # Filter the histograms by tir_label and story. Getting either the
  # tir_label or the story from a histogram involves pulling out and
  # examining various diagnostics associated with the histogram.
  tir_label = self._tir_label or ''
  labelled = []
  for candidate in candidates:
    if tir_label == histogram_helpers.GetTIRLabelFromHistogram(candidate):
      labelled.append(candidate)
  candidates = labelled

  # If no story is supplied, we're looking for a summary metric so just match
  # on name and tir_label. This is equivalent to the chartjson condition that
  # if no story is specified, look for "summary".
  if self._story:
    candidates = [
        candidate for candidate in candidates
        if self._story == _GetStoryFromHistogram(candidate)
    ]

  # Have to pull out either the raw sample values, or the statistic.
  values = []
  for candidate in candidates:
    values.extend(self._GetValuesOrStatistic(candidate))

  if self._hist_name and not values:
    description = ['histogram: %s' % self._hist_name]
    if tir_label:
      description.append(' tir_label: %s' % tir_label)
    if self._story:
      description.append(' story: %s' % self._story)
    raise ReadValueError(
        'Could not find values matching: %s' % ''.join(description))

  self._Complete(result_values=tuple(values))
def post(self):
  """Adds a single histogram or sparse shared diagnostic to the datastore.

  The |data| request parameter can be either a histogram or a sparse shared
  diagnostic; the set of diagnostics that are considered sparse (meaning that
  they don't normally change on every upload for a given benchmark from a
  given bot) is shown in add_histograms.SPARSE_DIAGNOSTIC_TYPES.

  See https://goo.gl/lHzea6 for detailed information on the JSON format for
  histograms and diagnostics.

  Request parameters:
    data: JSON encoding of a histogram or shared diagnostic.
    revision: a revision, given as an int.
    test_path: the test path to which this diagnostic or histogram should be
        attached.
    diagnostics: optional JSON list of diagnostic dicts accompanying a
        histogram; each is stored as a SparseDiagnostic.
  """
  datastore_hooks.SetPrivilegedRequest()

  data = self.request.get('data')
  revision = int(self.request.get('revision'))
  test_path = self.request.get('test_path')

  data_dict = json.loads(data)
  guid = data_dict['guid']
  # Only diagnostics carry a 'type' key; histograms do not.
  is_diagnostic = 'type' in data_dict

  test_path_parts = test_path.split('/')
  master = test_path_parts[0]
  bot = test_path_parts[1]
  test_name = '/'.join(test_path_parts[2:])
  bot_whitelist = stored_object.Get(add_point_queue.BOT_WHITELIST_KEY)
  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)
  extra_args = {} if is_diagnostic else GetUnitArgs(data_dict['unit'])
  # TODO(eakuefner): Populate benchmark_description once it appears in
  # diagnostics.
  parent_test = add_point_queue.GetOrCreateAncestors(
      master, bot, test_name, internal_only, **extra_args)
  test_key = parent_test.key

  added_rows = []
  monitored_test_keys = []

  if is_diagnostic:
    entity = histogram.SparseDiagnostic(
        id=guid, data=data, test=test_key, start_revision=revision,
        end_revision=revision, internal_only=internal_only)
  else:
    diagnostics = self.request.get('diagnostics')
    if diagnostics:
      diagnostic_data = json.loads(diagnostics)
      diagnostic_entities = []
      for diagnostic_datum in diagnostic_data:
        # TODO(eakuefner): Pass map of guid to dict to avoid overhead
        # Read the diagnostic's guid directly instead of rebinding `guid`:
        # `guid` must keep the histogram's own id for the Histogram entity
        # created below.
        diagnostic_entities.append(
            histogram.SparseDiagnostic(
                id=diagnostic_datum['guid'], data=diagnostic_datum,
                test=test_key, start_revision=revision,
                end_revision=sys.maxint, internal_only=internal_only))
      new_guids_to_existing_diagnostics = add_histograms.DeduplicateAndPut(
          diagnostic_entities, test_key, revision).iteritems()
      # TODO(eakuefner): Move per-histogram monkeypatching logic to Histogram.
      hs = histogram_set.HistogramSet()
      hs.ImportDicts([data_dict])
      # TODO(eakuefner): Share code for replacement logic with add_histograms
      for new_guid, existing_diagnostic in new_guids_to_existing_diagnostics:
        hs.ReplaceSharedDiagnostic(
            new_guid, diagnostic_ref.DiagnosticRef(
                existing_diagnostic['guid']))
      data = hs.GetFirstHistogram().AsDict()

    entity = histogram.Histogram(
        id=guid, data=data, test=test_key, revision=revision,
        internal_only=internal_only)
    row = AddRow(data_dict, test_key, revision, test_path, internal_only)
    added_rows.append(row)

    is_monitored = parent_test.sheriff and parent_test.has_rows
    if is_monitored:
      monitored_test_keys.append(parent_test.key)

  entity.put()

  tests_keys = [
      k for k in monitored_test_keys if not add_point_queue.IsRefBuild(k)
  ]

  # Updating of the cached graph revisions should happen after put because
  # it requires the new row to have a timestamp, which happens upon put.
  futures = [
      graph_revisions.AddRowsToCacheAsync(added_rows),
      find_anomalies.ProcessTestsAsync(tests_keys)
  ]
  ndb.Future.wait_all(futures)
def testAssertType(self):
  """Importing a dict with an empty 'type' must raise AssertionError."""
  target = histogram_set.HistogramSet()
  malformed = [{'type': ''}]

  with self.assertRaises(AssertionError):
    target.ImportDicts(malformed)
def main():
  """Runs the low bandwidth audio test once per analyzer and scores output.

  For each analyzer the test executable is started, every test run extracted
  from its output is scored by the analyzer, and each metric is recorded in
  a HistogramSet. If --isolated_script_test_perf_output is set, the set is
  written there as a serialized proto.

  Returns:
    1 if the PESQ tool path is unavailable; otherwise the value of
    test_process.wait() for the last test process started.
  """
  # pylint: disable=W0101
  logging.basicConfig(level=logging.INFO)
  logging.info('Invoked with %s', str(sys.argv))

  args = _ParseArgs()

  _ConfigurePythonPath(args)

  # Import catapult modules here after configuring the pythonpath.
  from tracing.value import histogram_set
  from tracing.value.diagnostics import reserved_infos
  from tracing.value.diagnostics import generic_set

  pesq_path, polqa_path = _GetPathToTools()
  if pesq_path is None:
    return 1

  out_dir = os.path.join(args.build_dir, '..')
  if args.android:
    test_command = [
        os.path.join(args.build_dir, 'bin', 'run_low_bandwidth_audio_test'),
        '-v', '--num-retries', args.num_retries
    ]
  else:
    test_command = [
        os.path.join(args.build_dir, 'low_bandwidth_audio_test')
    ]

  analyzers = [Analyzer('pesq', _RunPesq, pesq_path, 16000)]
  # Check if POLQA can run at all, or skip the 48 kHz tests entirely.
  example_path = os.path.join(SRC_DIR, 'resources', 'voice_engine',
                              'audio_tiny48.wav')
  if polqa_path and _RunPolqa(polqa_path, example_path, example_path):
    analyzers.append(Analyzer('polqa', _RunPolqa, polqa_path, 48000))

  histograms = histogram_set.HistogramSet()
  for analyzer in analyzers:
    # Start the test executable that produces audio files.
    test_process = subprocess.Popen(_LogCommand(test_command + [
        '--sample_rate_hz=%d' % analyzer.sample_rate_hz,
        '--test_case_prefix=%s' % analyzer.name,
    ] + args.extra_test_args),
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
    # Stays None unless at least one test run is extracted below.
    perf_results_file = None
    try:
      # Read stdout line by line until readline returns the '' sentinel.
      lines = iter(test_process.stdout.readline, '')
      for result in ExtractTestRuns(lines, echo=True):
        (android_device, test_name, reference_file, degraded_file,
         perf_results_file) = result

        adb_prefix = (args.adb_path, )
        if android_device:
          adb_prefix += ('-s', android_device)

        reference_file = _GetFile(reference_file,
                                  out_dir,
                                  android=args.android,
                                  adb_prefix=adb_prefix)
        degraded_file = _GetFile(degraded_file,
                                 out_dir,
                                 move=True,
                                 android=args.android,
                                 adb_prefix=adb_prefix)

        analyzer_results = analyzer.func(analyzer.executable, reference_file,
                                         degraded_file)
        for metric, (value, units) in analyzer_results.items():
          hist = histograms.CreateHistogram(metric, units, [value])
          user_story = generic_set.GenericSet([test_name])
          hist.diagnostics[reserved_infos.STORIES.name] = user_story

          # Output human readable results.
          print 'RESULT %s: %s= %s %s' % (metric, test_name, value, units)

        if args.remove:
          os.remove(reference_file)
          os.remove(degraded_file)
    finally:
      # Always stop the test process, even if scoring raised.
      test_process.terminate()
    # adb_prefix is only bound when a test run was extracted, which is also
    # the only way perf_results_file can be set.
    if perf_results_file:
      perf_results_file = _GetFile(perf_results_file,
                                   out_dir,
                                   move=True,
                                   android=args.android,
                                   adb_prefix=adb_prefix)
      _MergeInPerfResultsFromCcTests(histograms, perf_results_file)
      if args.remove:
        os.remove(perf_results_file)

  if args.isolated_script_test_perf_output:
    with open(args.isolated_script_test_perf_output, 'wb') as f:
      f.write(histograms.AsProto().SerializeToString())

  return test_process.wait()
def _LoadHistogramSet(dicts):
  """Returns a new HistogramSet populated from the given dicts."""
  loaded = histogram_set.HistogramSet()
  loaded.ImportDicts(dicts)
  return loaded
def testDeduplicateDiagnostics(self):
  """Equal diagnostics share one guid after DeduplicateDiagnostics.

  Histograms 'a' and 'b' start with equal diagnostics that have different
  guids; 'b' and 'c' share the very same generic diagnostic. After
  deduplication the guids must match, and the sharing must survive a
  round trip through AsDicts/ImportDicts.
  """
  generic_a = generic_set.GenericSet(['A'])
  generic_b = generic_set.GenericSet(['B'])
  date_a = date_range.DateRange(42)
  date_b = date_range.DateRange(57)

  def MergedGeneric():
    # Fresh copy of generic_a with generic_b merged in.
    merged = generic_set.GenericSet.FromDict(generic_a.AsDict())
    merged.AddDiagnostic(generic_b)
    return merged

  def MergedDate():
    # Fresh copy of date_a with date_b merged in.
    merged = date_range.DateRange.FromDict(date_a.AsDict())
    merged.AddDiagnostic(date_b)
    return merged

  a_hist = histogram.Histogram('a', 'unitless')
  a_hist.diagnostics['generic'] = MergedGeneric()
  a_hist.diagnostics['date'] = MergedDate()

  b_hist = histogram.Histogram('b', 'unitless')
  shared_generic = MergedGeneric()
  b_hist.diagnostics['generic'] = shared_generic
  b_hist.diagnostics['date'] = MergedDate()

  # 'c' reuses the exact diagnostic object attached to 'b'.
  c_hist = histogram.Histogram('c', 'unitless')
  c_hist.diagnostics['generic'] = shared_generic

  histograms = histogram_set.HistogramSet([a_hist, b_hist, c_hist])

  # Before deduplication: equal content, distinct guids between a and b.
  self.assertNotEqual(
      a_hist.diagnostics['generic'].guid, b_hist.diagnostics['generic'].guid)
  self.assertEqual(
      b_hist.diagnostics['generic'].guid, c_hist.diagnostics['generic'].guid)
  self.assertEqual(
      a_hist.diagnostics['generic'], b_hist.diagnostics['generic'])
  self.assertNotEqual(
      a_hist.diagnostics['date'].guid, b_hist.diagnostics['date'].guid)
  self.assertEqual(
      a_hist.diagnostics['date'], b_hist.diagnostics['date'])

  histograms.DeduplicateDiagnostics()

  # After deduplication: guids agree everywhere.
  self.assertEqual(
      a_hist.diagnostics['generic'].guid, b_hist.diagnostics['generic'].guid)
  self.assertEqual(
      b_hist.diagnostics['generic'].guid, c_hist.diagnostics['generic'].guid)
  self.assertEqual(
      a_hist.diagnostics['generic'], b_hist.diagnostics['generic'])
  self.assertEqual(
      a_hist.diagnostics['date'].guid, b_hist.diagnostics['date'].guid)
  self.assertEqual(
      a_hist.diagnostics['date'], b_hist.diagnostics['date'])

  histogram_dicts = histograms.AsDicts()

  # All diagnostics should have been serialized as DiagnosticRefs.
  for entry in histogram_dicts:
    if 'type' in entry:
      continue
    for diagnostic_dict in entry['diagnostics'].values():
      self.assertIsInstance(diagnostic_dict, str)

  # Round trip through dicts and verify the sharing is preserved.
  histograms2 = histogram_set.HistogramSet()
  histograms2.ImportDicts(histograms.AsDicts())

  a_hists = histograms2.GetHistogramsNamed('a')
  self.assertEqual(len(a_hists), 1)
  a_hist2 = a_hists[0]

  b_hists = histograms2.GetHistogramsNamed('b')
  self.assertEqual(len(b_hists), 1)
  b_hist2 = b_hists[0]

  self.assertEqual(
      a_hist2.diagnostics['generic'].guid,
      b_hist2.diagnostics['generic'].guid)
  self.assertEqual(
      a_hist2.diagnostics['generic'], b_hist2.diagnostics['generic'])
  self.assertEqual(
      a_hist2.diagnostics['date'].guid, b_hist2.diagnostics['date'].guid)
  self.assertEqual(
      a_hist2.diagnostics['date'], b_hist2.diagnostics['date'])
def DumpsFirst(n):
  """Dumps the first n histograms, deduplicating diagnostics when n > 1."""
  prefix_set = histogram_set.HistogramSet(histograms[:n])
  if n > 1:
    prefix_set.DeduplicateDiagnostics()
  serialized = prefix_set.AsDicts()
  return Dumps(serialized)
def testComputeTestPathWithoutStory(self):
  """Without story diagnostics the test path is just the histogram name."""
  wrapped = histogram_set.HistogramSet(
      [histogram_module.Histogram('hist', 'count')])
  first = wrapped.GetFirstHistogram()

  computed_path = histogram_helpers.ComputeTestPath(first)

  self.assertEqual('hist', computed_path)
def testLogDebugInfo_NoLogUrls(self, mock_log):
  """Checks the message reported when the histogram has no LOG_URLS."""
  bare_hist = histogram_module.Histogram('hist', 'count')
  single_set = histogram_set.HistogramSet([bare_hist])

  add_histograms._LogDebugInfo(single_set)

  # Exactly one call is expected on the patched logger.
  mock_log.assert_called_once_with('No LOG_URLS in data.')
def RunMetrics(trace_processor_path, trace_file, metric_names,
               fetch_power_profile=False, retain_all_samples=False):
  """Run TBMv3 metrics using trace processor.

  Args:
    trace_processor_path: path to the trace_processor executable.
    trace_file: path to the trace file.
    metric_names: a list of metric names (the corresponding files must exist in
        tbmv3/metrics directory).
    fetch_power_profile: if True, fetch the power profile SQL data file and
        pass it to trace processor through --pre-metrics.
    retain_all_samples: if True, set each histogram's max_num_sample_values
        to infinity before samples are added.

  Returns:
    A HistogramSet with metric results.
  """
  trace_processor_path = _EnsureTraceProcessor(trace_processor_path)
  metric_name_args = []
  for metric_name in metric_names:
    metric_files = _CreateMetricFiles(metric_name)
    if metric_files.internal_metric:
      metric_name_args.append(metric_name)
    else:
      metric_name_args.append(metric_files.sql)
  command_args = [
      trace_processor_path,
      '--run-metrics', ','.join(metric_name_args),
      '--metrics-output', 'json',
      trace_file,
  ]
  if fetch_power_profile:
    power_profile_sql = binary_deps_manager.FetchDataFile(POWER_PROFILE_SQL)
    # Insert right after the executable so the flag precedes the others.
    command_args[1:1] = ['--pre-metrics', power_profile_sql]

  output = _RunTraceProcessor(*command_args)
  measurements = json.loads(output)

  histograms = histogram_set.HistogramSet()
  root_annotations = measurements.get('__annotations', {})
  for metric_name in metric_names:
    full_metric_name = 'perfetto.protos.' + metric_name
    annotations = root_annotations.get(full_metric_name, None)
    metric_proto = measurements.get(full_metric_name, None)
    if metric_proto is None:
      # logging.warn is a deprecated alias; use logging.warning.
      logging.warning("Metric not found in the output: %s", metric_name)
      continue
    if annotations is None:
      logging.info(
          "Skipping metric %s because it has no field with unit.",
          metric_name)
      continue

    for field in _LeafFieldAnnotations(annotations):
      unit = field.field_options.get('unit', None)
      if unit is None:
        logging.debug(
            'Skipping field %s to histograms because it has no unit',
            field.name)
        continue
      # A distinct comprehension variable keeps the outer `field` intact:
      # under Python 2 a list comprehension rebinds its loop variable in the
      # enclosing scope, which would corrupt the _PluckField call below.
      histogram_name = ':'.join(
          [path_field.name for path_field in field.path_from_root])
      samples = _PluckField(metric_proto, field.path_from_root)
      scoped_histogram_name = _ScopedHistogramName(metric_name, histogram_name)
      hist = Histogram(scoped_histogram_name, unit)
      if retain_all_samples:
        hist.max_num_sample_values = float('inf')
      for sample in samples:
        hist.AddSample(sample)
      histograms.AddHistogram(hist)

  return histograms
def testEvaluateSuccess_HistogramSkipRefTraceUrls(self, isolate_retrieve):
  """Only the first histogram's trace URLs are expected in the result.

  The second histogram carries a trace URL diagnostic whose guid is
  overridden to 'foo'; its URL does not appear in the expected trace_urls.
  """
  trace_urls_key = reserved_infos.TRACE_URLS.name

  chart_hist = histogram_module.Histogram('some_chart', 'count')
  chart_hist.AddSample(0)
  chart_hist.diagnostics[trace_urls_key] = generic_set.GenericSet(
      ['trace_url1', 'trace_url2'])

  other_hist = histogram_module.Histogram('hist2', 'count')
  other_hist.diagnostics[trace_urls_key] = generic_set.GenericSet(
      ['trace_url3'])
  other_hist.diagnostics[trace_urls_key].guid = 'foo'

  histograms = histogram_set.HistogramSet([chart_hist, other_hist])

  # Each of the 10 attempts retrieves the file listing, then the histograms.
  isolate_listing = ('{"files": {"some_benchmark/perf_results.json": '
                     '{"h": "394890891823812873798734a"}}}')
  responses = [isolate_listing, json.dumps(histograms.AsDicts())]
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat(responses, 10))

  self.PopulateTaskGraph(benchmark='some_benchmark', chart='some_chart')

  initiate_event = event_module.Event(
      type='initiate', target_task=None, payload={})
  self.assertNotEqual(
      {}, task_module.Evaluate(self.job, initiate_event, self.evaluator))

  expected = {}
  for attempt in range(10):
    expected['read_value_chromium@aaaaaaa_%s' % (attempt,)] = {
        'benchmark': 'some_benchmark',
        'change': mock.ANY,
        'mode': 'histogram_sets',
        'results_filename': 'some_benchmark/perf_results.json',
        'histogram_options': {
            'grouping_label': None,
            'story': None,
            'statistic': None,
            'histogram_name': 'some_chart',
        },
        'graph_json_options': {
            'chart': 'some_chart',
            'trace': 'some_trace'
        },
        'result_values': [0],
        'status': 'completed',
        'tries': 1,
        'trace_urls': [{
            'key': 'trace',
            'value': 'trace_url1',
            'url': 'trace_url1'
        }, {
            'key': 'trace',
            'value': 'trace_url2',
            'url': 'trace_url2',
        }],
        'index': attempt,
    }

  select_event = event_module.Event(
      type='select', target_task=None, payload={})
  self.assertEqual(
      expected,
      task_module.Evaluate(self.job, select_event,
                           evaluators.Selector(task_type='read_value')))
def ProcessHistogramSet(histogram_dicts, completion_token=None):
  """Validates an uploaded histogram set and queues it for ingestion.

  Most stages run inside a timing.WallTimeLogger context labelled with the
  stage name.

  Args:
    histogram_dicts: The decoded HistogramSet JSON; must be a list of dicts.
    completion_token: Optional value forwarded unchanged to
      _CreateHistogramTasks.

  Raises:
    api_request_handler.BadRequestError: If histogram_dicts is not a list or
      contains no histograms.
  """
  if not isinstance(histogram_dicts, list):
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must be a list of dicts')

  histograms = histogram_set.HistogramSet()

  with timing.WallTimeLogger('hs.ImportDicts'):
    histograms.ImportDicts(histogram_dicts)

  with timing.WallTimeLogger('hs.DeduplicateDiagnostics'):
    histograms.DeduplicateDiagnostics()

  if len(histograms) == 0:
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must contain at least one histogram.')

  with timing.WallTimeLogger('hs._LogDebugInfo'):
    _LogDebugInfo(histograms)

  with timing.WallTimeLogger('InlineDenseSharedDiagnostics'):
    InlineDenseSharedDiagnostics(histograms)

  # TODO(#4242): Get rid of this.
  # https://github.com/catapult-project/catapult/issues/4242
  with timing.WallTimeLogger('_PurgeHistogramBinData'):
    _PurgeHistogramBinData(histograms)

  # Suite identity is read from the first histogram's diagnostics.
  with timing.WallTimeLogger('_GetDiagnosticValue calls'):
    master = _GetDiagnosticValue(
        reserved_infos.MASTERS.name, histograms.GetFirstHistogram())
    bot = _GetDiagnosticValue(
        reserved_infos.BOTS.name, histograms.GetFirstHistogram())
    benchmark = _GetDiagnosticValue(
        reserved_infos.BENCHMARKS.name, histograms.GetFirstHistogram())
    benchmark_description = _GetDiagnosticValue(
        reserved_infos.BENCHMARK_DESCRIPTIONS.name,
        histograms.GetFirstHistogram(), optional=True)

  with timing.WallTimeLogger('_ValidateMasterBotBenchmarkName'):
    _ValidateMasterBotBenchmarkName(master, bot, benchmark)

  with timing.WallTimeLogger('ComputeRevision'):
    suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))
    logging.info('Suite: %s', suite_key.id())

    revision = ComputeRevision(histograms)
    logging.info('Revision: %s', revision)

    internal_only = graph_data.Bot.GetInternalOnlySync(master, bot)

  # Record this revision, then look up the latest record for the suite.
  revision_record = histogram.HistogramRevisionRecord.GetOrCreate(
      suite_key, revision)
  revision_record.put()

  last_added = histogram.HistogramRevisionRecord.GetLatest(
      suite_key).get_result()

  # On first upload, a query immediately following a put may return nothing.
  if not last_added:
    last_added = revision_record

  _CheckRequest(last_added, 'No last revision')

  # We'll skip the histogram-level sparse diagnostics because we need to
  # handle those with the histograms, below, so that we can properly assign
  # test paths.
  with timing.WallTimeLogger('FindSuiteLevelSparseDiagnostics'):
    suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
        histograms, suite_key, revision, internal_only)

  # TODO(896856): Refactor master/bot computation to happen above this line
  # so that we can replace with a DiagnosticRef rather than a full diagnostic.
  with timing.WallTimeLogger('DeduplicateAndPut'):
    new_guids_to_old_diagnostics = (
        histogram.SparseDiagnostic.FindOrInsertDiagnostics(
            suite_level_sparse_diagnostic_entities, suite_key, revision,
            last_added.revision).get_result())

  with timing.WallTimeLogger('ReplaceSharedDiagnostic calls'):
    for new_guid, old_diagnostic in new_guids_to_old_diagnostics.items():
      histograms.ReplaceSharedDiagnostic(
          new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

  with timing.WallTimeLogger('_CreateHistogramTasks'):
    tasks = _CreateHistogramTasks(suite_key.id(), histograms, revision,
                                  benchmark_description, completion_token)

  with timing.WallTimeLogger('_QueueHistogramTasks'):
    _QueueHistogramTasks(tasks)
def testEvaluateSuccess_HistogramSummary(self, isolate_retrieve):
  """With no story or label requested, the result is the sum of all samples.

  Twenty 'some_chart' histograms are built (ten per story group) and the
  expected result_values is a single element: the sum of every sample.
  """
  samples = []
  hists = []
  story_groups = (
      ('story', 'group:label1'),
      ('another_story', 'group:label2'),
  )
  for story_prefix, story_tag in story_groups:
    for i in range(10):
      hist = histogram_module.Histogram('some_chart', 'count')
      for sample in (0, 1, 2):
        hist.AddSample(sample)
      hist.diagnostics[reserved_infos.STORIES.name] = (
          generic_set.GenericSet(['%s%d' % (story_prefix, i)]))
      hist.diagnostics[reserved_infos.STORY_TAGS.name] = (
          generic_set.GenericSet([story_tag]))
      hists.append(hist)
      samples.extend(hist.sample_values)

  histograms = histogram_set.HistogramSet(hists)
  histograms.AddSharedDiagnosticToAllHistograms(
      reserved_infos.STORY_TAGS.name, generic_set.GenericSet(['group:label']))

  # Each of the 10 attempts retrieves the file listing, then the histograms.
  isolate_listing = ('{"files": {"some_benchmark/perf_results.json": '
                     '{"h": "394890891823812873798734a"}}}')
  responses = [isolate_listing, json.dumps(histograms.AsDicts())]
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat(responses, 10))

  self.PopulateTaskGraph(benchmark='some_benchmark', chart='some_chart')

  initiate_event = event_module.Event(
      type='initiate', target_task=None, payload={})
  self.assertNotEqual(
      {}, task_module.Evaluate(self.job, initiate_event, self.evaluator))

  expected = {}
  for attempt in range(10):
    expected['read_value_chromium@aaaaaaa_%s' % (attempt,)] = {
        'benchmark': 'some_benchmark',
        'change': mock.ANY,
        'mode': 'histogram_sets',
        'results_filename': 'some_benchmark/perf_results.json',
        'histogram_options': {
            'grouping_label': None,
            'story': None,
            'statistic': None,
            'histogram_name': 'some_chart',
        },
        'graph_json_options': {
            'chart': 'some_chart',
            'trace': 'some_trace'
        },
        'result_values': [sum(samples)],
        'status': 'completed',
        'tries': 1,
        'index': attempt,
    }

  select_event = event_module.Event(
      type='select', target_task=None, payload={})
  self.assertEqual(
      expected,
      task_module.Evaluate(self.job, select_event,
                           evaluators.Selector(task_type='read_value')))
def testImportDicts(self):
  """A set rebuilt from AsDicts() has as many histograms as the source."""
  source = histogram_set.HistogramSet([histogram.Histogram('', 'unitless')])

  rebuilt = histogram_set.HistogramSet()
  rebuilt.ImportDicts(source.AsDicts())

  self.assertEqual(len(source), len(rebuilt))
def testEvaluateFailure_HistogramNoSamples(self, isolate_retrieve):
  """A matching histogram without samples yields a ReadValueNoValues failure."""
  empty_hist = histogram_module.Histogram('some_chart', 'count')
  histograms = histogram_set.HistogramSet([empty_hist])
  shared_diagnostics = (
      (reserved_infos.STORY_TAGS.name, ['group:label']),
      (reserved_infos.STORIES.name, ['https://story']),
  )
  for diagnostic_name, diagnostic_values in shared_diagnostics:
    histograms.AddSharedDiagnosticToAllHistograms(
        diagnostic_name, generic_set.GenericSet(diagnostic_values))

  # Each of the 10 attempts retrieves the file listing, then the histograms.
  isolate_listing = ('{"files": {"some_benchmark/perf_results.json": '
                     '{"h": "394890891823812873798734a"}}}')
  responses = [isolate_listing, json.dumps(histograms.AsDicts())]
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat(responses, 10))

  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='some_chart',
      grouping_label='label',
      story='https://story')

  initiate_event = event_module.Event(
      type='initiate', target_task=None, payload={})
  self.assertNotEqual(
      {}, task_module.Evaluate(self.job, initiate_event, self.evaluator))

  expected = {}
  for attempt in range(10):
    expected['read_value_chromium@aaaaaaa_%s' % (attempt,)] = {
        'benchmark': 'some_benchmark',
        'change': mock.ANY,
        'mode': 'histogram_sets',
        'results_filename': 'some_benchmark/perf_results.json',
        'histogram_options': {
            'grouping_label': 'label',
            'story': 'https://story',
            'statistic': None,
            'histogram_name': 'some_chart',
        },
        'graph_json_options': {
            'chart': 'some_chart',
            'trace': 'some_trace'
        },
        'status': 'failed',
        'errors': [{
            'reason': 'ReadValueNoValues',
            'message': mock.ANY,
        }],
        'tries': 1,
        'index': attempt,
    }

  select_event = event_module.Event(
      type='select', target_task=None, payload={})
  self.assertEqual(
      expected,
      task_module.Evaluate(self.job, select_event,
                           evaluators.Selector(task_type='read_value')))
def _LoadHistogramSet(dicts):
  """Returns a new HistogramSet from dicts with related histograms resolved."""
  loaded = histogram_set.HistogramSet()
  loaded.ImportDicts(dicts)
  loaded.ResolveRelatedHistograms()
  return loaded
def testEvaluateSuccess_WithData(self, isolate_retrieve):
  """A histogram matching label and story yields its raw sample values."""
  # Seed the response to the call to the isolate service.
  sampled_hist = histogram_module.Histogram('some_chart', 'count')
  for sample in (0, 1, 2):
    sampled_hist.AddSample(sample)
  histograms = histogram_set.HistogramSet([sampled_hist])
  shared_diagnostics = (
      (reserved_infos.STORY_TAGS.name, ['group:label']),
      (reserved_infos.STORIES.name, ['story']),
  )
  for diagnostic_name, diagnostic_values in shared_diagnostics:
    histograms.AddSharedDiagnosticToAllHistograms(
        diagnostic_name, generic_set.GenericSet(diagnostic_values))

  # Each of the 10 attempts retrieves the file listing, then the histograms.
  isolate_listing = ('{"files": {"some_benchmark/perf_results.json": '
                     '{"h": "394890891823812873798734a"}}}')
  responses = [isolate_listing, json.dumps(histograms.AsDicts())]
  isolate_retrieve.side_effect = itertools.chain(
      *itertools.repeat(responses, 10))

  # Build a graph that asks for no statistic.
  self.PopulateTaskGraph(
      benchmark='some_benchmark',
      chart='some_chart',
      grouping_label='label',
      story='story')

  initiate_event = event_module.Event(
      type='initiate', target_task=None, payload={})
  self.assertNotEqual(
      {}, task_module.Evaluate(self.job, initiate_event, self.evaluator))

  # Every attempt should complete with the histogram's sample values.
  expected = {}
  for attempt in range(10):
    expected['read_value_chromium@aaaaaaa_%s' % (attempt,)] = {
        'benchmark': 'some_benchmark',
        'change': mock.ANY,
        'mode': 'histogram_sets',
        'results_filename': 'some_benchmark/perf_results.json',
        'histogram_options': {
            'grouping_label': 'label',
            'story': 'story',
            'statistic': None,
            'histogram_name': 'some_chart',
        },
        'graph_json_options': {
            'chart': 'some_chart',
            'trace': 'some_trace',
        },
        'status': 'completed',
        'result_values': [0, 1, 2],
        'tries': 1,
        'index': attempt,
    }

  select_event = event_module.Event(
      type='select', target_task=None, payload={})
  self.assertEqual(
      expected,
      task_module.Evaluate(self.job, select_event,
                           evaluators.Selector(task_type='read_value')))
def _LoadHistogramSetFromProto(options):
  """Builds a HistogramSet from the proto data in options.input_results_file.

  Args:
    options: Object whose input_results_file attribute is used as a context
        manager and read in full.

  Returns:
    The HistogramSet populated through ImportProto.
  """
  loaded_set = histogram_set.HistogramSet()
  with options.input_results_file as proto_file:
    serialized_proto = proto_file.read()
    loaded_set.ImportProto(serialized_proto)
  return loaded_set
def testHistogramsOutputMeasurements(self):
  """Verifies ad hoc measurements are emitted as histograms.

  Two measurements ('a' and 'b', both in 'ms') are attached to a single
  'benchmark/story' result. After running the processor with the
  'histograms' output format, the output file must contain exactly two
  histograms carrying the measured samples and descriptions, the unit
  'ms_smallerIsBetter', and the benchmark / story / story-tag /
  benchmark-start diagnostics.
  """
  benchmark_start_s = 1500000000
  benchmark_start_iso = (
      datetime.datetime.utcfromtimestamp(benchmark_start_s).isoformat() + 'Z')
  measurements = {
      'a': {'unit': 'ms', 'samples': [4, 6], 'description': 'desc_a'},
      'b': {'unit': 'ms', 'samples': [5], 'description': 'desc_b'},
  }

  story_result = testing.TestResult(
      'benchmark/story',
      output_artifacts=[self.CreateMeasurementsArtifact(measurements)],
      tags=['story_tag:test'],
      start_time=benchmark_start_iso,
  )
  self.SerializeIntermediateResults(story_result)

  processor.main([
      '--output-format', 'histograms',
      '--output-dir', self.output_dir,
      '--intermediate-dir', self.intermediate_dir,
  ])

  output_path = os.path.join(
      self.output_dir, histograms_output.OUTPUT_FILENAME)
  with open(output_path) as output_file:
    histogram_dicts = json.load(output_file)

  out_histograms = histogram_set.HistogramSet()
  out_histograms.ImportDicts(histogram_dicts)
  self.assertEqual(len(out_histograms), 2)

  # (name, expected samples, expected description) per measurement; all the
  # remaining expectations are shared by both histograms.
  expectations = (
      ('a', [4, 6], 'desc_a'),
      ('b', [5], 'desc_b'),
  )
  for name, samples, description in expectations:
    hist = out_histograms.GetHistogramNamed(name)
    self.assertEqual(hist.name, name)
    self.assertEqual(hist.unit, 'ms_smallerIsBetter')
    self.assertEqual(hist.sample_values, samples)
    self.assertEqual(hist.description, description)
    self.assertEqual(hist.diagnostics['benchmarks'],
                     generic_set.GenericSet(['benchmark']))
    self.assertEqual(hist.diagnostics['stories'],
                     generic_set.GenericSet(['story']))
    self.assertEqual(hist.diagnostics['storyTags'],
                     generic_set.GenericSet(['test']))
    # The start timestamp is compared in milliseconds (seconds * 1e3).
    self.assertEqual(hist.diagnostics['benchmarkStart'],
                     date_range.DateRange(benchmark_start_s * 1e3))
def __init__(self, output_formatters=None, progress_stream=None,
             output_dir=None, intermediate_dir=None, benchmark_name=None,
             benchmark_description=None, upload_bucket=None,
             results_label=None):
  """
  Args:
    output_formatters: A list of output formatters, typically used to format
      the test results (e.g. CsvOutputFormatter, which outputs the test
      results as CSV). Defaults to an empty list.
    progress_stream: A file-like object where progress reports are written
      as stories are being run. Can be None to suppress progress reporting.
    output_dir: A string specifying the directory where to store the test
      artifacts, e.g: trace, videos, etc.
    intermediate_dir: A string specifying the directory in which the
      TELEMETRY_RESULTS file is opened for writing. When omitted and
      output_dir is given, it defaults to the 'artifacts' directory under
      output_dir. The directory is created if it does not exist.
    benchmark_name: A string with the name of the currently running
      benchmark. Falls back to '(unknown benchmark)'.
    benchmark_description: A string with a description of the currently
      running benchmark. Falls back to the empty string.
    upload_bucket: A string identifying a cloud storage bucket where to
      upload artifacts.
    results_label: A string that serves as an identifier for the current
      benchmark run.
  """
  super(PageTestResults, self).__init__()
  self._progress_reporter = gtest_progress_reporter.GTestProgressReporter(
      progress_stream)

  if output_formatters is None:
    output_formatters = []
  self._output_formatters = output_formatters

  self._output_dir = output_dir
  if intermediate_dir is None and output_dir is not None:
    intermediate_dir = os.path.join(output_dir, 'artifacts')
  self._intermediate_dir = intermediate_dir
  self._upload_bucket = upload_bucket

  self._current_story_run = None
  self._all_story_runs = []
  self._all_stories = set()
  self._representative_value_for_each_value_name = {}
  self._histograms = histogram_set.HistogramSet()

  if not benchmark_name:
    benchmark_name = '(unknown benchmark)'
  self._benchmark_name = benchmark_name
  if not benchmark_description:
    benchmark_description = ''
  self._benchmark_description = benchmark_description

  # |_interruption| is None if the benchmark has not been interrupted.
  # Otherwise it is a string explaining the reason for the interruption.
  # Interruptions occur for unrecoverable exceptions.
  self._interruption = None
  self._results_label = results_label

  self._diagnostics = {
      reserved_infos.BENCHMARKS.name: [self.benchmark_name],
      reserved_infos.BENCHMARK_DESCRIPTIONS.name: [
          self.benchmark_description],
  }

  # If the object has been finalized, no more results can be added to it.
  self._finalized = False
  self._start_time = time.time()

  self._results_stream = None
  if self._intermediate_dir is not None:
    if not os.path.exists(self._intermediate_dir):
      os.makedirs(self._intermediate_dir)
    results_path = os.path.join(self._intermediate_dir, TELEMETRY_RESULTS)
    self._results_stream = open(results_path, 'w')
    # NOTE(review): the flattened original does not show whether this call
    # was nested under the branch above; it is kept inside so that it only
    # runs once a results stream exists -- confirm against the upstream file.
    self._RecordBenchmarkStart()
def testComplexMetricOutput_TBMv3(self):
  """Verifies histogram output for a result tagged 'tbmv3:dummy_metric'.

  The processor is run with '--experimental-tbmv3-metrics' and the
  'histograms' output format. Annotated fields of the dummy metric must show
  up as histograms with the expected units and values, while unannotated
  nested fields must be absent from the output.
  """
  story_result = testing.TestResult(
      'benchmark/story',
      output_artifacts=[
          self.CreateProtoTraceArtifact(),
          self.CreateDiagnosticsArtifact(
              benchmarks=['benchmark'],
              osNames=['linux'],
              documentationUrls=[['documentation', 'url']]),
      ],
      tags=['tbmv3:dummy_metric'],
      start_time='2009-02-13T23:31:30.987000Z',
  )
  self.SerializeIntermediateResults(story_result)

  processor.main([
      '--output-format', 'histograms',
      '--output-dir', self.output_dir,
      '--intermediate-dir', self.intermediate_dir,
      '--results-label', 'label',
      '--experimental-tbmv3-metrics',
  ])

  output_path = os.path.join(
      self.output_dir, histograms_output.OUTPUT_FILENAME)
  with open(output_path) as output_file:
    histogram_dicts = json.load(output_file)

  # For testing the TBMv3 workflow we use dummy_metric defined in
  # tools/perf/core/tbmv3/metrics/dummy_metric_*.
  out_histograms = histogram_set.HistogramSet()
  out_histograms.ImportDicts(histogram_dicts)

  simple_field = out_histograms.GetHistogramNamed("dummy::simple_field")
  self.assertEqual(simple_field.unit, "count_smallerIsBetter")
  self.assertEqual((simple_field.num_values, simple_field.average), (1, 42))

  repeated_field = out_histograms.GetHistogramNamed("dummy::repeated_field")
  self.assertEqual(repeated_field.unit, "ms_biggerIsBetter")
  self.assertEqual(repeated_field.num_values, 3)
  self.assertEqual(repeated_field.sample_values, [1, 2, 3])

  # Unannotated fields should not be included in final histogram output.
  unannotated_names = (
      "dummy::simple_nested:unannotated_field",
      "dummy::repeated_nested:unannotated_field",
  )
  for unannotated_name in unannotated_names:
    matching_histograms = out_histograms.GetHistogramsNamed(unannotated_name)
    self.assertEqual(len(matching_histograms), 0)

  simple_nested_annotated = out_histograms.GetHistogramNamed(
      "dummy::simple_nested:annotated_field")
  self.assertEqual(simple_nested_annotated.unit, "ms_smallerIsBetter")
  self.assertEqual(simple_nested_annotated.num_values, 1)
  self.assertEqual(simple_nested_annotated.average, 44)

  repeated_nested_annotated = out_histograms.GetHistogramNamed(
      "dummy::repeated_nested:annotated_field")
  self.assertEqual(repeated_nested_annotated.unit, "ms_smallerIsBetter")
  self.assertEqual(repeated_nested_annotated.num_values, 2)
  self.assertEqual(repeated_nested_annotated.sample_values, [2, 4])
def __init__(self, output_formatters=None, progress_reporter=None,
             output_dir=None, should_add_value=None, benchmark_enabled=True,
             upload_bucket=None, artifact_results=None,
             benchmark_metadata=None):
  """
  Args:
    output_formatters: A list of output formatters, typically used to format
      the test results (e.g. CsvPivotTableOutputFormatter, which outputs the
      test results as CSV). Defaults to an empty list.
    progress_reporter: An instance of progress_reporter.ProgressReporter, to
      be used to output test status/results progressively. A default
      reporter_module.ProgressReporter is created when omitted.
    output_dir: A string specifying the directory where to store the test
      artifacts, e.g: trace, videos,...
    should_add_value: A function that takes two arguments: a value name and
      a boolean (True when the value belongs to the first run of the
      corresponding story). It returns True if the value should be added
      to the test results and False otherwise. When omitted, a function
      that always returns True is used.
    benchmark_enabled: Stored as the state of the benchmark this set of
      results represents.
    upload_bucket: Forwarded, together with output_dir, to TelemetryInfo.
    artifact_results: An artifact results object. This is used to contain
      any artifacts from tests. Stored so that clients can call AddArtifact.
    benchmark_metadata: A benchmark.BenchmarkMetadata object. This is used
      in the chart JSON output formatter.
  """
  super(PageTestResults, self).__init__()

  if progress_reporter is None:
    progress_reporter = reporter_module.ProgressReporter()
  self._progress_reporter = progress_reporter

  if output_formatters is None:
    output_formatters = []
  self._output_formatters = output_formatters

  self._output_dir = output_dir
  self._should_add_value = (
      should_add_value if should_add_value is not None
      else (lambda v, is_first: True))

  self._current_page_run = None
  self._all_page_runs = []
  self._all_stories = set()
  self._representative_value_for_each_value_name = {}
  self._all_summary_values = []
  self._serialized_trace_file_ids_to_paths = {}

  self._histograms = histogram_set.HistogramSet()
  self._telemetry_info = TelemetryInfo(
      upload_bucket=upload_bucket, output_dir=output_dir)

  # State of the benchmark this set of results represents.
  self._benchmark_enabled = benchmark_enabled

  self._artifact_results = artifact_results
  self._benchmark_metadata = benchmark_metadata
  self._histogram_dicts_to_add = []

  # Mapping of the stories that have run to the number of times they have run
  # This is necessary on interrupt if some of the stories did not run.
  self._story_run_count = {}
def ProcessHistogramSet(histogram_dicts):
  """Imports a serialized HistogramSet and enqueues one task per histogram.

  The dicts are loaded into a HistogramSet, suite-level sparse diagnostics
  are collected and passed to DeduplicateAndPut, shared diagnostics are
  replaced according to the mapping it returns, and finally one task per
  histogram is added to the TASK_QUEUE_NAME task queue.

  Args:
    histogram_dicts: A list of dicts as accepted by HistogramSet.ImportDicts.

  Raises:
    api_request_handler.BadRequestError: If histogram_dicts is not a list.
    ValueError: If a diagnostic named in SUITE_LEVEL_SPARSE_DIAGNOSTIC_NAMES
      has different guids on different histograms.
  """
  if not isinstance(histogram_dicts, list):
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must be a list of dicts')

  histograms = histogram_set.HistogramSet()
  histograms.ImportDicts(histogram_dicts)
  histograms.ResolveRelatedHistograms()
  InlineDenseSharedDiagnostics(histograms)

  revision = ComputeRevision(histograms)
  suite_key = GetSuiteKey(histograms)

  task_list = []
  suite_level_sparse_diagnostic_entities = []
  # Maps a suite-level diagnostic name to the first guid seen for it.
  diagnostic_names_added = {}

  # We'll skip the histogram-level sparse diagnostics because we need to
  # handle those with the histograms, below, so that we can properly assign
  # test paths.
  for hist in histograms:
    for name, diag in hist.diagnostics.iteritems():
      if name in SUITE_LEVEL_SPARSE_DIAGNOSTIC_NAMES:
        if diagnostic_names_added.get(name) is None:
          diagnostic_names_added[name] = diag.guid

        # Every histogram must share the very same diagnostic for this name.
        if diagnostic_names_added.get(name) != diag.guid:
          raise ValueError(
              name + ' diagnostics must be the same for all histograms')

      if (name in SUITE_LEVEL_SPARSE_DIAGNOSTIC_NAMES or
          type(diag) in SUITE_LEVEL_SPARSE_DIAGNOSTIC_TYPES):
        suite_level_sparse_diagnostic_entities.append(
            histogram.SparseDiagnostic(
                id=diag.guid,
                data=diag.AsDict(),
                test=suite_key,
                start_revision=revision,
                end_revision=sys.maxint,
                name=name))

  # TODO(eakuefner): Refactor master/bot computation to happen above this line
  # so that we can replace with a DiagnosticRef rather than a full diagnostic.
  new_guids_to_old_diagnostics = DeduplicateAndPut(
      suite_level_sparse_diagnostic_entities, suite_key, revision)
  for new_guid, old_diagnostic in new_guids_to_old_diagnostics.iteritems():
    histograms.ReplaceSharedDiagnostic(
        new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

  for hist in histograms:
    guid = hist.guid
    diagnostics = FindHistogramLevelSparseDiagnostics(guid, histograms)
    # TODO(eakuefner): Don't compute full diagnostics, because we need anyway to
    # call GetOrCreate here and in the queue.
    test_path = ComputeTestPath(guid, histograms)
    # TODO(eakuefner): Batch these better than one per task.
    task_list.append(_MakeTask(hist, test_path, revision, diagnostics))

  queue = taskqueue.Queue(TASK_QUEUE_NAME)
  queue.add(task_list)
def ConvertGtestJson(gtest_json):
  """Convert JSON from a gtest perf test to Histograms.

  Incoming data is in the following format:
  {
    'metric1': {
      'units': 'unit1',
      'traces': {
        'story1': ['mean', 'std_dev'],
        'story2': ['mean', 'std_dev'],
      },
      'important': ['testcase1', 'testcase2'],
    },
    'metric2': {
      'units': 'unit2',
      'traces': {
        'story1': ['mean', 'std_dev'],
        'story2': ['mean', 'std_dev'],
      },
      'important': ['testcase1', 'testcase2'],
    },
    ...
  }
  We ignore the 'important' fields and just assume everything should be
  considered important.

  We also don't bother adding any reserved diagnostics like mastername in this
  script since that should be handled by the upload script.

  Args:
    gtest_json: A JSON dict containing perf output from a gtest

  Returns:
    A HistogramSet containing equivalent histograms and diagnostics: one
    histogram per (metric, story) pair, tagged with a STORIES diagnostic.
  """
  hs = histogram_set.HistogramSet()

  # dict.items() is used instead of the Python 2-only dict.iteritems() so the
  # conversion runs unchanged under both Python 2 and Python 3.
  for metric, metric_data in gtest_json.items():
    # Maintain the same unit if we're able to find an exact match, converting
    # time units if possible. Otherwise use 'unitless'.
    unit, multiplier = _ConvertUnit(metric_data.get('units'))

    for story, story_data in metric_data['traces'].items():
      # We should only ever have the mean and standard deviation here.
      assert len(story_data) == 2
      h = histogram.Histogram(metric, unit)
      h.diagnostics[reserved_infos.STORIES.name] = generic_set.GenericSet(
          [story])
      mean = float(story_data[0]) * multiplier
      std_dev = float(story_data[1]) * multiplier
      h.AddSample(mean)

      # Synthesize the running statistics since we only have the mean + standard
      # deviation instead of the actual data points.
      h._running = histogram.RunningStatistics.FromDict([
          2,  # count, we need this to be >1 in order for variance to work
          mean,  # max
          0,  # meanlogs
          mean,  # mean
          mean,  # min
          mean,  # sum
          std_dev * std_dev,  # variance
      ])
      hs.AddHistogram(h)

  return hs
def testIgnoreTagMap(self):
  """Imports a lone dict of type 'TagMap'; passes if ImportDicts returns."""
  tag_map_only = [{'type': 'TagMap'}]
  imported_set = histogram_set.HistogramSet()
  imported_set.ImportDicts(tag_map_only)