示例#1
0
def AnalysisSerializer(task, _, accumulator):
    """Publish a task's analysis results into the accumulator.

    The entry for ``task.id`` in ``accumulator`` is created if it does not
    exist yet and is then updated in place with the keys 'changes',
    'comparison_mode', 'comparisons', 'culprits', 'metric' and
    'result_values', all taken from ``task.payload``.

    The 'metric' is derived from the payload's 'read_option_template':
    its 'benchmark' when the mode is 'histogram_sets', the 'chart' of its
    'graph_json_options' when the mode is 'graph_json', and None otherwise.

    Args:
        task: Object exposing ``id`` and a dict-like ``payload``.
        _: Unused.
        accumulator: Dict keyed by task id; mutated in place.

    Returns:
        None.
    """
    payload = task.payload
    analysis_results = accumulator.setdefault(task.id, {})

    # A payload without 'read_option_template' (or with it set to None) used
    # to raise AttributeError on the chained .get(); every other payload key
    # is read tolerantly, so treat a missing template as "no metric".
    read_option_template = payload.get('read_option_template') or {}
    graph_json_options = read_option_template.get('graph_json_options') or {}

    mode = read_option_template.get('mode')
    metric = None
    if mode == 'histogram_sets':
        metric = read_option_template.get('benchmark')
    elif mode == 'graph_json':
        metric = graph_json_options.get('chart')

    analysis_results.update({
        'changes': [
            change_module.ReconstituteChange(change)
            for change in payload.get('changes', [])
        ],
        'comparison_mode': payload.get('comparison_mode'),
        'comparisons': payload.get('comparisons', []),
        'culprits': payload.get('culprits', []),
        'metric': metric,
        'result_values': payload.get('result_values', []),
    })
示例#2
0
 def __call__(self, task, event, _):
     """Turn an 'update' event into a build-status update action.

     Args:
       task: Task whose payload holds the serialized 'change'.
       event: Event being evaluated; only its `type` is inspected here.
       _: Unused.

     Returns:
       A one-element list holding an UpdateBuildStatusAction when the event
       type is 'update', otherwise None.
     """
     # Only 'update' events are of interest to this evaluator.
     if event.type != 'update':
         return None

     # Outline of the intended handling of the build status payload (as
     # described by the original author):
     #   - On success, update the task payload with the status and relevant
     #     information, and propagate information into the accumulator.
     #   - On failure, retry if the error is retryable (recording the retry
     #     in the payload); fail if it is non-retryable or retries have been
     #     exhausted.
     serialized_change = task.payload.get('change')
     reconstituted = change_module.ReconstituteChange(serialized_change)
     return [UpdateBuildStatusAction(self.job, task, reconstituted, event)]
示例#3
0
  def __call__(self, task, _, accumulator):
    """Evaluate a bisection task and return the actions to take next.

    Args:
      task: The task being evaluated. Its `status`, `payload` and
        `dependencies` attributes are read, and `payload` is updated in place.
      _: Unused.
      accumulator: Mapping of task id to a dict. For the ids listed in
        `task.dependencies` the 'change', 'status' and 'result_values' entries
        are read.

    Returns:
      A list of actions. For a 'pending' task this is a single PrepareCommits
      action. For an 'ongoing' task it is a (possibly empty) list of payload
      update, refinement or completion actions. For any other status no
      return statement is reached, so the result is None.
    """
    # Outline:
    #  - If the task is still pending, this means this is the first time we're
    #  encountering the task in an evaluation. Set up the payload data to
    #  include the full range of commits, so that we load it once and have it
    #  ready, and emit an action to mark the task ongoing.
    #
    #  - If the task is ongoing, gather all the dependency data (both results
    #  and status) and see whether we have enough data to determine the next
    #  action. We have three main cases:
    #
    #    1. We cannot detect a significant difference between the results from
    #       two different CLs. We call this the NoReproduction case.
    #
    #    2. We do not have enough confidence that there's a difference. We call
    #       this the Indeterminate case.
    #
    #    3. We have enough confidence that there's a difference between any two
    #       ordered changes. We call this the SignificantChange case.
    #
    # - Delegate the implementation to handle the independent cases for each
    #   change point we find in the CL continuum.
    if task.status == 'pending':
      return [PrepareCommits(self.job, task)]

    all_changes = None
    actions = []
    if 'changes' not in task.payload:
      # First evaluation after the commits were loaded: build one Change per
      # commit in the payload (each carrying the payload's 'pinned_change' as
      # its patch) and cache the serialized form in the payload.
      all_changes = [
          change_module.Change(
              commits=[
                  change_module.Commit(
                      repository=commit.get('repository'),
                      git_hash=commit.get('git_hash'))
              ],
              patch=task.payload.get('pinned_change'))
          for commit in task.payload.get('commits', [])
      ]
      task.payload.update({
          'changes': [change.AsDict() for change in all_changes],
      })
      actions.append(UpdateTaskPayloadAction(self.job, task))
    else:
      # We need to reconstitute the Change instances from the dicts we've stored
      # in the payload.
      all_changes = [
          change_module.ReconstituteChange(change)
          for change in task.payload.get('changes')
      ]

    # NOTE(review): for any status other than 'ongoing' nothing below returns,
    # so an UpdateTaskPayloadAction appended above is never handed back to the
    # caller -- confirm this is intended.
    if task.status == 'ongoing':
      # TODO(dberris): Validate and fail gracefully instead of asserting?
      assert 'commits' in task.payload, ('Programming error, need commits to '
                                         'proceed!')

      # Collect all the dependency task data and analyse the results.
      # Group them by change.
      # Order them by appearance in the CL range.
      # Also count the status per CL (failed, ongoing, etc.)
      deps = set(task.dependencies)
      results_by_change = collections.defaultdict(list)
      status_by_change = collections.defaultdict(dict)
      changes_with_data = set()
      changes_by_status = collections.defaultdict(set)

      associated_results = [(change_module.ReconstituteChange(t.get('change')),
                             t.get('status'), t.get('result_values'))
                            for dep, t in accumulator.items()
                            if dep in deps]
      for change, status, result_values in associated_results:
        if result_values:
          # Drop None entries; a dependency contributes a result list only if
          # at least one non-None value remains.
          filtered_results = [r for r in result_values if r is not None]
          if filtered_results:
            results_by_change[change].append(filtered_results)
        # Count how many dependencies of this change are in each status.
        status_by_change[change].update({
            status: status_by_change[change].get(status, 0) + 1,
        })
        changes_by_status[status].add(change)
        changes_with_data.add(change)

      # If the dependencies have converged into a single status, we can make
      # decisions on the terminal state of the bisection.
      if len(changes_by_status) == 1 and changes_with_data:

        # Check whether all dependencies are completed and if we do
        # not have data in any of the dependencies.
        if changes_by_status.get('completed') == changes_with_data:
          changes_with_empty_results = [
              change for change in changes_with_data
              if not results_by_change.get(change)
          ]
          # The bisection fails as soon as at least one change has no results.
          # When every change has results, none of these branches returns and
          # execution falls through to the analysis below.
          if changes_with_empty_results:
            task.payload.update({
                'errors':
                    task.payload.get('errors', []) + [{
                        'reason':
                            'BisectionFailed',
                        'message': ('We did not find any results from '
                                    'successful test runs.')
                    }]
            })
            return [CompleteExplorationAction(self.job, task, 'failed')]
        # Check whether all the dependencies had the tests fail consistently.
        elif changes_by_status.get('failed') == changes_with_data:
          task.payload.update({
              'errors':
                  task.payload.get('errors', []) + [{
                      'reason': 'BisectionFailed',
                      'message': 'All attempts in all dependencies failed.'
                  }]
          })
          return [CompleteExplorationAction(self.job, task, 'failed')]
        # If they're all pending or ongoing, then we don't do anything yet.
        else:
          return actions

      # We want to reduce the list of ordered changes to only the ones that have
      # data available.
      change_index = {change: index for index, change in enumerate(all_changes)}
      ordered_changes = [c for c in all_changes if c in changes_with_data]

      # From here we can then do the analysis on a pairwise basis, as we're
      # going through the list of Change instances we have data for.
      # NOTE: A lot of this algorithm is already in pinpoint/models/job_state.py
      # which we're adapting.
      def Compare(a, b):
        # This is the comparison function which determines whether the samples
        # we have from the two changes (a and b) are statistically significant.
        # Returns None when either change is None or has no values,
        # compare.PENDING when either change still has pending dependencies,
        # and otherwise the `result` attribute of compare.Compare's return
        # value.
        if a is None or b is None:
          return None

        if 'pending' in status_by_change[a] or 'pending' in status_by_change[b]:
          return compare.PENDING

        # NOTE: Here we're attempting to scale the provided comparison magnitude
        # threshold by the larger inter-quartile range (a measure of dispersion,
        # simply computed as the 75th percentile minus the 25th percentile). The
        # reason we're doing this is so that we can scale the tolerance
        # according to the noise inherent in the measurements -- i.e. more noisy
        # measurements will require a larger difference for us to consider
        # statistically significant.
        values_for_a = tuple(itertools.chain(*results_by_change[a]))
        values_for_b = tuple(itertools.chain(*results_by_change[b]))

        if not values_for_a:
          return None
        if not values_for_b:
          return None

        # The 0.001 floor keeps the division below away from a zero IQR.
        max_iqr = max(
            math_utils.Iqr(values_for_a), math_utils.Iqr(values_for_b), 0.001)
        comparison_magnitude = task.payload.get('comparison_magnitude',
                                                1.0) / max_iqr
        # Integer average of the two sample sizes.
        attempts = (len(values_for_a) + len(values_for_b)) // 2
        result = compare.Compare(values_for_a, values_for_b, attempts,
                                 'performance', comparison_magnitude)
        return result.result

      def DetectChange(change_a, change_b):
        # We return None if the comparison determines that the result is
        # inconclusive. This is required by the exploration.Speculate contract.
        comparison = Compare(change_a, change_b)
        if comparison == compare.UNKNOWN:
          return None
        return comparison == compare.DIFFERENT

      # Filled in by CollectChangesToRefine with (change, new_attempt_count)
      # pairs while exploration.Speculate runs.
      changes_to_refine = []

      def CollectChangesToRefine(a, b):
        # Here we're collecting changes that need refinement, which happens when
        # two changes when compared yield the "unknown" result.
        attempts_for_a = sum(status_by_change[a].values())
        attempts_for_b = sum(status_by_change[b].values())

        # Grow the attempts of both changes by 50% every time when increasing
        # attempt counts. This number is arbitrary, and we should probably use
        # something like a Fibonacci sequence when scaling attempt counts.
        new_attempts_size_a = min(
            attempts_for_a + (attempts_for_a // 2),
            task.payload.get('analysis_options', {}).get('max_attempts', 100))
        new_attempts_size_b = min(
            attempts_for_b + (attempts_for_b // 2),
            task.payload.get('analysis_options', {}).get('max_attempts', 100))

        # Only refine if the new attempt sizes are not large enough.
        if new_attempts_size_a > attempts_for_a:
          changes_to_refine.append((a, new_attempts_size_a))
        if new_attempts_size_b > attempts_for_b:
          changes_to_refine.append((b, new_attempts_size_b))

      def FindMidpoint(a, b):
        # Here we use the (very simple) midpoint finding algorithm given that we
        # already have the full range of commits to bisect through.
        # Returns None when no change lies strictly between a and b.
        a_index = change_index[a]
        b_index = change_index[b]
        subrange = all_changes[a_index:b_index + 1]
        return None if len(subrange) <= 2 else subrange[len(subrange) // 2]

      # We have a striding iterable, which will give us the before, current, and
      # after for a given index in the iterable.
      def SlidingTriple(iterable):
        """s -> (None, s0, s1), (s0, s1, s2), (s1, s2, s3), ..."""
        p, c, n = itertools.tee(iterable, 3)
        p = itertools.chain([None], p)
        n = itertools.chain(itertools.islice(n, 1, None), [None])
        # NOTE: itertools.izip only exists in Python 2.
        return itertools.izip(p, c, n)

      # This is a comparison between values at a change and the values at
      # the previous change and the next change.
      comparisons = [{
          'prev': Compare(p, c),
          'next': Compare(c, n),
      } for (p, c, n) in SlidingTriple(ordered_changes)]

      # Collect the result values for each change with values.
      result_values = [
          list(itertools.chain(*results_by_change.get(change, [])))
          for change in ordered_changes
      ]
      # Only emit a payload update when the computed data actually changed.
      if task.payload.get('comparisons') != comparisons or task.payload.get(
          'result_values') != result_values:
        task.payload.update({
            'comparisons': comparisons,
            'result_values': result_values,
        })
        actions.append(UpdateTaskPayloadAction(self.job, task))

      if len(ordered_changes) < 2:
        # We do not have enough data yet to determine whether we should do
        # anything.
        return actions

      additional_changes = exploration.Speculate(
          ordered_changes,
          change_detected=DetectChange,
          on_unknown=CollectChangesToRefine,
          midpoint=FindMidpoint,
          levels=_DEFAULT_SPECULATION_LEVELS)

      # At this point we can collect the actions to extend the task graph based
      # on the results of the speculation, only if the changes don't have any
      # more associated pending/ongoing work.
      min_attempts = task.payload.get('analysis_options',
                                      {}).get('min_attempts', 10)
      # Newly speculated changes start at min_attempts; changes collected by
      # CollectChangesToRefine use the attempt count computed there.
      actions += [
          RefineExplorationAction(self.job, task, change, new_size)
          for change, new_size in itertools.chain(
              [(c, min_attempts) for _, c in additional_changes],
              [(c, a) for c, a in changes_to_refine],
          )
          if not bool({'pending', 'ongoing'} & set(status_by_change[change]))
      ]

      # Here we collect the points where we've found the changes.
      def Pairwise(iterable):
        """s -> (s0, s1), (s1, s2), (s2, s3), ..."""
        a, b = itertools.tee(iterable)
        next(b, None)
        # NOTE: itertools.izip only exists in Python 2.
        return itertools.izip(a, b)

      task.payload.update({
          'culprits': [(a.AsDict(), b.AsDict())
                       for a, b in Pairwise(ordered_changes)
                       if DetectChange(a, b)],
      })
      # The exploration may only complete once every dependency status is
      # either 'failed' or 'completed' and no further actions are queued.
      can_complete = not bool(set(changes_by_status) - {'failed', 'completed'})
      if not actions and can_complete:
        # Mark this operation complete, storing the differences we can compute.
        actions = [CompleteExplorationAction(self.job, task, 'completed')]
      return actions
示例#4
0
 def __call__(self, _):
   """Load the commit range for the task and mark the task ongoing.

   Reconstitutes the payload's 'start_change' and 'end_change', asks
   `change_module.Commit.CommitRange` for the commits between their base
   commits, and stores the result under the payload's 'commits' key as a list
   of OrderedDicts with 'repository' and 'git_hash' entries: the start commit
   first, followed by the fetched range in reversed order. The task is then
   updated to the 'ongoing' state.

   If `gitiles_service.NotFoundError` is raised, a 'GitilesFetchError' entry
   is appended to the payload's 'errors' list and the task is updated to the
   'failed' state instead.

   Args:
     _: Unused.
   """
   start_change = change_module.ReconstituteChange(
       self.task.payload['start_change'])
   end_change = change_module.ReconstituteChange(
       self.task.payload['end_change'])
   try:
     # The commit range is stored in the payload once, so that later steps of
     # the culprit finding process can work from the payload instead of going
     # back to the datastore for individual commits.
     #
     # TODO(dberris): Expand the commits into the full table of dependencies?
     # Every commit in the chromium repository is likely to build against
     # different versions of its dependencies (v8, skia, etc.), so the concept
     # of a changelist (CL, or Change in the Pinpoint codebase) would need to
     # be expanded to record which dependency versions a specific CL uses.
     # With that in place we might be able to operate cleanly on Change
     # instances instead of raw commits.
     #
     # TODO(dberris): Model the "merge-commit" like nature of auto-roll CLs by
     # letting the preparation action represent the non-linearity of the
     # history. That needs a concept of levels: changes in the main repository
     # history operate linearly at a higher level, and descending into a roll
     # explores a lower level of the linear history, as in this diagram:
     #
     #   main -> m0 -> m1 -> m2 -> roll0 -> m3 -> ...
     #                              |
     #   dependency ..............  +-> d0 -> d1
     #
     # Ideally this is already expanded before a bisection is performed, to
     # amortise the cost of requesting this information from back-end services
     # in tight loops.
     base = start_change.base_commit
     fetched = change_module.Commit.CommitRange(base, end_change.base_commit)

     # The start commit always leads the list.
     commit_entries = [
         collections.OrderedDict([('repository', base.repository),
                                  ('git_hash', base.git_hash)])
     ]
     # The fetched range is appended in reversed order; every entry is
     # attributed to the start commit's repository.
     for fetched_commit in reversed(fetched):
       commit_entries.append(
           collections.OrderedDict([('repository', base.repository),
                                    ('git_hash', fetched_commit['commit'])]))

     self.task.payload.update({'commits': commit_entries})
     task_module.UpdateTask(
         self.job,
         self.task.id,
         new_state='ongoing',
         payload=self.task.payload)
   except gitiles_service.NotFoundError as e:
     # TODO(dberris): We need to be more resilient to intermittent failures
     # from the Gitiles service here.
     previous_errors = self.task.payload.get('errors', [])
     fetch_error = {'reason': 'GitilesFetchError', 'message': e.message}
     self.task.payload.update({'errors': previous_errors + [fetch_error]})
     task_module.UpdateTask(
         self.job, self.task.id, new_state='failed', payload=self.task.payload)
示例#5
0
    def _FormatAndPostBugCommentOnComplete(self):
        """Defer the bug comment that reports the outcome of a finished job.

        Three outcomes are handled:
          * Try jobs: a "job complete" comment labelled
            'Pinpoint-Tryjob-Completed' is deferred.
          * No differences found: a "couldn't reproduce" comment labelled
            'Pinpoint-No-Repro' is deferred.
          * Differences found: each difference is formatted and the lot is
            handed to `_UpdatePostAndMergeDeferred`.

        The differences and per-change result values come from `self.state`
        unless `self.use_execution_engine` is set, in which case they are read
        from the context returned by `task_module.Evaluate`.
        """
        logging.debug('Processing outputs.')
        if self._IsTryJob():
            # Try jobs carry no comparison metric, so only completion is
            # reported.
            tryjob_title = ('<b>%s Job complete. See results below.</b>' %
                            _ROUND_PUSHPIN)
            deferred.defer(
                _PostBugCommentDeferred,
                self.bug_id,
                '\n'.join((tryjob_title, self.url)),
                labels=['Pinpoint-Tryjob-Completed'],
                _retry_options=RETRY_OPTIONS)
            return

        # From here on there is a comparison metric.
        values_by_change = {}
        if self.use_execution_engine:
            logging.debug('Execution Engine: Finding culprits.')
            select_event = event_module.SelectEvent()
            selector = evaluators.Selector(
                event_type='select',
                include_keys={'culprits', 'change', 'result_values'})
            context = task_module.Evaluate(self, select_event, selector)

            culprit_pairs = context.get('performance_bisection',
                                        {}).get('culprits', [])
            differences = []
            for before, after in culprit_pairs:
                differences.append(
                    (change_module.ReconstituteChange(before),
                     change_module.ReconstituteChange(after)))

            # Only context entries carrying both keys contribute values.
            for entry in context.values():
                if 'change' in entry and 'result_values' in entry:
                    values_by_change[change_module.ReconstituteChange(
                        entry.get('change'))] = entry.get('result_values')
        else:
            differences = self.state.Differences()
            for before, after in differences:
                for change in (before, after):
                    values_by_change.setdefault(
                        change, self.state.ResultValues(change))

        if not differences:
            no_repro_title = ("<b>%s Couldn't reproduce a difference.</b>" %
                              _ROUND_PUSHPIN)
            deferred.defer(
                _PostBugCommentDeferred,
                self.bug_id,
                '\n'.join((no_repro_title, self.url)),
                labels=['Pinpoint-No-Repro'],
                _retry_options=RETRY_OPTIONS)
            return

        # Gather the formatted details for every difference found.
        formatted_differences = []
        commit_dicts = []
        deltas_by_commit = {}
        for change_a, change_b in differences:
            # Prefer the patch of the later change when it has one.
            commit = change_b.patch if change_b.patch else change_b.last_commit
            commit_dict = commit.AsDict()

            values_a = values_by_change[change_a]
            values_b = values_by_change[change_b]
            formatted_differences.append(
                _FormatDifferenceForBug(commit_dict, values_a, values_b,
                                        self.state.metric))
            commit_dicts.append(commit_dict)

            # A delta is only recorded when both sides have values.
            if not (values_a and values_b):
                continue
            delta = job_state.Mean(values_b) - job_state.Mean(values_a)
            deltas_by_commit[commit.id_string] = (delta, commit_dict)

        deferred.defer(
            _UpdatePostAndMergeDeferred,
            formatted_differences,
            commit_dicts,
            list(deltas_by_commit.values()),
            self.bug_id,
            self.tags,
            self.url,
            _retry_options=RETRY_OPTIONS)
示例#6
0
    def _FormatAndPostBugCommentOnComplete(self):
        """Defer the bug update that reports the outcome of a finished job.

        Three outcomes are handled:
          * Try jobs: a "job complete" comment labelled
            'Pinpoint-Tryjob-Completed' is deferred.
          * No differences found: a "couldn't reproduce" comment labelled
            'Pinpoint-No-Repro' is deferred with status 'WontFix'.
          * Differences found: they are added to a
            `DifferencesFoundBugUpdateBuilder`, which is handed to
            `job_bug_update.UpdatePostAndMergeDeferred`.

        The differences and per-change result values come from `self.state`
        unless `self.use_execution_engine` is set, in which case they are read
        from the context returned by `task_module.Evaluate`. The examined
        change count is only obtained from `self.state`; on the execution
        engine path it stays None.
        """
        logging.debug('Processing outputs.')
        if self._IsTryJob():
            # Try jobs carry no comparison metric, so only completion is
            # reported.
            tryjob_title = ('<b>%s Job complete. See results below.</b>' %
                            _ROUND_PUSHPIN)
            deferred.defer(
                _PostBugCommentDeferred,
                self.bug_id,
                '\n'.join((tryjob_title, self.url)),
                project=self.project,
                labels=['Pinpoint-Tryjob-Completed'],
                _retry_options=RETRY_OPTIONS)
            return

        # From here on there is a comparison metric.
        examined_changes = None
        values_by_change = {}
        if self.use_execution_engine:
            logging.debug('Execution Engine: Finding culprits.')
            select_event = event_module.SelectEvent()
            selector = evaluators.Selector(
                event_type='select',
                include_keys={'culprits', 'change', 'result_values'})
            context = task_module.Evaluate(self, select_event, selector)

            culprit_pairs = context.get('performance_bisection',
                                        {}).get('culprits', [])
            differences = []
            for before, after in culprit_pairs:
                differences.append(
                    (change_module.ReconstituteChange(before),
                     change_module.ReconstituteChange(after)))

            # Only context entries carrying both keys contribute values.
            for entry in context.values():
                if 'change' in entry and 'result_values' in entry:
                    values_by_change[change_module.ReconstituteChange(
                        entry.get('change'))] = entry.get('result_values')
        else:
            differences = self.state.Differences()
            for before, after in differences:
                for change in (before, after):
                    values_by_change.setdefault(
                        change, self.state.ResultValues(change))
            examined_changes = self.state.ChangesExamined()

        if not differences:
            # Besides posting the (minimal) "no repro" information, the issue
            # is automatically marked WontFix. Production experience showed
            # that issues where Pinpoint cannot reproduce the difference
            # invariably end up "Unconfirmed" with very little follow-up.
            no_repro_title = ("<b>%s Couldn't reproduce a difference.</b>" %
                              _ROUND_PUSHPIN)
            deferred.defer(
                _PostBugCommentDeferred,
                self.bug_id,
                '\n'.join((no_repro_title, self.url)),
                project=self.project,
                labels=['Pinpoint-No-Repro'],
                status='WontFix',
                _retry_options=RETRY_OPTIONS)
            return

        # Feed every difference, with the values on both sides, to the builder.
        builder = job_bug_update.DifferencesFoundBugUpdateBuilder(
            self.state.metric)
        builder.SetExaminedCount(examined_changes)
        for change_a, change_b in differences:
            # Prefer the patch of the later change when it has one.
            commit = change_b.patch if change_b.patch else change_b.last_commit
            builder.AddDifference(commit, values_by_change[change_a],
                                  values_by_change[change_b])

        deferred.defer(
            job_bug_update.UpdatePostAndMergeDeferred,
            builder,
            self.bug_id,
            self.tags,
            self.url,
            self.project,
            _retry_options=RETRY_OPTIONS)
示例#7
0
    def __call__(self, task, event, context):
        """Merge one task's serialized data into the shared `context`.

        The parent evaluator is invoked with a fresh local dict; the 'state',
        'order_changes' and 'set_parameters' entries found in that dict are
        then applied to `context` following the protocol described below.

        Args:
            task: Forwarded unchanged to the parent evaluator.
            event: Forwarded unchanged to the parent evaluator.
            context: Dict mutated in place. Keys written here are 'state',
                'quests', 'difference_count', 'comparison_mode' and 'metric'.

        Returns:
            None.
        """
        # First we delegate to the task-specific serializers, and have the
        # domain-aware transformers canonicalise the data in the context. We
        # then do a dictionary merge following a simple protocol for editing a
        # single context. This way the transformers can output a canonical set
        # of transformations to build up the (global) context.
        local_context = {}
        super(Serializer, self).__call__(task, event, local_context)

        # What we expect to see in the local context is data in the following
        # form:
        #
        #   {
        #      # The 'state' key is required to identify to which change and which
        #      # state we should be performing the actions.
        #      'state': {
        #         'change': {...}
        #         'quest': <string>
        #
        #         # In the quest-based system, we end up with different "execution"
        #         # details, which come in "quest" order. In the task-based
        #         # evaluation model, the we use the 'index' in the 'add_details'
        #         # sub-object to identify the index in the details.
        #         'add_execution': {
        #             'add_details': {
        #                 'index': <int>
        #                 ...
        #             }
        #             ...
        #         }
        #
        #         # This allows us to accumulate the resulting values we encounter
        #         # associated with the change.
        #         'append_result_values': [<float>]
        #
        #         # This allows us to set the comparison result for this change in
        #         # context of other changes.
        #         'set_comparison': {
        #             'next': <string|None>,
        #             'prev': <string|None>,
        #         }
        #      }
        #
        #      # If we see the 'order_changes' key in the local context, then
        #      # that means we can sort the states according to the changes as they
        #      # appear in the embedded 'changes' list.
        #      'order_changes': {
        #        'changes': [..]
        #      }
        #
        #      # If we see the 'set_parameters' key in the local context, then
        #      # we can set the overall parameters we're looking to compare and
        #      # convey in the results.
        #      'set_parameters': {
        #          'comparison_mode': <string>
        #          'metric': <string>
        #      }
        #   }
        #
        # At this point we process the context to update the global context
        # following the protocol defined above.
        if 'state' in local_context:
            modification = local_context['state']
            states = context.setdefault('state', [])
            quests = context.setdefault('quests', [])

            # We need to find the existing state which matches the quest and the
            # change. If we don't find one, we create the first state entry for that.
            # Only the 'change' value is compared in this lookup.
            state_index = None
            change = modification.get('change')
            for index, state in enumerate(states):
                if state.get('change') == change:
                    state_index = index
                    break

            if state_index is None:
                states.append({
                    'attempts': [{
                        'executions': []
                    }],
                    'change': change
                })
                state_index = len(states) - 1

            # Quests are numbered in order of first appearance; the number is
            # the slot index inside each attempt's 'executions' list.
            quest = modification.get('quest')
            try:
                quest_index = quests.index(quest)
            except ValueError:
                quests.append(quest)
                quest_index = len(quests) - 1

            add_execution = modification.get('add_execution')
            append_result_values = modification.get('append_result_values')
            # NOTE(review): the protocol comment above places 'index' under
            # add_execution['add_details'], but it is read from the top level
            # of the 'state' modification here -- confirm which is intended.
            attempt_index = modification.get('index', 0)
            state = states[state_index]
            if add_execution:
                # Pad the attempts, and then that attempt's executions, so
                # both indices are addressable before storing a copy of the
                # execution.
                attempts = state['attempts']
                while len(attempts) < attempt_index + 1:
                    attempts.append({'executions': []})
                executions = state['attempts'][attempt_index]['executions']
                while len(executions) < quest_index + 1:
                    executions.append(None)
                executions[quest_index] = dict(add_execution)

            if append_result_values:
                state.setdefault('result_values',
                                 []).extend(append_result_values)

        if 'order_changes' in local_context:
            # Here, we'll sort the states according to their order of appearance in
            # the 'order_changes' list.
            states = context.get('state', [])
            if states:
                state_changes = {
                    change_module.ReconstituteChange(state.get('change'))
                    for state in states
                }
                order_changes = local_context.get('order_changes', {})
                all_changes = order_changes.get('changes', [])
                comparisons = order_changes.get('comparisons', [])
                result_values = order_changes.get('result_values', [])
                # Position of each known change among the changes that
                # actually have a state. The membership test presumably
                # relies on all_changes holding reconstituted (hashable)
                # changes -- TODO confirm against the producer of
                # 'order_changes'.
                change_index = {
                    change: index
                    for index, change in enumerate(
                        known_change for known_change in all_changes
                        if known_change in state_changes)
                }
                # NOTE(review): a state whose change is not in all_changes is
                # never placed, which leaves a None slot in ordered_states.
                # The defaulting loop further down calls .get() on every
                # entry -- confirm this cannot happen.
                ordered_states = [None] * len(states)
                for state in states:
                    index = change_index.get(
                        change_module.ReconstituteChange(state.get('change')))
                    if index is not None:
                        ordered_states[index] = state

                # Merge in the comparisons as they appear for the ordered_states.
                # NOTE: itertools.izip_longest only exists in Python 2.
                for state, comparison, result in itertools.izip_longest(
                        ordered_states, comparisons or [], result_values
                        or []):
                    if state is None:
                        continue
                    if comparison is not None:
                        state['comparisons'] = comparison
                    # This replaces any 'result_values' accumulated earlier
                    # via 'append_result_values'.
                    state['result_values'] = result or []
                context['state'] = ordered_states
                context['difference_count'] = len(
                    order_changes.get('culprits', []))

                # At this point set the default comparisons between two adjacent states
                # which don't have an associated comparison yet to 'pending'.
                states = context.get('state', [])
                for index, state in enumerate(states):
                    comparisons = state.get('comparisons')
                    if comparisons is None:
                        # The first state has no 'prev' neighbour and the
                        # last has no 'next' neighbour.
                        state['comparisons'] = {
                            'prev':
                            None if index == 0 else 'pending',
                            'next':
                            None if index + 1 == len(states) else 'pending',
                        }

        if 'set_parameters' in local_context:
            modification = local_context.get('set_parameters')
            context['comparison_mode'] = modification.get('comparison_mode')
            context['metric'] = modification.get('metric')