Example #1
 def testRelativeChangeFromZero(self):
   """Tests what happens when relative change from zero is calculated."""
   # If the first number is zero, then the result is not a number.
   self.assertEqual(0, math_utils.RelativeChange(0, 0))
   self.assertTrue(
       math.isnan(math_utils.RelativeChange(0, 1)))
   self.assertTrue(
       math.isnan(math_utils.RelativeChange(0, -1)))
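The tests above pin down how RelativeChange treats a zero baseline: identical values (including zero to zero) give 0, while any change away from a zero baseline is undefined and reported as NaN. Together with the other tests in this listing, they suggest something like the following minimal sketch, which is illustrative only and not necessarily the actual math_utils implementation:

 import math

 def relative_change_sketch(before, after):
   """Illustrative sketch of the behavior asserted by the tests in this listing."""
   # Identical values (including 0 -> 0) mean no change at all.
   if before == after:
     return 0.0
   # A change away from a zero baseline has no well-defined relative size.
   if before == 0:
     return float('nan')
   # Otherwise the change is measured relative to the first value and is
   # always returned as a positive magnitude.
   return math.fabs((after - before) / float(before))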
Example #2
    def _ComputeRegressionStatistics(cls, rev_states, first_working_rev,
                                     last_broken_rev):
        # TODO(sergiyb): We assume that value has a "values" key, which may not
        # be the case for failure-bisects, where there is a single value only.
        broken_means = [
            state.value['values']
            for state in rev_states[:last_broken_rev.index + 1] if state.value
        ]

        working_means = [
            state.value['values']
            for state in rev_states[first_working_rev.index:] if state.value
        ]

        # Flatten the lists to calculate mean of all values.
        working_mean = sum(working_means, [])
        broken_mean = sum(broken_means, [])

        # Calculate the approximate size of the regression
        mean_of_bad_runs = math_utils.Mean(broken_mean)
        mean_of_good_runs = math_utils.Mean(working_mean)

        regression_size = 100 * math_utils.RelativeChange(
            mean_of_good_runs, mean_of_bad_runs)
        if math.isnan(regression_size):
            regression_size = 'zero-to-nonzero'

        regression_std_err = math.fabs(
            math_utils.PooledStandardError([working_mean, broken_mean]) /
            max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0

        # Give a "confidence" in the bisect. Currently, we consider the values of
        # only the revisions at the breaking range (last known good and first known
        # bad); see the note in the docstring for FindBreakingRange.
        confidence_params = (sum([first_working_rev.value['values']],
                                 []), sum([last_broken_rev.value['values']],
                                          []))
        confidence = cls.ConfidenceScore(*confidence_params)

        bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs

        return {
            'regression_size': regression_size,
            'regression_std_err': regression_std_err,
            'confidence': confidence,
            'bad_greater_than_good': bad_greater_than_good
        }
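In this example the size of the regression is just the relative change between the mean of the good runs and the mean of the bad runs, expressed as a percentage. A small worked illustration with made-up numbers, using plain Python in place of math_utils.Mean and math_utils.RelativeChange:

 # Illustrative only: the per-revision 'values' lists below are invented.
 working_means = [[10.0, 10.2], [9.9, 10.1]]   # values from good revisions
 broken_means = [[12.0, 12.2], [11.9, 12.1]]   # values from bad revisions

 # Flatten the per-revision lists into one sample per group, as above.
 working_mean = sum(working_means, [])          # [10.0, 10.2, 9.9, 10.1]
 broken_mean = sum(broken_means, [])            # [12.0, 12.2, 11.9, 12.1]

 mean_of_good_runs = sum(working_mean) / len(working_mean)   # 10.05
 mean_of_bad_runs = sum(broken_mean) / len(broken_mean)      # 12.05

 # Regression size as a percentage of the good baseline: roughly 19.9%.
 regression_size = 100 * abs(mean_of_bad_runs - mean_of_good_runs) / mean_of_good_runs

Flattening with sum(lists, []) is quadratic in the number of sublists; itertools.chain.from_iterable scales better, but for the handful of revisions in a typical bisect the difference is negligible.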
Example #3
    def _ComputeRegressionStatistics(cls, rev_states, first_working_rev,
                                     last_broken_rev):
        # TODO(sergiyb): We assume that value has a "values" key, which may not
        # be the case for failure-bisects, where there is a single value only.
        broken_means = [
            state.value['values']
            for state in rev_states[:last_broken_rev.index + 1] if state.value
        ]

        working_means = [
            state.value['values']
            for state in rev_states[first_working_rev.index:] if state.value
        ]

        # Flatten the lists to calculate mean of all values.
        working_mean = sum(working_means, [])
        broken_mean = sum(broken_means, [])

        # Calculate the approximate size of the regression
        mean_of_bad_runs = math_utils.Mean(broken_mean)
        mean_of_good_runs = math_utils.Mean(working_mean)

        regression_size = 100 * math_utils.RelativeChange(
            mean_of_good_runs, mean_of_bad_runs)
        if math.isnan(regression_size):
            regression_size = 'zero-to-nonzero'

        regression_std_err = math.fabs(
            math_utils.PooledStandardError([working_mean, broken_mean]) /
            max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0

        # Give a "confidence" in the bisect culprit by seeing whether the results
        # of the culprit revision and the revision before that appear to be
        # statistically significantly different.
        confidence = cls.ConfidenceScore(
            sum([first_working_rev.value['values']], []),
            sum([last_broken_rev.value['values']], []))

        bad_greater_than_good = mean_of_bad_runs > mean_of_good_runs

        return {
            'regression_size': regression_size,
            'regression_std_err': regression_std_err,
            'confidence': confidence,
            'bad_greater_than_good': bad_greater_than_good
        }
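The regression_std_err computed above takes a pooled standard error of the two groups and normalizes it by the smaller of the two means (clamped to at least 0.0001) to express it as a percentage. math_utils.PooledStandardError itself is not shown in this listing; as a point of reference, a textbook pooled standard error for two samples, which may differ from the actual helper, looks like this:

 import math

 def pooled_standard_error_sketch(group_a, group_b):
   """Textbook two-sample pooled standard error (illustrative only)."""
   # Requires at least two measurements per group.
   n_a, n_b = len(group_a), len(group_b)
   mean_a = sum(group_a) / float(n_a)
   mean_b = sum(group_b) / float(n_b)
   var_a = sum((x - mean_a) ** 2 for x in group_a) / (n_a - 1)
   var_b = sum((x - mean_b) ** 2 for x in group_b) / (n_b - 1)
   # Pooled variance weights each group's variance by its degrees of freedom.
   pooled_var = ((n_a - 1) * var_a + (n_b - 1) * var_b) / (n_a + n_b - 2)
   return math.sqrt(pooled_var) * math.sqrt(1.0 / n_a + 1.0 / n_b)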
Example #4
 def testRelativeChange_Negative(self):
     # Note that the return value of RelativeChange is always positive.
     self.assertEqual(3.0, math_utils.RelativeChange(-1, 2))
     self.assertEqual(3.0, math_utils.RelativeChange(1, -2))
     self.assertEqual(1.0, math_utils.RelativeChange(-1, -2))
Example #5
 def testRelativeChange_FromZero(self):
     # If the first number is zero, then the result is not a number.
     self.assertEqual(0, math_utils.RelativeChange(0, 0))
     self.assertTrue(math.isnan(math_utils.RelativeChange(0, 1)))
     self.assertTrue(math.isnan(math_utils.RelativeChange(0, -1)))
Example #6
 def testRelativeChange_NonZero(self):
     # The change is relative to the first value, regardless of which is bigger.
     self.assertEqual(0.5, math_utils.RelativeChange(1.0, 1.5))
     self.assertEqual(0.5, math_utils.RelativeChange(2.0, 1.0))
Example #7
 def testRelativeChangeWithNegatives(self):
   """Tests that relative change given is always positive."""
   self.assertEqual(3.0, math_utils.RelativeChange(-1, 2))
   self.assertEqual(3.0, math_utils.RelativeChange(1, -2))
   self.assertEqual(1.0, math_utils.RelativeChange(-1, -2))
Example #8
 def testRelativeChange(self):
   """Tests the common cases for calculating relative change."""
   # The change is relative to the first value, regardless of which is bigger.
   self.assertEqual(0.5, math_utils.RelativeChange(1.0, 1.5))
   self.assertEqual(0.5, math_utils.RelativeChange(2.0, 1.0))
Example #9
    def GetResultsDict(self):
        """Prepares and returns information about the final resulsts as a dict.

    Returns:
      A dictionary with the following fields:

      'first_working_revision': First good revision.
      'last_broken_revision': Last bad revision.
      'culprit_revisions': A list of revisions, which contain the bad change
          introducing the failure.
      'other_regressions': A list of tuples representing other regressions,
          which may have occurred.
      'regression_size': For performance bisects, this is a relative change of
          the mean metric value. For other bisects this field always contains
          'zero-to-nonzero'.
      'regression_std_err': For performance bisects, it is a pooled standard
          error for groups of good and bad runs. Not used for other bisects.
      'confidence': For performance bisects, it is a confidence that the good
          and bad runs are distinct groups. Not used for non-performance
          bisects.
      'revision_data_sorted': A list of (revision id, revision data) pairs,
          sorted by commit order. Each piece of revision data is a dict with
          the following keys:

          'passed': Represents whether the performance test was successful at
              that revision. Possible values include: 1 (passed), 0 (failed),
              '?' (skipped), 'F' (build failed).
          'depot': The depot that this revision is from (e.g. WebKit).
          'external': If the revision is a 'src' revision, 'external' contains
              the revisions of each of the external libraries.
          'sort': A sort value for sorting the dict in order of commits.

          For example:
          {
            'CL #1':
            {
              'passed': False,
              'depot': 'chromium',
              'external': None,
              'sort': 0
            }
          }
    """
        revision_data_sorted = sorted(self.revision_data.iteritems(),
                                      key=lambda x: x[1]['sort'])

        # Find range where it possibly broke.
        first_working_revision = None
        first_working_revision_index = -1
        last_broken_revision = None
        last_broken_revision_index = -1

        culprit_revisions = []
        other_regressions = []
        regression_size = 0.0
        regression_std_err = 0.0
        confidence = 0.0

        for i in xrange(len(revision_data_sorted)):
            k, v = revision_data_sorted[i]
            if v['passed'] == 1:
                if not first_working_revision:
                    first_working_revision = k
                    first_working_revision_index = i

            if not v['passed']:
                last_broken_revision = k
                last_broken_revision_index = i

        if last_broken_revision is not None and first_working_revision is not None:
            broken_means = []
            for i in xrange(0, last_broken_revision_index + 1):
                if revision_data_sorted[i][1]['value']:
                    broken_means.append(
                        revision_data_sorted[i][1]['value']['values'])

            working_means = []
            for i in xrange(first_working_revision_index,
                            len(revision_data_sorted)):
                if revision_data_sorted[i][1]['value']:
                    working_means.append(
                        revision_data_sorted[i][1]['value']['values'])

            # Flatten the lists to calculate mean of all values.
            working_mean = sum(working_means, [])
            broken_mean = sum(broken_means, [])

            # Calculate the approximate size of the regression
            mean_of_bad_runs = math_utils.Mean(broken_mean)
            mean_of_good_runs = math_utils.Mean(working_mean)

            regression_size = 100 * math_utils.RelativeChange(
                mean_of_good_runs, mean_of_bad_runs)
            if math.isnan(regression_size):
                regression_size = 'zero-to-nonzero'

            regression_std_err = math.fabs(
                math_utils.PooledStandardError([working_mean, broken_mean]) /
                max(0.0001, min(mean_of_good_runs, mean_of_bad_runs))) * 100.0

            # Give a "confidence" in the bisect. At the moment this is based on how
            # distinct the values before and after the last broken revision are, and
            # on how noisy the overall graph is.
            confidence = ConfidenceScore(working_means, broken_means)

            culprit_revisions = []

            cwd = os.getcwd()
            self._depot_registry.ChangeToDepotDir(
                self.revision_data[last_broken_revision]['depot'])

            if self.revision_data[last_broken_revision]['depot'] == 'cros':
                # Want to get a list of all the commits and what depots they belong
                # to so that we can grab info about each.
                cmd = [
                    'repo', 'forall', '-c',
                    'pwd ; git log --pretty=oneline --before=%d --after=%d' %
                    (last_broken_revision, first_working_revision + 1)
                ]
                output, return_code = bisect_utils.RunProcessAndRetrieveOutput(
                    cmd)

                changes = []
                assert not return_code, ('An error occurred while running '
                                         '"%s"' % ' '.join(cmd))
                last_depot = None
                cwd = os.getcwd()
                for l in output.split('\n'):
                    if l:
                        # Output will be in form:
                        # /path_to_depot
                        # /path_to_other_depot
                        # <SHA1>
                        # /path_again
                        # <SHA1>
                        # etc.
                        if l[0] == '/':
                            last_depot = l
                        else:
                            contents = l.split(' ')
                            if len(contents) > 1:
                                changes.append([last_depot, contents[0]])
                for c in changes:
                    os.chdir(c[0])
                    info = self._source_control.QueryRevisionInfo(c[1])
                    culprit_revisions.append((c[1], info, None))
            else:
                for i in xrange(last_broken_revision_index,
                                len(revision_data_sorted)):
                    k, v = revision_data_sorted[i]
                    if k == first_working_revision:
                        break
                    self._depot_registry.ChangeToDepotDir(v['depot'])
                    info = self._source_control.QueryRevisionInfo(k)
                    culprit_revisions.append((k, info, v['depot']))
            os.chdir(cwd)

            # Check for any other possible regression ranges.
            other_regressions = self._FindOtherRegressions(
                revision_data_sorted, mean_of_bad_runs > mean_of_good_runs)

        return {
            'first_working_revision': first_working_revision,
            'last_broken_revision': last_broken_revision,
            'culprit_revisions': culprit_revisions,
            'other_regressions': other_regressions,
            'regression_size': regression_size,
            'regression_std_err': regression_std_err,
            'confidence': confidence,
            'revision_data_sorted': revision_data_sorted
        }
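A hypothetical usage sketch for the dictionary returned by GetResultsDict(); the instance name bisect_state is assumed, and note that regression_size may be the string 'zero-to-nonzero' rather than a number for non-performance bisects:

 results = bisect_state.GetResultsDict()  # assumed instance of the class above
 if results['first_working_revision'] is not None:
   print('Breaking range: %s (last broken) .. %s (first working)' % (
       results['last_broken_revision'], results['first_working_revision']))
   print('Regression size: %s (std err: %.2f%%, confidence: %.1f)' % (
       results['regression_size'], results['regression_std_err'],
       results['confidence']))
   for revision, info, depot in results['culprit_revisions']:
     print('Possible culprit: %s (depot: %s)' % (revision, depot))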