def testBisectFinished(self):
  """_Bisect on adjacent commits 90 and 91 expects (None, commit 91)."""
  adjacent_range = CommitIDRange(
      lower=CommitID(commit_position=90, revision='rev_90'),
      upper=CommitID(commit_position=91, revision='rev_91'))
  # No next commit to analyze; the upper bound comes back in the second slot.
  expected_result = (None, CommitID(commit_position=91, revision='rev_91'))

  self.assertEqual(expected_result,
                   lookback_algorithm._Bisect(adjacent_range))
def testBisectNextCommitPosition(self, mock_get_revision):
  """_Bisect on the range 90..100 expects commit 95 next and no culprit."""
  wide_range = CommitIDRange(
      lower=CommitID(commit_position=90, revision='rev_90'),
      upper=CommitID(commit_position=100, revision='rev_100'))
  # The mocked revision lookup supplies the revision of the expected commit.
  mock_get_revision.return_value = 'rev_95'
  expected_result = (CommitID(commit_position=95, revision='rev_95'), None)

  self.assertEqual(expected_result, lookback_algorithm._Bisect(wide_range))
def testGetLatestRegressionRangeRangeNoLowerBound(self):
  """A single data point with pass rate 1.0 yields lower=100 and upper=None."""
  analysis = MasterFlakeAnalysis.Create('m', 'b', 123, 's', 't')
  only_point = DataPoint.Create(
      commit_position=100, pass_rate=1.0, git_hash='rev100')
  analysis.data_points = [only_point]

  expected_range = CommitIDRange(
      lower=CommitID(commit_position=100, revision='rev100'), upper=None)

  self.assertEqual(expected_range, analysis.GetLatestRegressionRange())
def testGetLatestRegressionRangeNoUpperBoundMultipleDataPoints(self):
  """Two data points at pass rate 0.5 yield lower=None and upper=commit 90."""
  analysis = MasterFlakeAnalysis.Create('m', 'b', 123, 's', 't')
  newer_point = DataPoint.Create(commit_position=100, pass_rate=0.5)
  older_point = DataPoint.Create(
      commit_position=90, pass_rate=0.5, git_hash='rev90')
  analysis.data_points = [newer_point, older_point]

  # The point with the lowest commit position is expected as the upper bound.
  expected_range = CommitIDRange(
      lower=None, upper=CommitID(commit_position=90, revision='rev90'))

  self.assertEqual(expected_range, analysis.GetLatestRegressionRange())
def testGetNextCommitIDFromBuildRangeReturnUpperBoundCloser(self):
  """Expects upper (1010) for position 1007 when neither bound was analyzed."""
  requested_commit_id = CommitID(commit_position=1007, revision='r1007')
  range_lower = CommitID(commit_position=1000, revision='r1000')
  range_upper = CommitID(commit_position=1010, revision='r1010')
  surrounding_builds = CommitIDRange(lower=range_lower, upper=range_upper)

  analysis = MasterFlakeAnalysis.Create('m', 'b', 123, 's', 't')
  # The only existing data point matches neither bound of the build range.
  analysis.data_points = [DataPoint.Create(commit_position=1020)]

  chosen_commit_id = next_commit_position_utils.GetNextCommitIDFromBuildRange(
      analysis, surrounding_builds, requested_commit_id)

  self.assertEqual(range_upper, chosen_commit_id)
def testGetNextCommitIDFromBuildRangeAlreadyHasLowerReturnUpper(self):
  """Expects upper (1010) for position 1002 when lower (1000) already ran."""
  requested_commit_id = CommitID(commit_position=1002, revision='r1002')
  range_lower = CommitID(commit_position=1000, revision='r1000')
  range_upper = CommitID(commit_position=1010, revision='r1010')
  surrounding_builds = CommitIDRange(lower=range_lower, upper=range_upper)

  analysis = MasterFlakeAnalysis.Create('m', 'b', 123, 's', 't')
  # A data point already exists at the lower bound's commit position.
  analysis.data_points = [DataPoint.Create(commit_position=1000)]

  chosen_commit_id = next_commit_position_utils.GetNextCommitIDFromBuildRange(
      analysis, surrounding_builds, requested_commit_id)

  self.assertEqual(range_upper, chosen_commit_id)
def testGetLatestRegressionRange(self):
  """Points at 91 (pass rate 0.9) and 90 (1.0) yield the range 90..91."""
  analysis = MasterFlakeAnalysis.Create('m', 'b', 123, 's', 't')
  point_at_91 = DataPoint.Create(
      commit_position=91, pass_rate=0.9, git_hash='rev91')
  point_at_90 = DataPoint.Create(
      commit_position=90, pass_rate=1.0, git_hash='rev90')
  analysis.data_points = [point_at_91, point_at_90]

  expected_range = CommitIDRange(
      lower=CommitID(commit_position=90, revision='rev90'),
      upper=CommitID(commit_position=91, revision='rev91'))

  self.assertEqual(expected_range, analysis.GetLatestRegressionRange())
def testGetNextCommitIdExponentialSearch(self, mock_get_revision):
  """GetNextCommitId with bisect disabled expects commit 99 for range 90..100."""
  known_range = CommitIDRange(
      lower=CommitID(commit_position=90, revision='rev_90'),
      upper=CommitID(commit_position=100, revision='rev_100'))
  analyzed_points = [
      DataPoint.Create(commit_position=100, pass_rate=0.9, git_hash='rev_100'),
      DataPoint.Create(commit_position=90, pass_rate=1.0, git_hash='rev_90'),
  ]
  # The mocked revision lookup supplies the revision of the expected commit.
  mock_get_revision.return_value = 'rev_99'
  expected_result = (CommitID(commit_position=99, revision='rev_99'), None)

  self.assertEqual(
      expected_result,
      lookback_algorithm.GetNextCommitId(analyzed_points, False, known_range))
def testGetLatestRegressionRangeMultipleDataPoints(self):
  """Among seven data points, the expected latest range is 94..95."""
  analysis = MasterFlakeAnalysis.Create('m', 'b', 123, 's', 't')

  history = []
  history.append(DataPoint.Create(commit_position=96, pass_rate=0.8))
  history.append(
      DataPoint.Create(commit_position=95, pass_rate=0.9, git_hash='rev95'))
  history.append(
      DataPoint.Create(commit_position=94, pass_rate=0.0, git_hash='rev94'))
  history.append(DataPoint.Create(commit_position=93, pass_rate=0.6))
  history.append(DataPoint.Create(commit_position=92, pass_rate=1.0))
  history.append(DataPoint.Create(commit_position=91, pass_rate=0.9))
  history.append(DataPoint.Create(commit_position=90, pass_rate=1.0))
  analysis.data_points = history

  expected_range = CommitIDRange(
      lower=CommitID(commit_position=94, revision='rev94'),
      upper=CommitID(commit_position=95, revision='rev95'))

  self.assertEqual(expected_range, analysis.GetLatestRegressionRange())
def _DetermineNextCommitPosition(data_points):
  """Determines the next numerical point to check flakiness on.

  1. When no lower bound is known yet, use a quadratically-increasing step
     size based on the distance in commit position of the previous data point,
     starting from 1.
  2. When both a lower and upper bound are known, which occurs when a stable
     and a flaky point are identified, restart exponential search backward
     from the later point (biased from the right side to avoid identifying an
     obsolete culprit).
  3. When two data points show a flaky test is newly-added, bisect to find
     when it was added.

  Args:
    data_points (list): A list of DataPoints that have already fully been
        analyzed, sorted in descending order by commit position.

  Returns:
    (CommitID, CommitID): A tuple representing the next calculated commit to
        analyze based on the flakiness trend in data_points, and the culprit
        commit that flakiness was introduced in. Returns (None, culprit) if a
        culprit has been identified, (next_commit, None) if further analysis
        is needed, or (None, None) if no findings or unreproducible. At no
        point should (next_commit, culprit) both be non-None. When the test is
        found not to exist at a data point following flaky ones, the result of
        _Bisect is returned as-is.
  """
  # NOTE(review): an empty data_points skips the loop and reaches
  # data_points[-2] below, which raises IndexError. Callers presumably
  # guarantee at least one data point -- confirm.

  # Number of consecutive flaky data points seen so far, walking from the
  # highest commit position downwards.
  flakes_in_a_row = 0
  # The most recently visited data point, i.e. the one with the lowest commit
  # position seen so far.
  earliest_data_point = None

  for i, current_data_point in enumerate(data_points):
    pass_rate = current_data_point.pass_rate
    commit_position = current_data_point.commit_position
    earliest_data_point = current_data_point

    if pass_rate_util.TestDoesNotExist(pass_rate):
      if flakes_in_a_row > 0:
        # The test doesn't exist. It is likely the newly-added test is flaky
        # to begin with. Bisect the range between the nonexistent point and
        # the earliest known flaky point to find when the test was
        # introduced.
        # _Bisect requires the data points to be sorted in descending order
        # by commit_position.
        previous_data_point = data_points[i - 1]
        return _Bisect(
            CommitIDRange(
                lower=CommitID(
                    commit_position=current_data_point.commit_position,
                    revision=current_data_point.git_hash),
                upper=CommitID(
                    commit_position=previous_data_point.commit_position,
                    revision=previous_data_point.git_hash)))
      else:
        # No flaky region has been identified, no findings.
        return None, None

    if pass_rate_util.IsStableDefaultThresholds(pass_rate):
      if flakes_in_a_row > 0:
        # A regression range (stable data point --> flaky data point) has been
        # identified. Perform the exponential search on that smaller range
        # only.
        previous_data_point = data_points[i - 1]

        # Ensure the data points are sorted in descending order.
        assert (previous_data_point.commit_position >
                current_data_point.commit_position)

        # If the previous point and this one have adjacent commit positions,
        # the culprit is found.
        if previous_data_point.commit_position - commit_position == 1:
          return None, CommitID(
              commit_position=previous_data_point.commit_position,
              revision=previous_data_point.git_hash)

        if flakes_in_a_row == 1:
          # Begin the search 1 commit back from the flaky point.
          next_step_size = 1
        else:
          # Exponential search using a quadratically-increasing step size
          # based on the distance between the previous two data points.
          second_previous_data_point = data_points[i - 2]
          step_size = (
              second_previous_data_point.commit_position -
              previous_data_point.commit_position)
          next_step_size = _NextHighestSquare(step_size)

          if (previous_data_point.commit_position - next_step_size <=
              commit_position):
            # Also restart the exponential lookback step size from 1 in case
            # the quadratic step size is too large and runs to or beyond the
            # lower bound of the regression range.
            next_step_size = 1

        next_commit_position = (
            previous_data_point.commit_position - next_step_size)
        # The revision for the new position is looked up relative to the
        # already-known flaky data point.
        return CommitID(
            commit_position=next_commit_position,
            revision=git.GetRevisionForCommitPositionByAnotherCommit(
                previous_data_point.git_hash,
                previous_data_point.commit_position,
                next_commit_position)), None
      else:
        # Stable/not reproducible.
        return None, None

    # Test is flaky at the current data point.
    flakes_in_a_row += 1

  # Further analysis is needed. Reaching this point means every data point was
  # flaky, since every other outcome returns from inside the loop.
  if flakes_in_a_row == 1:
    next_step_size = 1
  else:
    previous_data_point = data_points[-2]

    # Data points are assumed to be sorted in reverse order.
    assert (previous_data_point.commit_position >
            earliest_data_point.commit_position)

    # Exponential search using a quadratically increasing step size.
    step_size = (
        previous_data_point.commit_position -
        earliest_data_point.commit_position)
    next_step_size = _NextHighestSquare(step_size)

  next_commit_position = earliest_data_point.commit_position - next_step_size

  return CommitID(
      commit_position=next_commit_position,
      revision=git.GetRevisionForCommitPositionByAnotherCommit(
          earliest_data_point.git_hash, earliest_data_point.commit_position,
          next_commit_position)), None
def RunImpl(self, parameters):
  """Pipeline for determining the next commit position to analyze.

  The lookback algorithm first proposes a commit position from the existing
  data points. Unless the analysis is finished or the proposal falls before
  the cutoff, heuristic results are preferred when available; otherwise the
  proposal is rounded off to a neighboring build that has not been analyzed.

  Args:
    parameters: Pipeline input. This method reads analysis_urlsafe_key,
        commit_position_range.lower, commit_position_range.upper and
        step_metadata.isolate_target_name from it.

  Returns:
    (NextCommitPositionOutput): next_commit_id is the commit to analyze next,
        or None when the analysis should stop. culprit_commit_id is whatever
        culprit the lookback algorithm reported, if any.
  """
  analysis_urlsafe_key = parameters.analysis_urlsafe_key
  analysis = ndb.Key(urlsafe=analysis_urlsafe_key).get()
  assert analysis

  master_name = analysis.master_name
  builder_name = analysis.builder_name
  specified_lower_bound = parameters.commit_position_range.lower
  specified_upper_bound = parameters.commit_position_range.upper

  data_points = analysis.GetDataPointsWithinCommitPositionRange(
      IntRange(lower=specified_lower_bound, upper=specified_upper_bound))

  # Data points must be sorted in reverse order by commit position before
  # being handed to the lookback algorithm.
  data_points = sorted(
      data_points, key=lambda k: k.commit_position, reverse=True)

  # A suspected build id is available when there is a regression range that
  # spans a single build cycle. During this time, bisect is preferred to
  # exponential search.
  use_bisect = (analysis.suspected_flake_build_number is not None or
                analysis.suspected_build_id is not None)
  latest_regression_range = analysis.GetLatestRegressionRange()

  calculated_next_commit_id, culprit_commit_id = (
      lookback_algorithm.GetNextCommitId(data_points, use_bisect,
                                         latest_regression_range))

  if calculated_next_commit_id is None:
    # The analysis is finished according to the lookback algorithm.
    return NextCommitPositionOutput(
        next_commit_id=None, culprit_commit_id=culprit_commit_id)

  cutoff_commit_position = (
      next_commit_position_utils.GetEarliestCommitPosition(
          specified_lower_bound, specified_upper_bound))

  if calculated_next_commit_id.commit_position < cutoff_commit_position:
    # Long-standing flake. Do not continue the analysis.
    return NextCommitPositionOutput(next_commit_id=None, culprit_commit_id=None)

  # Try the analysis' heuristic results first, if any.
  next_commit_id = (
      next_commit_position_utils.GetNextCommitIdFromHeuristicResults(
          analysis_urlsafe_key))

  if next_commit_id is not None:
    # Heuristic results are available and should be tried first.
    assert not analysis.FindMatchingDataPointWithCommitPosition(
        next_commit_id.commit_position
    ), ('Existing heuristic results suggest commit position {} which has '
        'already been run'.format(next_commit_id.commit_position))
    return NextCommitPositionOutput(
        next_commit_id=next_commit_id, culprit_commit_id=None)

  # Round off the next calculated commit position to the nearest builds on
  # both sides.
  reference_build_info = build_util.GetBuildInfo(master_name, builder_name,
                                                 analysis.build_number)
  # Use the parent master/builder when the build info names one, otherwise the
  # analysis' own master/builder.
  parent_mastername = reference_build_info.parent_mastername or master_name
  parent_buildername = (
      reference_build_info.parent_buildername or builder_name)
  target_name = parameters.step_metadata.isolate_target_name

  try:
    lower_bound_target, upper_bound_target = (
        step_util.GetBoundingIsolatedTargets(
            parent_mastername, parent_buildername, target_name,
            calculated_next_commit_id.commit_position))

    # Update the analysis' suspected build cycle if identified.
    analysis.UpdateSuspectedBuild(lower_bound_target, upper_bound_target)

    lower_bound_commit_id, upper_bound_commit_id = (
        next_commit_position_utils.GenerateCommitIDsForBoundingTargets(
            data_points, lower_bound_target, upper_bound_target))
  except AssertionError as e:
    # Fallback to searching buildbot in case builds aren't indexed as
    # IsolatedTargets.
    # TODO(crbug.com/872992): Remove fallback logic.
    analysis.LogError(e.message)
    analysis.LogWarning((
        'Failed to determine isolated targets surrounding {}. Falling back '
        'to searching buildbot').format(
            calculated_next_commit_id.commit_position))
    upper_bound_build_number = analysis.GetLowestUpperBoundBuildNumber(
        calculated_next_commit_id)
    lower_bound_build, upper_bound_build = (
        step_util.GetValidBoundingBuildsForStep(
            master_name, builder_name, analysis.step_name, None,
            upper_bound_build_number,
            calculated_next_commit_id.commit_position))

    # Update the analysis' suspected build cycle if identified.
    analysis.UpdateSuspectedBuildUsingBuildInfo(
        lower_bound_build, upper_bound_build)
    # Either bounding build may be missing, in which case that side of the
    # range stays None.
    lower_bound_commit_id = CommitID(
        commit_position=lower_bound_build.commit_position,
        revision=lower_bound_build.chromium_revision
    ) if lower_bound_build else None
    upper_bound_commit_id = CommitID(
        commit_position=upper_bound_build.commit_position,
        revision=upper_bound_build.chromium_revision
    ) if upper_bound_build else None

  # When identifying the neighboring builds of the requested commit position,
  # heuristic analysis may become eligible if the neighboring builds are
  # adjacent to one another.
  if analysis.CanRunHeuristicAnalysis():
    # Run heuristic analysis if eligible and not yet already done.
    heuristic_analysis.RunHeuristicAnalysis(analysis)

    # Try the newly computed heuristic results if any were identified.
    next_commit_id = (
        next_commit_position_utils.GetNextCommitIdFromHeuristicResults(
            analysis_urlsafe_key))
    if next_commit_id is not None:  # pragma: no branch
      # Report the commit position (not the whole CommitID) so the message
      # matches the equivalent assertion for pre-existing heuristic results.
      assert not analysis.FindMatchingDataPointWithCommitPosition(
          next_commit_id.commit_position
      ), ('Newly run heuristic results suggest commit position {} which has '
          'already been run'.format(next_commit_id.commit_position))
      return NextCommitPositionOutput(
          next_commit_id=next_commit_id, culprit_commit_id=None)

  # Pick the commit position of the returned neighboring builds that has not
  # yet been analyzed if possible, or the commit position itself when not.
  build_range = CommitIDRange(
      lower=lower_bound_commit_id, upper=upper_bound_commit_id)
  actual_next_commit_id = (
      next_commit_position_utils.GetNextCommitIDFromBuildRange(
          analysis, build_range, calculated_next_commit_id))
  assert not analysis.FindMatchingDataPointWithCommitPosition(
      actual_next_commit_id.commit_position), (
          'Rounded-off commit position {} has already been run'.format(
              actual_next_commit_id.commit_position))
  return NextCommitPositionOutput(
      next_commit_id=actual_next_commit_id,
      culprit_commit_id=culprit_commit_id)
def GetLatestRegressionRange(self):
  """Finds the most recent stable -> flaky transition in data_points.

  Returns:
    (CommitIDRange): lower is built from the stable data point with the
        highest commit position (after discarding a leading stable point),
        and upper from the flaky data point immediately above it. lower is
        None when no stable point is found; upper is None when the only data
        point is stable; both are None when there are no data points.
  """
  if not self.data_points:
    return CommitIDRange(lower=None, upper=None)

  is_stable = pass_rate_util.IsStableDefaultThresholds

  def _AsCommitID(point):
    # Converts a data point into the CommitID identifying its commit.
    return CommitID(
        commit_position=point.commit_position, revision=point.git_hash)

  if len(self.data_points) == 1:
    only_point = self.data_points[0]
    if is_stable(only_point.pass_rate):
      # Only a stable lower bound is known. The caller of this function
      # should interpret this as the flakiness being unreproducible.
      return CommitIDRange(lower=_AsCommitID(only_point), upper=None)
    # Flakiness reproduces, but no stable lower bound is known yet.
    return CommitIDRange(lower=None, upper=_AsCommitID(only_point))

  # Highest commit position first, so the latest regression is found first.
  newest_first = sorted(
      self.data_points, key=lambda point: point.commit_position, reverse=True)

  # Drop the data point created by the check for recent flakiness (if any).
  # TODO(crbug.com/843846): Remove this data sanitization once change for
  # not appending that data point is committed.
  if newest_first and is_stable(newest_first[0].pass_rate):
    newest_first = newest_first[1:]

  # Index of the stable data point with the highest commit position, if any.
  stable_index = next(
      (index for index, point in enumerate(newest_first)
       if is_stable(point.pass_rate)),
      None)

  if stable_index is None:
    # Every remaining data point is flaky. The caller should interpret this
    # as no findings yet.
    return CommitIDRange(lower=None, upper=_AsCommitID(newest_first[-1]))

  # A regression range has been identified.
  assert stable_index > 0, (
      'Non-reproducible flaky tests should only have 1 data point')

  flaky_neighbor = newest_first[stable_index - 1]
  assert not is_stable(flaky_neighbor.pass_rate)

  return CommitIDRange(
      lower=_AsCommitID(newest_first[stable_index]),
      upper=_AsCommitID(flaky_neighbor))
def testGetLatestRegressionRangeRangeNoDataPoints(self):
  """An analysis without data points yields a range with both bounds None."""
  analysis = MasterFlakeAnalysis.Create('m', 'b', 123, 's', 't')
  analysis.data_points = []

  empty_range = CommitIDRange(lower=None, upper=None)

  self.assertEqual(empty_range, analysis.GetLatestRegressionRange())