def testMonitorSwarmingTaskTimeOut(self, mocked_fn, _):
  """A swarming task exceeding its deadline ends in ERROR with no statuses."""
  mocked_build = BuildInfo(self.master_name, self.builder_name,
                           self.build_number)
  mocked_build.commit_position = 12345
  mocked_build.chromium_revision = 'a1b2c3d4'
  mocked_fn.return_value = mocked_build

  # Override swarming config settings to force a timeout.
  self.UpdateUnitTestConfigSettings('swarming_settings',
                                    {'task_timeout_hours': -1})

  flake_task = FlakeSwarmingTask.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  flake_task.task_id = 'task_id1'
  flake_task.put()

  flake_analysis = MasterFlakeAnalysis.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  flake_analysis.Save()

  pipeline = ProcessFlakeSwarmingTaskResultPipeline(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.build_number, self.test_name, 1, task_id='task_id1')
  pipeline.start_test()
  pipeline.run(self.master_name, self.builder_name, self.build_number,
               self.step_name, 'task_id1', self.build_number, self.test_name,
               1)
  pipeline.callback(callback_params=pipeline.last_params)

  # Reload from ID to get all internal properties in sync.
  pipeline = ProcessFlakeSwarmingTaskResultPipeline.from_id(
      pipeline.pipeline_id)
  pipeline.finalized()

  returned_step_name, task_info = pipeline.outputs.default.value
  self.assertEqual('abc_tests', task_info)
  self.assertEqual(self.step_name, returned_step_name)

  flake_task = FlakeSwarmingTask.Get(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  self.assertEqual(analysis_status.ERROR, flake_task.status)
  self.assertEqual({}, flake_task.tests_statuses)
def testCheckTestsRunStatuses(self, mocked_fn, _):
  """_CheckTestsRunStatuses parses a sample failure log into test statuses."""
  # Fixed: the second positional BuildInfo argument is the builder name;
  # previously self.build_number was passed there (compare the sibling tests
  # that construct BuildInfo with self.builder_name). Harmless here since the
  # object is only a mock return value, but now consistent.
  build_info = BuildInfo(self.master_name, self.builder_name,
                         self.build_number)
  build_info.commit_position = 12345
  build_info.chromium_revision = 'a1b2c3d4'
  mocked_fn.return_value = build_info

  analysis = MasterFlakeAnalysis.Create(self.master_name, self.builder_name,
                                        self.build_number, self.step_name,
                                        self.test_name)
  analysis.Save()

  task = FlakeSwarmingTask.Create(self.master_name, self.builder_name,
                                  self.build_number, self.step_name,
                                  self.test_name)
  task.put()

  call_params = ProcessFlakeSwarmingTaskResultPipeline._GetArgs(
      self.pipeline, self.master_name, self.builder_name, self.build_number,
      self.step_name, self.build_number, self.test_name, self.version_number)
  tests_statuses = (
      ProcessFlakeSwarmingTaskResultPipeline._CheckTestsRunStatuses(
          self.pipeline, base_test._SAMPLE_FAILURE_LOG, *call_params))
  self.assertEqual(base_test._EXPECTED_TESTS_STATUS, tests_statuses)
def testMonitorSwarmingTaskBuildException(self, mocked_fn, _):
  """A NO_TASK_EXCEPTION result marks the task SKIPPED with invalid data."""
  task_id = NO_TASK_EXCEPTION
  # Fixed: pass the builder name (not the build number) as the second
  # BuildInfo argument, consistent with the other tests in this file.
  build_info = BuildInfo(self.master_name, self.builder_name,
                         self.build_number)
  build_info.commit_position = 12345
  build_info.chromium_revision = 'a1b2c3d4'
  mocked_fn.return_value = build_info

  task = FlakeSwarmingTask.Create(self.master_name, self.builder_name,
                                  self.build_number, self.step_name,
                                  self.test_name)
  task.task_id = 'task_id'
  task.put()

  analysis = MasterFlakeAnalysis.Create(self.master_name, self.builder_name,
                                        self.build_number, self.step_name,
                                        self.test_name)
  analysis.Save()

  pipeline = ProcessFlakeSwarmingTaskResultPipeline()
  pipeline.start_test()
  pipeline.run(self.master_name, self.builder_name, self.build_number,
               self.step_name, task_id, self.build_number, self.test_name, 1)

  self.assertIsNone(task.task_id)
  self.assertEqual(analysis_status.SKIPPED, task.status)
  self.assertEqual(-1, analysis.data_points[-1].pass_rate)
  self.assertFalse(analysis.data_points[-1].has_valid_artifact)
def testGetFlakeSwarmingTaskData(self):
  """GetFlakeSwarmingTaskData mirrors the entity's stored fields."""
  expected_task_id = 'task_1'
  master_name = 'm'
  builder_name = 'b'
  build_number = 121
  step_name = 'browser_tests'
  test_name = 'test1'
  expected_created = datetime(2016, 9, 26, 0, 0, 0, 0)
  expected_started = datetime(2016, 9, 26, 0, 1, 0, 0)
  expected_completed = datetime(2016, 9, 26, 0, 2, 0, 0)
  iterations = 100
  passes = 100

  task = FlakeSwarmingTask.Create(master_name, builder_name, build_number,
                                  step_name, test_name)
  task.task_id = expected_task_id
  task.created_time = expected_created
  task.started_time = expected_started
  task.completed_time = expected_completed
  task.tries = iterations
  task.successes = passes
  task.error = None
  task.status = analysis_status.COMPLETED

  data = task.GetFlakeSwarmingTaskData()

  self.assertEqual(expected_task_id, data.task_id)
  self.assertEqual(expected_created, data.created_time)
  self.assertEqual(expected_started, data.started_time)
  self.assertEqual(expected_completed, data.completed_time)
  self.assertEqual(iterations, data.number_of_iterations)
  self.assertEqual(passes, data.number_of_passes)
  self.assertEqual(analysis_status.COMPLETED, data.status)
  self.assertIsNone(data.error)
def testWaitingForTheTaskId(self):
  """The trigger pipeline polls until another worker assigns a task id."""
  master_name = 'm'
  builder_name = 'b'
  build_number = 1
  step_name = 's'
  tests = ['a.b']

  pending_task = FlakeSwarmingTask.Create(
      master_name, builder_name, build_number, step_name, tests[0])
  pending_task.status = analysis_status.PENDING
  pending_task.put()

  def FakeSleep(*_):
    # Simulates another worker claiming the task while this pipeline waits:
    # the entity flips from PENDING to RUNNING and receives a task id.
    claimed_task = FlakeSwarmingTask.Get(
        master_name, builder_name, build_number, step_name, tests[0])
    self.assertEqual(analysis_status.PENDING, claimed_task.status)
    claimed_task.status = analysis_status.RUNNING
    claimed_task.task_id = 'task_id'
    claimed_task.put()

  self.mock(time, 'sleep', FakeSleep)

  pipeline = TriggerFlakeSwarmingTaskPipeline()
  task_id = pipeline.run(master_name, builder_name, build_number, step_name,
                         tests)
  self.assertEqual('task_id', task_id)
def _CreateSwarmingTask(self, master_name, builder_name, build_number,
                        step_name, test_name):
  """Creates the flake-specific swarming task entity for the given test."""
  return FlakeSwarmingTask.Create(master_name, builder_name, build_number,
                                  step_name, test_name)
def _GetBestBuildNumberToRun(master_name, builder_name,
                             preferred_run_build_number, step_name, test_name,
                             step_size, number_of_iterations):
  """Finds the optimal nearby swarming task build number to use for a cache hit.

  Builds are searched back looking for something either already completed or
  in progress. Completed builds are returned immediately, whereas for those in
  progress the closer the build number is to the original, the higher priority
  it is given.

  Args:
    master_name (str): The name of the master for this flake analysis.
    builder_name (str): The name of the builder for this flake analysis.
    preferred_run_build_number (int): The originally-requested build number to
      run the swarming task on.
    step_name (str): The name of the step to run swarming on.
    test_name (str): The name of the test to run swarming on.
    step_size (int): The distance of the last preferred build number that was
      called on this analysis. Used for determining the lookback threshold.
    number_of_iterations (int): The number of iterations being requested for
      the swarming task that is to be performed. Used to determine a
      sufficient cache hit.

  Returns:
    build_number (int): The best build number to analyze for this iteration of
      the flake analysis.
  """
  # Looks forward or backward up to half of step_size. Fixed to floor
  # division so the offset stays an int: identical to '/' on Python 2 ints,
  # but '/' would yield a float under Python 3.
  possibly_cached_build_numbers = _GetListOfNearbyBuildNumbers(
      preferred_run_build_number, step_size // 2)
  candidate_build_number = None
  candidate_flake_swarming_task_status = None

  for build_number in possibly_cached_build_numbers:
    cached_flake_swarming_task = FlakeSwarmingTask.Get(
        master_name, builder_name, build_number, step_name, test_name)
    sufficient = _IsSwarmingTaskSufficientForCacheHit(
        cached_flake_swarming_task, number_of_iterations)

    if sufficient:
      if cached_flake_swarming_task.status == analysis_status.COMPLETED:
        # Found a nearby swarming task that's already done.
        return build_number

      # Keep searching, but keeping this candidate in mind. Pending tasks are
      # considered, but running tasks are given higher priority.
      # TODO(lijeffrey): A further optimization can be to pick the swarming
      # task with the earliest ETA.
      if (candidate_build_number is None or
          (candidate_flake_swarming_task_status == analysis_status.PENDING and
           cached_flake_swarming_task.status == analysis_status.RUNNING)):
        # Either no previous candidate or a better candidate was found.
        candidate_build_number = build_number
        candidate_flake_swarming_task_status = (
            cached_flake_swarming_task.status)

  # No cached build nearby deemed adequate could be found.
  return candidate_build_number or preferred_run_build_number
def MockedSleep(*_):
  # Stand-in for time.sleep: flips the flake swarming task from PENDING to
  # RUNNING and assigns a task id, simulating another worker claiming it.
  # NOTE(review): relies on master_name/builder_name/build_number/step_name/
  # tests/self being captured from the enclosing test scope — confirm this
  # definition is nested inside a test method.
  claimed_task = FlakeSwarmingTask.Get(
      master_name, builder_name, build_number, step_name, tests[0])
  self.assertEqual(analysis_status.PENDING, claimed_task.status)
  claimed_task.status = analysis_status.RUNNING
  claimed_task.task_id = 'task_id'
  claimed_task.put()
def testProcessFlakeSwarmingTaskResultPipeline(self, *_):
  """End-to-end run: the pipeline completes and records task timing fields."""
  flake_task = FlakeSwarmingTask.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  flake_task.task_id = 'task_id1'
  flake_task.put()

  flake_analysis = MasterFlakeAnalysis.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  flake_analysis.Save()

  pipeline = ProcessFlakeSwarmingTaskResultPipeline()
  pipeline.start_test()
  pipeline.run(self.master_name, self.builder_name, self.build_number,
               self.step_name, 'task_id1', self.build_number, self.test_name,
               flake_analysis.version_number)
  pipeline.callback(callback_params=pipeline.last_params)

  # Reload from ID to get all internal properties in sync.
  pipeline = ProcessFlakeSwarmingTaskResultPipeline.from_id(
      pipeline.pipeline_id)
  returned_step_name, task_info = pipeline.outputs.default.value
  self.assertEqual('abc_tests', task_info)
  self.assertEqual(self.step_name, returned_step_name)

  flake_task = FlakeSwarmingTask.Get(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  self.assertEqual(analysis_status.COMPLETED, flake_task.status)
  self.assertEqual(base_test._EXPECTED_TESTS_STATUS,
                   flake_task.tests_statuses)
  self.assertEqual(datetime.datetime(2016, 2, 10, 18, 32, 6, 538220),
                   flake_task.created_time)
  self.assertEqual(datetime.datetime(2016, 2, 10, 18, 32, 9, 90550),
                   flake_task.started_time)
  self.assertEqual(datetime.datetime(2016, 2, 10, 18, 33, 9),
                   flake_task.completed_time)
  self.assertEqual(flake_analysis.last_attempted_swarming_task_id,
                   'task_id1')
def testCheckTestsRunStatusesWhenTestDoesNotExist(self, mocked_fn, _):
  """A test absent from the log yields zero tries and a negative pass rate."""
  mocked_build = BuildInfo(self.master_name, self.builder_name,
                           self.build_number)
  mocked_build.commit_position = 12345
  mocked_build.chromium_revision = 'a1b2c3d4'
  mocked_fn.return_value = mocked_build

  missing_test_name = 'TestSuite1.new_test'

  flake_analysis = MasterFlakeAnalysis.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      missing_test_name)
  flake_analysis.Save()

  flake_task = FlakeSwarmingTask.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      missing_test_name)
  flake_task.put()

  pipeline = ProcessFlakeSwarmingTaskResultPipeline()
  tests_statuses = pipeline._CheckTestsRunStatuses(
      base_test._SAMPLE_FAILURE_LOG, self.master_name, self.builder_name,
      self.build_number, self.step_name, self.build_number,
      missing_test_name, self.version_number)
  self.assertEqual(base_test._EXPECTED_TESTS_STATUS, tests_statuses)

  flake_task = FlakeSwarmingTask.Get(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      missing_test_name)
  self.assertEqual(0, flake_task.tries)
  self.assertEqual(0, flake_task.successes)

  flake_analysis = MasterFlakeAnalysis.GetVersion(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      missing_test_name, self.version_number)
  self.assertTrue(flake_analysis.data_points[-1].pass_rate < 0)
def _CheckTestsRunStatuses(self, output_json, master_name, builder_name, build_number, step_name, master_build_number, test_name, version_number): """Checks result status for each test run and saves the numbers accordingly. Args: output_json (dict): A dict of all test results in the swarming task. master_name (string): Name of master of swarming rerun. builder_name (dict): Name of builder of swarming rerun. build_number (int): Build Number of swarming rerun. step_name (dict): Name of step of swarming rerun. master_build_number (int): Build number of corresponding mfa. test_name (string): Name of test of swarming rerun. version_number (int): The version to save analysis results and ` to. Returns: tests_statuses (dict): A dict of different statuses for each test. Currently for each test, we are saving number of total runs, number of succeeded runs and number of failed runs. """ # Should query by test name, because some test has dependencies which # are also run, like TEST and PRE_TEST in browser_tests. tests_statuses = super(ProcessFlakeSwarmingTaskResultPipeline, self)._CheckTestsRunStatuses(output_json) tries = tests_statuses.get(test_name, {}).get('total_run', 0) successes = tests_statuses.get(test_name, {}).get('SUCCESS', 0) if tries > 0: pass_rate = successes * 1.0 / tries else: pass_rate = -1 # Special value to indicate test is not existing. flake_swarming_task = FlakeSwarmingTask.Get(master_name, builder_name, build_number, step_name, test_name) flake_swarming_task.tries = tries flake_swarming_task.successes = successes flake_swarming_task.put() self._UpdateMasterFlakeAnalysis(master_name, builder_name, build_number, step_name, master_build_number, test_name, version_number, pass_rate, flake_swarming_task) return tests_statuses
def testGetSwarmingTask(self):
  """_GetSwarmingTask returns the entity stored under the same identifiers."""
  master_name = 'm'
  builder_name = 'b'
  build_number = 123
  step_name = 's'
  test_name = 't'

  FlakeSwarmingTask.Create(
      master_name, builder_name, build_number, step_name, test_name).put()

  fetched = TriggerFlakeSwarmingTaskPipeline()._GetSwarmingTask(
      master_name, builder_name, build_number, step_name, test_name)

  self.assertEqual(master_name, fetched.master_name)
  self.assertEqual(builder_name, fetched.builder_name)
  self.assertEqual(build_number, fetched.build_number)
  self.assertEqual(step_name, fetched.step_name)
  self.assertEqual(test_name, fetched.test_name)
def testMonitorSwarmingTaskFailedToTriggerUndetectedError(self):
  """Running with no task id leaves the task id unset and records an error."""
  flake_task = FlakeSwarmingTask.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  flake_task.put()

  flake_analysis = MasterFlakeAnalysis.Create(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      self.test_name)
  flake_analysis.Save()

  pipeline = ProcessFlakeSwarmingTaskResultPipeline()
  pipeline.start_test()
  pipeline.run(self.master_name, self.builder_name, self.build_number,
               self.step_name, None, self.build_number, self.test_name, 1)

  self.assertIsNone(flake_task.task_id)
  self.assertIsNotNone(flake_task.error)
def testCheckTestsRunStatusesZeroBuildNumber(self, mocked_fn, _):
  """With build number 0 there is no previous build commit position."""
  # Fixed: pass self.builder_name (not self.build_number) as the builder
  # argument of BuildInfo, consistent with the other tests in this file.
  build_info = BuildInfo(self.master_name, self.builder_name, 0)
  build_info.commit_position = 12345
  build_info.chromium_revision = 'a1b2c3d4'
  mocked_fn.return_value = build_info

  analysis = MasterFlakeAnalysis.Create(self.master_name, self.builder_name,
                                        0, self.step_name, self.test_name)
  analysis.Save()

  task = FlakeSwarmingTask.Create(self.master_name, self.builder_name, 0,
                                  self.step_name, self.test_name)
  task.put()

  ProcessFlakeSwarmingTaskResultPipeline()._CheckTestsRunStatuses(
      {}, self.master_name, self.builder_name, 0, self.step_name, 0,
      self.test_name, 1)
  self.assertIsNone(analysis.data_points[0].previous_build_commit_position)
def testMonitorSwarmingTaskStepNotExist(self, mocked_fn, _):
  """A NO_TASK result skips analysis and yields no platform-less step name."""
  task_id = NO_TASK
  # Fixed: builder_name belongs in the second BuildInfo slot; build_number
  # was previously passed there as well as in the third slot.
  build_info = BuildInfo(self.master_name, self.builder_name,
                         self.build_number)
  build_info.commit_position = 12345
  build_info.chromium_revision = 'a1b2c3d4'
  mocked_fn.return_value = build_info

  task = FlakeSwarmingTask.Create(self.master_name, self.builder_name,
                                  self.build_number, self.step_name,
                                  self.test_name)
  task.put()

  analysis = MasterFlakeAnalysis.Create(self.master_name, self.builder_name,
                                        self.build_number, self.step_name,
                                        self.test_name)
  analysis.Save()

  pipeline = ProcessFlakeSwarmingTaskResultPipeline(
      self.master_name, self.builder_name, self.build_number, self.step_name,
      task_id, self.build_number, self.test_name, 1)
  pipeline.start_test()
  pipeline.run(self.master_name, self.builder_name, self.build_number,
               self.step_name, task_id, self.build_number, self.test_name, 1)

  # Reload from ID to get all internal properties in sync.
  pipeline = ProcessFlakeSwarmingTaskResultPipeline.from_id(
      pipeline.pipeline_id)
  pipeline.finalized()
  _, step_name_no_platform = pipeline.outputs.default.value

  self.assertIsNone(task.task_id)
  self.assertEqual(analysis_status.SKIPPED, task.status)
  self.assertEqual(-1, analysis.data_points[-1].pass_rate)
  self.assertIsNone(step_name_no_platform)
def testReset(self):
  """Reset() clears a previously assigned swarming task id."""
  flake_task = FlakeSwarmingTask.Create('m', 'b', 121, 'browser_tests',
                                        'test1')
  flake_task.task_id = 'task_id'
  flake_task.Reset()
  self.assertIsNone(flake_task.task_id)
def testStepTestName(self):
  """Step and test names round-trip through Create()."""
  flake_task = FlakeSwarmingTask.Create('m', 'b', 121, 'browser_tests',
                                        'test1')
  self.assertEqual('browser_tests', flake_task.step_name)
  self.assertEqual('test1', flake_task.test_name)
def run(self, master_name, builder_name, triggering_build_number,
        current_build_number, step_name, test_name, version_number,
        step_metadata=None, use_nearby_neighbor=False,
        manually_triggered=False):
  """Decides the next step of a flake analysis after a swarming task finishes.

  Depending on the lookback algorithm's result, this either reruns a build
  with more iterations, recurses to the next build number, completes the
  analysis (optionally kicking off try jobs against the suspected build's
  blame list), or reports an error from the last swarming task.
  """
  # Get MasterFlakeAnalysis success list corresponding to parameters.
  analysis = MasterFlakeAnalysis.GetVersion(
      master_name, builder_name, triggering_build_number, step_name,
      test_name, version=version_number)
  flake_swarming_task = FlakeSwarmingTask.Get(
      master_name, builder_name, current_build_number, step_name, test_name)

  # Don't call another pipeline if we fail.
  if flake_swarming_task.status == analysis_status.ERROR:
    # Report the last flake swarming task's error that it encountered.
    # TODO(lijeffrey): Another neighboring swarming task may be needed in this
    # one's place instead of failing altogether.
    error = flake_swarming_task.error or {
        'error': 'Swarming task failed',
        'message': 'The last swarming task did not complete as expected'
    }
    _UpdateAnalysisStatusUponCompletion(analysis, None, analysis_status.ERROR,
                                        error)
    logging.error('Error in Swarming task')
    yield UpdateFlakeBugPipeline(analysis.key.urlsafe())
    return

  if not analysis.algorithm_parameters:
    # Uses analysis' own algorithm_parameters.
    flake_settings = waterfall_config.GetCheckFlakeSettings()
    analysis.algorithm_parameters = flake_settings
    analysis.put()
  algorithm_settings = analysis.algorithm_parameters.get('swarming_rerun')

  data_points = _NormalizeDataPoints(analysis.data_points)
  # Figure out what build_number to trigger a swarming rerun on next, if any.
  next_build_number, suspected_build, iterations_to_rerun = (
      lookback_algorithm.GetNextRunPointNumber(data_points,
                                               algorithm_settings))
  if iterations_to_rerun:
    # Need to rerun the first build with more iterations.
    _UpdateIterationsToRerun(analysis, iterations_to_rerun)
    _RemoveRerunBuildDataPoint(analysis, next_build_number)
    analysis.put()

  max_build_numbers_to_look_back = algorithm_settings.get(
      'max_build_numbers_to_look_back', _DEFAULT_MAX_BUILD_NUMBERS)
  last_build_number = max(
      0, triggering_build_number - max_build_numbers_to_look_back)

  if ((next_build_number < last_build_number or
       next_build_number >= triggering_build_number) and
      not iterations_to_rerun):  # Finished.
    build_confidence_score = None
    if suspected_build is not None:
      # Use steppiness as the confidence score.
      build_confidence_score = confidence.SteppinessForBuild(
          analysis.data_points, suspected_build)

    # Update suspected build and the confidence score.
    _UpdateAnalysisStatusUponCompletion(
        analysis, suspected_build, analysis_status.COMPLETED, None,
        build_confidence_score=build_confidence_score)

    if build_confidence_score is None:
      logging.info(('Skipping try jobs due to no suspected flake build being '
                    'identified'))
    elif not _HasSufficientConfidenceToRunTryJobs(analysis):
      logging.info(('Skipping try jobs due to insufficient confidence in '
                    'suspected build'))
    else:
      # Hook up with try-jobs. Based on analysis of historical data, 60%
      # confidence could filter out almost all false positives.
      suspected_build_point = analysis.GetDataPointOfSuspectedBuild()
      assert suspected_build_point

      blamed_cls, lower_bound = _GetFullBlamedCLsAndLowerBound(
          suspected_build_point, analysis.data_points)

      if blamed_cls:
        if len(blamed_cls) > 1:
          logging.info('Running try-jobs against commits in regressions')
          start_commit_position = suspected_build_point.commit_position - 1
          start_revision = blamed_cls[start_commit_position]
          build_info = build_util.GetBuildInfo(
              master_name, builder_name, triggering_build_number)
          parent_mastername = build_info.parent_mastername or master_name
          parent_buildername = build_info.parent_buildername or builder_name
          cache_name = swarming_util.GetCacheName(
              parent_mastername, parent_buildername)
          dimensions = waterfall_config.GetTrybotDimensions(
              parent_mastername, parent_buildername)
          yield RecursiveFlakeTryJobPipeline(
              analysis.key.urlsafe(), start_commit_position, start_revision,
              lower_bound, cache_name, dimensions)
          # No update to bug yet.
          return
        else:
          logging.info('Single commit in the blame list of suspected build')
          culprit_confidence_score = confidence.SteppinessForCommitPosition(
              analysis.data_points, suspected_build_point.commit_position)
          culprit = recursive_flake_try_job_pipeline.CreateCulprit(
              suspected_build_point.git_hash,
              suspected_build_point.commit_position,
              culprit_confidence_score)
          # NOTE(review): two helper names appear for completion updates in
          # this method: _UpdateAnalysisStatusUponCompletion (above) and
          # UpdateAnalysisUponCompletion (here, with a culprit argument).
          # Confirm both exist at module scope — this may be a naming
          # inconsistency.
          UpdateAnalysisUponCompletion(
              analysis, culprit, analysis_status.COMPLETED, None)
      else:
        logging.error('Cannot run flake try jobs against empty blame list')
        error = {
            'error': 'Could not start try jobs',
            'message': 'Empty blame list'
        }
        UpdateAnalysisUponCompletion(
            analysis, None, analysis_status.ERROR, error)

    yield UpdateFlakeBugPipeline(analysis.key.urlsafe())
    return

  # Not finished: recurse onto the next build number to analyze.
  pipeline_job = RecursiveFlakePipeline(
      master_name, builder_name, next_build_number, step_name, test_name,
      version_number, triggering_build_number, step_metadata=step_metadata,
      manually_triggered=manually_triggered,
      use_nearby_neighbor=use_nearby_neighbor,
      step_size=(current_build_number - next_build_number))
  # Disable attribute 'target' defined outside __init__ pylint warning,
  # because pipeline generates its own __init__ based on run function.
  pipeline_job.target = (  # pylint: disable=W0201
      appengine_util.GetTargetNameForModule(constants.WATERFALL_BACKEND))
  pipeline_job.start(queue_name=self.queue_name or constants.DEFAULT_QUEUE)
def _GetSwarmingTask(self, master_name, builder_name, build_number, step_name,
                     master_build_number, test_name, _):
  """Fetches the flake-specific swarming task entity for the given test."""
  # master_build_number and the trailing positional argument are accepted for
  # interface compatibility but are not part of the FlakeSwarmingTask key.
  return FlakeSwarmingTask.Get(master_name, builder_name, build_number,
                               step_name, test_name)