def UpdateSuspectedCL(repo_name, revision, commit_position, approach,
                      master_name, builder_name, build_number, cl_failure_type,
                      failures, top_score):
  """Creates or updates the WfSuspectedCL for a revision suspected in a build.

  The entity is looked up by (repo_name, revision) and created when missing.
  The approach and failure type are merged into the entity-level lists. The
  build is registered with its details when seen for the first time;
  otherwise only the approach is added to the existing build record.

  Args:
    repo_name: Name of the repo the suspected CL belongs to.
    revision: Revision of the suspected CL.
    commit_position: Commit position, passed on when the entity is created.
    approach: Approach that found the CL; appended where not yet recorded.
    master_name: Master name of the build.
    builder_name: Builder name of the build.
    build_number: Build number of the build.
    cl_failure_type: Failure type the CL is suspected for.
    failures: Failures stored with a newly registered build.
    top_score: Score stored with a newly registered build.
  """
  cl = WfSuspectedCL.Get(repo_name, revision)
  if not cl:
    cl = WfSuspectedCL.Create(repo_name, revision, commit_position)

  if not cl.identified_time:  # pragma: no cover.
    cl.identified_time = time_util.GetUTCNow()
  cl.updated_time = time_util.GetUTCNow()

  # Entity-level bookkeeping: remember every approach and failure type once.
  if approach not in cl.approaches:
    cl.approaches.append(approach)
  if cl_failure_type not in cl.failure_type:
    cl.failure_type.append(cl_failure_type)

  build_key = BaseBuildModel.CreateBuildKey(master_name, builder_name,
                                            build_number)
  if build_key in cl.builds:
    # Known build: only the list of approaches may grow.
    known_build = cl.builds[build_key]
    if approach not in known_build['approaches']:
      known_build['approaches'].append(approach)
  else:
    # New build: the status is derived from builds with the same failures.
    cl.builds[build_key] = {
        'approaches': [approach],
        'failure_type': cl_failure_type,
        'failures': failures,
        'status': _GetsStatusFromSameFailure(cl.builds, failures),
        'top_score': top_score
    }

  cl.put()
def Initialize(self, crash_data):
  """(Re)Initializes this CrashAnalysis ndb.Model from ``CrashData``.

  Resets the model, copies the crash information over from ``crash_data`` and
  marks the analysis as pending with fresh request/start timestamps.

  Subclasses should extend (not override) this to (re)initialize any
  client-specific fields they may have.

  Args:
    crash_data: The ``CrashData`` to copy the crash information from.
  """
  # Get rid of any previous values there may have been.
  self.Reset()

  # (model attribute, crash_data attribute) pairs, copied in this order. Note
  # that the raw stacktrace is stored in ``stack_trace``.
  copied_fields = (
      ('crashed_version', 'crashed_version'),
      ('stack_trace', 'raw_stacktrace'),
      ('stacktrace', 'stacktrace'),
      ('signature', 'signature'),
      ('platform', 'platform'),
      ('regression_range', 'regression_range'),
      ('dependencies', 'dependencies'),
      ('dependency_rolls', 'dependency_rolls'),
      ('identifiers', 'identifiers'),
  )
  for model_field, data_field in copied_fields:
    setattr(self, model_field, getattr(crash_data, data_field))

  # Set progress properties.
  self.status = analysis_status.PENDING
  self.requested_time = time_util.GetUTCNow()
  self.started_time = time_util.GetUTCNow()
def HandleGet(self):
  """Starts one rerun pipeline per batch of crashes in a date range.

  Request parameters read:
    client_id: Client to rerun crashes for, defaults to CrashClient.CRACAS.
    start_date, end_date: Range of crashes to rerun; the defaults handed to
      time_util.GetStartEndDates are one week ago and now.
    publish: When non-empty, pipelines are asked to publish to the client.

  Returns:
    A dict whose 'data' carries a message with the number of pipelines
    started.
  """
  request = self.request
  client_id = request.get('client_id', CrashClient.CRACAS)

  now = time_util.GetUTCNow()
  last_week = time_util.GetUTCNow() - timedelta(days=7)
  start_date, end_date = time_util.GetStartEndDates(
      request.get('start_date'),
      request.get('end_date'),
      default_start=last_week,
      default_end=now)

  publish_to_client = bool(request.get('publish'))

  started = 0
  for crash_keys in IterateCrashBatches(client_id, start_date, end_date):
    rerun = RerunPipeline(client_id, crash_keys, publish_to_client)
    # Attribute defined outside __init__ - pylint: disable=W0201
    rerun.target = appengine_util.GetTargetNameForModule(RERUN_SERVICE)
    rerun.start(queue_name=RERUN_QUEUE)
    started += 1

  if started:
    message = '%d rerun pipeline(s) kicked off.' % started
  else:
    message = 'No rerun pipeline started.'

  return {'data': {'message': message}}
def _GetCQHiddenFlakeQueryStartTime():
  """Decides the time window of the next query for cq hidden flakes.

  The time of the last query is used to decide whether the query should run
  at all: it runs if it never ran before, or if the last run is at least
  _CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL hours old.

  Returns:
    (str, str): Start and end time of the query, both formatted by
      time_util.FormatDatetime (original documentation:
      %Y-%m-%d %H:%M:%S UTC), or (None, None) if the query should not run
      yet.
  """
  query_interval = timedelta(hours=_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL)

  # A previous query newer than this boundary means it is too early to rerun.
  newest_allowed_last_query_time = time_util.GetUTCNow() - query_interval

  # The window reaches back one rough build cycle plus a small overlap.
  window_start = time_util.FormatDatetime(time_util.GetUTCNow() - timedelta(
      hours=_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL + _ROUGH_MAX_BUILD_CYCLE_HOURS,
      minutes=_CQ_HIDDEN_FLAKE_QUERY_OVERLAP_MINUTES))
  window_end = time_util.FormatDatetime(
      time_util.GetUTCNow() - query_interval)

  last_query_time = _GetLastCQHiddenFlakeQueryTime()
  if not last_query_time:
    # Only before the first time of running the query.
    return window_start, window_end

  if last_query_time <= newest_allowed_last_query_time:
    return window_start, window_end
  return None, None
def _GetETAToStartAnalysis(manually_triggered):
  """Returns the UTC datetime.datetime at which the analysis should start.

  Non-urgent Swarming tasks are kept off the PST peak hours, 11am to 6pm on
  workdays: during that window the start is pushed to 6pm PST plus a random
  delay of up to 30 minutes.

  Args:
    manually_triggered (bool): True if the analysis is from manual request,
      like by a Chromium sheriff.

  Returns:
    The ETA as a UTC datetime.datetime.
  """
  if manually_triggered:
    # Manual requests are urgent: run right away.
    return time_util.GetUTCNow()

  pst_now = time_util.GetPSTNow()
  # Weekday 5 and 6 are PST Saturday and Sunday; peak hours are [11, 18).
  is_weekend = pst_now.weekday() >= 5
  if is_weekend or pst_now.hour < 11 or pst_now.hour >= 18:
    return time_util.GetUTCNow()

  # Random latency within 30 minutes to avoid sudden burst traffic to
  # Swarming.
  jitter_seconds = random.randint(0, 30 * 60)
  # Rewind to the top of the current hour, jump to 6pm, then add the jitter.
  until_eta = timedelta(
      hours=18 - pst_now.hour,
      minutes=-pst_now.minute,
      seconds=-pst_now.second + jitter_seconds,
      microseconds=-pst_now.microsecond)

  # Convert back to UTC.
  return time_util.ConvertPSTToUTC(pst_now + until_eta)
def _UpdateCulprit(culprit_urlsafe_key,
                   revert_status=None,
                   revert_cl=None,
                   skip_revert_reason=None,
                   revert_submission_status=None):
  """Updates the revert-related fields of a culprit entity and saves it.

  Each optional argument overrides the stored value only when it is truthy;
  otherwise the stored value is kept.

  Args:
    culprit_urlsafe_key: Urlsafe key of the culprit entity to update.
    revert_status: New status of the revert, if any.
    revert_cl: The created revert CL, if any. When given, the notification
      is marked completed and the revert/notification times are set to now.
    skip_revert_reason: New reason for skipping the revert, if any.
    revert_submission_status: New status of the revert submission, if any.
  """
  culprit = entity_util.GetEntityFromUrlsafeKey(culprit_urlsafe_key)
  assert culprit

  culprit.should_be_reverted = True

  overrides = (
      ('revert_status', revert_status),
      ('revert_cl', revert_cl),
      ('skip_revert_reason', skip_revert_reason),
      ('revert_submission_status', revert_submission_status),
  )
  for field_name, new_value in overrides:
    setattr(culprit, field_name, new_value or getattr(culprit, field_name))

  if culprit.revert_status != analysis_status.RUNNING:  # pragma: no branch
    # Only stores revert_pipeline_id when the revert is ongoing.
    culprit.revert_pipeline_id = None

  if revert_cl:
    culprit.cr_notification_status = analysis_status.COMPLETED
    culprit.revert_created_time = time_util.GetUTCNow()
    culprit.cr_notification_time = time_util.GetUTCNow()

  submission_status = culprit.revert_submission_status
  if submission_status != analysis_status.RUNNING:  # pragma: no branch
    # Same rule for the pipeline submitting the revert.
    culprit.submit_revert_pipeline_id = None

  if submission_status == analysis_status.COMPLETED:
    culprit.revert_committed_time = time_util.GetUTCNow()

  culprit.put()
def NeedANewAnalysis(master_name, builder_name, build_number, failed_steps,
                     build_completed, force):
  """Decides whether the build needs a new analysis, preparing the entity.

  A WfAnalysis entity for the given build is created if none exists. When a
  new analysis is needed because it is forced or because a new failed step
  showed up, the existing entity is reset (keeping the result of the last
  analysis) and saved with a fresh request time.

  Args:
    master_name: Master name of the build.
    builder_name: Builder name of the build.
    build_number: Build number of the build.
    failed_steps: Failed steps of the build, compared against the steps of
      the last completed analysis.
    build_completed: Whether the build cycle is completed now.
    force: Whether a rerun is requested even if an analysis exists.

  Returns:
    True if an analysis is needed, otherwise False.
  """
  analysis = WfAnalysis.Get(master_name, builder_name, build_number)

  if not analysis:
    # The build failure is not analyzed yet.
    analysis = WfAnalysis.Create(master_name, builder_name, build_number)
    analysis.status = analysis_status.PENDING
    analysis.request_time = time_util.GetUTCNow()
    analysis.put()
    return True

  if force:
    # A new analysis could be forced if last analysis was completed.
    if analysis.completed:
      analysis.Reset()
      analysis.request_time = time_util.GetUTCNow()
      analysis.put()
      return True

    # TODO: start a new analysis if the last one has started running but it
    # has no update for a considerable amount of time, eg. 10 minutes.
    logging.info('Existing analysis is not completed yet. No new analysis.')
    return False

  if failed_steps and analysis.completed:
    # If there is any new failed step, a new analysis is needed.
    for step in failed_steps:
      already_analyzed = any(
          step == known_step for known_step in analysis.not_passed_steps)
      if not already_analyzed:
        logging.info('At least one new failed step is detected: %s', step)
        analysis.Reset()
        analysis.request_time = time_util.GetUTCNow()
        analysis.put()
        return True

  # Start a new analysis if the build cycle wasn't completed in last analysis,
  # but now it is completed. This will potentially trigger a try-job run.
  if analysis.completed and not analysis.build_completed and build_completed:
    return True

  # TODO: support following cases
  # * Automatically retry if last analysis failed with errors.
  # * Analysis is not complete and no update in the last 5 minutes.
  logging.info('Not match any cases. No new analysis.')
  return False
def _CreateIssuesForFlakes(flake_groups_to_create_issue,
                           num_of_issues_to_create):
  """Creates or updates monorail bugs for flake groups, within a limit.

  Args:
    flake_groups_to_create_issue ([FlakeGroupByOccurrences]): A list of flake
      groups that are not yet linked with a FlakeIssue.
    num_of_issues_to_create (int): Total number for
      - New bugs created
      - Existing bugs found, linked to flakes and then updated.
      Note that it's possible that more bugs are found and linked to flakes,
      but will not be updated because of the limit.
  """
  for flake_group in flake_groups_to_create_issue:
    try:
      quota_left = num_of_issues_to_create > 0

      issue_id = None
      if len(flake_group.flakes) == 1:
        # Single flake: its own info is used to look for or create a bug.
        # Without quota left, only an existing monorail bug is looked for.
        only_flake = flake_group.flakes[0]
        issue_generator = FlakeDetectionIssueGenerator(
            only_flake, flake_group.num_occurrences)
        issue_id = _CreateIssueForFlake(
            issue_generator, only_flake, create_or_update_bug=quota_left)
      elif quota_left:
        # Several flakes: a bug is only created while quota is left;
        # otherwise issue_id stays None.
        issue_id = _CreateIssueForFlakeGroup(flake_group)

      if not issue_id or not quota_left:
        continue

      # A monorail bug has been created or updated.
      num_of_issues_to_create -= 1

      # last_updated_time_by_flake_detection is only applicable to Flake
      # Detection because Flake Detection can update an issue at most once
      # every 24 hours. last_updated_time_in_monorail is changed as well to
      # keep in sync.
      flake_issue = GetFlakeIssue(flake_group.flakes[0])
      flake_issue.last_updated_time_by_flake_detection = time_util.GetUTCNow()
      flake_issue.last_updated_time_in_monorail = time_util.GetUTCNow()
      flake_issue.put()
    except HttpError as error:
      # Benign exceptions (HttpError 403) may happen when FindIt tries to
      # update an issue that it doesn't have permission to. Not re-raised so
      # that the loop can move on to the next groups.
      logging.warning('Failed to create or update issue due to error: %s',
                      error)
def run(self, *_args, **_kwargs):
  """Runs the crash analysis and stores its outcome on the analysis model.

  N.B., due to the structure of AppEngine pipelines, this method must accept
  the same arguments as are passed to ``__init__``; since they were already
  consumed there, all of them (except ``self``) are discarded here.
  """
  # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis
  # to guard against race conditions?
  analysis = self._findit.GetAnalysis(self._crash_identifiers)

  # Flag the model as running before the (potentially long) analysis starts.
  analysis.pipeline_status_path = self.pipeline_status_path()
  analysis.status = analysis_status.RUNNING
  analysis.started_time = time_util.GetUTCNow()
  analysis.findit_version = appengine_util.GetCurrentVersion()
  analysis.put()

  # Actually do the analysis.
  culprit = self._findit.FindCulprit(analysis.ToCrashReport())
  if culprit is None:
    # Nothing was found: report an all-negative result.
    result = {'found': False}
    tags = {
        'found_suspects': False,
        'found_project': False,
        'found_components': False,
        'has_regression_range': False,
        'solution': None,
    }
  else:
    result, tags = culprit.ToDicts()

  # Save the results; the status is flipped to completed at the very end.
  analysis.completed_time = time_util.GetUTCNow()
  analysis.result = result
  for tag_name, tag_value in tags.iteritems():
    # TODO(http://crbug.com/602702): make it possible to add arbitrary tags.
    # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!
    if hasattr(analysis, tag_name):  # pragma: no cover
      setattr(analysis, tag_name, tag_value)

    # Tags that have a same-named metric in ``monitoring`` are also counted.
    if hasattr(monitoring, tag_name):
      getattr(monitoring, tag_name).increment({
          tag_name: tag_value,
          'client_id': self.client_id
      })

  analysis.status = analysis_status.COMPLETED
  analysis.put()
def DownloadBuildData(master_name, builder_name, build_number):
  """Downloads build data and returns a WfBuild instance.

  The WfBuild entity caches the data; it is only refreshed when
  _BuildDataNeedUpdating says so. When 'use_chrome_build_extract' is enabled
  the build archive is tried first and the build master is the fallback.

  Args:
    master_name: Master name of the build.
    builder_name: Builder name of the build.
    build_number: Build number of the build.

  Returns:
    The WfBuild instance, or None if the archive had no data and
    lock_util.WaitUntilDownloadAllowed returned a false value.
  """
  build = (
      WfBuild.Get(master_name, builder_name, build_number) or
      WfBuild.Create(master_name, builder_name, build_number))

  # Cache the data to avoid pulling from master again.
  if not _BuildDataNeedUpdating(build):
    return build

  settings = waterfall_config.GetDownloadBuildDataSettings()
  use_cbe = settings.get('use_chrome_build_extract')

  if use_cbe:
    # Retrieve build data from build archive first.
    build.data = buildbot.GetBuildDataFromArchive(
        master_name, builder_name, build_number, HTTP_CLIENT_NO_404_ERROR)

    if build.data:
      build.data_source = CHROME_BUILD_EXTRACT
    elif not lock_util.WaitUntilDownloadAllowed(
        master_name):  # pragma: no cover
      return None

  if not (build.data and use_cbe):
    # Retrieve build data from build master.
    build.data = buildbot.GetBuildDataFromBuildMaster(
        master_name, builder_name, build_number, HTTP_CLIENT_LOGGING_ERRORS)
    build.data_source = BUILDBOT_MASTER

  build.last_crawled_time = time_util.GetUTCNow()
  build.put()
  return build
def _SavesNewCLConfidence():
  """Recomputes the confidence of each type of result over recent CLs.

  The time range covers the TIME_RANGE_DAYS days ending at today's UTC
  midnight. Confidence is computed separately for compile and test failures
  and for results from heuristic, try job, and both together, then passed to
  SuspectedCLConfidence.Update.

  Returns:
    The updated SuspectedCLConfidence entity.
  """
  window_end = time_util.GetUTCNow().replace(
      hour=0, minute=0, second=0, microsecond=0)
  window_start = window_end - datetime.timedelta(days=TIME_RANGE_DAYS)

  heuristic_data = _GetCLDataForHeuristic(window_start, window_end)
  try_job_data, both_data = _GetCLDataForTryJob(window_start, window_end)

  compile_heuristic = _CalculateConfidenceLevelsForHeuristic(
      heuristic_data[failure_type.COMPILE])
  test_heuristic = _CalculateConfidenceLevelsForHeuristic(
      heuristic_data[failure_type.TEST])

  compile_try_job = _CreateConfidenceInformation(
      try_job_data[failure_type.COMPILE])
  test_try_job = _CreateConfidenceInformation(try_job_data[failure_type.TEST])

  compile_both = _CreateConfidenceInformation(both_data[failure_type.COMPILE])
  test_both = _CreateConfidenceInformation(both_data[failure_type.TEST])

  confidence = SuspectedCLConfidence.Get()
  # Update takes the three compile values first, then the three test values.
  confidence.Update(window_start, window_end, compile_heuristic,
                    compile_try_job, compile_both, test_heuristic,
                    test_try_job, test_both)
  return confidence
def CountRecentCommits(repo_url,
                       ref='refs/heads/master',
                       time_period=datetime.timedelta(hours=1)):
  """Counts the commits that landed on a ref within the most recent period.

  By default this counts the commits landed in the master ref during the last
  hour, but any ref and any size of period can be given.

  Args:
    repo_url (str): Url to the repo.
    ref (str): ref to count commits on.
    time_period (datetime.timedelta): window of time, ending now, in which to
      count commits.

  Returns:
    An integer representing the number of commits whose committer time falls
    within the last ``time_period``.
  """
  commit_count = 0
  oldest_counted_time = time_util.GetUTCNow() - time_period
  repo = NonCachedGitilesRepository(FinditHttpClient(), repo_url, ref)

  page_start = ref
  while page_start:
    # 100 is a reasonable size for a page.
    changelogs, page_start = repo.GetNChangeLogs(page_start, 100)
    for changelog in changelogs:
      # This assumes that GetNChangeLogs returns changelogs in newer to older
      # order, so the first commit that is too old ends the count.
      if changelog.committer.time < oldest_counted_time:
        return commit_count
      commit_count += 1

  return commit_count
def run(self, failure_info, change_logs, deps_info, signals, build_completed):
  """Identifies culprit CLs and saves the outcome on the WfAnalysis.

  Args:
    failure_info (dict): Output of pipeline DetectFirstFailurePipeline.
    change_logs (dict): Output of pipeline PullChangelogPipeline.
    deps_info: Passed through to build_failure_analysis.AnalyzeBuildFailure.
    signals (dict): Output of pipeline ExtractSignalPipeline.
    build_completed: Stored on the analysis as ``build_completed``.

  Returns:
    analysis_result returned by build_failure_analysis.AnalyzeBuildFailure.
  """
  master_name = failure_info['master_name']
  builder_name = failure_info['builder_name']
  build_number = failure_info['build_number']

  analysis_result, suspected_cls = (
      build_failure_analysis.AnalyzeBuildFailure(failure_info, change_logs,
                                                 deps_info, signals))

  # Record the completed heuristic analysis.
  analysis = WfAnalysis.Get(master_name, builder_name, build_number)
  analysis.build_completed = build_completed
  analysis.result = analysis_result
  analysis.status = analysis_status.COMPLETED
  analysis.result_status = _GetResultAnalysisStatus(analysis_result)
  analysis.suspected_cls = _GetSuspectedCLsWithOnlyCLInfo(suspected_cls)
  analysis.end_time = time_util.GetUTCNow()
  analysis.put()

  # Save suspected_cls to data_store.
  _SaveSuspectedCLs(suspected_cls, master_name, builder_name, build_number,
                    failure_info['failure_type'])

  return analysis_result
def _GetMatchingFailureGroups(build_failure_type):
  """Fetches recent WfFailureGroups of the given build failure type.

  Args:
    build_failure_type: Failure type the groups must have.

  Returns:
    The WfFailureGroup entities of that type created within the last
    'max_seconds_look_back_for_group' seconds (from the try job settings).
  """
  now = time_util.GetUTCNow()
  look_back_seconds = waterfall_config.GetTryJobSettings().get(
      'max_seconds_look_back_for_group')
  earliest_time = now - timedelta(seconds=look_back_seconds)

  same_type = WfFailureGroup.build_failure_type == build_failure_type
  recent_enough = WfFailureGroup.created_time >= earliest_time
  return WfFailureGroup.query(ndb.AND(same_type, recent_enough)).fetch()
def AnalyzeDetectedFlakeOccurrence(flake, flake_occurrence, bug_id):
  """Requests an analysis for a flake occurrence found by Flake Detection.

  Builds and saves a FlakeAnalysisRequest for the occurrence's test and build
  step, then hands it to AsyncProcessFlakeReport.

  Args:
    flake (Flake): The Flake triggering this analysis.
    flake_occurrence (FlakeOccurrence): A FlakeOccurrence model entity.
    bug_id (int): Id of the bug to update after the analysis finishes.
  """
  analysis_request = FlakeAnalysisRequest.Create(flake_occurrence.test_name,
                                                 False, bug_id)
  analysis_request.flake_key = flake.key

  # The build step is identified by the legacy master/build number and the
  # luci builder of the occurrence's build configuration.
  build_config = flake_occurrence.build_configuration
  analysis_request.AddBuildStep(
      build_config.legacy_master_name, build_config.luci_builder,
      build_config.legacy_build_number, flake_occurrence.step_ui_name,
      time_util.GetUTCNow())
  analysis_request.Save()

  logging.info('flake report for detected flake occurrence: %r',
               analysis_request)
  AsyncProcessFlakeReport(
      analysis_request,
      user_email=constants.DEFAULT_SERVICE_ACCOUNT,
      is_admin=False)
def GenerateAuthToken(key_name, user_id, action_id='', when=None):
  """Generates a URL-safe token based on XSRFToken but for general purpose.

  The token is the urlsafe base64 encoding of an HMAC digest over the user
  id, the action id and the timestamp, followed by the delimiter and the
  timestamp itself.

  Args:
    key_name (str): name of secret key to generate token.
    user_id (str): the user ID of the authenticated user.
    action_id (str): a string identifier of the action they requested
      authorization for.
    when (datetime): the time when the user was authorized for this action.
      If not set the current utc time is used.

  Returns:
    A string token.
  """
  secret = SecretKey.GetSecretKey(key_name)
  if not when:
    when = time_util.GetUTCNow()
  when_timestamp = time_util.ConvertToTimestamp(when)

  # hmac.new is called without digestmod on purpose: the digest in use must
  # not change, or tokens would stop matching.
  digester = hmac.new(secret)
  signed_parts = (str(user_id), _DELIMITER, action_id, _DELIMITER,
                  str(when_timestamp))
  for part in signed_parts:
    digester.update(part)

  raw_token = '%s%s%d' % (digester.digest(), _DELIMITER, when_timestamp)
  return base64.urlsafe_b64encode(raw_token)
def _GetDailyNumberOfRevertedCulprits(limit):
  """Counts compile culprits with a revert created in the last 24 hours.

  Args:
    limit: Upper bound passed to the datastore ``count``.

  Returns:
    The count returned by the query, bounded by ``limit``.
  """
  one_day_ago = time_util.GetUTCNow() - timedelta(days=1)

  # TODO(chanli): improve the check for a rare case when two pipelines revert
  # at the same time.
  is_compile_culprit = WfSuspectedCL.failure_type == failure_type.COMPILE
  reverted_recently = WfSuspectedCL.revert_created_time >= one_day_ago
  recent_reverts = WfSuspectedCL.query(
      ndb.AND(is_compile_culprit, reverted_recently))
  return recent_reverts.count(limit)
def HandleGet(self):
  """Lists WfAnalysis entities detected to have been aborted.

  Reads 'start_date' and 'end_date' from the request; _GetStartEndDates
  resolves them, given today's UTC midnight. At most _COUNT aborted analyses
  whose build started in [start_date, end_date) are returned, newest first.

  Returns:
    A dict naming the dashboard template and the data to render.
  """
  midnight_today = datetime.combine(time_util.GetUTCNow(), time.min)
  start_date, end_date = _GetStartEndDates(
      self.request.get('start_date'), self.request.get('end_date'),
      midnight_today)

  # ndb filters need the comparison operator, hence ``== True``.
  aborted_in_range = ndb.AND(WfAnalysis.build_start_time >= start_date,
                             WfAnalysis.build_start_time < end_date,
                             WfAnalysis.aborted == True)
  query = WfAnalysis.query(aborted_in_range)
  analyses = query.order(-WfAnalysis.build_start_time).fetch(_COUNT)

  analyses_data = [_Serialize(analysis) for analysis in analyses]

  return {
      'template': 'pipeline_errors_dashboard.html',
      'data': {
          'start_date': time_util.FormatDatetime(start_date),
          'end_date': time_util.FormatDatetime(end_date),
          'analyses': analyses_data,
      },
  }
def _FormatDisplayData(try_job_data):
  """Returns information of a WfTryJobData/FlakeTryJobData as a dict.

  Args:
    try_job_data: The WfTryJobData or FlakeTryJobData entity to present.

  Returns:
    The entity's ``to_dict()`` with the time fields formatted, a
    'pending_time' added, the last buildbucket response dumped as sorted
    JSON, and the 'try_job_key' removed.
  """
  display_data = try_job_data.to_dict()

  time_attributes = ('created_time', 'start_time', 'end_time', 'request_time')
  display_data.update({
      name: time_util.FormatDatetime(display_data[name])
      for name in time_attributes
  })

  if try_job_data.start_time:
    # Started: the job was pending from the request until the start.
    pending_time = _FormatDuration(try_job_data.request_time,
                                   try_job_data.start_time)
  else:
    # Not started yet: it is pending since its creation.
    pending_time = _FormatDuration(try_job_data.created_time,
                                   time_util.GetUTCNow())
  display_data['pending_time'] = pending_time

  response_for_display = _PrepareBuildbucketResponseForDisplay(
      display_data['last_buildbucket_response'])
  display_data['last_buildbucket_response'] = json.dumps(
      response_for_display, sort_keys=True)

  if isinstance(try_job_data, FlakeTryJobData):
    # Flake try job data does not include try_job_type.
    display_data['try_job_type'] = 'flake'
    analysis_key = try_job_data.analysis_key
    display_data['analysis_key'] = (
        analysis_key.urlsafe() if analysis_key else None)

  # Do not include the try job key in the response.
  display_data.pop('try_job_key', None)
  return display_data
def _GetCQFlagsOrExplanation(self, commit_timestamp):
  """Returns the footers that skip CQ checks, or why they are not skipped.

  Args:
    commit_timestamp (datetime): When the original CL landed. It is
      subtracted from time_util.GetUTCNow(), so it presumably has to be a
      naive UTC datetime as well - TODO confirm.

  Returns:
    (str): The No-Presubmit/No-Tree-Checks/No-Try footers if the original CL
      landed within the last day, otherwise a comment line explaining that
      the CQ checks are not skipped.
  """
  seconds_per_day = 24 * 60 * 60
  age = time_util.GetUTCNow() - commit_timestamp
  # Compare the full elapsed time. ``timedelta.days`` is rounded down, so the
  # former ``days > 1`` check only triggered for CLs at least two days old,
  # contradicting the "> 1 day ago" explanation below.
  if age.total_seconds() > seconds_per_day:
    return (
        '# Not skipping CQ checks because original CL landed > 1 day ago.\n\n')
  return 'No-Presubmit: true\nNo-Tree-Checks: true\nNo-Try: true\n'
def _UpdateAnalysisStatusUponCompletion(analysis,
                                        suspected_build,
                                        status,
                                        error,
                                        build_confidence_score=None):
  """Stores the outcome of the build-level analysis and saves the entity.

  Args:
    analysis: The analysis entity to update.
    suspected_build: The suspected flake build number, or None if not found.
    status: The new status of the analysis.
    error: The error to record, if any.
    build_confidence_score: Confidence in the suspected build, if any.
  """
  if suspected_build is None:
    found_status = result_status.NOT_FOUND_UNTRIAGED
  else:
    found_status = result_status.FOUND_UNTRIAGED

  # Start from "finished, try jobs skipped"; this may be revised below.
  analysis.end_time = time_util.GetUTCNow()
  analysis.status = status
  analysis.confidence_in_suspected_build = build_confidence_score
  analysis.try_job_status = analysis_status.SKIPPED
  analysis.suspected_flake_build_number = suspected_build
  analysis.result_status = found_status

  if error:
    analysis.error = error
    analysis.put()
    return

  # Clear info about the last attempted swarming task since it will be stored
  # in the data point.
  analysis.last_attempted_swarming_task_id = None
  analysis.last_attempted_build_number = None

  if _HasSufficientConfidenceToRunTryJobs(analysis):
    # Analysis is not finished yet: try jobs are about to be run.
    analysis.try_job_status = None
    analysis.end_time = None

  analysis.put()
def CanAutoCreateRevert(culprit, parameters):
  """Checks if Findit can auto create a revert.

  Findit can auto create a revert if:
    1. The culprit is also being suspected by the heuristic analysis;
    2. Auto create revert is turned on in the action settings;
    3. The number of reverts in past 24 hours is less than the daily limit.

  Args:
    culprit (Basestring): Urlsafe key for the suspected cl.
    parameters (CulpritActionParameters): Parameters to run culprit action
      pipelines.

  Returns:
    True if all of the conditions above hold, otherwise False.
  """
  if culprit not in parameters.heuristic_cls:
    return False

  action_settings = waterfall_config.GetActionSettings()
  if not action_settings.get('auto_create_revert'):
    # Auto revert has been turned off.
    return False

  daily_limit = action_settings.get(
      'auto_create_revert_daily_threshold_test',
      _DEFAULT_AUTO_CREATE_REVERT_DAILY_THRESHOLD_TEST)
  reverts_today = _GetDailyNumberOfRevertedCulprits(daily_limit)
  if reverts_today >= daily_limit:
    # Auto revert has exceeded daily limit.
    logging.info('Auto reverts for test culprits on %s has met daily limit.',
                 time_util.FormatDatetime(time_util.GetUTCNow()))
    return False

  return True
def Create(cls, repo_name, revision, commit_position):  # pragma: no cover
  """Builds a new, unsaved instance keyed by repo name and revision.

  Args:
    repo_name: Name of the repo of the CL.
    revision: Revision of the CL.
    commit_position: Commit position of the CL.

  Returns:
    The new instance with ``identified_time`` set to the current UTC time.
    It is not put to the datastore here.
  """
  entity_key = cls._CreateKey(repo_name, revision)
  suspected_cl = cls(key=entity_key)

  suspected_cl.repo_name = repo_name
  suspected_cl.revision = revision
  suspected_cl.commit_position = commit_position
  suspected_cl.identified_time = time_util.GetUTCNow()
  return suspected_cl
def OnFinalized(self, parameters):
  """Records the final status of the analysis once the root pipeline ends.

  Args:
    parameters: Pipeline parameters; ``analysis_urlsafe_key`` identifies the
      analysis entity to update.
  """
  if not self.IsRootPipeline():
    # AnalyzeFlakePipeline is recursive. Only the root pipeline should update.
    return

  analysis = ndb.Key(urlsafe=parameters.analysis_urlsafe_key).get()
  assert analysis, 'Cannot retrieve analysis entry from datastore'

  if self.was_aborted:
    # Capture any undetected error.
    error = analysis.GetError()
    monitoring.aborted_pipelines.increment({'type': 'flake'})
  else:
    # Keep the analysis' already-detected error, if any.
    error = analysis.error

  if error:
    final_status = analysis_status.ERROR
  else:
    final_status = analysis_status.COMPLETED
  analysis.Update(
      error=error, end_time=time_util.GetUTCNow(), status=final_status)

  # TODO(crbug.com/847644): If error is set, report to ts_mon.

  # Monitor completion of pipeline.
  monitoring.completed_pipelines.increment({'type': 'flake'})
def _CheckForNewAnalysis(request, rerun=False):
  """Checks if a new analysis is needed for the requested flake.

  Two paths exist. If there is no existing request, or the existing one is
  attached to a different bug, the request is saved as a new version and a
  supported build step is looked for. Otherwise the decision is delegated to
  _MergeNewRequestIntoExistingOne.

  Args:
    request (FlakeAnalysisRequest): The request to analyze a flake.
    rerun (bool): Indicates a forced rerun by admin.

  Returns:
    (version_number, build_step)
    version_number (int): The version of the FlakeAnalysisRequest if a new
        analysis is needed; otherwise 0.
    build_step (BuildStep): a BuildStep instance if a new analysis is needed;
        otherwise None.
  """
  existing_request = FlakeAnalysisRequest.GetVersion(key=request.name)
  # A "different bug" requires both requests to carry a bug id and the ids to
  # differ; a missing bug id on either side does not count.
  if not existing_request or (existing_request.bug_id and request.bug_id and
                              existing_request.bug_id != request.bug_id):
    # If no existing analysis or last analysis was for a different bug, randomly
    # pick one configuration for a new analysis.
    if existing_request:
      # Make a copy to preserve the version number of existing analysis and
      # prevent concurrent analyses of the same flake.
      # The merged email list is computed before CopyFrom and assigned after
      # it: obscured emails of the existing request plus the de-duplicated
      # emails of the new request.
      user_emails = (email_util.ObscureEmails(
          existing_request.user_emails, ['google.com']) +
                     list(set(request.user_emails)))
      existing_request.CopyFrom(request)
      request = existing_request
      request.user_emails = user_emails
    request.user_emails_obscured = False
    request.user_emails_last_edit = time_util.GetUTCNow()

    swarmed, supported, supported_build_step = _CheckFlakeSwarmedAndSupported(
        request)
    request.swarmed = swarmed
    request.supported = supported

    if supported_build_step and not request.is_step:
      supported_build_step.scheduled = True  # This step will be analyzed.

    # For unsupported or step-level flakes, still save them for monitoring.
    _, saved = request.Save(
        retry_on_conflict=False)  # Create a new version.

    if not saved or not supported_build_step or request.is_step:
      # No new analysis if:
      # 1. Another analysis was just triggered.
      # 2. No representative step is Swarmed Gtest.
      # 3. The flake is a step-level one.
      return 0, None

    return request.version_number, supported_build_step
  else:
    # If no bug is attached to the existing analysis or the new request, or both
    # are attached to the same bug, start a new analysis with a different
    # configuration. For a configuration that was analyzed 7 days ago, reset it
    # to use the new reported step of the same configuration.
    # TODO: move this setting to config.
    return _MergeNewRequestIntoExistingOne(request, existing_request, rerun)
def CreateFlakeAnalysisRequest(flake):
  """Builds a FlakeAnalysisRequest carrying all build steps of a flake.

  Args:
    flake: Object providing ``name``, ``is_step``, ``bug_id`` and
      ``build_steps``; each build step provides ``master_name``,
      ``builder_name``, ``build_number`` and ``step_name``.

  Returns:
    The new FlakeAnalysisRequest. It is not saved here.
  """
  request = FlakeAnalysisRequest.Create(flake.name, flake.is_step,
                                        flake.bug_id)

  for build_step in flake.build_steps:
    # Every step is added with the current UTC time.
    reported_time = time_util.GetUTCNow()
    request.AddBuildStep(build_step.master_name, build_step.builder_name,
                         build_step.build_number, build_step.step_name,
                         reported_time)

  return request
def RunImpl(self, build_key):
  """Triggers flake analyses for flaky tests found by CI failure analysis.

  For each step with flaky tests in the build's WfAnalysis, a flake analysis
  is requested per flaky test. With throttling enabled (the default of the
  'throttle_flake_analyses' setting), only the first scheduled test of a step
  is analyzed. The per-step counts are reported to monitoring at the end.

  Args:
    build_key: Key of the build; ``GetParts()`` yields master name, builder
      name and build number.
  """
  master_name, builder_name, build_number = build_key.GetParts()

  flake_settings = waterfall_config.GetCheckFlakeSettings()
  throttled = flake_settings.get('throttle_flake_analyses', True)

  analysis = WfAnalysis.Get(master_name, builder_name, build_number)

  # Nothing to do without an analysis or without flaky tests.
  if not analysis or not analysis.flaky_tests:
    return

  # step -> operation ('analyzed', 'throttled', 'error') -> count.
  analysis_counts = defaultdict(lambda: defaultdict(int))
  for step, flaky_tests in analysis.flaky_tests.iteritems():
    logging.info('%s/%s/%s/%s has %s flaky tests.', master_name, builder_name,
                 build_number, step, len(flaky_tests))

    for test_name in flaky_tests:
      # TODO(crbug.com/904050): Deprecate FlakeAnalysisRequest in favor of
      # Flake.
      flake = flake_util.GetFlake(_LUCI_PROJECT, step, test_name, master_name,
                                  builder_name, build_number)
      request = FlakeAnalysisRequest.Create(test_name, False, None)
      request.AddBuildStep(master_name, builder_name, build_number, step,
                           time_util.GetUTCNow())
      request.flake_key = flake.key
      scheduled = flake_analysis_service.ScheduleAnalysisForFlake(
          request, '*****@*****.**', False,
          triggering_sources.FINDIT_PIPELINE)
      if scheduled:  # pragma: no branch
        analysis_counts[step]['analyzed'] += 1
        logging.info('A flake analysis has been triggered for %s/%s', step,
                     test_name)
        if throttled and len(flaky_tests) > 1:
          logging.info('Throttling is enabled, skipping %d tests.',
                       len(flaky_tests) - 1)
          # All remaining tests of this step count as throttled.
          analysis_counts[step]['throttled'] = len(flaky_tests) - 1
          break  # If we're throttled, stop after the first.
    else:
      # NOTE(review): this ``else`` belongs to the ``for`` loop, so 'error' is
      # counted once per step whose loop ends without the throttling
      # ``break`` - confirm this is intended rather than an ``else`` of
      # ``if scheduled``.
      analysis_counts[step]['error'] += 1

  for step, step_counts in analysis_counts.iteritems():
    # Collects metrics.
    step_metadata = step_util.GetStepMetadata(master_name, builder_name,
                                              build_number, step)
    # Missing or empty names are reported as 'Unknown'.
    canonical_step_name = step_metadata.get(
        'canonical_step_name') or 'Unknown'
    isolate_target_name = step_metadata.get(
        'isolate_target_name') or 'Unknown'

    for operation, count in step_counts.iteritems():
      monitoring.OnFlakeIdentified(canonical_step_name, isolate_target_name,
                                   operation, count)
def _CreateTryJobData(self, build_id, try_job_key, has_heuristic_results):
  """Creates and saves the WfTryJobData of a test try job.

  Args:
    build_id: Id the WfTryJobData is created with.
    try_job_key: Key of the try job the data belongs to.
    has_heuristic_results: Whether heuristic results exist for the failure.
  """
  test_type_description = failure_type.GetDescriptionForFailureType(
      failure_type.TEST)

  data = WfTryJobData.Create(build_id)
  data.created_time = time_util.GetUTCNow()
  # This data is always created without compile targets.
  data.has_compile_targets = False
  data.has_heuristic_results = has_heuristic_results
  data.try_job_key = try_job_key
  data.try_job_type = test_type_description
  data.put()
def _ResetAnalysis(self, master_name, builder_name, build_number):
  """Marks the WfAnalysis of the build as running under this pipeline.

  Args:
    master_name: Master name of the build.
    builder_name: Builder name of the build.
    build_number: Build number of the build.
  """
  wf_analysis = WfAnalysis.Get(master_name, builder_name, build_number)

  wf_analysis.pipeline_status_path = self.pipeline_status_path()
  wf_analysis.status = analysis_status.RUNNING
  # Drop what a previous run may have left behind.
  wf_analysis.result_status = None
  wf_analysis.start_time = time_util.GetUTCNow()
  wf_analysis.version = appengine_util.GetCurrentVersion()
  wf_analysis.end_time = None

  wf_analysis.put()
def run(self, *_args, **_kwargs):
  """Runs predator on the given crash and stores the outcome.

  N.B., due to the structure of AppEngine pipelines, this method must accept
  the same arguments as are passed to ``__init__``; since they were already
  consumed there, all of them (except ``self``) are discarded here.
  """
  logging.info('Start analysis of crash_pipeline. %s',
               json.dumps(self._crash_identifiers))

  # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis
  # to guard against race conditions?
  analysis = self._predator.GetAnalysis(self._crash_identifiers)

  # Flag the model as running before the analysis starts.
  analysis.pipeline_status_path = self.pipeline_status_path()
  analysis.status = analysis_status.RUNNING
  analysis.started_time = time_util.GetUTCNow()
  analysis.predator_version = appengine_util.GetCurrentVersion()
  analysis.put()

  # Actually do the analysis.
  result, tags = self._predator.FindCulprit(analysis).ToDicts()

  # The 'success' tag decides between a completed and a failed analysis.
  if tags['success']:
    analysis.status = analysis_status.COMPLETED
  else:
    analysis.status = analysis_status.ERROR
  analysis.completed_time = time_util.GetUTCNow()

  # Save the results on the model.
  analysis.result = result
  for tag_name, tag_value in tags.iteritems():
    # TODO(http://crbug.com/602702): make it possible to add arbitrary tags.
    # TODO(http://crbug.com/659346): we misplaced the coverage test;
    # find it!
    if hasattr(analysis, tag_name):  # pragma: no cover
      setattr(analysis, tag_name, tag_value)

  analysis.put()
  self._predator.UpdateMetrics(analysis)

  pretty_result = json.dumps(analysis.result, indent=2, sort_keys=True)
  logging.info('Found %s analysis result for %s: \n%s', self.client_id,
               repr(self._crash_identifiers), pretty_result)