Example #1
    def testScheduleFlakeTryJob(self, mock_module):
        master_name = 'm'
        builder_name = 'b'
        build_number = 1
        step_name = 's'
        test_name = 't'
        git_hash = 'a1b2c3d4'
        build_id = '1'
        url = 'url'
        analysis_key = ndb.Key('key', 1)
        build = WfBuild.Create(master_name, builder_name, build_number)
        build.data = {
            'properties': {
                'parent_mastername': 'pm',
                'parent_buildername': 'pb'
            }
        }
        build.put()
        response = {
            'build': {
                'id': build_id,
                'url': url,
                'status': 'SCHEDULED',
            }
        }
        results = [(None,
                    buildbucket_client.BuildbucketBuild(response['build']))]
        mock_module.TriggerTryJobs.return_value = results

        FlakeTryJob.Create(master_name, builder_name, step_name, test_name,
                           git_hash).put()

        try_job_pipeline = ScheduleFlakeTryJobPipeline()
        try_job_id = try_job_pipeline.run(master_name, builder_name, step_name,
                                          test_name, git_hash,
                                          analysis_key.urlsafe(), None, None)

        try_job = FlakeTryJob.Get(master_name, builder_name, step_name,
                                  test_name, git_hash)
        try_job_data = FlakeTryJobData.Get(build_id)

        self.assertEqual(build_id, try_job_id)
        self.assertEqual(build_id, try_job.flake_results[-1]['try_job_id'])
        self.assertTrue(build_id in try_job.try_job_ids)
        self.assertEqual(try_job_data.try_job_key, try_job.key)
        self.assertEqual(analysis_key, try_job_data.analysis_key)
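
The mock.patch decorator that injects mock_module is not part of this
excerpt. A minimal sketch of the wiring, assuming the pipeline module imports
buildbucket_client (the patch target below is an assumption):

import mock

from waterfall.flake import schedule_flake_try_job_pipeline

# Hypothetical patch target: replacing the whole module object turns every
# attribute on it into a mock, which is why the test above can configure
# mock_module.TriggerTryJobs.return_value directly.
@mock.patch.object(schedule_flake_try_job_pipeline, 'buildbucket_client')
def testScheduleFlakeTryJob(self, mock_module):
    pass  # Test body as in Example #1 above.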
Example #2
    def testCreateTryJobData(self):
        master_name = 'm'
        builder_name = 'b'
        step_name = 's'
        test_name = 't'
        git_hash = 'a1b2c3d4'
        build_id = 'build_id'
        analysis_key = ndb.Key('key', 1)

        try_job = FlakeTryJob.Create(master_name, builder_name, step_name,
                                     test_name, git_hash)
        ScheduleFlakeTryJobPipeline()._CreateTryJobData(
            build_id, try_job.key, analysis_key.urlsafe())

        try_job_data = FlakeTryJobData.Get(build_id)

        self.assertEqual(try_job_data.try_job_key, try_job.key)
Example #3
def _GetLastAttemptedTryJobDetails(analysis):
    last_attempted_revision = analysis.last_attempted_revision
    if not last_attempted_revision:
        return {}

    try_job = FlakeTryJob.Get(analysis.master_name, analysis.builder_name,
                              analysis.step_name, analysis.test_name,
                              last_attempted_revision)

    if not try_job or not try_job.try_job_ids:
        return {}

    try_job_id = try_job.try_job_ids[-1]
    try_job_data = FlakeTryJobData.Get(try_job_id)
    if not try_job_data:
        return {}

    return {
        'status': analysis_status.STATUS_TO_DESCRIPTION.get(try_job.status),
        'url': try_job_data.try_job_url
    }
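
A hedged usage sketch for the helper above; analysis stands in for a
hypothetical analysis entity exposing the attributes the function reads:

import logging

# 'analysis' must provide master_name, builder_name, step_name, test_name
# and last_attempted_revision, exactly as read by the helper above.
details = _GetLastAttemptedTryJobDetails(analysis)
if details:
    logging.info('Last attempted try job is %s: %s',
                 details['status'], details['url'])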
Example #4
    def _LogUnexpectedAbort(self):
        if not self.was_aborted:
            return

        flake_analysis = ndb.Key(urlsafe=self.urlsafe_flake_analysis_key).get()

        assert flake_analysis

        flake_analysis.try_job_status = analysis_status.ERROR
        flake_analysis.error = flake_analysis.error or {
            'error': 'RecursiveFlakeTryJobPipeline was aborted unexpectedly',
            'message': 'RecursiveFlakeTryJobPipeline was aborted unexpectedly'
        }
        flake_analysis.end_time = time_util.GetUTCNow()
        flake_analysis.put()

        try_job = FlakeTryJob.Get(flake_analysis.master_name,
                                  flake_analysis.builder_name,
                                  flake_analysis.step_name,
                                  flake_analysis.test_name, self.revision)

        if try_job and not try_job.completed:
            try_job.status = analysis_status.ERROR
            try_job.put()

        if not try_job or not try_job.try_job_ids:
            return

        try_job_data = FlakeTryJobData.Get(try_job.try_job_ids[-1])
        if try_job_data:  # pragma: no branch
            try_job_data.error = try_job_data.error or {
                'error':
                'RecursiveFlakeTryJobPipeline was aborted unexpectedly',
                'message':
                'RecursiveFlakeTryJobPipeline was aborted unexpectedly'
            }
            try_job_data.put()
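
_LogUnexpectedAbort above is a cleanup helper; a plausible call site, assuming
the pipeline library's finalized() hook (was_aborted is set by the library,
and the original call site is not shown in this excerpt):

    def finalized(self):
        # Hypothetical wiring: the library invokes finalized() when the
        # pipeline ends, whether it completed or was aborted.
        self._LogUnexpectedAbort()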
Example #5
    def testGetTryJobsForFlakeSuccess(self, mock_buildbucket, mock_report):
        master_name = 'm'
        builder_name = 'b'
        step_name = 's'
        test_name = 't'
        git_hash = 'a1b2c3d4'
        try_job_id = '1'

        try_job = FlakeTryJob.Create(master_name, builder_name, step_name,
                                     test_name, git_hash)
        try_job.flake_results = [{
            'report': None,
            'url': 'https://build.chromium.org/p/m/builders/b/builds/1234',
            'try_job_id': '1',
        }]
        try_job.status = analysis_status.RUNNING
        try_job.put()

        try_job_data = FlakeTryJobData.Create(try_job_id)
        try_job_data.try_job_key = try_job.key
        try_job_data.try_job_url = (
            'https://build.chromium.org/p/m/builders/b/builds/1234')
        try_job_data.put()

        build_response = {
            'id': '1',
            'url': 'https://build.chromium.org/p/m/builders/b/builds/1234',
            'status': 'COMPLETED',
        }
        report = {
            'result': {
                'r0': {
                    'gl_tests': {
                        'status': 'passed',
                        'valid': True,
                        'pass_fail_counts': {
                            'Test.One': {
                                'pass_count': 100,
                                'fail_count': 0
                            }
                        }
                    }
                }
            }
        }
        mock_buildbucket.GetTryJobs.return_value = [
            (None, buildbucket_client.BuildbucketBuild(build_response))
        ]
        mock_report.return_value = json.dumps(report)

        pipeline = MonitorTryJobPipeline()
        pipeline.start_test()
        pipeline.run(try_job.key.urlsafe(), failure_type.FLAKY_TEST,
                     try_job_id)
        pipeline.callback(callback_params=pipeline.last_params)

        # Reload from ID to get all internal properties in sync.
        pipeline = MonitorTryJobPipeline.from_id(pipeline.pipeline_id)
        pipeline.finalized()
        flake_result = pipeline.outputs.default.value

        expected_flake_result = {
            'report': {
                'result': {
                    'r0': {
                        'gl_tests': {
                            'status': 'passed',
                            'valid': True,
                            'pass_fail_counts': {
                                'Test.One': {
                                    'pass_count': 100,
                                    'fail_count': 0
                                }
                            }
                        }
                    }
                }
            },
            'url': 'https://build.chromium.org/p/m/builders/b/builds/1234',
            'try_job_id': '1',
        }

        self.assertEqual(expected_flake_result, flake_result)

        try_job = FlakeTryJob.Get(master_name, builder_name, step_name,
                                  test_name, git_hash)
        self.assertEqual(expected_flake_result, try_job.flake_results[-1])
        self.assertEqual(analysis_status.RUNNING, try_job.status)

        try_job_data = FlakeTryJobData.Get(try_job_id)
        self.assertEqual(try_job_data.last_buildbucket_response,
                         build_response)
Example #6
    def run(self, urlsafe_flake_analysis_key, urlsafe_try_job_key,
            lower_boundary_commit_position, cache_name, dimensions):
        """Determines the next commit position to run a try job on.

    Args:
      urlsafe_flake_analysis_key (str): The url-safe key to the corresponding
          flake analysis that triggered this pipeline.
      urlsafe_try_job_key (str): The url-safe key to the try job that was just
          run.
      lower_boundary_commit_position (int): The lower bound of the commit
          positions a try job may run against.
      cache_name (str): The name of the cache to use on the swarming bot.
      dimensions (list): The swarming dimensions the try job should run with.
    """
        flake_analysis = ndb.Key(urlsafe=urlsafe_flake_analysis_key).get()
        try_job = ndb.Key(urlsafe=urlsafe_try_job_key).get()
        assert flake_analysis
        assert try_job
        assert try_job.try_job_ids

        try_job_id = try_job.try_job_ids[-1]
        try_job_data = FlakeTryJobData.Get(try_job_id)

        # Don't call another pipeline if the previous try job failed.
        if try_job_data.error:
            UpdateAnalysisUponCompletion(flake_analysis, None,
                                         analysis_status.ERROR,
                                         try_job_data.error)
            yield UpdateFlakeBugPipeline(flake_analysis.key.urlsafe())
            return

        suspected_build_data_point = (
            flake_analysis.GetDataPointOfSuspectedBuild())

        # Because there are hard lower and upper bounds, only the data points
        # involved in try jobs should be considered when determining the next
        # commit position to test.
        try_job_data_points = _GetNormalizedTryJobDataPoints(flake_analysis)
        algorithm_settings = flake_analysis.algorithm_parameters.get(
            'try_job_rerun', {})

        # Figure out what commit position to trigger the next try job on, if any.
        next_commit_position, suspected_commit_position, _ = (
            lookback_algorithm.GetNextRunPointNumber(
                try_job_data_points, algorithm_settings,
                lower_boundary_commit_position))

        if suspected_commit_position is not None:  # Finished.
            confidence_score = confidence.SteppinessForCommitPosition(
                flake_analysis.data_points, suspected_commit_position)
            culprit_revision = (
                suspected_build_data_point.GetRevisionAtCommitPosition(
                    suspected_commit_position))
            culprit = CreateCulprit(culprit_revision,
                                    suspected_commit_position,
                                    confidence_score)
            UpdateAnalysisUponCompletion(flake_analysis, culprit,
                                         analysis_status.COMPLETED, None)

            yield UpdateFlakeBugPipeline(flake_analysis.key.urlsafe())
            return

        next_revision = suspected_build_data_point.GetRevisionAtCommitPosition(
            next_commit_position)

        pipeline_job = RecursiveFlakeTryJobPipeline(
            urlsafe_flake_analysis_key, next_commit_position, next_revision,
            lower_boundary_commit_position, cache_name, dimensions)
        # Disable the "attribute 'target' defined outside __init__" pylint
        # warning, because the pipeline generates its own __init__ based on
        # the run function.
        pipeline_job.target = (  # pylint: disable=W0201
            appengine_util.GetTargetNameForModule(constants.WATERFALL_BACKEND))
        pipeline_job.start()
Example #7
  def _callback(self, callback_params, pipeline_id=None):
    """Updates the TryJobData entities with status from buildbucket."""
    # callback_params may have been serialized if the callback was converted to
    # a URL.
    if isinstance(callback_params, basestring):
      callback_params = json.loads(callback_params)

    self.last_params = callback_params

    _ = pipeline_id  # We do nothing with this id.

    try_job_id = callback_params['try_job_id']
    assert try_job_id

    urlsafe_try_job_key = callback_params['urlsafe_try_job_key']
    try_job_type = callback_params['try_job_type']
    deadline = callback_params['deadline']
    already_set_started = callback_params['already_set_started']
    error_count = callback_params['error_count']
    max_error_times = callback_params['max_error_times']
    default_pipeline_wait_seconds = callback_params[
        'default_pipeline_wait_seconds']
    timeout_hours = callback_params['timeout_hours']
    backoff_time = callback_params['backoff_time']

    if try_job_type == failure_type.FLAKY_TEST:
      try_job_data = FlakeTryJobData.Get(try_job_id)
    else:
      try_job_data = WfTryJobData.Get(try_job_id)

    error, build = buildbucket_client.GetTryJobs([try_job_id])[0]

    if error:
      if error_count < max_error_times:
        error_count += 1
        self.delay_callback(
            backoff_time,
            callback_params={
                'try_job_id': try_job_id,
                'try_job_type': try_job_type,
                'urlsafe_try_job_key': urlsafe_try_job_key,
                'deadline': deadline,
                'already_set_started': already_set_started,
                'error_count': error_count,
                'max_error_times': max_error_times,
                'default_pipeline_wait_seconds': default_pipeline_wait_seconds,
                'timeout_hours': timeout_hours,
                'backoff_time': backoff_time * 2,
            }
        )
        return
      else:  # pragma: no cover
        # Buildbucket has responded with errors more than max_error_times in
        # a row; retry the pipeline.
        _UpdateTryJobMetadata(
            try_job_data, try_job_type, build, error, False)
        raise pipeline.Retry(
            'Error "%s" occurred. Reason: "%s"' % (error.message,
                                                   error.reason))
    elif build.status == BuildbucketBuild.COMPLETED:
      swarming_task_id = buildbot.GetSwarmingTaskIdFromUrl(
          build.url)

      if swarming_task_id:
        try:
          report = json.loads(swarming_util.GetStepLog(
              try_job_id, 'report', HttpClientAppengine(), 'report'))
        except (ValueError, TypeError) as e:  # pragma: no cover
          report = {}
          logging.exception(
              'Failed to load result report for swarming/%s '
              'due to exception %s.' % (swarming_task_id, e.message))
      else:
        try_job_master_name, try_job_builder_name, try_job_build_number = (
            buildbot.ParseBuildUrl(build.url))

        try:
          report = json.loads(buildbot.GetStepLog(
              try_job_master_name, try_job_builder_name, try_job_build_number,
              'report', HttpClientAppengine(), 'report'))
        except (ValueError, TypeError) as e:  # pragma: no cover
          report = {}
          logging.exception(
              'Failed to load result report for %s/%s/%s due to exception %s.'
              % (try_job_master_name, try_job_builder_name,
                 try_job_build_number, e.message))

      _UpdateTryJobMetadata(
          try_job_data, try_job_type, build, error, False,
          report if report else {})
      result_to_update = self._UpdateTryJobResult(
          urlsafe_try_job_key, try_job_type, try_job_id,
          build.url, BuildbucketBuild.COMPLETED, report)
      self.complete(result_to_update[-1])
      return
    else:
      error_count = 0
      backoff_time = default_pipeline_wait_seconds
      if build.status == BuildbucketBuild.STARTED and not (
          already_set_started):
        # It is possible this branch is skipped if a fast build goes from
        # 'SCHEDULED' to 'COMPLETED' between queries, so start_time may be
        # unavailable.
        start_time = time_util.MicrosecondsToDatetime(build.updated_time)
        self._UpdateTryJobResult(
            urlsafe_try_job_key, try_job_type, try_job_id,
            build.url, BuildbucketBuild.STARTED)

        already_set_started = True

        # Update try job metadata as early as possible to avoid data loss in
        # case of errors.
        try_job_data.start_time = start_time
        try_job_data.request_time = (
            time_util.MicrosecondsToDatetime(build.request_time))
        try_job_data.try_job_url = build.url
        try_job_data.callback_url = self.get_callback_url(
            callback_params=json.dumps({
                'try_job_id': try_job_id,
                'try_job_type': try_job_type,
                'urlsafe_try_job_key': urlsafe_try_job_key,
                'deadline': deadline,
                'already_set_started': already_set_started,
                'error_count': error_count,
                'max_error_times': max_error_times,
                'default_pipeline_wait_seconds': default_pipeline_wait_seconds,
                'timeout_hours': timeout_hours,
                'backoff_time': backoff_time,
            })
        )
        try_job_data.put()

    if time.time() > deadline:  # pragma: no cover
      _UpdateTryJobMetadata(
          try_job_data, try_job_type, build, error, True)
      # Explicitly abort the whole pipeline.
      raise pipeline.Abort(
          'Try job %s timed out after %d hours.' % (
              try_job_id, timeout_hours))

    # Keep last_buildbucket_response up to date with the most recent build
    # available from intermediate queries.
    _UpdateLastBuildbucketResponse(try_job_data, build)
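
The error branch above doubles backoff_time on every buildbucket error until
error_count reaches max_error_times. A minimal sketch of the implied retry
schedule (a worked example, not code from the pipeline):

def _BackoffSchedule(initial_backoff_seconds, max_error_times):
    """Yields the delay in seconds before each retry, doubling each time."""
    delay = initial_backoff_seconds
    for _ in xrange(max_error_times):  # Python 2, matching basestring above.
        yield delay
        delay *= 2

# With an initial backoff of 60 seconds and max_error_times=5, the callback
# would be re-scheduled after 60, 120, 240, 480 and 960 seconds, after which
# the pipeline is retried instead.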