Example #1
def fetch_cq_status():
  """Fetches data from chromium-cq-status app and saves new data.

  Remembers old cursor and fetches new data.
  """

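  # Resume from any paging state (cursor, begin, end) saved by a previous run.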
  fetch_status = FetchStatus.query().get()
  cursor = ''
  begin = ''
  end = ''
  retry_count = 0

  while True:
    if fetch_status:
      if fetch_status.done:
        logging.info('historical fetching done so fetch latest...')
        end = str(time_functions.timestamp.utcnow_ts())

        last_build_run_seen = BuildRun.query().order(
            -BuildRun.time_finished).fetch(1)
        begin = str(time_functions.timestamp.utctimestamp(
            last_build_run_seen[0].time_finished))
        cursor = ''
      else:
        begin = fetch_status.begin
        end = fetch_status.end
        cursor = fetch_status.cursor
    else:
      logging.info("didn't find any historical information; fetching last week")
      begin = str(time_functions.timestamp.utctimestamp(
          datetime.datetime.utcnow() - datetime.timedelta(weeks=1)))
      end = str(time_functions.timestamp.utcnow_ts())

    if begin and end:
      logging.info('fetching from ' +
                   str(datetime.datetime.fromtimestamp(float(begin))) + ' to ' +
                   str(datetime.datetime.fromtimestamp(float(end))) +
                   ' cursor: ' + cursor)
    else:
      logging.info('fetching with no begin/end and cursor: ' + cursor)

    url = "https://chromium-cq-status.appspot.com/query"
    params = []
    params.append('tags=action=verifier_jobs_update')
    if cursor:
      params.append('cursor=' + cursor)
    if begin:
      params.append('begin=' + begin)
    if end:
      params.append('end=' + end)
    # Tried count of 200 or more but would get OOM or deadline errors. Even 50
    # sometimes gives:
    # "Values may not be more than 1000000 bytes in length; received 2118015
    # bytes"
    params.append('count=10')

    url += '?' + '&'.join(params)
    logging.info('fetching url: ' + url)

    try:
      urlfetch.set_default_fetch_deadline(60)
      result = urlfetch.fetch(url).content

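      # Peek at the first "timestamp" field in the raw response, purely to log
      # how far the fetch has progressed.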
      timestamp_str = '"timestamp":"'
      logging_idx = result.find(timestamp_str)
      if logging_idx != -1:
        logging_idx += len(timestamp_str)
        logging_idx2 = result.find('"', logging_idx)
        logging.info(' current fetch has time of ' +
                     result[logging_idx:logging_idx2])

      try:
        json_result = json.loads(result)

        more = json_result['more']
        cursor = json_result['cursor']

        try:
          logging_output = parse_cq_data(json_result)
          if logging_output:
            logging.info('found flakes: ' + ' '.join(logging_output))
        except DeadlineExceededError:
          logging.info('got DeadlineExceededError during parse_cq_data, '
                       'catching to not show up as error')
          return
      except ValueError:
        requests_metric.increment_by(1, fields={'status': 'parse_error'})
        logging.exception('failed to parse CQ data from %s', url)
        if 'DeadlineExceededError' in result:
          logging.error('got deadline exceeded, trying again after 1s')
          time.sleep(1)
          continue
        elif retry_count < 3:
          retry_count += 1
          logging.error('will retry after sleeping ' + str(retry_count))
          time.sleep(retry_count)
          continue
        else:
          logging.error('giving up and will count current fetch as done')
          # Don't want to continue this as could be a bad cursor.
          more = False
      else:
        requests_metric.increment_by(1, fields={'status': 'success'})

      if not fetch_status:
        fetch_status = FetchStatus()
      fetch_status.done = not more
      if fetch_status.done:
        fetch_status.cursor = ''
        fetch_status.begin = ''
        fetch_status.end = ''
        retry_count = 0
        logging.info('finished fetching for current cursor')
      else:
        fetch_status.begin = begin
        fetch_status.end = end
        fetch_status.cursor = cursor
      fetch_status.put()

      if not more:
        return  # finish the cron job and wait for next iteration
    except urllib2.URLError as e:
      requests_metric.increment_by(1, fields={'status': 'fetch_error'})
      logging.warning('Failed to fetch CQ status: %s', e.reason)
Example #2
def parse_cq_data(json_data):
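  """Parses CQ verifier_jobs_update records and stores BuildRun entities.

  For each completed Chromium try job, a BuildRun is stored under its
  patchset/builder group. When a failure and a success exist for the same
  group, a task is queued to create a FlakyRun, and a short description of the
  flake is appended to the returned list of log strings.
  """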
  logging_output = []
  for result in json_data.get('results', []):
    fields = result.get('fields', {})
    if 'action' not in fields:
      continue

    action = fields.get('action')
    if action != 'verifier_jobs_update':
      continue

    if fields.get('verifier') != 'try job':
      continue

    # At the moment, much of the parsing logic assumes this is a Chromium
    # tryjob.
    if fields.get('project') != 'chromium':
      continue

    job_states = fields.get('jobs', {})
    for state in job_states:
      # Just go by |result|.
      #if state not in ['JOB_SUCCEEDED', 'JOB_FAILED', 'JOB_TIMED_OUT']:
      #  continue

      for job in job_states[state]:
        build_properties = job.get('build_properties')
        if not build_properties:
          continue

        try:
          master = job['master']
          builder = job['builder']
          result = job['result']
          timestamp_tz = dateutil.parser.parse(job['timestamp'])
          # We assume timestamps from chromium-cq-status are already in UTC.
          timestamp = timestamp_tz.replace(tzinfo=None)
        except KeyError:
          continue

        try:
          buildnumber = get_int_value(build_properties, 'buildnumber')
          issue = get_int_value(build_properties, 'issue')
          patchset = get_int_value(build_properties, 'patchset')
          attempt_start_ts = get_int_value(build_properties, 'attempt_start_ts')
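          # attempt_start_ts appears to be in microseconds; convert it to
          # seconds before passing it to utcfromtimestamp.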
          time_started = datetime.datetime.utcfromtimestamp(
              attempt_start_ts / 1000000)
        except ValueError:
          continue

        if build_result.isResultPending(result):
          continue

        # At this point, only success or failure.
        success = build_result.isResultSuccess(result)

        patchset_builder_runs = get_patchset_builder_runs(issue=issue,
                                                          patchset=patchset,
                                                          master=master,
                                                          builder=builder)

        build_run = BuildRun(parent=patchset_builder_runs.key,
                             buildnumber=buildnumber,
                             result=result,
                             time_started=time_started,
                             time_finished=timestamp)

        previous_runs = BuildRun.query(
            ancestor=patchset_builder_runs.key).fetch()

        duplicate = False
        for previous_run in previous_runs:
          # We saw this build run already or there are multiple green runs,
          # in which case we ignore subsequent ones to avoid showing failures
          # multiple times.
          if (previous_run.buildnumber == buildnumber or
              (build_run.is_success and previous_run.is_success)):
            duplicate = True
            break

        if duplicate:
          continue

        build_run.put()

        for previous_run in previous_runs:
          if previous_run.is_success == build_run.is_success:
            continue
          if success:
            # We saw the flake and then the pass.
            failure_run = previous_run
            success_run = build_run
          else:
            # We saw the pass and then the failure. Could happen when fetching
            # historical data, or for the bot_update step (patch can't be
            # applied cleanly anymore).
            failure_run = build_run
            success_run = previous_run

          logging_output.append(failure_run.key.parent().get().builder +
                                str(failure_run.buildnumber))

          # Queue a task to fetch the error of this failure and create FlakyRun.
          flakes_metric.increment_by(1)
          taskqueue.add(
              queue_name='issue-updates',
              url='/issues/create_flaky_run',
              params={'failure_run_key': failure_run.key.urlsafe(),
                      'success_run_key': success_run.key.urlsafe()})

  return logging_output
Example #3
def parse_cq_data(json_data):
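    """Parses CQ verifier_jobs_update records and stores BuildRun entities.

    For each completed chromium/chromium/src try job, a BuildRun is stored
    under its patchset/builder group. When a failure and a success exist for
    the same group, a task is queued to create a FlakyRun, and a short
    description of the flake is appended to the returned list of log strings.
    """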
    logging_output = []
    for result in json_data.get('results', []):
        fields = result.get('fields', {})
        if 'action' not in fields:
            logging.warning('Missing field action in status record')
            parsing_errors.increment_by(1)
            continue

        action = fields.get('action')
        if action != 'verifier_jobs_update':
            continue

        if fields.get('verifier') != 'try job':
            continue

        # At the moment, much of the parsing logic assumes this is a Chromium
        # tryjob.
        project = fields.get('project')
        if project != 'chromium/chromium/src':
            logging.info('project not chromium: %s', project)
            continue

        job_states = fields.get('jobs', {})

        for job in itertools.chain.from_iterable(job_states.values()):
            try:
                builder = job['builder']
                result = job['result']
                timestamp_tz = dateutil.parser.parse(
                    job.get('created_ts') or job['timestamp'])
                # We assume timestamps from chromium-cq-status are already in UTC.
                timestamp = timestamp_tz.replace(tzinfo=None)
            except KeyError:
                logging.warning('Failed to parse job details', exc_info=True)
                parsing_errors.increment_by(1)
                continue

            if build_result.isResultPending(result):
                continue

            build_properties = job.get('build_properties')
            if not build_properties:
                logging.warning(
                    'Missing field build_properties in job details')
                parsing_errors.increment_by(1)
                continue

            issue = -1
            patchset = -1
            time_started = 0

            try:
                buildnumber = get_int_value(build_properties, 'buildnumber')
                if 'patch_issue' in build_properties:
                    issue = get_int_value(build_properties, 'patch_issue')
                else:  # pragma: no cover
                    logging.warning('no issue')

                if 'patch_set' in build_properties:
                    patchset = get_int_value(build_properties, 'patch_set')
                else:  # pragma: no cover
                    logging.warning('no patchset')

                if 'attempt_start_ts' in build_properties:
                    attempt_start_ts = get_int_value(build_properties,
                                                     'attempt_start_ts')
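                    # attempt_start_ts appears to be in microseconds; convert
                    # it to seconds before passing it to utcfromtimestamp.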
                    time_started = datetime.datetime.utcfromtimestamp(
                        attempt_start_ts / 1000000)
                else:  # pragma: no cover
                    logging.warning('no attempt_start_ts')
                    continue

                # For builds through Buildbucket, job['master'] is actually the bucket
                # name. For buildbot-based builds, it just happens to be the same as the
                # master name. For Luci-based builds, it is different from the master
                # name, and the master name is set as a build property instead.
                # https://chromium.googlesource.com/chromium/src/+/infra/config/cr-buildbucket.cfg#115
                # So in either case, the "real" master name is in the build properties.
                master = build_properties['mastername']

            except (ValueError, KeyError):
                logging.warning('Failed to parse build properties',
                                exc_info=True)
                parsing_errors.increment_by(1)
                continue

            # At this point, only success or failure.
            success = build_result.isResultSuccess(result)

            patchset_builder_runs = get_patchset_builder_runs(
                issue=issue, patchset=patchset, master=master, builder=builder)

            build_run = BuildRun(parent=patchset_builder_runs.key,
                                 buildnumber=buildnumber,
                                 result=result,
                                 time_started=time_started,
                                 time_finished=timestamp)

            previous_runs = BuildRun.query(
                ancestor=patchset_builder_runs.key).fetch()

            duplicate = False
            for previous_run in previous_runs:
                # We saw this build run already or there are multiple green runs,
                # in which case we ignore subsequent ones to avoid showing failures
                # multiple times.
                if (previous_run.buildnumber == buildnumber or
                    (build_run.is_success and previous_run.is_success)):
                    duplicate = True
                    break

            if duplicate:
                continue

            build_run.put()

            for previous_run in previous_runs:
                if previous_run.is_success == build_run.is_success:
                    continue
                if success:
                    # We saw the flake and then the pass.
                    failure_run = previous_run
                    success_run = build_run
                else:
                    # We saw the pass and then the failure. Could happen when fetching
                    # historical data, or for the bot_update step (patch can't be
                    # applied cleanly anymore).
                    failure_run = build_run
                    success_run = previous_run

                logging_output.append(failure_run.key.parent().get().builder +
                                      str(failure_run.buildnumber))

                # Queue a task to fetch the error of this failure and create FlakyRun.
                flakes_metric.increment_by(1)
                taskqueue.add(queue_name='issue-updates',
                              url='/issues/create_flaky_run',
                              params={
                                  'failure_run_key': failure_run.key.urlsafe(),
                                  'success_run_key': success_run.key.urlsafe()
                              })

    return logging_output
Example #4
def fetch_cq_status():
    """Fetches data from chromium-cq-status app and saves new data.

    Remembers old cursor and fetches new data.
    """

    fetch_status = FetchStatus.query().get()
    cursor = ''
    begin = ''
    end = ''
    retry_count = 0

    while True:
        if fetch_status:
            if fetch_status.done:
                logging.info('historical fetching done so fetch latest...')
                end = str(time_functions.timestamp.utcnow_ts())

                last_build_run_seen = BuildRun.query().order(
                    -BuildRun.time_finished).fetch(1)
                begin = str(
                    time_functions.timestamp.utctimestamp(
                        last_build_run_seen[0].time_finished))
                cursor = ''
            else:
                begin = fetch_status.begin
                end = fetch_status.end
                cursor = fetch_status.cursor
        else:
            logging.info(
                "didn't find any historical information; fetching last week")
            begin = str(
                time_functions.timestamp.utctimestamp(
                    datetime.datetime.utcnow() - datetime.timedelta(weeks=1)))
            end = str(time_functions.timestamp.utcnow_ts())

        if begin and end:
            logging.info('fetching from %s to %s cursor: %s',
                         str(datetime.datetime.utcfromtimestamp(float(begin))),
                         str(datetime.datetime.utcfromtimestamp(float(end))),
                         cursor)
        else:
            logging.info('fetching with no begin/end and cursor: ' + cursor)

        url = "https://chromium-cq-status.appspot.com/query"
        params = []
        params.append('tags=action=verifier_jobs_update')
        if cursor:
            params.append('cursor=' + cursor)
        if begin:
            params.append('begin=' + begin)
        if end:
            params.append('end=' + end)
        # Tried count of 200 or more but would get OOM or deadline errors. Even 50
        # sometimes gives:
        # "Values may not be more than 1000000 bytes in length; received 2118015
        # bytes"
        params.append('count=10')

        url += '?' + '&'.join(params)
        logging.info('fetching url: ' + url)

        try:
            urlfetch.set_default_fetch_deadline(60)
            result = urlfetch.fetch(url).content

            timestamp_str = '"timestamp":"'
            logging_idx = result.find(timestamp_str)
            if logging_idx != -1:
                logging_idx += len(timestamp_str)
                logging_idx2 = result.find('"', logging_idx)
                logging.info(' current fetch has time of ' +
                             result[logging_idx:logging_idx2])

            try:
                json_result = json.loads(result)

                more = json_result['more']
                cursor = json_result['cursor']

                try:
                    logging_output = parse_cq_data(json_result)
                    if logging_output:
                        logging.info('found flakes: ' +
                                     ' '.join(logging_output))
                except DeadlineExceededError:
                    logging.info(
                        'got DeadlineExceededError during parse_cq_data, '
                        'catching to not show up as error')
                    return
            except ValueError:
                requests_metric.increment_by(1,
                                             fields={'status': 'parse_error'})
                logging.exception('failed to parse CQ data from %s', url)
                if 'DeadlineExceededError' in result:
                    logging.error(
                        'got deadline exceeded, trying again after 1s')
                    time.sleep(1)
                    continue
                elif retry_count < 3:
                    retry_count += 1
                    logging.error('will retry after sleeping ' +
                                  str(retry_count))
                    time.sleep(retry_count)
                    continue
                else:
                    logging.error(
                        'giving up and will count current fetch as done')
                    # Don't want to continue this as could be a bad cursor.
                    more = False
            else:
                requests_metric.increment_by(1, fields={'status': 'success'})

            if not fetch_status:
                fetch_status = FetchStatus()
            fetch_status.done = not more
            if fetch_status.done:
                fetch_status.cursor = ''
                fetch_status.begin = ''
                fetch_status.end = ''
                retry_count = 0
                logging.info('finished fetching for current cursor')
            else:
                fetch_status.begin = begin
                fetch_status.end = end
                fetch_status.cursor = cursor
            fetch_status.put()

            if not more:
                return  # finish the cron job and wait for next iteration
        except urllib2.URLError as e:
            requests_metric.increment_by(1, fields={'status': 'fetch_error'})
            logging.warning('Failed to fetch CQ status: %s', e.reason)
Example #5
def parse_cq_data(json_data):
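  """Parses CQ verifier_jobs_update records and stores BuildRun entities.

  For each completed try job, a BuildRun is stored under its patchset/builder
  group. When a failure and a success exist for the same group, a FlakyRun is
  stored and a task is deferred to fetch the reason for the failure. Returns a
  list of log strings describing the detected flakes.
  """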
  logging_output = []
  for result in json_data['results']:
    fields = result['fields']
    if 'action' not in fields:
      continue

    action = fields['action']
    if action != 'verifier_jobs_update':
      continue

    if fields['verifier'] != 'try job':
      continue

    job_states = fields['jobs']
    for state in job_states:
      # Just go by |result|.
      #if state not in ['JOB_SUCCEEDED', 'JOB_FAILED', 'JOB_TIMED_OUT']:
      #  continue

      for job in job_states[state]:
        build_properties = job['build_properties']
        if not build_properties:
          continue

        master = job['master']
        builder = job['builder']
        result = job['result']
        timestamp = datetime.datetime.strptime(job['timestamp'],
                                               '%Y-%m-%d %H:%M:%S.%f')
        try:
          buildnumber = get_int_value(build_properties, 'buildnumber')
          issue = get_int_value(build_properties, 'issue')
          patchset = get_int_value(build_properties, 'patchset')
        except ValueError:
          continue

        if build_result.isResultPending(result):
          continue

        # At this point, only success or failure.
        success = build_result.isResultSuccess(result)

        patchset_builder_runs = get_patchset_builder_runs(issue=issue,
                                                          patchset=patchset,
                                                          master=master,
                                                          builder=builder)

        build_run = BuildRun(parent=patchset_builder_runs.key,
                             buildnumber=buildnumber,
                             result=result,
                             time_finished=timestamp)

        previous_runs = BuildRun.query(
            ancestor=patchset_builder_runs.key).fetch()

        duplicate = False
        for previous_run in previous_runs:
          # We saw this build run already or there are multiple green runs,
          # in which case we ignore subsequent ones to avoid showing failures
          # multiple times.
          if (previous_run.buildnumber == buildnumber or
              (build_run.is_success and previous_run.is_success)):
            duplicate = True
            break

        if duplicate:
          continue

        build_run.put()

        for previous_run in previous_runs:
          if previous_run.is_success == build_run.is_success:
            continue
          if success:
            # We saw the flake and then the pass.
            flaky_run = FlakyRun(
                failure_run=previous_run.key,
                failure_run_time_finished=previous_run.time_finished,
                success_run=build_run.key)
            flaky_run.put()
            logging_output.append(previous_run.key.parent().get().builder +
                                  str(previous_run.buildnumber))
          else:
            # We saw the pass and then the failure. Could happen when fetching
            # historical data.
            flaky_run = FlakyRun(
                failure_run=build_run.key,
                failure_run_time_finished=build_run.time_finished,
                success_run=previous_run.key)
            flaky_run.put()
            logging_output.append(build_run.key.parent().get().builder +
                                  str(build_run.buildnumber))

          # Queue a task to fetch the error of this failure.
          deferred.defer(get_flaky_run_reason, flaky_run.key)

  return logging_output
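
All of the examples above call a get_int_value helper that is not shown here.
A minimal sketch of what such a helper might look like, assuming
build_properties behaves like a dict and that missing or non-numeric values
should surface as the ValueError the callers already catch:

def get_int_value(properties, key):
  # Hypothetical sketch: look up |key| in the build properties and coerce the
  # value to an int, raising ValueError when it is absent or not numeric.
  if key not in properties:
    raise ValueError('Property %s is missing' % key)
  value = properties[key]
  try:
    return int(value)
  except (TypeError, ValueError):
    raise ValueError('Property %s is not an integer: %r' % (key, value))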