Пример #1
0
    def get(self, viewLength):
        """Renders the top-25 aggregated exceptions from the latest stats snapshot.

        Only the 'day' view is implemented; any other viewLength is coerced
        to 'day'.
        """
        if viewLength != 'day':
            # TODO(robbyw): For viewLength == week or viewLength == month, aggregate the aggregates.
            viewLength = 'day'

        latestStats = AggregatedStats.all().order('-date').get()
        topRows = json.loads(latestStats.json)[:25]

        # Present per-server and per-environment counts in descending order.
        for _, summary in topRows:
            logging.info(summary)
            summary['servers'] = sorted(
                summary['servers'].items(), key=lambda pair: pair[1], reverse=True)
            summary['environments'] = sorted(
                summary['environments'].items(), key=lambda pair: pair[1], reverse=True)

        errorKeys, counts = zip(*topRows)
        loadedErrors = LoggedError.get([db.Key(k) for k in errorKeys])

        context = {
            'title': 'Top 25 exceptions over the last %s' % viewLength,
            'errors': zip(loadedErrors, counts),
            'total': len(topRows)
        }
        self.response.out.write(
            template.render(getTemplatePath('aggregation.html'), context))
Пример #2
0
  def doAuthenticatedGet(self, _, *args):
    """Deactivates the error whose datastore key is the single extra arg."""
    (errorKey,) = args
    self.response.headers['Content-Type'] = 'text/plain'
    target = LoggedError.get(errorKey)
    target.active = False
    target.put()

    self.response.out.write('ok')
Пример #3
0
    def doAuthenticatedGet(self, _, *args):
        """Marks the error identified by the single extra arg as inactive."""
        (errorKey,) = args
        self.response.headers['Content-Type'] = 'text/plain'
        target = LoggedError.get(errorKey)
        target.active = False
        target.put()

        self.response.out.write('ok')
Пример #4
0
 def doAuthenticatedGet(self, _):
     """Deletes every LoggedError and LoggedErrorInstance; admins only."""
     if not users.is_current_user_admin():
         # Non-admins are bounced to the login page.
         self.redirect(users.create_login_url(self.request.uri))
         return
     for record in LoggedError.all():
         record.delete()
     for record in LoggedErrorInstance.all():
         record.delete()
     self.response.out.write('Done')
Пример #5
0
 def doAuthenticatedGet(self, _):
   """Wipes all logged errors and their instances; admins only."""
   if not users.is_current_user_admin():
     # Anyone else is sent to the login page.
     self.redirect(users.create_login_url(self.request.uri))
     return
   for record in LoggedError.all():
     record.delete()
   for record in LoggedErrorInstance.all():
     record.delete()
   self.response.out.write('Done')
Пример #6
0
def getAggregatedError(project, errorHash):
    """Gets (and updates) the error matching the given report, or None if no matching error is found."""
    projectEntity = getProject(project)

    query = (LoggedError.all()
             .filter("project =", projectEntity)
             .filter("hash =", errorHash)
             .filter("active =", True))

    # Return the first active match, if any.
    for match in query:
        return match
    return None
Пример #7
0
def getAggregatedError(project, errorHash):
  """Gets (and updates) the error matching the given report, or None if no matching error is found."""
  projectEntity = getProject(project)

  query = (LoggedError.all()
           .filter('project =', projectEntity)
           .filter('hash =', errorHash)
           .filter('active =', True))

  # First active match wins; None when the query is empty.
  for candidate in query:
    return candidate
  return None
Пример #8
0
 def doAuthenticatedGet(self, user, *args):
   """Renders the detail page for the error keyed by the single extra arg."""
   (errorKey,) = args
   self.response.headers['Content-Type'] = 'text/html'
   selected = LoggedError.get(errorKey)
   activeFilters = getFilters(self.request)
   templateContext = {
     'title': '%s - %s' % (selected.lastMessage, NAME),
     'extraScripts': ['view'],
     'user': user,
     'error': selected,
     'filters': activeFilters.items(),
     # Cap the instance list so the page stays a manageable size.
     'instances': getInstances(activeFilters, parent=selected)[:100]
   }
   self.response.out.write(template.render(getTemplatePath('view.html'), templateContext))
Пример #9
0
 def doAuthenticatedGet(self, user, *args):
     """Shows the detail view for one logged error."""
     (errorKey,) = args
     self.response.headers['Content-Type'] = 'text/html'
     selected = LoggedError.get(errorKey)
     requestFilters = getFilters(self.request)
     templateContext = {
         'title': '%s - %s' % (selected.lastMessage, NAME),
         'extraScripts': ['view'],
         'user': user,
         'error': selected,
         'filters': requestFilters.items(),
         # Only the first 100 instances are shown.
         'instances': getInstances(requestFilters, parent=selected)[:100]
     }
     rendered = template.render(getTemplatePath('view.html'), templateContext)
     self.response.out.write(rendered)
Пример #10
0
def getErrors(filters, limit, offset):
  """Gets a list of errors, filtered by the given filters.

  Returns a (errors, None) pair, or (None, instances) when any filter
  applies at the instance level.
  """
  # Instance-level filters require querying instances instead of errors.
  if any(name in INSTANCE_FILTERS for name in filters):
    return None, getInstances(filters, limit=limit, offset=offset)

  query = LoggedError.all().filter('active =', True)
  for name, value in filters.items():
    if name == 'maxAgeHours':
      cutoff = datetime.now() - timedelta(hours=int(value))
      query = query.filter('firstOccurrence >', cutoff)
    elif name == 'project':
      query = query.filter('project =', getProject(value))
    else:
      query = query.filter(name, value)

  # An inequality filter on firstOccurrence forces ordering on that field.
  orderField = '-firstOccurrence' if 'maxAgeHours' in filters else '-lastOccurrence'
  return query.order(orderField).fetch(limit, offset), None
Пример #11
0
def getErrors(filters, limit, offset):
    """Gets a list of errors, filtered by the given filters.

    Returns (errors, None), or (None, instances) when any supplied filter
    operates at the instance level.
    """
    if any(name in INSTANCE_FILTERS for name in filters):
        return None, getInstances(filters, limit=limit, offset=offset)

    query = LoggedError.all().filter('active =', True)
    for name, value in filters.items():
        if name == 'maxAgeHours':
            cutoff = datetime.now() - timedelta(hours=int(value))
            query = query.filter('firstOccurrence >', cutoff)
        elif name == 'project':
            query = query.filter('project =', getProject(value))
        else:
            query = query.filter(name, value)

    if 'maxAgeHours' in filters:
        query = query.order('-firstOccurrence')
    else:
        query = query.order('-lastOccurrence')

    return query.fetch(limit, offset), None
Пример #12
0
  def get(self, viewLength):
    """Shows the 25 most frequent exceptions from the newest stats snapshot."""
    if viewLength != 'day':
      # TODO(robbyw): For viewLength == week or viewLength == month, aggregate the aggregates.
      viewLength = 'day'

    newest = AggregatedStats.all().order('-date').get()
    rows = json.loads(newest.json)[:25]

    def byCountDescending(counts):
      # Sort a {name: count} mapping into (name, count) pairs, biggest first.
      return sorted(counts.items(), key=lambda entry: entry[1], reverse=True)

    for _, summary in rows:
      logging.info(summary)
      summary['servers'] = byCountDescending(summary['servers'])
      summary['environments'] = byCountDescending(summary['environments'])

    errorKeys, counts = zip(*rows)
    loaded = LoggedError.get([db.Key(k) for k in errorKeys])

    context = {
      'title': 'Top 25 exceptions over the last %s' % viewLength,
      'errors': zip(loaded, counts),
      'total': len(rows)
    }
    self.response.out.write(template.render(getTemplatePath('aggregation.html'), context))
def main():
  """Runs the aggregation.

  Emails a summary of the most frequent errors first seen in the last 24
  hours; does nothing unless both 'toEmail' and 'fromEmail' are configured.
  """
  toEmail = config.get('toEmail')
  fromEmail = config.get('fromEmail')
  if not (toEmail and fromEmail):
    return

  logging.info('running the email cron')

  cutoff = datetime.now() - timedelta(hours=24)
  recentErrors = (LoggedError.all()
                  .filter('active =', True)
                  .filter('firstOccurrence >', cutoff)
                  .order('-firstOccurrence')).fetch(500, 0)
  # Most frequent first for the report body.
  recentErrors.sort(key=lambda e: e.count, reverse=True)

  byProject = collections.defaultdict(list)
  for loggedError in recentErrors:
    byProject[loggedError.project.key().name()].append(loggedError)

  context = {
      'projects': sorted(byProject.items()),
      'errorCount': len(recentErrors),
      'baseUrl': config.get('baseUrl')
  }

  body = template.render(getTemplatePath('dailymail.html'), context).strip()
  mail.send_mail(
      sender=fromEmail, to=toEmail, subject='Latest GEC reports',
      body='Only available in HTML', html=body)
Пример #14
0
    def post(self):  # pylint: disable=R0914, R0915
        """Aggregation worker: leases queued reports and folds them into LoggedErrors.

        NOTE(review): the original docstring said "Handles a new error report
        via POST", but the body leases pull-queue tasks and aggregates them.
        """
        # Throttle: only run when this task is the most recently queued id,
        # or on every fiftieth task.
        taskId = self.request.get("id", "0")
        currentId = memcache.get(AGGREGATION_ID)
        if taskId == "None" or not (taskId == currentId or int(taskId) % 50 == 0):
            # Skip this task unless it is the most recently added or if it is one of every fifty tasks.
            logging.debug("Skipping task %s, current is %s", taskId, currentId)
            return

        q = taskqueue.Queue("aggregation")
        tasks = _getTasks(q)
        logging.info("Leased %d tasks", len(tasks))

        # Group leased payloads by error key. Each payload is either a new
        # (instanceKey, backtrace) pair or a pre-aggregated dict (a retry).
        byError = collections.defaultdict(list)
        instanceKeys = []
        tasksByError = collections.defaultdict(list)
        for task in tasks:
            data = json.loads(task.payload)
            errorKey = data["error"]
            if "instance" in data and "backtrace" in data:
                instanceKey = data["instance"]
                byError[errorKey].append((instanceKey, data["backtrace"]))
                instanceKeys.append(instanceKey)
                tasksByError[errorKey].append(task)
            elif "aggregation" in data:
                byError[errorKey].append(data["aggregation"])
                tasksByError[errorKey].append(task)
            else:
                # Clean up any old tasks in the queue.
                logging.warn("Deleting an old task")
                q.delete_tasks([task])

        retries = 0
        instanceByKey = getInstanceMap(instanceKeys)
        for errorKey, instances in byError.items():
            # Normalize: keep retry dicts as-is, convert instance pairs to
            # single-instance aggregations before merging.
            instances = [
                keyOrDict
                if isinstance(keyOrDict, dict)
                else aggregateSingleInstance(instanceByKey[keyOrDict[0]], keyOrDict[1])
                for keyOrDict in instances
            ]
            aggregation = aggregateInstances(instances)

            success = False
            # _lockError/_unlockError guard concurrent writes to the same
            # error — presumably memcache-based; verify in their definitions.
            if _lockError(errorKey):
                try:
                    error = LoggedError.get(errorKey)
                    aggregate(
                        error,
                        aggregation.count,
                        aggregation.firstOccurrence,
                        aggregation.lastOccurrence,
                        aggregation.lastMessage,
                        aggregation.backtrace,
                        aggregation.environments,
                        aggregation.servers,
                    )
                    error.put()
                    logging.info("Successfully aggregated %r items for key %s", aggregation.count, errorKey)
                    success = True
                except:  # pylint: disable=W0702
                    logging.exception("Error writing to data store for key %s.", errorKey)
                finally:
                    _unlockError(errorKey)
            else:
                logging.info("Could not lock %s", errorKey)

            if not success:
                # Add a retry task. Dates and sets must be stringified /
                # listified so the payload is JSON-serializable.
                logging.info("Retrying aggregation for %d items for key %s", len(instances), errorKey)
                aggregation.firstOccurrence = str(aggregation.firstOccurrence)
                aggregation.lastOccurrence = str(aggregation.lastOccurrence)
                aggregation.environments = list(aggregation.environments)
                aggregation.servers = list(aggregation.servers)
                taskqueue.Queue("aggregation").add(
                    [taskqueue.Task(payload=json.dumps({"error": errorKey, "aggregation": aggregation}), method="PULL")]
                )
                retries += 1

            # The originals are consumed whether the write succeeded or was
            # requeued above.
            q.delete_tasks(tasksByError[errorKey])

        if retries:
            logging.warn("Retrying %d tasks", retries)
            for _ in range(retries):
                queueAggregationWorker()
Пример #15
0
def _putInstance(exception):
    """Put an exception in the data store.

    Looks up (by hash) or creates the aggregate LoggedError for the report,
    stores a LoggedErrorInstance for this occurrence, and queues aggregation
    when the error already existed.

    Args:
        exception: dict describing one reported exception. Requires
            'message', 'project', 'serverName' and 'timestamp'; 'backtrace',
            'environment', 'type', 'logMessage', 'context' and 'errorLevel'
            are optional.
    """
    backtraceText = exception.get("backtrace") or ""
    environment = exception.get("environment", "Unknown")
    message = exception["message"] or ""
    project = exception["project"]
    server = exception["serverName"]
    timestamp = datetime.fromtimestamp(exception["timestamp"])
    exceptionType = exception.get("type") or ""
    logMessage = exception.get("logMessage")
    context = exception.get("context")
    errorLevel = exception.get("errorLevel")

    errorHash = generateHash(exceptionType, backtraceText)

    error = getAggregatedError(project, errorHash)

    # Collapse newlines and cap the length for the datastore. A single
    # replace-then-slice covers both steps (the second replace() in the
    # original was a no-op on an already newline-free string).
    exceptionType = exceptionType.replace("\n", " ")[:500]

    needsAggregation = True
    if not error:
        # First report with this hash: create the aggregate record directly;
        # no queued aggregation pass is needed for a single occurrence.
        error = LoggedError(
            project=getProject(project),
            backtrace=backtraceText,
            type=exceptionType,
            hash=errorHash,
            active=True,
            errorLevel=errorLevel,
            count=1,
            firstOccurrence=timestamp,
            lastOccurrence=timestamp,
            lastMessage=message[:300],
            environments=[str(environment)],
            servers=[server],
        )
        error.put()
        needsAggregation = False

    instance = LoggedErrorInstance(
        project=error.project,
        error=error,
        environment=environment,
        type=exceptionType,
        errorLevel=errorLevel,
        date=timestamp,
        message=message,
        server=server,
        logMessage=logMessage,
    )
    if context:
        instance.context = json.dumps(context)
        if "userId" in context:
            try:
                instance.affectedUser = int(context["userId"])
            except (TypeError, ValueError):
                # Non-numeric user ids are simply not recorded.
                pass
    instance.put()

    if needsAggregation:
        queueAggregation(error, instance, backtraceText)
Пример #16
0
def _putInstance(exception):
  """Put an exception in the data store.

  Looks up (by hash) or creates the aggregate LoggedError for this report,
  stores a LoggedErrorInstance for the occurrence, and queues aggregation
  when the error already existed.

  Args:
    exception: dict describing one reported exception. Requires 'message',
      'project', 'serverName' and 'timestamp'; other fields are optional.
  """
  backtraceText = exception.get('backtrace') or ''
  environment = exception.get('environment', 'Unknown')
  message = exception['message'] or ''
  project = exception['project']
  server = exception['serverName']
  timestamp = datetime.fromtimestamp(exception['timestamp'])
  exceptionType = exception.get('type') or ''
  logMessage = exception.get('logMessage')
  context = exception.get('context')
  errorLevel = exception.get('errorLevel')

  errorHash = generateHash(exceptionType, backtraceText)

  error = getAggregatedError(project, errorHash)

  # Collapse newlines and cap the length; replace-then-slice replaces the
  # original replace/truncate/replace sequence (the trailing replace was a
  # no-op on an already newline-free string).
  exceptionType = exceptionType.replace('\n', ' ')[:500]

  needsAggregation = True
  if not error:
    # First occurrence: create the aggregate record directly.
    error = LoggedError(
        project=getProject(project),
        backtrace=backtraceText,
        type=exceptionType,
        hash=errorHash,
        active=True,
        errorLevel=errorLevel,
        count=1,
        firstOccurrence=timestamp,
        lastOccurrence=timestamp,
        lastMessage=message[:300],
        environments=[str(environment)],
        servers=[server])
    error.put()
    needsAggregation = False

  instance = LoggedErrorInstance(
      project=error.project,
      error=error,
      environment=environment,
      type=exceptionType,
      errorLevel=errorLevel,
      date=timestamp,
      message=message,
      server=server,
      logMessage=logMessage)
  if context:
    instance.context = json.dumps(context)
    if 'userId' in context:
      try:
        instance.affectedUser = int(context['userId'])
      except (TypeError, ValueError):
        # Non-numeric user ids are simply not recorded.
        pass
  instance.put()

  if needsAggregation:
    queueAggregation(error, instance, backtraceText)
Пример #17
0
  def post(self): # pylint: disable=R0914, R0915
    """Aggregation worker: leases queued reports and folds them into LoggedErrors.

    NOTE(review): the original docstring said "Handles a new error report via
    POST", but the body leases pull-queue tasks and aggregates them.
    """
    # Throttle: only run for the most recently queued id or every 50th task.
    taskId = self.request.get('id', '0')
    currentId = memcache.get(AGGREGATION_ID)
    if taskId == 'None' or not (taskId == currentId or int(taskId) % 50 == 0):
      # Skip this task unless it is the most recently added or if it is one of every fifty tasks.
      logging.debug('Skipping task %s, current is %s', taskId, currentId)
      return

    q = taskqueue.Queue('aggregation')
    tasks = _getTasks(q)
    logging.info('Leased %d tasks', len(tasks))

    # Group leased payloads by error key. Each payload is either a new
    # (instanceKey, backtrace) pair or a pre-aggregated dict (a retry).
    byError = collections.defaultdict(list)
    instanceKeys = []
    tasksByError = collections.defaultdict(list)
    for task in tasks:
      data = json.loads(task.payload)
      errorKey = data['error']
      if 'instance' in data and 'backtrace' in data:
        instanceKey = data['instance']
        byError[errorKey].append((instanceKey, data['backtrace']))
        instanceKeys.append(instanceKey)
        tasksByError[errorKey].append(task)
      elif 'aggregation' in data:
        byError[errorKey].append(data['aggregation'])
        tasksByError[errorKey].append(task)
      else:
        # Clean up any old tasks in the queue.
        logging.warn('Deleting an old task')
        q.delete_tasks([task])

    retries = 0
    instanceByKey = getInstanceMap(instanceKeys)
    for errorKey, instances in byError.items():
      # Normalize: keep retry dicts as-is, turn instance pairs into
      # single-instance aggregations before merging.
      instances = [keyOrDict
                      if isinstance(keyOrDict, dict)
                      else aggregateSingleInstance(instanceByKey[keyOrDict[0]], keyOrDict[1])
                   for keyOrDict in instances]
      aggregation = aggregateInstances(instances)

      success = False
      # _lockError/_unlockError guard concurrent writes to the same error —
      # presumably memcache-based; verify in their definitions.
      if _lockError(errorKey):
        try:
          error = LoggedError.get(errorKey)
          aggregate(
              error, aggregation.count, aggregation.firstOccurrence,
              aggregation.lastOccurrence, aggregation.lastMessage, aggregation.backtrace,
              aggregation.environments, aggregation.servers)
          error.put()
          logging.info('Successfully aggregated %r items for key %s', aggregation.count, errorKey)
          success = True
        except: # pylint: disable=W0702
          logging.exception('Error writing to data store for key %s.', errorKey)
        finally:
          _unlockError(errorKey)
      else:
        logging.info('Could not lock %s', errorKey)

      if not success:
        # Add a retry task. Dates and sets must be stringified / listified so
        # the payload is JSON-serializable.
        logging.info('Retrying aggregation for %d items for key %s', len(instances), errorKey)
        aggregation.firstOccurrence = str(aggregation.firstOccurrence)
        aggregation.lastOccurrence = str(aggregation.lastOccurrence)
        aggregation.environments = list(aggregation.environments)
        aggregation.servers = list(aggregation.servers)
        taskqueue.Queue('aggregation').add([
          taskqueue.Task(payload = json.dumps({'error': errorKey, 'aggregation': aggregation}), method='PULL')
        ])
        retries += 1

      # Leased originals are consumed whether the write succeeded or was
      # requeued above.
      q.delete_tasks(tasksByError[errorKey])

    if retries:
      logging.warn("Retrying %d tasks", retries)
      for _ in range(retries):
        queueAggregationWorker()