def get(self, viewLength):
    """Displays the top 25 errors, aggregated over the given view length."""
    if viewLength != 'day':
      # TODO(robbyw): For viewLength == week or viewLength == month, aggregate the aggregates.
      viewLength = 'day'

    data = AggregatedStats.all().order('-date').get()
    data = json.loads(data.json)[:25]

    for _, row in data:
      logging.info(row)
      row['servers'] = sorted(row['servers'].items(), key=lambda x: x[1], reverse=True)
      row['environments'] = sorted(row['environments'].items(), key=lambda x: x[1], reverse=True)

    keys, values = zip(*data)
    errors = LoggedError.get([db.Key(key) for key in keys])

    context = {
      'title': 'Top 25 exceptions over the last %s' % viewLength,
      'errors': zip(errors, values),
      'total': len(data)
    }
    self.response.out.write(
        template.render(getTemplatePath('aggregation.html'), context))
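# The handler above assumes AggregatedStats.json decodes to a list of
# [errorKey, rowDict] pairs, newest first -- inferred from the unpacking in
# `for _, row in data` and `zip(*data)`. A hypothetical entry, with
# illustrative key and values only:
#
#   ['ag5...errorKey', {'servers': {'web1': 30, 'web2': 12},
#                       'environments': {'prod': 40, 'staging': 2}}]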
def doAuthenticatedGet(self, _, *args):
    """Marks the given error as resolved (inactive)."""
    key, = args  # The single URL path argument: the error's datastore key.
    self.response.headers['Content-Type'] = 'text/plain'

    error = LoggedError.get(key)
    error.active = False
    error.put()

    self.response.out.write('ok')
def doAuthenticatedGet(self, _):
    """Deletes all errors and error instances. Admin only."""
    if users.is_current_user_admin():
      for error in LoggedError.all():
        error.delete()
      for instance in LoggedErrorInstance.all():
        instance.delete()
      self.response.out.write('Done')
    else:
      self.redirect(users.create_login_url(self.request.uri))
def getAggregatedError(project, errorHash):
    """Gets the active error matching the given project and hash, or None if no match is found."""
    project = getProject(project)
    q = (LoggedError.all()
         .filter('project =', project)
         .filter('hash =', errorHash)
         .filter('active =', True))
    return q.get()
def doAuthenticatedGet(self, user, *args):
    """Renders the detail page for a single error."""
    key, = args
    self.response.headers['Content-Type'] = 'text/html'

    error = LoggedError.get(key)
    filters = getFilters(self.request)

    context = {
      'title': '%s - %s' % (error.lastMessage, NAME),
      'extraScripts': ['view'],
      'user': user,
      'error': error,
      'filters': filters.items(),
      'instances': getInstances(filters, parent=error)[:100]
    }
    self.response.out.write(
        template.render(getTemplatePath('view.html'), context))
def getErrors(filters, limit, offset):
    """Gets a list of errors, filtered by the given filters.

    Returns an (errors, instances) pair; exactly one of the two is None.
    """
    for key in filters:
      if key in INSTANCE_FILTERS:
        return None, getInstances(filters, limit=limit, offset=offset)

    errors = LoggedError.all().filter('active =', True)
    for key, value in filters.items():
      if key == 'maxAgeHours':
        errors = errors.filter(
            'firstOccurrence >', datetime.now() - timedelta(hours=int(value)))
      elif key == 'project':
        errors = errors.filter('project =', getProject(value))
      else:
        errors = errors.filter(key, value)

    # A datastore query with an inequality filter must order on that property first.
    if 'maxAgeHours' in filters:
      errors = errors.order('-firstOccurrence')
    else:
      errors = errors.order('-lastOccurrence')

    return errors.fetch(limit, offset), None
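# Example call, with an illustrative filters dict (the project name and hour
# count below are hypothetical; INSTANCE_FILTERS keys would instead route to
# getInstances):
#
#   errors, instances = getErrors({'project': 'frontend', 'maxAgeHours': '24'},
#                                 limit=50, offset=0)
#   # errors is a list of LoggedError entities; instances is None because no
#   # instance-level filter was passed.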
def main():
    """Emails a digest of the most frequent errors from the last 24 hours."""
    toEmail = config.get('toEmail')
    fromEmail = config.get('fromEmail')
    if toEmail and fromEmail:
      logging.info('running the email cron')
      errorQuery = (LoggedError.all()
                    .filter('active =', True)
                    .filter('firstOccurrence >', datetime.now() - timedelta(hours=24))
                    .order('-firstOccurrence'))
      errors = errorQuery.fetch(500, 0)
      errors.sort(key=lambda x: x.count, reverse=True)

      projects = collections.defaultdict(list)
      for error in errors:
        projects[error.project.key().name()].append(error)

      context = {
        'projects': sorted(projects.items()),
        'errorCount': len(errors),
        'baseUrl': config.get('baseUrl')
      }
      body = template.render(getTemplatePath('dailymail.html'), context).strip()
      mail.send_mail(
          sender=fromEmail,
          to=toEmail,
          subject='Latest GEC reports',
          body='Only available in HTML',
          html=body)
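# In the CGI-era App Engine runtime, a cron-driven script with a main()
# like the one above was conventionally invoked through this entry point;
# a hedged sketch, assuming that wiring:
if __name__ == '__main__':
    main()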
def post(self):  # pylint: disable=R0914, R0915
    """Aggregates leased error instances from the pull queue into their parent errors."""
    taskId = self.request.get('id', '0')
    currentId = memcache.get(AGGREGATION_ID)
    if taskId == 'None' or not (taskId == currentId or int(taskId) % 50 == 0):
      # Skip this task unless it is the most recently added or one of every fifty tasks.
      logging.debug('Skipping task %s, current is %s', taskId, currentId)
      return

    q = taskqueue.Queue('aggregation')
    tasks = _getTasks(q)
    logging.info('Leased %d tasks', len(tasks))

    byError = collections.defaultdict(list)
    instanceKeys = []
    tasksByError = collections.defaultdict(list)
    for task in tasks:
      data = json.loads(task.payload)
      errorKey = data['error']
      if 'instance' in data and 'backtrace' in data:
        instanceKey = data['instance']
        byError[errorKey].append((instanceKey, data['backtrace']))
        instanceKeys.append(instanceKey)
        tasksByError[errorKey].append(task)
      elif 'aggregation' in data:
        byError[errorKey].append(data['aggregation'])
        tasksByError[errorKey].append(task)
      else:
        # Clean up any old tasks in the queue.
        logging.warn('Deleting an old task')
        q.delete_tasks([task])

    retries = 0
    instanceByKey = getInstanceMap(instanceKeys)
    for errorKey, instances in byError.items():
      # Each item is either a pre-aggregated dict (from a retry task) or an
      # (instanceKey, backtrace) pair that still needs single-instance aggregation.
      instances = [
          keyOrDict if isinstance(keyOrDict, dict)
          else aggregateSingleInstance(instanceByKey[keyOrDict[0]], keyOrDict[1])
          for keyOrDict in instances
      ]
      aggregation = aggregateInstances(instances)

      success = False
      if _lockError(errorKey):
        try:
          error = LoggedError.get(errorKey)
          aggregate(
              error,
              aggregation.count,
              aggregation.firstOccurrence,
              aggregation.lastOccurrence,
              aggregation.lastMessage,
              aggregation.backtrace,
              aggregation.environments,
              aggregation.servers)
          error.put()
          logging.info('Successfully aggregated %r items for key %s',
                       aggregation.count, errorKey)
          success = True
        except:  # pylint: disable=W0702
          logging.exception('Error writing to data store for key %s.', errorKey)
        finally:
          _unlockError(errorKey)
      else:
        logging.info('Could not lock %s', errorKey)

      if not success:
        # Re-enqueue the already-aggregated data as a retry task.
        logging.info('Retrying aggregation for %d items for key %s',
                     len(instances), errorKey)
        aggregation.firstOccurrence = str(aggregation.firstOccurrence)
        aggregation.lastOccurrence = str(aggregation.lastOccurrence)
        aggregation.environments = list(aggregation.environments)
        aggregation.servers = list(aggregation.servers)
        taskqueue.Queue('aggregation').add([
            taskqueue.Task(
                payload=json.dumps({'error': errorKey, 'aggregation': aggregation}),
                method='PULL')
        ])
        retries += 1

      q.delete_tasks(tasksByError[errorKey])

    if retries:
      logging.warn('Retrying %d tasks', retries)
      for _ in range(retries):
        queueAggregationWorker()
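# _lockError and _unlockError are referenced above but not shown in this
# section. A minimal sketch of one plausible implementation, assuming a
# memcache-based mutex (the key prefix and timeout are illustrative, not the
# real values):

def _lockErrorSketch(errorKey):
    """Tries to take an exclusive lock on the error; returns True on success."""
    # memcache.add only succeeds if the key is absent, giving a cheap mutex
    # that expires on its own if the worker dies mid-aggregation.
    return memcache.add('lock:%s' % errorKey, 1, time=60)


def _unlockErrorSketch(errorKey):
    """Releases the lock taken by _lockErrorSketch."""
    memcache.delete('lock:%s' % errorKey)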
def _putInstance(exception):
    """Puts an exception in the data store."""
    backtraceText = exception.get('backtrace') or ''
    environment = exception.get('environment', 'Unknown')
    message = exception['message'] or ''
    project = exception['project']
    server = exception['serverName']
    timestamp = datetime.fromtimestamp(exception['timestamp'])
    exceptionType = exception.get('type') or ''
    logMessage = exception.get('logMessage')
    context = exception.get('context')
    errorLevel = exception.get('errorLevel')

    errorHash = generateHash(exceptionType, backtraceText)
    error = getAggregatedError(project, errorHash)

    exceptionType = exceptionType.replace('\n', ' ')
    if len(exceptionType) > 500:
      exceptionType = exceptionType[:500]

    needsAggregation = True
    if not error:
      error = LoggedError(
          project=getProject(project),
          backtrace=backtraceText,
          type=exceptionType,
          hash=errorHash,
          active=True,
          errorLevel=errorLevel,
          count=1,
          firstOccurrence=timestamp,
          lastOccurrence=timestamp,
          lastMessage=message[:300],
          environments=[str(environment)],
          servers=[server])
      error.put()
      needsAggregation = False

    instance = LoggedErrorInstance(
        project=error.project,
        error=error,
        environment=environment,
        type=exceptionType,
        errorLevel=errorLevel,
        date=timestamp,
        message=message,
        server=server,
        logMessage=logMessage)
    if context:
      instance.context = json.dumps(context)
      if 'userId' in context:
        try:
          instance.affectedUser = int(context['userId'])
        except (TypeError, ValueError):
          pass
    instance.put()

    if needsAggregation:
      queueAggregation(error, instance, backtraceText)
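# A report payload as _putInstance expects it -- the field names are taken
# from the reads above; all values here are illustrative only:
#
#   {
#     'project': 'frontend',
#     'serverName': 'web1',
#     'timestamp': 1300000000,        # seconds since the epoch
#     'message': 'KeyError: user_id',
#     'type': 'KeyError',
#     'backtrace': 'Traceback (most recent call last): ...',
#     'environment': 'prod',          # optional; defaults to 'Unknown'
#     'logMessage': None,             # optional
#     'errorLevel': None,             # optional
#     'context': {'userId': 12345},   # optional; userId sets affectedUser
#   }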