def test_should_filter_message(self, mock_is_valid_error_message):
    """Dropping by error message: when any exception value fails the
    message filter, should_filter() reports ERROR_MESSAGE, and the
    validator is consulted once per value with its formatted string."""
    Case = namedtuple("TestItem", "value formatted result")
    cases = [
        Case({"type": "UnfilteredException"}, "UnfilteredException", True),
        Case(
            {"value": "This is an unfiltered exception."},
            "This is an unfiltered exception.",
            True,
        ),
        Case(
            {"type": "UnfilteredException", "value": "This is an unfiltered exception."},
            "UnfilteredException: This is an unfiltered exception.",
            True,
        ),
        Case(
            {"type": "FilteredException", "value": "This is a filtered exception."},
            "FilteredException: This is a filtered exception.",
            False,
        ),
    ]

    payload = {"exception": {"values": [case.value for case in cases]}}
    project_config = get_project_config(self.project)
    manager = EventManager(payload, project=self.project, project_config=project_config)

    # One verdict per exception value, in order.
    mock_is_valid_error_message.side_effect = [case.result for case in cases]

    assert manager.should_filter() == (True, FilterStatKeys.ERROR_MESSAGE)

    expected_calls = [mock.call(project_config, case.formatted) for case in cases]
    assert mock_is_valid_error_message.call_args_list == expected_calls
def test_should_filter_message(self, mock_is_valid_error_message):
    """Dropping by error message: when any exception value fails the
    message filter, should_filter() reports ERROR_MESSAGE, and the
    validator is consulted once per value with its formatted string."""
    Case = namedtuple('TestItem', 'value formatted result')
    cases = [
        Case({'type': 'UnfilteredException'}, 'UnfilteredException', True),
        Case(
            {'value': 'This is an unfiltered exception.'},
            'This is an unfiltered exception.',
            True,
        ),
        Case(
            {'type': 'UnfilteredException', 'value': 'This is an unfiltered exception.'},
            'UnfilteredException: This is an unfiltered exception.',
            True,
        ),
        Case(
            {'type': 'FilteredException', 'value': 'This is a filtered exception.'},
            'FilteredException: This is a filtered exception.',
            False,
        ),
    ]

    payload = {'exception': {'values': [case.value for case in cases]}}
    relay_config = get_full_relay_config(self.project.id)
    manager = EventManager(payload, project=self.project, relay_config=relay_config)

    # One verdict per exception value, in order.
    mock_is_valid_error_message.side_effect = [case.result for case in cases]

    assert manager.should_filter() == (True, FilterStatKeys.ERROR_MESSAGE)

    expected_calls = [mock.call(relay_config, case.formatted) for case in cases]
    assert mock_is_valid_error_message.call_args_list == expected_calls
def test_should_filter_message(self, mock_is_valid_error_message):
    """Dropping by error message: when any exception value fails the
    message filter, should_filter() reports ERROR_MESSAGE, and the
    validator is consulted once per value with its formatted string."""
    Case = namedtuple('TestItem', 'value formatted result')
    cases = [
        Case({'type': 'UnfilteredException'}, 'UnfilteredException', True),
        Case(
            {'value': 'This is an unfiltered exception.'},
            'This is an unfiltered exception.',
            True,
        ),
        Case(
            {'type': 'UnfilteredException', 'value': 'This is an unfiltered exception.'},
            'UnfilteredException: This is an unfiltered exception.',
            True,
        ),
        Case(
            {'type': 'FilteredException', 'value': 'This is a filtered exception.'},
            'FilteredException: This is a filtered exception.',
            False,
        ),
    ]

    payload = {'exception': {'values': [case.value for case in cases]}}
    manager = EventManager(payload, project=self.project)

    # One verdict per exception value, in order.
    mock_is_valid_error_message.side_effect = [case.result for case in cases]

    assert manager.should_filter() == (True, FilterStatKeys.ERROR_MESSAGE)

    expected_calls = [mock.call(self.project, case.formatted) for case in cases]
    assert mock_is_valid_error_message.call_args_list == expected_calls
def process(self, request, project, key, auth, helper, data, attachments=None, **kwargs):
    """Ingest one event payload for *project*.

    Pipeline: normalize the payload, apply inbound filters, apply rate
    limits, scrub sensitive data / IP addresses per org+project options,
    deduplicate by event id, then queue the event for storage.

    Returns the event id on success.

    Raises:
        APIError: no JSON payload was supplied.
        APIForbidden: the event was dropped by a filter, or an event
            with the same id was seen within the dedup cache window.
        APIRateLimited: the project is over quota.
    """
    metrics.incr('events.total')

    if not data:
        raise APIError('No JSON data was found')

    remote_addr = request.META['REMOTE_ADDR']
    event_mgr = EventManager(
        data,
        project=project,
        key=key,
        auth=auth,
        client_ip=remote_addr,
        user_agent=helper.context.agent,
        content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''),
    )
    # The manager owns the payload from here; drop our reference so the
    # pre-normalization copy can be collected.
    del data

    self.pre_normalize(event_mgr, helper)
    event_mgr.normalize()

    event_received.send_robust(ip=remote_addr, project=project, sender=type(self))

    start_time = time()
    tsdb_start_time = to_datetime(start_time)

    should_filter, filter_reason = event_mgr.should_filter()
    if should_filter:
        # Count the event as both "received" and "blacklisted" at the
        # project, organization, and key levels so totals stay comparable.
        increment_list = [
            (tsdb.models.project_total_received, project.id),
            (tsdb.models.project_total_blacklisted, project.id),
            (tsdb.models.organization_total_received, project.organization_id),
            (tsdb.models.organization_total_blacklisted, project.organization_id),
            (tsdb.models.key_total_received, key.id),
            (tsdb.models.key_total_blacklisted, key.id),
        ]
        try:
            increment_list.append(
                (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id))
        except KeyError:
            # Not every filter reason has a dedicated per-reason stat key;
            # in that case only the aggregate counters above are bumped.
            pass
        tsdb.incr_multi(
            increment_list,
            timestamp=tsdb_start_time,
        )
        metrics.incr('events.blacklisted', tags={'reason': filter_reason})
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=type(self),
        )
        raise APIForbidden('Event dropped due to filter: %s' % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False)
    if isinstance(rate_limit, bool):
        # Some quota backends return a bare bool; normalize to a RateLimit
        # object so the code below has a single shape to deal with.
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.project_total_rejected, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.organization_total_rejected, project.organization_id),
                (tsdb.models.key_total_received, key.id),
                (tsdb.models.key_total_rejected, key.id),
            ],
            timestamp=tsdb_start_time,
        )
        metrics.incr(
            'events.dropped',
            tags={
                'reason': rate_limit.reason_code if rate_limit else 'unknown',
            })
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            sender=type(self),
            reason_code=rate_limit.reason_code if rate_limit else None,
        )
        # A failed rate-limiter lookup (rate_limit is None) falls through
        # and the event is still processed below; only a real limit raises.
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)
    else:
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.organization_total_received, project.organization_id),
                (tsdb.models.key_total_received, key.id),
            ],
            timestamp=tsdb_start_time,
        )

    org_options = OrganizationOption.objects.get_all_values(
        project.organization_id)

    data = event_mgr.get_data()
    del event_mgr

    event_id = data['event_id']

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (
        project.id,
        event_id,
    )

    if cache.get(cache_key) is not None:
        raise APIForbidden(
            'An event with the same ID already exists (%s)' % (event_id, ))

    # Org-level "require" options force scrubbing on regardless of the
    # project-level setting.
    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False) or
        project.get_option('sentry:scrub_ip_address', False))
    scrub_data = (org_options.get('sentry:require_scrub_data', False) or
                  project.get_option('sentry:scrub_data', True))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                            project.get_option(sensitive_fields_key, []))

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (org_options.get(exclude_fields_key, []) +
                          project.get_option(exclude_fields_key, []))

        scrub_defaults = (
            org_options.get('sentry:require_scrub_defaults', False) or
            project.get_option('sentry:scrub_defaults', True))

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    # Dedup window: reject identical event ids for the next 5 minutes.
    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=type(self),
    )

    return event_id