def get(self, request, wizard_hash):
    """
    This opens a page where, with an active session, the wizard data is
    filled into the cache. Redirects to the organization whenever the
    cache has been deleted.
    """
    context = {
        'hash': wizard_hash
    }
    key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)

    wizard_data = default_cache.get(key)
    if wizard_data is None:
        return self.redirect_to_org(request)

    orgs = client.get(
        reverse('sentry-api-0-organizations'),
        request=request)

    filled_projects = []
    for org in orgs.data:
        projects = client.get(reverse('sentry-api-0-organization-projects', kwargs={
            'organization_slug': org.get('slug')
        }), request=request)
        for project in projects.data:
            if project.get('status') == 'deleted':
                continue  # skip if project has been deleted
            enriched_project = project
            enriched_project['organization'] = org
            keys = client.get(reverse('sentry-api-0-project-keys', kwargs={
                'organization_slug': org.get('slug'),
                'project_slug': project.get('slug')
            }), request=request)
            enriched_project['keys'] = keys.data
            filled_projects.append(enriched_project)

    # Fetching or creating a token
    token = None
    tokens = [
        x for x in ApiToken.objects.filter(user=request.user).all()
        if 'project:releases' in x.get_scopes()
    ]
    if not tokens:
        token = ApiToken.objects.create(
            user=request.user,
            scope_list=['project:releases'],
            refresh_token=None,
            expires_at=None,
        )
    else:
        token = tokens[0]

    result = {
        'apiKeys': serialize(token),
        'projects': filled_projects
    }

    key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
    default_cache.set(key, result, SETUP_WIZARD_CACHE_TIMEOUT)

    return render_to_response('sentry/setup-wizard.html', context, request)
def insert_data_to_database(self, data, start_time=None,
                            from_reprocessing=False, attachments=None):
    if start_time is None:
        start_time = time()

    # we might be passed some subclasses of dict that fail dumping
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    cache_timeout = 3600
    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, cache_timeout)

    # Attachments will be empty or None if the "event-attachments" feature
    # is turned off. For native crash reports it will still contain the
    # crash dump (e.g. minidump) so we can load it during processing.
    if attachments is not None:
        attachment_cache.set(cache_key, attachments, cache_timeout)

    task = from_reprocessing and \
        preprocess_event_from_reprocessing or preprocess_event
    task.delay(cache_key=cache_key, start_time=start_time,
               event_id=data['event_id'])
def prepare_reports(dry_run=False, *args, **kwargs):
    timestamp, duration = _fill_default_parameters(*args, **kwargs)

    logger.info("reports.begin_prepare_report")

    organizations = _get_organization_queryset().values_list("id", flat=True)
    for i, organization_id in enumerate(
        RangeQuerySetWrapper(organizations, step=10000, result_value_getter=lambda item: item)
    ):
        prepare_organization_report.delay(timestamp, duration, organization_id, dry_run=dry_run)
        if i % 10000 == 0:
            logger.info(
                "reports.scheduled_prepare_organization_report",
                extra={"organization_id": organization_id, "total_scheduled": i},
            )

    default_cache.set(prepare_reports_verify_key(), "1", int(timedelta(days=3).total_seconds()))

    logger.info("reports.finish_prepare_report")
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        logger.error('Data not available in preprocess_event (cache_key=%s)', cache_key)
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed and cache_key:
        default_cache.set(cache_key, data, 3600)

    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
def preprocess_event(cache_key=None, data=None, start_time=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'pre'})
        logger.error('Data not available in preprocess_event (cache_key=%s)', cache_key)
        return

    project = data['project']

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        for processor in (safe_execute(plugin.get_event_preprocessors) or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed and cache_key:
        default_cache.set(cache_key, data, 3600)

    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data, start_time=start_time)
def insert_data_to_database(self, data):
    # we might be passed LazyData
    if isinstance(data, LazyData):
        data = dict(data.items())
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
def insert_data_to_database_legacy(data, start_time=None, from_reprocessing=False, attachments=None):
    """
    Yet another "fast path" to ingest an event without making it go through
    Relay. Please consider using functions from the ingest consumer instead,
    or, if you're within tests, to use `TestCase.store_event`.
    """
    # XXX(markus): Delete this function and merge with ingest consumer logic.
    if start_time is None:
        start_time = time()

    # we might be passed some subclasses of dict that fail dumping
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    cache_timeout = 3600
    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, cache_timeout)

    # Attachments will be empty or None if the "event-attachments" feature
    # is turned off. For native crash reports it will still contain the
    # crash dump (e.g. minidump) so we can load it during processing.
    if attachments is not None:
        attachment_cache.set(cache_key, attachments, cache_timeout)

    task = from_reprocessing and preprocess_event_from_reprocessing or preprocess_event
    task.delay(cache_key=cache_key, start_time=start_time, event_id=data["event_id"])
def _update_cachefiles(self, project, debug_files, cls):
    rv = []
    conversion_errors = {}

    for debug_file in debug_files:
        debug_id = debug_file.debug_id

        # Find all the known bad files we could not convert last time. We
        # use the debug identifier and file checksum to identify the source
        # DIF for historic reasons (debug_file.id would do, too).
        cache_key = 'scbe:%s:%s' % (debug_id, debug_file.file.checksum)
        err = default_cache.get(cache_key)
        if err is not None:
            conversion_errors[debug_id] = err
            continue

        # Download the original debug symbol and convert the object file to
        # a cache. This can either yield a cache object, an error or none of
        # the above. THE FILE DOWNLOAD CAN TAKE SIGNIFICANT TIME.
        with debug_file.file.getfile(as_tempfile=True) as tf:
            file, cache, err = self._update_cachefile(debug_file, tf.name, cls)

        # Store this conversion error so that we can skip subsequent
        # conversions. There might be concurrent conversions running for the
        # same debug file, however.
        if err is not None:
            default_cache.set(cache_key, err, CONVERSION_ERROR_TTL)
            conversion_errors[debug_id] = err
            continue

        if file is not None or cache is not None:
            rv.append((debug_id, file, cache))

    return rv, conversion_errors
def _update_cachefiles(self, project, dsym_files):
    rv = []

    # Find all the known bad files we could not convert last time
    # around
    conversion_errors = {}
    for dsym_file in dsym_files:
        cache_key = 'scbe:%s:%s' % (dsym_file.uuid, dsym_file.file.checksum)
        err = default_cache.get(cache_key)
        if err is not None:
            conversion_errors[dsym_file.uuid] = err

    for dsym_file in dsym_files:
        dsym_uuid = dsym_file.uuid
        if dsym_uuid in conversion_errors:
            continue

        try:
            with dsym_file.file.getfile(as_tempfile=True) as tf:
                fo = FatObject.from_path(tf.name)
                o = fo.get_object(uuid=dsym_file.uuid)
                if o is None:
                    continue
                symcache = o.make_symcache()
        except SymbolicError as e:
            default_cache.set(
                'scbe:%s:%s' % (dsym_uuid, dsym_file.file.checksum),
                e.message, CONVERSION_ERROR_TTL)
            conversion_errors[dsym_uuid] = e.message
            logger.error('dsymfile.symcache-build-error',
                         exc_info=True, extra=dict(dsym_uuid=dsym_uuid))
            continue

        file = File.objects.create(
            name=dsym_file.uuid,
            type='project.symcache',
        )
        file.putfile(symcache.open_stream())
        try:
            with transaction.atomic():
                rv.append((dsym_uuid, ProjectSymCacheFile.objects.get_or_create(
                    project=project,
                    cache_file=file,
                    dsym_file=dsym_file,
                    defaults=dict(
                        checksum=dsym_file.file.checksum,
                        version=symcache.file_format_version,
                    ))[0]))
        except IntegrityError:
            file.delete()
            rv.append((dsym_uuid, ProjectSymCacheFile.objects.get(
                project=project,
                dsym_file=dsym_file,
            )))

    return rv, conversion_errors
def set_assemble_status(project, checksum, state, detail=None):
    cache_key = 'assemble-status:%s' % _get_idempotency_id(project, checksum)

    # NB: Also cache successfully created debug files to avoid races between
    # multiple DIFs with the same identifier. On the downside, this blocks
    # re-uploads for 10 minutes.
    default_cache.set(cache_key, (state, detail), 600)
def reprocess_minidump(data):
    project = Project.objects.get_from_cache(id=data['project'])

    minidump_is_reprocessed_cache_key = minidump_reprocessed_cache_key_for_event(data)
    if default_cache.get(minidump_is_reprocessed_cache_key):
        return

    minidump = get_attached_minidump(data)

    if not minidump:
        logger.error("Missing minidump for minidump event")
        return

    request_id_cache_key = request_id_cache_key_for_event(data)

    response = run_symbolicator(
        project=project,
        request_id_cache_key=request_id_cache_key,
        create_task=create_minidump_task,
        minidump=make_buffered_slice_reader(minidump.data, None))

    if handle_symbolicator_response_status(data, response):
        merge_symbolicator_minidump_response(data, response)

    event_cache_key = cache_key_for_event(data)
    default_cache.set(event_cache_key, dict(data), 3600)
    default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)

    return data
def get(self, request, wizard_hash=None):
    """
    This tries to retrieve and return the cache content if possible;
    otherwise it creates a new cache entry.
    """
    if wizard_hash is not None:
        key = "%s%s" % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
        wizard_data = default_cache.get(key)

        if wizard_data is None:
            return Response(status=404)
        elif wizard_data == "empty":
            # when we just created a clean cache
            return Response(status=400)

        return Response(serialize(wizard_data))
    else:
        # This creates a new available hash url for the project wizard
        rate_limited = ratelimits.is_limited(
            key="rl:setup-wizard:ip:%s" % request.META["REMOTE_ADDR"], limit=10)
        if rate_limited:
            logger.info("setup-wizard.rate-limit")
            return Response({"Too many wizard requests"}, status=403)

        wizard_hash = get_random_string(
            64, allowed_chars="abcdefghijklmnopqrstuvwxyz012345679")

        key = "%s%s" % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
        default_cache.set(key, "empty", SETUP_WIZARD_CACHE_TIMEOUT)
        return Response(serialize({"hash": wizard_hash}))
def process_message(self, message):
    message = msgpack.unpackb(message.value(), use_list=False)
    body = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = message["project_id"]

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event"
            " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed do not reprocess

    cache_key = cache_key_from_project_id_and_event_id(project_id=project_id, event_id=event_id)
    cache_timeout = 3600
    default_cache.set(cache_key, body, cache_timeout, raw=True)

    # queue the event for processing
    preprocess_event.delay(cache_key=cache_key, start_time=start_time, event_id=event_id)

    # remember for 1 hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", 3600)
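# Hedged sketch of the deduplication guard used in the consumer above, pulled out on its
# own for clarity. The helper names `was_event_seen` / `mark_event_seen` are illustrative
# and not part of the Sentry codebase; the pattern is: probe a marker key before doing
# work, and set it (with a TTL) only after the work has been handed off.
def was_event_seen(project_id, event_id):
    # a hit on the marker key means a previous consumer already forwarded this event
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    return cache.get(deduplication_key) is not None


def mark_event_seen(project_id, event_id, ttl=3600):
    # the value is irrelevant; only the key's existence (for `ttl` seconds) matters
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    cache.set(deduplication_key, "", ttl)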
def process_event(cache_key, start_time=None, **kwargs):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key, data=None, start_time=start_time)
def get(self, request, wizard_hash=None):
    """
    This tries to retrieve and return the cache content if possible;
    otherwise it creates a new cache entry.
    """
    if wizard_hash is not None:
        key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
        wizard_data = default_cache.get(key)

        if wizard_data is None:
            return Response(status=404)
        elif wizard_data == 'empty':
            # when we just created a clean cache
            return Response(status=400)

        return Response(serialize(wizard_data))
    else:
        # This creates a new available hash url for the project wizard
        rate_limited = ratelimits.is_limited(
            key='rl:setup-wizard:ip:%s' % request.META['REMOTE_ADDR'],
            limit=10,
        )
        if rate_limited:
            logger.info('setup-wizard.rate-limit')
            return Response(
                {
                    'Too many wizard requests',
                },
                status=403
            )

        wizard_hash = get_random_string(
            64, allowed_chars='abcdefghijklmnopqrstuvwxyz012345679')

        key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
        default_cache.set(key, 'empty', SETUP_WIZARD_CACHE_TIMEOUT)
        return Response(serialize({'hash': wizard_hash}))
def preprocess_event(cache_key=None, data=None, **kwargs):
    from sentry.plugins import plugins

    if cache_key:
        data = default_cache.get(cache_key)

    logger = preprocess_event.get_logger()

    if data is None:
        logger.error('Data not available in preprocess_event (cache_key=%s)', cache_key)
        return

    project = data['project']

    # TODO(dcramer): ideally we would know if data changed by default
    has_changed = False
    for plugin in plugins.all(version=2):
        for processor in (safe_execute(plugin.get_event_preprocessors) or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed and cache_key:
        default_cache.set(cache_key, data, 3600)

    if cache_key:
        data = None
    save_event.delay(cache_key=cache_key, data=data)
def _update_cachefiles(self, project, debug_files, cls):
    rv = []
    conversion_errors = {}

    for debug_file in debug_files:
        debug_id = debug_file.debug_id

        # Find all the known bad files we could not convert last time. We
        # use the debug identifier and file checksum to identify the source
        # DIF for historic reasons (debug_file.id would do, too).
        cache_key = 'scbe:%s:%s' % (debug_id, debug_file.file.checksum)
        err = default_cache.get(cache_key)
        if err is not None:
            conversion_errors[debug_id] = err
            continue

        # Download the original debug symbol and convert the object file to
        # a cache. This can either yield a cache object, an error or none of
        # the above. THE FILE DOWNLOAD CAN TAKE SIGNIFICANT TIME.
        with debug_file.file.getfile(as_tempfile=True) as tf:
            file, cache, err = self._update_cachefile(
                debug_file, tf.name, cls)

        # Store this conversion error so that we can skip subsequent
        # conversions. There might be concurrent conversions running for the
        # same debug file, however.
        if err is not None:
            default_cache.set(cache_key, err, CONVERSION_ERROR_TTL)
            conversion_errors[debug_id] = err
            continue

        if file is not None or cache is not None:
            rv.append((debug_id, file, cache))

    return rv, conversion_errors
def get(self, request, wizard_hash):
    """
    This opens a page where, with an active session, the wizard data is
    filled into the cache. Redirects to the organization whenever the
    cache has been deleted.
    """
    context = {'hash': wizard_hash}
    key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)

    wizard_data = default_cache.get(key)
    if wizard_data is None:
        return self.redirect_to_org(request)

    orgs = Organization.objects.filter(
        member_set__role__in=[x.id for x in roles.with_scope('org:read')],
        member_set__user=request.user,
        status=OrganizationStatus.VISIBLE,
    ).order_by('-date_added')[:50]

    filled_projects = []

    for org in orgs:
        projects = list(
            Project.objects.filter(
                organization=org,
                status=ProjectStatus.VISIBLE,
            ).order_by('-date_added')[:50])
        for project in projects:
            enriched_project = serialize(project)
            enriched_project['organization'] = serialize(org)
            keys = list(
                ProjectKey.objects.filter(
                    project=project,
                    roles=F('roles').bitor(ProjectKey.roles.store),
                    status=ProjectKeyStatus.ACTIVE,
                ))
            enriched_project['keys'] = serialize(keys)
            filled_projects.append(enriched_project)

    # Fetching or creating a token
    token = None
    tokens = [
        x for x in ApiToken.objects.filter(user=request.user).all()
        if 'project:releases' in x.get_scopes()
    ]
    if not tokens:
        token = ApiToken.objects.create(
            user=request.user,
            scope_list=['project:releases'],
            refresh_token=None,
            expires_at=None,
        )
    else:
        token = tokens[0]

    result = {'apiKeys': serialize(token), 'projects': filled_projects}

    key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
    default_cache.set(key, result, SETUP_WIZARD_CACHE_TIMEOUT)

    return render_to_response('sentry/setup-wizard.html', context, request)
def _do_process_event(cache_key, start_time, event_id):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={
            'reason': 'cache',
            'stage': 'process'
        })
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    has_changed = False

    # Stacktrace based event processors. These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        if issues and create_failed_event(cache_key, project, list(issues.values()),
                                          event_id=event_id, start_time=start_time):
            return

        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key,
        data=None,
        start_time=start_time,
        event_id=event_id,
    )
def set_assemble_status(project, checksum, state, detail=None):
    cache_key = 'assemble-status:%s' % _get_idempotency_id(
        project, checksum)

    # NB: Also cache successfully created debug files to avoid races between
    # multiple DIFs with the same identifier. On the downside, this blocks
    # re-uploads for 10 minutes.
    default_cache.set(cache_key, (state, detail), 600)
def process_message(self, message):
    message = msgpack.unpackb(message.value(), use_list=False)
    payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = message["project_id"]
    remote_addr = message.get("remote_addr")

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    deduplication_key = "ev:{}:{}".format(project_id, event_id)
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event"
            " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return True  # message already processed do not reprocess

    try:
        project = Project.objects.get_from_cache(id=project_id)
    except Project.DoesNotExist:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return True

    # Parse the JSON payload. This is required to compute the cache key and
    # call process_event. The payload will be put into Kafka raw, to avoid
    # serializing it again.
    # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
    # which assumes that data passed in is a raw dictionary.
    data = json.loads(payload)

    cache_timeout = 3600
    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, cache_timeout)

    # Preprocess this event, which spawns either process_event or
    # save_event. Pass data explicitly to avoid fetching it again from the
    # cache.
    preprocess_event(
        cache_key=cache_key,
        data=data,
        start_time=start_time,
        event_id=event_id,
        project=project,
    )

    # remember for 1 hour that we saved this event (deduplication protection)
    cache.set(deduplication_key, "", 3600)

    # emit event_accepted once everything is done
    event_accepted.send_robust(
        ip=remote_addr, data=data, project=project, sender=self.process_message
    )

    # Return *something* so that it counts against batch size
    return True
def set_assemble_status(project, checksum, state, detail=None):
    cache_key = 'assemble-status:%s' % _get_idempotency_id(project, checksum)

    # If the state is okay we actually clear it from the cache because in
    # that case a project dsym file was created.
    if state == ChunkFileState.OK:
        default_cache.delete(cache_key)
    else:
        default_cache.set(cache_key, (state, detail), 300)
def set_assemble_status(project, checksum, state, detail=None):
    cache_key = 'assemble-status:%s' % _get_idempotency_id(
        project, checksum)

    # If the state is okay we actually clear it from the cache because in
    # that case a project dsym file was created.
    if state == ChunkFileState.OK:
        default_cache.delete(cache_key)
    else:
        default_cache.set(cache_key, (state, detail), 300)
def _process(self, create_task, task_name):
    task_id = default_cache.get(self.task_id_cache_key)
    json_response = None

    with self.sess:
        try:
            if task_id:
                # Processing has already started and we need to poll
                # symbolicator for an update. This in turn may put us back into
                # the queue.
                json_response = self.sess.query_task(task_id)

            if json_response is None:
                # This is a new task, so we compute all request parameters
                # (potentially expensive if we need to pull minidumps), and then
                # upload all information to symbolicator. It will likely not
                # have a response ready immediately, so we start polling after
                # some timeout.
                json_response = create_task()
        except ServiceUnavailable:
            # 503 can indicate that symbolicator is restarting. Wait for a
            # reboot, then try again. This overrides the default behavior of
            # retrying after just a second.
            #
            # If there is no response attached, it's a connection error.
            raise RetrySymbolication(
                retry_after=settings.SYMBOLICATOR_MAX_RETRY_AFTER)

        metrics.incr(
            "events.symbolicator.response",
            tags={
                "response": json_response.get("status") or "null",
                "task_name": task_name
            },
        )

        # Symbolication is still in progress. Bail out and try again
        # after some timeout. Symbolicator keeps the response for the
        # first one to poll it.
        if json_response["status"] == "pending":
            default_cache.set(self.task_id_cache_key,
                              json_response["request_id"], REQUEST_CACHE_TIMEOUT)
            raise RetrySymbolication(
                retry_after=json_response["retry_after"])
        else:
            # Once we arrive here, we are done processing. Clean up the
            # task id from the cache.
            default_cache.delete(self.task_id_cache_key)
            metrics.timing("events.symbolicator.response.completed.size",
                           len(json.dumps(json_response)))
            reverse_source_aliases(json_response)
            redact_internal_sources(json_response)
            return json_response
def insert_data_to_database(self, data, start_time=None, from_reprocessing=False):
    if start_time is None:
        start_time = time()
    # we might be passed LazyData
    if isinstance(data, LazyData):
        data = dict(data.items())
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    task = from_reprocessing and \
        preprocess_event_from_reprocessing or preprocess_event
    task.delay(cache_key=cache_key, start_time=start_time,
               event_id=data['event_id'])
def test_simple(self):
    self.create_organization(owner=self.user)
    self.login_as(self.user)

    key = f"{SETUP_WIZARD_CACHE_KEY}abc"
    default_cache.set(key, "test", 600)

    url = reverse("sentry-project-wizard-fetch", kwargs={"wizard_hash": "abc"})
    resp = self.client.get(url)

    assert resp.status_code == 200
    self.assertTemplateUsed(resp, "sentry/setup-wizard.html")
def public_dsn():
    project_id = settings.SENTRY_FRONTEND_PROJECT or settings.SENTRY_PROJECT
    cache_key = 'dsn:%s' % (project_id, )
    result = default_cache.get(cache_key)
    if result is None:
        key = _get_project_key(project_id)
        if key:
            result = key.dsn_public
        else:
            result = ''
        default_cache.set(cache_key, result, 60)
    return result
def _update_cachefile(self, debug_file, tf):
    try:
        fo = FatObject.from_path(tf.name)
        o = fo.get_object(id=debug_file.debug_id)
        if o is None:
            return None, None
        symcache = o.make_symcache()
    except SymbolicError as e:
        default_cache.set(
            'scbe:%s:%s' % (debug_file.debug_id, debug_file.file.checksum),
            e.message, CONVERSION_ERROR_TTL)

        if not isinstance(e, (SymCacheErrorMissingDebugSection, SymCacheErrorMissingDebugInfo)):
            logger.error('dsymfile.symcache-build-error',
                         exc_info=True, extra=dict(debug_id=debug_file.debug_id))

        return None, e.message

    # We seem to have this task running concurrently or some
    # other task might delete symcaches while this is running
    # which is why this requires a loop instead of just a retry
    # on get.
    for iteration in range(5):
        file = File.objects.create(
            name=debug_file.debug_id,
            type='project.symcache',
        )
        file.putfile(symcache.open_stream())
        try:
            with transaction.atomic():
                return ProjectSymCacheFile.objects.get_or_create(
                    project=debug_file.project,
                    cache_file=file,
                    dsym_file=debug_file,
                    defaults=dict(
                        checksum=debug_file.file.checksum,
                        version=symcache.file_format_version,
                    ))[0], None
        except IntegrityError:
            file.delete()
            try:
                return ProjectSymCacheFile.objects.get(
                    project=debug_file.project,
                    dsym_file=debug_file,
                ), None
            except ProjectSymCacheFile.DoesNotExist:
                continue

    raise RuntimeError('Concurrency error on symcache update')
def _update_cachefile(self, debug_file, tf):
    try:
        fo = FatObject.from_path(tf.name)
        o = fo.get_object(id=debug_file.debug_id)
        if o is None:
            return None, None
        symcache = o.make_symcache()
    except SymbolicError as e:
        default_cache.set('scbe:%s:%s' % (
            debug_file.debug_id, debug_file.file.checksum),
            e.message, CONVERSION_ERROR_TTL)

        if not isinstance(e, (SymCacheErrorMissingDebugSection, SymCacheErrorMissingDebugInfo)):
            logger.error('dsymfile.symcache-build-error',
                         exc_info=True, extra=dict(debug_id=debug_file.debug_id))

        return None, e.message

    # We seem to have this task running concurrently or some
    # other task might delete symcaches while this is running
    # which is why this requires a loop instead of just a retry
    # on get.
    for iteration in range(5):
        file = File.objects.create(
            name=debug_file.debug_id,
            type='project.symcache',
        )
        file.putfile(symcache.open_stream())
        try:
            with transaction.atomic():
                return ProjectSymCacheFile.objects.get_or_create(
                    project=debug_file.project,
                    cache_file=file,
                    dsym_file=debug_file,
                    defaults=dict(
                        checksum=debug_file.file.checksum,
                        version=symcache.file_format_version,
                    )
                )[0], None
        except IntegrityError:
            file.delete()
            try:
                return ProjectSymCacheFile.objects.get(
                    project=debug_file.project,
                    dsym_file=debug_file,
                ), None
            except ProjectSymCacheFile.DoesNotExist:
                continue

    raise RuntimeError('Concurrency error on symcache update')
def insert_data_to_database(self, data, start_time=None, from_reprocessing=False):
    if start_time is None:
        start_time = time()
    # we might be passed some subclasses of dict that fail dumping
    if isinstance(data, DOWNGRADE_DATA_TYPES):
        data = dict(data.items())
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    task = from_reprocessing and \
        preprocess_event_from_reprocessing or preprocess_event
    task.delay(cache_key=cache_key, start_time=start_time,
               event_id=data['event_id'])
def _do_process_event(cache_key, start_time, event_id):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    has_changed = False

    # Stacktrace based event processors. These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(plugin.get_event_preprocessors, data=data, _with_transaction=False)
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        if issues:
            create_failed_event(cache_key, project, list(issues.values()), event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    save_event.delay(cache_key=cache_key, data=None, start_time=start_time, event_id=event_id)
def _process(self, create_task):
    task_id = default_cache.get(self.task_id_cache_key)
    json = None

    with self.sess:
        try:
            if task_id:
                # Processing has already started and we need to poll
                # symbolicator for an update. This in turn may put us back into
                # the queue.
                json = self.sess.query_task(task_id)

            if json is None:
                # This is a new task, so we compute all request parameters
                # (potentially expensive if we need to pull minidumps), and then
                # upload all information to symbolicator. It will likely not
                # have a response ready immediately, so we start polling after
                # some timeout.
                json = create_task()
        except ServiceUnavailable:
            # 503 can indicate that symbolicator is restarting. Wait for a
            # reboot, then try again. This overrides the default behavior of
            # retrying after just a second.
            #
            # If there is no response attached, it's a connection error.
            raise RetrySymbolication(retry_after=10)

        metrics.incr('events.symbolicator.response', tags={
            'response': json.get('status') or 'null',
            'project_id': self.sess.project_id,
        })

        # Symbolication is still in progress. Bail out and try again
        # after some timeout. Symbolicator keeps the response for the
        # first one to poll it.
        if json['status'] == 'pending':
            default_cache.set(
                self.task_id_cache_key, json['request_id'], REQUEST_CACHE_TIMEOUT)
            raise RetrySymbolication(retry_after=json['retry_after'])
        else:
            # Once we arrive here, we are done processing. Clean up the
            # task id from the cache.
            default_cache.delete(self.task_id_cache_key)
            return json
def test_project(self):
    self.org = self.create_organization(owner=self.user)
    self.team = self.create_team(organization=self.org, name="Mariachi Band")
    self.project = self.create_project(organization=self.org, teams=[self.team], name="Bengal")
    self.login_as(self.user)

    key = f"{SETUP_WIZARD_CACHE_KEY}abc"
    default_cache.set(key, "test", 600)

    url = reverse("sentry-project-wizard-fetch", kwargs={"wizard_hash": "abc"})
    resp = self.client.get(url)

    assert resp.status_code == 200
    self.assertTemplateUsed(resp, "sentry/setup-wizard.html")
    cached = default_cache.get(key)

    assert cached.get("apiKeys").get("scopes")[0] == "project:releases"
    assert cached.get("projects")[0].get("status") == "active"
    assert cached.get("projects")[0].get("keys")[0].get("isActive")
    assert cached.get("projects")[0].get("organization").get("status").get("id") == "active"
def insert_data_to_database(self, data, start_time=None,
                            from_reprocessing=False, attachments=None):
    if start_time is None:
        start_time = time()

    # we might be passed some subclasses of dict that fail dumping
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    cache_timeout = 3600
    cache_key = cache_key_for_event(data)
    default_cache.set(cache_key, data, cache_timeout)

    # Attachments will be empty or None if the "event-attachments" feature
    # is turned off. For native crash reports it will still contain the
    # crash dump (e.g. minidump) so we can load it during processing.
    if attachments is not None:
        attachment_cache.set(cache_key, attachments, cache_timeout)

    # NOTE: Project is bound to the context in most cases in production, which
    # is enough for us to do `projects:kafka-ingest` testing.
    project = self.context and self.context.project

    if project and features.has('projects:kafka-ingest', project=project):
        kafka.produce_sync(
            settings.KAFKA_PREPROCESS,
            value=json.dumps({
                'cache_key': cache_key,
                'start_time': start_time,
                'from_reprocessing': from_reprocessing,
                'data': data,
            }),
        )
    else:
        task = from_reprocessing and \
            preprocess_event_from_reprocessing or preprocess_event
        task.delay(cache_key=cache_key, start_time=start_time,
                   event_id=data['event_id'])
def reprocess_minidump(data):
    project = Project.objects.get_from_cache(id=data['project'])

    minidump_is_reprocessed_cache_key = minidump_reprocessed_cache_key_for_event(data)
    if default_cache.get(minidump_is_reprocessed_cache_key):
        return

    if not _is_symbolicator_enabled(project, data):
        rv = reprocess_minidump_with_cfi(data)
        default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)
        return rv

    minidump = get_attached_minidump(data)

    if not minidump:
        logger.error("Missing minidump for minidump event")
        return

    request_id_cache_key = request_id_cache_key_for_event(data)

    response = run_symbolicator(
        project=project,
        request_id_cache_key=request_id_cache_key,
        create_task=create_minidump_task,
        minidump=make_buffered_slice_reader(minidump.data, None)
    )

    if handle_symbolicator_response_status(data, response):
        merge_symbolicator_minidump_response(data, response)

    event_cache_key = cache_key_for_event(data)
    default_cache.set(event_cache_key, dict(data), 3600)
    default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)

    return data
def get_sdk_index():
    value = default_cache.get(SDK_INDEX_CACHE_KEY)
    if value is not None:
        return value

    base_url = settings.SENTRY_RELEASE_REGISTRY_BASEURL
    if not base_url:
        return {}

    url = '%s/sdks' % (base_url, )

    try:
        with Session() as session:
            response = session.get(url, timeout=1)
            response.raise_for_status()
            json = response.json()
    except Exception:
        logger.exception("Failed to fetch version index from release registry")
        json = {}

    default_cache.set(SDK_INDEX_CACHE_KEY, json, 3600)
    return json
def insert_data_to_database(self, data, start_time=None,
                            from_reprocessing=False, attachments=None):
    if start_time is None:
        start_time = time()

    # we might be passed some subclasses of dict that fail dumping
    if isinstance(data, DOWNGRADE_DATA_TYPES):
        data = dict(data.items())

    cache_timeout = 3600
    cache_key = u'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, cache_timeout)

    # Attachments will be empty or None if the "event-attachments" feature
    # is turned off. For native crash reports it will still contain the
    # crash dump (e.g. minidump) so we can load it during processing.
    if attachments is not None:
        attachment_cache.set(cache_key, attachments, cache_timeout)

    task = from_reprocessing and \
        preprocess_event_from_reprocessing or preprocess_event
    task.delay(cache_key=cache_key, start_time=start_time,
               event_id=data['event_id'])
def reprocess_minidump(data):
    project = Project.objects.get_from_cache(id=data['project'])

    minidump_is_reprocessed_cache_key = minidump_reprocessed_cache_key_for_event(data)
    if default_cache.get(minidump_is_reprocessed_cache_key):
        return

    if not _is_symbolicator_enabled(project, data):
        rv = reprocess_minidump_with_cfi(data)
        default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)
        return rv

    minidump = get_attached_minidump(data)

    if not minidump:
        logger.error("Missing minidump for minidump event")
        return

    request_id_cache_key = request_id_cache_key_for_event(data)

    response = run_symbolicator(
        project=project,
        request_id_cache_key=request_id_cache_key,
        create_task=create_minidump_task,
        minidump=make_buffered_slice_reader(minidump.data, None))

    if not response:
        handle_symbolication_failed(
            SymbolicationFailed(type=EventError.NATIVE_SYMBOLICATOR_FAILED),
            data=data,
        )
        default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)
        return

    merge_symbolicator_minidump_response(data, response)

    event_cache_key = cache_key_for_event(data)
    default_cache.set(event_cache_key, dict(data), 3600)
    default_cache.set(minidump_is_reprocessed_cache_key, True, 3600)

    return data
def run_symbolicator(project, request_id_cache_key, create_task=create_payload_task, **kwargs):
    symbolicator_options = options.get('symbolicator.options')
    base_url = symbolicator_options['url'].rstrip('/')
    assert base_url

    project_id = six.text_type(project.id)
    request_id = default_cache.get(request_id_cache_key)
    sess = Session()

    # Will be set lazily when a symbolicator request is fired
    sources = None

    attempts = 0
    wait = 0.5

    with sess:
        while True:
            try:
                if request_id:
                    rv = _poll_symbolication_task(
                        sess=sess, base_url=base_url,
                        request_id=request_id, project_id=project_id,
                    )
                else:
                    if sources is None:
                        sources = get_sources_for_project(project)

                    rv = create_task(
                        sess=sess, base_url=base_url,
                        project_id=project_id, sources=sources,
                        **kwargs
                    )

                metrics.incr('events.symbolicator.status_code', tags={
                    'status_code': rv.status_code,
                    'project_id': project_id,
                })

                if rv.status_code == 404 and request_id:
                    default_cache.delete(request_id_cache_key)
                    request_id = None
                    continue
                elif rv.status_code == 503:
                    raise RetrySymbolication(retry_after=10)

                rv.raise_for_status()
                json = rv.json()
                metrics.incr('events.symbolicator.response', tags={
                    'response': json['status'],
                    'project_id': project_id,
                })

                if json['status'] == 'pending':
                    default_cache.set(
                        request_id_cache_key, json['request_id'], REQUEST_CACHE_TIMEOUT)
                    raise RetrySymbolication(retry_after=json['retry_after'])
                elif json['status'] == 'completed':
                    default_cache.delete(request_id_cache_key)
                    return rv.json()
                else:
                    logger.error("Unexpected status: %s", json['status'])
                    default_cache.delete(request_id_cache_key)
                    return
            except (IOError, RequestException):
                attempts += 1
                if attempts > MAX_ATTEMPTS:
                    logger.error('Failed to contact symbolicator', exc_info=True)
                    default_cache.delete(request_id_cache_key)
                    return

                time.sleep(wait)
                wait *= 2.0
def post(self, request):
    """
    Requests to Register a Relay
    ````````````````````````````

    Registers the relay with the sentry installation. If a relay boots
    it will always attempt to invoke this endpoint.
    """
    try:
        json_data = json.loads(request.body)
    except ValueError:
        return Response({
            'detail': 'No valid json body',
        }, status=status.HTTP_400_BAD_REQUEST)

    serializer = RelayRegisterChallengeSerializer(data=json_data)

    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    if not self.check_allowed_relay(request, json_data):
        return Response({
            'detail': 'Relay is not allowed to register',
        }, status=status.HTTP_401_UNAUTHORIZED)

    sig = get_header_relay_signature(request)
    if not sig:
        return Response({
            'detail': 'Missing relay signature',
        }, status=status.HTTP_400_BAD_REQUEST)

    try:
        challenge = create_register_challenge(request.body, sig)
    except Exception as exc:
        return Response({
            'detail': str(exc).splitlines()[0],
        }, status=status.HTTP_400_BAD_REQUEST)

    relay_id = six.text_type(challenge['relay_id'])
    if relay_id != get_header_relay_id(request):
        return Response({
            'detail': 'relay_id in payload did not match header',
        }, status=status.HTTP_400_BAD_REQUEST)

    try:
        relay = Relay.objects.get(relay_id=relay_id)
    except Relay.DoesNotExist:
        pass
    else:
        if relay.public_key != six.text_type(challenge['public_key']):
            # This happens if we have an ID collision or someone copies an existing id
            return Response({
                'detail': 'Attempted to register agent with a different public key',
            }, status=status.HTTP_400_BAD_REQUEST)

    default_cache.set('relay-auth:%s' % relay_id, {
        'token': challenge['token'],
        'public_key': six.text_type(challenge['public_key']),
    }, 60)

    return Response(serialize({
        'relay_id': six.text_type(challenge['relay_id']),
        'token': challenge['token'],
    }))
def set_assemble_status(task, scope, checksum, state, detail=None):
    """
    Updates the status of an assembling task. It is cached for 10 minutes.
    """
    cache_key = _get_cache_key(task, scope, checksum)
    default_cache.set(cache_key, (state, detail), 600)
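# A minimal companion sketch (an assumption, not shown in this collection): reading back
# the (state, detail) tuple that set_assemble_status() cached above. The helper name
# `get_assemble_status` and its miss behavior are illustrative only.
def get_assemble_status(task, scope, checksum):
    """Return (state, detail) for an assembling task, or (None, None) on a cache miss."""
    cache_key = _get_cache_key(task, scope, checksum)
    rv = default_cache.get(cache_key)
    if rv is None:
        return None, None
    return tuple(rv)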
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={'reason': 'cache', 'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Event enhancers. These run before anything else.
    for plugin in plugins.all(version=2):
        enhancers = safe_execute(plugin.get_event_enhancers, data=data)
        for enhancer in (enhancers or ()):
            enhanced = safe_execute(enhancer, data)
            if enhanced:
                data = enhanced
                has_changed = True

    # Stacktrace based event processors.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        # We cannot persist canonical types in the cache, so we need to
        # downgrade this.
        if isinstance(data, CANONICAL_TYPES):
            data = dict(data.items())
        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key,
        data=None,
        start_time=start_time,
        event_id=event_id,
        project_id=project
    )
def post(self, request):
    """
    Requests to Register a Relay
    ````````````````````````````

    Registers the relay with the sentry installation. If a relay boots
    it will always attempt to invoke this endpoint.
    """
    try:
        json_data = json.loads(request.body)
    except ValueError:
        return Response({
            'detail': 'No valid json body',
        }, status=status.HTTP_400_BAD_REQUEST)

    serializer = RelayRegisterChallengeSerializer(data=json_data)

    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    if not settings.SENTRY_RELAY_OPEN_REGISTRATION and \
       not is_internal_relay(request, json_data.get('public_key')):
        return Response({
            'detail': 'Relay is not allowed to register',
        }, status=status.HTTP_401_UNAUTHORIZED)

    sig = get_header_relay_signature(request)
    if not sig:
        return Response({
            'detail': 'Missing relay signature',
        }, status=status.HTTP_400_BAD_REQUEST)

    try:
        challenge = create_register_challenge(request.body, sig)
    except Exception as exc:
        return Response({
            'detail': str(exc).splitlines()[0],
        }, status=status.HTTP_400_BAD_REQUEST)

    relay_id = six.text_type(challenge['relay_id'])
    if relay_id != get_header_relay_id(request):
        return Response({
            'detail': 'relay_id in payload did not match header',
        }, status=status.HTTP_400_BAD_REQUEST)

    try:
        relay = Relay.objects.get(relay_id=relay_id)
    except Relay.DoesNotExist:
        pass
    else:
        if relay.public_key != six.text_type(challenge['public_key']):
            # This happens if we have an ID collision or someone copies an existing id
            return Response({
                'detail': 'Attempted to register agent with a different public key',
            }, status=status.HTTP_400_BAD_REQUEST)

    default_cache.set('relay-auth:%s' % relay_id, {
        'token': challenge['token'],
        'public_key': six.text_type(challenge['public_key']),
    }, 60)

    return Response(serialize({
        'relay_id': six.text_type(challenge['relay_id']),
        'token': challenge['token'],
    }))
def get(self, request, wizard_hash):
    """
    This opens a page where, with an active session, the wizard data is
    filled into the cache. Redirects to the organization whenever the
    cache has been deleted.
    """
    context = {
        'hash': wizard_hash
    }
    key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)

    wizard_data = default_cache.get(key)
    if wizard_data is None:
        return self.redirect_to_org(request)

    orgs = Organization.objects.filter(
        member_set__role__in=[x.id for x in roles.with_scope('org:read')],
        member_set__user=request.user,
        status=OrganizationStatus.VISIBLE,
    ).order_by('-date_added')[:50]

    filled_projects = []

    for org in orgs:
        projects = list(Project.objects.filter(
            organization=org,
            status=ProjectStatus.VISIBLE,
        ).order_by('-date_added')[:50])
        for project in projects:
            enriched_project = serialize(project)
            enriched_project['organization'] = serialize(org)
            keys = list(ProjectKey.objects.filter(
                project=project,
                roles=ProjectKey.roles.store,
                status=ProjectKeyStatus.ACTIVE,
            ))
            enriched_project['keys'] = serialize(keys)
            filled_projects.append(enriched_project)

    # Fetching or creating a token
    token = None
    tokens = [
        x for x in ApiToken.objects.filter(user=request.user).all()
        if 'project:releases' in x.get_scopes()
    ]
    if not tokens:
        token = ApiToken.objects.create(
            user=request.user,
            scope_list=['project:releases'],
            refresh_token=None,
            expires_at=None,
        )
    else:
        token = tokens[0]

    result = {
        'apiKeys': serialize(token),
        'projects': filled_projects
    }

    key = '%s%s' % (SETUP_WIZARD_CACHE_KEY, wizard_hash)
    default_cache.set(key, result, SETUP_WIZARD_CACHE_TIMEOUT)

    return render_to_response('sentry/setup-wizard.html', context, request)
def _do_process_event(cache_key, start_time, event_id, process_task, data=None):
    from sentry.plugins import plugins

    if data is None:
        data = default_cache.get(cache_key)

    if data is None:
        metrics.incr(
            'events.failed',
            tags={'reason': 'cache', 'stage': 'process'},
            skip_internal=False)
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    data = CanonicalKeyDict(data)
    project_id = data['project']

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project_id)

    try:
        # Event enhancers. These run before anything else.
        for plugin in plugins.all(version=2):
            enhancers = safe_execute(plugin.get_event_enhancers, data=data)
            for enhancer in (enhancers or ()):
                enhanced = safe_execute(enhancer, data, _passthrough_errors=(RetrySymbolication,))
                if enhanced:
                    data = enhanced
                    has_changed = True

        # Stacktrace based event processors.
        new_data = process_stacktraces(data)
        if new_data is not None:
            has_changed = True
            data = new_data
    except RetrySymbolication as e:
        if start_time and (time() - start_time) > 3600:
            raise RuntimeError('Event spent one hour in processing')

        retry_process_event.apply_async(
            args=(),
            kwargs={
                'process_task_name': process_task.__name__,
                'task_kwargs': {
                    'cache_key': cache_key,
                    'event_id': event_id,
                    'start_time': start_time,
                }
            },
            countdown=e.retry_after
        )
        return

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project_id, 'Project cannot be mutated by preprocessor'
    project = Project.objects.get_from_cache(id=project_id)

    # We cannot persist canonical types in the cache, so we need to
    # downgrade this.
    if isinstance(data, CANONICAL_TYPES):
        data = dict(data.items())

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project_id, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            from_reprocessing = process_task is process_event_from_reprocessing
            submit_process(project, from_reprocessing, cache_key, event_id, start_time, data)
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    submit_save_event(project, cache_key, event_id, start_time, data)
def insert_data_to_database(self, data):
    cache_key = 'e:{1}:{0}'.format(data['project'], data['event_id'])
    default_cache.set(cache_key, data, timeout=3600)
    preprocess_event.delay(cache_key=cache_key, start_time=time())
def _do_process_event(cache_key, start_time, event_id, process_task):
    from sentry.plugins import plugins

    data = default_cache.get(cache_key)

    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'process'})
        error_logger.error('process.failed.empty', extra={'cache_key': cache_key})
        return

    project = data['project']
    Raven.tags_context({
        'project': project,
    })

    has_changed = False

    # Fetch the reprocessing revision
    reprocessing_rev = reprocessing.get_reprocessing_revision(project)

    # Stacktrace based event processors. These run before anything else.
    new_data = process_stacktraces(data)
    if new_data is not None:
        has_changed = True
        data = new_data

    # TODO(dcramer): ideally we would know if data changed by default
    # Default event processors.
    for plugin in plugins.all(version=2):
        processors = safe_execute(
            plugin.get_event_preprocessors, data=data, _with_transaction=False
        )
        for processor in (processors or ()):
            result = safe_execute(processor, data)
            if result:
                data = result
                has_changed = True

    assert data['project'] == project, 'Project cannot be mutated by preprocessor'

    if has_changed:
        issues = data.get('processing_issues')
        try:
            if issues and create_failed_event(
                cache_key, project, list(issues.values()),
                event_id=event_id, start_time=start_time,
                reprocessing_rev=reprocessing_rev
            ):
                return
        except RetryProcessing:
            # If `create_failed_event` indicates that we need to retry we
            # invoke ourselves again. This happens when the reprocessing
            # revision changed while we were processing.
            process_task.delay(cache_key, start_time=start_time, event_id=event_id)
            return

        default_cache.set(cache_key, data, 3600)

    save_event.delay(
        cache_key=cache_key,
        data=None,
        start_time=start_time,
        event_id=event_id,
        project_id=project
    )