def fetch_url(url, logger=None):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    import sentry

    cache_key = "fetch_url:%s" % url
    result = cache.get(cache_key)
    if result is not None:
        return result

    try:
        opener = urllib2.build_opener()
        opener.addheaders = [("User-Agent", "Sentry/%s" % sentry.VERSION)]
        req = opener.open(url)
        headers = dict(req.headers)
        body = req.read().rstrip("\n")
    except Exception:
        if logger:
            logger.error("Unable to fetch remote source for %r", url, exc_info=True)
        return BAD_SOURCE

    result = UrlResult(url, headers, body)
    cache.set(cache_key, result, 60 * 5)

    return result
def __post_save(self, instance, **kwargs):
    """
    Pushes changes to an instance into the cache, and removes invalid (changed)
    lookup values.
    """
    pk_name = instance._meta.pk.name
    pk_names = ("pk", pk_name)
    pk_val = instance.pk
    for key in self.cache_fields:
        if key in pk_names:
            continue
        # store pointers
        cache.set(self.__get_lookup_cache_key(**{key: getattr(instance, key)}), pk_val, self.cache_ttl)  # 1 hour

    # Ensure we don't serialize the database into the cache
    db = instance._state.db
    instance._state.db = None
    # store actual object
    cache.set(self.__get_lookup_cache_key(**{pk_name: pk_val}), instance, self.cache_ttl)
    instance._state.db = db

    # Kill off any keys which are no longer valid
    if instance in self.__cache:
        for key in self.cache_fields:
            if key not in self.__cache[instance]:
                continue
            value = self.__cache[instance][key]
            if value != getattr(instance, key):
                cache.delete(self.__get_lookup_cache_key(**{key: value}))

    self.__cache_state(instance)
def get_or_create(cls, release, project, environment, datetime, **kwargs):
    cache_key = cls.get_cache_key(project.id, release.id, environment.id)

    instance = cache.get(cache_key)
    if instance is None:
        instance, created = cls.objects.get_or_create(
            release=release,
            project=project,
            environment=environment,
            defaults={
                'first_seen': datetime,
                'last_seen': datetime,
            },
        )
        cache.set(cache_key, instance, 3600)
    else:
        created = False

    # Same as the ReleaseEnvironment model: minimize last_seen updates to once a minute
    if not created and instance.last_seen < datetime - timedelta(seconds=60):
        cls.objects.filter(
            id=instance.id,
            last_seen__lt=datetime - timedelta(seconds=60),
        ).update(
            last_seen=datetime,
        )
        instance.last_seen = datetime
        cache.set(cache_key, instance, 3600)
    return instance
def get_or_create(cls, project, release, environment, datetime, **kwargs):
    cache_key = cls.get_cache_key(project.id, release.id, environment.id)

    instance = cache.get(cache_key)
    if instance is None:
        instance, created = cls.objects.get_or_create(
            release_id=release.id,
            organization_id=project.organization_id,
            environment_id=environment.id,
            defaults={
                'first_seen': datetime,
                'last_seen': datetime,
            },
        )
        cache.set(cache_key, instance, 3600)
    else:
        created = False

    # TODO(dcramer): this would be good to buffer, but until then we minimize
    # updates to once a minute, and allow Postgres to optimistically skip
    # it even if we can't
    if not created and instance.last_seen < datetime - timedelta(seconds=60):
        cls.objects.filter(
            id=instance.id,
            last_seen__lt=datetime - timedelta(seconds=60),
        ).update(
            last_seen=datetime,
        )
        instance.last_seen = datetime
        cache.set(cache_key, instance, 3600)
    return instance
def get_rules(self):
    cache_key = 'project:%d:rules' % (self.project.id,)
    rules_list = cache.get(cache_key)
    if rules_list is None:
        rules_list = list(Rule.objects.filter(project=self.project))
        cache.set(cache_key, rules_list, 60)
    return rules_list
def fetch_url(url, logger=None):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    import sentry

    cache_key = 'fetch_url:v2:%s' % (hashlib.md5(url).hexdigest(),)
    result = cache.get(cache_key)
    if result is not None:
        return UrlResult(*result)

    try:
        opener = urllib2.build_opener()
        opener.addheaders = [('User-Agent', 'Sentry/%s' % sentry.VERSION)]
        req = opener.open(url)
        headers = dict(req.headers)
        body = req.read()
        if headers.get('content-encoding') == 'gzip':
            # Content doesn't *have* to respect the Accept-Encoding header
            # and may send gzipped data regardless.
            # See: http://stackoverflow.com/questions/2423866/python-decompressing-gzip-chunk-by-chunk/2424549#2424549
            body = zlib.decompress(body, 16 + zlib.MAX_WBITS)
        body = body.rstrip('\n')
    except Exception:
        if logger:
            logger.error('Unable to fetch remote source for %r', url, exc_info=True)
        return BAD_SOURCE

    result = (url, headers, body)
    cache.set(cache_key, result, 60 * 5)

    return UrlResult(url, headers, body)
def _get_project_enhancements_config(project):
    enhancements = project.get_option('sentry:grouping_enhancements')
    enhancements_base = project.get_option('sentry:grouping_enhancements_base')
    if not enhancements and not enhancements_base:
        return DEFAULT_ENHANCEMENTS_CONFIG

    if enhancements_base is None or enhancements_base not in ENHANCEMENT_BASES:
        enhancements_base = DEFAULT_ENHANCEMENT_BASE

    # Instead of parsing and dumping out config here, we can make a
    # shortcut
    from sentry.utils.cache import cache
    from sentry.utils.hashlib import md5_text
    cache_key = 'grouping-enhancements:' + \
        md5_text('%s|%s' % (enhancements_base, enhancements)).hexdigest()
    rv = cache.get(cache_key)
    if rv is not None:
        return rv

    try:
        rv = Enhancements.from_config_string(
            enhancements or '', bases=[enhancements_base]).dumps()
    except InvalidEnhancerConfig:
        rv = DEFAULT_ENHANCEMENTS_CONFIG
    cache.set(cache_key, rv)
    return rv
def get_send_to(self, project=None):
    """
    Returns a list of email addresses for the users that should be notified of alerts.

    The logic for this is a bit complicated, but it does the following:

    The results of this call can be fairly expensive to calculate, so the
    send_to list gets cached for 60 seconds.
    """
    if project:
        project_id = project.pk
    else:
        project_id = ''

    if not (project and project.team):
        return []

    conf_key = self.get_conf_key()
    cache_key = '%s:send_to:%s' % (conf_key, project_id)

    send_to_list = cache.get(cache_key)
    if send_to_list is None:
        send_to_list = self.get_sendable_users(project)
        send_to_list = filter(bool, send_to_list)
        cache.set(cache_key, send_to_list, 60)  # 1 minute cache

    return send_to_list
def chart(request, team=None, project=None):
    gid = request.REQUEST.get('gid')
    days = int(request.REQUEST.get('days', '90'))

    if gid:
        try:
            group = Group.objects.get(pk=gid)
        except Group.DoesNotExist:
            return HttpResponseForbidden()

        data = Group.objects.get_chart_data(group, max_days=days)
    elif project:
        data = Project.objects.get_chart_data(project, max_days=days)
    elif team:
        cache_key = 'api.chart:team=%s,days=%s' % (team.id, days)

        data = cache.get(cache_key)
        if data is None:
            project_list = list(Project.objects.filter(team=team))
            data = Project.objects.get_chart_data_for_group(project_list, max_days=days)
            cache.set(cache_key, data, 300)
    else:
        cache_key = 'api.chart:user=%s,days=%s' % (request.user.id, days)

        data = cache.get(cache_key)
        if data is None:
            project_list = Project.objects.get_for_user(request.user)
            data = Project.objects.get_chart_data_for_group(project_list, max_days=days)
            cache.set(cache_key, data, 300)

    response = HttpResponse(json.dumps(data))
    response['Content-Type'] = 'application/json'
    return response
def get_send_to(self, project=None):
    """
    Returns a list of email addresses for the users that should be notified of alerts.

    The logic for this is a bit complicated, but it does the following:

    - Includes members if ``send_to_members`` is enabled **and** the user
      has not disabled alerts for this project

    The results of this call can be fairly expensive to calculate, so the
    send_to list gets cached for 60 seconds.
    """
    if project:
        project_id = project.pk
    else:
        project_id = ""

    conf_key = self.get_conf_key()
    cache_key = "%s:send_to:%s" % (conf_key, project_id)

    send_to_list = cache.get(cache_key)
    if send_to_list is None:
        send_to_list = set()

        send_to_members = self.get_option("send_to_members", project)
        if send_to_members and project and project.team:
            member_set = self.get_sendable_users(project)
            send_to_list |= set(self.get_emails_for_users(member_set))

        send_to_list = filter(bool, send_to_list)
        cache.set(cache_key, send_to_list, 60)  # 1 minute cache

    return send_to_list
def fetch_release_file(filename, release):
    cache_key = 'release:%s:%s' % (
        release.id,
        hashlib.sha1(filename.encode('utf-8')).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        with releasefile.file.getfile() as fp:
            body = fp.read()
        result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 60)

    return result
def get_or_create(cls, group, release, environment, datetime, **kwargs):
    if not environment:
        environment = ''
    cache_key = cls.get_cache_key(group.id, release.id, environment)

    instance = cache.get(cache_key)
    if instance is None:
        instance, created = cls.objects.get_or_create(
            release_id=release.id,
            group_id=group.id,
            environment=environment,
            defaults={
                'project_id': group.project_id,
                'first_seen': datetime,
                'last_seen': datetime,
            },
        )
        cache.set(cache_key, instance, 3600)
    else:
        created = False

    # TODO(dcramer): this would be good to buffer
    if not created:
        instance.update(last_seen=datetime)
    return instance
def fetch_release_file(filename, release):
    cache_key = "releasefile:%s:%s" % (release.id, md5(filename).hexdigest())
    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug("Checking database for release artifact %r (release_id=%s)", filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = (
                ReleaseFile.objects.filter(release=release, ident=ident)
                .select_related("file", "file__blob")
                .get()
            )
        except ReleaseFile.DoesNotExist:
            logger.debug("Release artifact %r not found in database (release_id=%s)", filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug("Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                body = fp.read()
        except Exception as e:
            logger.exception(unicode(e))
            result = -1
        else:
            result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 3600)

    if result == -1:
        result = None

    return result
def unset_value(self, instance, key):
    self.filter(**{self.field_name: instance, 'key': key}).delete()
    if instance.pk not in self.__metadata:
        cache.delete(self._make_key(instance))
        return
    self.__metadata[instance.pk].pop(key, None)
    cache.set(self._make_key(instance), self.__metadata[instance.pk])
def all_keys(self, project):
    # TODO: cache invalidation via post_save/post_delete signals much like BaseManager
    key = self._get_cache_key(project.id)
    result = cache.get(key)
    if result is None:
        result = list(self.filter(project=project).values_list("key", flat=True))
        cache.set(key, result, 60)
    return result
def reload_cache(self, project_id):
    cache_key = self._make_key(project_id)
    result = dict(
        (i.key, i.value)
        for i in self.filter(project=project_id)
    )
    cache.set(cache_key, result)
    self.__cache[project_id] = result
    return result
def reload_cache(self, organization_id):
    cache_key = self._make_key(organization_id)
    result = dict(
        (i.key, i.value)
        for i in self.filter(organization=organization_id)
    )
    cache.set(cache_key, result)
    self.__cache[organization_id] = result
    return result
def _update_cachefiles(self, project, dsym_files):
    rv = []

    # Find all the known bad files we could not convert last time
    # around
    conversion_errors = {}
    for dsym_file in dsym_files:
        cache_key = 'scbe:%s:%s' % (dsym_file.uuid, dsym_file.file.checksum)
        err = cache.get(cache_key)
        if err is not None:
            conversion_errors[dsym_file.uuid] = err

    for dsym_file in dsym_files:
        dsym_uuid = dsym_file.uuid
        if dsym_uuid in conversion_errors:
            continue
        try:
            with dsym_file.file.getfile(as_tempfile=True) as tf:
                fo = FatObject.from_path(tf.name)
                o = fo.get_object(uuid=dsym_file.uuid)
                if o is None:
                    continue
                symcache = o.make_symcache()
        except SymbolicError as e:
            cache.set('scbe:%s:%s' % (dsym_uuid, dsym_file.file.checksum),
                      e.message, CONVERSION_ERROR_TTL)
            conversion_errors[dsym_uuid] = e.message
            logger.error('dsymfile.symcache-build-error',
                         exc_info=True, extra=dict(dsym_uuid=dsym_uuid))
            continue

        file = File.objects.create(
            name=dsym_file.uuid,
            type='project.symcache',
        )
        file.putfile(symcache.open_stream())
        try:
            with transaction.atomic():
                rv.append((dsym_uuid, ProjectSymCacheFile.objects.get_or_create(
                    project=project,
                    cache_file=file,
                    dsym_file=dsym_file,
                    defaults=dict(
                        checksum=dsym_file.file.checksum,
                        version=symcache.file_format_version,
                    )
                )[0]))
        except IntegrityError:
            file.delete()
            rv.append((dsym_uuid, ProjectSymCacheFile.objects.get(
                project=project,
                dsym_file=dsym_file,
            )))

    return rv, conversion_errors
def _get_service_hooks(project_id):
    from sentry.models import ServiceHook

    cache_key = 'servicehooks:1:{}'.format(project_id)
    result = cache.get(cache_key)

    if result is None:
        result = [(h.id, h.events) for h in
                  ServiceHook.objects.filter(project_id=project_id)]
        cache.set(cache_key, result, 60)
    return result
def get_rules(project):
    from sentry.models import Rule

    cache_key = 'project:%d:rules' % (project.id,)
    rules_list = cache.get(cache_key)
    if rules_list is None:
        rules_list = list(Rule.objects.filter(project=project))
        cache.set(cache_key, rules_list, 60)
    return rules_list
def get_choices(self):
    key = 'filters:%s:%s' % (self.project.id, self.column)

    result = cache.get(key)
    if result is None:
        result = list(FilterValue.objects.filter(
            project=self.project,
            key=self.column,
        ).values_list('value', flat=True).order_by('value')[:self.max_choices])
        cache.set(key, result, 60)
    return SortedDict((l, l) for l in result)
def get_choices(self):
    key = 'filters:%s:%s' % (self.project.id, hashlib.md5(self.column.encode('utf8')).hexdigest())

    result = cache.get(key)
    if result is None:
        result = list(TagValue.objects.filter(
            project=self.project,
            key=self.column,
        ).values_list('value', flat=True).order_by('value')[:self.max_choices])
        cache.set(key, result, 60)
    return SortedDict((l, l) for l in result)
def get_for_project(cls, project_id):
    cache_key = 'project:{}:rules'.format(project_id)
    rules_list = cache.get(cache_key)
    if rules_list is None:
        rules_list = list(cls.objects.filter(
            project=project_id,
            status=RuleStatus.ACTIVE,
        ))
        cache.set(cache_key, rules_list, 60)
    return rules_list
def wrapper(*args, **kwargs):
    def get_cache_key(*args, **kwargs):
        params = list(args) + kwargs.values()
        return md5("".join(map(str, params))).hexdigest()

    key = get_cache_key(func.__name__, *args, **kwargs)
    result = cache.get(key)
    if not result:
        result = func(*args, **kwargs)
        cache.set(key, result, timeout)
    return result
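# Note: `wrapper` above is the inner function of a caching decorator; the
# enclosing scope that binds `func` and `timeout` is not shown in the snippet.
# A minimal hypothetical sketch of such an enclosing decorator follows (the
# name `cached` and the default timeout are assumptions, not from the source):
import functools


def cached(timeout=300):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            ...  # caching logic as in the snippet above
        return wrapper
    return decorator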
def add_project(self, project):
    cache_key = 'envproj:c:%s:%s' % (self.id, project.id)

    if cache.get(cache_key) is None:
        try:
            with transaction.atomic():
                EnvironmentProject.objects.create(project=project, environment=self)
            cache.set(cache_key, 1, 3600)
        except IntegrityError:
            # We've already created the object, should still cache the action.
            cache.set(cache_key, 1, 3600)
def fetch_url(url, project=None):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    cache_key = 'source:%s' % (
        hashlib.md5(url.encode('utf-8')).hexdigest(),)
    result = cache.get(cache_key)
    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:%s' % (hashlib.md5(domain.encode('utf-8')).hexdigest(),)
        domain_result = cache.get(domain_key)
        if domain_result:
            return BAD_SOURCE

        headers = []
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers.append(('X-Sentry-Token', token))

        try:
            request = safe_urlopen(
                url,
                allow_redirects=True,
                headers=headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except HTTPError:
            result = BAD_SOURCE
        except Exception:
            # it's likely we've failed due to a timeout, dns, etc so let's
            # ensure we can't cascade the failure by pinning this for 5 minutes
            cache.set(domain_key, 1, 300)
            logger.warning('Disabling sources to %s for %ss', domain, 300,
                           exc_info=True)
            return BAD_SOURCE
        else:
            try:
                body = safe_urlread(request)
            except Exception:
                result = BAD_SOURCE
            else:
                result = (dict(request.headers), body)

        cache.set(cache_key, result, 60)

    if result == BAD_SOURCE:
        return result

    return UrlResult(url, *result)
def get_or_create(cls, project, name):
    name = name or ""

    cache_key = cls.get_cache_key(project.id, name)

    env = cache.get(cache_key)
    if env is None:
        env = cls.objects.get_or_create(project_id=project.id, name=name)[0]
        cache.set(cache_key, env, 3600)

    return env
def get_cached(self, full_url):
    """
    Basic caching mechanism for requests and responses. It only caches
    responses based on URL.

    TODO: Implement GET attr in cache as well. (see self.create_meta for example)
    """
    key = 'sentry-jira-2:' + md5(full_url, self.base_url).hexdigest()
    cached_result = cache.get(key)
    if not cached_result:
        cached_result = self.get(full_url)
        cache.set(key, cached_result, 60)
    return cached_result
def all_keys(self, project):
    # TODO: cache invalidation via post_save/post_delete signals much like BaseManager
    key = self._get_cache_key(project.id)
    result = cache.get(key)
    if result is None:
        result = list(
            self.filter(project=project, status=TagKeyStatus.VISIBLE)
            .order_by("-values_seen")
            .values_list("key", flat=True)[:20]
        )
        cache.set(key, result, 60)
    return result
def get_cached(self, full_url):
    """
    Basic caching mechanism for requests and responses. It only caches
    responses based on URL.

    TODO: Implement GET attr in cache as well. (see self.create_meta for example)
    """
    key = CACHE_KEY % (full_url, self.instance_url)
    cached_result = cache.get(key)
    if not cached_result:
        cached_result = self.make_request('get', full_url)
        cache.set(key, cached_result, 60)
    return cached_result
def set_value(self, instance, key, value):
    inst, created = self.get_or_create(**{
        self.field_name: instance,
        'key': key,
        'defaults': {
            'value': value,
        }
    })
    if not created and inst.value != value:
        inst.update(value=value)

    if instance.pk not in self.__metadata:
        cache.delete(self._make_key(instance))
        return
    self.__metadata[instance.pk][key] = value
    cache.set(self._make_key(instance), self.__metadata[instance.pk])
def get_codeowners_cached(self, project_id):
    """
    Cached read access to sentry_projectcodeowners.

    This method implements a negative cache which saves us
    a pile of read queries in post_processing as most projects
    don't have CODEOWNERS.
    """
    cache_key = self.get_cache_key(project_id)
    code_owners = cache.get(cache_key)
    if code_owners is None:
        query = self.objects.filter(project_id=project_id).order_by("-date_added") or False
        code_owners = self.merge_code_owners_list(code_owners_list=query) if query else query
        cache.set(cache_key, code_owners, READ_CACHE_DURATION)
    return code_owners or None
def get_or_create(cls, project, name):
    name = cls.get_name_or_default(name)

    cache_key = cls.get_cache_key(project.organization_id, name)

    env = cache.get(cache_key)
    if env is None:
        env = cls.objects.get_or_create(
            name=name,
            organization_id=project.organization_id,
        )[0]
        cache.set(cache_key, env, 3600)

    env.add_project(project)

    return env
def get_cached(self, url, params=None):
    """
    Basic caching mechanism for Jira metadata which changes infrequently.
    """
    query = ''
    if params:
        query = json.dumps(params, sort_keys=True)
    key = self.jira_style.cache_prefix + md5(url, query, self.base_url).hexdigest()

    cached_result = cache.get(key)
    if not cached_result:
        cached_result = self.get(url, params=params)
        # This timeout is completely arbitrary. Jira doesn't give us any
        # caching headers to work with. Ideally we want a duration that
        # lets the user make their second jira issue with cached data.
        cache.set(key, cached_result, 240)
    return cached_result
def get_send_to_all_in_project(project: Project) -> Mapping[ExternalProviders, Set[User]]:
    cache_key = f"mail:send_to:{project.pk}"
    send_to_mapping: Optional[Mapping[ExternalProviders, Set[User]]] = cache.get(cache_key)
    if send_to_mapping is None:
        users_by_provider = NotificationSetting.objects.get_notification_recipients(project)
        send_to_mapping = {
            provider: {user for user in users if user}
            for provider, users in users_by_provider.items()
        }
        cache.set(cache_key, send_to_mapping, 60)  # 1 minute cache

    return send_to_mapping
def handle_owner_assignment(project, group, event):
    from sentry.models import GroupAssignee, ProjectOwnership

    # Is the issue already assigned to a team or user?
    key = "assignee_exists:1:%s" % (group.id)
    assignee_exists = cache.get(key)
    if assignee_exists is None:
        assignee_exists = group.assignee_set.exists()
        # Cache for an hour if it's assigned. We don't need to move that fast.
        cache.set(key, assignee_exists, 3600 if assignee_exists else 60)
    if assignee_exists:
        return

    owner = ProjectOwnership.get_autoassign_owner(group.project_id, event.data)
    if owner is not None:
        GroupAssignee.objects.assign(group, owner)
def get_cached_photo(self, size):
    if not self.file:
        return
    if size not in self.ALLOWED_SIZES:
        size = min(self.ALLOWED_SIZES, key=lambda x: abs(x - size))
    cache_key = self.get_cache_key(size)
    photo = cache.get(cache_key)
    if photo is None:
        photo_file = self.file.getfile()
        with Image.open(photo_file) as image:
            image = image.resize((size, size), Image.LANCZOS)
            image_file = BytesIO()
            image.save(image_file, "PNG")
            photo = image_file.getvalue()
            cache.set(cache_key, photo)
    return photo
def get_or_create(cls, project, version, date_added=None):
    from sentry.models import Project

    if date_added is None:
        date_added = timezone.now()

    cache_key = cls.get_cache_key(project.organization_id, version)

    release = cache.get(cache_key)
    if release in (None, -1):
        # TODO(dcramer): if the cache result is -1 we could attempt a
        # default create here instead of default get
        project_version = ('%s-%s' % (project.slug, version))[:64]
        releases = list(cls.objects.filter(
            organization_id=project.organization_id,
            version__in=[version, project_version],
            projects=project
        ))
        if releases:
            try:
                release = [r for r in releases if r.version == project_version][0]
            except IndexError:
                release = releases[0]
        else:
            try:
                with transaction.atomic():
                    release = cls.objects.create(
                        organization_id=project.organization_id,
                        version=version,
                        date_added=date_added,
                        total_deploys=0,
                    )
            except IntegrityError:
                release = cls.objects.get(
                    organization_id=project.organization_id,
                    version=version
                )
            release.add_project(project)
            if not project.flags.has_releases:
                project.flags.has_releases = True
                project.update(flags=F('flags').bitor(Project.flags.has_releases))

        # TODO(dcramer): upon creating a new release, check if it should be
        # the new "latest release" for this project
        cache.set(cache_key, release, 3600)

    return release
def __post_save(self, instance, **kwargs):
    """
    Pushes changes to an instance into the cache, and removes invalid (changed)
    lookup values.
    """
    pk_name = instance._meta.pk.name
    pk_names = ('pk', pk_name)
    pk_val = instance.pk
    for key in self.cache_fields:
        if key in pk_names:
            continue
        # store pointers
        cache.set(
            key=self.__get_lookup_cache_key(**{key: getattr(instance, key)}),
            value=pk_val,
            timeout=self.cache_ttl,
            version=self.cache_version,
        )

    # Ensure we don't serialize the database into the cache
    db = instance._state.db
    instance._state.db = None
    # store actual object
    try:
        cache.set(
            key=self.__get_lookup_cache_key(**{pk_name: pk_val}),
            value=instance,
            timeout=self.cache_ttl,
            version=self.cache_version,
        )
    except Exception as e:
        logger.error(e, exc_info=True)
    instance._state.db = db

    # Kill off any keys which are no longer valid
    if instance in self.__cache:
        for key in self.cache_fields:
            if key not in self.__cache[instance]:
                continue
            value = self.__cache[instance][key]
            if value != getattr(instance, key):
                cache.delete(
                    key=self.__get_lookup_cache_key(**{key: value}),
                    version=self.cache_version,
                )

    self.__cache_state(instance)
def get_codeowners_cached(self, project_id):
    """
    Cached read access to sentry_projectcodeowners.

    This method implements a negative cache which saves us
    a pile of read queries in post_processing as most projects
    don't have CODEOWNERS.
    """
    cache_key = self.get_cache_key(project_id)
    codeowners = cache.get(cache_key)
    if codeowners is None:
        try:
            codeowners = self.objects.get(project_id=project_id)
        except self.DoesNotExist:
            codeowners = False
        cache.set(cache_key, codeowners, READ_CACHE_DURATION)
    return codeowners or None
def get_artifact_index(release, dist):
    dist_name = dist and dist.name or None

    ident = ReleaseFile.get_ident(ARTIFACT_INDEX_FILENAME, dist_name)
    cache_key = f"artifact-index:v1:{release.id}:{ident}"
    result = cache.get(cache_key)
    if result == -1:
        index = None
    elif result:
        index = json.loads(result)
    else:
        index = read_artifact_index(release, dist, use_cache=True)
        cache_value = -1 if index is None else json.dumps(index)
        # Only cache for a short time to keep the manifest up-to-date
        cache.set(cache_key, cache_value, timeout=60)

    return index
def get(cls, project, version):
    cache_key = cls.get_cache_key(project.organization_id, version)

    release = cache.get(cache_key)
    if release is None:
        try:
            release = cls.objects.get(
                organization_id=project.organization_id,
                projects=project,
                version=version,
            )
        except cls.DoesNotExist:
            release = -1
        cache.set(cache_key, release, 300)

    if release == -1:
        return
    return release
def get_or_create(cls, project, version, date_added):
    cache_key = cls.get_cache_key(project.id, version)

    release = cache.get(cache_key)
    if release in (None, -1):
        # TODO(dcramer): if the cache result is -1 we could attempt a
        # default create here instead of default get
        release = cls.objects.get_or_create(
            project=project,
            version=version,
            defaults={
                'date_added': date_added,
            },
        )[0]
        cache.set(cache_key, release, 3600)

    return release
def _get_group_snuba_stats(self, item_list, seen_stats):
    start = self._get_start_from_seen_stats(seen_stats)
    unhandled = {}

    cache_keys = []
    for item in item_list:
        cache_keys.append("group-mechanism-handled:%d" % item.id)

    cache_data = cache.get_many(cache_keys)
    for item, cache_key in zip(item_list, cache_keys):
        unhandled[item.id] = cache_data.get(cache_key)

    filter_keys = {}
    for item in item_list:
        if unhandled.get(item.id) is not None:
            continue
        filter_keys.setdefault("project_id", []).append(item.project_id)
        filter_keys.setdefault("group_id", []).append(item.id)

    if filter_keys:
        rv = raw_query(
            dataset=Dataset.Events,
            selected_columns=[
                "group_id",
                [
                    "argMax",
                    [["has", ["exception_stacks.mechanism_handled", 0]], "timestamp"],
                    "unhandled",
                ],
            ],
            groupby=["group_id"],
            filter_keys=filter_keys,
            start=start,
            orderby="group_id",
            referrer="group.unhandled-flag",
        )
        for x in rv["data"]:
            unhandled[x["group_id"]] = x["unhandled"]

            # cache the handled flag for 60 seconds. This is broadly in line with
            # the time we give for buffer flushes so the user experience is somewhat
            # consistent here.
            cache.set("group-mechanism-handled:%d" % x["group_id"], x["unhandled"], 60)

    return {group_id: {"unhandled": unhandled} for group_id, unhandled in unhandled.items()}
def get_or_create(cls, project, version, date_added):
    cache_key = cls.get_cache_key(project.id, version)

    release = cache.get(cache_key)
    if release in (None, -1):
        # TODO(dcramer): if the cache result is -1 we could attempt a
        # default create here instead of default get
        project_version = ('%s-%s' % (project.slug, version))[:64]
        releases = list(
            cls.objects.filter(
                organization_id=project.organization_id,
                version__in=[version, project_version],
                projects=project
            )
        )
        if releases:
            # TODO(jess): clean this up once all releases have been migrated
            try:
                release = [r for r in releases if r.version == project_version][0]
            except IndexError:
                release = releases[0]
        else:
            release = cls.objects.filter(
                organization_id=project.organization_id,
                version=version
            ).first()
            if not release:
                lock_key = cls.get_lock_key(project.organization_id, version)
                lock = locks.get(lock_key, duration=5)
                with TimedRetryPolicy(10)(lock.acquire):
                    try:
                        release = cls.objects.get(
                            organization_id=project.organization_id,
                            version=version
                        )
                    except cls.DoesNotExist:
                        release = cls.objects.create(
                            organization_id=project.organization_id,
                            version=version,
                            date_added=date_added
                        )
            release.add_project(project)

        # TODO(dcramer): upon creating a new release, check if it should be
        # the new "latest release" for this project
        cache.set(cache_key, release, 3600)

    return release
def process_snoozes(group):
    """
    Return True if the group is transitioning from "resolved" to "unresolved",
    otherwise return False.
    """
    from sentry.models import (
        GroupSnooze,
        GroupStatus,
        GroupInboxReason,
        add_group_to_inbox,
    )

    key = GroupSnooze.get_cache_key(group.id)
    snooze = cache.get(key)
    if snooze is None:
        try:
            snooze = GroupSnooze.objects.get(group=group)
        except GroupSnooze.DoesNotExist:
            snooze = False
        # This cache is also set in post_save|delete.
        cache.set(key, snooze, 3600)
    if not snooze:
        return False

    if not snooze.is_valid(group, test_rates=True):
        snooze_details = {
            "until": snooze.until,
            "count": snooze.count,
            "window": snooze.window,
            "user_count": snooze.user_count,
            "user_window": snooze.user_window,
        }
        add_group_to_inbox(group, GroupInboxReason.UNIGNORED, snooze_details)
        snooze.delete()
        group.update(status=GroupStatus.UNRESOLVED)
        issue_unignored.send_robust(
            project=group.project,
            user=None,
            group=group,
            transition_type="automatic",
            sender="process_snoozes",
        )
        return True

    return False
def get_or_create(cls, project, name):
    name = name or ''

    cache_key = cls.get_cache_key(project.id, name)

    env = cache.get(cache_key)
    if env is None:
        env = cls.objects.get_or_create(
            project_id=project.id,
            name=name,
            defaults={'organization_id': project.organization_id}
        )[0]
        if env.organization_id is None:
            env.update(organization_id=project.organization_id)
        env.add_project(project)
        cache.set(cache_key, env, 3600)
    return env
def get_all_values(self, instance):
    if isinstance(instance, models.Model):
        instance_id = instance.pk
    else:
        instance_id = instance

    if instance_id not in self.__metadata:
        cache_key = self._make_key(instance)

        result = cache.get(cache_key)
        if result is None:
            result = dict(
                (i.key, i.value)
                for i in self.filter(**{
                    self.field_name: instance_id,
                })
            )
            cache.set(cache_key, result)
        self.__metadata[instance_id] = result
    return self.__metadata.get(instance_id, {})
def get_or_create(cls, project, name):
    with metrics.timer("models.environment.get_or_create") as metrics_tags:
        name = cls.get_name_or_default(name)

        cache_key = cls.get_cache_key(project.organization_id, name)

        env = cache.get(cache_key)
        if env is None:
            metrics_tags["cache_hit"] = "false"
            env = cls.objects.get_or_create(
                name=name,
                organization_id=project.organization_id,
            )[0]
            cache.set(cache_key, env, 3600)
        else:
            metrics_tags["cache_hit"] = "true"

        env.add_project(project)

        return env
def get_send_to(self, project):
    """
    Returns a list of user IDs for the users that should receive
    notifications for the provided project.

    This result may come from cached data.
    """
    if not (project and project.team):
        logger.debug('Tried to send notification to invalid project: %r', project)
        return []

    cache_key = '%s:send_to:%s' % (self.get_conf_key(), project.pk)
    send_to_list = cache.get(cache_key)
    if send_to_list is None:
        send_to_list = [s for s in self.get_sendable_users(project) if s]
        cache.set(cache_key, send_to_list, 60)  # 1 minute cache
    return send_to_list
def fetch_url(url):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    cache_key = 'source:%s' % (
        hashlib.md5(url.encode('utf-8')).hexdigest(),)
    result = cache.get(cache_key)
    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:%s' % (hashlib.md5(domain.encode('utf-8')).hexdigest(),)
        domain_result = cache.get(domain_key)
        if domain_result:
            return BAD_SOURCE

        try:
            request = safe_urlopen(url, allow_redirects=True,
                                   timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT)
        except HTTPError:
            result = BAD_SOURCE
        except Exception:
            # it's likely we've failed due to a timeout, dns, etc so let's
            # ensure we can't cascade the failure by pinning this for 5 minutes
            cache.set(domain_key, 1, 300)
            logger.warning('Disabling sources to %s for %ss', domain, 300,
                           exc_info=True)
            return BAD_SOURCE
        else:
            try:
                body = safe_urlread(request)
            except Exception:
                result = BAD_SOURCE
            else:
                result = (dict(request.headers), body)

        cache.set(cache_key, result, 60)

    if result == BAD_SOURCE:
        return result

    return UrlResult(url, *result)
def _should_send_error_created_hooks(project):
    from sentry.models import ServiceHook, Organization
    from sentry import options
    import random

    use_sampling = options.get('post-process.use-error-hook-sampling')

    # XXX(Meredith): Sampling is used to test the process_resource_change task.
    # We have an option to explicitly say we want to use sampling, and the other
    # to determine what that rate should be.
    # Going forward the sampling will be removed and the task will only be
    # gated using the integrations-event-hooks (i.e. gated by plan)
    #
    # We also don't want to cache the result in case we need to manually lower the
    # sample rate immediately, or turn it down completely.
    if use_sampling:
        if random.random() >= options.get('post-process.error-hook-sample-rate'):
            return False

        org = Organization.objects.get_from_cache(id=project.organization_id)
        result = ServiceHook.objects.filter(
            organization_id=org.id,
        ).extra(where=["events @> '{error.created}'"]).exists()
        return result

    cache_key = u'servicehooks-error-created:1:{}'.format(project.id)
    result = cache.get(cache_key)

    if result is None:
        org = Organization.objects.get_from_cache(id=project.organization_id)
        if not features.has('organizations:integrations-event-hooks', organization=org):
            cache.set(cache_key, 0, 60)
            return False

        result = ServiceHook.objects.filter(
            organization_id=org.id,
        ).extra(where=["events @> '{error.created}'"]).exists()
        cache_value = 1 if result else 0
        cache.set(cache_key, cache_value, 60)

    return result
def fetch_and_cache_artifact(filename, fetch_fn, cache_key, cache_key_meta, headers, compress_fn):
    # If the release file is not in cache, check if we can retrieve at
    # least the size metadata from cache and prevent compression and
    # caching if payload exceeds the backend limit.
    z_body_size = None

    if CACHE_MAX_VALUE_SIZE:
        cache_meta = cache.get(cache_key_meta)
        if cache_meta:
            z_body_size = int(cache_meta.get("compressed_size"))

    def fetch_release_body():
        with fetch_fn() as fp:
            if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                return None, fp.read()
            else:
                return compress_fn(fp)

    try:
        with metrics.timer("sourcemaps.release_file_read"):
            z_body, body = fetch_retry_policy(fetch_release_body)
    except Exception:
        logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
        result = None
    else:
        headers = {k.lower(): v for k, v in headers.items()}
        encoding = get_encoding_from_headers(headers)
        result = http.UrlResult(filename, headers, body, 200, encoding)

        # If we don't have the compressed body for caching because the
        # cached metadata said it is too large payload for the cache
        # backend, do not attempt to cache.
        if z_body:
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

            # In case the previous call to cache implicitly fails, we use
            # the meta data to avoid pointless compression which is done
            # only for caching.
            cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600)

    return result
def fetch_url(url):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    cache_key = 'fetch_url:v2:%s' % (hashlib.md5(
        url.encode('utf-8')).hexdigest(),)
    result = cache.get(cache_key)
    if result is None:
        result = fetch_url_content(url)

        cache.set(cache_key, result, 30)

    if result == BAD_SOURCE:
        return result

    return UrlResult(*result)
def fetch_release_archive(release, dist) -> Optional[IO]:
    """Fetch release archive and cache if possible.

    If the return value is not empty, the caller is responsible for closing the stream.
    """
    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(RELEASE_ARCHIVE_FILENAME, dist_name)
    cache_key = get_release_file_cache_key(
        release_id=release.id, releasefile_ident=releasefile_ident
    )

    result = cache.get(cache_key)

    if result == -1:
        return None
    elif result:
        return BytesIO(result)
    else:
        qs = ReleaseFile.objects.filter(
            release=release, dist=dist, ident=releasefile_ident
        ).select_related("file")
        try:
            releasefile = qs[0]
        except IndexError:
            # Cache as nonexistent:
            cache.set(cache_key, -1, 60)
            return None
        else:
            try:
                file_ = fetch_retry_policy(lambda: ReleaseFile.cache.getfile(releasefile))
            except Exception:
                logger.error("sourcemaps.read_archive_failed", exc_info=sys.exc_info())
                return None

            # This will implicitly skip too large payloads.
            cache.set(cache_key, file_.read(), 3600)
            file_.seek(0)
        return file_
def handle_owner_assignment(project, group, event):
    from sentry.models import GroupAssignee, ProjectOwnership

    with metrics.timer("post_process.handle_owner_assignment"):
        owner_key = "owner_exists:1:%s" % group.id
        owners_exists = cache.get(owner_key)
        if owners_exists is None:
            owners_exists = group.groupowner_set.exists()
            # Cache for an hour if it's assigned. We don't need to move that fast.
            cache.set(owner_key, owners_exists, 3600 if owners_exists else 60)

        # Is the issue already assigned to a team or user?
        assignee_key = "assignee_exists:1:%s" % group.id
        assignees_exists = cache.get(assignee_key)
        if assignees_exists is None:
            assignees_exists = group.assignee_set.exists()
            # Cache for an hour if it's assigned. We don't need to move that fast.
            cache.set(assignee_key, assignees_exists, 3600 if assignees_exists else 60)

        if owners_exists and assignees_exists:
            return

        auto_assignment, owners, assigned_by_codeowners = ProjectOwnership.get_autoassign_owners(
            group.project_id, event.data
        )

        if auto_assignment and owners and not assignees_exists:
            assignment = GroupAssignee.objects.assign(group, owners[0])
            if assignment["new_assignment"] or assignment["updated_assignment"]:
                analytics.record(
                    "codeowners.assignment"
                    if assigned_by_codeowners
                    else "issueowners.assignment",
                    organization_id=project.organization_id,
                    project_id=project.id,
                    group_id=group.id,
                )

        if owners and not owners_exists:
            try:
                handle_group_owners(project, group, owners)
            except Exception:
                logger.exception("Failed to store group owners")
def get_send_to(self, project):
    """
    Returns a list of email addresses for the users that should be notified of alerts.

    The logic for this is a bit complicated, but it does the following:

    The results of this call can be fairly expensive to calculate, so the
    send_to list gets cached for 60 seconds.
    """
    if not (project and project.team):
        logger.debug('Tried to send notification to invalid project: %r', project)
        return []

    cache_key = '%s:send_to:%s' % (self.get_conf_key(), project.pk)
    send_to_list = cache.get(cache_key)
    if send_to_list is None:
        send_to_list = filter(bool, self.get_sendable_users(project))
        cache.set(cache_key, send_to_list, 60)  # 1 minute cache
    return send_to_list
def save_to_cache(self):
    """Stores the reprocessed stack trace to the cache. For frames with
    known code modules only relative offsets are stored, otherwise the
    absolute address as fallback."""
    if self.resolved_frames is None:
        raise RuntimeError('save_to_cache called before resolving frames')

    if self.resolved_frames == NO_CFI_PLACEHOLDER:
        cache.set(self._cache_key, NO_CFI_PLACEHOLDER)
        return

    values = []
    for module, frame in self.resolved_frames:
        module_id = module and module.id
        addr = frame['instruction_addr']
        if module:
            addr = '0x%x' % rebase_addr(addr, module)
        values.append((module_id, addr, frame['trust']))

    cache.set(self._cache_key, values)
def get_fingerprinting_config_for_project(project):
    from sentry.grouping.fingerprinting import FingerprintingRules, \
        InvalidFingerprintingConfig
    rules = project.get_option('sentry:fingerprinting_rules')
    if not rules:
        return FingerprintingRules([])

    from sentry.utils.cache import cache
    from sentry.utils.hashlib import md5_text
    cache_key = 'fingerprinting-rules:' + md5_text(rules).hexdigest()
    rv = cache.get(cache_key)
    if rv is not None:
        return FingerprintingRules.from_json(rv)

    try:
        rv = FingerprintingRules.from_config_string(rules)
    except InvalidFingerprintingConfig:
        rv = FingerprintingRules([])
    cache.set(cache_key, rv.to_json())
    return rv
def get_ownership_cached(cls, project_id):
    """
    Cached read access to projectownership.

    This method implements a negative cache which saves us
    a pile of read queries in post_processing as most projects
    don't have ownership rules.

    See the post_save and post_delete signals below for additional
    cache updates.
    """
    cache_key = cls.get_cache_key(project_id)
    ownership = cache.get(cache_key)
    if ownership is None:
        try:
            ownership = cls.objects.get(project_id=project_id)
        except cls.DoesNotExist:
            ownership = False
        cache.set(cache_key, ownership, READ_CACHE_DURATION)
    return ownership or None
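# The docstring above refers to post_save/post_delete signals that keep this
# negative cache fresh. A minimal sketch of such handlers, assuming Django's
# signal API and that the owning model is ProjectOwnership (per the docstring);
# the handler name and the choice to delete rather than re-set the cached entry
# are assumptions, not from the source:
from django.db.models.signals import post_delete, post_save

from sentry.models import ProjectOwnership
from sentry.utils.cache import cache


def _invalidate_ownership_cache(instance, **kwargs):
    # Drop the cached entry so the next read repopulates it (or re-caches False).
    cache.delete(ProjectOwnership.get_cache_key(instance.project_id))


post_save.connect(_invalidate_ownership_cache, sender=ProjectOwnership, weak=False)
post_delete.connect(_invalidate_ownership_cache, sender=ProjectOwnership, weak=False)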