Example #1
    def __search_events_legacy(self, request, project):
        from datetime import timedelta

        from django.utils import timezone

        from sentry import quotas
        from sentry.api.paginator import DateTimePaginator
        from sentry.api.serializers import serialize
        from sentry.models import Event

        events = Event.objects.filter(
            project_id=project.id,
        )

        query = request.GET.get('query')
        if query:
            events = events.filter(
                message__icontains=query,
            )

        # filter out events which are beyond the retention period
        retention = quotas.get_event_retention(organization=project.organization)
        if retention:
            events = events.filter(
                datetime__gte=timezone.now() - timedelta(days=retention)
            )

        return self.paginate(
            request=request,
            queryset=events,
            order_by='-datetime',
            on_results=lambda x: serialize(x, request.user),
            paginator_cls=DateTimePaginator,
        )
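Every example on this page follows the same core pattern: ask quotas.get_event_retention for the organization's retention in days and, if it returns a value, either narrow the query to events newer than now minus that many days or stamp the value onto an outgoing payload. A minimal standalone sketch of the queryset half of that pattern, assuming a Django model with a `datetime` column:

    from datetime import timedelta

    from django.utils import timezone

    def apply_retention_window(queryset, retention_days):
        # A falsy retention (None or 0) means unlimited retention, so the
        # queryset is returned unchanged.
        if not retention_days:
            return queryset
        cutoff = timezone.now() - timedelta(days=retention_days)
        return queryset.filter(datetime__gte=cutoff)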
Example #2
    def insert(self, group, event, is_new, is_sample, is_regression,
               is_new_group_environment, primary_hash, skip_consume=False):
        # ensure the superclass's insert() is called, regardless of what
        # happens when attempting to send to Kafka
        super(KafkaEventStream, self).insert(
            group, event, is_new, is_sample,
            is_regression, is_new_group_environment,
            primary_hash, skip_consume
        )

        project = event.project
        retention_days = quotas.get_event_retention(
            organization=Organization(project.organization_id)
        )

        self._send(project.id, 'insert', extra_data=({
            'group_id': event.group_id,
            'event_id': event.event_id,
            'organization_id': project.organization_id,
            'project_id': event.project_id,
            'message': event.message,
            'platform': event.platform,
            'datetime': event.datetime,
            'data': dict(event.data.items()),
            'primary_hash': primary_hash,
            'retention_days': retention_days,
        }, {
            'is_new': is_new,
            'is_sample': is_sample,
            'is_regression': is_regression,
            'is_new_group_environment': is_new_group_environment,
        },))
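Note the shape of extra_data here: a two-element tuple holding the event payload and the state flags. The publish() variants below (Examples #6 and #10) build the wire value explicitly as (EVENT_PROTOCOL_VERSION, 'insert', payload, flags), so it is plausible, though not shown in this snippet, that _send simply prepends the version and operation name:

    # Hypothetical illustration of what _send may do with extra_data:
    # concatenating the 2-tuple of dicts onto the version/operation prefix
    # yields the same 4-tuple that the publish() examples construct by hand.
    value = (EVENT_PROTOCOL_VERSION, 'insert') + extra_data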
Example #3
    def _get_events_legacy(
        self,
        request,
        group,
        environments,
        query,
        tags,
        start,
        end,
    ):
        events = Event.objects.filter(group_id=group.id)

        if query:
            q = Q(message__icontains=query)

            if is_event_id(query):
                q |= Q(event_id__exact=query)

            events = events.filter(q)

        if tags:
            event_filter = tagstore.get_group_event_filter(
                group.project_id,
                group.id,
                [env.id for env in environments],
                tags,
                start,
                end,
            )

            if not event_filter:
                return Response([])

            events = events.filter(**event_filter)

        # Filter start/end here in case we didn't filter by tags at all
        if start:
            events = events.filter(datetime__gte=start)
        if end:
            events = events.filter(datetime__lte=end)

        # filter out events which are beyond the retention period
        retention = quotas.get_event_retention(organization=group.project.organization)
        if retention:
            events = events.filter(
                datetime__gte=timezone.now() - timedelta(days=retention)
            )

        return self.paginate(
            request=request,
            queryset=events,
            order_by='-datetime',
            on_results=lambda x: serialize(x, request.user),
            paginator_cls=DateTimePaginator,
        )
Example #4
    def insert(
        self,
        group,
        event,
        is_new,
        is_regression,
        is_new_group_environment,
        primary_hash,
        received_timestamp,  # type: float
        skip_consume=False,
    ):
        project = event.project
        retention_days = quotas.get_event_retention(
            organization=project.organization)

        event_data = event.get_raw_data()

        unexpected_tags = set([
            k for (k, v) in (get_path(event_data, "tags", filter=True) or [])
            if k in self.UNEXPECTED_TAG_KEYS
        ])
        if unexpected_tags:
            logger.error("%r received unexpected tags: %r", self,
                         unexpected_tags)

        self._send(
            project.id,
            "insert",
            extra_data=(
                {
                    "group_id": event.group_id,
                    "event_id": event.event_id,
                    "organization_id": project.organization_id,
                    "project_id": event.project_id,
                    # TODO(mitsuhiko): We do not want to send this incorrect
                    # message but this is what snuba needs at the moment.
                    "message": event.search_message,
                    "platform": event.platform,
                    "datetime": event.datetime,
                    "data": event_data,
                    "primary_hash": primary_hash,
                    "retention_days": retention_days,
                },
                {
                    "is_new": is_new,
                    "is_regression": is_regression,
                    "is_new_group_environment": is_new_group_environment,
                    "skip_consume": skip_consume,
                },
            ),
            headers={'Received-Timestamp': six.text_type(received_timestamp)})
Example #5
def test_internal_relays_should_receive_full_configs(call_endpoint,
                                                     default_project,
                                                     default_projectkey):
    result, status_code = call_endpoint(full_config=True)

    assert status_code < 400

    # Sweeping assertion that we do not have any snake_case or kebab-case
    # keys in that config. Might need refining.
    assert not {x for x in _get_all_keys(result) if "-" in x or "_" in x}

    cfg = safe.get_path(result, "configs", str(default_project.id))
    assert safe.get_path(cfg, "disabled") is False

    (public_key, ) = cfg["publicKeys"]
    assert public_key["publicKey"] == default_projectkey.public_key
    assert public_key["isEnabled"]
    assert "quotas" in public_key

    assert safe.get_path(cfg, "slug") == default_project.slug
    last_change = safe.get_path(cfg, "lastChange")
    assert _date_regex.match(last_change) is not None
    last_fetch = safe.get_path(cfg, "lastFetch")
    assert _date_regex.match(last_fetch) is not None
    assert safe.get_path(cfg,
                         "organizationId") == default_project.organization.id
    assert safe.get_path(cfg, "projectId") == default_project.id
    assert safe.get_path(cfg, "slug") == default_project.slug
    assert safe.get_path(cfg, "rev") is not None

    assert safe.get_path(cfg, "config", "trustedRelays") == []
    assert safe.get_path(cfg, "config", "filterSettings") is not None
    assert safe.get_path(cfg, "config", "groupingConfig",
                         "enhancements") is not None
    assert safe.get_path(cfg, "config", "groupingConfig", "id") is not None
    assert safe.get_path(cfg, "config", "piiConfig", "applications") is None
    assert safe.get_path(cfg, "config", "piiConfig", "rules") is None
    assert safe.get_path(cfg, "config", "datascrubbingSettings",
                         "scrubData") is True
    assert safe.get_path(cfg, "config", "datascrubbingSettings",
                         "scrubDefaults") is True
    assert safe.get_path(cfg, "config", "datascrubbingSettings",
                         "scrubIpAddresses") is True
    assert safe.get_path(cfg, "config", "datascrubbingSettings",
                         "sensitiveFields") == []
    assert safe.get_path(cfg, "config", "quotas") == []

    # Event retention depends on settings, so assert the actual value. Likely
    # `None` in dev, but must not be missing.
    assert cfg["config"]["eventRetention"] == quotas.get_event_retention(
        default_project.organization)
Example #6
    def publish(self,
                group,
                event,
                is_new,
                is_sample,
                is_regression,
                is_new_group_environment,
                primary_hash,
                skip_consume=False):
        project = event.project
        retention_days = quotas.get_event_retention(
            organization=Organization(project.organization_id))

        # Polling the producer is required to ensure callbacks are fired. This
        # means that the latency between a message being delivered (or failing
        # to be delivered) and the corresponding callback being fired is
        # roughly the same as the duration of time that passes between publish
        # calls. If this ends up being too high, the publisher should be moved
        # into a background thread that can poll more frequently without
        # interfering with request handling. (This `poll` does not act as
        # a heartbeat for the purposes of any sort of session expiration.)
        self.producer.poll(0.0)

        try:
            key = '%s:%s' % (event.project_id, event.event_id)
            value = (EVENT_PROTOCOL_VERSION, 'insert', {
                'group_id': event.group_id,
                'event_id': event.event_id,
                'organization_id': project.organization_id,
                'project_id': event.project_id,
                'message': event.message,
                'platform': event.platform,
                'datetime': event.datetime,
                'data': event.data.data,
                'primary_hash': primary_hash,
                'retention_days': retention_days,
            }, {
                'is_new': is_new,
                'is_sample': is_sample,
                'is_regression': is_regression,
                'is_new_group_environment': is_new_group_environment,
            })
            self.producer.produce(
                self.publish_topic,
                key=key.encode('utf-8'),
                value=json.dumps(value),
                on_delivery=self.delivery_callback,
            )
        except Exception as error:
            logger.warning('Could not publish event: %s', error, exc_info=True)
            raise
Example #7
    def insert(self,
               group,
               event,
               is_new,
               is_sample,
               is_regression,
               is_new_group_environment,
               primary_hash,
               skip_consume=False):
        project = event.project
        retention_days = quotas.get_event_retention(
            organization=project.organization, )

        event_data = event.get_raw_data()

        unexpected_tags = set([
            k for (k, v) in (get_path(event_data, 'tags', filter=True) or [])
            if k in self.UNEXPECTED_TAG_KEYS
        ])
        if unexpected_tags:
            logger.error('%r received unexpected tags: %r', self,
                         unexpected_tags)

        self._send(
            project.id,
            'insert',
            extra_data=(
                {
                    'group_id': event.group_id,
                    'event_id': event.event_id,
                    'organization_id': project.organization_id,
                    'project_id': event.project_id,
                    # TODO(mitsuhiko): We do not want to send this incorrect
                    # message but this is what snuba needs at the moment.
                    'message': event.message,
                    'platform': event.platform,
                    'datetime': event.datetime,
                    'data': event_data,
                    'primary_hash': primary_hash,
                    'retention_days': retention_days,
                },
                {
                    'is_new': is_new,
                    'is_sample': is_sample,
                    'is_regression': is_regression,
                    'is_new_group_environment': is_new_group_environment,
                    'skip_consume': skip_consume,
                },
            ))
Example #8
def outside_retention_with_modified_start(start, end, organization):
    """
    Check if a start-end datetime range is outside an
    organization's retention period. Returns an updated
    start datetime if start is out of retention.
    """
    retention = quotas.get_event_retention(organization=organization)
    if not retention:
        return False, start

    # Need to support timezone-aware and naive datetimes since
    # Snuba API only deals in naive UTC
    now = datetime.utcnow().replace(tzinfo=pytz.utc) if start.tzinfo else datetime.utcnow()
    start = max(start, now - timedelta(days=retention))

    return start > end, start
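A short caller-side sketch for the helper above; the endpoint code around it is hypothetical, but the tuple contract (whether the window is entirely outside retention, plus the possibly clamped start) comes straight from the function:

    outside_retention, start = outside_retention_with_modified_start(
        start, end, organization)
    if outside_retention:
        # The clamped start moved past end: the whole requested window
        # predates the retention period, so there is nothing to query.
        return Response([])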
Example #9
    def insert(self,
               group,
               event,
               is_new,
               is_sample,
               is_regression,
               is_new_group_environment,
               primary_hash,
               skip_consume=False):
        if options.get('eventstream.kafka.send-post_process-task'):
            super(KafkaEventStream,
                  self).insert(group, event, is_new, is_sample, is_regression,
                               is_new_group_environment, primary_hash,
                               skip_consume)

        project = event.project
        retention_days = quotas.get_event_retention(
            organization=Organization(project.organization_id))

        self._send(
            project.id,
            'insert',
            extra_data=(
                {
                    'group_id': event.group_id,
                    'event_id': event.event_id,
                    'organization_id': project.organization_id,
                    'project_id': event.project_id,
                    # TODO(mitsuhiko): We do not want to send this incorrect
                    # message but this is what snuba needs at the moment.
                    'message': event.message,
                    'platform': event.platform,
                    'datetime': event.datetime,
                    'data': dict(event.data.items()),
                    'primary_hash': primary_hash,
                    'retention_days': retention_days,
                },
                {
                    'is_new': is_new,
                    'is_sample': is_sample,
                    'is_regression': is_regression,
                    'is_new_group_environment': is_new_group_environment,
                    'skip_consume': skip_consume,
                },
            ))
Example #10
    def publish(self, group, event, is_new, is_sample, is_regression, is_new_group_environment, primary_hash, skip_consume=False):
        project = event.project
        retention_days = quotas.get_event_retention(
            organization=Organization(project.organization_id)
        )

        # Polling the producer is required to ensure callbacks are fired. This
        # means that the latency between a message being delivered (or failing
        # to be delivered) and the corresponding callback being fired is
        # roughly the same as the duration of time that passes between publish
        # calls. If this ends up being too high, the publisher should be moved
        # into a background thread that can poll more frequently without
        # interfering with request handling. (This `poll` does not act as
        # a heartbeat for the purposes of any sort of session expiration.)
        self.producer.poll(0.0)

        try:
            key = '%s:%s' % (event.project_id, event.event_id)
            value = (EVENT_PROTOCOL_VERSION, 'insert', {
                'group_id': event.group_id,
                'event_id': event.event_id,
                'organization_id': project.organization_id,
                'project_id': event.project_id,
                'message': event.message,
                'platform': event.platform,
                'datetime': event.datetime,
                'data': event.data.data,
                'primary_hash': primary_hash,
                'retention_days': retention_days,
            }, {
                'is_new': is_new,
                'is_sample': is_sample,
                'is_regression': is_regression,
                'is_new_group_environment': is_new_group_environment,
            })
            self.producer.produce(
                self.publish_topic,
                key=key.encode('utf-8'),
                value=json.dumps(value),
                on_delivery=self.delivery_callback,
            )
        except Exception as error:
            logger.warning('Could not publish event: %s', error, exc_info=True)
            raise
Example #11
    def insert(self,
               group,
               event,
               is_new,
               is_sample,
               is_regression,
               is_new_group_environment,
               primary_hash,
               skip_consume=False):
        # ensure the superclass's insert() is called, regardless of what
        # happens when attempting to send to Kafka
        super(KafkaEventStream,
              self).insert(group, event, is_new, is_sample, is_regression,
                           is_new_group_environment, primary_hash,
                           skip_consume)

        project = event.project
        retention_days = quotas.get_event_retention(
            organization=Organization(project.organization_id))

        self._send(project.id,
                   'insert',
                   extra_data=(
                       {
                           'group_id': event.group_id,
                           'event_id': event.event_id,
                           'organization_id': project.organization_id,
                           'project_id': event.project_id,
                           'message': event.message,
                           'platform': event.platform,
                           'datetime': event.datetime,
                           'data': dict(event.data.items()),
                           'primary_hash': primary_hash,
                           'retention_days': retention_days,
                       },
                       {
                           'is_new': is_new,
                           'is_sample': is_sample,
                           'is_regression': is_regression,
                           'is_new_group_environment':
                           is_new_group_environment,
                       },
                   ))
Example #12
    def publish(self,
                group,
                event,
                is_new,
                is_sample,
                is_regression,
                is_new_group_environment,
                primary_hash,
                skip_consume=False):
        project = event.project
        retention_days = quotas.get_event_retention(
            organization=Organization(project.organization_id))

        try:
            key = '%s:%s' % (event.project_id, event.event_id)
            value = (EVENT_PROTOCOL_VERSION, 'insert', {
                'group_id': event.group_id,
                'event_id': event.event_id,
                'organization_id': project.organization_id,
                'project_id': event.project_id,
                'message': event.message,
                'platform': event.platform,
                'datetime': event.datetime,
                'data': event.data.data,
                'primary_hash': primary_hash,
                'retention_days': retention_days,
            }, {
                'is_new': is_new,
                'is_sample': is_sample,
                'is_regression': is_regression,
                'is_new_group_environment': is_new_group_environment,
            })

            self.pubsub.publish(self.publish_topic,
                                key=key.encode('utf-8'),
                                value=json.dumps(value))
        except Exception as error:
            logger.warning('Could not publish event: %s', error, exc_info=True)
            raise
Example #13
    def get(self, request, project):
        """
        List a Project's Events
        ```````````````````````

        Return a list of events bound to a project.

        Note: This endpoint is experimental and may be removed without notice.

        :pparam string organization_slug: the slug of the organization the
                                          events belong to.
        :pparam string project_slug: the slug of the project the events
                                     belong to.
        """

        events = Event.objects.filter(
            project_id=project.id,
        )

        query = request.GET.get('query')
        if query:
            events = events.filter(
                message__icontains=query,
            )

        # filter out events which are beyond the retention period
        retention = quotas.get_event_retention(organization=project.organization)
        if retention:
            events = events.filter(
                datetime__gte=timezone.now() - timedelta(days=retention)
            )

        return self.paginate(
            request=request,
            queryset=events,
            order_by='-datetime',
            on_results=lambda x: serialize(x, request.user),
            paginator_cls=DateTimePaginator,
        )
Example #14
    def _get_events_legacy(self, request, group, environment, query, tags):
        events = Event.objects.filter(group_id=group.id)

        if query:
            q = Q(message__icontains=query)

            if is_event_id(query):
                q |= Q(event_id__exact=query)

            events = events.filter(q)

        if tags:
            event_filter = tagstore.get_group_event_filter(
                group.project_id,
                group.id,
                environment.id if environment is not None else None,
                tags,
            )

            if not event_filter:
                return Response([])

            events = events.filter(**event_filter)

        # filter out events which are beyond the retention period
        retention = quotas.get_event_retention(
            organization=group.project.organization)
        if retention:
            events = events.filter(datetime__gte=timezone.now() -
                                   timedelta(days=retention))

        return self.paginate(
            request=request,
            queryset=events,
            order_by='-datetime',
            on_results=lambda x: serialize(x, request.user),
            paginator_cls=DateTimePaginator,
        )
Example #15
File: snuba.py Project: getsentry/sentry
    def insert(self, group, event, is_new, is_sample, is_regression,
               is_new_group_environment, primary_hash, skip_consume=False):
        project = event.project
        retention_days = quotas.get_event_retention(
            organization=project.organization,
        )

        event_data = event.get_raw_data()

        unexpected_tags = set([
            k for (k, v) in (get_path(event_data, 'tags', filter=True) or [])
            if k in self.UNEXPECTED_TAG_KEYS
        ])
        if unexpected_tags:
            logger.error('%r received unexpected tags: %r', self, unexpected_tags)

        self._send(project.id, 'insert', extra_data=({
            'group_id': event.group_id,
            'event_id': event.event_id,
            'organization_id': project.organization_id,
            'project_id': event.project_id,
            # TODO(mitsuhiko): We do not want to send this incorrect
            # message but this is what snuba needs at the moment.
            'message': event.message,
            'platform': event.platform,
            'datetime': event.datetime,
            'data': event_data,
            'primary_hash': primary_hash,
            'retention_days': retention_days,
        }, {
            'is_new': is_new,
            'is_sample': is_sample,
            'is_regression': is_regression,
            'is_new_group_environment': is_new_group_environment,
            'skip_consume': skip_consume,
        },))
Example #16
    def insert(self, group, event, is_new, is_sample, is_regression,
               is_new_group_environment, primary_hash, skip_consume=False):
        project = event.project
        retention_days = quotas.get_event_retention(
            organization=Organization(project.organization_id)
        )

        self._send(project.id, 'insert', extra_data=({
            'group_id': event.group_id,
            'event_id': event.event_id,
            'organization_id': project.organization_id,
            'project_id': event.project_id,
            'message': event.message,
            'platform': event.platform,
            'datetime': event.datetime,
            'data': dict(event.data.items()),
            'primary_hash': primary_hash,
            'retention_days': retention_days,
        }, {
            'is_new': is_new,
            'is_sample': is_sample,
            'is_regression': is_regression,
            'is_new_group_environment': is_new_group_environment,
        },))
Example #17
    def query(self,
              project,
              tags=None,
              environment=None,
              sort_by='date',
              limit=100,
              cursor=None,
              count_hits=False,
              paginator_options=None,
              **parameters):

        from sentry.models import Group, GroupStatus, GroupSubscription, Release

        if paginator_options is None:
            paginator_options = {}

        if tags is None:
            tags = {}

        try:
            if tags.get('sentry:release') == 'latest':
                tags['sentry:release'] = get_latest_release(
                    project, environment)

            if parameters.get('first_release') == 'latest':
                parameters['first_release'] = get_latest_release(
                    project, environment)
        except Release.DoesNotExist:
            # no matches could possibly be found from this point on
            return Paginator(Group.objects.none()).get_result()

        group_queryset = QuerySetBuilder({
            'query':
            CallbackCondition(
                lambda queryset, query: queryset.filter(
                    Q(message__icontains=query) | Q(culprit__icontains=query),
                ) if query else queryset,
            ),
            'status':
            CallbackCondition(
                lambda queryset, status: queryset.filter(status=status),
            ),
            'bookmarked_by':
            CallbackCondition(
                lambda queryset, user: queryset.filter(
                    bookmark_set__project=project,
                    bookmark_set__user=user,
                ),
            ),
            'assigned_to':
            CallbackCondition(
                functools.partial(assigned_to_filter, project=project), ),
            'unassigned':
            CallbackCondition(
                lambda queryset, unassigned: queryset.filter(
                    assignee_set__isnull=unassigned, ),
            ),
            'subscribed_by':
            CallbackCondition(
                lambda queryset, user: queryset.filter(
                    id__in=GroupSubscription.objects.filter(
                        project=project,
                        user=user,
                        is_active=True,
                    ).values_list('group'), ),
            ),
            'active_at_from':
            ScalarCondition('active_at', 'gt'),
            'active_at_to':
            ScalarCondition('active_at', 'lt'),
        }).build(
            Group.objects.filter(project=project).exclude(status__in=[
                GroupStatus.PENDING_DELETION,
                GroupStatus.DELETION_IN_PROGRESS,
                GroupStatus.PENDING_MERGE,
            ]),
            parameters,
        )

        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(
            organization=project.organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        if retention_window_start:
            group_queryset = group_queryset.filter(
                last_seen__gte=retention_window_start)

        # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
        # seemed better to handle all the shared initialization and then handoff to the
        # actual backend.
        return self._query(project, retention_window_start, group_queryset,
                           tags, environment, sort_by, limit, cursor,
                           count_hits, paginator_options, **parameters)
Example #18
    def query(
        self,
        projects,
        environments=None,
        sort_by="date",
        limit=100,
        cursor=None,
        count_hits=False,
        paginator_options=None,
        search_filters=None,
        date_from=None,
        date_to=None,
    ):
        from sentry.models import Group, GroupStatus, GroupSubscription

        search_filters = search_filters if search_filters is not None else []

        # ensure projects are from same org
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError("Cross organization search not supported")

        if paginator_options is None:
            paginator_options = {}

        group_queryset = Group.objects.filter(project__in=projects).exclude(
            status__in=[
                GroupStatus.PENDING_DELETION,
                GroupStatus.DELETION_IN_PROGRESS,
                GroupStatus.PENDING_MERGE,
            ])

        qs_builder_conditions = {
            "status":
            QCallbackCondition(lambda status: Q(status=status)),
            "bookmarked_by":
            QCallbackCondition(lambda user: Q(
                bookmark_set__project__in=projects, bookmark_set__user=user)),
            "assigned_to":
            QCallbackCondition(
                functools.partial(assigned_to_filter, projects=projects)),
            "unassigned":
            QCallbackCondition(
                functools.partial(unassigned_filter, projects=projects)),
            "subscribed_by":
            QCallbackCondition(
                lambda user: Q(id__in=GroupSubscription.objects.filter(
                    project__in=projects, user=user, is_active=True).
                               values_list("group"))),
            "active_at":
            ScalarCondition("active_at"),
        }

        group_queryset = QuerySetBuilder(qs_builder_conditions).build(
            group_queryset, search_filters)
        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(
            organization=projects[0].organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        if retention_window_start:
            group_queryset = group_queryset.filter(
                last_seen__gte=retention_window_start)

        # TODO: It's possible `first_release` could be handled by Snuba.
        if environments is not None:
            environment_ids = [environment.id for environment in environments]
            group_queryset = group_queryset.filter(
                groupenvironment__environment_id__in=environment_ids)
            group_queryset = QuerySetBuilder({
                "first_release":
                QCallbackCondition(lambda version: Q(
                    # if environment(s) are selected, we just filter on the group
                    # environment's first_release attribute.
                    groupenvironment__first_release__organization_id=projects[
                        0].organization_id,
                    groupenvironment__first_release__version=version,
                    groupenvironment__environment_id__in=environment_ids,
                )),
                "first_seen":
                ScalarCondition(
                    "groupenvironment__first_seen",
                    {"groupenvironment__environment_id__in": environment_ids},
                ),
            }).build(group_queryset, search_filters)
        else:
            group_queryset = QuerySetBuilder({
                "first_release":
                QCallbackCondition(lambda release_version: Q(
                    # if no specific environments are supplied, we either choose any
                    # groups/issues whose first release matches the given release_version,
                    Q(first_release_id__in=Release.objects.filter(
                        version=release_version,
                        organization_id=projects[0].organization_id,
                    ))
                    |
                    # or we choose any groups whose first occurrence in any environment and the latest release at
                    # the time of the groups' first occurrence matches the given
                    # release_version
                    Q(id__in=GroupEnvironment.objects.filter(
                        first_release__version=release_version,
                        first_release__organization_id=projects[0].
                        organization_id,
                        environment__organization_id=projects[
                            0].organization_id,
                    ).values_list("group_id")))),
                "first_seen":
                ScalarCondition("first_seen"),
            }).build(group_queryset, search_filters)

        query_executor = PostgresSnubaQueryExecutor()

        return query_executor.query(
            projects,
            retention_window_start,
            group_queryset,
            environments,
            sort_by,
            limit,
            cursor,
            count_hits,
            paginator_options,
            search_filters,
            date_from,
            date_to,
        )
Example #19
    def get_event_stats_data(
        self,
        request: Request,
        organization: Organization,
        get_event_stats: Callable[
            [Sequence[str], str, Dict[str, str], int, bool, Optional[timedelta]], SnubaTSResult
        ],
        top_events: int = 0,
        query_column: str = "count()",
        params: Optional[Dict[str, Any]] = None,
        query: Optional[str] = None,
        allow_partial_buckets: bool = False,
        zerofill_results: bool = True,
        comparison_delta: Optional[timedelta] = None,
    ) -> Dict[str, Any]:
        with self.handle_query_errors():
            with sentry_sdk.start_span(
                op="discover.endpoint", description="base.stats_query_creation"
            ):
                columns = request.GET.getlist("yAxis", [query_column])
                if query is None:
                    query = request.GET.get("query")
                if params is None:
                    try:
                        # events-stats is still used by events v1 which doesn't require global views
                        params = self.get_snuba_params(
                            request, organization, check_global_views=False
                        )
                    except NoProjects:
                        return {"data": []}

                try:
                    rollup = get_rollup_from_request(
                        request,
                        params,
                        default_interval=None,
                        error=InvalidSearchQuery(),
                        top_events=top_events,
                    )
                # If the user sends an invalid interval, use the default instead
                except InvalidSearchQuery:
                    sentry_sdk.set_tag("user.invalid_interval", request.GET.get("interval"))
                    date_range = params["end"] - params["start"]
                    stats_period = parse_stats_period(get_interval_from_range(date_range, False))
                    rollup = int(stats_period.total_seconds()) if stats_period is not None else 3600

                if comparison_delta is not None:
                    retention = quotas.get_event_retention(organization=organization)
                    comparison_start = params["start"] - comparison_delta
                    if retention and comparison_start < timezone.now() - timedelta(days=retention):
                        raise ValidationError("Comparison period is outside your retention window")

                # Backwards compatibility for incidents which uses the old
                # column aliases as it straddles both versions of events/discover.
                # We will need these aliases until discover2 flags are enabled for all
                # users.
                # We need these rollup columns to generate correct events-stats results
                column_map = {
                    "user_count": "count_unique(user)",
                    "event_count": "count()",
                    "epm()": "epm(%d)" % rollup,
                    "eps()": "eps(%d)" % rollup,
                    "tpm()": "tpm(%d)" % rollup,
                    "tps()": "tps(%d)" % rollup,
                }

                query_columns = [column_map.get(column, column) for column in columns]
            with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_query"):
                result = get_event_stats(
                    query_columns, query, params, rollup, zerofill_results, comparison_delta
                )

        serializer = SnubaTSResultSerializer(organization, None, request.user)

        with sentry_sdk.start_span(op="discover.endpoint", description="base.stats_serialization"):
            # When the request is for top_events, result can be a SnubaTSResult in the event that
            # there were no top events found. In this case, result contains a zerofilled series
            # that acts as a placeholder.
            is_multiple_axis = len(query_columns) > 1
            if top_events > 0 and isinstance(result, dict):
                results = {}
                for key, event_result in result.items():
                    if is_multiple_axis:
                        results[key] = self.serialize_multiple_axis(
                            serializer,
                            event_result,
                            columns,
                            query_columns,
                            allow_partial_buckets,
                            zerofill_results=zerofill_results,
                        )
                    else:
                        # Need to get function alias if count is a field, but not the axis
                        results[key] = serializer.serialize(
                            event_result,
                            column=resolve_axis_column(query_columns[0]),
                            allow_partial_buckets=allow_partial_buckets,
                            zerofill_results=zerofill_results,
                        )
                serialized_result = results
            elif is_multiple_axis:
                serialized_result = self.serialize_multiple_axis(
                    serializer,
                    result,
                    columns,
                    query_columns,
                    allow_partial_buckets,
                    zerofill_results=zerofill_results,
                )
            else:
                extra_columns = None
                if comparison_delta:
                    extra_columns = ["comparisonCount"]
                serialized_result = serializer.serialize(
                    result,
                    resolve_axis_column(query_columns[0]),
                    allow_partial_buckets=allow_partial_buckets,
                    zerofill_results=zerofill_results,
                    extra_columns=extra_columns,
                )

            return serialized_result
Example #20
File: config.py Project: w7374520/sentry
def get_project_config(project, full_config=True, project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param full_config: True if the full config is required, False
        if only the restricted config (for external relays) is required
        (default True, i.e. full configuration)
    :param project_keys: Pre-fetched project keys for performance. However, if
        no project keys are provided it is assumed that the config does not
        need to contain auth information (this is the case when used in
        python's StoreView)

    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    if project.status != ObjectStatus.VISIBLE:
        return ProjectConfig(project, disabled=True)

    public_keys = get_public_key_configs(project,
                                         full_config,
                                         project_keys=project_keys)

    with Hub.current.start_span(op="get_public_config"):
        now = datetime.utcnow().replace(tzinfo=utc)
        cfg = {
            "disabled": False,
            "slug": project.slug,
            "lastFetch": now,
            "lastChange": project.get_option("sentry:relay-rev-lastchange",
                                             now),
            "rev": project.get_option("sentry:relay-rev",
                                      uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains":
                list(get_origins(project)),
                "trustedRelays": [
                    r["public_key"] for r in project.organization.get_option(
                        "sentry:trusted-relays", []) if r
                ],
                "piiConfig":
                get_pii_config(project),
                "datascrubbingSettings":
                get_datascrubbing_settings(project),
            },
            "organizationId": project.organization_id,
            "projectId":
            project.id,  # XXX: Unused by Relay, required by Python store
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    with Hub.current.start_span(op="get_filter_settings"):
        cfg["config"]["filterSettings"] = get_filter_settings(project)
    with Hub.current.start_span(op="get_grouping_config_dict_for_project"):
        cfg["config"]["groupingConfig"] = get_grouping_config_dict_for_project(
            project)
    with Hub.current.start_span(op="get_event_retention"):
        cfg["config"]["eventRetention"] = quotas.get_event_retention(
            project.organization)
    with Hub.current.start_span(op="get_all_quotas"):
        cfg["config"]["quotas"] = get_quotas(project, keys=project_keys)

    return ProjectConfig(project, **cfg)
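A hedged usage sketch for get_project_config: per the docstring, an external relay only needs the restricted config, so full_config=False skips the filter, grouping, retention and quota sections entirely. The projectkey_set reverse accessor below is an assumption, not something this snippet shows:

    # Restricted config, e.g. for an external relay; eventRetention,
    # filterSettings, groupingConfig and quotas are omitted.
    restricted = get_project_config(project, full_config=False)

    # Full config, as consumed by internal relays and the store endpoint.
    full = get_project_config(project, full_config=True,
                              project_keys=list(project.projectkey_set.all()))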
Example #21
File: snuba.py Project: y1024/sentry
def _prepare_query_params(query_params):
    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(query_params.start)
    end = naiveify_datetime(query_params.end)

    with timer("get_snuba_map"):
        forward, reverse = get_snuba_translators(
            query_params.filter_keys, is_grouprelease=query_params.is_grouprelease
        )

    if query_params.dataset in [Dataset.Events, Dataset.Discover, Dataset.Sessions]:
        (organization_id, params_to_update) = get_query_params_to_update_for_projects(
            query_params, with_org=query_params.dataset == Dataset.Sessions
        )
    elif query_params.dataset in [Dataset.Outcomes, Dataset.OutcomesRaw]:
        (organization_id, params_to_update) = get_query_params_to_update_for_organizations(
            query_params
        )
    else:
        raise UnqualifiedQueryError(
            "No strategy found for getting an organization for the given dataset."
        )

    query_params.kwargs.update(params_to_update)

    for col, keys in six.iteritems(forward(deepcopy(query_params.filter_keys))):
        if keys:
            if len(keys) == 1 and None in keys:
                query_params.conditions.append((col, "IS NULL", None))
            else:
                query_params.conditions.append((col, "IN", keys))

    retention = quotas.get_event_retention(organization=Organization(organization_id))
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    new_start = shrink_time_window(query_params.filter_keys.get("group_id"), start)

    # TODO (alexh) this is a quick emergency fix for an occasion where a search
    # results in only 1 django candidate, which is then passed to snuba to
    # check and we raised because of it. Remove this once we figure out why the
    # candidate was returned from django at all if it existed only outside the
    # time range of the query
    if new_start <= end:
        start = new_start

    if start > end:
        raise QueryOutsideGroupActivityError

    query_params.kwargs.update(
        {
            "dataset": query_params.dataset.value,
            "from_date": start.isoformat(),
            "to_date": end.isoformat(),
            "groupby": query_params.groupby,
            "conditions": query_params.conditions,
            "aggregations": query_params.aggregations,
            "granularity": query_params.rollup,  # TODO name these things the same
        }
    )
    kwargs = {k: v for k, v in six.iteritems(query_params.kwargs) if v is not None}

    kwargs.update(OVERRIDE_OPTIONS)
    return kwargs, forward, reverse
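The retention clamp in the middle of _prepare_query_params is worth calling out: instead of silently returning an empty result, a window that lies entirely before the retention cutoff raises QueryOutsideRetentionError. The same logic in isolation, as a sketch with a hypothetical 30-day retention:

    from datetime import datetime, timedelta

    retention = 30  # hypothetical retention, in days
    start = datetime(2018, 1, 1)
    end = datetime(2018, 1, 15)

    start = max(start, datetime.utcnow() - timedelta(days=retention))
    if start > end:
        # Both bounds predate the retention window; callers are expected to
        # catch this and short-circuit to an empty response.
        raise QueryOutsideRetentionError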
Example #22
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, arrayjoin=None, limit=None, offset=None,
              orderby=None, having=None, referrer=None, is_grouprelease=False,
              selected_columns=None, totals=None, limitby=None):
    """
    Sends a query to snuba.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar).

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations`: a list of (aggregation_function, column, alias) tuples to be
    passed to the query.
    """

    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(start)
    end = naiveify_datetime(end)

    groupby = groupby or []
    conditions = conditions or []
    having = having or []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}
    selected_columns = selected_columns or []

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = filter_keys['project_id']
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(filter_keys.copy())):
        if keys:
            if len(keys) == 1 and keys[0] is None:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise SnubaError("No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    use_group_id_column = options.get('snuba.use_group_id_column')
    issues = None
    if not use_group_id_column:
        # If the grouping, aggregation, or any of the conditions reference `issue`
        # we need to fetch the issue definitions (issue -> fingerprint hashes)
        aggregate_cols = [a[1] for a in aggregations]
        condition_cols = all_referenced_columns(conditions)
        all_cols = groupby + aggregate_cols + condition_cols + selected_columns
        get_issues = 'issue' in all_cols

        if get_issues:
            with timer('get_project_issues'):
                issues = get_project_issues(project_ids, filter_keys.get('issue'))

    start, end = shrink_time_window(filter_keys.get('issue'), start, end)

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    if start > end:
        raise QueryOutsideGroupActivityError

    request = {k: v for k, v in six.iteritems({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'conditions': conditions,
        'having': having,
        'groupby': groupby,
        'totals': totals,
        'project': project_ids,
        'aggregations': aggregations,
        'granularity': rollup,
        'use_group_id_column': use_group_id_column,
        'issues': issues,
        'arrayjoin': arrayjoin,
        'limit': limit,
        'offset': offset,
        'limitby': limitby,
        'orderby': orderby,
        'selected_columns': selected_columns,
    }) if v is not None}

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(request), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise SnubaError(u"Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            raise SnubaError(body['error'])
        else:
            raise SnubaError(u'HTTP {}'.format(response.status))

    # Forward and reverse translation maps from model ids to snuba keys, per column
    body['data'] = [reverse(d) for d in body['data']]
    return body
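A hedged call sketch for raw_query, built only from the docstring above: filter_keys restricts (and infers) the projects, conditions holds pre-translated (column, operator, literal) tuples, and aggregations holds (aggregation_function, column, alias) tuples. The concrete id, tag and alias are illustrative:

    from datetime import datetime, timedelta

    body = raw_query(
        start=datetime.utcnow() - timedelta(days=1),
        end=datetime.utcnow(),
        filter_keys={'project_id': [1]},             # also infers the project
        conditions=[('tags[browser]', '=', 'Chrome')],
        aggregations=[('count()', '', 'times_seen')],
        referrer='example',
    )
    # body['data'] comes back with snuba keys reverse-translated to model ids.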
Example #23
    def query(self, projects, tags=None, environments=None, sort_by='date', limit=100,
              cursor=None, count_hits=False, paginator_options=None, search_filters=None,
              use_new_filters=False, **parameters):

        from sentry.models import Group, GroupStatus, GroupSubscription

        search_filters = search_filters if search_filters is not None else []

        # ensure projects are from same org
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError('Cross organization search not supported')

        if paginator_options is None:
            paginator_options = {}

        if tags is None:
            tags = {}

        group_queryset = Group.objects.filter(project__in=projects).exclude(status__in=[
            GroupStatus.PENDING_DELETION,
            GroupStatus.DELETION_IN_PROGRESS,
            GroupStatus.PENDING_MERGE,
        ])

        if use_new_filters:
            query_set_builder_class = SearchFilterQuerySetBuilder
            query_set_builder_params = search_filters
        else:
            query_set_builder_class = QuerySetBuilder
            query_set_builder_params = parameters

        group_queryset = query_set_builder_class({
            'message': QCallbackCondition(
                lambda query: Q(
                    Q(message__icontains=query) | Q(culprit__icontains=query),
                ),
                skip_if_falsey=True,
            ),
            # TODO: Remove this once we've stopped using old params
            'query': QCallbackCondition(
                lambda query: Q(
                    Q(message__icontains=query) | Q(culprit__icontains=query),
                ),
                skip_if_falsey=True,
            ),
            'status': QCallbackCondition(
                lambda status: Q(status=status),
            ),
            'bookmarked_by': QCallbackCondition(
                lambda user: Q(
                    bookmark_set__project__in=projects,
                    bookmark_set__user=user,
                ),
            ),
            'assigned_to': QCallbackCondition(
                functools.partial(assigned_to_filter, projects=projects),
            ),
            'unassigned': QCallbackCondition(
                functools.partial(unassigned_filter, projects=projects),
            ),
            'subscribed_by': QCallbackCondition(
                lambda user: Q(
                    id__in=GroupSubscription.objects.filter(
                        project__in=projects,
                        user=user,
                        is_active=True,
                    ).values_list('group'),
                ),
            ),
            'active_at': SearchFilterScalarCondition('active_at'),
            # TODO: These are legacy params. Once we've moved to SearchFilter
            # entirely then they can be removed, since the `'active_at'`
            # condition will handle both
            'active_at_from': ScalarCondition('active_at', 'gt'),
            'active_at_to': ScalarCondition('active_at', 'lt'),
        }).build(group_queryset, query_set_builder_params)

        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(organization=projects[0].organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        if retention_window_start:
            group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

        # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
        # seemed better to handle all the shared initialization and then handoff to the
        # actual backend.
        return self._query(projects, retention_window_start, group_queryset, tags,
                           environments, sort_by, limit, cursor, count_hits,
                           paginator_options, search_filters, use_new_filters,
                           **parameters)
Example #24
    def _build_queryset(
        self,
        project,
        query=None,
        status=None,
        tags=None,
        bookmarked_by=None,
        assigned_to=None,
        first_release=None,
        sort_by='date',
        unassigned=None,
        subscribed_by=None,
        age_from=None,
        age_from_inclusive=True,
        age_to=None,
        age_to_inclusive=True,
        last_seen_from=None,
        last_seen_from_inclusive=True,
        last_seen_to=None,
        last_seen_to_inclusive=True,
        date_from=None,
        date_from_inclusive=True,
        date_to=None,
        date_to_inclusive=True,
        active_at_from=None,
        active_at_from_inclusive=True,
        active_at_to=None,
        active_at_to_inclusive=True,
        times_seen=None,
        times_seen_lower=None,
        times_seen_lower_inclusive=True,
        times_seen_upper=None,
        times_seen_upper_inclusive=True,
        cursor=None,
        limit=None,
        environment=None,
    ):
        from sentry.models import Event, Group, GroupSubscription, GroupStatus, OrganizationMember

        if tags is None:
            tags = {}

        engine = get_db_engine('default')

        queryset = Group.objects.filter(project=project)

        if query:
            # TODO(dcramer): if we want to continue to support search on SQL
            # we should at least optimize this in Postgres so that it does
            # the query filter **after** the index filters, and restricts the
            # result set
            queryset = queryset.filter(
                Q(message__icontains=query) | Q(culprit__icontains=query))

        if status is None:
            status_in = (
                GroupStatus.PENDING_DELETION,
                GroupStatus.DELETION_IN_PROGRESS,
                GroupStatus.PENDING_MERGE,
            )
            queryset = queryset.exclude(status__in=status_in)
        else:
            queryset = queryset.filter(status=status)

        if bookmarked_by:
            queryset = queryset.filter(
                bookmark_set__project=project,
                bookmark_set__user=bookmarked_by,
            )

        if assigned_to:
            teams = []
            try:
                member = OrganizationMember.objects.get(
                    user=assigned_to,
                    organization_id=project.organization_id,
                )
            except OrganizationMember.DoesNotExist:
                pass
            else:
                teams = member.get_teams()

            queryset = queryset.filter(
                Q(assignee_set__user=assigned_to,
                  assignee_set__project=project)
                | Q(assignee_set__team__in=teams))
        elif unassigned in (True, False):
            queryset = queryset.filter(assignee_set__isnull=unassigned, )

        if subscribed_by is not None:
            queryset = queryset.filter(id__in=GroupSubscription.objects.filter(
                project=project,
                user=subscribed_by,
                is_active=True,
            ).values_list('group'), )

        if first_release:
            if first_release is EMPTY:
                return queryset.none()
            queryset = queryset.filter(
                first_release__organization_id=project.organization_id,
                first_release__version=first_release,
            )

        if environment is not None:
            # XXX: This overwrites the ``environment`` tag, if present, to
            # ensure that the result set is limited to groups that have been
            # seen in this environment (there is no way to search for groups
            # that match multiple values of a single tag without changes to the
            # tagstore API.)
            tags['environment'] = environment.name

        if tags:
            matches = tagstore.get_group_ids_for_search_filter(
                project.id,
                environment.id if environment is not None else None,
                tags,
            )
            if not matches:
                return queryset.none()
            queryset = queryset.filter(id__in=matches, )

        if age_from or age_to:
            params = {}
            if age_from:
                if age_from_inclusive:
                    params['first_seen__gte'] = age_from
                else:
                    params['first_seen__gt'] = age_from
            if age_to:
                if age_to_inclusive:
                    params['first_seen__lte'] = age_to
                else:
                    params['first_seen__lt'] = age_to
            queryset = queryset.filter(**params)

        if last_seen_from or last_seen_to:
            params = {}
            if last_seen_from:
                if last_seen_from_inclusive:
                    params['last_seen__gte'] = last_seen_from
                else:
                    params['last_seen__gt'] = last_seen_from
            if last_seen_to:
                if last_seen_to_inclusive:
                    params['last_seen__lte'] = last_seen_to
                else:
                    params['last_seen__lt'] = last_seen_to
            queryset = queryset.filter(**params)

        if active_at_from or active_at_to:
            params = {}
            if active_at_from:
                if active_at_from_inclusive:
                    params['active_at__gte'] = active_at_from
                else:
                    params['active_at__gt'] = active_at_from
            if active_at_to:
                if active_at_to_inclusive:
                    params['active_at__lte'] = active_at_to
                else:
                    params['active_at__lt'] = active_at_to
            queryset = queryset.filter(**params)

        if times_seen is not None:
            queryset = queryset.filter(times_seen=times_seen)

        if times_seen_lower is not None or times_seen_upper is not None:
            params = {}
            if times_seen_lower is not None:
                if times_seen_lower_inclusive:
                    params['times_seen__gte'] = times_seen_lower
                else:
                    params['times_seen__gt'] = times_seen_lower
            if times_seen_upper is not None:
                if times_seen_upper_inclusive:
                    params['times_seen__lte'] = times_seen_upper
                else:
                    params['times_seen__lt'] = times_seen_upper
            queryset = queryset.filter(**params)

        if date_from or date_to:
            params = {
                'project_id': project.id,
            }
            if date_from:
                if date_from_inclusive:
                    params['datetime__gte'] = date_from
                else:
                    params['datetime__gt'] = date_from
            if date_to:
                if date_to_inclusive:
                    params['datetime__lte'] = date_to
                else:
                    params['datetime__lt'] = date_to

            event_queryset = Event.objects.filter(**params)

            if query:
                event_queryset = event_queryset.filter(
                    message__icontains=query)

            # limit to the first 1000 results
            group_ids = event_queryset.distinct().values_list('group_id',
                                                              flat=True)[:1000]

            # if Event is not on the primary database remove Django's
            # implicit subquery by coercing to a list
            base = router.db_for_read(Group)
            using = router.db_for_read(Event)
            # MySQL also cannot do a LIMIT inside of a subquery
            if base != using or engine.startswith('mysql'):
                group_ids = list(group_ids)

            queryset = queryset.filter(id__in=group_ids, )

        if engine.startswith('sqlite'):
            score_clause = SQLITE_SORT_CLAUSES[sort_by]
        elif engine.startswith('mysql'):
            score_clause = MYSQL_SORT_CLAUSES[sort_by]
        elif engine.startswith('oracle'):
            score_clause = ORACLE_SORT_CLAUSES[sort_by]
        elif engine in MSSQL_ENGINES:
            score_clause = MSSQL_SORT_CLAUSES[sort_by]
        else:
            score_clause = SORT_CLAUSES[sort_by]

        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(
            organization=project.organization)
        if retention:
            queryset = queryset.filter(last_seen__gte=timezone.now() -
                                       timedelta(days=retention))

        queryset = queryset.extra(select={'sort_value': score_clause}, )
        return queryset
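
Every range filter in this `_build_queryset` repeats the same branch: pick `__gte`/`__gt` or `__lte`/`__lt` depending on an `*_inclusive` flag. A hypothetical helper (not part of Sentry) that collapses the pattern into Django-style filter kwargs:

    def range_params(field, lower=None, lower_inclusive=True,
                     upper=None, upper_inclusive=True):
        # Build the {field__op: value} kwargs that the branches above
        # construct by hand for first_seen, last_seen, active_at, etc.
        params = {}
        if lower is not None:
            params['%s__%s' % (field, 'gte' if lower_inclusive else 'gt')] = lower
        if upper is not None:
            params['%s__%s' % (field, 'lte' if upper_inclusive else 'lt')] = upper
        return params

    # e.g. queryset.filter(**range_params('first_seen', lower=age_from,
    #                                     lower_inclusive=age_from_inclusive))
    print(range_params('times_seen', lower=10, lower_inclusive=False, upper=100))
    # {'times_seen__gt': 10, 'times_seen__lte': 100}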
Example #25
File: snuba.py  Project: Kayle009/sentry
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, arrayjoin=None, limit=None, offset=None,
              orderby=None, having=None, referrer=None, is_grouprelease=False,
              selected_columns=None, totals=None, limitby=None, turbo=False):
    """
    Sends a query to snuba.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar).

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations` a list of (aggregation_function, column, alias) tuples to be
    passed to the query.
    """

    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(start)
    end = naiveify_datetime(end)

    groupby = groupby or []
    conditions = conditions or []
    having = having or []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}
    selected_columns = selected_columns or []

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = list(set(filter_keys['project_id']))
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(filter_keys.copy())):
        if keys:
            if len(keys) == 1 and None in keys:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise UnqualifiedQueryError(
            "No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    start = shrink_time_window(filter_keys.get('issue'), start)

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    if start > end:
        raise QueryOutsideGroupActivityError

    request = {k: v for k, v in six.iteritems({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'conditions': conditions,
        'having': having,
        'groupby': groupby,
        'totals': totals,
        'project': project_ids,
        'aggregations': aggregations,
        'granularity': rollup,
        'arrayjoin': arrayjoin,
        'limit': limit,
        'offset': offset,
        'limitby': limitby,
        'orderby': orderby,
        'selected_columns': selected_columns,
        'turbo': turbo
    }) if v is not None}

    request.update(OVERRIDE_OPTIONS)

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(request), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise UnexpectedResponseError(u"Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            error = body['error']
            if response.status == 429:
                raise RateLimitExceeded(error['message'])
            elif error['type'] == 'schema':
                raise SchemaValidationError(error['message'])
            elif error['type'] == 'clickhouse':
                raise clickhouse_error_codes_map.get(
                    error['code'],
                    QueryExecutionError,
                )(error['message'])
            else:
                raise SnubaError(error['message'])
        else:
            raise SnubaError(u'HTTP {}'.format(response.status))

    # Forward and reverse translation maps from model ids to snuba keys, per column
    body['data'] = [reverse(d) for d in body['data']]
    return body
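
For reference, a hypothetical `raw_query` call wiring up the argument shapes the docstring describes; the column names and alias are illustrative only, and the call itself is left commented out since it needs a running Snuba:

    from datetime import datetime, timedelta

    start = datetime.utcnow() - timedelta(days=7)
    end = datetime.utcnow()
    conditions = [('tags[server]', '=', 'web-1')]   # (column, operator, literal)
    filter_keys = {'project_id': [42]}              # becomes project_id IN (42)
    aggregations = [('count()', '', 'times_seen')]  # (function, column, alias)

    # result = raw_query(start, end, groupby=['environment'],
    #                    conditions=conditions, filter_keys=filter_keys,
    #                    aggregations=aggregations, referrer='docs.example')
    # for row in result['data']:
    #     print(row['environment'], row['times_seen'])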
Example #26
    def query(self,
              project,
              tags=None,
              environment=None,
              sort_by='date',
              limit=100,
              cursor=None,
              count_hits=False,
              paginator_options=None,
              **parameters):
        from sentry.models import (Environment, Event, Group, GroupEnvironment,
                                   GroupStatus, GroupSubscription, Release)

        if paginator_options is None:
            paginator_options = {}

        if tags is None:
            tags = {}

        try:
            if tags.get('sentry:release') == 'latest':
                tags['sentry:release'] = get_latest_release(
                    project, environment)

            if parameters.get('first_release') == 'latest':
                parameters['first_release'] = get_latest_release(
                    project, environment)
        except Release.DoesNotExist:
            # no matches could possibly be found from this point on
            return Paginator(Group.objects.none()).get_result()

        group_queryset = QuerySetBuilder({
            'query':
            CallbackCondition(
                lambda queryset, query: queryset.filter(
                    Q(message__icontains=query) | Q(culprit__icontains=query),
                ) if query else queryset,
            ),
            'status':
            CallbackCondition(
                lambda queryset, status: queryset.filter(status=status),
            ),
            'bookmarked_by':
            CallbackCondition(
                lambda queryset, user: queryset.filter(
                    bookmark_set__project=project,
                    bookmark_set__user=user,
                ),
            ),
            'assigned_to':
            CallbackCondition(
                functools.partial(assigned_to_filter, project=project), ),
            'unassigned':
            CallbackCondition(
                lambda queryset, unassigned: queryset.filter(
                    assignee_set__isnull=unassigned, ),
            ),
            'subscribed_by':
            CallbackCondition(
                lambda queryset, user: queryset.filter(
                    id__in=GroupSubscription.objects.filter(
                        project=project,
                        user=user,
                        is_active=True,
                    ).values_list('group'), ),
            ),
            'active_at_from':
            ScalarCondition('active_at', 'gt'),
            'active_at_to':
            ScalarCondition('active_at', 'lt'),
        }).build(
            Group.objects.filter(project=project).exclude(status__in=[
                GroupStatus.PENDING_DELETION,
                GroupStatus.DELETION_IN_PROGRESS,
                GroupStatus.PENDING_MERGE,
            ]),
            parameters,
        )

        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(
            organization=project.organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
            # TODO: This could be optimized when building querysets to identify
            # criteria that are logically impossible (e.g. if the upper bound
            # for last seen is before the retention window starts, no results
            # exist.)
            group_queryset = group_queryset.filter(
                last_seen__gte=retention_window_start)
        else:
            retention_window_start = None

        if environment is not None:
            if 'environment' in tags:
                # TODO: This should probably just overwrite the existing tag,
                # rather than asserting on it, but...?
                assert Environment.objects.get(
                    projects=project,
                    name=tags.pop('environment'),
                ).id == environment.id

            event_queryset_builder = QuerySetBuilder({
                'date_from':
                ScalarCondition('date_added', 'gt'),
                'date_to':
                ScalarCondition('date_added', 'lt'),
            })
            if any(key in parameters
                   for key in event_queryset_builder.conditions.keys()):
                event_queryset = event_queryset_builder.build(
                    tagstore.get_event_tag_qs(
                        project.id,
                        environment.id,
                        'environment',
                        environment.name,
                    ),
                    parameters,
                )
                if retention_window_start is not None:
                    event_queryset = event_queryset.filter(
                        date_added__gte=retention_window_start)

                group_queryset = group_queryset.filter(
                    id__in=list(event_queryset.distinct().values_list(
                        'group_id', flat=True)[:1000]))

            group_queryset = QuerySetBuilder({
                'first_release':
                CallbackCondition(
                    lambda queryset, version: queryset.extra(
                        where=[
                            '{} = {}'.format(
                                get_sql_column(GroupEnvironment,
                                               'first_release_id'),
                                get_sql_column(Release, 'id'),
                            ),
                            '{} = %s'.format(
                                get_sql_column(Release, 'organization'), ),
                            '{} = %s'.format(
                                get_sql_column(Release, 'version'), ),
                        ],
                        params=[project.organization_id, version],
                        tables=[Release._meta.db_table],
                    ),
                ),
                'times_seen':
                CallbackCondition(
                    # This condition represents the exact number of times that
                    # an issue has been seen in an environment. Since an issue
                    # can't be seen in an environment more times than the issue
                    # was seen overall, we can safely exclude any groups that
                    # don't have at least that many events.
                    lambda queryset, times_seen: queryset.exclude(
                        times_seen__lt=times_seen, ),
                ),
                'times_seen_lower':
                CallbackCondition(
                    # This condition represents the lower threshold for the
                    # number of times an issue has been seen in an environment.
                    # Since an issue can't be seen in an environment more times
                    # than the issue was seen overall, we can safely exclude
                    # any groups that haven't met that threshold.
                    lambda queryset, times_seen: queryset.exclude(
                        times_seen__lt=times_seen, ),
                ),
                # The following conditions make a few assertions that are
                # correct in an abstract sense but may not accurately reflect
                # the existing implementation (see GH-5289). These assumptions
                # are that 1. The first seen time for a Group is the minimum
                # value of the first seen time for all of its GroupEnvironment
                # relations; 2. The last seen time for a Group is the maximum
                # value of the last seen time for all of its GroupEnvironment
                # relations; 3. The first seen time is always less than or
                # equal to the last seen time.
                'age_from':
                CallbackCondition(
                    # This condition represents the lower threshold for "first
                    # seen" time for an environment. Due to assertions #1 and
                    # #3, we can exclude any groups where the "last seen" time
                    # is prior to this timestamp.
                    lambda queryset, first_seen: queryset.exclude(
                        last_seen__lt=first_seen, ),
                ),
                'age_to':
                CallbackCondition(
                    # This condition represents the upper threshold for "first
                    # seen" time for an environment. Due to assertions #1, we
                    # can exclude any values where the group first seen is
                    # greater than that threshold.
                    lambda queryset, first_seen: queryset.exclude(
                        first_seen__gt=first_seen, ),
                ),
                'last_seen_from':
                CallbackCondition(
                    # This condition represents the lower threshold for "last
                    # seen" time for an environment. Due to assertion #2, we
                    # can exclude any values where the group last seen value is
                    # less than that threshold.
                    lambda queryset, last_seen: queryset.exclude(
                        last_seen__lt=last_seen),
                ),
                'last_seen_to':
                CallbackCondition(
                    # This condition represents the upper threshold for "last
                    # seen" time for an environment. Due to assertions #2 and
                    # #3, we can exclude any values where the group first seen
                    # value is greater than that threshold.
                    lambda queryset, last_seen: queryset.exclude(
                        first_seen__gt=last_seen),
                ),
            }).build(
                group_queryset.extra(
                    where=[
                        '{} = {}'.format(
                            get_sql_column(Group, 'id'),
                            get_sql_column(GroupEnvironment, 'group_id'),
                        ),
                        '{} = %s'.format(
                            get_sql_column(GroupEnvironment,
                                           'environment_id'), ),
                    ],
                    params=[environment.id],
                    tables=[GroupEnvironment._meta.db_table],
                ),
                parameters,
            )

            get_sort_expression, sort_value_to_cursor_value = environment_sort_strategies[
                sort_by]

            group_tag_value_queryset = tagstore.get_group_tag_value_qs(
                project.id,
                set(group_queryset.values_list('id',
                                               flat=True)),  # TODO: Limit?
                environment.id,
                'environment',
                environment.name,
            )

            if retention_window_start is not None:
                group_tag_value_queryset = group_tag_value_queryset.filter(
                    last_seen__gte=retention_window_start)

            candidates = dict(
                QuerySetBuilder({
                    'age_from':
                    ScalarCondition('first_seen', 'gt'),
                    'age_to':
                    ScalarCondition('first_seen', 'lt'),
                    'last_seen_from':
                    ScalarCondition('last_seen', 'gt'),
                    'last_seen_to':
                    ScalarCondition('last_seen', 'lt'),
                    'times_seen':
                    CallbackCondition(
                        lambda queryset, times_seen: queryset.filter(
                            times_seen=times_seen),
                    ),
                    'times_seen_lower':
                    ScalarCondition('times_seen', 'gt'),
                    'times_seen_upper':
                    ScalarCondition('times_seen', 'lt'),
                }).build(
                    group_tag_value_queryset,
                    parameters,
                ).extra(select={
                    'sort_value':
                    get_sort_expression(group_tag_value_queryset.model),
                }, ).values_list('group_id', 'sort_value'))

            if tags:
                # TODO: `get_group_ids_for_search_filter` should be able to
                # utilize the retention window start parameter for additional
                # optimizations.
                matches = tagstore.get_group_ids_for_search_filter(
                    project.id,
                    environment.id,
                    tags,
                    candidates.keys(),
                    limit=len(candidates),
                )
                for key in set(candidates) - set(matches or []):
                    del candidates[key]

            result = SequencePaginator([(sort_value_to_cursor_value(score), id)
                                        for (id, score) in candidates.items()],
                                       reverse=True,
                                       **paginator_options).get_result(
                                           limit,
                                           cursor,
                                           count_hits=count_hits)

            groups = Group.objects.in_bulk(result.results)
            result.results = [groups[k] for k in result.results if k in groups]

            return result
        else:
            event_queryset_builder = QuerySetBuilder({
                'date_from':
                ScalarCondition('datetime', 'gt'),
                'date_to':
                ScalarCondition('datetime', 'lt'),
            })
            if any(key in parameters
                   for key in event_queryset_builder.conditions.keys()):
                group_queryset = group_queryset.filter(id__in=list(
                    event_queryset_builder.build(
                        Event.objects.filter(project_id=project.id),
                        parameters,
                    ).distinct().values_list('group_id', flat=True)[:1000], ))

            group_queryset = QuerySetBuilder({
                'first_release':
                CallbackCondition(
                    lambda queryset, version: queryset.filter(
                        first_release__organization_id=project.organization_id,
                        first_release__version=version,
                    ),
                ),
                'age_from':
                ScalarCondition('first_seen', 'gt'),
                'age_to':
                ScalarCondition('first_seen', 'lt'),
                'last_seen_from':
                ScalarCondition('last_seen', 'gt'),
                'last_seen_to':
                ScalarCondition('last_seen', 'lt'),
                'times_seen':
                CallbackCondition(
                    lambda queryset, times_seen: queryset.filter(
                        times_seen=times_seen),
                ),
                'times_seen_lower':
                ScalarCondition('times_seen', 'gt'),
                'times_seen_upper':
                ScalarCondition('times_seen', 'lt'),
            }).build(
                group_queryset,
                parameters,
            ).extra(select={
                'sort_value': get_sort_clause(sort_by),
            }, )

            if tags:
                matches = tagstore.get_group_ids_for_search_filter(
                    project.id, None, tags)
                if matches:
                    group_queryset = group_queryset.filter(id__in=matches)
                else:
                    group_queryset = group_queryset.none()

            paginator_cls, sort_clause = sort_strategies[sort_by]
            group_queryset = group_queryset.order_by(sort_clause)
            paginator = paginator_cls(group_queryset, sort_clause,
                                      **paginator_options)
            return paginator.get_result(limit, cursor, count_hits=count_hits)
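
The environment branch of this `query` ends by intersecting the scored candidates with the tagstore matches and paginating by sort value. Stripped of the ORM and tagstore, the core is a dict intersection plus a reverse sort; a stdlib sketch with made-up group ids and sort values:

    candidates = {101: 0.9, 102: 0.4, 103: 0.7}  # {group_id: sort_value}
    matches = [101, 103]                         # hypothetical tagstore result

    # drop candidates that did not match the tag filter, as above
    for key in set(candidates) - set(matches):
        del candidates[key]

    # order by sort value descending, as SequencePaginator(reverse=True) does
    ranked = sorted(candidates.items(), key=lambda kv: kv[1], reverse=True)
    print(ranked)  # [(101, 0.9), (103, 0.7)]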
Example #27
    def get(self, request, group):
        """
        List an Issue's Events
        ``````````````````````

        This endpoint lists an issue's events.

        :pparam string issue_id: the ID of the issue to retrieve.
        :auth: required
        """
        def respond(queryset):
            return self.paginate(
                request=request,
                queryset=queryset,
                order_by='-datetime',
                on_results=lambda x: serialize(x, request.user),
                paginator_cls=DateTimePaginator,
            )

        events = Event.objects.filter(group_id=group.id)

        try:
            environment = self._get_environment_from_request(
                request,
                group.project.organization_id,
            )
        except Environment.DoesNotExist:
            return respond(events.none())

        raw_query = request.GET.get('query')

        if raw_query:
            try:
                query_kwargs = parse_query(group.project, raw_query,
                                           request.user)
            except InvalidQuery as exc:
                return Response({'detail': six.text_type(exc)}, status=400)
            else:
                query = query_kwargs.pop('query', None)
                tags = query_kwargs.pop('tags', {})
        else:
            query = None
            tags = {}

        if environment is not None:
            if 'environment' in tags and tags['environment'] != environment.name:
                # An event can only be associated with a single
                # environment, so if the environment associated with
                # the request is different than the environment
                # provided as a tag lookup, the query cannot contain
                # any valid results.
                return respond(events.none())
            else:
                tags['environment'] = environment.name

        if query:
            q = Q(message__icontains=query)

            if len(query) == 32:
                q |= Q(event_id__exact=query)

            events = events.filter(q)

        # TODO currently snuba can be used to get this filter of event_ids matching
        # the search tags, which is then used to further filter a postgres QuerySet
        # Ideally we would just use snuba to completely replace the fetching of the
        # events.
        if tags:
            event_filter = tagstore.get_group_event_filter(
                group.project_id,
                group.id,
                environment.id if environment is not None else None,
                tags,
            )

            if not event_filter:
                return respond(events.none())

            events = events.filter(**event_filter)

        # filter out events which are beyond the retention period
        retention = quotas.get_event_retention(
            organization=group.project.organization)
        if retention:
            events = events.filter(datetime__gte=timezone.now() -
                                   timedelta(days=retention))

        return respond(events)
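
The `len(query) == 32` test above treats any 32-character query as a possible event id (Sentry event ids are 32 hex characters, a uuid4 without dashes). A stricter, hypothetical predicate would also require hex digits:

    import string

    def looks_like_event_id(query):
        # Hypothetical tightening of the len(query) == 32 check:
        # 32 characters, all of them hex digits.
        return len(query) == 32 and all(c in string.hexdigits for c in query)

    print(looks_like_event_id('ab' * 16))  # True
    print(looks_like_event_id('x' * 32))   # False: right length, not hex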
Example #28
    def query(
        self,
        projects,
        environments=None,
        sort_by="date",
        limit=100,
        cursor=None,
        count_hits=False,
        paginator_options=None,
        search_filters=None,
        date_from=None,
        date_to=None,
    ):
        from sentry.models import Group, GroupStatus, GroupSubscription

        search_filters = search_filters if search_filters is not None else []

        # ensure projects are from same org
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError("Cross organization search not supported")

        if paginator_options is None:
            paginator_options = {}

        group_queryset = Group.objects.filter(project__in=projects).exclude(
            status__in=[
                GroupStatus.PENDING_DELETION,
                GroupStatus.DELETION_IN_PROGRESS,
                GroupStatus.PENDING_MERGE,
            ]
        )

        qs_builder_conditions = {
            "status": QCallbackCondition(lambda status: Q(status=status)),
            "bookmarked_by": QCallbackCondition(
                lambda user: Q(bookmark_set__project__in=projects, bookmark_set__user=user)
            ),
            "assigned_to": QCallbackCondition(
                functools.partial(assigned_to_filter, projects=projects)
            ),
            "unassigned": QCallbackCondition(
                functools.partial(unassigned_filter, projects=projects)
            ),
            "subscribed_by": QCallbackCondition(
                lambda user: Q(
                    id__in=GroupSubscription.objects.filter(
                        project__in=projects, user=user, is_active=True
                    ).values_list("group")
                )
            ),
            "active_at": ScalarCondition("active_at"),
        }

        group_queryset = QuerySetBuilder(qs_builder_conditions).build(
            group_queryset, search_filters
        )
        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(organization=projects[0].organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        if retention_window_start:
            group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

        # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
        # seemed better to handle all the shared initialization and then hand off to the
        # actual backend.
        return self._query(
            projects,
            retention_window_start,
            group_queryset,
            environments,
            sort_by,
            limit,
            cursor,
            count_hits,
            paginator_options,
            search_filters,
            date_from,
            date_to,
        )
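
The cross-organization guard at the top of this `query` matters because everything after it, retention included, is read from `projects[0].organization`. The same check against a namedtuple stand-in for the model:

    from collections import namedtuple

    Project = namedtuple('Project', 'id organization_id')  # model stand-in

    def check_single_organization(projects):
        # Mirrors the guard above: retention and the rest of the query setup
        # are per-organization, so mixed-organization lists are rejected.
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError("Cross organization search not supported")

    check_single_organization([Project(1, 10), Project(2, 10)])    # passes
    # check_single_organization([Project(1, 10), Project(3, 11)])  # raises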
Example #29
    def get(self, request, group):
        """
        List an Issue's Events
        ``````````````````````

        This endpoint lists an issue's events.

        :pparam string issue_id: the ID of the issue to retrieve.
        :auth: required
        """

        def respond(queryset):
            return self.paginate(
                request=request,
                queryset=queryset,
                order_by='-datetime',
                on_results=lambda x: serialize(x, request.user),
                paginator_cls=DateTimePaginator,
            )

        events = Event.objects.filter(group_id=group.id)

        try:
            environment = self._get_environment_from_request(
                request,
                group.project.organization_id,
            )
        except Environment.DoesNotExist:
            return respond(events.none())

        raw_query = request.GET.get('query')

        if raw_query:
            try:
                query_kwargs = parse_query(group.project, raw_query, request.user)
            except InvalidQuery as exc:
                return Response({'detail': six.text_type(exc)}, status=400)
            else:
                query = query_kwargs.pop('query', None)
                tags = query_kwargs.pop('tags', {})
        else:
            query = None
            tags = {}

        if environment is not None:
            if 'environment' in tags and tags['environment'] != environment.name:
                # An event can only be associated with a single
                # environment, so if the environment associated with
                # the request is different than the environment
                # provided as a tag lookup, the query cannot contain
                # any valid results.
                return respond(events.none())
            else:
                tags['environment'] = environment.name

        if query:
            q = Q(message__icontains=query)

            if len(query) == 32:
                q |= Q(event_id__exact=query)

            events = events.filter(q)

        if tags:
            event_ids = tagstore.get_group_event_ids(
                group.project_id,
                group.id,
                environment.id if environment is not None else None,
                tags,
            )

            if not event_ids:
                return respond(events.none())

            events = events.filter(id__in=event_ids)

        # filter out events which are beyond the retention period
        retention = quotas.get_event_retention(organization=group.project.organization)
        if retention:
            events = events.filter(
                datetime__gte=timezone.now() - timedelta(days=retention)
            )

        return respond(events)
Example #30
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, referrer=None,
              is_grouprelease=False, **kwargs):
    """
    Sends a query to snuba.

    `start` and `end`: The beginning and end of the query time window (required)

    `groupby`: A list of column names to group by.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (eg tag[foo] = bar).

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (eg. from environment model ID to environment name) are
    performed on the query, and the inverse translation performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations` a list of (aggregation_function, column, alias) tuples to be
    passed to the query.

    The rest of the args are passed directly into the query JSON unmodified.
    See the snuba schema for details.
    """

    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(start)
    end = naiveify_datetime(end)

    groupby = groupby or []
    conditions = conditions or []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = list(set(filter_keys['project_id']))
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(deepcopy(filter_keys))):
        if keys:
            if len(keys) == 1 and None in keys:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise UnqualifiedQueryError(
            "No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    new_start = shrink_time_window(filter_keys.get('issue'), start)

    # TODO (alexh) this is a quick emergency fix for an occasion where a search
    # results in only 1 django candidate, which is then passed to snuba to
    # check, and an exception was raised because of it. Remove this once we
    # figure out why the candidate was returned from django at all if it
    # existed only outside the time range of the query
    if new_start <= end:
        start = new_start

    if start > end:
        raise QueryOutsideGroupActivityError

    kwargs.update({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'groupby': groupby,
        'conditions': conditions,
        'aggregations': aggregations,
        'project': project_ids,
        'granularity': rollup,  # TODO name these things the same
    })
    kwargs = {k: v for k, v in six.iteritems(kwargs) if v is not None}

    kwargs.update(OVERRIDE_OPTIONS)

    headers = {}
    if referrer:
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(kwargs), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise UnexpectedResponseError(u"Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            error = body['error']
            if response.status == 429:
                raise RateLimitExceeded(error['message'])
            elif error['type'] == 'schema':
                raise SchemaValidationError(error['message'])
            elif error['type'] == 'clickhouse':
                raise clickhouse_error_codes_map.get(
                    error['code'],
                    QueryExecutionError,
                )(error['message'])
            else:
                raise SnubaError(error['message'])
        else:
            raise SnubaError(u'HTTP {}'.format(response.status))

    # Forward and reverse translation maps from model ids to snuba keys, per column
    body['data'] = [reverse(d) for d in body['data']]
    return body
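
The emergency fix above adopts the shrunk window start only when it still leaves a non-empty interval; otherwise the original start survives and the later `start > end` check decides the outcome. The guard in isolation:

    from datetime import datetime

    def apply_shrunk_start(start, end, new_start):
        # Mirrors the guard above: take the shrunk start unless it was
        # pushed past end, in which case keep the original start.
        return new_start if new_start <= end else start

    start, end = datetime(2019, 1, 1), datetime(2019, 1, 31)
    print(apply_shrunk_start(start, end, datetime(2019, 1, 20)))  # shrunk start
    print(apply_shrunk_start(start, end, datetime(2019, 2, 5)))   # original start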
Example #31
    def query(self, projects, tags=None, environments=None, sort_by='date', limit=100,
              cursor=None, count_hits=False, paginator_options=None, **parameters):

        from sentry.models import Group, GroupAssignee, GroupStatus, GroupSubscription, Release

        # ensure projects are from same org
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError('Cross organization search not supported')

        if paginator_options is None:
            paginator_options = {}

        if tags is None:
            tags = {}

        try:
            if tags.get('sentry:release') == 'latest':
                tags['sentry:release'] = get_latest_release(projects, environments)

            if parameters.get('first_release') == 'latest':
                parameters['first_release'] = get_latest_release(projects, environments)
        except Release.DoesNotExist:
            # no matches could possibly be found from this point on
            return Paginator(Group.objects.none()).get_result()

        group_queryset = QuerySetBuilder({
            'query': CallbackCondition(
                lambda queryset, query: queryset.filter(
                    Q(message__icontains=query) | Q(culprit__icontains=query),
                ) if query else queryset,
            ),
            'status': CallbackCondition(
                lambda queryset, status: queryset.filter(status=status),
            ),
            'bookmarked_by': CallbackCondition(
                lambda queryset, user: queryset.filter(
                    bookmark_set__project__in=projects,
                    bookmark_set__user=user,
                ),
            ),
            'assigned_to': CallbackCondition(
                functools.partial(assigned_to_filter, projects=projects),
            ),
            'unassigned': CallbackCondition(
                lambda queryset, unassigned: (queryset.exclude if unassigned else queryset.filter)(
                    id__in=GroupAssignee.objects.filter(
                        project_id__in=[p.id for p in projects],
                    ).values_list('group_id', flat=True),
                ),
            ),
            'subscribed_by': CallbackCondition(
                lambda queryset, user: queryset.filter(
                    id__in=GroupSubscription.objects.filter(
                        project__in=projects,
                        user=user,
                        is_active=True,
                    ).values_list('group'),
                ),
            ),
            'active_at_from': ScalarCondition('active_at', 'gt'),
            'active_at_to': ScalarCondition('active_at', 'lt'),
        }).build(
            Group.objects.filter(project__in=projects).exclude(status__in=[
                GroupStatus.PENDING_DELETION,
                GroupStatus.DELETION_IN_PROGRESS,
                GroupStatus.PENDING_MERGE,
            ]),
            parameters,
        )

        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(organization=projects[0].organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        if retention_window_start:
            group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

        # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
        # seemed better to handle all the shared initialization and then hand off to the
        # actual backend.
        return self._query(projects, retention_window_start, group_queryset, tags,
                           environments, sort_by, limit, cursor, count_hits,
                           paginator_options, **parameters)
Example #32
def _prepare_query_params(query_params):
    # convert to naive UTC datetimes, as Snuba only deals in UTC
    # and this avoids offset-naive and offset-aware issues
    start = naiveify_datetime(query_params.start)
    end = naiveify_datetime(query_params.end)

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(
            query_params.filter_keys,
            is_grouprelease=query_params.is_grouprelease,
        )

    if 'project_id' in query_params.filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = list(set(query_params.filter_keys['project_id']))
    elif query_params.filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [
                get_related_project_ids(k, query_params.filter_keys[k])
                for k in query_params.filter_keys
            ]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(deepcopy(
            query_params.filter_keys))):
        if keys:
            if len(keys) == 1 and None in keys:
                query_params.conditions.append((col, 'IS NULL', None))
            else:
                query_params.conditions.append((col, 'IN', keys))

    if not project_ids:
        raise UnqualifiedQueryError(
            "No project_id filter, or none could be inferred from other filters."
        )

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id))
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise QueryOutsideRetentionError

    # if `shrink_time_window` pushed `start` after `end` it means the user queried
    # a Group for T1 to T2 when the group was only active for T3 to T4, so the query
    # wouldn't return any results anyway
    new_start = shrink_time_window(query_params.filter_keys.get('issue'),
                                   start)

    # TODO (alexh) this is a quick emergency fix for an occasion where a search
    # results in only 1 django candidate, which is then passed to snuba to
    # check, and an exception was raised because of it. Remove this once we
    # figure out why the candidate was returned from django at all if it
    # existed only outside the time range of the query
    if new_start <= end:
        start = new_start

    if start > end:
        raise QueryOutsideGroupActivityError

    query_params.kwargs.update({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'groupby': query_params.groupby,
        'conditions': query_params.conditions,
        'aggregations': query_params.aggregations,
        'project': project_ids,
        'granularity': query_params.rollup,  # TODO name these things the same
    })
    kwargs = {
        k: v
        for k, v in six.iteritems(query_params.kwargs) if v is not None
    }

    kwargs.update(OVERRIDE_OPTIONS)
    return kwargs, forward, reverse
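
The project-id inference in `_prepare_query_params` (and in the `raw_query` variants above) leans on `set.union(*map(set, ids))` to merge the per-filter-key id lists. The same idiom with `get_related_project_ids` stubbed by a hypothetical mapping:

    RELATED = {'issue': {1001: [42], 1002: [42, 43]}}  # hypothetical model data

    def get_related_project_ids(column, keys):
        # Stand-in for the real helper: map filter keys to project ids.
        return [pid for key in keys for pid in RELATED[column].get(key, [])]

    filter_keys = {'issue': [1001, 1002]}
    ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
    project_ids = list(set.union(*map(set, ids)))
    print(sorted(project_ids))  # [42, 43]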
Example #33
def get_project_config(project,
                       org_options=None,
                       full_config=True,
                       project_keys=None):
    """
    Constructs the ProjectConfig information.

    :param project: The project to load configuration for. Ensure that
        organization is bound on this object; otherwise it will be loaded from
        the database.
    :param org_options: Inject preloaded organization options for faster loading.
        If ``None``, options are lazy-loaded from the database.
    :param full_config: True if the full config is required, False if
        only the restricted config (for external relays) is required
        (default True, i.e. full configuration)
    :param project_keys: Pre-fetched project keys for performance, similar to
        org_options. However, if no project keys are provided it is assumed
        that the config does not need to contain auth information (this is the
        case when used in Python's StoreView)

    :return: a ProjectConfig object for the given project
    """
    with configure_scope() as scope:
        scope.set_tag("project", project.id)

    public_keys = []

    for project_key in project_keys or ():
        key = {
            "publicKey": project_key.public_key,
            "isEnabled": project_key.status == 0
        }
        if full_config:
            key["numericId"] = project_key.id

            key["quotas"] = [
                quota.to_json()
                for quota in quotas.get_quotas(project, key=project_key)
            ]
        public_keys.append(key)

    now = datetime.utcnow().replace(tzinfo=utc)

    if org_options is None:
        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)

    with Hub.current.start_span(op="get_public_config"):

        cfg = {
            "disabled": project.status > 0,
            "slug": project.slug,
            "lastFetch": now,
            "lastChange": project.get_option("sentry:relay-rev-lastchange",
                                             now),
            "rev": project.get_option("sentry:relay-rev",
                                      uuid.uuid4().hex),
            "publicKeys": public_keys,
            "config": {
                "allowedDomains":
                list(get_origins(project)),
                "trustedRelays":
                org_options.get("sentry:trusted-relays", []),
                "piiConfig":
                _get_pii_config(project),
                "datascrubbingSettings":
                _get_datascrubbing_settings(project, org_options),
            },
            "projectId":
            project.id,  # XXX: Unused by Relay, required by Python store
        }

    if not full_config:
        # This is all we need for external Relay processors
        return ProjectConfig(project, **cfg)

    # The organization id is only required for reporting when processing events
    # internally. Do not expose it to external Relays.
    cfg["organizationId"] = project.organization_id

    project_cfg = cfg["config"]

    with Hub.current.start_span(op="get_filter_settings"):
        # get the filter settings for this project
        filter_settings = {}
        project_cfg["filterSettings"] = filter_settings

        for flt in get_all_filters():
            filter_id = get_filter_key(flt)
            settings = _load_filter_settings(flt, project)
            filter_settings[filter_id] = settings

        invalid_releases = project.get_option(u"sentry:{}".format(
            FilterTypes.RELEASES))
        if invalid_releases:
            filter_settings["releases"] = {"releases": invalid_releases}

        blacklisted_ips = project.get_option("sentry:blacklisted_ips")
        if blacklisted_ips:
            filter_settings["clientIps"] = {"blacklistedIps": blacklisted_ips}

        error_messages = project.get_option(u"sentry:{}".format(
            FilterTypes.ERROR_MESSAGES))
        if error_messages:
            filter_settings["errorMessages"] = {"patterns": error_messages}

        csp_disallowed_sources = []
        if bool(project.get_option("sentry:csp_ignored_sources_defaults",
                                   True)):
            csp_disallowed_sources += DEFAULT_DISALLOWED_SOURCES
        csp_disallowed_sources += project.get_option(
            "sentry:csp_ignored_sources", [])
        if csp_disallowed_sources:
            filter_settings["csp"] = {
                "disallowedSources": csp_disallowed_sources
            }

    with Hub.current.start_span(op="get_grouping_config_dict_for_project"):
        project_cfg["groupingConfig"] = get_grouping_config_dict_for_project(
            project)

    with Hub.current.start_span(op="get_event_retention"):
        project_cfg["eventRetention"] = quotas.get_event_retention(
            project.organization)

    return ProjectConfig(project, **cfg)
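
The key-serialization loop at the top of `get_project_config` reduces to a small dict per key; a sketch with a namedtuple stand-in for `ProjectKey` (the quota serialization is elided):

    from collections import namedtuple

    ProjectKey = namedtuple('ProjectKey', 'public_key status id')  # stand-in

    def serialize_key(project_key, full_config):
        # Mirrors the loop above: status == 0 means enabled; the numeric id
        # (and, in the real code, the quotas) appear only in the full config.
        key = {"publicKey": project_key.public_key,
               "isEnabled": project_key.status == 0}
        if full_config:
            key["numericId"] = project_key.id
        return key

    print(serialize_key(ProjectKey('abc123', 0, 7), full_config=True))
    # {'publicKey': 'abc123', 'isEnabled': True, 'numericId': 7}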
Example #34
def raw_query(start, end, groupby=None, conditions=None, filter_keys=None,
              aggregations=None, rollup=None, arrayjoin=None, limit=None, orderby=None,
              having=None, referrer=None, is_grouprelease=False, selected_columns=None,):
    """
    Sends a query to snuba.

    `conditions`: A list of (column, operator, literal) conditions to be passed
    to the query. Conditions that we know will not have to be translated should
    be passed this way (e.g. tag[foo] = bar).

    `filter_keys`: A dictionary of {col: [key, ...]} that will be converted
    into "col IN (key, ...)" conditions. These are used to restrict the query to
    known sets of project/issue/environment/release etc. Appropriate
    translations (e.g. from environment model ID to environment name) are
    performed on the query, and the inverse translation is performed on the
    result. The project_id(s) to restrict the query to will also be
    automatically inferred from these keys.

    `aggregations`: A list of (aggregation_function, column, alias) tuples to be
    passed to the query.
    """
    groupby = groupby or []
    conditions = conditions or []
    having = having or []
    aggregations = aggregations or []
    filter_keys = filter_keys or {}
    selected_columns = selected_columns or []

    with timer('get_snuba_map'):
        forward, reverse = get_snuba_translators(filter_keys, is_grouprelease=is_grouprelease)

    if 'project_id' in filter_keys:
        # If we are given a set of project ids, use those directly.
        project_ids = filter_keys['project_id']
    elif filter_keys:
        # Otherwise infer the project_ids from any related models
        with timer('get_related_project_ids'):
            ids = [get_related_project_ids(k, filter_keys[k]) for k in filter_keys]
            project_ids = list(set.union(*map(set, ids)))
    else:
        project_ids = []

    for col, keys in six.iteritems(forward(filter_keys.copy())):
        if keys:
            if len(keys) == 1 and keys[0] is None:
                conditions.append((col, 'IS NULL', None))
            else:
                conditions.append((col, 'IN', keys))

    if not project_ids:
        raise SnubaError("No project_id filter, or none could be inferred from other filters.")

    # any project will do, as they should all be from the same organization
    project = Project.objects.get(pk=project_ids[0])
    retention = quotas.get_event_retention(
        organization=Organization(project.organization_id)
    )
    if retention:
        start = max(start, datetime.utcnow() - timedelta(days=retention))
        if start > end:
            raise EntireQueryOutsideRetentionError

    # If the grouping, aggregation, or any of the conditions reference `issue`
    # we need to fetch the issue definitions (issue -> fingerprint hashes)
    aggregate_cols = [a[1] for a in aggregations]
    condition_cols = [c[0] for c in flat_conditions(conditions)]
    all_cols = groupby + aggregate_cols + condition_cols + selected_columns
    get_issues = 'issue' in all_cols

    with timer('get_project_issues'):
        issues = get_project_issues(project_ids, filter_keys.get('issue')) if get_issues else None

    request = {k: v for k, v in six.iteritems({
        'from_date': start.isoformat(),
        'to_date': end.isoformat(),
        'conditions': conditions,
        'having': having,
        'groupby': groupby,
        'project': project_ids,
        'aggregations': aggregations,
        'granularity': rollup,
        'issues': issues,
        'arrayjoin': arrayjoin,
        'limit': limit,
        'orderby': orderby,
        'selected_columns': selected_columns,
    }) if v is not None}

    headers = {}
    if referrer:
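        # note: the outgoing HTTP header really is spelled 'referer', matching
        # the historically misspelled Referer header in the HTTP spec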
        headers['referer'] = referrer

    try:
        with timer('snuba_query'):
            response = _snuba_pool.urlopen(
                'POST', '/query', body=json.dumps(request), headers=headers)
    except urllib3.exceptions.HTTPError as err:
        raise SnubaError(err)

    try:
        body = json.loads(response.data)
    except ValueError:
        raise SnubaError("Could not decode JSON response: {}".format(response.data))

    if response.status != 200:
        if body.get('error'):
            raise SnubaError(body['error'])
        else:
            raise SnubaError('HTTP {}'.format(response.status))

    # apply the reverse translation (snuba keys -> model ids) to each result row
    body['data'] = [reverse(d) for d in body['data']]
    return body
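A minimal usage sketch for raw_query, following the argument shapes described in its docstring. The column names, issue id, and referrer string are illustrative placeholders, not values checked against a real Snuba deployment:

from datetime import datetime, timedelta

# Illustrative call only; ids, tag names, and the referrer are placeholders.
body = raw_query(
    start=datetime.utcnow() - timedelta(days=7),
    end=datetime.utcnow(),
    groupby=['issue'],
    # pre-translated condition, per the docstring: (column, operator, literal)
    conditions=[('tags[environment]', '=', 'production')],
    # model-keyed filter; project_id is inferred from the issue key
    filter_keys={'issue': [12345]},
    # (aggregation_function, column, alias)
    aggregations=[('count()', '', 'times_seen')],
    referrer='docs.example',
)
for row in body['data']:
    print(row['issue'], row['times_seen'])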
Example #35
    def query(self,
              projects,
              tags=None,
              environments=None,
              sort_by='date',
              limit=100,
              cursor=None,
              count_hits=False,
              paginator_options=None,
              search_filters=None,
              **parameters):

        from sentry.models import Group, GroupStatus, GroupSubscription

        search_filters = search_filters if search_filters is not None else []

        # ensure projects are from same org
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError('Cross organization search not supported')

        if paginator_options is None:
            paginator_options = {}

        if tags is None:
            tags = {}

        group_queryset = Group.objects.filter(project__in=projects).exclude(
            status__in=[
                GroupStatus.PENDING_DELETION,
                GroupStatus.DELETION_IN_PROGRESS,
                GroupStatus.PENDING_MERGE,
            ])

        qs_builder_conditions = {
            'status': QCallbackCondition(
                lambda status: Q(status=status),
            ),
            'bookmarked_by': QCallbackCondition(
                lambda user: Q(
                    bookmark_set__project__in=projects,
                    bookmark_set__user=user,
                ),
            ),
            'assigned_to': QCallbackCondition(
                functools.partial(assigned_to_filter, projects=projects),
            ),
            'unassigned': QCallbackCondition(
                functools.partial(unassigned_filter, projects=projects),
            ),
            'subscribed_by': QCallbackCondition(
                lambda user: Q(
                    id__in=GroupSubscription.objects.filter(
                        project__in=projects,
                        user=user,
                        is_active=True,
                    ).values_list('group'),
                ),
            ),
            'active_at': ScalarCondition('active_at'),
        }

        message = [
            search_filter for search_filter in search_filters
            if search_filter.key.name == 'message'
        ]
        if message and message[0].value.raw_value:
            message = message[0]
            # We only support full wildcard matching in postgres
            if is_postgres() and message.value.is_wildcard():
                group_queryset = message_regex_filter(group_queryset, message)
            else:
                # Otherwise, use the standard LIKE query
                qs_builder_conditions['message'] = QCallbackCondition(
                    lambda message: Q(
                        Q(message__icontains=message) | Q(culprit__icontains=message),
                    ),
                )

        group_queryset = QuerySetBuilder(qs_builder_conditions).build(
            group_queryset,
            search_filters,
        )
        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(
            organization=projects[0].organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        if retention_window_start:
            group_queryset = group_queryset.filter(
                last_seen__gte=retention_window_start)

        # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
        # seemed better to handle all the shared initialization and then hand off to the
        # actual backend.
        return self._query(projects, retention_window_start, group_queryset,
                           environments, sort_by, limit, cursor, count_hits,
                           paginator_options, search_filters, **parameters)
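The retention-window computation at the end of this method recurs across the surrounding examples; factored into a helper of our own (a sketch, not a Sentry API), it might look like:

from datetime import timedelta

from django.utils import timezone

from sentry import quotas


def retention_window_start(organization):
    # Earliest timestamp still inside the retention period, or None when
    # the organization's events are retained indefinitely.
    retention = quotas.get_event_retention(organization=organization)
    if retention:
        return timezone.now() - timedelta(days=retention)
    return None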
Example #36
File: backend.py  Project: snilwx/sentry
    def query(
        self,
        projects,
        environments=None,
        sort_by="date",
        limit=100,
        cursor=None,
        count_hits=False,
        paginator_options=None,
        search_filters=None,
        date_from=None,
        date_to=None,
        max_hits=None,
    ):
        search_filters = search_filters if search_filters is not None else []

        # ensure projects are from same org
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError("Cross organization search not supported")

        if paginator_options is None:
            paginator_options = {}

        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(organization=projects[0].organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None

        group_queryset = self._build_group_queryset(
            projects=projects,
            environments=environments,
            search_filters=search_filters,
            retention_window_start=retention_window_start,
            date_from=date_from,
            date_to=date_to,
        )

        query_executor = self._get_query_executor(
            group_queryset=group_queryset,
            projects=projects,
            environments=environments,
            search_filters=search_filters,
            date_from=date_from,
            date_to=date_to,
        )

        # ensure sort strategy is supported by executor
        if not query_executor.has_sort_strategy(sort_by):
            raise InvalidSearchQuery("Sort key '{}' not supported.".format(sort_by))

        return query_executor.query(
            projects=projects,
            retention_window_start=retention_window_start,
            group_queryset=group_queryset,
            environments=environments,
            sort_by=sort_by,
            limit=limit,
            cursor=cursor,
            count_hits=count_hits,
            paginator_options=paginator_options,
            search_filters=search_filters,
            date_from=date_from,
            date_to=date_to,
            max_hits=max_hits,
        )
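Because an unsupported sort key raises InvalidSearchQuery before the executor runs, a caller can probe and fall back. A hedged sketch; the 'priority' key, the backend variable, and the import path are assumptions:

from sentry.exceptions import InvalidSearchQuery  # import path assumed

# Hypothetical caller-side fallback; 'priority' may not be supported by
# every executor.
try:
    results = backend.query(projects, sort_by='priority', limit=25)
except InvalidSearchQuery:
    results = backend.query(projects, sort_by='date', limit=25)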
Example #37
    def query(
        self, projects, environments=None, sort_by='date', limit=100,
        cursor=None, count_hits=False, paginator_options=None,
        search_filters=None, date_from=None, date_to=None,
    ):
        from sentry.models import Group, GroupStatus, GroupSubscription

        search_filters = search_filters if search_filters is not None else []

        # ensure projects are from same org
        if len({p.organization_id for p in projects}) != 1:
            raise RuntimeError('Cross organization search not supported')

        if paginator_options is None:
            paginator_options = {}

        group_queryset = Group.objects.filter(project__in=projects).exclude(status__in=[
            GroupStatus.PENDING_DELETION,
            GroupStatus.DELETION_IN_PROGRESS,
            GroupStatus.PENDING_MERGE,
        ])

        qs_builder_conditions = {
            'status': QCallbackCondition(
                lambda status: Q(status=status),
            ),
            'bookmarked_by': QCallbackCondition(
                lambda user: Q(
                    bookmark_set__project__in=projects,
                    bookmark_set__user=user,
                ),
            ),
            'assigned_to': QCallbackCondition(
                functools.partial(assigned_to_filter, projects=projects),
            ),
            'unassigned': QCallbackCondition(
                functools.partial(unassigned_filter, projects=projects),
            ),
            'subscribed_by': QCallbackCondition(
                lambda user: Q(
                    id__in=GroupSubscription.objects.filter(
                        project__in=projects,
                        user=user,
                        is_active=True,
                    ).values_list('group'),
                ),
            ),
            'active_at': ScalarCondition('active_at'),
        }

        message = [
            search_filter for search_filter in search_filters
            if search_filter.key.name == 'message'
        ]
        if message and message[0].value.raw_value:
            message = message[0]
            # We only support full wildcard matching in postgres
            if is_postgres() and message.value.is_wildcard():
                group_queryset = message_regex_filter(group_queryset, message)
            else:
                # Otherwise, use the standard LIKE query
                qs_builder_conditions['message'] = QCallbackCondition(
                    lambda message: Q(
                        Q(message__icontains=message) | Q(culprit__icontains=message),
                    ),
                )

        group_queryset = QuerySetBuilder(qs_builder_conditions).build(
            group_queryset,
            search_filters,
        )
        # filter out groups which are beyond the retention period
        retention = quotas.get_event_retention(organization=projects[0].organization)
        if retention:
            retention_window_start = timezone.now() - timedelta(days=retention)
        else:
            retention_window_start = None
        # TODO: This could be optimized when building querysets to identify
        # criteria that are logically impossible (e.g. if the upper bound
        # for last seen is before the retention window starts, no results
        # exist.)
        if retention_window_start:
            group_queryset = group_queryset.filter(last_seen__gte=retention_window_start)

        # This is a punt because the SnubaSearchBackend (a subclass) shares so much that it
        # seemed better to handle all the shared initialization and then hand off to the
        # actual backend.
        return self._query(
            projects, retention_window_start, group_queryset, environments,
            sort_by, limit, cursor, count_hits, paginator_options,
            search_filters, date_from, date_to,
        )
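Finally, a hedged sketch of driving the shared query entry point from examples #35-#37; the SnubaSearchBackend instantiation and the projects list are placeholders for whatever the caller has on hand:

# Illustrative only: SnubaSearchBackend is named in the comments above as
# the subclass that implements _query(); construction details may differ.
backend = SnubaSearchBackend()
cursor_result = backend.query(
    projects=[project],  # all projects must belong to one organization
    sort_by='date',
    limit=100,
    count_hits=True,
)
for group in cursor_result:
    print(group.id, group.last_seen)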