def test_or_query(self):
        result = get_filter(
            "trend_percentage():>0% OR trend_percentage():<100%",
            {"aliases": self.improved_aliases},
        )

        assert result.having == [[
            [
                "or",
                [["less", ["trend_percentage", 1.0]],
                 ["greater", ["trend_percentage", 0.0]]],
            ],
            "=",
            1,
        ]]

        result = get_filter(
            "trend_percentage():>0% OR trend_percentage():<100%",
            {"aliases": self.regression_aliases},
        )

        assert result.having == [[
            [
                "or",
                [["greater", ["trend_percentage", 1.0]],
                 ["less", ["trend_percentage", 2.0]]],
            ],
            "=",
            1,
        ]]
    def test_greater_than(self):
        result = get_filter("trend_difference():>=0",
                            {"aliases": self.improved_aliases})

        assert result.having == [["trend_difference", "<=", 0.0]]

        result = get_filter("trend_difference():>=0",
                            {"aliases": self.regression_aliases})

        assert result.having == [["trend_difference", ">=", 0.0]]
    def test_negation(self):
        result = get_filter("!trend_difference():>=0",
                            {"aliases": self.improved_aliases})

        assert result.having == [["trend_difference", ">", 0.0]]

        result = get_filter("!trend_difference():>=0",
                            {"aliases": self.regression_aliases})

        assert result.having == [["trend_difference", "<", 0.0]]
    def test_confidence(self):
        result = get_filter("confidence():>6",
                            {"aliases": self.improved_aliases})

        assert result.having == [["t_test", ">", 6.0]]

        result = get_filter("confidence():>6",
                            {"aliases": self.regression_aliases})

        assert result.having == [["t_test", "<", -6.0]]
Example #5
def get_timeseries_snuba_filter(selected_columns,
                                query,
                                params,
                                rollup,
                                default_count=True):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_filter.update_with(
        resolve_field_list(selected_columns, snuba_filter, auto_fields=False))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures compatibility
    # with the rest of the timeseries endpoint's expectations.
    if len(snuba_filter.aggregations) == 1 and default_count:
        snuba_filter.aggregations[0][2] = "count"

    return snuba_filter, translated_columns
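A minimal usage sketch for the helper above; the query string, column list, and rollup are illustrative, and params mirrors the shape used elsewhere on this page:

from datetime import datetime, timedelta

params = {
    "start": datetime.now() - timedelta(days=1),
    "end": datetime.now(),
    "project_id": [1],
    "organization_id": 1,
}
# With default_count=True and a single aggregation, the aggregation's alias
# is rewritten to "count" before the filter is returned.
snuba_filter, translated_columns = get_timeseries_snuba_filter(
    ["count()"], "event.type:transaction", params, rollup=3600
)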
Example #6
    def _get_events_snuba(self, request, group, environments, query, tags, start, end):
        default_end = timezone.now()
        default_start = default_end - timedelta(days=90)
        params = {
            "group_ids": [group.id],
            "project_id": [group.project_id],
            "organization_id": group.project.organization_id,
            "start": start if start else default_start,
            "end": end if end else default_end,
        }
        direct_hit_resp = get_direct_hit_response(request, query, params, "api.group-events")
        if direct_hit_resp:
            return direct_hit_resp

        if environments:
            params["environment"] = [env.name for env in environments]

        full = request.GET.get("full", False)
        try:
            snuba_filter = get_filter(request.GET.get("query", None), params)
        except InvalidSearchQuery as e:
            raise ParseError(detail=str(e))

        snuba_filter.conditions.append(["event.type", "!=", "transaction"])

        data_fn = partial(eventstore.get_events, referrer="api.group-events", filter=snuba_filter)
        serializer = EventSerializer() if full else SimpleEventSerializer()
        return self.paginate(
            request=request,
            on_results=lambda results: serialize(results, request.user, serializer),
            paginator=GenericOffsetPaginator(data_fn=data_fn),
        )
Example #7
    def test_simple(self):
        result = get_filter("trend_percentage():>0% trend_difference():>0",
                            {"aliases": self.improved_aliases})

        assert result.having == [
            ["trend_percentage", "<", 1.0],
            ["trend_difference", "<", 0.0],
        ]

        result = get_filter("trend_percentage():>0% trend_difference():>0",
                            {"aliases": self.regression_aliases})

        assert result.having == [
            ["trend_percentage", ">", 1.0],
            ["trend_difference", ">", 0.0],
        ]
Example #8
    def test_and_query(self):
        result = get_filter(
            "trend_percentage():>0% AND trend_percentage():<100%",
            {"aliases": self.improved_aliases},
        )

        assert result.having == [["trend_percentage", "<", 1.0],
                                 ["trend_percentage", ">", 0.0]]

        result = get_filter(
            "trend_percentage():>0% AND trend_percentage():<100%",
            {"aliases": self.regression_aliases},
        )

        assert result.having == [["trend_percentage", ">", 1.0],
                                 ["trend_percentage", "<", 2.0]]
Example #9
    def build_snuba_filter(
        self,
        query: str,
        environment: Optional[Environment],
        params: Optional[Mapping[str, Any]] = None,
    ) -> Filter:
        resolve_func = resolve_column(Dataset(self.dataset.value))
        aggregations = [self.aggregate]
        # This aggregation is added to return the total number of sessions in crash
        # rate alerts, which is used to identify whether we are below a general minimum alert threshold.
        count_col = re.search(r"(sessions|users)", self.aggregate)
        if not count_col:
            raise UnsupportedQuerySubscription(
                "Only crash free percentage queries are supported for subscriptions"
                "over the sessions dataset"
            )
        count_col_matched = count_col.group()

        aggregations += [f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"]
        functions_acl = ["identity"]
        snuba_filter = get_filter(query, params=params)
        snuba_filter.update_with(
            resolve_field_list(
                aggregations, snuba_filter, auto_fields=False, functions_acl=functions_acl
            )
        )
        snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
        if environment:
            snuba_filter.conditions.append(["environment", "=", environment.name])
        return snuba_filter
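A small hedged illustration of the count_col regex above; the aggregate string is a hypothetical crash-rate aggregate and only the re.search behaviour is being shown:

import re

aggregate = "percentage(sessions_crashed, sessions) AS _crash_rate_alert_aggregate"  # hypothetical
match = re.search(r"(sessions|users)", aggregate)
assert match is not None
assert match.group() == "sessions"  # used to build identity(sessions) AS the session-count alias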
Example #10
    def get_snuba_query_args_legacy(
        self, request: Request, organization: Organization
    ) -> Dict[
        str,
        Union[
            Optional[datetime],
            Sequence[Sequence[Union[str, str, Any]]],
            Optional[Dict[str, Sequence[int]]],
        ],
    ]:
        params = self.get_filter_params(request, organization)
        query = request.GET.get("query")
        try:
            _filter = get_filter(query, params)
        except InvalidSearchQuery as e:
            raise ParseError(detail=str(e))

        snuba_args = {
            "start": _filter.start,
            "end": _filter.end,
            "conditions": _filter.conditions,
            "filter_keys": _filter.filter_keys,
        }

        return snuba_args
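The dict returned above lines up with keyword arguments that raw_query accepts elsewhere on this page; a hypothetical pass-through as it might appear inside another endpoint method (the aggregation, dataset, and referrer are illustrative):

snuba_args = self.get_snuba_query_args_legacy(request, organization)
result = raw_query(
    aggregations=[["count", None, "count"]],
    dataset=Dataset.Discover,
    referrer="api.example-endpoint",  # hypothetical referrer
    **snuba_args,  # start, end, conditions, filter_keys
)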
Example #11
    def validate(self, data):
        query = {}
        query_keys = [
            "environment",
            "query",
            "fields",
            "conditions",
            "aggregations",
            "range",
            "start",
            "end",
            "orderby",
            "limit",
            "widths",
            "yAxis",
            "display",
            "topEvents",
        ]

        for key in query_keys:
            if data.get(key) is not None:
                query[key] = data[key]

        version = data.get("version", 1)
        self.validate_version_fields(version, query)
        if version == 2:
            if len(query["fields"]) < 1:
                raise serializers.ValidationError(
                    "You must include at least one field.")

        if data["projects"] == ALL_ACCESS_PROJECTS:
            data["projects"] = []
            query["all_projects"] = True

        if "query" in query:
            try:
                get_filter(query["query"], self.context["params"])
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(
                    f"Cannot save invalid query: {err}")

        return {
            "name": data["name"],
            "project_ids": data["projects"],
            "query": query,
            "version": version,
        }
Example #12
 def get_snuba_filter(self, request, organization, params=None):
     if params is None:
         params = self.get_snuba_params(request, organization)
     query = request.GET.get("query")
     try:
         return get_filter(query, params)
     except InvalidSearchQuery as e:
         raise ParseError(detail=str(e))
Example #13
def query_tag_data(
    params: Mapping[str, str],
    referrer: str,
    filter_query: Optional[str] = None,
    aggregate_column: Optional[str] = None,
) -> Optional[Dict]:
    """
    Fetch general data about all the transactions with this transaction name to feed into the facet query.
    :return: Returns the row with the aggregate and count if the query was successful.
             Returns None if the query was not successful, which causes the endpoint to return early.
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(
            snuba_filter)

    translated_aggregate_column = discover.resolve_discover_column(
        aggregate_column)

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.frequent_tags"):
        # Get the average and count to use to filter the next request to facets
        tag_data = discover.query(
            selected_columns=[
                "count()",
                f"avg({aggregate_column}) as aggregate",
                f"max({aggregate_column}) as max",
                f"min({aggregate_column}) as min",
            ],
            conditions=[
                [translated_aggregate_column, "IS NOT NULL", None],
            ],
            query=filter_query,
            params=params,
            orderby=["-count"],
            referrer=f"{referrer}.all_transactions",
            limit=1,
        )

        if len(tag_data["data"]) != 1:
            return None

        counts = [r["count"] for r in tag_data["data"]]
        aggregates = [r["aggregate"] for r in tag_data["data"]]

        # Return early to avoid doing more queries with 0 count transactions or aggregates for columns that don't exist
        if counts[0] == 0 or aggregates[0] is None:
            return None
    if not tag_data["data"][0]:
        return None
    return tag_data["data"][0]
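A hedged sketch of how this helper can feed the facet query defined later on this page (query_facet_performance); the parameter values are illustrative:

params = {"project_id": [1], "organization_id": 1}  # plus start/end in practice
row = query_tag_data(
    params,
    referrer="api.performance.tag-page",   # hypothetical referrer
    filter_query="transaction:/checkout",  # hypothetical filter
    aggregate_column="duration",
)
if row is not None:
    facets = query_facet_performance(
        params,
        tag_data=row,  # supplies row["aggregate"] and row["count"] for sampling
        referrer="api.performance.tag-page",
        aggregate_column="duration",
        filter_query="transaction:/checkout",
        limit=10,
    )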
Example #14
    def __init__(self, query, params, allow_minute_resolution=False):
        self.query = query.get("query", "")
        self.raw_fields = raw_fields = query.getlist("field", [])
        self.raw_groupby = raw_groupby = query.getlist("groupBy", [])

        if len(raw_fields) == 0:
            raise InvalidField('Request is missing a "field"')

        self.fields = {}
        for key in raw_fields:
            if key not in COLUMN_MAP:
                raise InvalidField(f'Invalid field: "{key}"')
            self.fields[key] = COLUMN_MAP[key]

        self.groupby = []
        for key in raw_groupby:
            if key not in GROUPBY_MAP:
                raise InvalidField(f'Invalid groupBy: "{key}"')
            self.groupby.append(GROUPBY_MAP[key])

        start, end, rollup = get_constrained_date_range(
            query, allow_minute_resolution)
        self.rollup = rollup
        self.start = start
        self.end = end

        self.params = params

        query_columns = set()
        for field in self.fields.values():
            query_columns.update(field.get_snuba_columns(raw_groupby))
        for groupby in self.groupby:
            query_columns.update(groupby.get_snuba_columns())
        self.query_columns = list(query_columns)

        query_groupby = set()
        for groupby in self.groupby:
            query_groupby.update(groupby.get_snuba_groupby())
        self.query_groupby = list(query_groupby)

        # The `params` are project_id, organization_id, and environment;
        # they also include start and end, but we computed those ourselves above.
        snuba_filter = get_filter(self.query, params)

        # This makes sure that literals in complex queries are properly quoted,
        # and that unknown fields raise errors.
        conditions = [
            resolve_condition(c, resolve_column)
            for c in snuba_filter.conditions
        ]

        self.aggregations = snuba_filter.aggregations
        self.conditions = conditions
        self.filter_keys = snuba_filter.filter_keys
Example #15
def build_snuba_filter(dataset, query, aggregate, environment, event_types, params=None):
    resolve_func = (
        resolve_column(Dataset.Events)
        if dataset == QueryDatasets.EVENTS
        else resolve_column(Dataset.Transactions)
    )
    query = apply_dataset_query_conditions(dataset, query, event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(["group_id", "IN", list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
Example #16
    def get_snuba_query_args_legacy(self, request, organization):
        params = self.get_filter_params(request, organization)
        query = request.GET.get("query")
        try:
            _filter = get_filter(query, params)
        except InvalidSearchQuery as e:
            raise ParseError(detail=str(e))

        snuba_args = {
            "start": _filter.start,
            "end": _filter.end,
            "conditions": _filter.conditions,
            "filter_keys": _filter.filter_keys,
        }

        return snuba_args
Example #17
def get_direct_hit_response(request, query, snuba_params, referrer):
    """
    Checks whether a query is a direct hit for an event, and if so returns
    a response. Otherwise returns None.
    """
    event_id = normalize_event_id(query)
    if event_id:
        snuba_filter = get_filter(query=f"id:{event_id}", params=snuba_params)
        snuba_filter.conditions.append(["event.type", "!=", "transaction"])

        results = eventstore.get_events(referrer=referrer, filter=snuba_filter)

        if len(results) == 1:
            response = Response(serialize(results, request.user))
            response["X-Sentry-Direct-Hit"] = "1"
            return response
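    # If the query is not an event id, or there is not exactly one matching event, fall through and implicitly return None.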
Example #18
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError(
                    {
                        "fields": "fields are required during creation.",
                        "conditions": "conditions are required during creation.",
                    }
                )

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", []).copy()
        orderby = self._get_attr(data, "orderby", "")
        equations, fields = categorize_columns(fields)

        if equations is not None:
            resolved_equations, _ = resolve_equation_list(equations, fields)
        else:
            resolved_equations = []

        try:
            # The eps/epm functions require an interval argument, or a start/end
            # from which the interval can be computed. This uses a hard-coded
            # start/end to ensure the validation succeeds, since the values
            # themselves don't matter.
            params = {
                "start": datetime.now() - timedelta(days=1),
                "end": datetime.now(),
                "project_id": [p.id for p in self.context.get("projects")],
                "organization_id": self.context.get("organization").id,
            }

            snuba_filter = get_filter(conditions, params=params)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError({"conditions": f"Invalid conditions: {err}"})

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields, snuba_filter, resolved_equations=resolved_equations)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError({"fields": f"Invalid fields: {err}"})
        return data
Example #19
    def build_snuba_filter(
        self,
        query: str,
        environment: Optional[Environment],
        params: Optional[Mapping[str, Any]] = None,
    ) -> Filter:
        snuba_filter = get_filter(query, params=params)
        conditions = copy(snuba_filter.conditions)
        session_status_tag_values = resolve_many_weak(["crashed", "init"])
        snuba_filter.update_with({
            "aggregations":
            [[f"{self.aggregation_func}(value)", None, "value"]],
            "conditions": [
                ["metric_id", "=",
                 resolve(self.metric_key.value)],
                [self.session_status, "IN", session_status_tag_values],
            ],
            "groupby":
            self.get_query_groupby(),
            "rollup":
            self.get_granularity(),
        })
        if environment:
            snuba_filter.conditions.append([
                resolve_tag_key("environment"), "=",
                resolve_weak(environment.name)
            ])
        if query and len(conditions) > 0:
            release_conditions = [
                condition for condition in conditions
                if condition[0] == "release"
            ]

            for release_condition in release_conditions:
                snuba_filter.conditions.append([
                    resolve_tag_key(release_condition[0]),
                    release_condition[1],
                    resolve_weak(release_condition[2]),
                ])

        return snuba_filter
Example #20
def build_snuba_filter(dataset,
                       query,
                       aggregate,
                       environment,
                       event_types,
                       params=None):
    resolve_func = {
        QueryDatasets.EVENTS: resolve_column(Dataset.Events),
        QueryDatasets.SESSIONS: resolve_column(Dataset.Sessions),
        QueryDatasets.TRANSACTIONS: resolve_column(Dataset.Transactions),
    }[dataset]

    functions_acl = None

    aggregations = [aggregate]
    if dataset == QueryDatasets.SESSIONS:
        # This aggregation is added to return the total number of sessions in crash
        # rate alerts, which is used to identify whether we are below a general minimum alert threshold.
        count_col = re.search(r"(sessions|users)", aggregate)
        count_col_matched = count_col.group()

        aggregations += [
            f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"
        ]
        functions_acl = ["identity"]

    query = apply_dataset_query_conditions(dataset, query, event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list(aggregations,
                           snuba_filter,
                           auto_fields=False,
                           functions_acl=functions_acl))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(
            ["group_id", "IN",
             list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
Example #21
    def build_snuba_filter(
        self,
        query: str,
        environment: Optional[Environment],
        params: Optional[Mapping[str, Any]] = None,
    ) -> Filter:
        resolve_func = resolve_column(Dataset(self.dataset.value))

        query = apply_dataset_query_conditions(QueryDatasets(self.dataset), query, self.event_types)
        snuba_filter = get_filter(query, params=params)
        snuba_filter.update_with(
            resolve_field_list([self.aggregate], snuba_filter, auto_fields=False)
        )
        snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
        if snuba_filter.group_ids:
            snuba_filter.conditions.append(
                ["group_id", "IN", list(map(int, snuba_filter.group_ids))]
            )
        if environment:
            snuba_filter.conditions.append(["environment", "=", environment.name])
        return snuba_filter
Example #22
def query_facet_performance(
    params: Mapping[str, str],
    tag_data: Mapping[str, Any],
    referrer: str,
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
    orderby: Optional[str] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    all_tag_keys: Optional[bool] = None,
    tag_key: Optional[bool] = None,
) -> Dict:
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)
    translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    # Aggregate (avg) and count of all transactions for this query
    transaction_aggregate = tag_data["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    sample_start_count = 50000
    transaction_count = tag_data["count"]
    sampling_enabled = transaction_count > sample_start_count

    # log-e growth starting at 50,000
    target_sample = max(
        sample_start_count * (math.log(transaction_count) - (math.log(sample_start_count) - 1)),
        transaction_count,
    )

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample / transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0), 1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1

    # Exclude tags that have high cardinality and are generally unrelated to performance
    excluded_tags = [
        "tags_key",
        "NOT IN",
        ["trace", "trace.ctx", "trace.span", "project", "browser", "celery_task_id", "url"],
    ]

    with sentry_sdk.start_span(op="discover.discover", description="facets.aggregate_tags"):
        span.set_data("sample_rate", sample_rate)
        span.set_data("target_sample", target_sample)
        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.005 if transaction_aggregate else 0
        having = [excluded_tags]
        if not all_tag_keys and not tag_key:
            having.append(["aggregate", ">", aggregate_comparison])

        resolved_orderby = [] if orderby is None else orderby

        conditions.append([translated_aggregate_column, "IS NOT NULL", None])

        if tag_key:
            conditions.append(["tags_key", "IN", [tag_key]])

        tag_key_limit = limit if tag_key else 1

        tag_selected_columns = [
            [
                "divide",
                [
                    ["sum", [["minus", [translated_aggregate_column, transaction_aggregate]]]],
                    frequency_sample_rate,
                ],
                "sumdelta",
            ],
            ["count", [], "count"],
            [
                "divide",
                [["divide", [["count", []], frequency_sample_rate]], transaction_count],
                "frequency",
            ],
            ["divide", ["aggregate", transaction_aggregate], "comparison"],
            ["avg", [translated_aggregate_column], "aggregate"],
        ]

        limitby = [tag_key_limit, "tags_key"] if not tag_key else None

        results = discover.raw_query(
            selected_columns=tag_selected_columns,
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=resolved_orderby + ["tags_key", "tags_value"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            referrer=f"{referrer}.tag_values".format(referrer, "tag_values"),
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=limitby,
            limit=limit,
            offset=offset,
        )

        results = discover.transform_results(results, {}, translated_columns, snuba_filter)

        return results
Example #23
def get_performance_facets(
    query,
    params,
    orderby=None,
    aggregate_column="duration",
    aggregate_function="avg",
    limit=20,
    offset=None,
    referrer=None,
):
    """
    High-level API for getting 'facet map' results for performance data

    Performance facets are high frequency tags and the aggregate duration of
    their most frequent values

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.frequent_tags"):
        # Get the most relevant tag keys
        key_names = raw_query(
            aggregations=[
                [aggregate_function, aggregate_column, "aggregate"],
                ["count", None, "count"],
            ],
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            orderby=["-count"],
            dataset=Dataset.Discover,
            referrer="{}.{}".format(referrer, "all_transactions"),
        )
        counts = [r["count"] for r in key_names["data"]]
        aggregates = [r["aggregate"] for r in key_names["data"]]

        # Return early to avoid doing more queries with 0 count transactions or aggregates for columns that don't exist
        if len(counts) != 1 or counts[0] == 0 or aggregates[0] is None:
            return []

    results = []
    snuba_filter.conditions.append([aggregate_column, "IS NOT NULL", None])

    # Aggregate for transaction
    transaction_aggregate = key_names["data"][0]["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    transaction_count = key_names["data"][0]["count"]
    sampling_enabled = transaction_count > 50000
    # Log growth starting at 50,000
    target_sample = 50000 * (math.log(transaction_count, 10) - 3)

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample /
                                                            transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0),
                      1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1

    excluded_tags = [
        "tags_key",
        "NOT IN",
        [
            "trace", "trace.ctx", "trace.span", "project", "browser",
            "celery_task_id"
        ],
    ]

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.aggregate_tags"):
        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.01 if transaction_aggregate else 0
        having = [excluded_tags]
        if orderby and orderby in ("sumdelta", "-sumdelta", "aggregate",
                                   "-aggregate"):
            having.append(["aggregate", ">", aggregate_comparison])

        if orderby is None:
            orderby = []
        else:
            orderby = [orderby]

        tag_values = raw_query(
            selected_columns=[
                [
                    "sum",
                    [
                        "minus",
                        [
                            aggregate_column,
                            str(transaction_aggregate),
                        ],
                    ],
                    "sumdelta",
                ],
            ],
            aggregations=[
                [aggregate_function, aggregate_column, "aggregate"],
                ["count", None, "cnt"],
            ],
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=orderby + ["tags_key"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            referrer="{}.{}".format(referrer, "tag_values"),
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=[1, "tags_key"],
            limit=limit,
            offset=offset,
        )
        results.extend([
            PerformanceFacetResult(
                key=r["tags_key"],
                value=r["tags_value"],
                performance=float(r["aggregate"]),
                count=int(r["cnt"]),
                frequency=float(
                    (r["cnt"] / frequency_sample_rate) / transaction_count),
                comparison=float(r["aggregate"] / transaction_aggregate),
                sumdelta=float(r["sumdelta"]),
            ) for r in tag_values["data"]
        ])

    return results
Example #24
def get_timeseries_snuba_filter(selected_columns, query, params):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    columns = []
    equations = []

    for column in selected_columns:
        if is_equation(column):
            equations.append(strip_equation(column))
        else:
            columns.append(column)

    if len(equations) > 0:
        resolved_equations, updated_columns = resolve_equation_list(
            equations, columns, aggregates_only=True, auto_add=True)
    else:
        resolved_equations = []
        updated_columns = columns

    # For the new apdex, we need to add the project threshold config as a selected
    # column, which means the group by for the time series won't work.
    # As a temporary solution, we calculate the mean of all the project-level
    # thresholds in the request and use the legacy apdex, user_misery
    # or count_miserable calculation.
    # TODO(snql): Alias the project_threshold_config column so it doesn't have to
    # be in the SELECT statement and group by, which would let us use the new
    # apdex, user_misery and count_miserable.
    threshold = None
    for agg in CONFIGURABLE_AGGREGATES:
        if agg not in updated_columns:
            continue

        if threshold is None:
            project_ids = params.get("project_id")
            threshold_configs = list(
                ProjectTransactionThreshold.objects.filter(
                    organization_id=params["organization_id"],
                    project_id__in=project_ids,
                ).values_list("threshold", flat=True))

            projects_without_threshold = len(project_ids) - len(
                threshold_configs)
            threshold_configs.extend([DEFAULT_PROJECT_THRESHOLD] *
                                     projects_without_threshold)
            threshold = int(mean(threshold_configs))

        updated_columns.remove(agg)
        updated_columns.append(
            CONFIGURABLE_AGGREGATES[agg].format(threshold=threshold))

    snuba_filter.update_with(
        resolve_field_list(updated_columns,
                           snuba_filter,
                           auto_fields=False,
                           resolved_equations=resolved_equations))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    return snuba_filter, translated_columns
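To make the threshold averaging above concrete, a small hypothetical sketch; the threshold values and the 300ms default are illustrative, not taken from this page:

from statistics import mean

threshold_configs = [250, 500]       # thresholds configured on two of the requested projects
projects_without_threshold = 3 - 2   # one requested project has no configured threshold
threshold_configs.extend([300] * projects_without_threshold)  # assumed DEFAULT_PROJECT_THRESHOLD
threshold = int(mean(threshold_configs))  # -> 350, substituted into e.g. "apdex(350)"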
Example #25
    def validate(self, data):
        if not data.get("id"):
            keys = set(data.keys())
            if self.required_for_create - keys:
                raise serializers.ValidationError({
                    "fields":
                    "fields are required during creation.",
                    "conditions":
                    "conditions are required during creation.",
                })

        # Validate the query that would be created when run.
        conditions = self._get_attr(data, "conditions", "")
        fields = self._get_attr(data, "fields", []).copy()
        orderby = self._get_attr(data, "orderby", "")
        equations, fields = categorize_columns(fields)
        is_table = is_table_display_type(self.context.get("displayType"))

        if equations is not None:
            try:
                resolved_equations, _, _ = resolve_equation_list(
                    equations,
                    fields,
                    auto_add=not is_table,
                    aggregates_only=not is_table,
                )
            except (InvalidSearchQuery, ArithmeticError) as err:
                raise serializers.ValidationError(
                    {"fields": f"Invalid fields: {err}"})
        else:
            resolved_equations = []

        try:
            parse_search_query(conditions)
        except InvalidSearchQuery as err:
            # We don't know if the widget that this query belongs to is an
            # Issue widget or a Discover widget. Pass the error back to the
            # Widget serializer to decide whether or not to raise this
            # error based on the Widget's type.
            data["issue_query_error"] = {
                "conditions": [f"Invalid conditions: {err}"]
            }

        try:
            # The eps/epm functions require an interval argument, or a start/end
            # from which the interval can be computed. This uses a hard-coded
            # start/end to ensure the validation succeeds, since the values
            # themselves don't matter.
            params = {
                "start": datetime.now() - timedelta(days=1),
                "end": datetime.now(),
                "project_id": [p.id for p in self.context.get("projects")],
                "organization_id": self.context.get("organization").id,
            }

            snuba_filter = get_filter(conditions, params=params)
        except InvalidSearchQuery as err:
            data["discover_query_error"] = {
                "conditions": [f"Invalid conditions: {err}"]
            }
            return data

        if orderby:
            snuba_filter.orderby = get_function_alias(orderby)
        try:
            resolve_field_list(fields,
                               snuba_filter,
                               resolved_equations=resolved_equations)
        except InvalidSearchQuery as err:
            # We don't know if the widget that this query belongs to is an
            # Issue widget or a Discover widget. Pass the error back to the
            # Widget serializer to decide whether or not to raise this
            # error based on the Widget's type.
            data["discover_query_error"] = {"fields": f"Invalid fields: {err}"}

        return data
Example #26
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # coerce the fields into a list as needed
            fields = query_info.get("field", [])
            if not isinstance(fields, list):
                fields = [fields]

            if len(fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)
            elif len(fields) == 0:
                raise serializers.ValidationError("at least one field is required to export")

            if "query" not in query_info:
                detail = "query is a required to export, please pass an empty string if you don't want to set one"
                raise serializers.ValidationError(detail)

            query_info["field"] = fields

            if not query_info.get("project"):
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # make sure to fix the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                snuba_filter = get_filter(query_info["query"], processor.params)
                resolve_field_list(
                    fields.copy(),
                    snuba_filter,
                    auto_fields=True,
                    auto_aggregations=True,
                )
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data
Example #27
def get_facets(query, params, limit=10, referrer=None):
    """
    High-level API for getting 'facet map' results.

    Facets are high frequency tags and attribute results that
    can be used to further refine user queries. When many projects
    are requested sampling will be enabled to help keep response times low.

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

    # Exclude tracing tags as they are noisy and generally not helpful.
    # TODO(markus): Tracing tags are no longer written but may still reside in DB.
    excluded_tags = [
        "tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span", "project"]
    ]

    # Sample keys for multi-project results, as we don't need accuracy
    # with that much data.
    sample = len(snuba_filter.filter_keys["project_id"]) > 2

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.frequent_tags"):
        # Get the most frequent tag keys
        key_names = raw_query(
            aggregations=[["count", None, "count"]],
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            orderby=["-count", "tags_key"],
            groupby="tags_key",
            having=[excluded_tags],
            dataset=Dataset.Discover,
            limit=limit,
            referrer=referrer,
            turbo=sample,
        )
        top_tags = [r["tags_key"] for r in key_names["data"]]
        if not top_tags:
            return []

    # TODO(mark) Make the sampling rate scale based on the result size and scaling factor in
    # sentry.options. To test the lowest acceptable sampling rate, we use 0.1 which
    # is equivalent to turbo. We don't use turbo though as we need to re-scale data, and
    # using turbo could cause results to be wrong if the value of turbo is changed in snuba.
    sampling_enabled = options.get("discover2.tags_facet_enable_sampling")
    sample_rate = 0.1 if (sampling_enabled
                          and key_names["data"][0]["count"] > 10000) else None
    # Rescale the results if we're sampling
    multiplier = 1 / sample_rate if sample_rate is not None else 1

    fetch_projects = False
    if len(params.get("project_id", [])) > 1:
        if len(top_tags) == limit:
            top_tags.pop()
        fetch_projects = True

    results = []
    if fetch_projects:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="facets.projects"):
            project_values = raw_query(
                aggregations=[["count", None, "count"]],
                start=snuba_filter.start,
                end=snuba_filter.end,
                conditions=snuba_filter.conditions,
                filter_keys=snuba_filter.filter_keys,
                groupby="project_id",
                orderby="-count",
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
            )
            results.extend([
                FacetResult("project", r["project_id"],
                            int(r["count"]) * multiplier)
                for r in project_values["data"]
            ])

    # Get tag counts for our top tags. Fetching them individually
    # allows snuba to leverage promoted tags better and enables us to get
    # the value count we want.
    max_aggregate_tags = options.get("discover2.max_tags_to_combine")
    individual_tags = []
    aggregate_tags = []
    for i, tag in enumerate(top_tags):
        if tag == "environment":
            # Add tags here that you want to query individually
            individual_tags.append(tag)
        elif i >= len(top_tags) - max_aggregate_tags:
            aggregate_tags.append(tag)
        else:
            individual_tags.append(tag)

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.individual_tags") as span:
        span.set_data("tag_count", len(individual_tags))
        for tag_name in individual_tags:
            tag = f"tags[{tag_name}]"
            tag_values = raw_query(
                aggregations=[["count", None, "count"]],
                conditions=snuba_filter.conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=["-count"],
                groupby=[tag],
                limit=TOP_VALUES_DEFAULT_LIMIT,
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
            )
            results.extend([
                FacetResult(tag_name, r[tag],
                            int(r["count"]) * multiplier)
                for r in tag_values["data"]
            ])

    if aggregate_tags:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="facets.aggregate_tags"):
            conditions = snuba_filter.conditions
            conditions.append(["tags_key", "IN", aggregate_tags])
            tag_values = raw_query(
                aggregations=[["count", None, "count"]],
                conditions=conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=["tags_key", "-count"],
                groupby=["tags_key", "tags_value"],
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                # Ensures Snuba will not apply FINAL
                turbo=sample_rate is not None,
                limitby=[TOP_VALUES_DEFAULT_LIMIT, "tags_key"],
            )
            results.extend([
                FacetResult(r["tags_key"], r["tags_value"],
                            int(r["count"]) * multiplier)
                for r in tag_values["data"]
            ])

    return results
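A hedged usage sketch of the facet helper above, assuming FacetResult exposes key/value/count attributes to match how it is constructed in the function; the query, limit, and referrer are illustrative:

facets = get_facets(
    query="event.type:error",  # hypothetical filter
    params=params,             # start/end/project_id/organization_id as elsewhere on this page
    limit=10,
    referrer="api.organization-events-facets",  # hypothetical referrer
)
for facet in facets:
    # Each result carries a tag key, a tag value, and a (possibly rescaled) count.
    print(facet.key, facet.value, facet.count)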
Example #28
def query_top_tags(
    params: Mapping[str, str],
    tag_key: str,
    limit: int,
    referrer: str,
    orderby: Optional[List[str]],
    offset: Optional[int] = None,
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
) -> Optional[List[Any]]:
    """
    Fetch counts by tag value, finding the top tag values for a tag key up to a limit.
    :return: Returns the rows with the value, the aggregate and the count if the query was successful.
             Returns None if the query was not successful, which causes the endpoint to return early.
    """
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)

    translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    with sentry_sdk.start_span(op="discover.discover", description="facets.top_tags"):

        if not orderby:
            orderby = ["-count"]

        for i, sort in enumerate(orderby):
            if "frequency" in sort:
                # Replace "frequency" with "count": it's the same underlying data dimension, and this way we don't have to modify the existing histogram query.
                orderby[i] = sort.replace("frequency", "count")

        if "tags_value" not in orderby:
            orderby = orderby + ["tags_value"]

        # Get the average and count to use to filter the next request to facets
        tag_data = discover.query(
            selected_columns=[
                "count()",
                f"avg({aggregate_column}) as aggregate",
                "array_join(tags.value) as tags_value",
            ],
            query=filter_query,
            params=params,
            orderby=orderby,
            conditions=[
                [translated_aggregate_column, "IS NOT NULL", None],
                ["tags_key", "IN", [tag_key]],
            ],
            functions_acl=["array_join"],
            referrer=f"{referrer}.top_tags",
            limit=limit,
            offset=offset,
        )

        if len(tag_data["data"]) <= 0:
            return None

        counts = [r["count"] for r in tag_data["data"]]

        # Return early to avoid doing more queries with 0 count transactions or aggregates for columns that don't exist
        if counts[0] == 0:
            return None
    if not tag_data["data"]:
        return None
    return tag_data["data"]
Example #29
def prepare_discover_query(
    selected_columns,
    query,
    params,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby,
                                                  (list,
                                                   tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)
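For reference, a hedged sketch of the nested having shape this validation walks; it mirrors the boolean condition produced in test_or_query at the top of this page, with illustrative aliases:

having_clause = [
    ["or", [["greater", ["trend_percentage", 1.0]], ["less", ["trend_difference", 0.0]]]],
    "=",
    1,
]
# The loop above unwraps SNUBA_AND / SNUBA_OR wrappers via args_to_check and,
# for [fn, [args]] entries whose second element is a list, treats that list's
# first item as an alias that must also appear as the final element of one of
# snuba_filter.aggregations.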