Example #1
    def test_simple_orderby(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "release"],
            orderby=["user.email"],
        )

        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.orderby,
            [
                OrderBy(Function("toString", [Column("email")], "user.email"),
                        Direction.ASC)
            ],
        )
        query.get_snql_query().validate()

        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            selected_columns=["user.email", "release"],
            orderby=["-user.email"],
        )

        self.assertCountEqual(query.where, self.default_conditions)
        self.assertCountEqual(
            query.orderby,
            [
                OrderBy(Function("toString", [Column("email")], "user.email"),
                        Direction.DESC)
            ],
        )
        query.get_snql_query().validate()
Example #2
    def test_environment_param(self):
        self.params["environment"] = ["", "prod"]
        query = QueryBuilder(Dataset.Discover,
                             self.params,
                             selected_columns=["environment"])

        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                Or([
                    Condition(Column("environment"), Op.IS_NULL),
                    Condition(Column("environment"), Op.EQ, "prod"),
                ]),
            ],
        )
        query.get_snql_query().validate()

        self.params["environment"] = ["dev", "prod"]
        query = QueryBuilder(Dataset.Discover,
                             self.params,
                             selected_columns=["environment"])

        self.assertCountEqual(
            query.where,
            [
                *self.default_conditions,
                Condition(Column("environment"), Op.IN, ["dev", "prod"]),
            ],
        )
        query.get_snql_query().validate()
Example #3
    def test_environment_filter(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "environment:prod",
            ["environment"],
        )

        self.assertCountEqual(
            query.where,
            [
                Condition(Column("environment"), Op.EQ, "prod"),
                *self.default_conditions,
            ],
        )
        query.get_snql_query().validate()

        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "environment:[dev, prod]",
            ["environment"],
        )

        self.assertCountEqual(
            query.where,
            [
                Condition(Column("environment"), Op.IN, ["dev", "prod"]),
                *self.default_conditions,
            ],
        )
        query.get_snql_query().validate()
Example #4
def wip_snql_query(
    selected_columns,
    query,
    params,
    equations=None,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    Replacement API for query using snql, this function is still a work in
    progress and is not ready for use in production
    """
    builder = QueryBuilder(
        Dataset.Discover,
        params,
        query=query,
        selected_columns=selected_columns,
        orderby=orderby,
        use_aggregate_conditions=use_aggregate_conditions,
        limit=limit,
    )
    snql_query = builder.get_snql_query()

    results = raw_snql_query(snql_query, referrer)
    return results
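A minimal call sketch, assuming `params` already carries the usual project, organization, and date-range keys. Note that several keyword arguments (`equations`, `offset`, `auto_fields`, `auto_aggregations`, `conditions`, `functions_acl`) are accepted but not yet forwarded to QueryBuilder:

results = wip_snql_query(
    selected_columns=["transaction", "count()"],
    query="event.type:transaction",
    params=params,  # assumed to contain project_id, organization_id, start, end
    orderby=["-count"],
    referrer="api.example.wip-snql",  # hypothetical referrer string
)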
Example #5
    def test_array_join_clause(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "spans_op",
                "count()",
            ],
            array_join="spans_op",
        )
        self.assertCountEqual(
            query.columns,
            [
                AliasedExpression(Column("spans.op"), "spans_op"),
                Function("count", [], "count"),
            ],
        )
        assert query.array_join == Column("spans.op")
        query.get_snql_query().validate()
Example #6
    def test_turbo(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "count()",
            ],
            turbo=True,
        )
        assert query.turbo.value
        snql_query = query.get_snql_query()
        snql_query.validate()
        assert snql_query.turbo.value
Example #7
    def test_sample_rate(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "",
            selected_columns=[
                "count()",
            ],
            sample_rate=0.1,
        )
        assert query.sample_rate == 0.1
        snql_query = query.get_snql_query()
        snql_query.validate()
        assert snql_query.match.sample == 0.1
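The sample rate lands on the Entity in the generated MATCH clause rather than in WHERE, which is why the test reads it back via `snql_query.match.sample`. A minimal sketch of the same thing built with snuba-sdk directly, assuming the Entity constructor's `sample` keyword:

from snuba_sdk import Entity

# Sampling is a property of the entity being queried.
entity = Entity("discover", sample=0.1)
assert entity.sample == 0.1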
Example #8
    def test_simple_query(self):
        query = QueryBuilder(
            Dataset.Discover,
            self.params,
            "user.email:[email protected] release:1.2.1",
            ["user.email", "release"],
        )

        self.assertCountEqual(
            query.where,
            [
                Condition(Column("email"), Op.EQ, "*****@*****.**"),
                Condition(Column("release"), Op.EQ, "1.2.1"),
                *self.default_conditions,
            ],
        )
        self.assertCountEqual(
            query.select,
            [
                Function("toString", [Column("email")], "user.email"),
                Column("release"),
            ],
        )
        query.get_snql_query().validate()
Example #9
def data_fn(offset: int, limit: int) -> Any:
    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=["spans_op", "count()"],
        array_join="spans_op",
        query=query,
        limit=limit,
        offset=offset,
        orderby="-count",
    )
    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-span-ops")
    return [
        SpanOp(op=row["spans_op"], count=row["count"])
        for row in results["data"]
    ]
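In the surrounding endpoint this closure is typically handed to a paginator that supplies `offset` and `limit`; a hedged sketch of the usual Sentry pattern (the per-page settings of the real endpoint may differ):

from sentry.api.paginator import GenericOffsetPaginator

# Inside the endpoint's get() method, with `request` in scope.
return self.paginate(
    request,
    paginator=GenericOffsetPaginator(data_fn=data_fn),
)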
Example #10
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    spans: List[Span],
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    # if there aren't any suspects, return early to avoid issuing an empty query
    if not spans or per_suspect == 0:
        return {}

    orderby_columns = SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_sort

    selected_columns: List[str] = [
        "id",
        "project.id",
        "project",
        "array_join(spans_op)",
        "array_join(spans_group)",
        *orderby_columns,
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        query=query,
        orderby=[direction + column for column in orderby_columns],
        # we want only `per_suspect` examples for each suspect
        limit=len(spans) * per_suspect,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    # we are only interested in the specific op, group pairs from the suspects
    builder.add_conditions([
        Condition(
            Function(
                "tuple",
                [
                    builder.resolve_function("array_join(spans_op)"),
                    builder.resolve_function("array_join(spans_group)"),
                ],
            ),
            Op.IN,
            Function(
                "tuple",
                [
                    Function("tuple", [suspect.op, suspect.group])
                    for suspect in spans
                ],
            ),
        ),
    ])

    if len(spans) > 1:
        # Hack: the LIMIT BY clause only allows columns, but here we want to
        # limit by the two array joins. For the time being, apply the limit
        # directly to the internal snuba name for the span group column. This
        # should not be relied upon in production, and if two spans differ
        # only by the span op, it will result in an incorrect query.
        builder.limitby = LimitBy(Column("_snuba_array_join_spans_group"),
                                  per_suspect)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(
        snql_query, "api.organization-events-spans-performance-examples")

    examples: Dict[Span, List[EventID]] = {
        Span(suspect.op, suspect.group): []
        for suspect in spans
    }

    for example in results["data"]:
        key = Span(example["array_join_spans_op"],
                   example["array_join_spans_group"])
        value = EventID(example["project.id"], example["project"],
                        example["id"])
        examples[key].append(value)

    return examples
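A hypothetical invocation; the `orderby` key and span groups below are made up, and `direction` is the sort prefix ("" or "-") applied to each orderby column:

examples = query_example_transactions(
    params=params,
    query=None,
    direction="-",
    orderby="sumExclusiveTime",  # assumed key of SPAN_PERFORMANCE_COLUMNS
    spans=[
        Span("db", "ab1c5e2a0d4f8e3b"),           # hypothetical suspect spans
        Span("http.client", "9f2e6c1b7a8d3e4f"),
    ],
    per_suspect=3,
)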
Example #11
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        "count()",
        "count_unique(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns
        if is_equation(column)
    ]

    # TODO: This adds all the possible fields to the query by default. However,
    # due to the way shards aggregate the rows, this can be slow. As an
    # optimization, allow the fields to be user specified to only get the
    # necessary aggregations.
    #
    # As part of the transition, continue to add all possible fields when none
    # are specified, but this should be removed in the future.
    if not fields:
        for column in SPAN_PERFORMANCE_COLUMNS.values():
            for col in column.suspect_op_group_sort:
                if not col.startswith("equation["):
                    selected_columns.append(col)

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[
            direction + column
            for column in suspect_span_columns.suspect_op_group_sort
        ],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    extra_conditions = []

    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            ))

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            ))

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(
        snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get(
                "percentileArray_spans_exclusive_time_0_99"),
        ) for suspect in results["data"]
    ]
Example #12
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # coerce the fields into a list as needed
            base_fields = query_info.get("field", [])
            if not isinstance(base_fields, list):
                base_fields = [base_fields]

            equations, fields = categorize_columns(base_fields)

            if len(base_fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)
            elif len(base_fields) == 0:
                raise serializers.ValidationError("at least one field is required to export")

            if "query" not in query_info:
                detail = "query is a required to export, please pass an empty string if you don't want to set one"
                raise serializers.ValidationError(detail)

            query_info["field"] = fields
            query_info["equations"] = equations

            if not query_info.get("project"):
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # make sure to fix the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()
            query_info["use_snql"] = features.has("organizations:discover-use-snql", organization)

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                builder = QueryBuilder(
                    Dataset.Discover,
                    processor.params,
                    query=query_info["query"],
                    selected_columns=fields.copy(),
                    equations=equations,
                    auto_fields=True,
                    auto_aggregations=True,
                )
                builder.get_snql_query()
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data
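A hedged sketch of a payload this validator would accept; the field names come from the code above, the values are hypothetical:

data = {
    "query_type": ExportQueryType.DISCOVER_STR,
    "query_info": {
        "project": ["42"],              # hypothetical project id
        "field": ["title", "count()"],  # coerced to a list if a single string
        "query": "",                    # required, may be empty
        "statsPeriod": "24h",           # converted to start/end, then removed
    },
}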
Example #13
def query_suspect_span_groups(
    params: ParamsType,
    fields: List[str],
    query: Optional[str],
    span_ops: Optional[List[str]],
    span_groups: Optional[List[str]],
    direction: str,
    orderby: str,
    limit: int,
    offset: int,
) -> List[SuspectSpan]:
    suspect_span_columns = SPAN_PERFORMANCE_COLUMNS[orderby]

    selected_columns: List[str] = [
        column
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if not is_equation(column)
    ] + [
        "array_join(spans_op)",
        "array_join(spans_group)",
        # want a single event id to fetch from nodestore for the span description
        "any(id)",
    ]

    equations: List[str] = [
        strip_equation(column)
        for column in suspect_span_columns.suspect_op_group_columns + fields
        if is_equation(column)
    ]

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=selected_columns,
        equations=equations,
        query=query,
        orderby=[direction + column for column in suspect_span_columns.suspect_op_group_sort],
        auto_aggregations=True,
        use_aggregate_conditions=True,
        limit=limit,
        offset=offset,
        functions_acl=["array_join", "sumArray", "percentileArray", "maxArray"],
    )

    extra_conditions = []

    if span_ops:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_op)"),
                Op.IN,
                Function("tuple", span_ops),
            )
        )

    if span_groups:
        extra_conditions.append(
            Condition(
                builder.resolve_function("array_join(spans_group)"),
                Op.IN,
                Function("tuple", span_groups),
            )
        )

    if extra_conditions:
        builder.add_conditions(extra_conditions)

    snql_query = builder.get_snql_query()
    results = raw_snql_query(snql_query, "api.organization-events-spans-performance-suspects")

    return [
        SuspectSpan(
            op=suspect["array_join_spans_op"],
            group=suspect["array_join_spans_group"],
            description=get_span_description(
                EventID(params["project_id"][0], suspect["any_id"]),
                span_op=suspect["array_join_spans_op"],
                span_group=suspect["array_join_spans_group"],
            ),
            frequency=suspect.get("count_unique_id"),
            count=suspect.get("count"),
            avg_occurrences=suspect.get("equation[0]"),
            sum_exclusive_time=suspect.get("sumArray_spans_exclusive_time"),
            p50_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_50"),
            p75_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_75"),
            p95_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_95"),
            p99_exclusive_time=suspect.get("percentileArray_spans_exclusive_time_0_99"),
        )
        for suspect in results["data"]
    ]
Example #14
def query_trace_data(
    trace_id: str,
    params: Mapping[str, str],
    use_snql: bool = False
) -> Tuple[Sequence[SnubaTransaction], Sequence[SnubaError]]:
    sentry_sdk.set_tag("discover.use_snql", use_snql)
    transaction_query: Union[QueryBuilder, discover.PreparedQuery]
    error_query: Union[QueryBuilder, discover.PreparedQuery]
    if use_snql:
        transaction_query = QueryBuilder(
            Dataset.Transactions,
            params,
            query=f"trace:{trace_id}",
            selected_columns=[
                "id",
                "transaction.status",
                "transaction.op",
                "transaction.duration",
                "transaction",
                "timestamp",
                "project",
                "project.id",
                "trace.span",
                "trace.parent_span",
                'to_other(trace.parent_span, "", 0, 1) AS root',
            ],
            # We want to guarantee at least getting the root, and hopefully
            # events near it via the timestamp ordering; id is just for
            # consistent results
            orderby=["-root", "timestamp", "id"],
            limit=MAX_TRACE_SIZE,
        )
        error_query = QueryBuilder(
            Dataset.Events,
            params,
            query=f"trace:{trace_id}",
            selected_columns=[
                "id",
                "project",
                "project.id",
                "timestamp",
                "trace.span",
                "transaction",
                "issue",
                "title",
                "tags[level]",
            ],
            # Don't add timestamp to this orderby as snuba will have to split the time range up and make multiple queries
            orderby=["id"],
            auto_fields=False,
            limit=MAX_TRACE_SIZE,
        )
        results = bulk_snql_query(
            [transaction_query.get_snql_query(),
             error_query.get_snql_query()],
            referrer="api.trace-view.get-events.wip-snql",
        )
        transformed_results = [
            discover.transform_results(result, query.function_alias_map, {},
                                       None)["data"]
            for result, query in zip(results, [transaction_query, error_query])
        ]
        return cast(Sequence[SnubaTransaction], transformed_results[0]), cast(
            Sequence[SnubaError], transformed_results[1]
        )

    transaction_query = discover.prepare_discover_query(
        selected_columns=[
            "id",
            "transaction.status",
            "transaction.op",
            "transaction.duration",
            "transaction",
            "timestamp",
            # project gets the slug, and project.id gets added automatically
            "project",
            "trace.span",
            "trace.parent_span",
            'to_other(trace.parent_span, "", 0, 1) AS root',
        ],
        # We want to guarantee at least getting the root, and hopefully events
        # near it via the timestamp ordering; id is just for consistent results
        orderby=["-root", "timestamp", "id"],
        params=params,
        query=f"event.type:transaction trace:{trace_id}",
    )
    error_query = discover.prepare_discover_query(
        selected_columns=[
            "id",
            "project",
            "timestamp",
            "trace.span",
            "transaction",
            "issue",
            "title",
            "tags[level]",
        ],
        # Don't add timestamp to this orderby as snuba will have to split the time range up and make multiple queries
        orderby=["id"],
        params=params,
        query=f"!event.type:transaction trace:{trace_id}",
        auto_fields=False,
    )
    snuba_params = [
        SnubaQueryParams(
            dataset=Dataset.Discover,
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            limit=MAX_TRACE_SIZE,
        ) for snuba_filter in [transaction_query.filter, error_query.filter]
    ]
    results = bulk_raw_query(
        snuba_params,
        referrer="api.trace-view.get-events",
    )
    transformed_results = [
        discover.transform_results(result, query.fields["functions"],
                                   query.columns, query.filter)["data"]
        for result, query in zip(results, [transaction_query, error_query])
    ]
    return cast(Sequence[SnubaTransaction], transformed_results[0]), cast(
        Sequence[SnubaError], transformed_results[1]
    )