Пример #1
0
def test_limitby(column: Any, count: Any,
                 exception: Optional[Exception]) -> None:
    """Construct a LimitBy and verify either its count or the raised error.

    When `exception` is None the construction must succeed and expose the
    given count; otherwise constructing must raise the same exception type
    with the same message.
    """
    if exception is None:
        assert LimitBy(column, count).count == count
        return

    with pytest.raises(type(exception), match=re.escape(str(exception))):
        LimitBy(column, count)
Пример #2
0
    def test_simple_limitby(self):
        """A (column, count) limitby tuple resolves to a LimitBy clause."""
        built = QueryBuilder(
            dataset=Dataset.Discover,
            params=self.params,
            query="",
            selected_columns=["message"],
            orderby="message",
            limitby=("message", 1),
            limit=4,
        )

        expected = LimitBy(Column("message"), 1)
        assert built.limitby == expected
Пример #3
0
    def resolve_limitby(
            self, limitby: Optional[Tuple[str, int]]) -> Optional[LimitBy]:
        """Turn an optional (column, count) pair into a LimitBy clause.

        Returns None when no limitby was requested. Raises
        InvalidSearchQuery when the column does not resolve to a plain
        Column, since LimitBy cannot operate on anything else (which also
        means non-aggregate transforms are not usable here).
        """
        if limitby is None:
            return None

        column, count = limitby
        resolved = self.resolve_column(column)
        if not isinstance(resolved, Column):
            raise InvalidSearchQuery(
                f"{column} used in a limit by but is not a column.")

        return LimitBy(resolved, count)
Пример #4
0
def json_to_snql(body: Mapping[str, Any], entity: str) -> Query:
    """
    This will output a Query object that matches the Legacy query body that was passed in.
    The entity is necessary since the SnQL API requires an explicit entity. This doesn't
    support subquery or joins.

    :param body: The legacy API body.
    :type body: Mapping[str, Any]
    :param entity: The name of the entity being queried.
    :type entity: str

    :raises InvalidExpressionError, InvalidQueryError: If the legacy body is invalid, the SDK will
        raise an exception.

    """

    # The legacy body may name a dataset explicitly; otherwise fall back to
    # the entity name.
    dataset = body.get("dataset") or entity
    sample = body.get("sample")
    if sample is not None:
        sample = float(sample)
    query = Query(dataset, Entity(entity, None, sample))

    # Aggregations come first in the select list, followed by the plain
    # selected columns (tuples are normalized to lists for parse_exp).
    selected_columns = []
    for a in body.get("aggregations", []):
        selected_columns.append(parse_exp(list(a)))

    selected = []
    for s in body.get("selected_columns", []):
        if isinstance(s, tuple):
            selected.append(list(s))
        else:
            selected.append(s)

    selected_columns.extend(list(map(parse_exp, selected)))

    arrayjoin = body.get("arrayjoin")
    if arrayjoin:
        query = query.set_array_join([Column(arrayjoin)])

    query = query.set_select(selected_columns)

    # Legacy groupby may be a single value or a list.
    groupby = body.get("groupby", [])
    if groupby and not isinstance(groupby, list):
        groupby = [groupby]

    parsed_groupby = []
    for g in groupby:
        if isinstance(g, tuple):
            g = list(g)
        parsed_groupby.append(parse_exp(g))
    query = query.set_groupby(parsed_groupby)

    conditions: list[Union[Or, Condition]] = []
    if body.get("organization"):
        org_cond = parse_extension_condition("org_id", body["organization"])
        if org_cond:
            conditions.append(org_cond)

    # Entities with a required time column get from/to date bounds.
    assert isinstance(query.match, Entity)
    time_column = get_required_time_column(query.match.name)
    if time_column:
        time_cols = (("from_date", Op.GTE), ("to_date", Op.LT))
        for col, op in time_cols:
            date_val = body.get(col)
            if date_val:
                conditions.append(
                    Condition(Column(time_column), op,
                              parse_datetime(date_val)))

    if body.get("project"):
        proj_cond = parse_extension_condition("project_id", body["project"],
                                              True)
        if proj_cond:
            conditions.append(proj_cond)

    # A nested (non-condition) entry is a legacy OR group. Or() needs at
    # least two conditions, so single-element groups are unwrapped.
    for cond in body.get("conditions", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            if len(or_conditions) > 1:
                conditions.append(Or(or_conditions))
            else:
                conditions.extend(or_conditions)
        else:
            conditions.append(parse_condition(cond))

    query = query.set_where(conditions)

    having: list[Union[Or, Condition]] = []
    for cond in body.get("having", []):
        if not is_condition(cond):
            or_conditions = []
            for or_cond in cond:
                or_conditions.append(parse_condition(or_cond))

            # Mirror the WHERE handling above: wrapping a single condition
            # in Or() would be invalid, so unwrap it instead.
            if len(or_conditions) > 1:
                having.append(Or(or_conditions))
            else:
                having.extend(or_conditions)
        else:
            having.append(parse_condition(cond))

    query = query.set_having(having)

    order_by = body.get("orderby")
    if order_by:
        if not isinstance(order_by, list):
            order_by = [order_by]

        order_bys = []
        for o in order_by:
            direction = Direction.ASC
            # NOTE(review): an entry that is neither a list nor a str would
            # leave `part` unbound below — confirm upstream validation
            # guarantees only these two shapes.
            if isinstance(o, list):
                first = o[0]
                if isinstance(first, str) and first.startswith("-"):
                    o[0] = first.lstrip("-")
                    direction = Direction.DESC
                part = parse_exp(o)
            elif isinstance(o, str):
                if o.startswith("-"):
                    direction = Direction.DESC
                    part = parse_exp(o.lstrip("-"))
                else:
                    part = parse_exp(o)

            order_bys.append(OrderBy(part, direction))

        query = query.set_orderby(order_bys)

    # Legacy limitby is ordered (limit, column).
    limitby = body.get("limitby")
    if limitby:
        limit, name = limitby
        query = query.set_limitby(LimitBy([Column(name)], int(limit)))

    extras = (
        "limit",
        "offset",
        "granularity",
        "totals",
        "consistent",
        "turbo",
        "debug",
        "dry_run",
        "parent_api",
    )
    for extra in extras:
        if body.get(extra) is not None:
            query = getattr(query, f"set_{extra}")(body.get(extra))

    # Query objects are immutable: set_legacy returns a new Query, so the
    # result must be reassigned or the flag is silently dropped.
    query = query.set_legacy(True)
    return query
Пример #5
0
def query_example_transactions(
    params: ParamsType,
    query: Optional[str],
    direction: str,
    orderby: str,
    spans: List[Span],
    per_suspect: int = 5,
    offset: Optional[int] = None,
) -> Dict[Span, List[EventID]]:
    """Fetch up to `per_suspect` example event IDs for each suspect span."""
    # With no suspects (or zero examples requested) there is nothing to
    # query for, so skip the round trip entirely.
    if not spans or per_suspect == 0:
        return {}

    sort_columns = SPAN_PERFORMANCE_COLUMNS[orderby].suspect_example_sort

    fields: List[str] = [
        "id",
        "project.id",
        "project",
        "array_join(spans_op)",
        "array_join(spans_group)",
    ]
    fields.extend(sort_columns)

    builder = QueryBuilder(
        dataset=Dataset.Discover,
        params=params,
        selected_columns=fields,
        query=query,
        orderby=[direction + col for col in sort_columns],
        # we want only `per_suspect` examples for each suspect
        limit=len(spans) * per_suspect,
        offset=offset,
        functions_acl=[
            "array_join", "sumArray", "percentileArray", "maxArray"
        ],
    )

    # Restrict results to the exact (op, group) pairs of the suspects.
    suspect_pairs = [
        Function("tuple", [suspect.op, suspect.group]) for suspect in spans
    ]
    builder.add_conditions([
        Condition(
            Function(
                "tuple",
                [
                    builder.resolve_function("array_join(spans_op)"),
                    builder.resolve_function("array_join(spans_group)"),
                ],
            ),
            Op.IN,
            Function("tuple", suspect_pairs),
        ),
    ])

    if len(spans) > 1:
        # Hack: the limit by clause only allows columns but here we want to
        # do a limitby on the two array joins. For the time being, directly
        # do the limitby on the internal snuba name for the span group column
        # but this should not be relied upon in production, and if two spans
        # differ only by the span op, this will result in a incorrect query
        builder.limitby = LimitBy(Column("_snuba_array_join_spans_group"),
                                  per_suspect)

    results = raw_snql_query(
        builder.get_snql_query(),
        "api.organization-events-spans-performance-examples")

    # Seed an empty example list per suspect so every key is present even
    # when the query returns no rows for it.
    examples: Dict[Span, List[EventID]] = {}
    for suspect in spans:
        examples[Span(suspect.op, suspect.group)] = []

    for row in results["data"]:
        span_key = Span(row["array_join_spans_op"],
                        row["array_join_spans_group"])
        examples[span_key].append(
            EventID(row["project.id"], row["project"], row["id"]))

    return examples
Пример #6
0
         BooleanCondition(
             BooleanOp.OR,
             [
                 Condition(Function("uniq", [Column("event_id")]),
                           Op.GTE, 10),
                 Condition(
                     CurriedFunction("quantile", [0.5],
                                     [Column("duration")]),
                     Op.GTE,
                     99,
                 ),
             ],
         ),
     ],
     orderby=[OrderBy(Column("title"), Direction.ASC)],
     limitby=LimitBy([Column("title")], 5),
     limit=Limit(10),
     offset=Offset(1),
     granularity=Granularity(3600),
     totals=Totals(True),
 ),
 (
     "MATCH (events SAMPLE 1000.0)",
     "SELECT title, uniq(event_id) AS uniq_events, quantile(0.5)(duration) AS p50",
     "BY title",
     ("WHERE timestamp > toDateTime('2021-01-02T03:04:05.000006') "
      "AND toHour(timestamp) <= toDateTime('2021-01-02T03:04:05.000006') "
      "AND project_id IN tuple(1, 2, 3) "
      "AND (event_id = 'abc' OR duration > 10)"),
     "HAVING uniq(event_id) > 1 AND (uniq(event_id) >= 10 OR quantile(0.5)(duration) >= 99)",
     "ORDER BY title ASC",