def data_fn(offset, limit):
    if use_snql:
        trend_query.offset = Offset(offset)
        trend_query.limit = Limit(limit)
        result = raw_snql_query(
            trend_query.get_snql_query(),
            referrer="api.trends.get-percentage-change.wip-snql",
        )
        result = discover.transform_results(
            result, trend_query.function_alias_map, {}, None
        )
        return result
    else:
        return discover.query(
            selected_columns=selected_columns + trend_columns,
            query=query,
            params=params,
            orderby=orderby,
            offset=offset,
            limit=limit,
            referrer="api.trends.get-percentage-change",
            auto_fields=True,
            auto_aggregations=True,
            use_aggregate_conditions=True,
        )
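data_fn closes over use_snql, trend_query, selected_columns, and the other variables of the enclosing endpoint, taking only the offset and limit that a paginator supplies. A minimal sketch of that wiring, assuming it sits inside a Sentry endpoint handler where self and request are already in scope:

from sentry.api.paginator import GenericOffsetPaginator

# Inside the endpoint's request handler: the paginator calls data_fn
# with the offset/limit derived from the cursor on each page request.
return self.paginate(
    request=request,
    paginator=GenericOffsetPaginator(data_fn=data_fn),
)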
Example #2
File: utils.py  Project: yxlbeyond/sentry
def transform_aliases_and_query(**kwargs):
    """
    Convert aliases in selected_columns, groupby, aggregation, conditions,
    orderby and arrayjoin fields to their internal Snuba format and post the
    query to Snuba. Convert back translated aliases before returning snuba
    results.

    :deprecated: This method is deprecated. You should use sentry.snuba.discover instead.
    """

    arrayjoin_map = {"error": "exception_stacks", "stack": "exception_frames"}

    translated_columns = {}
    derived_columns = set()

    selected_columns = kwargs.get("selected_columns")
    groupby = kwargs.get("groupby")
    aggregations = kwargs.get("aggregations")
    conditions = kwargs.get("conditions")
    filter_keys = kwargs["filter_keys"]
    arrayjoin = kwargs.get("arrayjoin")
    orderby = kwargs.get("orderby")
    having = kwargs.get("having", [])
    dataset = Dataset.Events

    if selected_columns:
        for (idx, col) in enumerate(selected_columns):
            if isinstance(col, list):
                # if list, means there are potentially nested functions and need to
                # iterate and translate potential columns
                parse_columns_in_functions(col)
                selected_columns[idx] = col
                translated_columns[col[2]] = col[2]
                derived_columns.add(col[2])
            else:
                name = get_snuba_column_name(col)
                selected_columns[idx] = name
                translated_columns[name] = col

    if groupby:
        for (idx, col) in enumerate(groupby):
            if col not in derived_columns:
                name = get_snuba_column_name(col)
            else:
                name = col

            groupby[idx] = name
            translated_columns[name] = col

    for aggregation in aggregations or []:
        derived_columns.add(aggregation[2])
        if isinstance(aggregation[1], six.string_types):
            aggregation[1] = get_snuba_column_name(aggregation[1])
        elif isinstance(aggregation[1], (set, tuple, list)):
            aggregation[1] = [get_snuba_column_name(col) for col in aggregation[1]]

    for col in list(filter_keys.keys()):
        name = get_snuba_column_name(col)
        filter_keys[name] = filter_keys.pop(col)

    if conditions:
        aliased_conditions = []
        for condition in conditions:
            field = condition[0]
            if not isinstance(field, (list, tuple)) and field in derived_columns:
                having.append(condition)
            else:
                aliased_conditions.append(condition)
        kwargs["conditions"] = aliased_conditions

    if having:
        kwargs["having"] = having

    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        translated_orderby = []

        for field_with_order in orderby:
            field = field_with_order.lstrip("-")
            translated_orderby.append(
                u"{}{}".format(
                    "-" if field_with_order.startswith("-") else "",
                    field if field in derived_columns else get_snuba_column_name(field),
                )
            )

        kwargs["orderby"] = translated_orderby

    kwargs["arrayjoin"] = arrayjoin_map.get(arrayjoin, arrayjoin)
    kwargs["dataset"] = dataset

    result = dataset_query(**kwargs)

    return transform_results(result, translated_columns, kwargs)
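A hypothetical call to this deprecated helper, using the legacy raw-Snuba argument shapes (aggregations are [function, column, alias] triples, filter_keys is keyed by column name); the project id, date range, and threshold are placeholders:

from datetime import datetime, timedelta

result = transform_aliases_and_query(
    selected_columns=["id", "project.id"],
    aggregations=[["count()", "", "count"]],
    conditions=[["count", ">", 10]],  # references a derived alias, so it is moved into HAVING
    filter_keys={"project_id": [1]},
    orderby="-count",
    start=datetime.utcnow() - timedelta(days=1),
    end=datetime.utcnow(),
    limit=10,
)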
Example #3
def query_facet_performance(
    params: Mapping[str, str],
    tag_data: Mapping[str, Any],
    referrer: str,
    aggregate_column: Optional[str] = None,
    filter_query: Optional[str] = None,
    orderby: Optional[List[str]] = None,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    all_tag_keys: Optional[bool] = None,
    tag_key: Optional[str] = None,
) -> Dict:
    with sentry_sdk.start_span(
        op="discover.discover", description="facets.filter_transform"
    ) as span:
        span.set_data("query", filter_query)
        snuba_filter = get_filter(filter_query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = discover.resolve_discover_aliases(snuba_filter)
    translated_aggregate_column = discover.resolve_discover_column(aggregate_column)

    # Aggregate (avg) and count of all transactions for this query
    transaction_aggregate = tag_data["aggregate"]

    # Dynamically sample so at least 50000 transactions are selected
    sample_start_count = 50000
    transaction_count = tag_data["count"]
    sampling_enabled = transaction_count > sample_start_count

    # log-e growth starting at 50,000
    target_sample = max(
        sample_start_count * (math.log(transaction_count) - (math.log(sample_start_count) - 1)),
        sample_start_count,
    )

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample / transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0), 1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1
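    # Illustrative numbers for the sampling math above (not from the source):
    # with transaction_count = 500,000, target_sample is roughly
    # 50,000 * (ln(500,000) - ln(50,000) + 1) ~= 165,100, giving
    # dynamic_sample_rate ~= 0.33, i.e. about a third of rows are scanned.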

    # Exclude tags that have high cardinality and are generally unrelated to performance
    excluded_tags = [
        "tags_key",
        "NOT IN",
        ["trace", "trace.ctx", "trace.span", "project", "browser", "celery_task_id", "url"],
    ]

    with sentry_sdk.start_span(
        op="discover.discover", description="facets.aggregate_tags"
    ) as span:
        span.set_data("sample_rate", sample_rate)
        span.set_data("target_sample", target_sample)
        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.005 if transaction_aggregate else 0
        having = [excluded_tags]
        if not all_tag_keys and not tag_key:
            having.append(["aggregate", ">", aggregate_comparison])

        resolved_orderby = [] if orderby is None else orderby

        conditions.append([translated_aggregate_column, "IS NOT NULL", None])

        if tag_key:
            conditions.append(["tags_key", "IN", [tag_key]])

        tag_key_limit = limit if tag_key else 1

        tag_selected_columns = [
            [
                "divide",
                [
                    ["sum", [["minus", [translated_aggregate_column, transaction_aggregate]]]],
                    frequency_sample_rate,
                ],
                "sumdelta",
            ],
            ["count", [], "count"],
            [
                "divide",
                [["divide", [["count", []], frequency_sample_rate]], transaction_count],
                "frequency",
            ],
            ["divide", ["aggregate", transaction_aggregate], "comparison"],
            ["avg", [translated_aggregate_column], "aggregate"],
        ]

        limitby = [tag_key_limit, "tags_key"] if not tag_key else None

        results = discover.raw_query(
            selected_columns=tag_selected_columns,
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=resolved_orderby + ["tags_key", "tags_value"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            referrer=f"{referrer}.tag_values".format(referrer, "tag_values"),
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=limitby,
            limit=limit,
            offset=offset,
        )

        results = discover.transform_results(results, {}, translated_columns, snuba_filter)

        return results
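A hedged usage sketch: tag_data is expected to carry the overall average and count produced by an earlier summary query, params follows the usual discover shape (organization/project ids plus start and end), and the referrer string is a placeholder:

tag_data = {"aggregate": 1250.0, "count": 120000}
results = query_facet_performance(
    params=params,
    tag_data=tag_data,
    referrer="api.performance.facets",  # placeholder referrer
    aggregate_column="transaction.duration",
    filter_query="transaction.op:pageload",
    limit=5,
)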
Example #4
def query_trace_data(
    trace_id: str, params: Mapping[str, str]
) -> Tuple[Sequence[SnubaTransaction], Sequence[SnubaError]]:
    transaction_query = discover.prepare_discover_query(
        selected_columns=[
            "id",
            "transaction.status",
            "transaction.op",
            "transaction.duration",
            "transaction",
            "timestamp",
            # project gets the slug, and project.id gets added automatically
            "project",
            "trace.span",
            "trace.parent_span",
            'to_other(trace.parent_span, "", 0, 1) AS root',
        ],
        # We want to guarantee at least getting the root, and hopefully events near it with timestamp
        # id is just for consistent results
        orderby=["-root", "timestamp", "id"],
        params=params,
        query=f"event.type:transaction trace:{trace_id}",
    )
    error_query = discover.prepare_discover_query(
        selected_columns=[
            "id",
            "project",
            "timestamp",
            "trace.span",
            "transaction",
            "issue",
            "title",
            "tags[level]",
        ],
        # Don't add timestamp to this orderby as snuba will have to split the time range up and make multiple queries
        orderby=["id"],
        params=params,
        query=f"!event.type:transaction trace:{trace_id}",
        auto_fields=False,
    )
    snuba_params = [
        SnubaQueryParams(
            dataset=Dataset.Discover,
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            limit=MAX_TRACE_SIZE,
        )
        for snuba_filter in [transaction_query.filter, error_query.filter]
    ]
    results = bulk_raw_query(
        snuba_params,
        referrer="api.trace-view.get-events",
    )
    transformed_results = [
        discover.transform_results(result, query.fields["functions"], query.columns, query.filter)[
            "data"
        ]
        for result, query in zip(results, [transaction_query, error_query])
    ]
    return cast(Sequence[SnubaTransaction], transformed_results[0]), cast(
        Sequence[SnubaError], transformed_results[1]
    )
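A usage sketch, assuming trace_id and params come from the surrounding endpoint; the 32-character hex trace id here is a placeholder:

transactions, errors = query_trace_data("ab" * 16, params)
# The "-root" orderby sorts the root transaction(s) first.
root = transactions[0] if transactions else None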
Example #5
def query(
    selected_columns,
    query,
    params,
    equations=None,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    extra_snql_condition=None,
    functions_acl=None,
    use_snql=False,
):
    """ """
    metrics_compatible = not equations

    if metrics_compatible:
        try:
            metrics_query = MetricsQueryBuilder(
                params,
                query=query,
                selected_columns=selected_columns,
                equations=[],
                orderby=orderby,
                # Auto fields will add things like id back in if enabled
                auto_fields=False,
                auto_aggregations=auto_aggregations,
                use_aggregate_conditions=use_aggregate_conditions,
                functions_acl=functions_acl,
                limit=limit,
                offset=offset,
            )
            # Getting the 0th result for now, will need to consolidate multiple query results later
            results = metrics_query.run_query(referrer + ".metrics-enhanced")
            results = discover.transform_results(
                results, metrics_query.function_alias_map, {}, None
            )
            results = resolve_tags(results, metrics_query)
            results["meta"]["isMetricsData"] = True
            return results
        # Re-raise invalid search queries, since discover would fail the same way
        except InvalidSearchQuery as error:
            raise error
        # any remaining errors mean we should try again with discover
        except IncompatibleMetricsQuery:
            metrics_compatible = False

    # Either metrics failed, or this isn't a query we can enhance with metrics
    if not metrics_compatible:
        results = discover.query(
            selected_columns,
            query,
            params,
            equations=equations,
            orderby=orderby,
            offset=offset,
            limit=limit,
            referrer=referrer,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            use_aggregate_conditions=use_aggregate_conditions,
            conditions=conditions,
            extra_snql_condition=extra_snql_condition,
            functions_acl=functions_acl,
            use_snql=use_snql,
        )
        results["meta"]["isMetricsData"] = False

        return results

    return {}
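A sketch of how a caller might rely on the metrics-or-discover fallback; the columns are ordinary discover fields and the referrer is a placeholder:

results = query(
    selected_columns=["transaction", "p95(transaction.duration)"],
    query="transaction.duration:>0",
    params=params,
    referrer="api.performance.landing",  # placeholder referrer
)
if results["meta"]["isMetricsData"]:
    # Served from the metrics dataset; otherwise discover answered it.
    pass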
Example #6
def timeseries_query(
    selected_columns: Sequence[str],
    query: str,
    params: Dict[str, str],
    rollup: int,
    referrer: str,
    zerofill_results: bool = True,
    comparison_delta: Optional[timedelta] = None,
    functions_acl: Optional[List[str]] = None,
    use_snql: Optional[bool] = False,
) -> SnubaTSResult:
    """
    High-level API for doing arbitrary user timeseries queries against events.
    This API should match that of sentry.snuba.discover.timeseries_query.
    """
    metrics_compatible = False
    equations, columns = categorize_columns(selected_columns)
    if comparison_delta is None and not equations:
        metrics_compatible = True

    if metrics_compatible:
        try:
            metrics_query = TimeseriesMetricQueryBuilder(
                params,
                rollup,
                query=query,
                selected_columns=columns,
                functions_acl=functions_acl,
            )
            result = metrics_query.run_query(referrer + ".metrics-enhanced")
            result = discover.transform_results(result, metrics_query.function_alias_map, {}, None)
            result["data"] = (
                discover.zerofill(
                    result["data"],
                    params["start"],
                    params["end"],
                    rollup,
                    "time",
                )
                if zerofill_results
                else result["data"]
            )
            return SnubaTSResult(
                {"data": result["data"], "isMetricsData": True},
                params["start"],
                params["end"],
                rollup,
            )
        # Re-raise invalid search queries, since discover would fail the same way
        except InvalidSearchQuery as error:
            raise error
        # any remaining errors mean we should try again with discover
        except IncompatibleMetricsQuery:
            metrics_compatible = False

    # This isn't a query we can enhance with metrics
    if not metrics_compatible:
        return discover.timeseries_query(
            selected_columns,
            query,
            params,
            rollup,
            referrer,
            zerofill_results,
            comparison_delta,
            functions_acl,
            use_snql,
        )
    return SnubaTSResult()
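A usage sketch (rollup is in seconds, params must include start and end datetimes, and the referrer is a placeholder):

result = timeseries_query(
    selected_columns=["p75(transaction.duration)"],
    query="transaction.duration:>0",
    params=params,
    rollup=3600,
    referrer="api.performance.chart",  # placeholder referrer
)
for bucket in result.data["data"]:
    # Each bucket is a dict keyed by "time" plus the aggregate alias.
    pass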