Example #1
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema. Virtual
    fields and aggregate functions are supported for selected columns and
    conditions.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    auto_aggregations (bool) Whether aggregates should be added automatically if they're used
                    in conditions, and there's at least one aggregate already.
    use_aggregate_conditions (bool) Set to true if aggregates conditions should be used at all.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")
    else:
        # We clobber this value throughout this code, so copy the value
        selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    function_translations = {}

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby,
                                                  (list,
                                                   tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having clause can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = u", and could not be automatically added" if auto_aggregations else u""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        u"Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        u"Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, resolved_fields["functions"],
                                 translated_columns, snuba_filter,
                                 selected_columns)
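
A minimal usage sketch. The `params` values, referrer string, and column/orderby choices below are placeholders rather than values taken from the source:

from datetime import datetime, timedelta

params = {
    "start": datetime.utcnow() - timedelta(days=1),
    "end": datetime.utcnow(),
    "project_id": [1],
}
results = query(
    selected_columns=["transaction", "count()"],
    query="event.type:transaction",
    params=params,
    orderby="-count",
    limit=10,
    referrer="api.example.top-transactions",
    use_aggregate_conditions=True,
)
# transform_results is assumed to preserve the raw_query shape, with rows under "data".
for row in results["data"]:
    print(row["transaction"], row["count"])
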
Example #2
def timeseries_query(selected_columns,
                     query,
                     params,
                     rollup,
                     reference_event=None,
                     referrer=None):
    """
    High-level API for doing arbitrary user timeseries queries against events.

    This function operates on the public event schema. Virtual fields and
    aggregate functions are supported for selected columns and conditions.

    This function is intended to only get timeseries based
    results and thus requires the `rollup` parameter.

    Returns a SnubaTSResult object that has been zerofilled in
    case of gaps.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    rollup (int) The bucket width in seconds
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    snuba_filter = get_filter(query, params)
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "having": snuba_filter.having,
    }
    if not snuba_args["start"] and not snuba_args["end"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result without a start and end.")

    snuba_args.update(
        resolve_field_list(selected_columns, snuba_args, auto_fields=False))
    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, _ = resolve_discover_aliases(snuba_args)
    if not snuba_args["aggregations"]:
        raise InvalidSearchQuery(
            "Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures compatibility
    # with the expectations of other parts of the timeseries endpoint.
    if len(snuba_args["aggregations"]) == 1:
        snuba_args["aggregations"][0][2] = "count"

    result = raw_query(
        aggregations=snuba_args.get("aggregations"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        rollup=rollup,
        orderby="time",
        groupby=["time"],
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )
    result = zerofill(result["data"], snuba_args["start"], snuba_args["end"],
                      rollup, "time")

    return SnubaTSResult({"data": result}, snuba_filter.start,
                         snuba_filter.end, rollup)
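
A hedged example of calling this for an hourly error-count series; the `params` values and referrer are illustrative:

from datetime import datetime, timedelta

series = timeseries_query(
    selected_columns=["count()"],
    query="event.type:error",
    params={
        "start": datetime.utcnow() - timedelta(days=1),
        "end": datetime.utcnow(),
        "project_id": [1],
    },
    rollup=3600,  # one-hour buckets
    referrer="api.example.events-over-time",
)
# The SnubaTSResult above is built with {"data": [...]}, so the zerofilled
# buckets are assumed to be reachable via series.data["data"].
for bucket in series.data["data"]:
    print(bucket["time"], bucket.get("count", 0))
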
Example #3
def get_facets(query, params, limit=10, referrer=None):
    """
    High-level API for getting 'facet map' results.

    Facets are high frequency tags and attribute results that
    can be used to further refine user queries. When many projects
    are requested, sampling will be enabled to help keep response times low.

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    # Exclude tracing tags as they are noisy and generally not helpful.
    excluded_tags = [
        "tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span", "project"]
    ]

    # Sampling keys for multi-project results as we don't need accuracy
    # with that much data.
    sample = len(snuba_filter.filter_keys["project_id"]) > 2

    # Get the most frequent tag keys
    key_names = raw_query(
        aggregations=[["count", None, "count"]],
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        orderby=["-count", "tags_key"],
        groupby="tags_key",
        having=[excluded_tags],
        dataset=Dataset.Discover,
        limit=limit,
        referrer=referrer,
        turbo=sample,
    )
    top_tags = [r["tags_key"] for r in key_names["data"]]
    if not top_tags:
        return []

    # TODO(mark) Make the sampling rate scale based on the result size and scaling factor in
    # sentry.options. To test the lowest acceptable sampling rate, we use 0.1 which
    # is equivalent to turbo. We don't use turbo though as we need to re-scale data, and
    # using turbo could cause results to be wrong if the value of turbo is changed in snuba.
    sample_rate = 0.1 if key_names["data"][0]["count"] > 10000 else None
    # Rescale the results if we're sampling
    multiplier = 1 / sample_rate if sample_rate is not None else 1
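    # For example, with sample_rate = 0.1 a sampled count of 420 is reported
    # as 420 * 10 = 4200, approximating the unsampled total.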

    fetch_projects = False
    if len(params.get("project_id", [])) > 1:
        if len(top_tags) == limit:
            top_tags.pop()
        fetch_projects = True

    results = []
    if fetch_projects:
        project_values = raw_query(
            aggregations=[["count", None, "count"]],
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            conditions=snuba_args.get("conditions"),
            filter_keys=snuba_args.get("filter_keys"),
            groupby="project_id",
            orderby="-count",
            dataset=Dataset.Discover,
            referrer=referrer,
            sample=sample_rate,
        )
        results.extend([
            FacetResult("project", r["project_id"],
                        int(r["count"]) * multiplier)
            for r in project_values["data"]
        ])

    # Get tag counts for our top tags. Fetching them individually
    # allows snuba to leverage promoted tags better and enables us to get
    # the value count we want.
    max_aggregate_tags = options.get("discover2.max_tags_to_combine")
    individual_tags = []
    aggregate_tags = []
    for i, tag in enumerate(top_tags):
        if tag == "environment":
            # Add tags here that should always be fetched individually
            individual_tags.append(tag)
        elif i >= len(top_tags) - max_aggregate_tags:
            aggregate_tags.append(tag)
        else:
            individual_tags.append(tag)

    for tag_name in individual_tags:
        tag = u"tags[{}]".format(tag_name)
        tag_values = raw_query(
            aggregations=[["count", None, "count"]],
            conditions=snuba_args.get("conditions"),
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            filter_keys=snuba_args.get("filter_keys"),
            orderby=["-count"],
            groupby=[tag],
            limit=TOP_VALUES_DEFAULT_LIMIT,
            dataset=Dataset.Discover,
            referrer=referrer,
            sample=sample_rate,
        )
        results.extend([
            FacetResult(tag_name, r[tag],
                        int(r["count"]) * multiplier)
            for r in tag_values["data"]
        ])

    if aggregate_tags:
        conditions = snuba_args.get("conditions", [])
        conditions.append(["tags_key", "IN", aggregate_tags])
        tag_values = raw_query(
            aggregations=[["count", None, "count"]],
            conditions=conditions,
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            filter_keys=snuba_args.get("filter_keys"),
            orderby=["tags_key", "-count"],
            groupby=["tags_key", "tags_value"],
            dataset=Dataset.Discover,
            referrer=referrer,
            sample=sample_rate,
            limitby=[TOP_VALUES_DEFAULT_LIMIT, "tags_key"],
        )
        results.extend([
            FacetResult(r["tags_key"], r["tags_value"], int(r["count"]))
            for r in tag_values["data"]
        ])

    return results
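
A brief usage sketch, assuming FacetResult is a simple (key, value, count) record as constructed above; the `params` values and referrer are placeholders:

from datetime import datetime, timedelta

facets = get_facets(
    query="event.type:error",
    params={
        "start": datetime.utcnow() - timedelta(days=7),
        "end": datetime.utcnow(),
        "project_id": [1, 2],
    },
    limit=10,
    referrer="api.example.facets",
)
for facet in facets:
    # Counts may have been rescaled by the sampling multiplier above.
    print(facet.key, facet.value, facet.count)
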
Example #4
def get_performance_facets(
    query,
    params,
    orderby=None,
    aggregate_column="duration",
    aggregate_function="avg",
    limit=20,
    referrer=None,
):
    """
    High-level API for getting 'facet map' results for performance data

    Performance facets are high frequency tags and the aggregate duration of
    their most frequent values

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.frequent_tags"):
        # Get the most relevant tag keys
        key_names = raw_query(
            aggregations=[
                [aggregate_function, aggregate_column, "aggregate"],
                ["count", None, "count"],
            ],
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            orderby=["-count"],
            dataset=Dataset.Discover,
            limit=limit,
            referrer="{}.{}".format(referrer, "all_transactions"),
        )
        counts = [r["count"] for r in key_names["data"]]
        aggregates = [r["aggregate"] for r in key_names["data"]]

        # Return early to avoid doing more queries with 0 count transactions or aggregates for columns that don't exist
        if len(counts) != 1 or counts[0] == 0 or aggregates[0] is None:
            return []

    results = []
    snuba_filter.conditions.append([aggregate_column, "IS NOT NULL", None])

    # Aggregate for transaction
    transaction_aggregate = key_names["data"][0]["aggregate"]

    # Dynamically sample so at least 10000 transactions are selected
    transaction_count = key_names["data"][0]["count"]
    sampling_enabled = transaction_count > 50000
    # Log growth starting at 50,000
    target_sample = 50000 * (math.log(transaction_count, 10) - 3)

    dynamic_sample_rate = 0 if transaction_count <= 0 else (target_sample /
                                                            transaction_count)
    sample_rate = min(max(dynamic_sample_rate, 0),
                      1) if sampling_enabled else None
    frequency_sample_rate = sample_rate if sample_rate else 1
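    # Illustration of the log growth above: with transaction_count = 500,000,
    # log10(500,000) is roughly 5.7, so target_sample is roughly 50,000 * 2.7
    # = 135,000 and the resulting sample_rate is about 0.27.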

    excluded_tags = [
        "tags_key",
        "NOT IN",
        [
            "trace", "trace.ctx", "trace.span", "project", "browser",
            "celery_task_id"
        ],
    ]

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.aggregate_tags"):
        conditions = snuba_filter.conditions
        aggregate_comparison = transaction_aggregate * 1.01 if transaction_aggregate else 0
        having = [excluded_tags]
        if orderby and orderby in ("sumdelta", "-sumdelta", "aggregate",
                                   "-aggregate"):
            having.append(["aggregate", ">", aggregate_comparison])

        if orderby is None:
            orderby = []
        else:
            orderby = [orderby]

        tag_values = raw_query(
            selected_columns=[
                [
                    "sum",
                    [
                        "minus",
                        [
                            aggregate_column,
                            str(transaction_aggregate),
                        ],
                    ],
                    "sumdelta",
                ],
            ],
            aggregations=[
                [aggregate_function, aggregate_column, "aggregate"],
                ["count", None, "cnt"],
            ],
            conditions=conditions,
            start=snuba_filter.start,
            end=snuba_filter.end,
            filter_keys=snuba_filter.filter_keys,
            orderby=orderby + ["tags_key"],
            groupby=["tags_key", "tags_value"],
            having=having,
            dataset=Dataset.Discover,
            referrer="{}.{}".format(referrer, "tag_values"),
            sample=sample_rate,
            turbo=sample_rate is not None,
            limitby=[1, "tags_key"],
        )
        results.extend([
            PerformanceFacetResult(
                key=r["tags_key"],
                value=r["tags_value"],
                performance=float(r["aggregate"]),
                frequency=float(
                    (r["cnt"] / frequency_sample_rate) / transaction_count),
                comparison=float(r["aggregate"] / transaction_aggregate),
                sumdelta=float(r["sumdelta"]),
            ) for r in tag_values["data"]
        ])

    return results
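
One way this might be invoked to surface tag values that add the most time over the baseline aggregate; the `params` values and referrer are placeholders, and the result fields follow the PerformanceFacetResult construction above:

from datetime import datetime, timedelta

facets = get_performance_facets(
    query="transaction:/api/checkout",
    params={
        "start": datetime.utcnow() - timedelta(days=1),
        "end": datetime.utcnow(),
        "project_id": [1],
    },
    orderby="-sumdelta",  # rank by total duration above the overall aggregate
    aggregate_column="duration",
    aggregate_function="avg",
    referrer="api.example.performance-facets",
)
for facet in facets:
    print(facet.key, facet.value, facet.performance, facet.frequency)
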
Example #5
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema. Virtual
    fields and aggregate functions are supported for selected columns and
    conditions.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim once all of
    # discover is using this module. Remember to update all the functions
    # in this module.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
        "orderby": orderby,
        "having": [],
    }

    if use_aggregate_conditions:
        snuba_args["having"] = snuba_filter.having

    snuba_args.update(
        resolve_field_list(selected_columns,
                           snuba_args,
                           auto_fields=auto_fields))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_args["conditions"].extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_args.get("having"):
        found = any(having_clause[0] == agg_clause[-1]
                    for agg_clause in snuba_args.get("aggregations"))
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column."
                .format(having_clause[0]))

    result = raw_query(
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        groupby=snuba_args.get("groupby"),
        conditions=snuba_args.get("conditions"),
        aggregations=snuba_args.get("aggregations"),
        selected_columns=snuba_args.get("selected_columns"),
        filter_keys=snuba_args.get("filter_keys"),
        having=snuba_args.get("having"),
        orderby=snuba_args.get("orderby"),
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_args)
Example #6
def get_pagination_ids(event,
                       query,
                       params,
                       organization,
                       reference_event=None,
                       referrer=None):
    """
    High-level API for getting pagination data for an event + filter

    The provided event is used as a reference event to find events
    that are older and newer than the current one.

    event (Event) The event to find related events for.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    # TODO(evanh): This can be removed once we migrate the frontend / saved queries
    # to use the new function values
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    result = {
        "next": eventstore.get_next_event_id(event, filter=snuba_filter),
        "previous": eventstore.get_prev_event_id(event, filter=snuba_filter),
        "latest": eventstore.get_latest_event_id(event, filter=snuba_filter),
        "oldest": eventstore.get_earliest_event_id(event, filter=snuba_filter),
    }

    # translate project ids to slugs

    project_ids = set([item[0] for item in result.values() if item])

    project_slugs = {}
    projects = Project.objects.filter(id__in=list(project_ids),
                                      organization=organization,
                                      status=ProjectStatus.VISIBLE).values(
                                          "id", "slug")

    for project in projects:
        project_slugs[project["id"]] = project["slug"]

    def into_pagination_record(project_slug_event_id):

        if not project_slug_event_id:
            return None

        project_id = int(project_slug_event_id[0])

        return "{}:{}".format(project_slugs[project_id],
                              project_slug_event_id[1])

    for key, value in result.items():
        result[key] = into_pagination_record(value)

    return PaginationResult(**result)
Example #7
def prepare_discover_query(
    selected_columns,
    query,
    params,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby,
                                                  (list,
                                                   tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having clause can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}."
                        .format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        ))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}."
                        .format(
                            having_clause[0],
                            error_extra,
                        ))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)
Example #8
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema. Virtual
    fields and aggregate functions are supported for selected columns and
    conditions.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")
    else:
        # We clobber this value throughout this code, so copy the value
        selected_columns = selected_columns[:]

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            snuba_filter.having = []

    # We need to run a separate query to be able to properly bucket the values for the histogram.
    # Do that here, and format the bucket number into the columns before passing it through
    # to event search.
    idx = 0
    function_translations = {}
    for col in selected_columns:
        if col.startswith("histogram("):
            with sentry_sdk.start_span(
                    op="discover.discover",
                    description="query.histogram_calculation") as span:
                span.set_data("histogram", col)
                histogram_column = find_histogram_buckets(
                    col, params, snuba_filter.conditions)
                selected_columns[idx] = histogram_column
                snuba_name = get_function_alias(histogram_column)
                sentry_name = get_function_alias(col)
                function_translations[snuba_name] = sentry_name
                # Since we're completely renaming the histogram function, we need to also check if we are
                # ordering by the histogram values, and change that.
                if orderby is not None:
                    orderby = list(orderby) if isinstance(
                        orderby, (list, tuple)) else [orderby]
                    for i, ordering in enumerate(orderby):
                        if sentry_name == ordering.lstrip("-"):
                            ordering = "{}{}".format(
                                "-" if ordering.startswith("-") else "",
                                snuba_name)
                            orderby[i] = ordering

            break

        idx += 1

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby,
                                                  (list,
                                                   tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        snuba_filter.update_with(
            resolve_field_list(selected_columns,
                               snuba_filter,
                               auto_fields=auto_fields))

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter, function_translations)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having clause can be an alias, or a nested array of functions. Loop through to make sure
            # any referenced functions are in the aggregations.
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        else:
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1]
                                for agg_clause in snuba_filter.aggregations)
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        u"Aggregate(s) {} used in a condition but are not in the selected columns."
                        .format(", ".join(conditions_not_in_aggregations)))
            else:
                found = any(having_clause[0] == agg_clause[-1]
                            for agg_clause in snuba_filter.aggregations)
                if not found:
                    raise InvalidSearchQuery(
                        u"Aggregate {} used in a condition but is not a selected column."
                        .format(having_clause[0]))

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.snuba_query"):
        result = raw_query(
            start=snuba_filter.start,
            end=snuba_filter.end,
            groupby=snuba_filter.groupby,
            conditions=snuba_filter.conditions,
            aggregations=snuba_filter.aggregations,
            selected_columns=snuba_filter.selected_columns,
            filter_keys=snuba_filter.filter_keys,
            having=snuba_filter.having,
            orderby=snuba_filter.orderby,
            dataset=Dataset.Discover,
            limit=limit,
            offset=offset,
            referrer=referrer,
        )

    with sentry_sdk.start_span(op="discover.discover",
                               description="query.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        return transform_results(result, translated_columns, snuba_filter,
                                 selected_columns)
Example #9
def query(
    selected_columns,
    query,
    params,
    orderby=None,
    offset=None,
    limit=50,
    reference_event=None,
    referrer=None,
    auto_fields=False,
    use_aggregate_conditions=False,
    conditions=None,
):
    """
    High-level API for doing arbitrary user queries against events.

    This function operates on the Discover public event schema. Virtual
    fields and aggregate functions are supported for selected columns and
    conditions.

    The resulting list will have all internal field names mapped
    back into their public schema names.

    selected_columns (Sequence[str]) List of public aliases to fetch.
    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (None|str|Sequence[str]) The field to order results by.
    offset (None|int) The record offset to read.
    limit (int) The number of records to fetch.
    reference_event (ReferenceEvent) A reference event object. Used to generate additional
                    conditions based on the provided reference.
    referrer (str|None) A referrer string to help locate the origin of this query.
    auto_fields (bool) Set to true to have project + eventid fields automatically added.
    conditions (Sequence[any]) List of conditions that are passed directly to snuba without
                    any additional processing.
    """
    if not selected_columns:
        raise InvalidSearchQuery("No columns selected")

    # TODO(evanh): These can be removed once we migrate the frontend / saved queries
    # to use the new function values
    selected_columns, function_translations = transform_deprecated_functions_in_columns(
        selected_columns)
    query = transform_deprecated_functions_in_query(query)

    snuba_filter = get_filter(query, params)
    if not use_aggregate_conditions:
        snuba_filter.having = []

    # We need to run a separate query to be able to properly bucket the values for the histogram.
    # Do that here, and format the bucket number into the columns before passing it through
    # to event search.
    idx = 0
    for col in selected_columns:
        if col.startswith("histogram("):
            histogram_column = find_histogram_buckets(col, params,
                                                      snuba_filter.conditions)
            selected_columns[idx] = histogram_column
            function_translations[get_function_alias(
                histogram_column)] = get_function_alias(col)
            break

        idx += 1

    # Check to see if we are ordering by any functions and convert the orderby to be the correct alias.
    if orderby:
        orderby = orderby if isinstance(orderby, (list, tuple)) else [orderby]
        new_orderby = []
        for ordering in orderby:
            is_reversed = ordering.startswith("-")
            ordering = ordering.lstrip("-")
            for snuba_name, sentry_name in six.iteritems(
                    function_translations):
                if sentry_name == ordering:
                    ordering = snuba_name
                    break

            ordering = "{}{}".format("-" if is_reversed else "", ordering)
            new_orderby.append(ordering)

        snuba_filter.orderby = new_orderby

    snuba_filter.update_with(
        resolve_field_list(selected_columns,
                           snuba_filter,
                           auto_fields=auto_fields))

    if reference_event:
        ref_conditions = create_reference_event_conditions(reference_event)
        if ref_conditions:
            snuba_filter.conditions.extend(ref_conditions)

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(
        snuba_filter, function_translations)

    # Make sure that any aggregate conditions are also in the selected columns
    for having_clause in snuba_filter.having:
        found = any(having_clause[0] == agg_clause[-1]
                    for agg_clause in snuba_filter.aggregations)
        if not found:
            raise InvalidSearchQuery(
                u"Aggregate {} used in a condition but is not a selected column."
                .format(having_clause[0]))

    if conditions is not None:
        snuba_filter.conditions.extend(conditions)

    result = raw_query(
        start=snuba_filter.start,
        end=snuba_filter.end,
        groupby=snuba_filter.groupby,
        conditions=snuba_filter.conditions,
        aggregations=snuba_filter.aggregations,
        selected_columns=snuba_filter.selected_columns,
        filter_keys=snuba_filter.filter_keys,
        having=snuba_filter.having,
        orderby=snuba_filter.orderby,
        dataset=Dataset.Discover,
        limit=limit,
        offset=offset,
        referrer=referrer,
    )

    return transform_results(result, translated_columns, snuba_filter,
                             selected_columns)
Example #10
def calculate_incident_start(query, projects, groups):
    """
    Attempts to automatically calculate the date that an incident began at based
    on the events related to the incident.
    """
    params = {}
    if groups:
        params["group_ids"] = [g.id for g in groups]
        end = max(g.last_seen for g in groups) + timedelta(seconds=1)
    else:
        end = timezone.now()

    params["start"] = end - INCIDENT_START_PERIOD
    params["end"] = end

    if projects:
        params["project_id"] = [p.id for p in projects]

    filter = get_filter(query, params)
    rollup = int(INCIDENT_START_ROLLUP.total_seconds())

    result = raw_query(
        aggregations=[("count()", "", "count"),
                      ("min", "timestamp", "first_seen")],
        orderby="time",
        groupby=["time"],
        rollup=rollup,
        referrer="incidents.calculate_incident_start",
        limit=10000,
        start=filter.start,
        end=filter.end,
        conditions=filter.conditions,
        filter_keys=filter.filter_keys,
    )["data"]
    # TODO: Start could be the period before the first period we find
    result = zerofill(result, params["start"], params["end"], rollup, "time")

    # We want to linearly scale scores from 100% value at the most recent to
    # 50% at the oldest. This gives a bias towards newer results.
    negative_weight = (1.0 / len(result)) / 2
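    # For example, with 20 buckets negative_weight is (1 / 20) / 2 = 0.025, so the
    # multiplier applied to the oldest bucket ends up near 0.5.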
    multiplier = 1.0
    cur_spike_max_count = -1
    cur_spike_start = None
    cur_spike_end = None
    max_height = 0
    incident_start = None
    cur_height = 0
    prev_count = 0

    def get_row_first_seen(row, default=None):
        first_seen = default
        if "first_seen" in row:
            first_seen = parse_date(row["first_seen"]).replace(tzinfo=pytz.utc)
        return first_seen

    def calculate_start(spike_start, spike_end):
        """
        We arbitrarily choose a date about 1/3 into the incident period. We
        could potentially improve this if we want by analyzing the period in
        more detail and choosing a date that most closely fits with being 1/3
        up the spike.
        """
        spike_length = spike_end - spike_start
        return spike_start + (spike_length / 3)

    for row in reversed(result):
        cur_count = row.get("count", 0)
        if cur_count < prev_count or cur_count > 0 and cur_count == prev_count:
            cur_height = cur_spike_max_count - cur_count
        elif cur_count > 0 or prev_count > 0 or cur_height > 0:
            # Now that we've got the height of the current spike, compare it to the
            # current max. We scale the value by `multiplier` so that we
            # favour newer results.
            cur_height *= multiplier
            if cur_height > max_height:
                # If we detect that we have a new highest peak, then set a new
                # incident start date
                incident_start = calculate_start(cur_spike_start,
                                                 cur_spike_end)
                max_height = cur_height

            cur_height = 0
            cur_spike_max_count = cur_count
            cur_spike_end = get_row_first_seen(row)

        # We attempt to get the first_seen value from the row here. If the row
        # doesn't have it (because it's a zerofilled row), then just use the
        # previous value. This allows us to have the start of a spike always be
        # a bucket that contains at least one element.
        cur_spike_start = get_row_first_seen(row, cur_spike_start)
        prev_count = cur_count
        multiplier -= negative_weight

    if (cur_height > max_height or not incident_start) and cur_spike_start:
        incident_start = calculate_start(cur_spike_start, cur_spike_end)

    if not incident_start:
        incident_start = timezone.now()

    return incident_start
Example #11
def validate_alert_rule_query(query):
    # TODO: We should add more validation here to reject queries that include
    # fields that are invalid in alert rules. For now this will just make sure
    # the query parses correctly.
    get_filter(query)
Example #12
def get_performance_facets(
    query,
    params,
    orderby=None,
    aggregate_column="duration",
    aggregate_function="avg",
    limit=20,
    referrer=None,
):
    """
    High-level API for getting 'facet map' results for performance data

    Performance facets are high frequency tags and the aggregate duration of
    their most frequent values

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.filter_transform") as span:
        span.set_data("query", query)
        snuba_filter = get_filter(query, params)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(
            snuba_filter)

    # Exclude tracing tags as they are noisy and generally not helpful.
    # TODO(markus): Tracing tags are no longer written but may still reside in DB.
    excluded_tags = [
        "tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span", "project"]
    ]

    # Sampling keys for multi-project results as we don't need accuracy
    # with that much data.
    sample = len(snuba_filter.filter_keys["project_id"]) > 2

    with sentry_sdk.start_span(op="discover.discover",
                               description="facets.frequent_tags"):
        # Get the most relevant tag keys
        key_names = raw_query(
            aggregations=[["count", None, "count"]],
            start=snuba_filter.start,
            end=snuba_filter.end,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            orderby=["-count", "tags_key"],
            groupby="tags_key",
            # TODO(Kevan): Check using having vs where before mainlining
            having=[excluded_tags],
            dataset=Dataset.Discover,
            limit=limit,
            referrer=referrer,
            turbo=sample,
        )
        top_tags = [r["tags_key"] for r in key_names["data"]]
        if not top_tags:
            return []

    results = []
    snuba_filter.conditions.append([aggregate_column, "IS NOT NULL", None])

    # Only enable sampling if over 10000 values
    sampling_enabled = key_names["data"][0]["count"] > 10000
    options_sample_rate = options.get(
        "discover2.tags_performance_facet_sample_rate") or 0.1

    sample_rate = options_sample_rate if sampling_enabled else None

    max_aggregate_tags = 20
    aggregate_tags = []
    for i, tag in enumerate(top_tags):
        if i >= len(top_tags) - max_aggregate_tags:
            aggregate_tags.append(tag)

    if orderby is None:
        orderby = []

    if aggregate_tags:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="facets.aggregate_tags"):
            conditions = snuba_filter.conditions
            conditions.append(["tags_key", "IN", aggregate_tags])
            tag_values = raw_query(
                aggregations=[
                    [aggregate_function, aggregate_column, "aggregate"],
                    ["count", None, "count"],
                ],
                conditions=conditions,
                start=snuba_filter.start,
                end=snuba_filter.end,
                filter_keys=snuba_filter.filter_keys,
                orderby=orderby + ["tags_key"],
                groupby=["tags_key", "tags_value"],
                dataset=Dataset.Discover,
                referrer=referrer,
                sample=sample_rate,
                turbo=sample_rate is not None,
                limitby=[TOP_VALUES_DEFAULT_LIMIT, "tags_key"],
            )
            results.extend([
                PerformanceFacetResult(r["tags_key"], r["tags_value"],
                                       float(r["aggregate"]), int(r["count"]))
                for r in tag_values["data"]
            ])

    return results
Example #13
def get_facets(query, params, limit=10, referrer=None):
    """
    High-level API for getting 'facet map' results.

    Facets are high frequency tags and attribute results that
    can be used to further refine user queries. When many projects
    are requested, sampling will be enabled to help keep response times low.

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    # Force sampling for multi-project results as we don't need accuracy
    # with that much data.
    sample = len(snuba_filter.filter_keys["project_id"]) > 2

    # Exclude tracing tags as they are noisy and generally not helpful.
    excluded_tags = [
        "tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span"]
    ]

    # Get the most frequent tag keys, enable sampling
    # as we don't need accuracy here.
    key_names = raw_query(
        aggregations=[["count", None, "count"]],
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        orderby=["-count", "tags_key"],
        groupby="tags_key",
        having=[excluded_tags],
        dataset=Dataset.Discover,
        limit=limit,
        referrer=referrer,
        turbo=sample,
    )
    top_tags = [r["tags_key"] for r in key_names["data"]]
    if not top_tags:
        return []

    fetch_projects = False
    if len(params.get("project_id", [])) > 1:
        if len(top_tags) == limit:
            top_tags.pop()
        fetch_projects = True

    results = []
    if fetch_projects:
        project_values = raw_query(
            aggregations=[["count", None, "count"]],
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            conditions=snuba_args.get("conditions"),
            filter_keys=snuba_args.get("filter_keys"),
            groupby="project_id",
            orderby="-count",
            dataset=Dataset.Discover,
            referrer=referrer,
        )
        results.extend([
            FacetResult("project", r["project_id"], r["count"])
            for r in project_values["data"]
        ])

    # Get tag counts for our top tags. Fetching them individually
    # allows snuba to leverage promoted tags better and enables us to get
    # the value count we want.
    for tag_name in top_tags:
        tag = u"tags[{}]".format(tag_name)
        tag_values = raw_query(
            aggregations=[["count", None, "count"]],
            conditions=snuba_args.get("conditions"),
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            filter_keys=snuba_args.get("filter_keys"),
            orderby=["-count"],
            groupby=[tag],
            limit=TOP_VALUES_DEFAULT_LIMIT,
            dataset=Dataset.Discover,
            referrer=referrer,
        )
        results.extend([
            FacetResult(tag_name, r[tag], int(r["count"]))
            for r in tag_values["data"]
        ])

    return results
Example #14
    def validate_conditions(self, conditions):
        try:
            get_filter(conditions)
        except InvalidSearchQuery as err:
            raise serializers.ValidationError("Invalid conditions: {}".format(err))
        return conditions
Example #15
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # coerce the fields into a list as needed
            fields = query_info.get("field", [])
            if not isinstance(fields, list):
                fields = [fields]

            if len(fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)
            elif len(fields) == 0:
                raise serializers.ValidationError(
                    "at least one field is required to export")

            if "query" not in query_info:
                detail = "query is a required to export, please pass an empty string if you don't want to set one"
                raise serializers.ValidationError(detail)

            query_info["field"] = fields

            if not query_info.get("project"):
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # make sure to fix the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                snuba_filter = get_filter(query_info["query"],
                                          processor.params)
                resolve_field_list(
                    fields.copy(),
                    snuba_filter,
                    auto_fields=True,
                    auto_aggregations=True,
                )
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data
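
For reference, a discover export payload that would pass the checks above might look roughly like this; the concrete values are illustrative, only the key names follow the validation logic:

data = {
    "query_type": ExportQueryType.DISCOVER_STR,
    "query_info": {
        "project": [1],
        "field": ["title", "count()"],
        "query": "event.type:error",
        "statsPeriod": "24h",  # converted to explicit start/end during validation
    },
}
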
Example #16
def get_facets(query, params, limit=20, referrer=None):
    """
    High-level API for getting 'facet map' results.

    Facets are high frequency tags and attribute results that
    can be used to further refine user queries. When many projects
    are requested, sampling will be enabled to help keep response times low.

    query (str) Filter query string to create conditions from.
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    limit (int) The number of records to fetch.
    referrer (str|None) A referrer string to help locate the origin of this query.

    Returns Sequence[FacetResult]
    """
    snuba_filter = get_filter(query, params)

    # TODO(mark) Refactor the need for this translation shim.
    snuba_args = {
        "start": snuba_filter.start,
        "end": snuba_filter.end,
        "conditions": snuba_filter.conditions,
        "filter_keys": snuba_filter.filter_keys,
    }
    # Resolve the public aliases into the discover dataset names.
    snuba_args, translated_columns = resolve_discover_aliases(snuba_args)

    # Force sampling for more than 9 projects. 9 was chosen arbitrarily.
    sample = len(snuba_filter.filter_keys["project_id"]) > 9

    # Exclude tracing tags as they are noisy and generally not helpful.
    conditions = snuba_args.get("conditions", [])
    conditions.append(
        ["tags_key", "NOT IN", ["trace", "trace.ctx", "trace.span"]])

    # Get the most frequent tag keys, enable sampling
    # as we don't need accuracy here.
    key_names = raw_query(
        aggregations=[["count", None, "count"]],
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        conditions=snuba_args.get("conditions"),
        filter_keys=snuba_args.get("filter_keys"),
        orderby=["-count", "tags_key"],
        groupby="tags_key",
        dataset=Dataset.Discover,
        limit=limit,
        referrer=referrer,
        turbo=sample,
    )
    top_tags = [r["tags_key"] for r in key_names["data"]]
    if not top_tags:
        return []

    fetch_projects = False
    if len(params.get("project_id", [])) > 1:
        if len(top_tags) == limit:
            top_tags.pop()
        fetch_projects = True

    results = []
    if fetch_projects:
        project_values = raw_query(
            aggregations=[["uniq", "event_id", "count"]],
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            conditions=snuba_args.get("conditions"),
            filter_keys=snuba_args.get("filter_keys"),
            groupby="project_id",
            orderby="-count",
            dataset=Dataset.Discover,
            referrer=referrer,
        )
        results.extend([
            FacetResult("project", r["project_id"], r["count"])
            for r in project_values["data"]
        ])

    # Environment is a special case because of the "" value which is stored as null
    # in the environment column but not in the tag arrays.
    if "environment" in top_tags:
        top_tags.remove("environment")
        environment_values = raw_query(
            aggregations=[["uniq", "event_id", "count"]],
            start=snuba_args.get("start"),
            end=snuba_args.get("end"),
            conditions=snuba_args.get("conditions"),
            filter_keys=snuba_args.get("filter_keys"),
            groupby="environment",
            orderby=["-count", "environment"],
            dataset=Dataset.Discover,
            referrer=referrer,
        )
        results.extend([
            FacetResult("environment", r["environment"], r["count"])
            for r in environment_values["data"]
        ])

    # Get tag counts for our top tags.
    conditions.append(["tags_key", "IN", top_tags])
    tag_values = raw_query(
        aggregations=[["count", None, "count"]],
        conditions=conditions,
        start=snuba_args.get("start"),
        end=snuba_args.get("end"),
        filter_keys=snuba_args.get("filter_keys"),
        orderby=["tags_key", "-count"],
        groupby=["tags_key", "tags_value"],
        dataset=Dataset.Discover,
        referrer=referrer,
    )
    results.extend([
        FacetResult(r["tags_key"], r["tags_value"], int(r["count"]))
        for r in tag_values["data"]
    ])

    return results