# Example 1
# 0
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
    top_events=None,
    allow_empty=True,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                    usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                    this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. needs to be user_query
                    to not conflict with the function query
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    top_events (dict|None) A dictionary with a 'data' key containing a list of dictionaries that
                    represent the top events matching the query. Useful when you have found
                    the top events earlier and want to save a query.
    allow_empty (bool) When False and the snuba query returns no rows, a single
                    zerofilled SnubaTSResult is returned instead of the usual dict
                    keyed by top event.
    """
    # If the caller didn't supply the top events, run the events query first to
    # determine them; their field values are used below to narrow the
    # timeseries query.
    if top_events is None:
        with sentry_sdk.start_span(op="discover.discover",
                                   description="top_events.fetch_events"):
            top_events = query(
                selected_columns,
                query=user_query,
                params=params,
                orderby=orderby,
                limit=limit,
                referrer=referrer,
                auto_aggregations=True,
                use_aggregate_conditions=True,
            )

    with sentry_sdk.start_span(
            op="discover.discover",
            description="top_events.filter_transform") as span:
        span.set_data("query", user_query)
        # Build the timeseries filter over the union of both column sets; the
        # sort makes the generated query deterministic.
        snuba_filter, translated_columns = get_timeseries_snuba_filter(
            list(sorted(set(timeseries_columns + selected_columns))),
            user_query,
            params,
            rollup,
            default_count=False,
        )

        # Constrain the timeseries query so it only returns rows belonging to
        # the top events, one condition per selected field.
        for field in selected_columns:
            # If we have a project field, we need to limit results by project so we dont hit the result limit
            if field in ["project", "project.id"] and top_events["data"]:
                snuba_filter.project_ids = [
                    event["project.id"] for event in top_events["data"]
                ]
                continue
            if field in FIELD_ALIASES:
                # Aliased fields are keyed by their alias in the event rows.
                field = FIELD_ALIASES[field].alias
            # Note that because orderby shouldn't be an array field its not included in the values
            values = list({
                event.get(field)
                for event in top_events["data"]
                if field in event and not isinstance(event.get(field), list)
            })
            if values:
                # timestamp fields needs special handling, creating a big OR instead
                # NOTE(review): the nested list appended here presumably has OR
                # semantics in snuba's condition syntax — confirm against snuba docs.
                if field == "timestamp" or field.startswith("timestamp.to_"):
                    snuba_filter.conditions.append([[field, "=", value]
                                                    for value in sorted(values)
                                                    ])
                elif None in values:
                    # NULL can't go in an IN list, so express it as
                    # isNull(field) OR field IN (non-null values).
                    non_none_values = [
                        value for value in values if value is not None
                    ]
                    condition = [[["isNull", [resolve_discover_column(field)]],
                                  "=", 1]]
                    if non_none_values:
                        condition.append([
                            resolve_discover_column(field), "IN",
                            non_none_values
                        ])
                    snuba_filter.conditions.append(condition)
                elif field in FIELD_ALIASES:
                    # Alias names are passed through unresolved.
                    snuba_filter.conditions.append([field, "IN", values])
                else:
                    snuba_filter.conditions.append(
                        [resolve_discover_column(field), "IN", values])

    with sentry_sdk.start_span(op="discover.discover",
                               description="top_events.snuba_query"):
        # One snuba query grouped by time bucket plus the grouping fields;
        # results for all top events come back interleaved.
        result = raw_query(
            aggregations=snuba_filter.aggregations,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            selected_columns=snuba_filter.selected_columns,
            start=snuba_filter.start,
            end=snuba_filter.end,
            rollup=rollup,
            orderby=["time"] + snuba_filter.groupby,
            groupby=["time"] + snuba_filter.groupby,
            dataset=Dataset.Discover,
            limit=10000,
            referrer=referrer,
        )

    # Empty-result short circuit: returns a single zerofilled SnubaTSResult
    # rather than the per-event dict built below.
    if not allow_empty and not len(result.get("data", [])):
        return SnubaTSResult(
            {
                "data":
                zerofill([], snuba_filter.start, snuba_filter.end, rollup,
                         "time")
            },
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    with sentry_sdk.start_span(
            op="discover.discover",
            description="top_events.transform_results") as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = transform_data(result, translated_columns, snuba_filter,
                                selected_columns)

        if "project" in selected_columns:
            translated_columns["project_id"] = "project"
        translated_groupby = [
            translated_columns.get(groupby, groupby)
            for groupby in snuba_filter.groupby
        ]

        issues = {}
        if "issue" in selected_columns:
            # Map group ids to short ids so result keys use the public form.
            issues = Group.issues_mapping(
                {event["issue.id"]
                 for event in top_events["data"]},
                params["project_id"],
                organization,
            )
        # so the result key is consistent
        translated_groupby.sort()

        results = {}
        # Using the top events add the order to the results
        for index, item in enumerate(top_events["data"]):
            result_key = create_result_key(item, translated_groupby, issues)
            results[result_key] = {"order": index, "data": []}
        # Distribute timeseries rows to their top event; rows whose key matches
        # no top event are dropped (and logged) rather than crashing.
        for row in result["data"]:
            result_key = create_result_key(row, translated_groupby, issues)
            if result_key in results:
                results[result_key]["data"].append(row)
            else:
                logger.warning(
                    "discover.top-events.timeseries.key-mismatch",
                    extra={
                        "result_key": result_key,
                        "top_event_keys": list(results.keys())
                    },
                )
        # Zerofill each series so every bucket in [start, end] is present.
        for key, item in results.items():
            results[key] = SnubaTSResult(
                {
                    "data":
                    zerofill(item["data"], snuba_filter.start,
                             snuba_filter.end, rollup, "time"),
                    "order":
                    item["order"],
                },
                snuba_filter.start,
                snuba_filter.end,
                rollup,
            )

    return results
# Example 2
# 0
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                        usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                        this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. needs to be user_query
                        to not conflict with the function query
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    # Find the top `limit` events first; their field values narrow the
    # timeseries query below.
    top_events = query(
        selected_columns,
        query=user_query,
        params=params,
        orderby=orderby,
        limit=limit,
        referrer=referrer,
    )

    snuba_filter, translated_columns = get_timeseries_snuba_filter(
        timeseries_columns + selected_columns, user_query, params, rollup)

    user_fields = FIELD_ALIASES["user"]["fields"]

    # Constrain the timeseries query to rows belonging to the top events,
    # one condition per selected field.
    for field in selected_columns:
        # project is handled by filter_keys already
        if field in ["project", "project.id"]:
            continue
        if field == "issue":
            field = FIELD_ALIASES["issue"]["column_alias"]
        values = list({
            event.get(field)
            for event in top_events["data"] if field in event
        })
        if values:
            # timestamp needs special handling, creating a big OR instead
            if field == "timestamp":
                snuba_filter.conditions.append([["timestamp", "=", value]
                                                for value in values])
            # A user field can be any of its field aliases, do an OR across all the user fields
            elif field == "user":
                snuba_filter.conditions.append(
                    [[resolve_column(user_field), "IN", values]
                     for user_field in user_fields])
            elif None in values:
                # NULL can't go in an IN list, so express it as
                # isNull(field) OR field IN (non-null values).
                non_none_values = [
                    value for value in values if value is not None
                ]
                condition = [[["isNull", [resolve_column(field)]], "=", 1]]
                if non_none_values:
                    condition.append(
                        [resolve_column(field), "IN", non_none_values])
                snuba_filter.conditions.append(condition)
            else:
                snuba_filter.conditions.append(
                    [resolve_column(field), "IN", values])

    # One snuba query grouped by time bucket plus the grouping fields;
    # results for all top events come back interleaved.
    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"] + snuba_filter.groupby,
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )

    result = transform_results(result, translated_columns, snuba_filter,
                               selected_columns)

    translated_columns["project_id"] = "project"
    translated_groupby = [
        translated_columns.get(groupby, groupby)
        for groupby in snuba_filter.groupby
    ]

    if "user" in selected_columns:
        # Determine user related fields to prune based on what wasn't selected, since transform_results does the same
        for field in user_fields:
            if field not in selected_columns:
                translated_groupby.remove(field)
        translated_groupby.append("user")
    issues = {}
    if "issue" in selected_columns:
        # Map group ids to short ids so result keys use the public form.
        issues = Group.issues_mapping(
            {event["issue.id"] for event in top_events["data"]},
            params["project_id"],
            organization,
        )
    # so the result key is consistent
    translated_groupby.sort()

    results = {}
    for row in result["data"]:
        result_key = create_result_key(row, translated_groupby, issues)
        results.setdefault(result_key, {"data": []})["data"].append(row)
    # Using the top events add the order to the results
    for index, item in enumerate(top_events["data"]):
        result_key = create_result_key(item, translated_groupby, issues)
        # BUG FIX: a top event whose key produced no timeseries rows used to
        # raise KeyError here; setdefault keeps it with an empty series so its
        # order is still reported.
        results.setdefault(result_key, {"data": []})["order"] = index
    for key, item in six.iteritems(results):
        results[key] = SnubaTSResult(
            {
                "data":
                zerofill(item["data"], snuba_filter.start, snuba_filter.end,
                         rollup, "time"),
                # BUG FIX: rows whose key matched no top event never get an
                # order assigned; .get avoids a KeyError (order is None then).
                "order":
                item.get("order"),
            },
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    return results
# Example 3
# 0
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                        usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                        this is to determine what the top events are
    user_query (str) Filter query string to create conditions from. needs to be user_query
                        to not conflict with the function query
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment,
    orderby (Sequence[str]) The fields to order results by.
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query.
    """
    # Run the events query first to identify the top `limit` events.
    top_events = query(
        selected_columns,
        query=user_query,
        params=params,
        orderby=orderby,
        limit=limit,
        referrer=referrer,
    )

    snuba_filter, translated_columns = get_timeseries_snuba_filter(
        timeseries_columns + selected_columns, user_query, params, rollup)

    # Restrict the timeseries query to the field values observed in the top
    # events, one condition per selected field.
    for column in selected_columns:
        # project is already constrained via filter_keys
        if column in ["project", "project.id"]:
            continue
        observed = list({
            event.get(column)
            for event in top_events["data"] if column in event
        })
        # Skip fields with no values, or with any NULL (NULL can't be matched
        # with an IN condition in this version).
        if not observed or any(value is None for value in observed):
            continue
        if column == "timestamp":
            # timestamp needs an OR of equality checks rather than IN
            snuba_filter.conditions.append([["timestamp", "=", value]
                                            for value in observed])
        else:
            snuba_filter.conditions.append(
                [resolve_column(column), "IN", observed])

    # Single snuba query grouped by time bucket plus the grouping fields.
    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"] + snuba_filter.groupby,
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )

    result = transform_results(result, translated_columns, snuba_filter)

    # Map group ids to short ids so result keys use the public form.
    issues = {}
    if "issue" in selected_columns:
        issues = Group.issues_mapping(
            {event["issue.id"] for event in top_events["data"]},
            params["project_id"],
            organization,
        )

    translated_columns["project_id"] = "project"
    # Sorted so the result key is consistent regardless of groupby order.
    translated_groupby = sorted(
        translated_columns.get(field, field)
        for field in snuba_filter.groupby)

    # Bucket the rows by their composite group key.
    grouped_rows = {}
    for row in result["data"]:
        key_parts = [
            issues.get(row["issue.id"], "unknown")
            if field == "issue.id" else six.text_type(row.get(field))
            for field in translated_groupby
        ]
        grouped_rows.setdefault(",".join(key_parts), []).append(row)

    # Zerofill each series so every bucket in [start, end] is present.
    return {
        key: SnubaTSResult(
            {
                "data":
                zerofill(rows, snuba_filter.start, snuba_filter.end, rollup,
                         "time")
            },
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )
        for key, rows in six.iteritems(grouped_rows)
    }