import logging

import sentry_sdk
import six

from sentry.models import Group
from sentry.utils.snuba import Dataset, SnubaTSResult, raw_query

logger = logging.getLogger(__name__)

# The remaining helpers used below (query, get_timeseries_snuba_filter,
# transform_data, transform_results, zerofill, create_result_key,
# resolve_column, resolve_discover_column, FIELD_ALIASES) are module-level
# definitions from the surrounding discover module.


def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
    top_events=None,
    allow_empty=True,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query.

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                    usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                    used to determine what the top events are
    user_query (str) Filter query string to create conditions from. Needs to be named user_query
                    to not conflict with the `query` keyword argument of the inner query function
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (Sequence[str]) The fields to order results by
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query
    top_events (dict|None) A dictionary with a 'data' key containing a list of dictionaries that
                    represent the top events matching the query. Useful when you have found
                    the top events earlier and want to save a query.
    """
    if top_events is None:
        with sentry_sdk.start_span(op="discover.discover", description="top_events.fetch_events"):
            top_events = query(
                selected_columns,
                query=user_query,
                params=params,
                orderby=orderby,
                limit=limit,
                referrer=referrer,
                auto_aggregations=True,
                use_aggregate_conditions=True,
            )

    with sentry_sdk.start_span(
        op="discover.discover", description="top_events.filter_transform"
    ) as span:
        span.set_data("query", user_query)
        snuba_filter, translated_columns = get_timeseries_snuba_filter(
            sorted(set(timeseries_columns + selected_columns)),
            user_query,
            params,
            rollup,
            default_count=False,
        )

        for field in selected_columns:
            # If we have a project field, we need to limit results by project so we don't hit the result limit
            if field in ["project", "project.id"] and top_events["data"]:
                snuba_filter.project_ids = [event["project.id"] for event in top_events["data"]]
                continue
            if field in FIELD_ALIASES:
                field = FIELD_ALIASES[field].alias
            # Note that because orderby shouldn't be an array field it's not included in the values
            values = list(
                {
                    event.get(field)
                    for event in top_events["data"]
                    if field in event and not isinstance(event.get(field), list)
                }
            )
            if values:
                # timestamp fields need special handling, creating a big OR instead
                if field == "timestamp" or field.startswith("timestamp.to_"):
                    snuba_filter.conditions.append(
                        [[field, "=", value] for value in sorted(values)]
                    )
                elif None in values:
                    non_none_values = [value for value in values if value is not None]
                    condition = [[["isNull", [resolve_discover_column(field)]], "=", 1]]
                    if non_none_values:
                        condition.append(
                            [resolve_discover_column(field), "IN", non_none_values]
                        )
                    snuba_filter.conditions.append(condition)
                elif field in FIELD_ALIASES:
                    snuba_filter.conditions.append([field, "IN", values])
                else:
                    snuba_filter.conditions.append(
                        [resolve_discover_column(field), "IN", values]
                    )

    with sentry_sdk.start_span(op="discover.discover", description="top_events.snuba_query"):
        result = raw_query(
            aggregations=snuba_filter.aggregations,
            conditions=snuba_filter.conditions,
            filter_keys=snuba_filter.filter_keys,
            selected_columns=snuba_filter.selected_columns,
            start=snuba_filter.start,
            end=snuba_filter.end,
            rollup=rollup,
            orderby=["time"] + snuba_filter.groupby,
            groupby=["time"] + snuba_filter.groupby,
            dataset=Dataset.Discover,
            limit=10000,
            referrer=referrer,
        )

    if not allow_empty and not len(result.get("data", [])):
        return SnubaTSResult(
            {"data": zerofill([], snuba_filter.start, snuba_filter.end, rollup, "time")},
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    with sentry_sdk.start_span(
        op="discover.discover", description="top_events.transform_results"
    ) as span:
        span.set_data("result_count", len(result.get("data", [])))
        result = transform_data(result, translated_columns, snuba_filter, selected_columns)

        if "project" in selected_columns:
            translated_columns["project_id"] = "project"
        translated_groupby = [
            translated_columns.get(groupby, groupby) for groupby in snuba_filter.groupby
        ]

        issues = {}
        if "issue" in selected_columns:
            issues = Group.issues_mapping(
                {event["issue.id"] for event in top_events["data"]},
                params["project_id"],
                organization,
            )
        # sort so the result key is consistent
        translated_groupby.sort()

        results = {}
        # Using the top events, add the order to the results
        for index, item in enumerate(top_events["data"]):
            result_key = create_result_key(item, translated_groupby, issues)
            results[result_key] = {"order": index, "data": []}
        for row in result["data"]:
            result_key = create_result_key(row, translated_groupby, issues)
            if result_key in results:
                results[result_key]["data"].append(row)
            else:
                logger.warning(
                    "discover.top-events.timeseries.key-mismatch",
                    extra={
                        "result_key": result_key,
                        "top_event_keys": list(results.keys()),
                    },
                )
        for key, item in results.items():
            results[key] = SnubaTSResult(
                {
                    "data": zerofill(
                        item["data"], snuba_filter.start, snuba_filter.end, rollup, "time"
                    ),
                    "order": item["order"],
                },
                snuba_filter.start,
                snuba_filter.end,
                rollup,
            )

    return results
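
# Usage sketch for the variant above. All concrete values are hypothetical:
# `start`, `end`, and `org` stand in for real datetimes and an Organization,
# and the referrer string is made up. Shown as comments because it depends on
# a live Snuba backend.
#
#     results = top_events_timeseries(
#         timeseries_columns=["count()"],
#         selected_columns=["transaction", "count()"],
#         user_query="event.type:transaction",
#         params={"start": start, "end": end, "project_id": [1]},
#         orderby=["-count"],
#         rollup=3600,  # one-hour buckets
#         limit=5,      # timeseries for the top 5 transactions
#         organization=org,
#         referrer="api.example.top-events",
#     )
#     # One zerofilled SnubaTSResult per top event, keyed by its group values:
#     for key, timeseries in results.items():
#         print(key, timeseries.data["order"], len(timeseries.data["data"]))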
# An earlier variant of the same function, before the sentry_sdk spans and the
# reusable top_events argument were added.
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query.

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                    usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                    used to determine what the top events are
    user_query (str) Filter query string to create conditions from. Needs to be named user_query
                    to not conflict with the `query` keyword argument of the inner query function
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (Sequence[str]) The fields to order results by
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query
    """
    top_events = query(
        selected_columns,
        query=user_query,
        params=params,
        orderby=orderby,
        limit=limit,
        referrer=referrer,
    )

    snuba_filter, translated_columns = get_timeseries_snuba_filter(
        timeseries_columns + selected_columns, user_query, params, rollup
    )

    user_fields = FIELD_ALIASES["user"]["fields"]

    for field in selected_columns:
        # project is handled by filter_keys already
        if field in ["project", "project.id"]:
            continue
        if field == "issue":
            field = FIELD_ALIASES["issue"]["column_alias"]
        values = list(
            {event.get(field) for event in top_events["data"] if field in event}
        )
        if values:
            # timestamp needs special handling, creating a big OR instead
            if field == "timestamp":
                snuba_filter.conditions.append(
                    [["timestamp", "=", value] for value in values]
                )
            # A user field can be any of its field aliases, do an OR across all the user fields
            elif field == "user":
                snuba_filter.conditions.append(
                    [[resolve_column(user_field), "IN", values] for user_field in user_fields]
                )
            elif None in values:
                non_none_values = [value for value in values if value is not None]
                condition = [[["isNull", [resolve_column(field)]], "=", 1]]
                if non_none_values:
                    condition.append([resolve_column(field), "IN", non_none_values])
                snuba_filter.conditions.append(condition)
            else:
                snuba_filter.conditions.append([resolve_column(field), "IN", values])

    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"] + snuba_filter.groupby,
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )

    result = transform_results(result, translated_columns, snuba_filter, selected_columns)

    translated_columns["project_id"] = "project"
    translated_groupby = [
        translated_columns.get(groupby, groupby) for groupby in snuba_filter.groupby
    ]

    if "user" in selected_columns:
        # Determine user related fields to prune based on what wasn't selected, since transform_results does the same
        for field in user_fields:
            if field not in selected_columns:
                translated_groupby.remove(field)
        translated_groupby.append("user")
    issues = {}
    if "issue" in selected_columns:
        issues = Group.issues_mapping(
            {event["issue.id"] for event in top_events["data"]},
            params["project_id"],
            organization,
        )
    # sort so the result key is consistent
    translated_groupby.sort()

    results = {}
    for row in result["data"]:
        result_key = create_result_key(row, translated_groupby, issues)
        results.setdefault(result_key, {"data": []})["data"].append(row)
    # Using the top events, add the order to the results. setdefault guards
    # against a KeyError when a top event produced no timeseries rows.
    for index, item in enumerate(top_events["data"]):
        result_key = create_result_key(item, translated_groupby, issues)
        results.setdefault(result_key, {"data": []})["order"] = index
    for key, item in six.iteritems(results):
        results[key] = SnubaTSResult(
            {
                "data": zerofill(
                    item["data"], snuba_filter.start, snuba_filter.end, rollup, "time"
                ),
                # Rows whose key matched no top event carry no order, so use
                # .get to avoid a KeyError on such mismatches.
                "order": item.get("order"),
            },
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    return results
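
# A minimal, self-contained sketch of the nested-condition shape built above.
# In the legacy Snuba condition format, top-level entries in `conditions` are
# ANDed together, while a list of [lhs, op, rhs] triples inside a single entry
# is ORed; the `field == "user"` branch uses that to match any of the user
# field aliases. The alias names and values below are hypothetical.
def _or_across_fields(field_aliases, values):
    # One [column, "IN", values] triple per alias, nested so Snuba ORs them.
    return [[alias, "IN", values] for alias in field_aliases]

_conditions = []
_conditions.append(_or_across_fields(["user_email", "user_username"], ["foo@example.com"]))
assert _conditions == [
    [["user_email", "IN", ["foo@example.com"]], ["user_username", "IN", ["foo@example.com"]]]
]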
# The oldest variant of the same function, which still builds result keys
# inline instead of through create_result_key.
def top_events_timeseries(
    timeseries_columns,
    selected_columns,
    user_query,
    params,
    orderby,
    rollup,
    limit,
    organization,
    referrer=None,
):
    """
    High-level API for doing arbitrary user timeseries queries for a limited number of top events

    Returns a dictionary of SnubaTSResult objects that have been zerofilled in
    case of gaps. Each value of the dictionary should match the result of a timeseries query.

    timeseries_columns (Sequence[str]) List of public aliases to fetch for the timeseries query,
                    usually matches the y-axis of the graph
    selected_columns (Sequence[str]) List of public aliases to fetch for the events query,
                    used to determine what the top events are
    user_query (str) Filter query string to create conditions from. Needs to be named user_query
                    to not conflict with the `query` keyword argument of the inner query function
    params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
    orderby (Sequence[str]) The fields to order results by
    rollup (int) The bucket width in seconds
    limit (int) The number of events to get timeseries for
    organization (Organization) Used to map group ids to short ids
    referrer (str|None) A referrer string to help locate the origin of this query
    """
    top_events = query(
        selected_columns,
        query=user_query,
        params=params,
        orderby=orderby,
        limit=limit,
        referrer=referrer,
    )

    snuba_filter, translated_columns = get_timeseries_snuba_filter(
        timeseries_columns + selected_columns, user_query, params, rollup
    )

    for field in selected_columns:
        # project is handled by filter_keys already
        if field in ["project", "project.id"]:
            continue
        values = list(
            {event.get(field) for event in top_events["data"] if field in event}
        )
        if values and all(value is not None for value in values):
            # timestamp needs special handling, creating a big OR instead
            if field == "timestamp":
                snuba_filter.conditions.append(
                    [["timestamp", "=", value] for value in values]
                )
            else:
                snuba_filter.conditions.append([resolve_column(field), "IN", values])

    result = raw_query(
        aggregations=snuba_filter.aggregations,
        conditions=snuba_filter.conditions,
        filter_keys=snuba_filter.filter_keys,
        start=snuba_filter.start,
        end=snuba_filter.end,
        rollup=rollup,
        orderby="time",
        groupby=["time"] + snuba_filter.groupby,
        dataset=Dataset.Discover,
        limit=10000,
        referrer=referrer,
    )

    result = transform_results(result, translated_columns, snuba_filter)

    issues = {}
    if "issue" in selected_columns:
        issues = Group.issues_mapping(
            {event["issue.id"] for event in top_events["data"]},
            params["project_id"],
            organization,
        )

    translated_columns["project_id"] = "project"
    translated_groupby = [
        translated_columns.get(field, field) for field in snuba_filter.groupby
    ]
    # sort so the result key is consistent
    translated_groupby.sort()

    results = {}
    for row in result["data"]:
        values = []
        for field in translated_groupby:
            if field == "issue.id":
                values.append(issues.get(row["issue.id"], "unknown"))
            else:
                values.append(six.text_type(row.get(field)))
        result_key = ",".join(values)
        results.setdefault(result_key, []).append(row)
    for key, item in six.iteritems(results):
        results[key] = SnubaTSResult(
            {"data": zerofill(item, snuba_filter.start, snuba_filter.end, rollup, "time")},
            snuba_filter.start,
            snuba_filter.end,
            rollup,
        )

    return results
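
# A self-contained sketch of the result-key bucketing in the oldest variant
# above: rows are grouped by their stringified, sorted groupby values, exactly
# as the ",".join(...) loop does. The rows and fields below are hypothetical;
# real rows also carry the queried aggregates.
_rows = [
    {"time": 0, "transaction": "/a", "count": 3},
    {"time": 60, "transaction": "/a", "count": 1},
    {"time": 0, "transaction": "/b", "count": 2},
]
_translated_groupby = sorted(["transaction"])
_buckets = {}
for _row in _rows:
    _result_key = ",".join(str(_row.get(_field)) for _field in _translated_groupby)
    _buckets.setdefault(_result_key, []).append(_row)
assert sorted(_buckets) == ["/a", "/b"]
assert len(_buckets["/a"]) == 2  # both /a rows land in the same timeseries bucket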