def get_timeseries_snuba_filter(selected_columns, query, params, rollup, default_count=True):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery("Cannot get timeseries result without a start and end.")
    snuba_filter.update_with(resolve_field_list(selected_columns, snuba_filter, auto_fields=False))

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery("Cannot get timeseries result with no aggregation.")

    # Change the alias of the first aggregation to count. This ensures
    # compatibility with the rest of the timeseries endpoint's expectations.
    if len(snuba_filter.aggregations) == 1 and default_count:
        snuba_filter.aggregations[0][2] = "count"

    return snuba_filter, translated_columns
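# A minimal usage sketch for the function above. Everything here is hypothetical
# (the params values and the "event.type:error" query are illustrative only), and
# it assumes the surrounding module's imports (get_filter, resolve_field_list,
# resolve_discover_aliases) are available.
from datetime import datetime, timedelta

example_params = {
    "start": datetime.now() - timedelta(hours=1),
    "end": datetime.now(),
    "project_id": [1],  # hypothetical project id
}
snuba_filter, translated_columns = get_timeseries_snuba_filter(
    ["count()"], "event.type:error", example_params, rollup=60
)
# With a single aggregation and default_count=True, the aggregation's alias is
# rewritten to "count", which the timeseries endpoint expects downstream.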
def build_snuba_filter(
    self,
    query: str,
    environment: Optional[Environment],
    params: Optional[Mapping[str, Any]] = None,
) -> Filter:
    resolve_func = resolve_column(Dataset(self.dataset.value))
    aggregations = [self.aggregate]
    # This aggregation is added to return the total number of sessions in crash
    # rate alerts; it is used to identify whether we are below the general
    # minimum alert threshold.
    count_col = re.search(r"(sessions|users)", self.aggregate)
    if not count_col:
        raise UnsupportedQuerySubscription(
            "Only crash free percentage queries are supported for subscriptions "
            "over the sessions dataset"
        )
    count_col_matched = count_col.group()

    aggregations += [f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"]
    functions_acl = ["identity"]

    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list(aggregations, snuba_filter, auto_fields=False, functions_acl=functions_acl)
    )
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
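# A standalone sketch (stdlib only) of the count-column extraction above: the
# regex pulls "sessions" or "users" out of the alert's aggregate so a matching
# identity(...) total can be selected alongside it. The aggregate strings below
# are hypothetical examples.
import re

for aggregate in (
    "percentage(sessions_crashed, sessions) AS _crash_rate_alert_aggregate",
    "percentage(users_crashed, users) AS _crash_rate_alert_aggregate",
    "count()",
):
    count_col = re.search(r"(sessions|users)", aggregate)
    if count_col:
        print(aggregate, "->", count_col.group())
    else:
        # The real method raises UnsupportedQuerySubscription here.
        print(aggregate, "-> unsupported")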
def get_field(self, request, snuba_args):
    y_axis = request.GET.get("yAxis", None)
    # These aliases are used by v1 of events.
    if not y_axis or y_axis == "event_count":
        y_axis = "count()"
    elif y_axis == "user_count":
        y_axis = "count_unique(user)"

    snuba_filter = eventstore.Filter(
        {
            "start": snuba_args.get("start"),
            "end": snuba_args.get("end"),
            "rollup": snuba_args.get("rollup"),
        }
    )
    try:
        resolved = resolve_field_list([y_axis], snuba_filter)
    except InvalidSearchQuery as err:
        raise ParseError(detail=str(err))
    try:
        aggregate = resolved["aggregations"][0]
    except IndexError:
        raise ParseError(detail="Invalid yAxis value requested.")
    # Force the aggregation alias to "count" to match what the v1 timeseries
    # consumers expect.
    aggregate[2] = "count"
    snuba_args["aggregations"] = [aggregate]

    return snuba_args
def validate(self, data):
    if not data.get("id"):
        keys = set(data.keys())
        if self.required_for_create - keys:
            raise serializers.ValidationError(
                {
                    "fields": "fields are required during creation.",
                    "conditions": "conditions are required during creation.",
                }
            )

    # Validate the query that would be created when run.
    conditions = self._get_attr(data, "conditions", "")
    fields = self._get_attr(data, "fields", []).copy()
    orderby = self._get_attr(data, "orderby", "")
    equations, fields = categorize_columns(fields)
    if equations is not None:
        resolved_equations, _ = resolve_equation_list(equations, fields)
    else:
        resolved_equations = []

    try:
        # The eps/epm functions require an interval argument, or a start/end
        # from which the interval can be computed. This uses a hard-coded
        # start/end so the validation succeeds, since the values themselves
        # don't matter.
        params = {
            "start": datetime.now() - timedelta(days=1),
            "end": datetime.now(),
            "project_id": [p.id for p in self.context.get("projects")],
            "organization_id": self.context.get("organization").id,
        }
        snuba_filter = get_filter(conditions, params=params)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError({"conditions": f"Invalid conditions: {err}"})

    if orderby:
        snuba_filter.orderby = get_function_alias(orderby)
    try:
        resolve_field_list(fields, snuba_filter, resolved_equations=resolved_equations)
    except InvalidSearchQuery as err:
        raise serializers.ValidationError({"fields": f"Invalid fields: {err}"})

    return data
def build_snuba_filter(dataset, query, aggregate, environment, event_types, params=None):
    resolve_func = (
        resolve_column(Dataset.Events)
        if dataset == QueryDatasets.EVENTS
        else resolve_column(Dataset.Transactions)
    )
    query = apply_dataset_query_conditions(dataset, query, event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(["group_id", "IN", list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
def build_snuba_filter(dataset, query, aggregate, environment, event_types, params=None):
    resolve_func = {
        QueryDatasets.EVENTS: resolve_column(Dataset.Events),
        QueryDatasets.SESSIONS: resolve_column(Dataset.Sessions),
        QueryDatasets.TRANSACTIONS: resolve_column(Dataset.Transactions),
    }[dataset]

    functions_acl = None
    aggregations = [aggregate]
    if dataset == QueryDatasets.SESSIONS:
        # This aggregation is added to return the total number of sessions in
        # crash rate alerts; it is used to identify whether we are below the
        # general minimum alert threshold.
        count_col = re.search(r"(sessions|users)", aggregate)
        count_col_matched = count_col.group()
        aggregations += [f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"]
        functions_acl = ["identity"]

    query = apply_dataset_query_conditions(dataset, query, event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list(aggregations, snuba_filter, auto_fields=False, functions_acl=functions_acl)
    )
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(["group_id", "IN", list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
def build_snuba_filter(
    self,
    query: str,
    environment: Optional[Environment],
    params: Optional[Mapping[str, Any]] = None,
) -> Filter:
    resolve_func = resolve_column(Dataset(self.dataset.value))

    query = apply_dataset_query_conditions(QueryDatasets(self.dataset), query, self.event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(resolve_field_list([self.aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(["group_id", "IN", list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
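# A minimal usage sketch for the method above. `alert_subscription` and the
# params values are hypothetical; the method only needs an object carrying
# `dataset`, `aggregate`, and `event_types`.
snuba_filter = alert_subscription.build_snuba_filter(
    query="level:error",
    environment=None,
    params={"organization_id": 1, "project_id": [1]},
)
# If an Environment is passed instead of None, the method appends
# ["environment", "=", environment.name] to snuba_filter.conditions.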
def validate(self, data):
    organization = self.context["organization"]
    query_info = data["query_info"]

    # Validate the project field, if provided
    # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
    project_query = query_info.get("project")
    if project_query:
        get_projects_by_id = self.context["get_projects_by_id"]
        # Coerce the query into a set
        if isinstance(project_query, list):
            projects = get_projects_by_id(set(map(int, project_query)))
        else:
            projects = get_projects_by_id({int(project_query)})
        query_info["project"] = [project.id for project in projects]

    # Discover Pre-processing
    if data["query_type"] == ExportQueryType.DISCOVER_STR:
        # coerce the fields into a list as needed
        fields = query_info.get("field", [])
        if not isinstance(fields, list):
            fields = [fields]
        if len(fields) > MAX_FIELDS:
            detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
            raise serializers.ValidationError(detail)
        elif len(fields) == 0:
            raise serializers.ValidationError("at least one field is required to export")

        if "query" not in query_info:
            detail = "query is required to export; please pass an empty string if you don't want to set one"
            raise serializers.ValidationError(detail)

        query_info["field"] = fields

        if not query_info.get("project"):
            projects = self.context["get_projects"]()
            query_info["project"] = [project.id for project in projects]

        # make sure to fix the export start/end times to ensure consistent results
        try:
            start, end = get_date_range_from_params(query_info)
        except InvalidParams as e:
            sentry_sdk.set_tag("query.error_reason", "Invalid date params")
            raise serializers.ValidationError(str(e))

        if "statsPeriod" in query_info:
            del query_info["statsPeriod"]
        if "statsPeriodStart" in query_info:
            del query_info["statsPeriodStart"]
        if "statsPeriodEnd" in query_info:
            del query_info["statsPeriodEnd"]
        query_info["start"] = start.isoformat()
        query_info["end"] = end.isoformat()

        # validate the query string by trying to parse it
        processor = DiscoverProcessor(
            discover_query=query_info,
            organization_id=organization.id,
        )
        try:
            snuba_filter = get_filter(query_info["query"], processor.params)
            resolve_field_list(
                fields.copy(),
                snuba_filter,
                auto_fields=True,
                auto_aggregations=True,
            )
        except InvalidSearchQuery as err:
            raise serializers.ValidationError(str(err))

    return data
def prepare_discover_query(
    selected_columns,
    query,
    params,
    orderby=None,
    auto_fields=False,
    auto_aggregations=False,
    use_aggregate_conditions=False,
    conditions=None,
    functions_acl=None,
):
    with sentry_sdk.start_span(op="discover.discover", description="query.filter_transform") as span:
        span.set_data("query", query)

        snuba_filter = get_filter(query, params)
        if not use_aggregate_conditions:
            assert (
                not auto_aggregations
            ), "Auto aggregations cannot be used without enabling aggregate conditions"
            snuba_filter.having = []

    with sentry_sdk.start_span(op="discover.discover", description="query.field_translations"):
        if orderby is not None:
            orderby = list(orderby) if isinstance(orderby, (list, tuple)) else [orderby]
            snuba_filter.orderby = [get_function_alias(o) for o in orderby]

        resolved_fields = resolve_field_list(
            selected_columns,
            snuba_filter,
            auto_fields=auto_fields,
            auto_aggregations=auto_aggregations,
            functions_acl=functions_acl,
        )

        snuba_filter.update_with(resolved_fields)

        # Resolve the public aliases into the discover dataset names.
        snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)

        # Make sure that any aggregate conditions are also in the selected columns
        for having_clause in snuba_filter.having:
            # The first element of the having clause can be an alias, or a nested
            # array of functions. Loop through to make sure any referenced
            # functions are in the aggregations.
            error_extra = ", and could not be automatically added" if auto_aggregations else ""
            if isinstance(having_clause[0], (list, tuple)):
                # Functions are of the form [fn, [args]]
                args_to_check = [[having_clause[0]]]
                conditions_not_in_aggregations = []
                while len(args_to_check) > 0:
                    args = args_to_check.pop()
                    for arg in args:
                        if arg[0] in [SNUBA_AND, SNUBA_OR]:
                            args_to_check.extend(arg[1])
                        # Only need to iterate on arg[1] if it's a list
                        elif isinstance(arg[1], (list, tuple)):
                            alias = arg[1][0]
                            found = any(
                                alias == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                            )
                            if not found:
                                conditions_not_in_aggregations.append(alias)

                if len(conditions_not_in_aggregations) > 0:
                    raise InvalidSearchQuery(
                        "Aggregate(s) {} used in a condition but are not in the selected columns{}.".format(
                            ", ".join(conditions_not_in_aggregations),
                            error_extra,
                        )
                    )
            else:
                found = any(
                    having_clause[0] == agg_clause[-1] for agg_clause in snuba_filter.aggregations
                )
                if not found:
                    raise InvalidSearchQuery(
                        "Aggregate {} used in a condition but is not a selected column{}.".format(
                            having_clause[0],
                            error_extra,
                        )
                    )

        if conditions is not None:
            snuba_filter.conditions.extend(conditions)

    return PreparedQuery(snuba_filter, translated_columns, resolved_fields)
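# A standalone sketch of the invariant the loop above enforces for the simple
# (string-alias) branch: every aggregate alias referenced in HAVING must also
# appear as a selected aggregation. The clause and aggregation shapes below are
# hypothetical; the nested [fn, [args]] case is handled by the iterative walk
# in the code above.
aggregations = [["count()", None, "count"], ["quantile(0.95)", "duration", "p95"]]
selected_aliases = {agg_clause[-1] for agg_clause in aggregations}

having = [["count", ">", 100], ["failure_rate", ">", 0.5]]
missing = [clause[0] for clause in having if clause[0] not in selected_aliases]
print(missing)  # ['failure_rate'] -> InvalidSearchQuery in the code above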
def validate(self, data):
    if not data.get("id"):
        keys = set(data.keys())
        if self.required_for_create - keys:
            raise serializers.ValidationError(
                {
                    "fields": "fields are required during creation.",
                    "conditions": "conditions are required during creation.",
                }
            )

    # Validate the query that would be created when run.
    conditions = self._get_attr(data, "conditions", "")
    fields = self._get_attr(data, "fields", []).copy()
    orderby = self._get_attr(data, "orderby", "")
    equations, fields = categorize_columns(fields)
    is_table = is_table_display_type(self.context.get("displayType"))
    if equations is not None:
        try:
            resolved_equations, _, _ = resolve_equation_list(
                equations,
                fields,
                auto_add=not is_table,
                aggregates_only=not is_table,
            )
        except (InvalidSearchQuery, ArithmeticError) as err:
            raise serializers.ValidationError({"fields": f"Invalid fields: {err}"})
    else:
        resolved_equations = []

    try:
        parse_search_query(conditions)
    except InvalidSearchQuery as err:
        # We don't know if the widget this query belongs to is an Issue widget
        # or a Discover widget. Pass the error back so the Widget serializer
        # can decide whether or not to raise it based on the widget's type.
        data["issue_query_error"] = {"conditions": [f"Invalid conditions: {err}"]}

    try:
        # The eps/epm functions require an interval argument, or a start/end
        # from which the interval can be computed. This uses a hard-coded
        # start/end so the validation succeeds, since the values themselves
        # don't matter.
        params = {
            "start": datetime.now() - timedelta(days=1),
            "end": datetime.now(),
            "project_id": [p.id for p in self.context.get("projects")],
            "organization_id": self.context.get("organization").id,
        }
        snuba_filter = get_filter(conditions, params=params)
    except InvalidSearchQuery as err:
        data["discover_query_error"] = {"conditions": [f"Invalid conditions: {err}"]}
        return data

    if orderby:
        snuba_filter.orderby = get_function_alias(orderby)
    try:
        resolve_field_list(fields, snuba_filter, resolved_equations=resolved_equations)
    except InvalidSearchQuery as err:
        # Same as above: pass the error back so the Widget serializer can
        # decide whether or not to raise it based on the widget's type.
        data["discover_query_error"] = {"fields": f"Invalid fields: {err}"}

    return data
def get_timeseries_snuba_filter(selected_columns, query, params):
    snuba_filter = get_filter(query, params)
    if not snuba_filter.start and not snuba_filter.end:
        raise InvalidSearchQuery("Cannot get timeseries result without a start and end.")

    columns = []
    equations = []
    for column in selected_columns:
        if is_equation(column):
            equations.append(strip_equation(column))
        else:
            columns.append(column)
    if len(equations) > 0:
        resolved_equations, updated_columns = resolve_equation_list(
            equations, columns, aggregates_only=True, auto_add=True
        )
    else:
        resolved_equations = []
        updated_columns = columns

    # For the new apdex, we need to add project threshold config as a selected
    # column, which means the group by for the time series won't work.
    # As a temporary solution, we will calculate the mean of all the project
    # level thresholds in the request and use the legacy apdex, user_misery,
    # or count_miserable calculation.
    # TODO(snql): Alias the project_threshold_config column so it doesn't have
    # to be in the SELECT statement and group by to be able to use the new
    # apdex, user_misery, and count_miserable.
    threshold = None
    for agg in CONFIGURABLE_AGGREGATES:
        if agg not in updated_columns:
            continue

        if threshold is None:
            project_ids = params.get("project_id")
            threshold_configs = list(
                ProjectTransactionThreshold.objects.filter(
                    organization_id=params["organization_id"],
                    project_id__in=project_ids,
                ).values_list("threshold", flat=True)
            )
            projects_without_threshold = len(project_ids) - len(threshold_configs)
            threshold_configs.extend([DEFAULT_PROJECT_THRESHOLD] * projects_without_threshold)
            threshold = int(mean(threshold_configs))

        updated_columns.remove(agg)
        updated_columns.append(CONFIGURABLE_AGGREGATES[agg].format(threshold=threshold))

    snuba_filter.update_with(
        resolve_field_list(
            updated_columns, snuba_filter, auto_fields=False, resolved_equations=resolved_equations
        )
    )

    # Resolve the public aliases into the discover dataset names.
    snuba_filter, translated_columns = resolve_discover_aliases(snuba_filter)
    if not snuba_filter.aggregations:
        raise InvalidSearchQuery("Cannot get timeseries result with no aggregation.")

    return snuba_filter, translated_columns
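# A standalone sketch (stdlib only) of the threshold averaging above: projects
# without an explicitly configured transaction threshold fall back to the
# default, and the legacy apdex/user_misery functions receive the integer mean.
# All values here are hypothetical.
from statistics import mean

DEFAULT_PROJECT_THRESHOLD = 300
project_ids = [1, 2, 3, 4]
threshold_configs = [250, 500]  # thresholds configured for two of the four projects

projects_without_threshold = len(project_ids) - len(threshold_configs)
threshold_configs.extend([DEFAULT_PROJECT_THRESHOLD] * projects_without_threshold)
threshold = int(mean(threshold_configs))
print(threshold)  # 337 -> substituted into e.g. "apdex(337)"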