def do_post_processing(
    self,
    project_ids: Sequence[int],
    query: Query,
    request_settings: RequestSettings,
) -> None:
    """Adjust ``query`` according to the replacement flags of ``project_ids``.

    Nothing is done when ``request_settings.get_turbo()`` is truthy.
    Otherwise the ``(final, exclude_group_ids)`` pair returned by
    ``get_projects_query_flags`` decides between two outcomes:

    * ``query.set_final(...)`` is called, or
    * a ``NOT IN`` filter on ``assumeNotNull(group_id)`` is attached to the
      query, both as a legacy condition and as an AST condition.
    """
    if request_settings.get_turbo():
        return

    needs_final, excluded_groups = get_projects_query_flags(
        project_ids, self.__replacer_state_name
    )

    # Either final was requested outright or there is nothing to exclude:
    # just forward the flag to the query.
    if needs_final or not excluded_groups:
        query.set_final(needs_final)
        return

    # If the number of groups to exclude exceeds our limit, the query
    # should just use final instead of the exclusion set.
    exclusion_limit = get_config(
        "max_group_ids_exclude", settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE
    )
    if len(excluded_groups) > exclusion_limit:
        query.set_final(True)
        return

    # Legacy (list based) representation of the exclusion filter.
    legacy_filter = (["assumeNotNull", ["group_id"]], "NOT IN", excluded_groups)
    query.add_conditions([legacy_filter])

    # The same filter expressed on the AST.
    non_null_group_id = FunctionCall(
        None, "assumeNotNull", (Column(None, "group_id", None),)
    )
    excluded_literals = [Literal(None, group) for group in excluded_groups]
    query.add_condition_to_ast(
        not_in_condition(None, non_null_group_id, excluded_literals)
    )
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """Apply replacement handling to ``query`` based on the projects it reads.

    Returns immediately when ``request_settings.get_turbo()`` is truthy.
    Otherwise the project ids found in the query are used to look up the
    replacement flags. ``query.set_final`` is always called with the
    resulting decision, and when a group exclusion list is usable a
    ``NOT IN`` filter on ``assumeNotNull(group_id)`` is added to both the
    AST and the legacy conditions. A ``"final"`` metric is incremented with
    the cause whenever final is selected.
    """
    if request_settings.get_turbo():
        return

    found_projects = get_project_ids_in_query_ast(query, self.__project_column)

    use_final = False
    legacy_exclusion = None

    if found_projects:
        final, excluded_groups = get_projects_query_flags(
            list(found_projects), self.__replacer_state_name,
        )
        if final:
            metrics.increment("final", tags={"cause": "final_flag"})

        if final or not excluded_groups:
            # No exclusion set to consider: the flag alone decides.
            use_final = final
        else:
            # If the number of groups to exclude exceeds our limit, the query
            # should just use final instead of the exclusion set.
            exclusion_limit = get_config(
                "max_group_ids_exclude",
                settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE,
            )
            if len(excluded_groups) > exclusion_limit:
                metrics.increment("final", tags={"cause": "max_groups"})
                use_final = True
            else:
                # Remember the legacy form; it is attached after set_final.
                legacy_exclusion = (
                    ["assumeNotNull", ["group_id"]],
                    "NOT IN",
                    excluded_groups,
                )
                non_null_group_id = FunctionCall(
                    None, "assumeNotNull", (Column(None, None, "group_id"),)
                )
                excluded_literals = [
                    Literal(None, group) for group in excluded_groups
                ]
                query.add_condition_to_ast(
                    not_in_condition(None, non_null_group_id, excluded_literals)
                )

    query.set_final(use_final)
    if legacy_exclusion:
        query.add_conditions([legacy_exclusion])
( # Exclude NOT IN condition from the prewhere as they are generally not excluding # most of the dataset. { "conditions": [["a", "NOT IN", [1, 2, 3]], ["b", "=", "2"], ["c", "=", "3"]] }, ["a", "b"], FunctionCall( None, BooleanFunctions.AND, ( not_in_condition( None, Column("a", None, "a"), [Literal(None, 1), Literal(None, 2), Literal(None, 3)], ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], (Column("c", None, "c"), Literal(None, "3")), ), ), ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], (Column("b", None, "b"), Literal(None, "2")), ),
def process_query(self, query: Query, query_settings: QuerySettings) -> None:
    """Decide between final and a group exclusion filter for ``query``.

    Returns immediately when ``query_settings.get_turbo()`` is truthy. When
    no project ids can be extracted from the query, or the query does not
    overlap the latest replacement, final is set to ``False`` and nothing
    else happens. Otherwise final is set to ``True`` (and ``FINAL_METRIC``
    is incremented with a ``cause`` tag) when the flags require it or when
    the exclusion set is larger than the configured limit; a non-empty
    exclusion set within the limit is instead added to the AST as a
    ``NOT IN`` condition on ``assumeNotNull`` of the groups column.
    """
    if query_settings.get_turbo():
        return

    project_ids = get_object_ids_in_query_ast(query, self.__project_column)
    if project_ids is None:
        self._set_query_final(query, False)
        return

    flags: ProjectsQueryFlags = ProjectsQueryFlags.load_from_redis(
        list(project_ids), self.__replacer_state_name
    )

    overlaps = self._query_overlaps_replacements(
        query, flags.latest_replacement_time
    )
    if not overlaps:
        self._set_query_final(query, False)
        return

    tags = self._initialize_tags(query_settings, flags)

    # Set to the metric "cause" tag when final has to be used, else None.
    final_cause = None

    if flags.needs_final:
        final_cause = "final_flag"
    elif flags.group_ids_to_exclude:
        # If the number of groups to exclude exceeds our limit, the query
        # should just use final instead of the exclusion set.
        exclusion_limit = get_config(
            "max_group_ids_exclude",
            settings.REPLACER_MAX_GROUP_IDS_TO_EXCLUDE,
        )
        assert isinstance(exclusion_limit, int)

        excluded = self._groups_to_exclude(query, flags.group_ids_to_exclude)
        if len(excluded) > exclusion_limit:
            final_cause = "max_groups"
        elif excluded:
            non_null_group = FunctionCall(
                None,
                "assumeNotNull",
                (Column(None, None, self.__groups_column),),
            )
            query.add_condition_to_ast(
                not_in_condition(
                    non_null_group,
                    [Literal(None, group) for group in excluded],
                )
            )

    use_final = final_cause is not None
    if use_final:
        tags["cause"] = final_cause
        metrics.increment(
            name=FINAL_METRIC,
            tags=tags,
        )

    self._set_query_final(query, use_final)
( # Exclude NOT IN condition from the prewhere as they are generally not excluding # most of the dataset. { "conditions": [["a", "NOT IN", [1, 2, 3]], ["b", "=", "2"], ["c", "=", "3"]] }, ["a", "b"], [], FunctionCall( None, BooleanFunctions.AND, ( not_in_condition( Column("_snuba_a", None, "a"), [Literal(None, 1), Literal(None, 2), Literal(None, 3)], ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], (Column("_snuba_c", None, "c"), Literal(None, "3")), ), ), ), FunctionCall( None, OPERATOR_TO_FUNCTION["="], (Column("_snuba_b", None, "b"), Literal(None, "2")), ), False,