Пример #1
0
def test_edit_query():
    """Check that each Query mutator is reflected by its matching getter."""
    body = {
        "selected_columns": ["c1", "c2", "c3"],
        "conditions": [["c1", "=", "a"]],
        "groupby": ["project_id"],
        "aggregations": [["count()", "", "count"]],
        "orderby": "event_id",
        "sample": 10,
        "limit": 100,
        "offset": 50,
    }
    query = Query(body)

    # Setting the selected columns replaces the initial ones.
    query.set_selected_columns(["c4"])
    selected = query.get_selected_columns()
    assert selected == ["c4"]

    # Setting the aggregations replaces the initial ones.
    query.set_aggregations([["different_agg()", "", "something"]])
    aggregations = query.get_aggregations()
    assert aggregations == [["different_agg()", "", "something"]]

    # Added groupby entries are expected after the pre-existing one.
    query.add_groupby(["more", "more2"])
    groupby = query.get_groupby()
    assert groupby == ["project_id", "more", "more2"]

    # Added conditions are expected after the pre-existing one.
    query.add_conditions([["c5", "=", "9"]])
    extended = query.get_conditions()
    assert extended == [["c1", "=", "a"], ["c5", "=", "9"]]

    # Setting the conditions discards everything accumulated so far.
    query.set_conditions([["c6", "=", "10"]])
    replaced = query.get_conditions()
    assert replaced == [["c6", "=", "10"]]
Пример #2
0
def test_edit_query():
    """Check that each Query mutator is reflected by its matching getter.

    The query is built from a full request body and a TableSource with an
    empty ColumnSet.
    """
    body = {
        "selected_columns": ["c1", "c2", "c3"],
        "conditions": [["c1", "=", "a"]],
        "arrayjoin": "tags",
        "having": [["c4", "=", "c"]],
        "groupby": ["project_id"],
        "aggregations": [["count()", "", "count"]],
        "orderby": "event_id",
        "limitby": (100, "environment"),
        "sample": 10,
        "limit": 100,
        "offset": 50,
        "totals": True,
    }
    source = TableSource("my_table", ColumnSet([]))
    query = Query(body, source)

    # Setting the selected columns replaces the initial ones.
    query.set_selected_columns(["c4"])
    selected = query.get_selected_columns()
    assert selected == ["c4"]

    # Setting the aggregations replaces the initial ones.
    query.set_aggregations([["different_agg()", "", "something"]])
    aggregations = query.get_aggregations()
    assert aggregations == [["different_agg()", "", "something"]]

    # Added groupby entries are expected after the pre-existing one.
    query.add_groupby(["more", "more2"])
    groupby = query.get_groupby()
    assert groupby == ["project_id", "more", "more2"]

    # Added conditions are expected after the pre-existing one.
    query.add_conditions([["c5", "=", "9"]])
    extended = query.get_conditions()
    assert extended == [["c1", "=", "a"], ["c5", "=", "9"]]

    # Setting the conditions discards everything accumulated so far.
    query.set_conditions([["c6", "=", "10"]])
    replaced = query.get_conditions()
    assert replaced == [["c6", "=", "10"]]

    # The remaining setters are each read back through their getter.
    query.set_arrayjoin("not_tags")
    arrayjoin = query.get_arrayjoin()
    assert arrayjoin == "not_tags"

    # No granularity was supplied in the body; it is set here for the first time.
    query.set_granularity(7200)
    granularity = query.get_granularity()
    assert granularity == 7200

    # Likewise, prewhere was absent from the body.
    query.set_prewhere([["pc6", "=", "10"]])
    prewhere = query.get_prewhere()
    assert prewhere == [["pc6", "=", "10"]]
Пример #3
0
 def process_query(self, query: Query, request_settings: RequestSettings,) -> None:
     """
     Promote the highest-priority eligible conditions of the query to prewhere.

     A condition is eligible when util.is_condition accepts it and at least
     one column referenced by its left-hand side is listed among the data
     source's prewhere candidates. Eligible conditions are ranked by the best
     (lowest) position of their columns in that candidate list, capped at the
     configured maximum, removed from the query conditions and stored as the
     query prewhere.

     The query is left untouched when the data source has no prewhere
     candidates or when the query has no conditions. request_settings is not
     used by this method.
     """
     limit: int = (
         self.__max_prewhere_conditions or settings.MAX_PREWHERE_CONDITIONS
     )

     prewhere_keys = query.get_data_source().get_prewhere_candidates()
     if not prewhere_keys:
         return

     conditions = query.get_conditions()
     if not conditions:
         return

     # First pass: keep only the conditions that util.is_condition accepts
     # and that reference at least one prewhere key.
     eligible = []
     for cond in conditions:
         if not util.is_condition(cond):
             continue
         if not any(
             col in prewhere_keys for col in util.columns_in_expr(cond[0])
         ):
             continue
         eligible.append((util.columns_in_expr(cond[0]), cond))

     # Second pass: the priority of a condition is the smallest index, in
     # the prewhere keys list, among the columns it references.
     ranked = []
     for cols, cond in eligible:
         priority = min(
             prewhere_keys.index(col) for col in cols if col in prewhere_keys
         )
         ranked.append((priority, cond))
     # list.sort is stable, so conditions with equal priority keep the order
     # they had in the query.
     ranked.sort(key=lambda entry: entry[0])

     chosen: Sequence[Condition] = []
     if ranked:
         chosen = [cond for _, cond in ranked][:limit]
         # Whatever was promoted must no longer appear in the conditions.
         query.set_conditions([cond for cond in conditions if cond not in chosen])
     query.set_prewhere(chosen)
Пример #4
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        """
        Replace optimizable nested-column conditions with LIKE / NOT LIKE
        conditions on the flattened column.

        Every condition for which self.__is_optimizable returns a key/value
        is dropped and re-expressed against self.__flattened_col: all the
        equality ones are merged into one LIKE pattern, every other one
        becomes its own NOT LIKE pattern. The remaining conditions are kept
        as they are and the result is stored back on the query.

        Nothing is changed when the query has no conditions, when a beginning
        of time is configured and no lower time bound falls after it (or one
        cannot be parsed), or when the arrayjoin, groupby, having or orderby
        expressions are matched by self.__has_tags. request_settings is not
        used by this method.
        """
        conditions = query.get_conditions()
        if not conditions:
            return

        # Enable the processor only if we have enough data in the flattened
        # columns, which have been deployed at BEGINNING_OF_TIME. If the query
        # starts earlier than that we do not apply the optimization.
        if self.__beginning_of_time:
            starts_late_enough = False
            # Every condition is inspected, even after a suitable bound has
            # been found, so that a malformed bound always aborts.
            for cond in conditions:
                is_lower_time_bound = (
                    is_condition(cond)
                    and isinstance(cond[0], str)
                    and cond[0] in self.__timestamp_cols
                    and cond[1] in (">=", ">")
                    and isinstance(cond[2], str)
                )
                if not is_lower_time_bound:
                    continue
                try:
                    start_ts = parse_datetime(cond[2])
                    elapsed = start_ts - self.__beginning_of_time
                    if elapsed.total_seconds() > 0:
                        starts_late_enough = True
                except Exception:
                    # We should not get here, it means the from timestamp is
                    # malformed. Returning here is just for safety.
                    logger.error(
                        "Cannot parse start date for NestedFieldOptimizer: %r",
                        cond,
                    )
                    return
            if not starts_late_enough:
                return

        # Do not use flattened tags if tags are being unpacked anyway. In that
        # case using flattened tags only implies loading an additional column
        # thus making the query heavier and slower.
        if self.__has_tags(query.get_arrayjoin_from_ast()):
            return

        groupby = query.get_groupby_from_ast()
        if groupby:
            for group_expr in groupby:
                if self.__has_tags(group_expr):
                    return

        if self.__has_tags(query.get_having_from_ast()):
            return

        orderby = query.get_orderby_from_ast()
        if orderby:
            for order_clause in orderby:
                if self.__has_tags(order_clause.expression):
                    return

        kept_conditions = []
        equal_fragments: List[str] = []
        not_equal_fragments: List[str] = []

        for cond in conditions:
            keyvalue = self.__is_optimizable(cond, self.__nested_col)
            if not keyvalue:
                kept_conditions.append(cond)
                continue
            fragment = f"{escape_field(keyvalue.nested_col_key)}={escape_field(keyvalue.value)}"
            if keyvalue.operand == Operand.EQ:
                equal_fragments.append(fragment)
            else:
                not_equal_fragments.append(fragment)

        if equal_fragments:
            # Positive conditions "=" are all merged together in one LIKE
            # expression, with the fragments in sorted order.
            equal_fragments = sorted(equal_fragments)
            like_pattern = f"%|{'|%|'.join(equal_fragments)}|%"
            kept_conditions.append([self.__flattened_col, "LIKE", like_pattern])

        # Negative conditions "!=" cannot be merged together. We can still
        # transform them into NOT LIKE statements, but each condition has to
        # be one statement.
        for fragment in not_equal_fragments:
            not_like_pattern = f"%|{fragment}|%"
            kept_conditions.append(
                [self.__flattened_col, "NOT LIKE", not_like_pattern])

        query.set_conditions(kept_conditions)