示例#1
0
    def __init__(self, column_name: str, hash_map_name: str, killswitch: str) -> None:
        self.__column_name = column_name
        self.__hash_map_name = hash_map_name
        self.__killswitch = killswitch

        # TODO: Add the support for IN conditions.
        self.__optimizable_pattern = FunctionCall(
            function_name=String("equals"),
            parameters=(
                Or(
                    [
                        mapping_pattern,
                        FunctionCall(
                            function_name=String("ifNull"),
                            parameters=(mapping_pattern, Literal(String(""))),
                        ),
                    ]
                ),
                Param("right_hand_side", Literal(Any(str))),
            ),
        )
        self.__tag_exists_patterns = [
            FunctionCall(
                function_name=String("notEquals"),
                parameters=(
                    Or(
                        [
                            mapping_pattern,
                            FunctionCall(
                                function_name=String("ifNull"),
                                parameters=(mapping_pattern, Literal(String(""))),
                            ),
                        ]
                    ),
                    Param("right_hand_side", Literal(Any(str))),
                ),
            ),
            FunctionCall(
                function_name=String("has"),
                parameters=(
                    ColumnMatcher(
                        Param(TABLE_MAPPING_PARAM, AnyOptionalString()),
                        Param(VALUE_COL_MAPPING_PARAM, String(f"{column_name}.key")),
                    ),
                    Literal(Param(KEY_MAPPING_PARAM, Any(str))),
                ),
            ),
        ]
示例#2
0
def get_time_range_expressions(
    conditions: Sequence[Expression],
    timestamp_field: str,
    table_name: Optional[str] = None,
) -> Tuple[Optional[Tuple[datetime, FunctionCallExpr]], Optional[Tuple[
        datetime, FunctionCallExpr]], ]:
    max_lower_bound: Optional[Tuple[datetime, FunctionCallExpr]] = None
    min_upper_bound: Optional[Tuple[datetime, FunctionCallExpr]] = None
    table_match = String(table_name) if table_name else None
    for c in conditions:
        match = FunctionCall(
            Param(
                "operator",
                Or([
                    String(OPERATOR_TO_FUNCTION[">="]),
                    String(OPERATOR_TO_FUNCTION["<"]),
                ]),
            ),
            (
                Column(table_match, String(timestamp_field)),
                Literal(Param("timestamp", Any(datetime))),
            ),
        ).match(c)

        if match is not None:
            timestamp = cast(datetime, match.scalar("timestamp"))
            assert isinstance(c, FunctionCallExpr)
            if match.string("operator") == OPERATOR_TO_FUNCTION[">="]:
                if not max_lower_bound or timestamp > max_lower_bound[0]:
                    max_lower_bound = (timestamp, c)
            else:
                if not min_upper_bound or timestamp < min_upper_bound[0]:
                    min_upper_bound = (timestamp, c)

    return (max_lower_bound, min_upper_bound)
示例#3
0
def get_time_range_estimate(
    query: ProcessableQuery[Table],
) -> Tuple[Optional[datetime], Optional[datetime]]:
    """
    Best guess to find the time range for the query.
    We pick the first column that is compared with a datetime Literal.
    """
    pattern = FunctionCall(
        Or([String(ConditionFunctions.GT),
            String(ConditionFunctions.GTE)]),
        (Column(None, Param("col_name", Any(str))), Literal(Any(datetime))),
    )

    from_date, to_date = None, None
    condition = query.get_condition()
    if condition is None:
        return None, None
    for exp in condition:
        result = pattern.match(exp)
        if result is not None:
            from_date, to_date = get_time_range(query,
                                                result.string("col_name"))
            break

    return from_date, to_date
示例#4
0
def _get_date_range(query: Query) -> Optional[int]:
    """
    Best guess to find the time range for the query.
    We pick the first column that is compared with a datetime Literal.
    """
    pattern = FunctionCall(
        Or([String(ConditionFunctions.GT),
            String(ConditionFunctions.GTE)]),
        (Column(None, Param("col_name", Any(str))), Literal(Any(datetime))),
    )

    condition = query.get_condition_from_ast()
    if condition is None:
        return None
    for exp in condition:
        result = pattern.match(exp)
        if result is not None:
            from_date, to_date = get_time_range(query,
                                                result.string("col_name"))
            if from_date is None or to_date is None:
                return None
            else:
                return (to_date - from_date).days

    return None
    def extractor(condition: Expression) -> Set[str]:
        match = FunctionCall(
            String(ConditionFunctions.EQ),
            (key_pattern, Literal(Param("key", Any(str)))),
        ).match(condition)

        if match is None:
            return set()

        return {match.string("key")}
示例#6
0
        def replace_exp(exp: Expression) -> Expression:
            matcher = FunctionCall(
                String("notEquals"),
                (Column(None, String("type")), Literal(String("transaction"))),
            )

            if matcher.match(exp):
                return LiteralExpr(None, 1)

            return exp
示例#7
0
    def __init__(self, column_name: str, hash_map_name: str,
                 killswitch: str) -> None:
        self.__column_name = column_name
        self.__hash_map_name = hash_map_name
        self.__killswitch = killswitch

        # TODO: Add the support for IN connditions.
        self.__optimizable_pattern = FunctionCall(
            function_name=String("equals"),
            parameters=(
                Or([
                    mapping_pattern,
                    FunctionCall(
                        function_name=String("ifNull"),
                        parameters=(mapping_pattern, Literal(String(""))),
                    ),
                ]),
                Param("right_hand_side", Literal(Any(str))),
            ),
        )
示例#8
0
    def get_project_ids_in_condition(
            condition: Expression) -> Optional[Set[int]]:
        """
        Extract project ids from an expression. Returns None if no project
        if condition is found. It returns an empty set of conflicting project_id
        conditions are found.
        """
        match = FunctionCall(
            None,
            String(ConditionFunctions.EQ),
            (
                Column(column_name=String(project_column)),
                Literal(value=Param("project_id", Any(int))),
            ),
        ).match(condition)
        if match is not None:
            return {match.integer("project_id")}

        match = is_in_condition_pattern(
            Column(column_name=String(project_column))).match(condition)
        if match is not None:
            projects = match.expression("tuple")
            assert isinstance(projects, FunctionCallExpr)
            return {
                l.value
                for l in projects.parameters
                if isinstance(l, LiteralExpr) and isinstance(l.value, int)
            }

        match = FunctionCall(
            None,
            Param(
                "operator",
                Or([String(BooleanFunctions.AND),
                    String(BooleanFunctions.OR)]),
            ),
            (Param("lhs", AnyExpression()), Param("rhs", AnyExpression())),
        ).match(condition)
        if match is not None:
            lhs_projects = get_project_ids_in_condition(
                match.expression("lhs"))
            rhs_projects = get_project_ids_in_condition(
                match.expression("rhs"))
            if lhs_projects is None:
                return rhs_projects
            elif rhs_projects is None:
                return lhs_projects
            else:
                return (lhs_projects & rhs_projects if match.string("operator")
                        == BooleanFunctions.AND else lhs_projects
                        | rhs_projects)

        return None
示例#9
0
 def __init__(self, array_columns: Sequence[str]):
     self.__array_has_pattern = FunctionCall(
         String("equals"),
         (
             Param(
                 "has",
                 FunctionCall(
                     String("has"),
                     (
                         Column(
                             column_name=Or(
                                 [String(column) for column in array_columns]
                             )
                         ),
                         Literal(Any(str)),
                     ),
                 ),
             ),
             Literal(Integer(1)),
         ),
     )
示例#10
0
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        matcher = FunctionCall(
            String("arrayElement"),
            (
                Column(
                    None,
                    String("contexts.value"),
                ),
                FunctionCall(
                    String("indexOf"),
                    (
                        Column(None, String("contexts.key")),
                        Literal(
                            Or([
                                String("device.simulator"),
                                String("device.online"),
                                String("device.charging"),
                            ]), ),
                    ),
                ),
            ),
        )

        def process_column(exp: Expression) -> Expression:
            match = matcher.match(exp)

            if match:
                inner = replace(exp, alias=None)
                return FunctionCallExpr(
                    exp.alias,
                    "if",
                    (
                        binary_condition(
                            ConditionFunctions.IN,
                            inner,
                            literals_tuple(
                                None,
                                [
                                    LiteralExpr(None, "1"),
                                    LiteralExpr(None, "True")
                                ],
                            ),
                        ),
                        LiteralExpr(None, "True"),
                        LiteralExpr(None, "False"),
                    ),
                )

            return exp

        query.transform_expressions(process_column)
 def is_skippable_condition(conditions: Expression) -> bool:
     """
     A condition composed of a bunch of has(column, ...) conditions OR'ed together
     can be ignored when looking for filter keys because these are the conditions
     used for the bloom filter index on the array column.
     """
     for column_name in column_names:
         has_pattern = FunctionCall(
             String("has"),
             (Column(column_name=String(column_name)), Literal(Any(str))),
         )
         if all(
             has_pattern.match(c) for c in get_first_level_or_conditions(conditions)
         ):
             return True
     return False
def _get_mapping_keys_in_condition(
    condition: Expression, column_name: str
) -> Optional[Set[str]]:
    """
    Finds the top level conditions that include filter based on the arrayJoin.
    This is meant to be used to find the keys the query is filtering the arrayJoin
    on.
    We can only apply the arrayFilter optimization to arrayJoin conditions
    that are not in OR with other columns. To simplify the problem, we only
    consider those conditions that are included in the first level of the query:
    [['tagskey' '=' 'a'],['col' '=' 'b'],['col2' '=' 'c']]  works
    [[['tagskey' '=' 'a'], ['col2' '=' 'b']], ['tagskey' '=' 'c']] does not

    If we encounter an OR condition we return None, which means we cannot
    safely apply the optimization. Empty set means we did not find any
    suitable arrayJoin for optimization in this condition but that does
    not disqualify the whole query in the way the OR condition does.
    """
    keys_found = set()

    conditions = get_first_level_and_conditions(condition)
    for c in conditions:
        if is_binary_condition(c, BooleanFunctions.OR):
            return None

        match = FunctionCall(
            None,
            String(ConditionFunctions.EQ),
            (array_join_pattern(column_name), Literal(None, Param("key", Any(str)))),
        ).match(c)
        if match is not None:
            keys_found.add(match.string("key"))

        match = is_in_condition_pattern(array_join_pattern(column_name)).match(c)
        if match is not None:
            function = match.expression("tuple")
            assert isinstance(function, FunctionCallExpr)
            keys_found |= {
                lit.value
                for lit in function.parameters
                if isinstance(lit, LiteralExpr) and isinstance(lit.value, str)
            }

    return keys_found
示例#13
0
def get_time_range(
        query: Query,
        timestamp_field: str) -> Tuple[Optional[datetime], Optional[datetime]]:
    """
    Finds the minimal time range for this query. Which means, it finds
    the >= timestamp condition with the highest datetime literal and
    the < timestamp condition with the smallest and returns the interval
    in the form of a tuple of Literals. It only looks into first level
    AND conditions since, if the timestamp is nested in an OR we cannot
    say anything on how that compares to the other timestamp conditions.
    """

    condition_clause = query.get_condition_from_ast()
    if not condition_clause:
        return (None, None)

    max_lower_bound = None
    min_upper_bound = None
    for c in get_first_level_and_conditions(condition_clause):
        match = FunctionCall(
            None,
            Param(
                "operator",
                Or([
                    String(OPERATOR_TO_FUNCTION[">="]),
                    String(OPERATOR_TO_FUNCTION["<"]),
                ]),
            ),
            (
                Column(None, None, String(timestamp_field)),
                Literal(None, Param("timestamp", Any(datetime))),
            ),
        ).match(c)

        if match is not None:
            timestamp = cast(datetime, match.scalar("timestamp"))
            if match.string("operator") == OPERATOR_TO_FUNCTION[">="]:
                if not max_lower_bound or timestamp > max_lower_bound:
                    max_lower_bound = timestamp
            else:
                if not min_upper_bound or timestamp < min_upper_bound:
                    min_upper_bound = timestamp

    return (max_lower_bound, min_upper_bound)
示例#14
0
            LiteralExpr(None, None),
        ),
    )


TABLE_MAPPING_PARAM = "table_name"
VALUE_COL_MAPPING_PARAM = "value_column"
KEY_COL_MAPPING_PARAM = "key_column"
KEY_MAPPING_PARAM = "key"
mapping_pattern = FunctionCall(
    None,
    String("arrayElement"),
    (
        Column(
            None,
            Param(TABLE_MAPPING_PARAM, AnyOptionalString()),
            Param(VALUE_COL_MAPPING_PARAM, Any(str)),
        ),
        FunctionCall(
            None,
            String("indexOf"),
            (
                Column(None, None, Param(KEY_COL_MAPPING_PARAM, Any(str))),
                Literal(None, Param(KEY_MAPPING_PARAM, Any(str))),
            ),
        ),
    ),
)

# TODO: build more of these mappers.
示例#15
0
    Column,
    FunctionCall,
    Literal,
    MatchResult,
    OptionalString,
    Or,
    Param,
    Pattern,
    String,
    SubscriptableReference,
)

test_cases = [
    (
        "Literal match",
        Literal(None),
        LiteralExpr("random_alias", 1),
        MatchResult(),
    ),
    (
        "Literal match with none type",
        Literal(Any(type(None))),
        LiteralExpr("alias", 1),
        None,
    ),
    (
        "Single node match",
        Column(OptionalString("table"), String("test_col")),
        ColumnExpr("alias_we_don't_care_of", "table", "test_col"),
        MatchResult(),
    ),
示例#16
0
 (
     "Or within a Param",
     Param(
         "one_of_the_two",
         Or([
             Column(None, String("col_name1")),
             Column(None, String("col_name2"))
         ]),
     ),
     ColumnExpr("irrelevant", None, "col_name2"),
     MatchResult(
         {"one_of_the_two": ColumnExpr("irrelevant", None, "col_name2")}),
 ),
 (
     "Match String Literal",
     Literal(OptionalString("value")),
     LiteralExpr("irrelevant", "value"),
     MatchResult(),
 ),
 (
     "Match any string as Literal",
     Literal(Any(str)),
     LiteralExpr("irrelevant", "value"),
     MatchResult(),
 ),
 (
     "Does not match an int as Literal",
     Literal(Any(str)),
     LiteralExpr("irrelevant", 123),
     None,
 ),
示例#17
0
def test_subscription_worker(subscription_data: SubscriptionData) -> None:
    broker: Broker[SubscriptionTaskResult] = Broker(MemoryMessageStorage(),
                                                    TestingClock())

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0: SubscriptionScheduler(store, PartitionId(0), timedelta(),
                                     metrics)
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    result_futures = worker.process_message(
        Message(Partition(Topic("events"), 0), 0, tick, now))

    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    for i in range(evaluations):
        timestamp = now - frequency * (evaluations - i)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        task, future = result_futures[i]
        future_result = request, result = future.result()
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        from_pattern = FunctionCall(
            String(ConditionFunctions.GTE),
            (
                Column(None, String("timestamp")),
                Literal(Datetime(timestamp - subscription.data.time_window)),
            ),
        )
        to_pattern = FunctionCall(
            String(ConditionFunctions.LT),
            (Column(None, String("timestamp")), Literal(Datetime(timestamp))),
        )

        condition = request.query.get_condition()
        assert condition is not None

        conditions = get_first_level_and_conditions(condition)

        assert any([from_pattern.match(e) for e in conditions])
        assert any([to_pattern.match(e) for e in conditions])

        assert result == {
            "meta": [{
                "name": "count",
                "type": "UInt64"
            }],
            "data": [{
                "count": 0
            }],
        }
示例#18
0
    Any,
    AnyExpression,
    AnyOptionalString,
    Column,
    FunctionCall,
    Literal,
    MatchResult,
    OptionalString,
    Or,
    Param,
    Pattern,
    String,
)

test_cases = [
    ("Literal match", Literal(None), LiteralExpr("random_alias", 1), MatchResult(),),
    (
        "Literal match with none type",
        Literal(Any(type(None))),
        LiteralExpr("alias", 1),
        None,
    ),
    (
        "Single node match",
        Column(OptionalString("table"), String("test_col")),
        ColumnExpr("alias_we_don't_care_of", "table", "test_col"),
        MatchResult(),
    ),
    (
        "Single node no match",
        Column(None, String("test_col")),