def test_nested_aggregate_legacy_format(self, dataset):
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    priority = [
        "toUInt64(plus(multiply(log(times_seen), 600), last_seen))",
        "",
        "priority",
    ]
    assert (
        column_expr(
            dataset,
            "",
            Query({"aggregations": [priority]}, source),
            ParsingContext(),
            priority[2],
            priority[0],
        )
        == "(toUInt64(plus(multiply(log(times_seen), 600), last_seen)) AS priority)"
    )

    top_k = ["topK(3)", "logger", "top_3"]
    assert (
        column_expr(
            dataset,
            top_k[1],
            Query({"aggregations": [top_k]}, source),
            ParsingContext(),
            top_k[2],
            top_k[0],
        )
        == "(topK(3)(logger) AS top_3)"
    )
def test_conditions_expr():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)
    conditions = [['events.a', '=', 1]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(events.a AS `events.a`) = 1'

    conditions = [[['events.a', '=', 1], ['groups.b', '=', 2]], [['events.c', '=', 3], ['groups.d', '=', 4]]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == ('((events.a AS `events.a`) = 1 OR (groups.b AS `groups.b`) = 2)'
            ' AND ((events.c AS `events.c`) = 3 OR (groups.d AS `groups.d`) = 4)')

    # Test column expansion
    conditions = [[['events.tags[foo]', '=', 1], ['groups.b', '=', 2]]]
    expanded = column_expr(dataset, 'events.tags[foo]', Query({}), ParsingContext())
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == '({} = 1 OR (groups.b AS `groups.b`) = 2)'.format(expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = Query({})
    parsing_context = ParsingContext()
    conditions = [[['events.tags[foo]', '=', 1], ['groups.b', '=', 2]]]
    column_expr(dataset, 'events.tags[foo]', reuse_query, parsing_context)  # Expand it once so the next time is aliased
    assert conditions_expr(dataset, conditions, reuse_query, parsing_context) \
        == '(`events.tags[foo]` = 1 OR (groups.b AS `groups.b`) = 2)'

    # Test special output format of LIKE
    conditions = [['events.primary_hash', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == '(events.primary_hash AS `events.primary_hash`) LIKE \'%foo%\''

    conditions = tuplify([[['notEmpty', ['arrayElement', ['events.exception_stacks.type', 1]]], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty(arrayElement((events.exception_stacks.type AS `events.exception_stacks.type`), 1)) = 1'

    conditions = tuplify([[['notEmpty', ['events.tags[sentry:user]']], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty(`events.tags[sentry:user]`) = 1'

    conditions = tuplify([[['notEmpty', ['events.tags_key']], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'notEmpty((arrayJoin(events.tags.key) AS `events.tags_key`)) = 1'

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [['events.exception_frames.filename', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) \
        == 'arrayExists(x -> assumeNotNull(x LIKE \'%foo%\'), (events.exception_frames.filename AS `events.exception_frames.filename`))'
def test_aliases() -> None:
    # No context
    col1 = Column("al1", "column1", "table1")
    col2 = Column("al1", "column1", "table1")
    assert col1.accept(ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"
    assert col2.accept(ClickhouseExpressionFormatter()) == "(table1.column1 AS al1)"

    # With Context
    pc = ParsingContext()
    assert col1.accept(ClickhouseExpressionFormatter(pc)) == "(table1.column1 AS al1)"
    assert col2.accept(ClickhouseExpressionFormatter(pc)) == "al1"

    # Hierarchical expression inherits parsing context and applies aliases
    f = FunctionCall(
        None,
        "f1",
        (
            FunctionCall("tag[something]", "tag", (Column(None, "column1", "table1"),)),
            FunctionCall("tag[something]", "tag", (Column(None, "column1", "table1"),)),
            FunctionCall("tag[something]", "tag", (Column(None, "column1", "table1"),)),
        ),
    )
    expected = "f1((tag(table1.column1) AS `tag[something]`), `tag[something]`, `tag[something]`)"
    assert f.accept(ClickhouseExpressionFormatter()) == expected
def _format_query_content(
    query: FormattableQuery,
    expression_formatter_type: Type[ExpressionFormatterBase],
) -> Sequence[FormattedNode]:
    """
    Produces the content of the formatted query.

    It works for both the composite query and the simple one as the
    only difference is the presence of the prewhere condition.
    Should we have more differences going on we should break this
    method into smaller ones.
    """
    parsing_context = ParsingContext()
    formatter = expression_formatter_type(parsing_context)

    return [
        v
        for v in [
            _format_select(query, formatter),
            PaddingNode(
                "FROM",
                DataSourceFormatter(expression_formatter_type).visit(
                    query.get_from_clause()
                ),
            ),
            _format_arrayjoin(query, formatter),
            _build_optional_string_node("PREWHERE", query.get_prewhere_ast(), formatter)
            if isinstance(query, Query)
            else None,
            _build_optional_string_node("WHERE", query.get_condition(), formatter),
            _format_groupby(query, formatter),
            _build_optional_string_node("HAVING", query.get_having(), formatter),
            _format_orderby(query, formatter),
            _format_limitby(query, formatter),
            _format_limit(query, formatter),
        ]
        if v is not None
    ]
def test_nested_aggregate_legacy_format(self, dataset):
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    priority = [
        'toUInt64(plus(multiply(log(times_seen), 600), last_seen))',
        '',
        'priority',
    ]
    assert column_expr(
        dataset,
        '',
        Query({'aggregations': [priority]}, source),
        ParsingContext(),
        priority[2],
        priority[0],
    ) == '(toUInt64(plus(multiply(log(times_seen), 600), last_seen)) AS priority)'

    top_k = ['topK(3)', 'logger', 'top_3']
    assert column_expr(
        dataset,
        top_k[1],
        Query({'aggregations': [top_k]}, source),
        ParsingContext(),
        top_k[2],
        top_k[0],
    ) == '(topK(3)(logger) AS top_3)'
def test_order_by():
    dataset = get_dataset("groups")
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    body = {}
    query = Query(body, source)
    assert (
        column_expr(dataset, "-events.event_id", deepcopy(query), ParsingContext())
        == "-(events.event_id AS `events.event_id`)"
    )

    context = ParsingContext()
    context.add_alias("`events.event_id`")
    assert (
        column_expr(dataset, "-events.event_id", deepcopy(query), context)
        == "-`events.event_id`"
    )
def alias_expr(expr: str, alias: str, parsing_context: ParsingContext) -> str:
    """
    Return the correct expression to use in the final SQL. Keeps a cache
    of the previously created expressions and aliases, so it knows when
    it can subsequently replace a redundant expression with an alias.

    1. If the expression and alias are equal, just return that.
    2. Otherwise, if the expression is new, add its alias to the cache
       so it can be reused later, and return `expr AS alias`.
    3. If the expression has been aliased before, return the alias.
    """
    if expr == alias:
        return expr
    elif parsing_context.is_alias_present(alias):
        return alias
    else:
        parsing_context.add_alias(alias)
        return "({} AS {})".format(expr, alias)
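# A minimal usage sketch (not part of the codebase) illustrating the three
# branches of alias_expr above; it assumes only the ParsingContext methods
# already used by that function (is_alias_present / add_alias).
def _alias_expr_demo() -> None:
    ctx = ParsingContext()
    # 1. Expression and alias are equal: returned unchanged.
    assert alias_expr("col1", "col1", ctx) == "col1"
    # 2. First occurrence: the alias is cached and `expr AS alias` is emitted.
    assert alias_expr("f(col1)", "al", ctx) == "(f(col1) AS al)"
    # 3. Subsequent occurrences: the cached alias is substituted.
    assert alias_expr("f(col1)", "al", ctx) == "al"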
def test_apdex_expression(self, dataset):
    body = {"aggregations": [["apdex(duration, 300)", "", "apdex_score"]]}
    parsing_context = ParsingContext()
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    exprs = [
        column_expr(dataset, col, Query(body, source), parsing_context, alias, agg)
        for (agg, col, alias) in body["aggregations"]
    ]
    assert exprs == [
        "((countIf(duration <= 300) + (countIf((duration > 300) AND (duration <= 1200)) / 2)) / count() AS apdex_score)"
    ]
def test_impact_expression(self, dataset):
    body = {
        "aggregations": [["impact(duration, 300, user)", "", "impact_score"]]
    }
    parsing_context = ParsingContext()
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    exprs = [
        column_expr(dataset, col, Query(body, source), parsing_context, alias, agg)
        for (agg, col, alias) in body["aggregations"]
    ]
    assert exprs == [
        "((1 - (countIf(duration <= 300) + (countIf((duration > 300) AND (duration <= 1200)) / 2)) / count()) + ((1 - (1 / sqrt(uniq(user)))) * 3) AS impact_score)"
    ]
def test_duplicate_expression_alias(self, dataset):
    body = {
        'aggregations': [
            ['top3', 'logger', 'dupe_alias'],
            ['uniq', 'environment', 'dupe_alias'],
        ]
    }
    parsing_context = ParsingContext()
    # In the case where 2 different expressions are aliased
    # to the same thing, one ends up overwriting the other.
    # This may not be ideal as it may mask bugs in query conditions
    exprs = [
        column_expr(dataset, col, Query(body), parsing_context, alias, agg)
        for (agg, col, alias) in body['aggregations']
    ]
    assert exprs == ['(topK(3)(logger) AS dupe_alias)', 'dupe_alias']
def test_complex_conditions_expr(self, dataset):
    query = Query({})

    assert complex_column_expr(dataset, tuplify(['count', []]), deepcopy(query), ParsingContext()) == 'count()'
    assert complex_column_expr(dataset, tuplify(['notEmpty', ['foo']]), deepcopy(query), ParsingContext()) == 'notEmpty(foo)'
    assert complex_column_expr(dataset, tuplify(['notEmpty', ['arrayElement', ['foo', 1]]]), deepcopy(query), ParsingContext()) == 'notEmpty(arrayElement(foo, 1))'
    assert complex_column_expr(dataset, tuplify(['foo', ['bar', ['qux'], 'baz']]), deepcopy(query), ParsingContext()) == 'foo(bar(qux), baz)'
    assert complex_column_expr(dataset, tuplify(['foo', [], 'a']), deepcopy(query), ParsingContext()) == '(foo() AS a)'
    assert complex_column_expr(dataset, tuplify(['foo', ['b', 'c'], 'd']), deepcopy(query), ParsingContext()) == '(foo(b, c) AS d)'
    assert complex_column_expr(dataset, tuplify(['foo', ['b', 'c', ['d']]]), deepcopy(query), ParsingContext()) == 'foo(b, c(d))'

    assert complex_column_expr(dataset, tuplify(['top3', ['project_id']]), deepcopy(query), ParsingContext()) == 'topK(3)(project_id)'
    assert complex_column_expr(dataset, tuplify(['top10', ['project_id'], 'baz']), deepcopy(query), ParsingContext()) == '(topK(10)(project_id) AS baz)'

    assert complex_column_expr(dataset, tuplify(['emptyIfNull', ['project_id']]), deepcopy(query), ParsingContext()) == 'ifNull(project_id, \'\')'
    assert complex_column_expr(dataset, tuplify(['emptyIfNull', ['project_id'], 'foo']), deepcopy(query), ParsingContext()) == '(ifNull(project_id, \'\') AS foo)'

    assert complex_column_expr(dataset, tuplify(['or', ['a', 'b']]), deepcopy(query), ParsingContext()) == 'or(a, b)'
    assert complex_column_expr(dataset, tuplify(['and', ['a', 'b']]), deepcopy(query), ParsingContext()) == 'and(a, b)'
    assert complex_column_expr(dataset, tuplify(['or', [['or', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'or(or(a, b), c)'
    assert complex_column_expr(dataset, tuplify(['and', [['and', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'and(and(a, b), c)'
    # (A OR B) AND C
    assert complex_column_expr(dataset, tuplify(['and', [['or', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'and(or(a, b), c)'
    # (A AND B) OR C
    assert complex_column_expr(dataset, tuplify(['or', [['and', ['a', 'b']], 'c']]), deepcopy(query), ParsingContext()) == 'or(and(a, b), c)'
    # A OR B OR C OR D
    assert complex_column_expr(dataset, tuplify(['or', [['or', [['or', ['c', 'd']], 'b']], 'a']]), deepcopy(query), ParsingContext()) == 'or(or(or(c, d), b), a)'

    assert complex_column_expr(
        dataset,
        tuplify(['if', [['in', ['release', 'tuple', ["'foo'"]]], 'release', "'other'"], 'release']),
        deepcopy(query),
        ParsingContext(),
    ) == "(if(in(release, tuple('foo')), release, 'other') AS release)"
    assert complex_column_expr(
        dataset,
        tuplify(['if', ['in', ['release', 'tuple', ["'foo'"]], 'release', "'other'"], 'release']),
        deepcopy(query),
        ParsingContext(),
    ) == "(if(in(release, tuple('foo')), release, 'other') AS release)"

    # TODO once search_message is filled in everywhere, this can be just 'message' again.
    message_expr = '(coalesce(search_message, message) AS message)'
    assert complex_column_expr(dataset, tuplify(['positionCaseInsensitive', ['message', "'lol 'single' quotes'"]]), deepcopy(query), ParsingContext()) \
        == "positionCaseInsensitive({message_expr}, 'lol \\'single\\' quotes')".format(**locals())

    # dangerous characters are allowed but escaped in literals and column names
    assert complex_column_expr(dataset, tuplify(['safe', ['fo`o', "'ba'r'"]]), deepcopy(query), ParsingContext()) == r"safe(`fo\`o`, 'ba\'r')"

    # Dangerous characters not allowed in functions
    with pytest.raises(AssertionError):
        complex_column_expr(dataset, tuplify([r"dang'erous", ['message', '`']]), deepcopy(query), ParsingContext())

    # Or nested functions
    with pytest.raises(AssertionError):
        complex_column_expr(dataset, tuplify([r"safe", ['dang`erous', ['message']]]), deepcopy(query), ParsingContext())
def test_alias_in_alias(self):
    source = self.dataset.get_dataset_schemas().get_read_schema().get_data_source()
    query = Query({"groupby": ["tags_key", "tags_value"]}, source)
    context = ParsingContext()
    assert column_expr(self.dataset, "tags_key", query, context) == (
        "(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) "
        "AS all_tags))[1] AS tags_key)"
    )

    # If we want to use `tags_key` again, make sure we use the
    # already-created alias verbatim
    assert column_expr(self.dataset, "tags_key", query, context) == "tags_key"

    # If we also want to use `tags_value`, make sure that we use
    # the `all_tags` alias instead of re-expanding the tags arrayJoin
    assert (
        column_expr(self.dataset, "tags_value", query, context)
        == "((all_tags)[2] AS tags_value)"
    )
def test_duplicate_expression_alias(self, dataset):
    body = {
        "aggregations": [
            ["top3", "logger", "dupe_alias"],
            ["uniq", "environment", "dupe_alias"],
        ]
    }
    parsing_context = ParsingContext()
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    # In the case where 2 different expressions are aliased
    # to the same thing, one ends up overwriting the other.
    # This may not be ideal as it may mask bugs in query conditions
    exprs = [
        column_expr(dataset, col, Query(body, source), parsing_context, alias, agg)
        for (agg, col, alias) in body["aggregations"]
    ]
    assert exprs == ["(topK(3)(logger) AS dupe_alias)", "dupe_alias"]
def test_alias_in_alias():
    state.set_config('use_escape_alias', 1)
    dataset = get_dataset("groups")
    body = {'groupby': ['events.tags_key', 'events.tags_value']}
    query = Query(body)
    parsing_context = ParsingContext()
    assert column_expr(dataset, 'events.tags_key', query, parsing_context) == (
        '(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) '
        'AS all_tags))[1] AS `events.tags_key`)'
    )

    # If we want to use `tags_key` again, make sure we use the
    # already-created alias verbatim
    assert column_expr(dataset, 'events.tags_key', query, parsing_context) == '`events.tags_key`'

    # If we also want to use `tags_value`, make sure that we use
    # the `all_tags` alias instead of re-expanding the tags arrayJoin
    assert column_expr(dataset, 'events.tags_value', query, parsing_context) == (
        '((all_tags)[2] AS `events.tags_value`)'
    )
def test_duplicate_expression_alias():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)
    body = {
        'aggregations': [
            ['top3', 'events.logger', 'dupe_alias'],
            ['uniq', 'events.environment', 'dupe_alias'],
        ]
    }
    query = Query(body)
    # In the case where 2 different expressions are aliased
    # to the same thing, one ends up overwriting the other.
    # This may not be ideal as it may mask bugs in query conditions
    parsing_context = ParsingContext()
    exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in body['aggregations']
    ]
    assert exprs == ['(topK(3)(events.logger) AS dupe_alias)', 'dupe_alias']
def test_duplicate_expression_alias():
    dataset = get_dataset("groups")
    source = dataset.get_dataset_schemas().get_read_schema().get_data_source()
    state.set_config("use_escape_alias", 1)
    body = {
        "aggregations": [
            ["top3", "events.logger", "dupe_alias"],
            ["uniq", "events.environment", "dupe_alias"],
        ]
    }
    query = Query(body, source)
    # In the case where 2 different expressions are aliased
    # to the same thing, one ends up overwriting the other.
    # This may not be ideal as it may mask bugs in query conditions
    parsing_context = ParsingContext()
    exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in body["aggregations"]
    ]
    assert exprs == ["(topK(3)(events.logger) AS dupe_alias)", "dupe_alias"]
def test_alias_in_alias():
    state.set_config("use_escape_alias", 1)
    dataset = get_dataset("groups")
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    body = {"groupby": ["events.tags_key", "events.tags_value"]}
    query = Query(body, source)
    parsing_context = ParsingContext()
    assert column_expr(dataset, "events.tags_key", query, parsing_context) == (
        "(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) "
        "AS all_tags))[1] AS `events.tags_key`)"
    )

    # If we want to use `tags_key` again, make sure we use the
    # already-created alias verbatim
    assert (
        column_expr(dataset, "events.tags_key", query, parsing_context)
        == "`events.tags_key`"
    )

    # If we also want to use `tags_value`, make sure that we use
    # the `all_tags` alias instead of re-expanding the tags arrayJoin
    assert (
        column_expr(dataset, "events.tags_value", query, parsing_context)
        == "((all_tags)[2] AS `events.tags_value`)"
    )
def test_complex_conditions_expr(self, dataset):
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    query = Query({}, source)

    assert (
        complex_column_expr(dataset, tuplify(["count", []]), deepcopy(query), ParsingContext())
        == "count()"
    )
    assert (
        complex_column_expr(dataset, tuplify(["notEmpty", ["foo"]]), deepcopy(query), ParsingContext())
        == "notEmpty(foo)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["notEmpty", ["arrayElement", ["foo", 1]]]), deepcopy(query), ParsingContext())
        == "notEmpty(arrayElement(foo, 1))"
    )
    assert (
        complex_column_expr(dataset, tuplify(["foo", ["bar", ["qux"], "baz"]]), deepcopy(query), ParsingContext())
        == "foo(bar(qux), baz)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["foo", [], "a"]), deepcopy(query), ParsingContext())
        == "(foo() AS a)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["foo", ["b", "c"], "d"]), deepcopy(query), ParsingContext())
        == "(foo(b, c) AS d)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["foo", ["b", "c", ["d"]]]), deepcopy(query), ParsingContext())
        == "foo(b, c(d))"
    )
    assert (
        complex_column_expr(dataset, tuplify(["top3", ["project_id"]]), deepcopy(query), ParsingContext())
        == "topK(3)(project_id)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["top10", ["project_id"], "baz"]), deepcopy(query), ParsingContext())
        == "(topK(10)(project_id) AS baz)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["emptyIfNull", ["project_id"]]), deepcopy(query), ParsingContext())
        == "ifNull(project_id, '')"
    )
    assert (
        complex_column_expr(dataset, tuplify(["emptyIfNull", ["project_id"], "foo"]), deepcopy(query), ParsingContext())
        == "(ifNull(project_id, '') AS foo)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["or", ["a", "b"]]), deepcopy(query), ParsingContext())
        == "or(a, b)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["and", ["a", "b"]]), deepcopy(query), ParsingContext())
        == "and(a, b)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["or", [["or", ["a", "b"]], "c"]]), deepcopy(query), ParsingContext())
        == "or(or(a, b), c)"
    )
    assert (
        complex_column_expr(dataset, tuplify(["and", [["and", ["a", "b"]], "c"]]), deepcopy(query), ParsingContext())
        == "and(and(a, b), c)"
    )
    # (A OR B) AND C
    assert (
        complex_column_expr(dataset, tuplify(["and", [["or", ["a", "b"]], "c"]]), deepcopy(query), ParsingContext())
        == "and(or(a, b), c)"
    )
    # (A AND B) OR C
    assert (
        complex_column_expr(dataset, tuplify(["or", [["and", ["a", "b"]], "c"]]), deepcopy(query), ParsingContext())
        == "or(and(a, b), c)"
    )
    # A OR B OR C OR D
    assert (
        complex_column_expr(dataset, tuplify(["or", [["or", [["or", ["c", "d"]], "b"]], "a"]]), deepcopy(query), ParsingContext())
        == "or(or(or(c, d), b), a)"
    )
    assert (
        complex_column_expr(
            dataset,
            tuplify(
                [
                    "if",
                    [["in", ["release", "tuple", ["'foo'"]]], "release", "'other'"],
                    "release",
                ]
            ),
            deepcopy(query),
            ParsingContext(),
        )
        == "(if(in(release, tuple('foo')), release, 'other') AS release)"
    )
    assert (
        complex_column_expr(
            dataset,
            tuplify(
                [
                    "if",
                    ["in", ["release", "tuple", ["'foo'"]], "release", "'other'"],
                    "release",
                ]
            ),
            deepcopy(query),
            ParsingContext(),
        )
        == "(if(in(release, tuple('foo')), release, 'other') AS release)"
    )

    # TODO once search_message is filled in everywhere, this can be just 'message' again.
    message_expr = "(coalesce(search_message, message) AS message)"
    assert complex_column_expr(
        dataset,
        tuplify(["positionCaseInsensitive", ["message", "'lol 'single' quotes'"]]),
        deepcopy(query),
        ParsingContext(),
    ) == "positionCaseInsensitive({message_expr}, 'lol \\'single\\' quotes')".format(**locals())

    # dangerous characters are allowed but escaped in literals and column names
    assert (
        complex_column_expr(dataset, tuplify(["safe", ["fo`o", "'ba'r'"]]), deepcopy(query), ParsingContext())
        == r"safe(`fo\`o`, 'ba\'r')"
    )

    # Dangerous characters not allowed in functions
    with pytest.raises(AssertionError):
        complex_column_expr(
            dataset,
            tuplify([r"dang'erous", ["message", "`"]]),
            deepcopy(query),
            ParsingContext(),
        )

    # Or nested functions
    with pytest.raises(AssertionError):
        complex_column_expr(
            dataset,
            tuplify([r"safe", ["dang`erous", ["message"]]]),
            deepcopy(query),
            ParsingContext(),
        )
def test_conditions_expr(self, dataset):
    state.set_config("use_escape_alias", 1)
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    conditions = [["a", "=", 1]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "a = 1"

    conditions = []
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == ""

    conditions = [[[]], []]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == ""

    conditions = [[["a", "=", 1]]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "a = 1"

    conditions = [["a", "=", 1], ["b", "=", 2]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "a = 1 AND b = 2"

    conditions = [[["a", "=", 1], ["b", "=", 2]]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "(a = 1 OR b = 2)"

    conditions = [[["a", "=", 1], ["b", "=", 2]], ["c", "=", 3]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "(a = 1 OR b = 2) AND c = 3"

    conditions = [[["a", "=", 1], ["b", "=", 2]], [["c", "=", 3], ["d", "=", 4]]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "(a = 1 OR b = 2) AND (c = 3 OR d = 4)"

    # Malformed condition input
    conditions = [[["a", "=", 1], []]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "a = 1"

    # Test column expansion
    conditions = [[["tags[foo]", "=", 1], ["b", "=", 2]]]
    expanded = column_expr(dataset, "tags[foo]", Query({}, source), ParsingContext())
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "({} = 1 OR b = 2)".format(expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = Query({}, source)
    parsing_context = ParsingContext()
    conditions = [[["tags[foo]", "=", 1], ["b", "=", 2]]]
    column_expr(dataset, "tags[foo]", reuse_query, parsing_context)  # Expand it once so the next time is aliased
    assert conditions_expr(dataset, conditions, reuse_query, parsing_context) == "(`tags[foo]` = 1 OR b = 2)"

    # Test special output format of LIKE
    conditions = [["primary_hash", "LIKE", "%foo%"]]
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "primary_hash LIKE '%foo%'"

    conditions = tuplify([[["notEmpty", ["arrayElement", ["exception_stacks.type", 1]]], "=", 1]])
    assert (
        conditions_expr(dataset, conditions, Query({}, source), ParsingContext())
        == "notEmpty(arrayElement((exception_stacks.type AS `exception_stacks.type`), 1)) = 1"
    )

    conditions = tuplify([[["notEmpty", ["tags[sentry:user]"]], "=", 1]])
    assert (
        conditions_expr(dataset, conditions, Query({}, source), ParsingContext())
        == "notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 1"
    )

    conditions = tuplify([[["notEmpty", ["tags_key"]], "=", 1]])
    assert (
        conditions_expr(
            dataset,
            conditions,
            Query({"conditions": [[["notEmpty", ["tags_key"]], "=", 1]]}, source),
            ParsingContext(),
        )
        == "notEmpty((arrayJoin(tags.key) AS tags_key)) = 1"
    )

    conditions = tuplify(
        [
            [
                [["notEmpty", ["tags[sentry:environment]"]], "=", "dev"],
                [["notEmpty", ["tags[sentry:environment]"]], "=", "prod"],
            ],
            [
                [["notEmpty", ["tags[sentry:user]"]], "=", "joe"],
                [["notEmpty", ["tags[sentry:user]"]], "=", "bob"],
            ],
        ]
    )
    assert (
        conditions_expr(dataset, conditions, Query({}, source), ParsingContext())
        == """(notEmpty((tags.value[indexOf(tags.key, 'sentry:environment')] AS `tags[sentry:environment]`)) = 'dev' OR notEmpty(`tags[sentry:environment]`) = 'prod') AND (notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 'joe' OR notEmpty(`tags[sentry:user]`) = 'bob')"""
    )

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [["exception_frames.filename", "LIKE", "%foo%"]]
    assert (
        conditions_expr(dataset, conditions, Query({}, source), ParsingContext())
        == "arrayExists(x -> assumeNotNull(x LIKE '%foo%'), (exception_frames.filename AS `exception_frames.filename`))"
    )

    # Test negative scalar condition on array column is expanded as an all() type iterator.
    conditions = [["exception_frames.filename", "NOT LIKE", "%foo%"]]
    assert (
        conditions_expr(dataset, conditions, Query({}, source), ParsingContext())
        == "arrayAll(x -> assumeNotNull(x NOT LIKE '%foo%'), (exception_frames.filename AS `exception_frames.filename`))"
    )

    # Test that a duplicate IN condition is deduplicated even if
    # the lists are in different orders.
    conditions = tuplify([["platform", "IN", ["a", "b", "c"]], ["platform", "IN", ["c", "b", "a"]]])
    assert conditions_expr(dataset, conditions, Query({}, source), ParsingContext()) == "platform IN ('a', 'b', 'c')"
def test_simple_column_expr():
    dataset = get_dataset("groups")
    state.set_config('use_escape_alias', 1)
    body = {'granularity': 86400}
    query = Query(body)
    assert column_expr(dataset, "events.event_id", deepcopy(query), ParsingContext()) \
        == "(events.event_id AS `events.event_id`)"
    assert column_expr(dataset, "groups.id", deepcopy(query), ParsingContext()) \
        == "(groups.id AS `groups.id`)"
    assert column_expr(dataset, "events.event_id", deepcopy(query), ParsingContext(), "MyVerboseAlias") \
        == "(events.event_id AS MyVerboseAlias)"

    # Single tag expression
    assert column_expr(dataset, 'events.tags[foo]', deepcopy(query), ParsingContext()) \
        == "(events.tags.value[indexOf(events.tags.key, 'foo')] AS `events.tags[foo]`)"

    # Promoted tag expression / no translation
    assert column_expr(dataset, 'events.tags[server_name]', deepcopy(query), ParsingContext()) \
        == "(events.server_name AS `events.tags[server_name]`)"

    # All tag keys expression
    assert column_expr(dataset, 'events.tags_key', deepcopy(query), ParsingContext()) \
        == '(arrayJoin(events.tags.key) AS `events.tags_key`)'

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {'groupby': ['events.tags_key', 'events.tags_value']}
    parsing_context = ParsingContext()
    assert column_expr(dataset, 'events.tags_key', Query(tag_group_body), parsing_context) == (
        '(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) '
        'AS all_tags))[1] AS `events.tags_key`)'
    )

    assert column_expr(dataset, 'events.time', deepcopy(query), ParsingContext()) \
        == "(toDate(events.timestamp) AS `events.time`)"

    assert column_expr(dataset, 'events.col', deepcopy(query), ParsingContext(), aggregate='sum') \
        == "(sum(events.col) AS `events.col`)"
    assert column_expr(dataset, 'events.col', deepcopy(query), ParsingContext(), alias='summation', aggregate='sum') \
        == "(sum(events.col) AS summation)"
    assert column_expr(dataset, '', deepcopy(query), ParsingContext(), alias='aggregate', aggregate='count()') \
        == "(count() AS aggregate)"

    # Columns that need escaping
    assert column_expr(dataset, 'events.sentry:release', deepcopy(query), ParsingContext()) == '`events.sentry:release`'

    # A 'column' that is actually a string literal
    assert column_expr(dataset, '\'hello world\'', deepcopy(query), ParsingContext()) == '\'hello world\''

    # Complex expressions (function calls) involving both string and column arguments
    assert column_expr(dataset, tuplify(['concat', ['a', '\':\'', 'b']]), deepcopy(query), ParsingContext()) == 'concat(a, \':\', b)'

    group_id_body = deepcopy(query)
    assert column_expr(dataset, 'events.issue', group_id_body, ParsingContext()) \
        == '(nullIf(events.group_id, 0) AS `events.issue`)'

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert column_expr(
        dataset,
        'events.tags[environment]',
        deepcopy(query),
        ParsingContext(),
        alias='unique_envs',
        aggregate='uniq',
    ) == "(ifNull(uniq(events.environment), 0) AS unique_envs)"
def __init__(
    self,
    dataset: Dataset,
    query: Query,
    settings: RequestSettings,
) -> None:
    parsing_context = ParsingContext()

    aggregate_exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in query.get_aggregations()
    ]
    groupby = util.to_list(query.get_groupby())
    group_exprs = [
        column_expr(dataset, gb, query, parsing_context) for gb in groupby
    ]
    column_names = query.get_selected_columns() or []
    selected_cols = [
        column_expr(dataset, util.tuplify(colname), query, parsing_context)
        for colname in column_names
    ]
    select_clause = u"SELECT {}".format(
        ", ".join(group_exprs + aggregate_exprs + selected_cols)
    )

    from_clause = u"FROM {}".format(query.get_data_source().format_from())
    if query.get_final():
        from_clause = u"{} FINAL".format(from_clause)

    if not query.get_data_source().supports_sample():
        sample_rate = None
    else:
        if query.get_sample():
            sample_rate = query.get_sample()
        elif settings.get_turbo():
            sample_rate = snuba_settings.TURBO_SAMPLE_RATE
        else:
            sample_rate = None
    if sample_rate:
        from_clause = u"{} SAMPLE {}".format(from_clause, sample_rate)

    join_clause = ""
    if query.get_arrayjoin():
        join_clause = u"ARRAY JOIN {}".format(query.get_arrayjoin())

    where_clause = ""
    if query.get_conditions():
        where_clause = u"WHERE {}".format(
            conditions_expr(dataset, query.get_conditions(), query, parsing_context)
        )

    prewhere_clause = ""
    if query.get_prewhere():
        prewhere_clause = u"PREWHERE {}".format(
            conditions_expr(dataset, query.get_prewhere(), query, parsing_context)
        )

    group_clause = ""
    if groupby:
        group_clause = "GROUP BY ({})".format(
            ", ".join(
                column_expr(dataset, gb, query, parsing_context) for gb in groupby
            )
        )
        if query.has_totals():
            group_clause = "{} WITH TOTALS".format(group_clause)

    having_clause = ""
    having_conditions = query.get_having()
    if having_conditions:
        assert groupby, "found HAVING clause with no GROUP BY"
        having_clause = u"HAVING {}".format(
            conditions_expr(dataset, having_conditions, query, parsing_context)
        )

    order_clause = ""
    if query.get_orderby():
        orderby = [
            column_expr(dataset, util.tuplify(ob), query, parsing_context)
            for ob in util.to_list(query.get_orderby())
        ]
        orderby = [
            u"{} {}".format(ob.lstrip("-"), "DESC" if ob.startswith("-") else "ASC")
            for ob in orderby
        ]
        order_clause = u"ORDER BY {}".format(", ".join(orderby))

    limitby_clause = ""
    if query.get_limitby() is not None:
        limitby_clause = "LIMIT {} BY {}".format(*query.get_limitby())

    limit_clause = ""
    if query.get_limit() is not None:
        limit_clause = "LIMIT {}, {}".format(query.get_offset(), query.get_limit())

    self.__formatted_query = " ".join(
        [
            c
            for c in [
                select_clause,
                from_clause,
                join_clause,
                prewhere_clause,
                where_clause,
                group_clause,
                having_clause,
                order_clause,
                limitby_clause,
                limit_clause,
            ]
            if c
        ]
    )
def test_column_expr(self):
    source = (
        self.dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    query = Query({"granularity": 86400}, source)

    # Single tag expression
    assert (
        column_expr(self.dataset, "tags[foo]", deepcopy(query), ParsingContext())
        == "(tags.value[indexOf(tags.key, 'foo')] AS `tags[foo]`)"
    )

    # Promoted tag expression / no translation
    assert (
        column_expr(self.dataset, "tags[server_name]", deepcopy(query), ParsingContext())
        == "(server_name AS `tags[server_name]`)"
    )

    # Promoted tag expression / with translation
    assert (
        column_expr(self.dataset, "tags[app.device]", deepcopy(query), ParsingContext())
        == "(app_device AS `tags[app.device]`)"
    )

    # Promoted context expression / with translation
    assert (
        column_expr(
            self.dataset,
            "contexts[device.battery_level]",
            deepcopy(query),
            ParsingContext(),
        )
        == "(toString(device_battery_level) AS `contexts[device.battery_level]`)"
    )

    # All tag keys expression
    q = Query({"granularity": 86400, "selected_columns": ["tags_key"]}, source)
    assert column_expr(self.dataset, "tags_key", q, ParsingContext()) == (
        "(arrayJoin(tags.key) AS tags_key)"
    )

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {"groupby": ["tags_key", "tags_value"]}
    assert column_expr(
        self.dataset, "tags_key", Query(tag_group_body, source), ParsingContext()
    ) == (
        "(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) "
        "AS all_tags))[1] AS tags_key)"
    )

    assert (
        column_expr(self.dataset, "time", deepcopy(query), ParsingContext())
        == "(toDate(timestamp) AS time)"
    )
    assert (
        column_expr(self.dataset, "rtime", deepcopy(query), ParsingContext())
        == "(toDate(received) AS rtime)"
    )
    assert (
        column_expr(self.dataset, "col", deepcopy(query), ParsingContext(), aggregate="sum")
        == "(sum(col) AS col)"
    )
    assert (
        column_expr(
            self.dataset,
            "col",
            deepcopy(query),
            ParsingContext(),
            alias="summation",
            aggregate="sum",
        )
        == "(sum(col) AS summation)"
    )

    # Special cases where count() doesn't need a column
    assert (
        column_expr(
            self.dataset,
            "",
            deepcopy(query),
            ParsingContext(),
            alias="count",
            aggregate="count()",
        )
        == "(count() AS count)"
    )
    assert (
        column_expr(
            self.dataset,
            "",
            deepcopy(query),
            ParsingContext(),
            alias="aggregate",
            aggregate="count()",
        )
        == "(count() AS aggregate)"
    )

    # Columns that need escaping
    assert (
        column_expr(self.dataset, "sentry:release", deepcopy(query), ParsingContext())
        == "`sentry:release`"
    )

    # A 'column' that is actually a string literal
    assert (
        column_expr(self.dataset, "'hello world'", deepcopy(query), ParsingContext())
        == "'hello world'"
    )

    # Complex expressions (function calls) involving both string and column arguments
    assert (
        column_expr(
            self.dataset,
            tuplify(["concat", ["a", "':'", "b"]]),
            deepcopy(query),
            ParsingContext(),
        )
        == "concat(a, ':', b)"
    )

    group_id_query = deepcopy(query)
    assert (
        column_expr(self.dataset, "group_id", group_id_query, ParsingContext())
        == "(nullIf(group_id, 0) AS group_id)"
    )

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert (
        column_expr(
            self.dataset,
            "tags[environment]",
            deepcopy(query),
            ParsingContext(),
            alias="unique_envs",
            aggregate="uniq",
        )
        == "(ifNull(uniq(environment), 0) AS unique_envs)"
    )
def test_conditions_expr():
    dataset = get_dataset("groups")
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    state.set_config("use_escape_alias", 1)
    conditions = [["events.a", "=", 1]]
    query = Query({}, source)
    assert (
        conditions_expr(dataset, conditions, deepcopy(query), ParsingContext())
        == "(events.a AS `events.a`) = 1"
    )

    conditions = [
        [["events.a", "=", 1], ["groups.b", "=", 2]],
        [["events.c", "=", 3], ["groups.d", "=", 4]],
    ]
    assert conditions_expr(dataset, conditions, deepcopy(query), ParsingContext()) == (
        "((events.a AS `events.a`) = 1 OR (groups.b AS `groups.b`) = 2)"
        " AND ((events.c AS `events.c`) = 3 OR (groups.d AS `groups.d`) = 4)"
    )

    # Test column expansion
    conditions = [[["events.tags[foo]", "=", 1], ["groups.b", "=", 2]]]
    expanded = column_expr(dataset, "events.tags[foo]", deepcopy(query), ParsingContext())
    assert conditions_expr(
        dataset, conditions, deepcopy(query), ParsingContext()
    ) == "({} = 1 OR (groups.b AS `groups.b`) = 2)".format(expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = deepcopy(query)
    parsing_context = ParsingContext()
    conditions = [[["events.tags[foo]", "=", 1], ["groups.b", "=", 2]]]
    column_expr(dataset, "events.tags[foo]", reuse_query, parsing_context)  # Expand it once so the next time is aliased
    assert (
        conditions_expr(dataset, conditions, reuse_query, parsing_context)
        == "(`events.tags[foo]` = 1 OR (groups.b AS `groups.b`) = 2)"
    )

    # Test special output format of LIKE
    conditions = [["events.primary_hash", "LIKE", "%foo%"]]
    assert (
        conditions_expr(dataset, conditions, deepcopy(query), ParsingContext())
        == "(events.primary_hash AS `events.primary_hash`) LIKE '%foo%'"
    )

    conditions = tuplify(
        [[["notEmpty", ["arrayElement", ["events.exception_stacks.type", 1]]], "=", 1]]
    )
    assert (
        conditions_expr(dataset, conditions, deepcopy(query), ParsingContext())
        == "notEmpty(arrayElement((events.exception_stacks.type AS `events.exception_stacks.type`), 1)) = 1"
    )

    conditions = tuplify([[["notEmpty", ["events.tags[sentry:user]"]], "=", 1]])
    assert (
        conditions_expr(dataset, conditions, deepcopy(query), ParsingContext())
        == "notEmpty(`events.tags[sentry:user]`) = 1"
    )

    conditions = tuplify([[["notEmpty", ["events.tags_key"]], "=", 1]])
    q = Query({"selected_columns": ["events.tags_key"]}, source)
    assert (
        conditions_expr(dataset, conditions, q, ParsingContext())
        == "notEmpty((arrayJoin(events.tags.key) AS `events.tags_key`)) = 1"
    )

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [["events.exception_frames.filename", "LIKE", "%foo%"]]
    assert (
        conditions_expr(dataset, conditions, deepcopy(query), ParsingContext())
        == "arrayExists(x -> assumeNotNull(x LIKE '%foo%'), (events.exception_frames.filename AS `events.exception_frames.filename`))"
    )
def test_simple_column_expr():
    dataset = get_dataset("groups")
    source = (
        dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    body = {"granularity": 86400}
    query = Query(body, source)

    assert (
        column_expr(dataset, "events.event_id", deepcopy(query), ParsingContext())
        == "(events.event_id AS `events.event_id`)"
    )
    assert (
        column_expr(dataset, "groups.id", deepcopy(query), ParsingContext())
        == "(groups.id AS `groups.id`)"
    )
    assert (
        column_expr(
            dataset,
            "events.event_id",
            deepcopy(query),
            ParsingContext(),
            "MyVerboseAlias",
        )
        == "(events.event_id AS MyVerboseAlias)"
    )

    # Single tag expression
    assert (
        column_expr(dataset, "events.tags[foo]", deepcopy(query), ParsingContext())
        == "(events.tags.value[indexOf(events.tags.key, 'foo')] AS `events.tags[foo]`)"
    )

    # Promoted tag expression / no translation
    assert (
        column_expr(dataset, "events.tags[server_name]", deepcopy(query), ParsingContext())
        == "(events.server_name AS `events.tags[server_name]`)"
    )

    # All tag keys expression
    q = Query({"selected_columns": ["events.tags_key"]}, source)
    assert column_expr(dataset, "events.tags_key", q, ParsingContext()) == (
        "(arrayJoin(events.tags.key) AS `events.tags_key`)"
    )

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {"groupby": ["events.tags_key", "events.tags_value"]}
    parsing_context = ParsingContext()
    assert column_expr(
        dataset, "events.tags_key", Query(tag_group_body, source), parsing_context
    ) == (
        "(((arrayJoin(arrayMap((x,y) -> [x,y], events.tags.key, events.tags.value)) "
        "AS all_tags))[1] AS `events.tags_key`)"
    )

    assert (
        column_expr(dataset, "events.time", deepcopy(query), ParsingContext())
        == "(toDate(events.timestamp) AS `events.time`)"
    )
    assert (
        column_expr(dataset, "events.col", deepcopy(query), ParsingContext(), aggregate="sum")
        == "(sum(events.col) AS `events.col`)"
    )
    assert (
        column_expr(
            dataset,
            "events.col",
            deepcopy(query),
            ParsingContext(),
            alias="summation",
            aggregate="sum",
        )
        == "(sum(events.col) AS summation)"
    )
    assert (
        column_expr(
            dataset,
            "",
            deepcopy(query),
            ParsingContext(),
            alias="aggregate",
            aggregate="count()",
        )
        == "(count() AS aggregate)"
    )

    # Columns that need escaping
    assert (
        column_expr(dataset, "events.sentry:release", deepcopy(query), ParsingContext())
        == "`events.sentry:release`"
    )

    # A 'column' that is actually a string literal
    assert (
        column_expr(dataset, "'hello world'", deepcopy(query), ParsingContext())
        == "'hello world'"
    )

    # Complex expressions (function calls) involving both string and column arguments
    assert (
        column_expr(
            dataset,
            tuplify(["concat", ["a", "':'", "b"]]),
            deepcopy(query),
            ParsingContext(),
        )
        == "concat(a, ':', b)"
    )

    group_id_body = deepcopy(query)
    assert (
        column_expr(dataset, "events.group_id", group_id_body, ParsingContext())
        == "(nullIf(events.group_id, 0) AS `events.group_id`)"
    )

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert (
        column_expr(
            dataset,
            "events.tags[environment]",
            deepcopy(query),
            ParsingContext(),
            alias="unique_envs",
            aggregate="uniq",
        )
        == "(ifNull(uniq(events.environment), 0) AS unique_envs)"
    )
def test_order_by(self):
    """
    Order by in Snuba is represented as -COL_NAME when ordering DESC.
    Since the column is provided with the `-` character in front when
    reaching the column_expr call, this can introduce a ton of corner
    cases depending on whether the column is aliased, whether it gets
    processed into something else, or whether it is escaped.
    This test is supposed to cover those cases.
    """
    source = (
        self.dataset.get_all_storages()[0]
        .get_schemas()
        .get_read_schema()
        .get_data_source()
    )
    query = Query({}, source)

    # Columns that start with a negative sign (used in orderby to signify
    # sort order) retain the '-' sign outside the escaping backticks (if any)
    assert (
        column_expr(self.dataset, "-timestamp", deepcopy(query), ParsingContext())
        == "-timestamp"
    )
    assert (
        column_expr(self.dataset, "-sentry:release", deepcopy(query), ParsingContext())
        == "-`sentry:release`"
    )

    context = ParsingContext()
    context.add_alias("al1")
    assert (
        column_expr(self.dataset, "-timestamp", deepcopy(query), context, "al1")
        == "-al1"
    )
    assert (
        column_expr(self.dataset, "-timestamp", deepcopy(query), ParsingContext(), "al1")
        == "-(timestamp AS al1)"
    )

    assert (
        column_expr(self.dataset, "-exception_stacks.type", deepcopy(query), ParsingContext())
        == "-(exception_stacks.type AS `exception_stacks.type`)"
    )
    context = ParsingContext()
    context.add_alias("`exception_stacks.type`")
    assert (
        column_expr(self.dataset, "-exception_stacks.type", deepcopy(query), context)
        == "-`exception_stacks.type`"
    )
def test_column_expr(self):
    source = self.dataset.get_dataset_schemas().get_read_schema().get_data_source()
    query = Query({'granularity': 86400}, source)

    # Single tag expression
    assert column_expr(self.dataset, 'tags[foo]', deepcopy(query), ParsingContext()) \
        == "(tags.value[indexOf(tags.key, 'foo')] AS `tags[foo]`)"

    # Promoted tag expression / no translation
    assert column_expr(self.dataset, 'tags[server_name]', deepcopy(query), ParsingContext()) \
        == "(server_name AS `tags[server_name]`)"

    # Promoted tag expression / with translation
    assert column_expr(self.dataset, 'tags[app.device]', deepcopy(query), ParsingContext()) \
        == "(app_device AS `tags[app.device]`)"

    # All tag keys expression
    assert column_expr(self.dataset, 'tags_key', deepcopy(query), ParsingContext()) \
        == '(arrayJoin(tags.key) AS tags_key)'

    # If we are going to use both tags_key and tags_value, expand both
    tag_group_body = {'groupby': ['tags_key', 'tags_value']}
    assert column_expr(self.dataset, 'tags_key', Query(tag_group_body, source), ParsingContext()) == (
        '(((arrayJoin(arrayMap((x,y) -> [x,y], tags.key, tags.value)) '
        'AS all_tags))[1] AS tags_key)'
    )

    assert column_expr(self.dataset, 'time', deepcopy(query), ParsingContext()) \
        == "(toDate(timestamp) AS time)"
    assert column_expr(self.dataset, 'rtime', deepcopy(query), ParsingContext()) \
        == "(toDate(received) AS rtime)"
    assert column_expr(self.dataset, 'col', deepcopy(query), ParsingContext(), aggregate='sum') \
        == "(sum(col) AS col)"
    assert column_expr(self.dataset, 'col', deepcopy(query), ParsingContext(), alias='summation', aggregate='sum') \
        == "(sum(col) AS summation)"

    # Special cases where count() doesn't need a column
    assert column_expr(self.dataset, '', deepcopy(query), ParsingContext(), alias='count', aggregate='count()') \
        == "(count() AS count)"
    assert column_expr(self.dataset, '', deepcopy(query), ParsingContext(), alias='aggregate', aggregate='count()') \
        == "(count() AS aggregate)"

    # Columns that need escaping
    assert column_expr(self.dataset, 'sentry:release', deepcopy(query), ParsingContext()) == '`sentry:release`'

    # Columns that start with a negative sign (used in orderby to signify
    # sort order) retain the '-' sign outside the escaping backticks (if any)
    assert column_expr(self.dataset, '-timestamp', deepcopy(query), ParsingContext()) == '-timestamp'
    assert column_expr(self.dataset, '-sentry:release', deepcopy(query), ParsingContext()) == '-`sentry:release`'

    # A 'column' that is actually a string literal
    assert column_expr(self.dataset, '\'hello world\'', deepcopy(query), ParsingContext()) == '\'hello world\''

    # Complex expressions (function calls) involving both string and column arguments
    assert column_expr(self.dataset, tuplify(['concat', ['a', '\':\'', 'b']]), deepcopy(query), ParsingContext()) == 'concat(a, \':\', b)'

    group_id_query = deepcopy(query)
    assert column_expr(self.dataset, 'issue', group_id_query, ParsingContext()) \
        == '(nullIf(group_id, 0) AS issue)'
    assert column_expr(self.dataset, 'group_id', group_id_query, ParsingContext()) \
        == '(nullIf(group_id, 0) AS group_id)'

    # turn uniq() into ifNull(uniq(), 0) so it doesn't return null where a number was expected.
    assert column_expr(
        self.dataset,
        'tags[environment]',
        deepcopy(query),
        ParsingContext(),
        alias='unique_envs',
        aggregate='uniq',
    ) == "(ifNull(uniq(environment), 0) AS unique_envs)"
def _sql_data_list(self) -> Sequence[Tuple[str, str]]:
    if self.__sql_data_list:
        return self.__sql_data_list

    parsing_context = ParsingContext()
    formatter = ClickhouseExpressionFormatter(parsing_context)

    selected_cols = [
        e.expression.accept(formatter) for e in self.__selected_columns
    ]
    select_clause = f"SELECT {', '.join(selected_cols)}"

    # TODO: The visitor approach will be used for the FROM clause as well.
    from_clause = f"FROM {self.__data_source.format_from()}"
    if self.__final:
        from_clause = f"{from_clause} FINAL"

    # TODO: Sampling rate will become one step of Clickhouse query processing
    if not self.__data_source.supports_sample():
        sample_rate = None
    else:
        if self.__sample:
            sample_rate = self.__sample
        elif self.__settings.get_turbo():
            sample_rate = settings.TURBO_SAMPLE_RATE
        else:
            sample_rate = None
    if sample_rate:
        from_clause = f"{from_clause} SAMPLE {sample_rate}"

    array_join_clause = ""
    if self.__arrayjoin:
        formatted_array_join = self.__arrayjoin.accept(formatter)
        array_join_clause = f"ARRAY JOIN {formatted_array_join}"

    prewhere_clause = ""
    if self.__prewhere:
        formatted_prewhere = self.__prewhere.accept(formatter)
        prewhere_clause = f"PREWHERE {formatted_prewhere}"

    where_clause = ""
    if self.__condition:
        where_clause = f"WHERE {self.__condition.accept(formatter)}"

    group_clause = ""
    if self.__groupby:
        # reformat to use aliases generated during the select clause formatting.
        groupby_expressions = [e.accept(formatter) for e in self.__groupby]
        group_clause = f"GROUP BY ({', '.join(groupby_expressions)})"
        if self.__hastotals:
            group_clause = f"{group_clause} WITH TOTALS"

    having_clause = ""
    if self.__having:
        having_clause = f"HAVING {self.__having.accept(formatter)}"

    order_clause = ""
    if self.__orderby:
        orderby = [
            f"{e.expression.accept(formatter)} {e.direction.value}"
            for e in self.__orderby
        ]
        order_clause = f"ORDER BY {', '.join(orderby)}"

    limitby_clause = ""
    if self.__limitby is not None:
        limitby_clause = "LIMIT {} BY {}".format(*self.__limitby)

    limit_clause = ""
    if self.__limit is not None:
        limit_clause = f"LIMIT {self.__limit} OFFSET {self.__offset}"

    self.__sql_data_list = [
        (k, v)
        for k, v in [
            ("select", select_clause),
            ("from", from_clause),
            ("array_join", array_join_clause),
            ("prewhere", prewhere_clause),
            ("where", where_clause),
            ("group", group_clause),
            ("having", having_clause),
            ("order", order_clause),
            ("limitby", limitby_clause),
            ("limit", limit_clause),
        ]
        if v
    ]
    return self.__sql_data_list
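# Hedged companion sketch: a full SQL statement can be recovered by joining the
# clause values produced by _sql_data_list above. `format_sql` is a hypothetical
# helper for illustration, not a method from the codebase; it relies only on the
# facts that clauses are emitted in SQL order and empty ones are filtered out.
def format_sql(sql_data: Sequence[Tuple[str, str]]) -> str:
    # Drop the clause labels and join the non-empty clause bodies with spaces.
    return " ".join(v for _, v in sql_data)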
def __init__(
    self,
    dataset: Dataset,
    query: Query,
    settings: RequestSettings,
    prewhere_conditions: Sequence[str],
) -> None:
    parsing_context = ParsingContext()

    aggregate_exprs = [
        column_expr(dataset, col, query, parsing_context, alias, agg)
        for (agg, col, alias) in query.get_aggregations()
    ]
    groupby = util.to_list(query.get_groupby())
    group_exprs = [
        column_expr(dataset, gb, query, parsing_context) for gb in groupby
    ]
    column_names = query.get_selected_columns() or []
    selected_cols = [
        column_expr(dataset, util.tuplify(colname), query, parsing_context)
        for colname in column_names
    ]
    select_clause = u'SELECT {}'.format(
        ', '.join(group_exprs + aggregate_exprs + selected_cols)
    )

    from_clause = u'FROM {}'.format(query.get_data_source().format_from())
    if query.get_final():
        from_clause = u'{} FINAL'.format(from_clause)

    if query.get_sample():
        sample_rate = query.get_sample()
    elif settings.get_turbo():
        sample_rate = snuba_settings.TURBO_SAMPLE_RATE
    else:
        sample_rate = None
    if sample_rate:
        from_clause = u'{} SAMPLE {}'.format(from_clause, sample_rate)

    join_clause = ''
    if query.get_arrayjoin():
        join_clause = u'ARRAY JOIN {}'.format(query.get_arrayjoin())

    where_clause = ''
    if query.get_conditions():
        where_clause = u'WHERE {}'.format(
            conditions_expr(dataset, query.get_conditions(), query, parsing_context)
        )

    prewhere_clause = ''
    if prewhere_conditions:
        prewhere_clause = u'PREWHERE {}'.format(
            conditions_expr(dataset, prewhere_conditions, query, parsing_context)
        )

    group_clause = ''
    if groupby:
        group_clause = 'GROUP BY ({})'.format(
            ', '.join(
                column_expr(dataset, gb, query, parsing_context) for gb in groupby
            )
        )
        if query.has_totals():
            group_clause = '{} WITH TOTALS'.format(group_clause)

    having_clause = ''
    having_conditions = query.get_having()
    if having_conditions:
        assert groupby, 'found HAVING clause with no GROUP BY'
        having_clause = u'HAVING {}'.format(
            conditions_expr(dataset, having_conditions, query, parsing_context)
        )

    order_clause = ''
    if query.get_orderby():
        orderby = [
            column_expr(dataset, util.tuplify(ob), query, parsing_context)
            for ob in util.to_list(query.get_orderby())
        ]
        orderby = [
            u'{} {}'.format(ob.lstrip('-'), 'DESC' if ob.startswith('-') else 'ASC')
            for ob in orderby
        ]
        order_clause = u'ORDER BY {}'.format(', '.join(orderby))

    limitby_clause = ''
    if query.get_limitby() is not None:
        limitby_clause = 'LIMIT {} BY {}'.format(*query.get_limitby())

    limit_clause = ''
    if query.get_limit() is not None:
        limit_clause = 'LIMIT {}, {}'.format(query.get_offset(), query.get_limit())

    self.__formatted_query = ' '.join([
        c for c in [
            select_clause,
            from_clause,
            join_clause,
            prewhere_clause,
            where_clause,
            group_clause,
            having_clause,
            order_clause,
            limitby_clause,
            limit_clause,
        ] if c
    ])
def test_conditions_expr(self, dataset):
    state.set_config('use_escape_alias', 1)
    conditions = [['a', '=', 1]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

    conditions = [[['a', '=', 1]]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

    conditions = [['a', '=', 1], ['b', '=', 2]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1 AND b = 2'

    conditions = [[['a', '=', 1], ['b', '=', 2]]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2)'

    conditions = [[['a', '=', 1], ['b', '=', 2]], ['c', '=', 3]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2) AND c = 3'

    conditions = [[['a', '=', 1], ['b', '=', 2]], [['c', '=', 3], ['d', '=', 4]]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '(a = 1 OR b = 2) AND (c = 3 OR d = 4)'

    # Malformed condition input
    conditions = [[['a', '=', 1], []]]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'a = 1'

    # Test column expansion
    conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
    expanded = column_expr(dataset, 'tags[foo]', Query({}), ParsingContext())
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == '({} = 1 OR b = 2)'.format(expanded)

    # Test using alias if column has already been expanded in SELECT clause
    reuse_query = Query({})
    parsing_context = ParsingContext()
    conditions = [[['tags[foo]', '=', 1], ['b', '=', 2]]]
    column_expr(dataset, 'tags[foo]', reuse_query, parsing_context)  # Expand it once so the next time is aliased
    assert conditions_expr(dataset, conditions, reuse_query, parsing_context) == '(`tags[foo]` = 1 OR b = 2)'

    # Test special output format of LIKE
    conditions = [['primary_hash', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'primary_hash LIKE \'%foo%\''

    conditions = tuplify([[['notEmpty', ['arrayElement', ['exception_stacks.type', 1]]], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty(arrayElement((exception_stacks.type AS `exception_stacks.type`), 1)) = 1'

    conditions = tuplify([[['notEmpty', ['tags[sentry:user]']], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 1'

    conditions = tuplify([[['notEmpty', ['tags_key']], '=', 1]])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'notEmpty((arrayJoin(tags.key) AS tags_key)) = 1'

    conditions = tuplify([
        [
            [['notEmpty', ['tags[sentry:environment]']], '=', 'dev'],
            [['notEmpty', ['tags[sentry:environment]']], '=', 'prod'],
        ],
        [
            [['notEmpty', ['tags[sentry:user]']], '=', 'joe'],
            [['notEmpty', ['tags[sentry:user]']], '=', 'bob'],
        ],
    ])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == \
        """(notEmpty((tags.value[indexOf(tags.key, 'sentry:environment')] AS `tags[sentry:environment]`)) = 'dev' OR notEmpty(`tags[sentry:environment]`) = 'prod') AND (notEmpty((`sentry:user` AS `tags[sentry:user]`)) = 'joe' OR notEmpty(`tags[sentry:user]`) = 'bob')"""

    # Test scalar condition on array column is expanded as an iterator.
    conditions = [['exception_frames.filename', 'LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'arrayExists(x -> assumeNotNull(x LIKE \'%foo%\'), (exception_frames.filename AS `exception_frames.filename`))'

    # Test negative scalar condition on array column is expanded as an all() type iterator.
    conditions = [['exception_frames.filename', 'NOT LIKE', '%foo%']]
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == 'arrayAll(x -> assumeNotNull(x NOT LIKE \'%foo%\'), (exception_frames.filename AS `exception_frames.filename`))'

    # Test that a duplicate IN condition is deduplicated even if
    # the lists are in different orders.
    conditions = tuplify([
        ['platform', 'IN', ['a', 'b', 'c']],
        ['platform', 'IN', ['c', 'b', 'a']],
    ])
    assert conditions_expr(dataset, conditions, Query({}), ParsingContext()) == "platform IN ('a', 'b', 'c')"
def __init__(self, parsing_context: Optional[ParsingContext] = None) -> None:
    self._parsing_context = (
        parsing_context if parsing_context is not None else ParsingContext()
    )
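# Hedged usage sketch: the aliasing cache lives in the ParsingContext, not in
# the formatter, so sharing one context across formatter instances collapses a
# repeated aliased expression to its bare alias (mirroring test_aliases above).
# Names follow the tests in this document; nothing new is assumed.
pc = ParsingContext()
col = Column("al1", "column1", "table1")
assert col.accept(ClickhouseExpressionFormatter(pc)) == "(table1.column1 AS al1)"
assert col.accept(ClickhouseExpressionFormatter(pc)) == "al1"  # alias reused via shared pc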