Example #1
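# Asserts that parse_snql_query raises the expected exception for an invalid query body.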
def test_failures(
    query_body: str,
    expected_exception,
) -> None:
    with pytest.raises(expected_exception):
        events = get_dataset("events")
        parse_snql_query(query_body, events)
Example #2
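# Stubs the events entity's join relationships so that join queries can be
# parsed, then asserts that parsing fails with the expected error message.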
def test_failures(query_body: str, message: str) -> None:
    state.set_config("query_parsing_expand_aliases", 1)

    # TODO: Potentially remove this once entities have actual join relationships
    mapping = {
        "contains": (EntityKey.TRANSACTIONS, "event_id"),
        "assigned": (EntityKey.GROUPASSIGNEE, "group_id"),
        "bookmark": (EntityKey.GROUPEDMESSAGES, "first_release_id"),
        "activity": (EntityKey.SESSIONS, "org_id"),
    }

    def events_mock(relationship: str) -> Optional[JoinRelationship]:
        if relationship not in mapping:
            return None
        entity_key, rhs_column = mapping[relationship]
        return JoinRelationship(
            rhs_entity=entity_key,
            join_type=JoinType.INNER,
            columns=[("event_id", rhs_column)],
            equivalences=[],
        )

    events = get_dataset("events")
    events_entity = get_entity(EntityKey.EVENTS)
    setattr(events_entity, "get_join_relationship", events_mock)

    with pytest.raises(ParsingException, match=re.escape(message)):
        parse_snql_query(query_body, [], events)
Example #3
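# Converts a legacy JSON body to SnQL via json_to_snql before asserting the parse failure.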
def test_failures(
    query_body: MutableMapping[str, Any],
    expected_exception: Type[InvalidQueryException],
) -> None:
    with pytest.raises(expected_exception):
        events = get_dataset("events")
        snql_query = json_to_snql(query_body, "events")
        parse_snql_query(str(snql_query), events)
Example #4
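# Two different expressions reuse the alias f1_alias, which must raise AliasShadowingException.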
def test_shadowing() -> None:
    with pytest.raises(AliasShadowingException):
        parse_snql_query(
            """
            MATCH (events)
            SELECT f1(column1, column2) AS f1_alias, f2() AS f2_alias, testF(platform, field2) AS f1_alias
            WHERE project_id = 1
            AND timestamp >= toDateTime('2020-01-01 12:00:00')
            AND timestamp < toDateTime('2020-01-02 12:00:00')
            """,
            get_dataset("events"),
        )
Example #5
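# Checks that get_object_ids_in_query_ast recovers the project IDs referenced in
# the query; bodies with no resolvable project set should fail to parse.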
def test_find_projects(
    query_body: MutableMapping[str, Any], expected_projects: Optional[Set[int]]
) -> None:
    events = get_dataset("events")
    if expected_projects is None:
        with pytest.raises(ParsingException):
            snql_query = json_to_snql(query_body, "events")
            query, _ = parse_snql_query(str(snql_query), events)
            identity_translate(query)
    else:
        snql_query = json_to_snql(query_body, "events")
        query, _ = parse_snql_query(str(snql_query), events)
        query = identity_translate(query)
        project_ids_ast = get_object_ids_in_query_ast(query, "project_id")
        assert project_ids_ast == expected_projects
Example #6
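# Parses a join query against stubbed join relationships and compares the
# result with the expected LogicalQuery.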
def test_format_expressions(query_body: str, expected_query: LogicalQuery) -> None:
    state.set_config("query_parsing_expand_aliases", 1)
    events = get_dataset("events")

    # TODO: Potentially remove this once entities have actual join relationships
    mapping = {
        "contains": (EntityKey.TRANSACTIONS, "event_id"),
        "assigned": (EntityKey.GROUPASSIGNEE, "group_id"),
        "bookmark": (EntityKey.GROUPEDMESSAGES, "first_release_id"),
        "activity": (EntityKey.SESSIONS, "org_id"),
    }

    def events_mock(relationship: str) -> JoinRelationship:
        entity_key, rhs_column = mapping[relationship]
        return JoinRelationship(
            rhs_entity=entity_key,
            join_type=JoinType.INNER,
            columns=[("event_id", rhs_column)],
            equivalences=[],
        )

    events_entity = get_entity(EntityKey.EVENTS)
    setattr(events_entity, "get_join_relationship", events_mock)

    query = parse_snql_query(query_body, events)

    eq, reason = query.equals(expected_query)
    assert eq, reason
Example #7
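# Runs the full execution pipeline and asserts that the storage table chosen
# depends on whether the request is a subscription.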
def test_select_storage(query_body: MutableMapping[str, Any],
                        is_subscription: bool, expected_table: str) -> None:
    sessions = get_dataset("sessions")
    snql_query = json_to_snql(query_body, "sessions")
    query, snql_anonymized = parse_snql_query(str(snql_query), sessions)
    query_body = json.loads(snql_query.snuba())
    subscription_settings = (SubscriptionQuerySettings
                             if is_subscription else HTTPQuerySettings)

    request = Request(
        id="a",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=subscription_settings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("default"), "blah", None,
                                         None, None),
    )

    def query_runner(query: Query, settings: QuerySettings,
                     reader: Reader) -> QueryResult:
        assert query.get_from_clause().table_name == expected_table
        return QueryResult({}, {})

    pipeline_builder = sessions.get_default_entity().get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()
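Example #8
# Like Example #6, stubs the entity's join relationships, but restores the
# original get_join_relationship in a finally block so other tests are unaffected.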
def test_entity_column_validation(query_body: str,
                                  expected_query: LogicalQuery,
                                  set_configs: Any) -> None:
    state.set_config("query_parsing_expand_aliases", 1)
    events = get_dataset("events")

    # TODO: Potentially remove this once entities have actual join relationships
    mapping = {
        "contains": (EntityKey.TRANSACTIONS, "event_id"),
        "connected": (EntityKey.SPANS, "trace_id"),
    }

    def events_mock(relationship: str) -> JoinRelationship:
        entity_key, rhs_column = mapping[relationship]
        return JoinRelationship(
            rhs_entity=entity_key,
            join_type=JoinType.INNER,
            columns=[("event_id", rhs_column)],
            equivalences=[],
        )

    events_entity = get_entity(EntityKey.EVENTS)
    old_get_join = events_entity.get_join_relationship

    try:
        setattr(events_entity, "get_join_relationship", events_mock)
        query = parse_snql_query(query_body, [], events)
        eq, reason = query.equals(expected_query)
        assert eq, reason
    finally:
        setattr(events_entity, "get_join_relationship", old_get_join)
Example #9
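# get_time_range should pick the tightest top-level timestamp bounds and ignore
# conditions nested inside an OR.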
def test_get_time_range() -> None:
    """
    Test finding the time range of a query.
    """
    body = """
        MATCH (events)
        SELECT event_id
        WHERE timestamp >= toDateTime('2019-09-18T10:00:00')
            AND timestamp >= toDateTime('2000-09-18T10:00:00')
            AND timestamp < toDateTime('2019-09-19T12:00:00')
            AND (timestamp < toDateTime('2019-09-18T12:00:00') OR project_id IN tuple(1))
            AND project_id IN tuple(1)
        """

    events = get_dataset("events")
    query, _ = parse_snql_query(body, events)
    processors = events.get_default_entity().get_query_processors()
    for processor in processors:
        if isinstance(processor, TimeSeriesProcessor):
            processor.process_query(query, HTTPQuerySettings())

    from_date_ast, to_date_ast = get_time_range(identity_translate(query),
                                                "timestamp")
    assert (from_date_ast is not None and isinstance(from_date_ast, datetime)
            and from_date_ast.isoformat() == "2019-09-18T10:00:00")
    assert (to_date_ast is not None and isinstance(to_date_ast, datetime)
            and to_date_ast.isoformat() == "2019-09-19T12:00:00")
Example #10
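# The promoted tag tags[transaction] maps to the transaction_name column, while
# contexts[browser.name] expands to arrayElement/indexOf over the contexts arrays.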
def test_events_processing() -> None:
    query_body = {
        "query": """
        MATCH (events)
        SELECT tags[transaction], contexts[browser.name]
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events_dataset = get_dataset("events")
    events_entity = events_dataset.get_default_entity()

    query, snql_anonymized = parse_snql_query(query_body["query"],
                                              events_dataset)
    request = Request(
        id="",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("blah"), "blah", None,
                                         None, None),
    )

    def query_runner(query: Query, settings: QuerySettings,
                     reader: Reader) -> QueryResult:
        assert query.get_selected_columns() == [
            SelectedExpression(
                "tags[transaction]",
                Column("_snuba_tags[transaction]", None, "transaction_name"),
            ),
            SelectedExpression(
                "contexts[browser.name]",
                FunctionCall(
                    "_snuba_contexts[browser.name]",
                    "arrayElement",
                    (
                        Column(None, None, "contexts.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (
                                Column(None, None, "contexts.key"),
                                Literal(None, "browser.name"),
                            ),
                        ),
                    ),
                ),
            ),
        ]
        return QueryResult({}, {})

    events_entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner).execute()
Example #11
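# Builds the best ClickHouse query plan, runs its processors, and checks alias
# validation on the resulting query.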
def test_alias_validation(query_body: MutableMapping[str, Any],
                          expected_result: bool) -> None:
    events = get_dataset("events")
    snql_query = json_to_snql(query_body, "events")
    query, _ = parse_snql_query(str(snql_query), events)
    settings = HTTPQuerySettings()
    query_plan = (
        events.get_default_entity()
        .get_query_pipeline_builder()
        .build_planner(query, settings)
        .build_best_plan()
    )
    execute_all_clickhouse_processors(query_plan, settings)

    assert query_plan.query.validate_aliases() == expected_result
Example #12
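    # Validates a raw request body: schema-validate it, split it into query,
    # settings, and extension sections, then parse it as SnQL or legacy
    # depending on the configured language.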
    def validate(
        self, value: MutableMapping[str, Any], dataset: Dataset, referrer: str
    ) -> Request:
        try:
            value = validate_jsonschema(value, self.__composite_schema)
        except jsonschema.ValidationError as error:
            raise JsonSchemaValidationException(str(error)) from error

        query_body = {
            key: value.pop(key)
            for key in self.__query_schema["properties"].keys()
            if key in value
        }
        settings = {
            key: value.pop(key)
            for key in self.__settings_schema["properties"].keys()
            if key in value
        }

        class_name = self.__setting_class
        if isinstance(class_name, type(HTTPRequestSettings)):
            settings_obj: Union[
                HTTPRequestSettings, SubscriptionRequestSettings
            ] = class_name(**settings)
        elif isinstance(class_name, type(SubscriptionRequestSettings)):
            settings_obj = class_name()

        extensions = {}
        for extension_name, extension_schema in self.__extension_schemas.items():
            extensions[extension_name] = {
                key: value.pop(key)
                for key in extension_schema["properties"].keys()
                if key in value
            }

        if self.__language == Language.SNQL:
            query = parse_snql_query(query_body["query"], dataset)
        else:
            query = parse_query(query_body, dataset)
            apply_query_extensions(query, extensions, settings_obj)

        request_id = uuid.uuid4().hex
        return Request(
            request_id,
            # TODO: Replace this with the actual query raw body.
            # this can have an impact on subscriptions so we need
            # to be careful with the change.
            ChainMap(query_body, *extensions.values()),
            query,
            settings_obj,
            referrer,
        )
Example #13
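# Compares the parsed query with the expected one clause by clause.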
def test_format_expressions(query_body: str, expected_query: Query) -> None:
    state.set_config("query_parsing_expand_aliases", 1)
    events = get_dataset("events")
    query = parse_snql_query(query_body, events)

    assert (query.get_selected_columns_from_ast() ==
            expected_query.get_selected_columns_from_ast())
    assert query.get_orderby_from_ast() == expected_query.get_orderby_from_ast()
    assert query.get_groupby_from_ast() == expected_query.get_groupby_from_ast()
    assert query.get_condition_from_ast() == expected_query.get_condition_from_ast()
    assert query.get_having_from_ast() == expected_query.get_having_from_ast()
Example #14
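# Aliases that reference each other, directly or through nesting, must raise
# CyclicAliasException.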
def test_circular_aliases() -> None:
    with pytest.raises(CyclicAliasException):
        parse_snql_query(
            """
            MATCH (events)
            SELECT f1(column1, f2) AS f1, f2(f1) AS f2
            WHERE project_id = 1
            AND timestamp >= toDateTime('2020-01-01 12:00:00')
            AND timestamp < toDateTime('2020-01-02 12:00:00')
            """,
            get_dataset("events"),
        )

    with pytest.raises(CyclicAliasException):
        parse_snql_query(
            """
            MATCH (events)
            SELECT f1(f2(c) AS f2) AS c
            WHERE project_id = 1
            AND timestamp >= toDateTime('2020-01-01 12:00:00')
            AND timestamp < toDateTime('2020-01-02 12:00:00')
            """,
            get_dataset("events"),
        )
Example #15
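# PrewhereProcessor should move conditions on the configured keys into PREWHERE,
# honoring omit_if_final for queries against a FINAL table.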
def test_prewhere(
    query_body: MutableMapping[str, Any],
    keys: Sequence[str],
    omit_if_final_keys: Sequence[str],
    new_ast_condition: Optional[Expression],
    new_prewhere_ast_condition: Optional[Expression],
    final: bool,
) -> None:
    settings.MAX_PREWHERE_CONDITIONS = 2
    events = get_dataset("events")
    # HACK until we migrate these tests to SnQL
    query_body["selected_columns"] = ["project_id"]
    query_body["conditions"] += [
        ["timestamp", ">=", "2021-01-01T00:00:00"],
        ["timestamp", "<", "2021-01-02T00:00:00"],
        ["project_id", "=", 1],
    ]
    snql_query = json_to_snql(query_body, "events")
    query, _ = parse_snql_query(str(snql_query), events)
    query = identity_translate(query)
    query.set_from_clause(Table("my_table", all_columns, final=final))

    query_settings = HTTPQuerySettings()
    processor = PrewhereProcessor(keys, omit_if_final=omit_if_final_keys)
    processor.process_query(query, query_settings)

    # HACK until we migrate these tests to SnQL
    def verify_expressions(top_level: Expression,
                           expected: Expression) -> bool:
        actual_conds = get_first_level_and_conditions(top_level)
        expected_conds = get_first_level_and_conditions(expected)
        for cond in expected_conds:
            if cond not in actual_conds:
                return False

        return True

    if new_ast_condition:
        condition = query.get_condition()
        assert condition is not None
        assert verify_expressions(condition, new_ast_condition)

    if new_prewhere_ast_condition:
        prewhere = query.get_prewhere_ast()
        assert prewhere is not None
        assert verify_expressions(prewhere, new_prewhere_ast_condition)
Example #16
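# TimeSplitQueryStrategy should issue progressively wider time windows, newest
# first, until the requested range is covered.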
def test_time_split_ast() -> None:
    """
    Test that the time split transforms the query properly both on the old representation
    and on the AST representation.
    """
    found_timestamps = []

    def do_query(
        query: ClickhouseQuery,
        query_settings: QuerySettings,
    ) -> QueryResult:
        from_date_ast, to_date_ast = get_time_range(query, "timestamp")
        assert from_date_ast is not None and isinstance(
            from_date_ast, datetime)
        assert to_date_ast is not None and isinstance(to_date_ast, datetime)

        found_timestamps.append(
            (from_date_ast.isoformat(), to_date_ast.isoformat()))

        return QueryResult({"data": []}, {})

    body = """
        MATCH (events)
        SELECT event_id, level, logger, server_name, transaction, timestamp, project_id
        WHERE timestamp >= toDateTime('2019-09-18T10:00:00')
        AND timestamp < toDateTime('2019-09-19T12:00:00')
        AND project_id IN tuple(1)
        ORDER BY timestamp DESC
        LIMIT 10
        """

    query, _ = parse_snql_query(body, get_dataset("events"))
    entity = get_entity(query.get_from_clause().key)
    settings = HTTPQuerySettings()
    for p in entity.get_query_processors():
        p.process_query(query, settings)

    clickhouse_query = identity_translate(query)
    splitter = TimeSplitQueryStrategy("timestamp")
    splitter.execute(clickhouse_query, settings, do_query)

    assert found_timestamps == [
        ("2019-09-19T11:00:00", "2019-09-19T12:00:00"),
        ("2019-09-19T01:00:00", "2019-09-19T11:00:00"),
        ("2019-09-18T10:00:00", "2019-09-19T01:00:00"),
    ]
Example #17
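# Converts a legacy JSON body to SnQL and checks every column and tags subscript
# referenced by the parsed query AST.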
def test_get_all_columns_legacy() -> None:
    query_body = {
        "selected_columns": [
            ["f1", ["column1", "column2"], "f1_alias"],
            ["f2", [], "f2_alias"],
            ["formatDateTime", ["timestamp", "'%Y-%m-%d'"], "formatted_time"],
        ],
        "aggregations": [
            ["count", "platform", "platforms"],
            ["uniq", "platform", "uniq_platforms"],
            ["testF", ["platform", ["anotherF", ["field2"]]], "top_platforms"],
        ],
        "conditions": [
            ["tags[sentry:dist]", "IN", ["dist1", "dist2"]],
            ["timestamp", ">=", "2020-01-01T12:00:00"],
            ["timestamp", "<", "2020-01-02T12:00:00"],
            ["project_id", "=", 1],
        ],
        "having": [["times_seen", ">", 1]],
        "groupby": [["format_eventid", ["event_id"]]],
    }
    events = get_dataset("events")
    snql_query = json_to_snql(query_body, "events")
    query, _ = parse_snql_query(str(snql_query), events)

    assert query.get_all_ast_referenced_columns() == {
        Column("_snuba_column1", None, "column1"),
        Column("_snuba_column2", None, "column2"),
        Column("_snuba_platform", None, "platform"),
        Column("_snuba_field2", None, "field2"),
        Column("_snuba_tags", None, "tags"),
        Column("_snuba_times_seen", None, "times_seen"),
        Column("_snuba_event_id", None, "event_id"),
        Column("_snuba_timestamp", None, "timestamp"),
        Column("_snuba_project_id", None, "project_id"),
    }

    assert query.get_all_ast_referenced_subscripts() == {
        SubscriptableReference(
            "_snuba_tags[sentry:dist]",
            Column("_snuba_tags", None, "tags"),
            Literal(None, "sentry:dist"),
        )
    }
Example #18
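# The discover dataset should route each query body to the entity implied by the
# columns and conditions it references.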
def test_data_source(
    query_body: MutableMapping[str, Any],
    expected_entity: EntityKey,
) -> None:
    dataset = get_dataset("discover")
    # HACK until these are converted to proper SnQL queries
    if not query_body.get("conditions"):
        query_body["conditions"] = []
    query_body["conditions"] += [
        ["timestamp", ">=", "2020-01-01T12:00:00"],
        ["timestamp", "<", "2020-01-02T12:00:00"],
        ["project_id", "=", 1],
    ]
    if not query_body.get("selected_columns"):
        query_body["selected_columns"] = ["project_id"]

    snql_query = json_to_snql(query_body, "discover")
    query, _ = parse_snql_query(str(snql_query), dataset)

    assert query.get_from_clause().key == expected_entity
Example #19
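# Same column and subscript collection as the legacy test above, but starting
# from a native SnQL string.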
def test_get_all_columns() -> None:
    query_body = """
        MATCH (events)
        SELECT f1(column1, column2) AS f1_alias,
            f2() AS f2_alias,
            formatDateTime(timestamp, '%Y-%m-%d') AS formatted_time,
            count() AS platforms,
            uniq(platform) AS uniq_platforms,
            testF(platform, anotherF(field2)) AS top_platforms
        BY format_eventid(event_id)
        WHERE tags[sentry:dist] IN tuple('dist1', 'dist2')
            AND timestamp >= toDateTime('2020-01-01 12:00:00')
            AND timestamp < toDateTime('2020-01-02 12:00:00')
            AND project_id = 1
        HAVING times_seen > 1
        """
    events = get_dataset("events")
    query, _ = parse_snql_query(query_body, events)

    assert query.get_all_ast_referenced_columns() == {
        Column("_snuba_column1", None, "column1"),
        Column("_snuba_column2", None, "column2"),
        Column("_snuba_platform", None, "platform"),
        Column("_snuba_field2", None, "field2"),
        Column("_snuba_tags", None, "tags"),
        Column("_snuba_times_seen", None, "times_seen"),
        Column("_snuba_event_id", None, "event_id"),
        Column("_snuba_timestamp", None, "timestamp"),
        Column("_snuba_project_id", None, "project_id"),
    }

    assert query.get_all_ast_referenced_subscripts() == {
        SubscriptableReference(
            "_snuba_tags[sentry:dist]",
            Column("_snuba_tags", None, "tags"),
            Literal(None, "sentry:dist"),
        )
    }
Example #20
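# ColumnSplitQueryStrategy.execute returns a result only when the query
# qualifies for a two-phase column split.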
def test_col_split_conditions(
    id_column: str,
    project_column: str,
    timestamp_column: str,
    query: MutableMapping[str, Any],
    expected_result: bool,
) -> None:
    dataset = get_dataset("events")
    snql_query = json_to_snql(query, "events")
    query, _ = parse_snql_query(str(snql_query), dataset)
    splitter = ColumnSplitQueryStrategy(id_column, project_column,
                                        timestamp_column)

    def do_query(
        query: ClickhouseQuery, query_settings: Optional[QuerySettings] = None
    ) -> QueryResult:
        return QueryResult(
            {
                "data": [{
                    id_column: "asd123",
                    project_column: 123,
                    timestamp_column: "2019-10-01 22:33:42",
                }]
            },
            {},
        )

    assert (splitter.execute(query, HTTPQuerySettings(), do_query)
            is not None) == expected_result
Example #21
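# Metrics entities should translate tags[10] into arrayElement/indexOf and the
# selected value column into its expected storage expression.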
def test_metrics_processing(
    entity_name: str,
    column_name: str,
    entity_key: EntityKey,
    translated_value: Expression,
) -> None:
    settings.ENABLE_DEV_FEATURES = True
    settings.DISABLED_DATASETS = set()

    importlib.reload(factory)
    importlib.reload(storage_factory)
    importlib.reload(cluster)

    query_body = {
        "query": (f"MATCH ({entity_name}) "
                  f"SELECT {column_name} BY org_id, project_id, tags[10] "
                  "WHERE "
                  "timestamp >= toDateTime('2021-05-17 19:42:01') AND "
                  "timestamp < toDateTime('2021-05-17 23:42:01') AND "
                  "org_id = 1 AND "
                  "project_id = 1"),
    }

    metrics_dataset = get_dataset("metrics")
    query = parse_snql_query(query_body["query"], [], metrics_dataset)

    request = Request("", query_body, query, HTTPRequestSettings(), "")

    def query_runner(query: Query, settings: RequestSettings,
                     reader: Reader) -> QueryResult:
        assert query.get_selected_columns() == [
            SelectedExpression(
                "org_id",
                Column("_snuba_org_id", None, "org_id"),
            ),
            SelectedExpression(
                "project_id",
                Column("_snuba_project_id", None, "project_id"),
            ),
            SelectedExpression(
                "tags[10]",
                FunctionCall(
                    "_snuba_tags[10]",
                    "arrayElement",
                    (
                        Column(None, None, "tags.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (Column(None, None, "tags.key"), Literal(None,
                                                                     10)),
                        ),
                    ),
                ),
            ),
            SelectedExpression(
                column_name,
                translated_value,
            ),
        ]
        return QueryResult({}, {})

    entity = get_entity(entity_key)
    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner).execute()
Example #22
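# Sessions columns should expand to the matching *IfMerge aggregate functions
# over the raw storage columns.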
def test_sessions_processing() -> None:
    query_body = {
        "query": """
        MATCH (sessions)
        SELECT duration_quantiles, sessions, users
        WHERE org_id = 1
        AND project_id = 1
        AND started >= toDateTime('2020-01-01T12:00:00')
        AND started < toDateTime('2020-01-02T12:00:00')
        """,
        "dataset": "sessions",
    }

    sessions = get_dataset("sessions")
    query, snql_anonymized = parse_snql_query(query_body["query"], sessions)
    request = Request(
        id="a",
        original_body=query_body,
        query=query,
        snql_anonymized=snql_anonymized,
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("default"), "", None, None,
                                         None),
    )

    def query_runner(query: Query, settings: QuerySettings,
                     reader: Reader) -> QueryResult:
        quantiles = tuple(
            Literal(None, quant) for quant in [0.5, 0.75, 0.9, 0.95, 0.99, 1])
        assert query.get_selected_columns() == [
            SelectedExpression(
                "duration_quantiles",
                CurriedFunctionCall(
                    "_snuba_duration_quantiles",
                    FunctionCall(
                        None,
                        "quantilesIfMerge",
                        quantiles,
                    ),
                    (Column(None, None, "duration_quantiles"), ),
                ),
            ),
            SelectedExpression(
                "sessions",
                FunctionCall(
                    "_snuba_sessions",
                    "plus",
                    (
                        FunctionCall(None, "countIfMerge",
                                     (Column(None, None, "sessions"), )),
                        FunctionCall(
                            None,
                            "sumIfMerge",
                            (Column(None, None, "sessions_preaggr"), ),
                        ),
                    ),
                ),
            ),
            SelectedExpression(
                "users",
                FunctionCall("_snuba_users", "uniqIfMerge",
                             (Column(None, None, "users"), )),
            ),
        ]
        return QueryResult({}, {})

    pipeline_builder = sessions.get_default_entity().get_query_pipeline_builder()
    pipeline_builder.build_execution_pipeline(request, query_runner).execute()
Example #23
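# Parametrized check that parsing fails with the exact exception type and message.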
def test_validation(query_body: str, exception: Exception) -> None:
    events = get_dataset("events")
    with pytest.raises(type(exception), match=re.escape(str(exception))):
        parse_snql_query(query_body, events)
Example #24
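# TagsExpanderProcessor rewrites tags_key/tags_value references into arrayJoin
# calls over tags.key and tags.value.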
def test_tags_expander() -> None:
    query_body = """
    MATCH (events)
    SELECT count(platform) AS platforms, testF(platform, tags_value) AS top_platforms, f1(tags_key, column2) AS f1_alias, f2() AS f2_alias
    WHERE tags_key = 'tags_key'
    AND project_id = 1
    AND timestamp >= toDateTime('2020-01-01 12:00:00')
    AND timestamp < toDateTime('2020-01-02 12:00:00')
    HAVING tags_value IN tuple('tag')
    """

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body, events)

    processor = TagsExpanderProcessor()
    query_settings = HTTPQuerySettings()
    processor.process_query(query, query_settings)

    assert query.get_selected_columns() == [
        SelectedExpression(
            "platforms",
            FunctionCall(
                "_snuba_platforms",
                "count",
                (Column("_snuba_platform", None, "platform"), ),
            ),
        ),
        SelectedExpression(
            "top_platforms",
            FunctionCall(
                "_snuba_top_platforms",
                "testF",
                (
                    Column("_snuba_platform", None, "platform"),
                    FunctionCall(
                        "_snuba_tags_value",
                        "arrayJoin",
                        (Column(None, None, "tags.value"), ),
                    ),
                ),
            ),
        ),
        SelectedExpression(
            "f1_alias",
            FunctionCall(
                "_snuba_f1_alias",
                "f1",
                (
                    FunctionCall(
                        "_snuba_tags_key",
                        "arrayJoin",
                        (Column(None, None, "tags.key"), ),
                    ),
                    Column("_snuba_column2", None, "column2"),
                ),
            ),
        ),
        SelectedExpression("f2_alias",
                           FunctionCall("_snuba_f2_alias", "f2", tuple())),
    ]

    condition = query.get_condition()
    assert condition is not None
    conds = get_first_level_and_conditions(condition)
    assert conds[0] == binary_condition(
        OPERATOR_TO_FUNCTION["="],
        FunctionCall("_snuba_tags_key", "arrayJoin",
                     (Column(None, None, "tags.key"), )),
        Literal(None, "tags_key"),
    )

    assert query.get_having() == in_condition(
        FunctionCall("_snuba_tags_value", "arrayJoin",
                     (Column(None, None, "tags.value"), )),
        [Literal(None, "tag")],
    )
Example #25
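# PipelineDelegator runs the query through a primary and a shadow pipeline; the
# callback receives both results and each runner call gets its own settings object.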
def test() -> None:
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(primary: Optional[Tuple[str, QueryResult]],
                      other: List[Tuple[str, QueryResult]]) -> None:
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """
        MATCH (events)
        SELECT type, project_id
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)), )

    errors_ro_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS_RO)), )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        nonlocal runner_call_count
        nonlocal runner_settings

        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    with cv:
        query_settings = HTTPQuerySettings(referrer="ref")
        delegator.build_execution_pipeline(
            Request(
                id="asd",
                original_body=query_body,
                query=query,
                snql_anonymized="",
                query_settings=query_settings,
                attribution_info=AttributionInfo(get_app_id("ref"), "ref",
                                                 None, None, None),
            ),
            query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert runner_call_count == 2
    assert len(runner_settings) == 2
    settings, settings_ro = runner_settings
    # Validate that settings have been duplicated
    assert id(settings) != id(settings_ro)

    assert mock_callback.call_args == call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )
Example #26
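# Round-trips a SnQL string through the parser and compares structural equality
# with the expected query.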
def test_format_expressions(query_body: str, expected_query: Query) -> None:
    events = get_dataset("events")
    query, _ = parse_snql_query(str(query_body), events)

    eq, reason = query.equals(expected_query)
    assert eq, reason
Example #27
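# Variant of Example #21 using the newer Request/QuerySettings API; returns a
# fully populated QueryResult from the mocked runner.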
def test_metrics_processing(
    entity_name: str,
    column_name: str,
    entity_key: EntityKey,
    translated_value: Expression,
) -> None:
    settings.ENABLE_DEV_FEATURES = True
    settings.DISABLED_DATASETS = set()

    importlib.reload(factory)
    importlib.reload(storage_factory)
    importlib.reload(cluster)

    query_body = {
        "query": (f"MATCH ({entity_name}) "
                  f"SELECT {column_name} BY org_id, project_id, tags[10] "
                  "WHERE "
                  "timestamp >= toDateTime('2021-05-17 19:42:01') AND "
                  "timestamp < toDateTime('2021-05-17 23:42:01') AND "
                  "org_id = 1 AND "
                  "project_id = 1"),
    }

    metrics_dataset = get_dataset("metrics")
    query, snql_anonymized = parse_snql_query(query_body["query"],
                                              metrics_dataset)

    request = Request(
        id="",
        original_body=query_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer=""),
        attribution_info=AttributionInfo(get_app_id("blah"), "blah", None,
                                         None, None),
    )

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        assert query.get_selected_columns() == [
            SelectedExpression(
                "org_id",
                Column("_snuba_org_id", None, "org_id"),
            ),
            SelectedExpression(
                "project_id",
                Column("_snuba_project_id", None, "project_id"),
            ),
            SelectedExpression(
                "tags[10]",
                FunctionCall(
                    "_snuba_tags[10]",
                    "arrayElement",
                    (
                        Column(None, None, "tags.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (Column(None, None, "tags.key"), Literal(None,
                                                                     10)),
                        ),
                    ),
                ),
            ),
            SelectedExpression(
                column_name,
                translated_value,
            ),
        ]
        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )

    entity = get_entity(entity_key)
    entity.get_query_pipeline_builder().build_execution_pipeline(
        request, query_runner).execute()