Пример #1
0
 def do_query(query: ClickhouseQuery,
              query_settings: QuerySettings) -> QueryResult:
     """Fake query executor for a split test.

     The first invocation returns two canned duplicate-event rows; every
     later invocation asserts that the follow-up query carries a limit of
     2 and returns an empty result.
     """
     # Call counter shared with the enclosing test function.
     nonlocal query_run_count
     query_run_count += 1
     if query_run_count == 1:
         return QueryResult(
             result={
                 "data": [
                     {
                         "event_id": "a",
                         "project_id": "1",
                         # NOTE(review): the leading space in the timestamp
                         # strings looks like deliberate test data — confirm.
                         "timestamp": " 2019-10-01 22:33:42",
                     },
                     {
                         "event_id": "a",
                         "project_id": "1",
                         "timestamp": " 2019-10-01 22:44:42",
                     },
                 ]
             },
             extra={},
         )
     else:
         assert query.get_limit() == 2
         return QueryResult({}, {})
Пример #2
0
 def do_query(
     query: ClickhouseQuery,
     request_settings: RequestSettings,
     reader: Reader,
 ) -> QueryResult:
     """Stub executor that always returns an empty result.

     NOTE(review): ``assert query == query`` is a tautology and can never
     fail; it was probably meant to compare against an expected query
     object from the enclosing scope — confirm and fix upstream.
     """
     assert query == query
     return QueryResult({}, {})
Пример #3
0
 def query_runner(query: Query, settings: QuerySettings,
                  reader: Reader) -> QueryResult:
     """Stub runner: check that the tag/context mappers translated the
     selected columns into the expected AST, then return an empty result."""
     expected_selected = [
         SelectedExpression(
             "tags[transaction]",
             Column("_snuba_tags[transaction]", None, "transaction_name"),
         ),
         SelectedExpression(
             "contexts[browser.name]",
             FunctionCall(
                 "_snuba_contexts[browser.name]",
                 "arrayElement",
                 (
                     Column(None, None, "contexts.value"),
                     FunctionCall(
                         None,
                         "indexOf",
                         (
                             Column(None, None, "contexts.key"),
                             Literal(None, "browser.name"),
                         ),
                     ),
                 ),
             ),
         ),
     ]
     assert query.get_selected_columns() == expected_selected
     return QueryResult({}, {})
def test_split_metadata() -> None:
    """Split a result schema into key/value columns and split one row.

    ``field2`` (a datetime) joins ``field1`` as a key column, ``field3``
    is the only value column, and the unsupported Enum column makes the
    split schema incomplete.
    """
    query_result = QueryResult(
        result={
            "meta": [
                {"name": "field1", "type": "String"},
                {"name": "field2", "type": "Datetime('Universal')"},
                {"name": "field3", "type": "Float64"},
                {"name": "field4", "type": "Enum"},
            ],
            "data": [],
            "totals": {},
            "profile": None,
            "trace_output": "asd",
        },
        extra={"stats": {}, "sql": "select something", "experiments": {}},
    )

    schema = split_metadata(query_result)
    assert schema.complete is False
    assert schema.key_cols == ("field1", "field2")
    assert schema.value_cols == ("field3",)

    timestamp = datetime(2022, 1, 1, 0, 0, 0)
    split = split_row(
        {"field1": "asd", "field2": timestamp, "field3": 0.01, "field4": "asd"},
        schema,
    )
    assert split == SplitRow(key=("asd", timestamp), values=(0.01,))
Пример #5
0
    def query_runner(query: Query, settings: RequestSettings,
                     reader: Reader) -> QueryResult:
        """Stub runner checking tag/context translation for either the
        EVENTS or ERRORS storage (they use different column names)."""
        transaction_col_name = (
            "transaction"
            if events_storage.get_storage_key() == StorageKey.EVENTS
            else "transaction_name"
        )

        expected_selected = [
            SelectedExpression(
                "tags[transaction]",
                Column("_snuba_tags[transaction]", None, transaction_col_name),
            ),
            SelectedExpression(
                "contexts[browser.name]",
                FunctionCall(
                    "_snuba_contexts[browser.name]",
                    "arrayElement",
                    (
                        Column(None, None, "contexts.value"),
                        FunctionCall(
                            None,
                            "indexOf",
                            (
                                Column(None, None, "contexts.key"),
                                Literal(None, "browser.name"),
                            ),
                        ),
                    ),
                ),
            ),
        ]
        assert query.get_selected_columns_from_ast() == expected_selected
        return QueryResult({}, {})
Пример #6
0
def test() -> None:
    """Run one query through a two-pipeline delegator and verify both
    pipelines executed and the callback saw both results.

    A condition variable is notified from the callback so the main thread
    can wait for the secondary (threaded) pipeline to finish.
    """
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": ""})
    mock_query_runner = Mock(return_value=query_result)

    def callback_func(args: List[Tuple[str, QueryResult]]) -> None:
        # Wake the main thread once the delegator invokes the callback.
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "selected_columns": ["type", "project_id"],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    events_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.EVENTS)), )

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)), )

    # "events" is the primary pipeline; "errors" runs as secondary.
    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline,
            "errors": errors_pipeline
        },
        selector_func=lambda query, referrer: ("events", ["errors"]),
        callback_func=mock_callback,
    )

    with cv:
        request_settings = HTTPRequestSettings()
        delegator.build_execution_pipeline(
            Request(
                "",
                query_body,
                query,
                request_settings,
                "ref",
            ),
            mock_query_runner,
        ).execute()
        # Bounded wait so a broken callback cannot hang the test forever.
        cv.wait(timeout=5)

    # Both the primary and the secondary pipeline ran the query.
    assert mock_query_runner.call_count == 2

    assert mock_callback.call_args == call(
        query,
        request_settings,
        "ref",
        [
            Result("events", query_result, ANY),
            Result("errors", query_result, ANY)
        ],
    )
Пример #7
0
 def query_runner(query: Query, settings: RequestSettings,
                  reader: Reader[SqlQuery]) -> QueryResult:
     """Stub runner asserting the merge-aggregation columns for the
     sessions dataset were selected, then returning an empty result."""
     expected_selected = [
         SelectedExpression(
             "duration_quantiles",
             CurriedFunctionCall(
                 "duration_quantiles",
                 FunctionCall(
                     None,
                     "quantilesIfMerge",
                     (Literal(None, 0.5), Literal(None, 0.9)),
                 ),
                 (Column(None, None, "duration_quantiles"),),
             ),
         ),
         SelectedExpression(
             "sessions",
             FunctionCall(
                 "sessions",
                 "countIfMerge",
                 (Column(None, None, "sessions"),),
             ),
         ),
         SelectedExpression(
             "users",
             FunctionCall(
                 "users",
                 "uniqIfMerge",
                 (Column(None, None, "users"),),
             ),
         ),
     ]
     assert query.get_selected_columns_from_ast() == expected_selected
     return QueryResult({}, {})
Пример #8
0
    def do_query(
        query: ClickhouseQuery,
        request_settings: RequestSettings,
    ) -> QueryResult:
        """Fake executor verifying that the AST time range and the legacy
        condition tuples agree, while recording each subquery's window.

        Appends ``(from_iso, to_iso)`` to ``found_timestamps`` from the
        enclosing test scope.
        """
        from_date_ast, to_date_ast = get_time_range(query, "timestamp")
        assert from_date_ast is not None and isinstance(
            from_date_ast, datetime)
        assert to_date_ast is not None and isinstance(to_date_ast, datetime)

        # Legacy conditions are triples (column, operator, literal);
        # pick out the timestamp bounds, if present.
        conditions = query.get_conditions() or []
        from_date_str = next(
            (condition[2] for condition in conditions
             if condition[0] == "timestamp" and condition[1] == ">="),
            None,
        )
        to_date_str = next(
            (condition[2] for condition in conditions
             if condition[0] == "timestamp" and condition[1] == "<"),
            None,
        )
        # The legacy representation must match the AST-derived range.
        assert from_date_str == from_date_ast.isoformat()
        assert to_date_str == to_date_ast.isoformat()

        found_timestamps.append(
            (from_date_ast.isoformat(), to_date_ast.isoformat()))

        return QueryResult({"data": []}, {})
Пример #9
0
 def query_runner(query: Query, settings: RequestSettings,
                  reader: Reader) -> QueryResult:
     """Stub runner asserting org/project/tag columns plus the
     dynamically translated column supplied by the enclosing test."""
     expected_selected = [
         SelectedExpression(
             "org_id",
             Column("_snuba_org_id", None, "org_id"),
         ),
         SelectedExpression(
             "project_id",
             Column("_snuba_project_id", None, "project_id"),
         ),
         SelectedExpression(
             "tags[10]",
             FunctionCall(
                 "_snuba_tags[10]",
                 "arrayElement",
                 (
                     Column(None, None, "tags.value"),
                     FunctionCall(
                         None,
                         "indexOf",
                         (Column(None, None, "tags.key"), Literal(None, 10)),
                     ),
                 ),
             ),
         ),
         SelectedExpression(column_name, translated_value),
     ]
     assert query.get_selected_columns() == expected_selected
     return QueryResult({}, {})
Пример #10
0
 def do_query(
     query: ClickhouseQuery,
     request_settings: RequestSettings,
     reader: Reader[SqlQuery],
 ) -> QueryResult:
     """Fake executor: serve whichever canned payload (first or second
     split query) matches the plain columns selected by the query."""
     selected_col_names = []
     for selected in query.get_selected_columns_from_ast() or []:
         expr = selected.expression
         # Only plain column references count; skip functions etc.
         if isinstance(expr, Column):
             selected_col_names.append(expr.column_name)

     if selected_col_names == list(first_query_data[0].keys()):
         return QueryResult({"data": first_query_data}, {})
     if selected_col_names == list(second_query_data[0].keys()):
         return QueryResult({"data": second_query_data}, {})
     raise ValueError(f"Unexpected selected columns: {selected_col_names}")
Пример #11
0
    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        """Verify the span_id context was translated into hex on select
        and into a uint64 literal in the WHERE condition."""
        assert isinstance(query, Query)
        # in local and CI there's a table name difference
        # errors_local vs errors_dist and discover_local vs discover_dist
        # so we check using `in` instead of `==`
        assert expected_table_name in query.get_from_clause().table_name
        assert query.get_selected_columns() == [
            SelectedExpression(
                name="contexts[trace.span_id]",
                # the select converts the span_id into a lowercase hex string
                expression=FunctionCall(
                    "_snuba_contexts[trace.span_id]",
                    "lower",
                    (FunctionCall(None, "hex",
                                  (Column(None, None, "span_id"), )), ),
                ),
            )
        ]

        # Walks the condition tree looking for the equals() on span_id.
        class SpanIdVerifier(NoopVisitor):
            def __init__(self) -> None:
                self.found_span_condition = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> None:
                if exp.function_name == "equals" and exp.parameters[
                        0] == Column(None, None, "span_id"):
                    self.found_span_condition = True
                    # and here we can see that the hex string the client queried us with
                    # has been converted to the correct uint64
                    assert exp.parameters[1] == Literal(
                        None, span_id_as_uint64)
                return super().visit_function_call(exp)

        verifier = SpanIdVerifier()
        condition = query.get_condition()
        assert condition is not None
        condition.accept(verifier)
        assert verifier.found_span_condition

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )
Пример #12
0
    def do_query(
        query: ClickhouseQuery, request_settings: RequestSettings,
    ) -> QueryResult:
        """Fake executor recording the timestamp window of each subquery
        into ``found_timestamps`` (from the enclosing test scope)."""
        start, end = get_time_range(query, "timestamp")
        # Both bounds must be concrete datetimes.
        for bound in (start, end):
            assert bound is not None and isinstance(bound, datetime)

        found_timestamps.append((start.isoformat(), end.isoformat()))

        return QueryResult({"data": []}, {})
Пример #13
0
 def runner(
     query: Union[ClickhouseQuery, CompositeQuery[Table]],
     request_settings: RequestSettings,
     reader: Reader,
 ) -> QueryResult:
     """Stub runner asserting the received query equals the expected
     ``processed_query`` (from the enclosing scope), with the mismatch
     report in the assertion message."""
     # equals() returns (matched: bool, mismatch description).
     report = query.equals(processed_query)
     assert report[0], f"Mismatch: {report[1]}"
     return QueryResult(
         {"data": []},
         {},
     )
Пример #14
0
 def do_query(query: ClickhouseQuery,
              request_settings: "Optional[RequestSettings]" = None) -> QueryResult:
     """Fake executor returning a single canned event row.

     The column names (``id_column`` etc.) come from the enclosing test
     scope.

     Fix: the parameter defaults to ``None``, so its annotation must be
     Optional; quoted so no typing import is evaluated at runtime.
     """
     return QueryResult(
         {
             "data": [{
                 id_column: "asd123",
                 project_column: 123,
                 timestamp_column: "2019-10-01 22:33:42",
             }]
         },
         {},
     )
Пример #15
0
 def runner(
     query: Union[ClickhouseQuery, CompositeQuery[Table]],
     query_settings: QuerySettings,
     reader: Reader,
 ) -> QueryResult:
     """Stub runner asserting the received query equals the expected
     ``processed_query`` (from the enclosing scope), then returning an
     empty result with well-formed extra metadata."""
     # equals() returns (matched: bool, mismatch description).
     report = query.equals(processed_query)
     assert report[0], f"Mismatch: {report[1]}"
     return QueryResult(
         {"data": []},
         {
             "stats": {},
             "sql": "",
             "experiments": {}
         },
     )
Пример #16
0
 def query_runner(
     query: Union[Query, CompositeQuery[Table]],
     settings: QuerySettings,
     reader: Reader,
 ) -> QueryResult:
     """Stub runner asserting the translated selected columns, then
     returning an empty but well-formed result payload."""
     expected_selected = [
         SelectedExpression(
             "org_id",
             Column("_snuba_org_id", None, "org_id"),
         ),
         SelectedExpression(
             "project_id",
             Column("_snuba_project_id", None, "project_id"),
         ),
         SelectedExpression(
             "tags[10]",
             FunctionCall(
                 "_snuba_tags[10]",
                 "arrayElement",
                 (
                     Column(None, None, "tags.value"),
                     FunctionCall(
                         None,
                         "indexOf",
                         (Column(None, None, "tags.key"), Literal(None, 10)),
                     ),
                 ),
             ),
         ),
         SelectedExpression(column_name, translated_value),
     ]
     assert query.get_selected_columns() == expected_selected
     return QueryResult(
         result={"meta": [], "data": [], "totals": {}},
         extra={"stats": {}, "sql": "", "experiments": {}},
     )
Пример #17
0
def _dry_run_query_runner(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    """Format the query without executing it.

    Records the structured SQL on a tracing span and returns an empty
    result whose extra data carries the generated SQL.
    """
    with sentry_sdk.start_span(description="dryrun_create_query",
                               op="db") as span:
        formatted_query = format_query(clickhouse_query, request_settings)
        span.set_data("query", formatted_query.structured())

    empty_result = {"data": [], "meta": []}
    extra = {"stats": {}, "sql": formatted_query.get_sql()}
    return QueryResult(empty_result, extra)
Пример #18
0
 def query_runner(query: Query, settings: RequestSettings,
                  reader: Reader) -> QueryResult:
     """Stub runner asserting the sessions-dataset aggregate columns,
     including the pre-aggregated sessions counter."""
     quantiles = tuple(
         Literal(None, level) for level in (0.5, 0.75, 0.9, 0.95, 0.99, 1)
     )
     expected_selected = [
         SelectedExpression(
             "duration_quantiles",
             CurriedFunctionCall(
                 "_snuba_duration_quantiles",
                 FunctionCall(
                     None,
                     "quantilesIfMerge",
                     quantiles,
                 ),
                 (Column(None, None, "duration_quantiles"),),
             ),
         ),
         SelectedExpression(
             "sessions",
             FunctionCall(
                 "_snuba_sessions",
                 "plus",
                 (
                     FunctionCall(
                         None,
                         "countIfMerge",
                         (Column(None, None, "sessions"),),
                     ),
                     FunctionCall(
                         None,
                         "sumIfMerge",
                         (Column(None, None, "sessions_preaggr"),),
                     ),
                 ),
             ),
         ),
         SelectedExpression(
             "users",
             FunctionCall(
                 "_snuba_users",
                 "uniqIfMerge",
                 (Column(None, None, "users"),),
             ),
         ),
     ]
     assert query.get_selected_columns() == expected_selected
     return QueryResult({}, {})
Пример #19
0
    def query_verifier(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        """Verify every selected expression casts sdk_version to
        Nullable(String), then return an empty well-formed result."""
        # The only reason this extends StringifyVisitor is because it has all the other
        # visit methods implemented.
        class NullCastingVerifier(StringifyVisitor):
            def __init__(self) -> None:
                self.sdk_version_cast_to_null = False
                super().__init__()

            def visit_function_call(self, exp: FunctionCall) -> str:
                # Flip the flag when the exact expected cast is seen.
                if (exp.function_name == "cast"
                        and exp.alias == "_snuba_sdk_version"
                        and exp.parameters == (
                            Column(None, None, "sdk_version"),
                            Literal(None, "Nullable(String)"),
                        )):
                    self.sdk_version_cast_to_null = True
                return super().visit_function_call(exp)

        # A fresh verifier per expression: each one must contain the cast.
        for select_expr in query.get_selected_columns():
            verifier = NullCastingVerifier()
            select_expr.expression.accept(verifier)
            assert verifier.sdk_version_cast_to_null

        return QueryResult(
            result={
                "meta": [],
                "data": [],
                "totals": {}
            },
            extra={
                "stats": {},
                "sql": "",
                "experiments": {}
            },
        )
Пример #20
0
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
    robust: bool = False,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the clickhouse
    query. If this function ends up depending on the dataset, something is wrong.
    """
    # Collect runtime config entries prefixed "query_settings/" and strip
    # the prefix so they can be passed to ClickHouse as-is.
    all_confs = state.get_all_configs()
    clickhouse_query_settings: MutableMapping[str, Any] = {
        k.split("/", 1)[1]: v
        for k, v in all_confs.items()
        if k.startswith("query_settings/")
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    # Pre-bind everything but the final status so success/error paths
    # only need to supply the outcome.
    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        clickhouse_query_settings,
        trace_id,
    )

    # Readthrough caching is the default (config flag defaults to 1).
    execute_query_strategy = (
        execute_query_with_readthrough_caching
        if state.get_config("use_readthrough_query_cache", 1)
        else execute_query_with_caching
    )

    try:
        result = execute_query_strategy(
            clickhouse_query,
            query_settings,
            formatted_query,
            reader,
            timer,
            stats,
            clickhouse_query_settings,
            robust=robust,
        )
    except Exception as cause:
        if isinstance(cause, RateLimitExceeded):
            stats = update_with_status(QueryStatus.RATE_LIMITED)
        else:
            error_code = None
            with configure_scope() as scope:
                if isinstance(cause, ClickhouseError):
                    error_code = cause.code
                    # Group Sentry events by ClickHouse error code.
                    scope.fingerprint = ["{{default}}", str(cause.code)]
                    if scope.span:
                        # Tag the span with which kind of timeout occurred.
                        if cause.code == errors.ErrorCodes.TOO_SLOW:
                            sentry_sdk.set_tag("timeout", "predicted")
                        elif cause.code == errors.ErrorCodes.TIMEOUT_EXCEEDED:
                            sentry_sdk.set_tag("timeout", "query_timeout")
                        elif cause.code in (
                            errors.ErrorCodes.SOCKET_TIMEOUT,
                            errors.ErrorCodes.NETWORK_ERROR,
                        ):
                            sentry_sdk.set_tag("timeout", "network")
                elif isinstance(
                    cause,
                    (TimeoutError, ExecutionTimeoutError, TigerExecutionTimeoutError),
                ):
                    if scope.span:
                        sentry_sdk.set_tag("timeout", "cache_timeout")

                logger.exception("Error running query: %s\n%s", sql, cause)
            stats = update_with_status(QueryStatus.ERROR, error_code=error_code)
        # Re-raise wrapped so callers get stats/sql/experiments context,
        # chaining the original exception as the cause.
        raise QueryException(
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            }
        ) from cause
    else:
        stats = update_with_status(QueryStatus.SUCCESS, result["profile"])
        return QueryResult(
            result,
            {
                "stats": stats,
                "sql": sql,
                "experiments": clickhouse_query.get_experiments(),
            },
        )
Пример #21
0
def raw_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    stats: MutableMapping[str, Any],
    trace_id: Optional[str] = None,
) -> QueryResult:
    """
    Submits a raw SQL query to the DB and does some post-processing on it to
    fix some of the formatting issues in the result JSON.
    This function is not supposed to depend on anything higher level than the clickhouse
    query. If this function ends up depending on the dataset, something is wrong.
    """
    # Collect runtime config entries prefixed "query_settings/" and strip
    # the prefix so they can be passed to ClickHouse as-is.
    all_confs = state.get_all_configs()
    query_settings: MutableMapping[str, Any] = {
        k.split("/", 1)[1]: v
        for k, v in all_confs.items() if k.startswith("query_settings/")
    }

    timer.mark("get_configs")

    sql = formatted_query.get_sql()

    # Pre-bind everything but the final status so success/error paths
    # only need to supply the outcome.
    update_with_status = partial(
        update_query_metadata_and_stats,
        clickhouse_query,
        sql,
        timer,
        stats,
        query_metadata,
        query_settings,
        trace_id,
    )

    # Readthrough caching is the default (config flag defaults to 1).
    execute_query_strategy = (
        execute_query_with_readthrough_caching if state.get_config(
            "use_readthrough_query_cache", 1) else execute_query_with_caching)

    try:
        result = execute_query_strategy(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )
    except Exception as cause:
        if isinstance(cause, RateLimitExceeded):
            stats = update_with_status(QueryStatus.RATE_LIMITED)
        else:
            with configure_scope() as scope:
                if isinstance(cause, ClickhouseError):
                    # Group Sentry events by ClickHouse error code.
                    scope.fingerprint = ["{{default}}", str(cause.code)]
                logger.exception("Error running query: %s\n%s", sql, cause)
            stats = update_with_status(QueryStatus.ERROR)
        # Re-raise wrapped so callers get stats/sql context, chaining the
        # original exception as the cause.
        raise QueryException({"stats": stats, "sql": sql}) from cause
    else:
        stats = update_with_status(QueryStatus.SUCCESS)
        return QueryResult(result, {"stats": stats, "sql": sql})
Пример #22
0
 def query_runner(query: Query, settings: RequestSettings,
                  reader: Reader) -> QueryResult:
     """Stub runner asserting the query was routed to the expected table
     (from the enclosing scope), then returning an empty result."""
     actual_table = query.get_from_clause().table_name
     assert actual_table == expected_table
     return QueryResult({}, {})
Пример #23
0
from snuba.reader import Column, Result
from snuba.web import QueryExtraData, QueryResult, transform_column_names

TEST_CASES = [
    pytest.param(
        QueryResult(
            result=Result(
                meta=[
                    Column(name="_snuba_event_id", type="String"),
                    Column(name="_snuba_duration", type="UInt32"),
                    Column(name="_snuba_message", type="String"),
                ],
                data=[
                    {
                        "_snuba_event_id": "asd",
                        "_snuba_duration": 123,
                        "_snuba_message": "msg",
                    },
                    {
                        "_snuba_event_id": "sdf",
                        "_snuba_duration": 321,
                        "_snuba_message": "msg2",
                    },
                ],
            ),
            extra=QueryExtraData(stats={}, sql="...", experiments={}),
        ),
        QueryResult(
            result=Result(
                meta=[
                    Column(name="event_id", type="String"),
                    Column(name="duration", type="UInt32"),
Пример #24
0
def test() -> None:
    """Run one SnQL query through an errors/errors_ro pipeline delegator
    with split rate limiting and verify both pipelines executed with
    distinct (duplicated) settings objects.

    A condition variable is notified from the callback so the main thread
    can wait for the secondary (threaded) pipeline to finish.
    """
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": "", "experiments": {}})

    def callback_func(primary: Optional[Tuple[str, QueryResult]],
                      other: List[Tuple[str, QueryResult]]) -> None:
        # Wake the main thread once the delegator invokes the callback.
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "query": """
        MATCH (events)
        SELECT type, project_id
        WHERE project_id = 1
        AND timestamp >= toDateTime('2020-01-01 12:00:00')
        AND timestamp < toDateTime('2020-01-02 12:00:00')
        """,
        "dataset": "events",
    }

    events = get_dataset("events")
    query, _ = parse_snql_query(query_body["query"], events)

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)), )

    errors_ro_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS_RO)), )

    # "errors" is the primary pipeline; "errors_ro" runs as secondary.
    delegator = PipelineDelegator(
        query_pipeline_builders={
            "errors": errors_pipeline,
            "errors_ro": errors_ro_pipeline,
        },
        selector_func=lambda query, referrer: ("errors", ["errors_ro"]),
        split_rate_limiter=True,
        ignore_secondary_exceptions=True,
        callback_func=mock_callback,
    )

    runner_call_count = 0
    runner_settings: MutableSequence[QuerySettings] = []

    def query_runner(
        query: Union[Query, CompositeQuery[Table]],
        settings: QuerySettings,
        reader: Reader,
    ) -> QueryResult:
        # Record each invocation and the settings object it received.
        nonlocal runner_call_count
        nonlocal runner_settings

        runner_call_count += 1
        runner_settings.append(settings)
        return query_result

    set_config("pipeline_split_rate_limiter", 1)

    with cv:
        query_settings = HTTPQuerySettings(referrer="ref")
        delegator.build_execution_pipeline(
            Request(
                id="asd",
                original_body=query_body,
                query=query,
                snql_anonymized="",
                query_settings=query_settings,
                attribution_info=AttributionInfo(get_app_id("ref"), "ref",
                                                 None, None, None),
            ),
            query_runner,
        ).execute()
        # Bounded wait so a broken callback cannot hang the test forever.
        cv.wait(timeout=5)

    # Both the primary and the secondary pipeline ran the query.
    assert runner_call_count == 2
    assert len(runner_settings) == 2
    settings, settings_ro = runner_settings
    # Validate that settings have been duplicated
    assert id(settings) != id(settings_ro)

    assert mock_callback.call_args == call(
        query,
        query_settings,
        "ref",
        Result("errors", query_result, ANY),
        [Result("errors_ro", query_result, ANY)],
    )