def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
    robust: bool,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Render the storage query to SQL and delegate its execution to raw_query.

    The tables referenced by the FROM clause are collected first and are
    reported both in the stats passed to raw_query and as a tag on the
    execution span. When a concurrent_queries_gauge is provided, it is
    entered as a context manager for the duration of the raw_query call.
    """
    # Walk the FROM clause once; the collector is reused below for the
    # FINAL / SAMPLE information that goes into the stats.
    from_clause = clickhouse_query.get_from_clause()
    tables_collector = TablesCollector()
    tables_collector.visit(from_clause)
    joined_table_names = ",".join(sorted(tables_collector.get_tables()))

    with sentry_sdk.start_span(description="create_query", op="db") as create_span:
        _apply_turbo_sampling_if_needed(clickhouse_query, request_settings)

        formatted_query = format_query(clickhouse_query)
        create_span.set_data("query", formatted_query.structured())
        create_span.set_data(
            "query_size_bytes",
            _string_size_in_bytes(formatted_query.get_sql()),
        )
        sentry_sdk.set_tag(
            "query_size_group",
            get_query_size_group(formatted_query.get_sql()),
        )
        metrics.increment("execute")

    timer.mark("prepare_query")

    query_stats = {
        "clickhouse_table": joined_table_names,
        "final": tables_collector.any_final(),
        "referrer": referrer,
        "sample": tables_collector.get_sample_rate(),
    }

    with sentry_sdk.start_span(
        description=formatted_query.get_sql(), op="db"
    ) as execute_span:
        execute_span.set_tag("table", joined_table_names)

        def run_raw_query() -> QueryResult:
            return raw_query(
                clickhouse_query,
                request_settings,
                formatted_query,
                reader,
                timer,
                query_metadata,
                query_stats,
                execute_span.trace_id,
                robust=robust,
            )

        # Without a gauge there is nothing to wrap the execution in.
        if concurrent_queries_gauge is None:
            return run_raw_query()

        with concurrent_queries_gauge:
            return run_raw_query()
def test_format_expressions(
    query: Query,
    formatted_seq: Sequence[Any],
    formatted_str: str,
    formatted_anonymized_str: str,
) -> None:
    """
    Checks the SQL string and the structured form produced by format_query,
    and the SQL string produced by format_query_anonymized, against the
    expected values supplied as arguments.
    """
    regular = format_query(query)
    anonymized = format_query_anonymized(query)

    # Plain formatting: both the flat SQL and the structured sequence.
    assert regular.get_sql() == formatted_str
    assert regular.structured() == formatted_seq

    # Anonymized formatting: only the flat SQL is compared.
    assert anonymized.get_sql() == formatted_anonymized_str
def test_format_clickhouse_specific_query() -> None:
    """
    Builds a query that sets a few of the Clickhouse specific fields
    (final, sampling rate, array join, totals, limit by) and checks the
    structured output of format_query.
    """
    from_table = Table("my_table", ColumnSet([]), final=True, sampling_rate=0.1)
    selection = [
        SelectedExpression("column1", Column(None, None, "column1")),
        SelectedExpression("column2", Column(None, "table1", "column2")),
    ]
    where_clause = binary_condition(
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, "blabla"),
    )
    grouping = [
        Column(None, None, "column1"),
        Column(None, "table1", "column2"),
    ]
    having_clause = binary_condition(
        "eq",
        lhs=Column(None, None, "column1"),
        rhs=Literal(None, 123),
    )
    ordering = [OrderBy(OrderByDirection.ASC, Column(None, None, "column1"))]

    query = ClickhouseQuery(
        from_table,
        selected_columns=selection,
        condition=where_clause,
        groupby=grouping,
        having=having_clause,
        order_by=ordering,
        array_join=Column(None, None, "column1"),
        totals=True,
        limitby=LimitBy(10, Column(None, None, "environment")),
    )
    query.set_offset(50)
    query.set_limit(100)

    formatted = format_query(query, HTTPRequestSettings())

    # One entry per clause, in the order the clauses appear in the SQL.
    expected_structure = [
        "SELECT column1, table1.column2",
        ["FROM", "my_table FINAL SAMPLE 0.1"],
        "ARRAY JOIN column1",
        "WHERE eq(column1, 'blabla')",
        "GROUP BY column1, table1.column2 WITH TOTALS",
        "HAVING eq(column1, 123)",
        "ORDER BY column1 ASC",
        "LIMIT 10 BY environment",
        "LIMIT 100 OFFSET 50",
    ]
    assert formatted.structured() == expected_structure
def _dry_run_query_runner(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    """
    Formats the query without executing it.

    The returned QueryResult has empty "data" and "meta", empty "stats",
    and carries the formatted SQL under "sql". The reader argument is not
    used by this function.
    """
    with sentry_sdk.start_span(
        description="dryrun_create_query", op="db"
    ) as dry_run_span:
        formatted = format_query(clickhouse_query, request_settings)
        dry_run_span.set_data("query", formatted.structured())

    empty_result = {"data": [], "meta": []}
    extra = {"stats": {}, "sql": formatted.get_sql()}
    return QueryResult(empty_result, extra)
def test_aliasing() -> None:
    """
    Checks the SQL generated for a query that selects tags_value and
    filters on tags_key, so that both refer to the same snuba_all_tags
    alias.
    """
    query_body = {
        "aggregations": [],
        "groupby": [],
        "selected_columns": ["tags_value"],
        "conditions": [["tags_key", "IN", ["t1", "t2"]]],
    }
    processed_query = parse_and_process(query_body)

    formatted = format_query(processed_query, HTTPRequestSettings())
    generated_sql = formatted.get_sql()

    expected_sql = (
        "SELECT (tupleElement((arrayJoin(arrayMap((x, y -> tuple(x, y)), "
        "tags.key, tags.value)) AS snuba_all_tags), 2) AS _snuba_tags_value) "
        "FROM transactions_local "
        "WHERE in((tupleElement(snuba_all_tags, 1) AS _snuba_tags_key), tuple('t1', 't2'))"
    )
    assert generated_sql == expected_sql
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    """
    Render the storage query to SQL and delegate its execution to raw_query.

    The tables referenced by the FROM clause are collected first and are
    reported both in the stats passed to raw_query and as a tag on the
    execution span.
    """
    # Walk the FROM clause once; the collector is reused below for the
    # FINAL / SAMPLE information that goes into the stats.
    from_clause = clickhouse_query.get_from_clause()
    tables_collector = TablesCollector()
    tables_collector.visit(from_clause)
    joined_table_names = ",".join(sorted(tables_collector.get_tables()))

    with sentry_sdk.start_span(description="create_query", op="db") as create_span:
        formatted_query = format_query(clickhouse_query, request_settings)
        create_span.set_data("query", formatted_query.structured())
        metrics.increment("execute")

    timer.mark("prepare_query")

    query_stats = {
        "clickhouse_table": joined_table_names,
        "final": tables_collector.any_final(),
        "referrer": referrer,
        "sample": tables_collector.get_sample_rate(),
    }

    with sentry_sdk.start_span(
        description=formatted_query.get_sql(), op="db"
    ) as execute_span:
        execute_span.set_tag("table", joined_table_names)

        result = raw_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            query_metadata,
            query_stats,
            execute_span.trace_id,
        )
        return result
def test_format_expressions(
    query: Query,
    formatted_seq: Sequence[Any],
    formatted_str: str,
) -> None:
    """
    Checks the SQL string and the structured form produced by format_query
    (called with HTTP request settings) against the expected values
    supplied as arguments.
    """
    formatted = format_query(query, HTTPRequestSettings())

    assert formatted.get_sql() == formatted_str
    assert formatted.structured() == formatted_seq
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    reader: Reader,
    robust: bool,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Render the storage query to SQL and delegate its execution to raw_query.

    The tables referenced by the FROM clause are collected first and are
    reported both in the stats and as a tag on the execution span. If the
    UTF-8 encoded SQL is larger than MAX_QUERY_SIZE_BYTES, a QueryException
    (chained from a QueryTooLongException) is raised before raw_query is
    called. When a concurrent_queries_gauge is provided, it is entered as
    a context manager for the duration of the raw_query call.
    """
    # Walk the FROM clause once; the collector is reused below for the
    # FINAL / SAMPLE information that goes into the stats.
    from_clause = clickhouse_query.get_from_clause()
    tables_collector = TablesCollector()
    tables_collector.visit(from_clause)
    joined_table_names = ",".join(sorted(tables_collector.get_tables()))

    with sentry_sdk.start_span(description="create_query", op="db") as create_span:
        _apply_turbo_sampling_if_needed(clickhouse_query, query_settings)

        formatted_query = format_query(clickhouse_query)
        # The size is measured on the encoded bytes, not on characters.
        query_size_bytes = len(formatted_query.get_sql().encode("utf-8"))

        create_span.set_data("query", formatted_query.structured())
        create_span.set_data("query_size_bytes", query_size_bytes)
        sentry_sdk.set_tag(
            "query_size_group", get_query_size_group(query_size_bytes)
        )
        metrics.increment("execute")

    timer.mark("prepare_query")

    query_stats = {
        "clickhouse_table": joined_table_names,
        "final": tables_collector.any_final(),
        "referrer": referrer,
        "sample": tables_collector.get_sample_rate(),
    }

    if query_size_bytes > MAX_QUERY_SIZE_BYTES:
        # Attach the stats and SQL gathered so far to the raised exception.
        failure_details = QueryExtraData(
            stats=query_stats,
            sql=formatted_query.get_sql(),
            experiments=clickhouse_query.get_experiments(),
        )
        raise QueryException(extra=failure_details) from QueryTooLongException(
            f"After processing, query is {query_size_bytes} bytes, "
            "which is too long for ClickHouse to process. "
            f"Max size is {MAX_QUERY_SIZE_BYTES} bytes."
        )

    with sentry_sdk.start_span(
        description=formatted_query.get_sql(), op="db"
    ) as execute_span:
        execute_span.set_tag("table", joined_table_names)

        def run_raw_query() -> QueryResult:
            return raw_query(
                clickhouse_query,
                query_settings,
                formatted_query,
                reader,
                timer,
                query_metadata,
                query_stats,
                execute_span.trace_id,
                robust=robust,
            )

        # Without a gauge there is nothing to wrap the execution in.
        if concurrent_queries_gauge is None:
            return run_raw_query()

        with concurrent_queries_gauge:
            return run_raw_query()