def test_invalid_uuid(unprocessed: Expression) -> None:
    """The processor must reject conditions that compare a UUID column
    against a value that is not a valid UUID.

    ``unprocessed`` is a (parametrized) condition expression containing an
    invalid UUID literal; processing the query is expected to raise
    ``ColumnTypeError``.
    """
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    with pytest.raises(ColumnTypeError):
        # Set literal instead of set([...]) — same value, idiomatic form.
        UUIDColumnProcessor({"column1", "column2"}).process_query(
            unprocessed_query, HTTPRequestSettings()
        )
def test_uuid_column_processor(
    unprocessed: Expression,
    expected: Expression,
    formatted_value: str,
) -> None:
    """The processor rewrites UUID-column conditions and SELECT columns.

    Parametrized with:
    - ``unprocessed``: the condition before processing,
    - ``expected``: the condition the processor should produce,
    - ``formatted_value``: the ClickHouse SQL the rewritten condition
      formats to.
    """
    unprocessed_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=unprocessed,
    )
    expected_query = Query(
        Table("transactions", ColumnSet([])),
        selected_columns=[
            SelectedExpression("column2", Column(None, None, "column2"))
        ],
        condition=expected,
    )
    # Set literal instead of set([...]) — same value, idiomatic form.
    UUIDColumnProcessor({"column1", "column2"}).process_query(
        unprocessed_query, HTTPRequestSettings()
    )

    # The selected UUID column is wrapped so it formats as a dashless string:
    # replaceAll(toString(column2), '-', '')
    assert unprocessed_query.get_selected_columns() == [
        SelectedExpression(
            "column2",
            FunctionCall(
                None,
                "replaceAll",
                (
                    FunctionCall(
                        None,
                        "toString",
                        (Column(None, None, "column2"),),
                    ),
                    Literal(None, "-"),
                    Literal(None, ""),
                ),
            ),
        )
    ]

    # The condition is rewritten in place to match the expected expression.
    assert expected_query.get_condition() == unprocessed_query.get_condition()
    condition = unprocessed_query.get_condition()
    assert condition is not None
    ret = condition.accept(ClickhouseExpressionFormatter())
    assert ret == formatted_value
def test_event_id_column_format_expressions() -> None:
    """Aliased UUID columns in the SELECT list are rewritten to
    replaceAll(toString(...), '-', '') while keeping their alias."""
    query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression(
                "the_event_id", Column("the_event_id", None, "event_id")
            ),
        ],
    )

    # What the SELECT list should look like after processing: the duration
    # column untouched, the event_id column wrapped and still aliased.
    rewritten_event_id = FunctionCall(
        "the_event_id",
        "replaceAll",
        (
            FunctionCall(
                None,
                "toString",
                (Column(None, None, "event_id"),),
            ),
            Literal(None, "-"),
            Literal(None, ""),
        ),
    )
    expected_query = Query(
        Table("events", ColumnSet([])),
        selected_columns=[
            SelectedExpression(
                "transaction.duration",
                Column("transaction.duration", None, "duration"),
            ),
            SelectedExpression("the_event_id", rewritten_event_id),
        ],
    )

    UUIDColumnProcessor({"event_id"}).process_query(query, HTTPQuerySettings())

    assert expected_query.get_selected_columns() == query.get_selected_columns()

    rendered = query.get_selected_columns()[1].expression.accept(
        ClickhouseExpressionFormatter()
    )
    assert rendered == "(replaceAll(toString(event_id), '-', '') AS the_event_id)"
schema=schema, query_processors=[ MappingColumnPromoter( mapping_specs={ "tags": { "environment": "environment", "sentry:release": "release", "sentry:dist": "dist", "sentry:user": "******", }, "contexts": {"trace.trace_id": "trace_id"}, } ), MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"), ArrayJoinKeyValueOptimizer("tags"), UUIDColumnProcessor(set(["event_id", "trace_id"])), EventsBooleanContextsProcessor(), PrewhereProcessor( [ "event_id", "release", "message", "transaction_name", "environment", "project_id", ] ), ], query_splitters=[ ColumnSplitQueryStrategy( id_column="event_id",
"environment", "project_id", ] query_processors = [ UniqInSelectAndHavingProcessor(), PostReplacementConsistencyEnforcer( project_column="project_id", replacer_state_name=ReplacerState.ERRORS, ), MappingColumnPromoter(mapping_specs={ "tags": promoted_tag_columns, "contexts": promoted_context_columns, }), UserColumnProcessor(), UUIDColumnProcessor({"event_id", "primary_hash", "trace_id"}), HexIntColumnProcessor({"span_id"}), UUIDArrayColumnProcessor({"hierarchical_hashes"}), SliceOfMapOptimizer(), EventsBooleanContextsProcessor(), TypeConditionOptimizer(), MappingOptimizer("tags", "_tags_hash_map", "events_tags_hash_map_enabled"), EmptyTagConditionProcessor(), ArrayJoinKeyValueOptimizer("tags"), PrewhereProcessor( prewhere_candidates, # Environment and release are excluded from prewhere in case of final # queries because of a Clickhouse bug. # group_id instead is excluded since `final` is applied after prewhere. # thus, in this case, we could be filtering out rows that should be # merged together by the final.
from snuba.datasets.storage import WritableTableStorage from snuba.datasets.storages import StorageKey from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings from snuba.query.processors.conditions_enforcer import OrgIdEnforcer, ProjectIdEnforcer from snuba.query.processors.table_rate_limit import TableRateLimit from snuba.query.processors.type_converters.uuid_column_processor import ( UUIDColumnProcessor, ) from snuba.utils.streams.topics import Topic PROFILES_LOCAL_TABLE_NAME = "profiles_local" PROFILES_DIST_TABLE_NAME = "profiles_dist" processors = [ UUIDColumnProcessor(set(["profile_id", "transaction_id", "trace_id"])), TableRateLimit(), ] loader = build_kafka_stream_loader_from_settings( processor=ProfilesMessageProcessor(), default_topic=Topic.PROFILES, ) readable_columns = ColumnSet( [ ("organization_id", UInt(64)), ("project_id", UInt(64)), ("transaction_id", UUID()), ("profile_id", UUID()), ("received", DateTime()),