def test_format_expressions(
    name: str, query: ClickhouseQuery, expected_query: ClickhouseQuery
) -> None:
    """Run the column promoter on ``query`` and compare selected columns.

    A ``MappingColumnPromoter`` built with the mapping spec
    ``{"tags": {"promoted_tag": "promoted"}}`` processes ``query`` using
    default ``HTTPQuerySettings``; afterwards the selected columns of
    ``query`` must equal those of ``expected_query``.

    ``name`` is not read by the body.
    """
    promoter = MappingColumnPromoter({"tags": {"promoted_tag": "promoted"}})
    promoter.process_query(query, HTTPQuerySettings())

    processed_columns = query.get_selected_columns()
    wanted_columns = expected_query.get_selected_columns()
    assert processed_columns == wanted_columns
dist_table_name="discover_dist", storage_set_key=StorageSetKey.DISCOVER, mandatory_conditions=mandatory_conditions, ) storage = ReadableTableStorage( storage_key=StorageKey.DISCOVER, storage_set_key=StorageSetKey.DISCOVER, schema=schema, query_processors=[ MappingColumnPromoter( mapping_specs={ "tags": { "environment": "environment", "sentry:release": "release", "sentry:dist": "dist", "sentry:user": "******", }, "contexts": {"trace.trace_id": "trace_id"}, } ), MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"), ArrayJoinKeyValueOptimizer("tags"), UUIDColumnProcessor(set(["event_id", "trace_id"])), EventsBooleanContextsProcessor(), PrewhereProcessor( [ "event_id", "release", "message", "transaction_name",
"project_id", "group_id", "timestamp", "deleted", "retention_days", ] storage = WritableTableStorage( storage_key=StorageKey.ERRORS, storage_set_key=StorageSetKey.EVENTS, schema=schema, query_processors=[ PostReplacementConsistencyEnforcer( project_column="project_id", replacer_state_name=ReplacerState.ERRORS, ), MappingColumnPromoter(mapping_specs={"tags": promoted_tag_columns}), ArrayJoinKeyValueOptimizer("tags"), PrewhereProcessor(), ], stream_loader=KafkaStreamLoader( processor=ErrorsProcessor(promoted_tag_columns), default_topic="events", replacement_topic="errors-replacements", ), replacer_processor=ErrorsReplacer( write_schema=schema, read_schema=schema, required_columns=required_columns, tag_column_map={"tags": promoted_tag_columns, "contexts": {}}, promoted_tags={"tags": list(promoted_tag_columns.keys()), "contexts": []}, state_name=ReplacerState.ERRORS,
"group_id", "tags[sentry:release]", "release", "message", "environment", "project_id", ] query_processors = [ UniqInSelectAndHavingProcessor(), PostReplacementConsistencyEnforcer( project_column="project_id", replacer_state_name=ReplacerState.ERRORS, ), MappingColumnPromoter(mapping_specs={ "tags": promoted_tag_columns, "contexts": promoted_context_columns, }), UserColumnProcessor(), UUIDColumnProcessor({"event_id", "primary_hash", "trace_id"}), HexIntColumnProcessor({"span_id"}), UUIDArrayColumnProcessor({"hierarchical_hashes"}), SliceOfMapOptimizer(), EventsBooleanContextsProcessor(), TypeConditionOptimizer(), MappingOptimizer("tags", "_tags_hash_map", "events_tags_hash_map_enabled"), EmptyTagConditionProcessor(), ArrayJoinKeyValueOptimizer("tags"), PrewhereProcessor( prewhere_candidates, # Environment and release are excluded from prewhere in case of final # queries because of a Clickhouse bug.
"project_id", ] query_processors = [ PostReplacementConsistencyEnforcer( project_column="project_id", # key migration is on going. As soon as all the keys we are interested # into in redis are stored with "EVENTS" in the name, we can change this. replacer_state_name=None, ), EventsColumnProcessor(), MappingColumnPromoter( mapping_specs={ "tags": ChainMap( {col.flattened: col.flattened for col in promoted_tag_columns}, get_promoted_context_tag_col_mapping(), ), "contexts": get_promoted_context_col_mapping(), }, ), # This processor must not be ported to the errors dataset. We should # not support promoting tags/contexts with boolean values. There is # no way to convert them back consistently to the value provided by # the client when the event is ingested, in all ways to access # tags/contexts. Once the errors dataset is in use, we will not have # boolean promoted tags/contexts so this constraint will be easy # to enforce. EventsBooleanContextsProcessor(), MappingOptimizer("tags", "_tags_hash_map", "events_tags_hash_map_enabled"), ArrayJoinKeyValueOptimizer("tags"), PrewhereProcessor(),
def test_events_promoted_boolean_context() -> None:
    """Check the rewrite of a ``contexts[device.charging]`` selection.

    The input query selects
    ``arrayElement(contexts.value, indexOf(contexts.key, 'device.charging'))``.
    It is processed by ``MappingColumnPromoter`` (mapping ``device.charging``
    to ``device_charging`` with ``cast_to_string=True``) and then by
    ``EventsPromotedBooleanContextsProcessor``.  The resulting selected
    columns must equal an ``if`` call, under the same alias, whose condition
    is ``toString(device_charging) IN ('1', 'True')`` and whose branches are
    the literals ``'True'`` and ``'False'``.
    """
    alias = "contexts[device.charging]"
    schema = ColumnSet(
        [
            ("device_charging", UInt(8, Modifier(nullable=True))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    # Input expression: the raw lookup into the nested contexts column.
    key_position = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "contexts.key"), Literal(None, "device.charging")),
    )
    nested_lookup = FunctionCall(
        alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_position),
    )
    query = ClickhouseQuery(
        Table("events", schema),
        selected_columns=[SelectedExpression(alias, nested_lookup)],
    )

    # Expected expression: the if(...) call described in the docstring.
    charging_as_text = FunctionCall(
        None, "toString", (Column(None, None, "device_charging"),)
    )
    truthy_values = literals_tuple(
        None, [Literal(None, "1"), Literal(None, "True")]
    )
    is_truthy = binary_condition(
        ConditionFunctions.IN, charging_as_text, truthy_values
    )
    boolean_text = FunctionCall(
        alias,
        "if",
        (is_truthy, Literal(None, "True"), Literal(None, "False")),
    )
    expected = ClickhouseQuery(
        Table("events", schema),
        selected_columns=[SelectedExpression(alias, boolean_text)],
    )

    settings = HTTPQuerySettings()
    promoter = MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}},
        cast_to_string=True,
    )
    promoter.process_query(query, settings)
    EventsPromotedBooleanContextsProcessor().process_query(query, settings)

    actual_columns = query.get_selected_columns()
    wanted_columns = expected.get_selected_columns()
    assert actual_columns == wanted_columns
def test_events_boolean_context() -> None:
    """Check the rewrite of a ``contexts[device.charging]`` selection.

    The input query selects
    ``arrayElement(contexts.value, indexOf(contexts.key, 'device.charging'))``.
    It is processed by ``MappingColumnPromoter`` (mapping ``device.charging``
    to ``device_charging``) and then by ``EventsBooleanContextsProcessor``.
    The resulting selected columns must equal a ``multiIf`` call, under the
    same alias, with these arguments in order:

    * ``toString(device_charging) = ''``  then ``''``
    * ``toString(device_charging) IN ('1', 'True')``  then ``'True'``
    * otherwise ``'False'``
    """
    alias = "contexts[device.charging]"
    schema = ColumnSet(
        [
            ("device_charging", Nullable(UInt(8))),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )

    def charging_as_text() -> FunctionCall:
        # A fresh toString(device_charging) node for each condition.
        return FunctionCall(
            None, "toString", (Column(None, None, "device_charging"),)
        )

    # Input expression: the raw lookup into the nested contexts column.
    key_position = FunctionCall(
        None,
        "indexOf",
        (Column(None, None, "contexts.key"), Literal(None, "device.charging")),
    )
    nested_lookup = FunctionCall(
        alias,
        "arrayElement",
        (Column(None, None, "contexts.value"), key_position),
    )
    query = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", schema),
            selected_columns=[SelectedExpression(alias, nested_lookup)],
        )
    )

    # Expected expression: the multiIf(...) call described in the docstring.
    is_empty = binary_condition(
        None, ConditionFunctions.EQ, charging_as_text(), Literal(None, "")
    )
    is_truthy = binary_condition(
        None,
        ConditionFunctions.IN,
        charging_as_text(),
        literals_tuple(None, [Literal(None, "1"), Literal(None, "True")]),
    )
    boolean_text = FunctionCall(
        alias,
        "multiIf",
        (
            is_empty,
            Literal(None, ""),
            is_truthy,
            Literal(None, "True"),
            Literal(None, "False"),
        ),
    )
    expected = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", schema),
            selected_columns=[SelectedExpression(alias, boolean_text)],
        )
    )

    settings = HTTPRequestSettings()
    promoter = MappingColumnPromoter(
        {"contexts": {"device.charging": "device_charging"}}
    )
    promoter.process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)

    actual_columns = query.get_selected_columns_from_ast()
    wanted_columns = expected.get_selected_columns_from_ast()
    assert actual_columns == wanted_columns