def test_modifiers() -> None:
    """Verify get_all_modifiers lists every modifier wrapped around a type.

    Per the expectations below, modifiers are reported innermost-first
    (e.g. Materialized before the enclosing WithCodecs for col3/col4).
    """
    cols = ColumnSet(
        [
            ("col1", WithDefault(String(), "")),
            ("col2", Nullable(Array(String()))),
            ("col3", WithCodecs(Materialized(String(), "something"), ["c"])),
            (
                "col4",
                WithCodecs(Nullable(Materialized(String(), "something")), ["c"]),
            ),
        ]
    )

    expected_modifiers = {
        "col1": [WithDefault],
        "col2": [Nullable],
        "col3": [Materialized, WithCodecs],
        "col4": [Materialized, Nullable, WithCodecs],
    }
    for column_name, modifiers in expected_modifiers.items():
        assert cols[column_name].type.get_all_modifiers() == modifiers
def _get_column(
    column_type: str, default_type: str, default_expr: str, codec_expr: str
) -> ColumnType:
    """Assemble a ColumnType from the pieces of a parsed column declaration.

    The base type string is parsed via the grammar/Visitor pair, then
    optionally wrapped with a Materialized or WithDefault modifier
    (stripping any CAST(...) wrapper from the expression) and finally
    with WithCodecs when a codec clause is present.
    """
    result: ColumnType = Visitor().visit(grammar.parse(column_type))

    # Only one of MATERIALIZED / DEFAULT can apply to a column.
    default_wrappers = {"MATERIALIZED": Materialized, "DEFAULT": WithDefault}
    wrapper = default_wrappers.get(default_type)
    if wrapper is not None:
        result = wrapper(result, _strip_cast(default_expr))

    if codec_expr:
        # Codec clauses come in as a single "A, B" string.
        result = WithCodecs(result, codec_expr.split(", "))

    return result
) if "http_referer" not in current_schema: ret.append( f"ALTER TABLE {clickhouse_table} ADD COLUMN http_referer Nullable(String) AFTER http_method" ) return ret all_columns = ColumnSet( [ ("org_id", UInt(64)), ("project_id", UInt(64)), ("timestamp", DateTime()), ("event_id", WithCodecs(UUID(), ["NONE"])), ( "event_hash", WithCodecs( Materialized(UInt(64), "cityHash64(toString(event_id))",), ["NONE"], ), ), ("platform", LowCardinality(String())), ("environment", LowCardinality(Nullable(String()))), ("release", LowCardinality(Nullable(String()))), ("dist", LowCardinality(Nullable(String()))), ("ip_address_v4", Nullable(IPv4())), ("ip_address_v6", Nullable(IPv6())), ("user", WithDefault(String(), "''")), ("user_hash", Materialized(UInt(64), "cityHash64(user)"),), ("user_id", Nullable(String())),
(("Array(Nullable(UUID))", "", "", ""), Array(Nullable(UUID()))), # Nullable (("Nullable(String)", "", "", ""), Nullable(String())), (("Nullable(FixedString(8))", "", "", ""), Nullable(FixedString(8))), (("Nullable(Date)", "", "", ""), Nullable(Date())), # Low cardinality (("LowCardinality(String)", "", "", ""), LowCardinality(String())), (("LowCardinality(Nullable(String))", "", "", ""), LowCardinality(Nullable(String()))), # Materialized (("Date", "MATERIALIZED", "toDate(col1)", ""), Materialized(Date(), "toDate(col1)")), (("UInt64", "MATERIALIZED", "CAST(cityHash64(col1), 'UInt64')", ""), Materialized(UInt(64), "cityHash64(col1)")), # Default value (("LowCardinality(String)", "DEFAULT", "a", ""), WithDefault(LowCardinality(String()), "a")), (("UInt8", "DEFAULT", "2", ""), WithDefault(UInt(8), "2")), # With codecs (("UUID", "", "", "NONE"), WithCodecs(UUID(), ["NONE"])), (("DateTime", "", "", "DoubleDelta, LZ4"), WithCodecs(DateTime(), ["DoubleDelta", "LZ4"])), ] @pytest.mark.parametrize("input, expected_output", test_data) def test_parse_column(input, expected_output): (input_name, input_type, default_expr, codec_expr) = input assert _get_column(input_name, input_type, default_expr, codec_expr) == expected_output
def __init__(self) -> None:
    """Build the errors dataset: column set, replacing-merge-tree schema,
    Kafka-backed table writer, and the tag column processor."""
    # Full ClickHouse column layout for the errors tables. Expression
    # strings ("cityHash64(...)", codec names, etc.) are ClickHouse SQL
    # passed through verbatim.
    all_columns = ColumnSet([
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", WithCodecs(UUID(), ["NONE"])),
        (
            "event_hash",
            WithCodecs(
                Materialized(
                    UInt(64),
                    "cityHash64(toString(event_id))",
                ),
                ["NONE"],
            ),
        ),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''")),
        (
            "user_hash",
            Materialized(UInt(64), "cityHash64(user)"),
        ),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
        ("user_email", Nullable(String())),
        ("sdk_name", LowCardinality(Nullable(String()))),
        ("sdk_version", LowCardinality(Nullable(String()))),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("_contexts_flattened", String()),
        ("transaction_name", WithDefault(LowCardinality(String()), "''")),
        (
            "transaction_hash",
            Materialized(UInt(64), "cityHash64(transaction_name)"),
        ),
        ("span_id", Nullable(UInt(64))),
        ("trace_id", Nullable(UUID())),
        ("partition", UInt(16)),
        ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
        ("retention_days", UInt(16)),
        # Used as the ReplacingMergeTree version column below.
        ("deleted", UInt(8)),
        ("group_id", UInt(64)),
        ("primary_hash", FixedString(32)),
        ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
        ("event_string", WithCodecs(String(), ["NONE"])),
        ("received", DateTime()),
        ("message", String()),
        ("title", String()),
        ("culprit", String()),
        ("level", LowCardinality(String())),
        ("location", Nullable(String())),
        ("version", LowCardinality(Nullable(String()))),
        ("type", LowCardinality(String())),
        (
            "exception_stacks",
            Nested([
                ("type", Nullable(String())),
                ("value", Nullable(String())),
                ("mechanism_type", Nullable(String())),
                ("mechanism_handled", Nullable(UInt(8))),
            ]),
        ),
        (
            "exception_frames",
            Nested([
                ("abs_path", Nullable(String())),
                ("colno", Nullable(UInt(32))),
                ("filename", Nullable(String())),
                ("function", Nullable(String())),
                ("lineno", Nullable(UInt(32))),
                ("in_app", Nullable(UInt(8))),
                ("package", Nullable(String())),
                ("module", Nullable(String())),
                ("stack_level", Nullable(UInt(16))),
            ]),
        ),
        ("sdk_integrations", Array(String())),
        ("modules", Nested([("name", String()), ("version", String())])),
    ])

    # Map from event tag name to the dedicated column it is promoted to.
    self.__promoted_tag_columns = {
        "environment": "environment",
        "sentry:release": "release",
        "sentry:dist": "dist",
        # NOTE(review): "******" looks like a redacted value rather than a
        # real column name — confirm the intended target column.
        "sentry:user": "******",
        "transaction": "transaction_name",
        "level": "level",
    }

    # ReplacingMergeTree dedupes rows sharing the sorting key, keeping the
    # row with the highest "deleted" version.
    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name="errors_local",
        dist_table_name="errors_dist",
        mandatory_conditions=[("deleted", "=", 0)],
        prewhere_candidates=[
            "event_id",
            "group_id",
            "tags[sentry:release]",
            "message",
            "environment",
            "project_id",
        ],
        order_by="(org_id, project_id, toStartOfDay(timestamp), primary_hash_hex, event_hash)",
        partition_by="(toMonday(timestamp), if(retention_days = 30, 30, 90))",
        version_column="deleted",
        sample_expr="event_hash",
        ttl_expr="timestamp + toIntervalDay(retention_days)",
        settings={"index_granularity": "8192"},
    )

    # Reads and writes go through the same schema.
    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    # Rows are ingested from the "events" Kafka topic via ErrorsProcessor.
    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=ErrorsProcessor(self.__promoted_tag_columns),
            default_topic="events",
        ),
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={
            "time": "timestamp",
            "rtime": "received"
        },
        time_parse_columns=("timestamp", "received"),
    )

    # Translates tags[...] accesses into promoted columns where possible.
    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )