("project_id", UInt(64)), ("type", String()), ("timestamp", DateTime()), ("platform", String()), ("environment", String(Modifiers(nullable=True))), ("release", String(Modifiers(nullable=True))), ("dist", String(Modifiers(nullable=True))), ("transaction_name", String()), ("message", String()), ("title", String()), ("user", String()), ("user_hash", UInt(64)), ("user_id", String(Modifiers(nullable=True))), ("user_name", String(Modifiers(nullable=True))), ("user_email", String(Modifiers(nullable=True))), ("ip_address_v4", IPv4(Modifiers(nullable=True))), ("ip_address_v6", IPv6(Modifiers(nullable=True))), ("sdk_name", String(Modifiers(nullable=True))), ("sdk_version", String(Modifiers(nullable=True))), ("http_method", String(Modifiers(nullable=True))), ("http_referer", String(Modifiers(nullable=True))), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_hash_map", Array(UInt(64))), ("contexts", Nested([("key", String()), ("value", String())])), ("trace_id", UUID(Modifiers(nullable=True))), ("deleted", UInt(8)), ] ) schema = TableSchema( columns=columns,
("trace_id", UUID()), ("span_id", UInt(64)), ("transaction_name", LowCardinality(String())), ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)",),), ("transaction_op", LowCardinality(String())), ("transaction_status", WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS))), ("start_ts", DateTime()), ("start_ms", UInt(16)), ("finish_ts", DateTime()), ("finish_ms", UInt(16)), ("duration", UInt(32)), ("platform", LowCardinality(String())), ("environment", LowCardinality(Nullable(String()))), ("release", LowCardinality(Nullable(String()))), ("dist", LowCardinality(Nullable(String()))), ("ip_address_v4", Nullable(IPv4())), ("ip_address_v6", Nullable(IPv6())), ("user", WithDefault(String(), "''",)), ("user_hash", Materialized(UInt(64), "cityHash64(user)"),), ("user_id", Nullable(String())), ("user_name", Nullable(String())), ("user_email", Nullable(String())), ("sdk_name", WithDefault(LowCardinality(String()), "''")), ("sdk_version", WithDefault(LowCardinality(String()), "''")), ("http_method", LowCardinality(Nullable(String()))), ("http_referer", Nullable(String())), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()),
def __init__(self) -> None:
    """Declare the transactions schema and wire up the dataset.

    Builds the column set, wraps it in a ``ReplacingMergeTreeSchema`` that is
    used for both reads and writes, creates the tag column processor, and
    hands the schemas plus a Kafka-backed table writer to the parent
    initialiser.
    """
    columns = ColumnSet(
        [
            ("project_id", UInt(64)),
            ("event_id", UUID()),
            ("trace_id", UUID()),
            ("span_id", UInt(64)),
            ("transaction_name", LowCardinality(String())),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("transaction_op", LowCardinality(String())),
            # Every other WithDefault in this schema receives its default as
            # SQL expression text (e.g. "''"), so the status constant is
            # rendered to text as well instead of being passed through raw.
            (
                "transaction_status",
                WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),
            ),
            ("start_ts", DateTime()),
            ("start_ms", UInt(16)),
            ("_start_date", Materialized(Date(), "toDate(start_ts)")),
            ("finish_ts", DateTime()),
            ("finish_ms", UInt(16)),
            # _finish_date is referenced by order_by and partition_by below.
            ("_finish_date", Materialized(Date(), "toDate(finish_ts)")),
            ("duration", UInt(32)),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            ("user_hash", Materialized(UInt(64), "cityHash64(user)")),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", WithDefault(LowCardinality(String()), "''")),
            ("sdk_version", WithDefault(LowCardinality(String()), "''")),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("partition", UInt(16)),
            ("offset", UInt(64)),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
        ]
    )

    schema = ReplacingMergeTreeSchema(
        columns=columns,
        local_table_name="transactions_local",
        dist_table_name="transactions_dist",
        mandatory_conditions=[],
        prewhere_candidates=["event_id", "project_id"],
        order_by="(project_id, _finish_date, transaction_name, cityHash64(span_id))",
        partition_by="(retention_days, toMonday(_finish_date))",
        version_column="deleted",
        sample_expr=None,
        migration_function=transactions_migrations,
    )

    # The same schema object serves both the read and the write path.
    dataset_schemas = DatasetSchemas(read_schema=schema, write_schema=schema)

    # Created before the parent initialiser runs, as in the original ordering.
    self.__tags_processor = TagColumnProcessor(
        columns=columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        table_writer=TransactionsTableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=TransactionsMessageProcessor(),
                default_topic="events",
            ),
        ),
        time_group_columns={
            "bucketed_start": "start_ts",
            "bucketed_end": "finish_ts",
        },
        time_parse_columns=("start_ts", "finish_ts"),
    )
def __init__(self):
    """Set up the transactions dataset.

    Assembles the column definitions, builds the ``ReplacingMergeTreeSchema``
    shared by the read and write paths, and passes it to the parent
    initialiser together with the message processor, the default topic and
    the time-group column mapping.
    """

    def key_value_nested():
        # A fresh Nested(key, value) instance per column; nothing is shared.
        return Nested([("key", String()), ("value", String())])

    identity_columns = [
        ("project_id", UInt(64)),
        ("event_id", UUID()),
        ("trace_id", UUID()),
        ("span_id", UInt(64)),
        ("transaction_name", String()),
        ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)")),
        ("transaction_op", LowCardinality(String())),
    ]

    timing_columns = [
        ("start_ts", DateTime()),
        ("start_ms", UInt(16)),
        ("finish_ts", DateTime()),
        ("finish_ms", UInt(16)),
        # Materialized from the start/finish second and millisecond parts.
        (
            "duration",
            Materialized(
                UInt(32),
                "((finish_ts - start_ts) * 1000) + (finish_ms - start_ms)",
            ),
        ),
    ]

    environment_columns = [
        ("platform", LowCardinality(String())),
        ("environment", Nullable(String())),
        ("release", Nullable(String())),
        ("dist", Nullable(String())),
    ]

    user_columns = [
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''")),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
        ("user_email", Nullable(String())),
    ]

    nested_columns = [
        ("tags", key_value_nested()),
        ("contexts", key_value_nested()),
    ]

    bookkeeping_columns = [
        ("partition", UInt(16)),
        ("offset", UInt(64)),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
    ]

    transaction_columns = ColumnSet(
        identity_columns
        + timing_columns
        + environment_columns
        + user_columns
        + nested_columns
        + bookkeeping_columns
    )

    sort_key = (
        "(project_id, toStartOfDay(start_ts), transaction_hash, "
        "start_ts, start_ms, trace_id, span_id)"
    )
    storage_schema = ReplacingMergeTreeSchema(
        columns=transaction_columns,
        local_table_name="transactions_local",
        dist_table_name="transactions_dist",
        order_by=sort_key,
        partition_by="(retention_days, toMonday(start_ts))",
        version_column="deleted",
        sample_expr=None,
    )

    # One schema object is used for both reading and writing.
    schemas = DatasetSchemas(
        read_schema=storage_schema,
        write_schema=storage_schema,
    )

    bucket_columns = {
        "bucketed_start": "start_ts",
        "bucketed_end": "finish_ts",
    }
    super().__init__(
        dataset_schemas=schemas,
        processor=TransactionsMessageProcessor(),
        default_topic="events",
        time_group_columns=bucket_columns,
    )
UInt, UUID, WithCodecs, WithDefault, ) from snuba.migrations.parse_schema import _get_column test_data = [ # Basic types (("Date", "", "", ""), Date()), (("DateTime", "", "", ""), DateTime()), (("Enum8('success' = 0, 'error' = 1)", "", "", ""), Enum([("success", 0), ("error", 1)])), (("FixedString(32)", "", "", ""), FixedString(32)), (("Float32", "", "", ""), Float(32)), (("IPv4", "", "", ""), IPv4()), (("IPv6", "", "", ""), IPv6()), (("String", "", "", ""), String()), (("UInt32", "", "", ""), UInt(32)), (("UUID", "", "", ""), UUID()), # Aggregate functions (("AggregateFunction(uniq, UInt8)", "", "", ""), AggregateFunction("uniq", UInt(8))), (("AggregateFunction(countIf, UUID, UInt8)", "", "", ""), AggregateFunction("countIf", UUID(), UInt(8))), (("AggregateFunction(quantileIf(0.5, 0.9), UInt32, UInt8)", "", "", ""), AggregateFunction("quantileIf(0.5, 0.9)", UInt(32), UInt(8))), # Array (("Array(String)", "", "", ""), Array(String())), (("Array(DateTime)", "", "", ""), Array(DateTime())), (("Array(UInt64)", "", "", ""), Array(UInt(64))),
pytest.param( String(Modifier(nullable=True)), String(), String(), "Nullable(String)", id="strings", ), pytest.param( UUID(Modifier(readonly=True)), UUID(), UUID(Modifier(nullable=True)), "UUID", id="UUIDs", ), pytest.param( IPv4(None), IPv4(), IPv4(Modifier(nullable=True)), "IPv4", id="IPs", ), pytest.param( IPv6(None), IPv6(), IPv6(Modifier(nullable=True)), "IPv6", id="IPs", ), pytest.param( FixedString(32, Modifier(nullable=True)), FixedString(32),
def __init__(self) -> None:
    """Declare the errors schema and wire up the dataset.

    Builds the full column set, the promoted-tag-to-column mapping, and a
    ``ReplacingMergeTreeSchema`` used for both reads and writes; then
    initialises the parent with a Kafka-backed table writer and finally
    creates the tag column processor.
    """
    all_columns = ColumnSet(
        [
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("timestamp", DateTime()),
            ("event_id", WithCodecs(UUID(), ["NONE"])),
            (
                "event_hash",
                WithCodecs(
                    Materialized(UInt(64), "cityHash64(toString(event_id))"),
                    ["NONE"],
                ),
            ),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            ("user_hash", Materialized(UInt(64), "cityHash64(user)")),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", LowCardinality(Nullable(String()))),
            ("sdk_version", LowCardinality(Nullable(String()))),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("transaction_name", WithDefault(LowCardinality(String()), "''")),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("span_id", Nullable(UInt(64))),
            ("trace_id", Nullable(UUID())),
            ("partition", UInt(16)),
            ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
            ("group_id", UInt(64)),
            ("primary_hash", FixedString(32)),
            # NOTE(review): the column is declared UInt(64) but materialized
            # from hex(primary_hash), which presumably yields a string --
            # confirm the intended type before changing the schema.
            ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
            ("event_string", WithCodecs(String(), ["NONE"])),
            ("received", DateTime()),
            ("message", String()),
            ("title", String()),
            ("culprit", String()),
            ("level", LowCardinality(String())),
            ("location", Nullable(String())),
            ("version", LowCardinality(Nullable(String()))),
            ("type", LowCardinality(String())),
            (
                "exception_stacks",
                Nested(
                    [
                        ("type", Nullable(String())),
                        ("value", Nullable(String())),
                        ("mechanism_type", Nullable(String())),
                        ("mechanism_handled", Nullable(UInt(8))),
                    ]
                ),
            ),
            (
                "exception_frames",
                Nested(
                    [
                        ("abs_path", Nullable(String())),
                        ("colno", Nullable(UInt(32))),
                        ("filename", Nullable(String())),
                        ("function", Nullable(String())),
                        ("lineno", Nullable(UInt(32))),
                        ("in_app", Nullable(UInt(8))),
                        ("package", Nullable(String())),
                        ("module", Nullable(String())),
                        ("stack_level", Nullable(UInt(16))),
                    ]
                ),
            ),
            ("sdk_integrations", Array(String())),
            ("modules", Nested([("name", String()), ("version", String())])),
        ]
    )

    # Maps tag names to the columns declared above that hold their values.
    # Every value must name a column in all_columns; "sentry:user" previously
    # pointed at the non-existent column "******" and now targets "user".
    self.__promoted_tag_columns = {
        "environment": "environment",
        "sentry:release": "release",
        "sentry:dist": "dist",
        "sentry:user": "user",
        "transaction": "transaction_name",
        "level": "level",
    }

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name="errors_local",
        dist_table_name="errors_dist",
        mandatory_conditions=[("deleted", "=", 0)],
        prewhere_candidates=[
            "event_id",
            "group_id",
            "tags[sentry:release]",
            "message",
            "environment",
            "project_id",
        ],
        order_by="(org_id, project_id, toStartOfDay(timestamp), primary_hash_hex, event_hash)",
        partition_by="(toMonday(timestamp), if(retention_days = 30, 30, 90))",
        version_column="deleted",
        sample_expr="event_hash",
        ttl_expr="timestamp + toIntervalDay(retention_days)",
        settings={"index_granularity": "8192"},
    )

    # The same schema object serves both the read and the write path.
    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=ErrorsProcessor(self.__promoted_tag_columns),
            default_topic="events",
        ),
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={"time": "timestamp", "rtime": "received"},
        time_parse_columns=("timestamp", "received"),
    )

    # Built after the parent initialiser, as in the original ordering.
    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )