def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Add hierarchical_hashes after primary_hash on both distributed events tables."""
    hash_columns = [
        ("errors_dist", Array(UUID())),
        ("sentry_dist", Array(FixedString(32))),
    ]
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name=table,
            column=Column("hierarchical_hashes", column_type),
            after="primary_hash",
        )
        for table, column_type in hash_columns
    ]
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """Add spans.exclusive_time_32 to the local transactions table, then set its TTL."""
    table = "transactions_local"
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table,
            column=Column("spans.exclusive_time_32", Array(Float(32))),
            after="spans.group",
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table,
            column=Column("spans.exclusive_time_32", Array(Float(32))),
            ttl_month=("finish_ts", 1),
        ),
    ]
def forwards_local(self) -> Sequence[operations.Operation]:
    """Create spans_experimental_local and add its materialized tags hash map column."""
    table = "spans_experimental_local"
    order_by = (
        "(project_id, toStartOfDay(finish_ts), transaction_name, "
        "cityHash64(transaction_span_id), op, cityHash64(trace_id), "
        "cityHash64(span_id))"
    )
    create_table = operations.CreateTable(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name=table,
        columns=columns,
        engine=table_engines.ReplacingMergeTree(
            storage_set=StorageSetKey.TRANSACTIONS,
            version_column="deleted",
            order_by=order_by,
            partition_by="(toMonday(finish_ts))",
            sample_by="cityHash64(span_id)",
            ttl="finish_ts + toIntervalDay(retention_days)",
            settings={"index_granularity": "8192"},
        ),
    )
    add_hash_map = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name=table,
        column=Column(
            "_tags_hash_map",
            Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN),
        ),
        after="tags.value",
    )
    return [create_table, add_hash_map]
def backwards_local(self) -> Sequence[operations.SqlOperation]:
    """Recreate the legacy sentry_local table and its materialized tags hash map."""
    sample_expr = "cityHash64(toString(event_id))"
    create_table = operations.CreateTable(
        storage_set=StorageSetKey.EVENTS,
        table_name="sentry_local",
        columns=columns,
        engine=table_engines.ReplacingMergeTree(
            storage_set=StorageSetKey.EVENTS,
            version_column="deleted",
            order_by=f"(project_id, toStartOfDay(timestamp), {sample_expr})",
            partition_by="(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))",
            sample_by=sample_expr,
        ),
    )
    add_hash_map = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name="sentry_local",
        column=Column(
            "_tags_hash_map",
            Array(UInt(64), Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
        ),
        after="_tags_flattened",
    )
    return [create_table, add_hash_map]
def test_schema(self):
    """Nested columns with ':' in the name are backtick-quoted and flattened."""
    column_set = ColumnSet(
        [
            ("foo", UInt(8)),
            ("bar", Nested([("qux:mux", String())])),
        ]
    )
    assert column_set.for_schema() == "foo UInt8, bar Nested(`qux:mux` String)"
    assert column_set["foo"].type == UInt(8)
    assert column_set["bar.qux:mux"].type == Array(String())
def test_schema(self):
    """Schema rendering and lookup work for nested columns whose names contain ':'."""
    cols = ColumnSet(
        [
            ("foo", UInt(8)),
            ("bar", Nested([("qux:mux", String())])),
        ]
    )

    rendered = cols.for_schema()
    assert rendered == "foo UInt8, bar Nested(`qux:mux` String)"

    assert cols["foo"].type == UInt(8)
    assert cols["bar.qux:mux"].type == Array(String())
def get_forward_migrations_local(
    source_table_name: str,
    table_name: str,
    mv_name: str,
    aggregation_col_schema: Sequence[Column[Modifiers]],
    aggregation_states: str,
    granularity: int,
) -> Sequence[operations.SqlOperation]:
    """Build the local-node migrations for one metrics aggregate table.

    Creates the aggregating table, adds the materialized tags hash column,
    both bloom-filter indexes, and finally the materialized view feeding it.
    """
    aggregated_cols = [*COMMON_AGGR_COLUMNS, *aggregation_col_schema]

    create_table = operations.CreateTable(
        storage_set=StorageSetKey.METRICS,
        table_name=table_name,
        columns=aggregated_cols,
        engine=table_engines.AggregatingMergeTree(
            storage_set=StorageSetKey.METRICS,
            order_by="(org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.value)",
            partition_by="(retention_days, toMonday(timestamp))",
            settings={"index_granularity": "256"},
        ),
    )

    add_tags_hash = operations.AddColumn(
        storage_set=StorageSetKey.METRICS,
        table_name=table_name,
        column=Column(
            "_tags_hash",
            Array(UInt(64), Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
        ),
        after="tags.value",
    )

    indexes = [
        operations.AddIndex(
            storage_set=StorageSetKey.METRICS,
            table_name=table_name,
            index_name=index_name,
            index_expression=index_expression,
            index_type="bloom_filter()",
            granularity=1,
        )
        for index_name, index_expression in (
            ("bf_tags_hash", "_tags_hash"),
            ("bf_tags_key_hash", "tags.key"),
        )
    ]

    materialized_view = get_forward_view_migration_local(
        source_table_name,
        table_name,
        mv_name,
        aggregation_col_schema,
        aggregation_states,
        granularity,
    )

    return [create_table, add_tags_hash, *indexes, materialized_view]
def forwards_dist(self) -> Sequence[operations.Operation]:
    """Add the _tags_hash_map column to the distributed errors table."""
    add_hash_map = operations.AddColumn(
        storage_set=StorageSetKey.EVENTS,
        table_name="errors_dist",
        column=Column("_tags_hash_map", Array(UInt(64))),
        after="_tags_flattened",
    )
    return [add_hash_map]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Add spans.exclusive_time_32 to the distributed transactions table."""
    add_exclusive_time = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name="transactions_dist",
        column=Column("spans.exclusive_time_32", Array(Float(32))),
        after="spans.group",
    )
    return [add_exclusive_time]
def forwards_local(self) -> Sequence[operations.Operation]:
    """Add the materialized _tags_hash_map column to the local errors table."""
    hash_map_column = Column(
        "_tags_hash_map",
        Array(UInt(64), Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
    )
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_local",
            column=hash_map_column,
            after="_tags_flattened",
        )
    ]
def forwards_local(self) -> Sequence[operations.Operation]:
    """Add the materialized _tags_hash_map column to the local transactions table."""
    hash_map_column = Column(
        "_tags_hash_map",
        Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN),
    )
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=hash_map_column,
            after="_tags_flattened",
        )
    ]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Add the _tags_hash_map column to the distributed transactions table."""
    add_hash_map = operations.AddColumn(
        storage_set=StorageSetKey.TRANSACTIONS,
        table_name="transactions_dist",
        column=Column("_tags_hash_map", Array(UInt(64))),
        after="_tags_flattened",
    )
    return [add_hash_map]
def test_flattened(self):
    """Flattened column metadata is exposed for top-level and nested columns."""
    columns = enforce_table_writer(self.dataset).get_schema().get_columns()

    top_level = columns["group_id"]
    assert top_level.type == UInt(64)
    assert top_level.name == "group_id"
    assert top_level.base_name is None
    assert top_level.flattened == "group_id"

    nested = columns["exception_frames.in_app"]
    assert nested.type == Array(Nullable(UInt(8)))
    assert nested.name == "in_app"
    assert nested.base_name == "exception_frames"
    assert nested.flattened == "exception_frames.in_app"
def test_flattened(self):
    """Name, base_name and flattened are populated for flat and nested columns."""
    schema = self.dataset.get_dataset_schemas().get_write_schema_enforce()
    columns = schema.get_columns()

    group_id = columns["group_id"]
    assert group_id.type == UInt(64)
    assert group_id.name == "group_id"
    assert group_id.base_name is None
    assert group_id.flattened == "group_id"

    in_app = columns["exception_frames.in_app"]
    assert in_app.type == Array(Nullable(UInt(8)))
    assert in_app.name == "in_app"
    assert in_app.base_name == "exception_frames"
    assert in_app.flattened == "exception_frames.in_app"
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.Operation]:
    """Switch both querylog status columns to LowCardinality(String)."""
    status_columns = (
        Column("status", LowCardinality(String())),
        Column("clickhouse_queries.status", Array(LowCardinality(String()))),
    )
    return [
        operations.ModifyColumn(StorageSetKey.QUERYLOG, table_name, column)
        for column in status_columns
    ]
def __backwards_migrations(
    self, table_name: str
) -> Sequence[operations.Operation]:
    """Restore both querylog status columns to their original Enum type."""
    status_type = Enum([("success", 0), ("error", 1), ("rate-limited", 2)])
    status_columns = (
        Column("status", status_type),
        Column("clickhouse_queries.status", Array(status_type)),
    )
    return [
        operations.ModifyColumn(StorageSetKey.QUERYLOG, table_name, column)
        for column in status_columns
    ]
def test_flattened() -> None:
    """Flattened column metadata is exposed for top-level and nested columns."""
    columns = (
        get_writable_storage(StorageKey.ERRORS)
        .get_table_writer()
        .get_schema()
        .get_columns()
    )

    assert columns["group_id"].type == UInt(64)
    assert columns["group_id"].name == "group_id"
    assert columns["group_id"].base_name is None
    assert columns["group_id"].flattened == "group_id"

    # Fixed: the modifiers container is named ``Modifiers`` (plural), matching
    # its use in every other block of this codebase; ``Modifier`` is undefined.
    assert columns["exception_frames.in_app"].type == Array(
        UInt(8, Modifiers(nullable=True))
    )
    assert columns["exception_frames.in_app"].name == "in_app"
    assert columns["exception_frames.in_app"].base_name == "exception_frames"
    assert columns["exception_frames.in_app"].flattened == "exception_frames.in_app"
def __forward_migrations(
    self, table_name: str
) -> Sequence[operations.SqlOperation]:
    """Add the _tags_hash_map and deleted columns to a discover table."""
    new_columns = (
        (Column("_tags_hash_map", Array(UInt(64))), "tags"),
        (Column("deleted", UInt(8)), "contexts"),
    )
    return [
        operations.AddColumn(
            storage_set=StorageSetKey.DISCOVER,
            table_name=table_name,
            column=column,
            after=after,
        )
        for column, after in new_columns
    ]
def test_modifiers() -> None:
    """get_all_modifiers reports every modifier wrapping a column type."""
    cols = ColumnSet(
        [
            ("col1", WithDefault(String(), "")),
            ("col2", Nullable(Array(String()))),
            ("col3", WithCodecs(Materialized(String(), "something"), ["c"])),
            ("col4", WithCodecs(Nullable(Materialized(String(), "something")), ["c"])),
        ]
    )

    expected = {
        "col1": [WithDefault],
        "col2": [Nullable],
        "col3": [Materialized, WithCodecs],
        "col4": [Materialized, Nullable, WithCodecs],
    }
    for name, modifiers in expected.items():
        assert cols[name].type.get_all_modifiers() == modifiers
def forwards_local(self) -> Sequence[operations.SqlOperation]:
    """Create the local metrics aggregate table, its tags hash column and indexes."""
    table = self.table_name
    create_table = operations.CreateTable(
        storage_set=StorageSetKey.METRICS,
        table_name=table,
        columns=self.aggregated_cols,
        engine=table_engines.AggregatingMergeTree(
            storage_set=StorageSetKey.METRICS,
            order_by="(use_case_id, org_id, project_id, metric_id, granularity, timestamp, tags.key, tags.value, retention_days)",
            primary_key="(use_case_id, org_id, project_id, metric_id, granularity, timestamp)",
            partition_by="(retention_days, toMonday(timestamp))",
            settings={"index_granularity": self.granularity},
            ttl="timestamp + toIntervalDay(retention_days)",
        ),
    )
    add_tags_hash = operations.AddColumn(
        storage_set=StorageSetKey.METRICS,
        table_name=table,
        column=Column(
            "_tags_hash",
            Array(UInt(64), Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
        ),
        after="tags.value",
    )
    indexes = [
        operations.AddIndex(
            storage_set=StorageSetKey.METRICS,
            table_name=table,
            index_name=index_name,
            index_expression=index_expression,
            index_type="bloom_filter()",
            granularity=1,
        )
        for index_name, index_expression in (
            ("bf_tags_hash", "_tags_hash"),
            ("bf_tags_key_hash", "tags.key"),
        )
    ]
    return [create_table, add_tags_hash, *indexes]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Create the distributed metrics aggregate table plus its tags hash column."""
    create_table = operations.CreateTable(
        storage_set=StorageSetKey.METRICS,
        table_name=self.dist_table_name,
        columns=self.aggregated_cols,
        engine=table_engines.Distributed(
            local_table_name=self.table_name,
            sharding_key=None,
        ),
    )
    add_tags_hash = operations.AddColumn(
        storage_set=StorageSetKey.METRICS,
        table_name=self.dist_table_name,
        column=Column(
            "_tags_hash",
            Array(UInt(64), Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
        ),
        after="tags.value",
    )
    return [create_table, add_tags_hash]
def forwards_dist(self) -> Sequence[operations.Operation]:
    """Create spans_experimental_dist and its materialized tags hash map column."""
    table = "spans_experimental_dist"
    return [
        operations.CreateTable(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table,
            columns=columns,
            engine=table_engines.Distributed(
                local_table_name="spans_experimental_local",
                sharding_key="cityHash64(transaction_span_id)",
            ),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name=table,
            column=Column(
                "_tags_hash_map",
                Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN),
            ),
            after="tags.value",
        ),
    ]
def forwards_dist(self) -> Sequence[operations.SqlOperation]:
    """Rebuild errors_dist over errors_local and add a read-only counterpart.

    The replacement table is created under a temporary name, receives the
    materialized tags hash map, and then replaces the old errors_dist via
    drop + rename before the EVENTS_RO copy is created.
    """

    def dist_engine() -> table_engines.Distributed:
        # Fresh engine spec per table; both point at errors_local.
        return table_engines.Distributed(
            local_table_name="errors_local",
            sharding_key=sample_expr,
        )

    return [
        operations.CreateTable(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_dist_new",
            columns=columns,
            engine=dist_engine(),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_dist_new",
            column=Column(
                "_tags_hash_map",
                Array(UInt(64), Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
            ),
            after="tags",
        ),
        operations.DropTable(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_dist",
        ),
        operations.RenameTable(
            storage_set=StorageSetKey.EVENTS,
            old_table_name="errors_dist_new",
            new_table_name="errors_dist",
        ),
        operations.CreateTable(
            storage_set=StorageSetKey.EVENTS_RO,
            table_name="errors_dist_ro",
            columns=columns,
            engine=dist_engine(),
        ),
    ]
def forwards_local(self) -> Sequence[operations.Operation]:
    """Rebuild errors_local: create the new table, add the tags hash map, swap it in."""
    new_table = "errors_local_new"
    return [
        operations.CreateTable(
            storage_set=StorageSetKey.EVENTS,
            table_name=new_table,
            columns=columns,
            engine=table_engines.ReplacingMergeTree(
                storage_set=StorageSetKey.EVENTS,
                version_column="deleted",
                order_by=f"(project_id, toStartOfDay(timestamp), primary_hash, {sample_expr})",
                partition_by="(retention_days, toMonday(timestamp))",
                sample_by=sample_expr,
                ttl="timestamp + toIntervalDay(retention_days)",
                settings={"index_granularity": "8192"},
            ),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.EVENTS,
            table_name=new_table,
            column=Column(
                "_tags_hash_map",
                Array(UInt(64), Modifiers(materialized=TAGS_HASH_MAP_COLUMN)),
            ),
            after="tags",
        ),
        operations.DropTable(
            storage_set=StorageSetKey.EVENTS,
            table_name="errors_local",
        ),
        operations.RenameTable(
            storage_set=StorageSetKey.EVENTS,
            old_table_name=new_table,
            new_table_name="errors_local",
        ),
    ]
def get_forward_migrations_dist(
    dist_table_name: str,
    local_table_name: str,
    aggregation_col_schema: Sequence[Column[Modifiers]],
) -> Sequence[operations.SqlOperation]:
    """Build the distributed-node migrations for one metrics aggregate table."""
    aggregated_cols = [*COMMON_AGGR_COLUMNS, *aggregation_col_schema]
    create_table = operations.CreateTable(
        storage_set=StorageSetKey.METRICS,
        table_name=dist_table_name,
        columns=aggregated_cols,
        engine=table_engines.Distributed(
            local_table_name=local_table_name,
            sharding_key=None,
        ),
    )
    add_tags_hash = operations.AddColumn(
        storage_set=StorageSetKey.METRICS,
        table_name=dist_table_name,
        column=Column(
            "_tags_hash",
            Array(UInt(64), Modifiers(materialized=INT_TAGS_HASH_MAP_COLUMN)),
        ),
        after="tags.value",
    )
    return [create_table, add_tags_hash]
("release", LowCardinality(Nullable(String()))), ("dist", LowCardinality(Nullable(String()))), ("ip_address_v4", Nullable(IPv4())), ("ip_address_v6", Nullable(IPv6())), ("user", WithDefault(String(), "''",)), ("user_hash", Materialized(UInt(64), "cityHash64(user)"),), ("user_id", Nullable(String())), ("user_name", Nullable(String())), ("user_email", Nullable(String())), ("sdk_name", WithDefault(LowCardinality(String()), "''")), ("sdk_version", WithDefault(LowCardinality(String()), "''")), ("http_method", LowCardinality(Nullable(String()))), ("http_referer", Nullable(String())), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()), ( "measurements", Nested([("key", LowCardinality(String())), ("value", Float(64))]), ), ("partition", UInt(16)), ("offset", UInt(64)), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ] ) schema = ReplacingMergeTreeSchema(
("transaction_name", String()), ("message", String()), ("title", String()), ("user", String()), ("user_hash", UInt(64)), ("user_id", String(Modifiers(nullable=True))), ("user_name", String(Modifiers(nullable=True))), ("user_email", String(Modifiers(nullable=True))), ("ip_address_v4", IPv4(Modifiers(nullable=True))), ("ip_address_v6", IPv6(Modifiers(nullable=True))), ("sdk_name", String(Modifiers(nullable=True))), ("sdk_version", String(Modifiers(nullable=True))), ("http_method", String(Modifiers(nullable=True))), ("http_referer", String(Modifiers(nullable=True))), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_hash_map", Array(UInt(64))), ("contexts", Nested([("key", String()), ("value", String())])), ("trace_id", UUID(Modifiers(nullable=True))), ("deleted", UInt(8)), ] ) schema = TableSchema( columns=columns, local_table_name="discover_local", dist_table_name="discover_dist", storage_set_key=StorageSetKey.DISCOVER, mandatory_conditions=mandatory_conditions, ) storage = ReadableTableStorage(
def visit_array(self, node: Node, visited_children: Iterable[Any]) -> ColumnType:
    """Turn a parsed ``Array(<type>)`` expression into an Array column type."""
    # Children are: keyword, "(", whitespace, inner type, whitespace, ")".
    _keyword, _lparen, _ws_before, inner_type, _ws_after, _rparen = visited_children
    return Array(inner_type)
("release", LowCardinality(Nullable(String()))), ("dist", LowCardinality(Nullable(String()))), ("ip_address_v4", Nullable(IPv4())), ("ip_address_v6", Nullable(IPv6())), ("user", WithDefault(String(), "''")), ("user_hash", Materialized(UInt(64), "cityHash64(user)"),), ("user_id", Nullable(String())), ("user_name", Nullable(String())), ("user_email", Nullable(String())), ("sdk_name", LowCardinality(Nullable(String()))), ("sdk_version", LowCardinality(Nullable(String()))), ("http_method", LowCardinality(Nullable(String()))), ("http_referer", Nullable(String())), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_flattened", String()), ("_tags_hash_map", Materialized(Array(UInt(64)), TAGS_HASH_MAP_COLUMN)), ("contexts", Nested([("key", String()), ("value", String())])), ("_contexts_flattened", String()), ("transaction_name", WithDefault(LowCardinality(String()), "''")), ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)"),), ("span_id", Nullable(UInt(64))), ("trace_id", Nullable(UUID())), ("partition", UInt(16)), ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ("group_id", UInt(64)), ("primary_hash", FixedString(32)), ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")), ("event_string", WithCodecs(String(), ["NONE"])),
("environment", String(Modifiers(nullable=True))), ("release", String(Modifiers(nullable=True))), ("dist", String(Modifiers(nullable=True))), ("ip_address_v4", IPv4(Modifiers(nullable=True))), ("ip_address_v6", IPv6(Modifiers(nullable=True))), ("user", String()), ("user_hash", UInt(64, Modifiers(readonly=True))), ("user_id", String(Modifiers(nullable=True))), ("user_name", String(Modifiers(nullable=True))), ("user_email", String(Modifiers(nullable=True))), ("sdk_name", String(Modifiers(nullable=True))), ("sdk_version", String(Modifiers(nullable=True))), ("http_method", String(Modifiers(nullable=True))), ("http_referer", String(Modifiers(nullable=True))), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))), ("contexts", Nested([("key", String()), ("value", String())])), ("transaction_name", String()), ("transaction_hash", UInt(64, Modifiers(readonly=True))), ("span_id", UInt(64, Modifiers(nullable=True))), ("trace_id", UUID(Modifiers(nullable=True))), ("partition", UInt(16)), ("offset", UInt(64)), ("message_timestamp", DateTime()), ("retention_days", UInt(16)), ("deleted", UInt(8)), ("group_id", UInt(64)), ("primary_hash", UUID()), ("hierarchical_hashes", Array(UUID())), ("received", DateTime()), ("message", String()),