def __init__(
    self,
    storage_key: StorageKey,
    storage_set_key: StorageSetKey,
    schema: Schema,
    query_processors: Sequence[QueryProcessor],
    stream_loader: KafkaStreamLoader,
    query_splitters: Optional[Sequence[QuerySplitStrategy]] = None,
    mandatory_condition_checkers: Optional[Sequence[ConditionChecker]] = None,
    replacer_processor: Optional[ReplacerProcessor[Any]] = None,
    writer_options: ClickhouseWriterOptions = None,
) -> None:
    super().__init__(
        storage_key,
        storage_set_key,
        schema,
        query_processors,
        query_splitters,
        mandatory_condition_checkers,
    )
    assert isinstance(schema, WritableTableSchema)
    self.__table_writer = TableWriter(
        storage_set=storage_set_key,
        write_schema=schema,
        stream_loader=stream_loader,
        replacer_processor=replacer_processor,
        writer_options=writer_options,
    )
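# A minimal sketch of how the constructor above might be invoked. Everything
# below is illustrative: the key members, `example_schema`, `example_loader`,
# and the processor list are placeholders, not names taken from this code.
storage = WritableTableStorage(
    storage_key=StorageKey.EXAMPLE,         # hypothetical StorageKey member
    storage_set_key=StorageSetKey.EXAMPLE,  # hypothetical StorageSetKey member
    schema=example_schema,                  # must be a WritableTableSchema
    query_processors=[PrewhereProcessor()],
    stream_loader=example_loader,           # a KafkaStreamLoader
)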
def build_batch_writer(
    table_writer: TableWriter,
    metrics: MetricsBackend,
    replacements_producer: Optional[ConfluentKafkaProducer] = None,
    replacements_topic: Optional[Topic] = None,
) -> Callable[[], ProcessedMessageBatchWriter]:
    # The replacements producer and topic must be provided together or not at all.
    assert not (replacements_producer is None) ^ (replacements_topic is None)
    supports_replacements = replacements_producer is not None

    writer = table_writer.get_batch_writer(
        metrics,
        {"load_balancing": "in_order", "insert_distributed_sync": 1},
    )

    def build_writer() -> ProcessedMessageBatchWriter:
        insert_batch_writer = InsertBatchWriter(
            writer, MetricsWrapper(metrics, "insertions")
        )

        replacement_batch_writer: Optional[ReplacementBatchWriter]
        if supports_replacements:
            assert replacements_producer is not None
            assert replacements_topic is not None
            replacement_batch_writer = ReplacementBatchWriter(
                replacements_producer, replacements_topic
            )
        else:
            replacement_batch_writer = None

        return ProcessedMessageBatchWriter(
            insert_batch_writer, replacement_batch_writer
        )

    return build_writer
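# A minimal sketch of wiring up the factory above, assuming a consumer-side
# `metrics_backend`, an optional Kafka `producer`, and a `replacements_topic`
# that are not defined in this snippet (all placeholder names are assumptions):
writer_factory = build_batch_writer(
    table_writer=table_writer,
    metrics=metrics_backend,
    replacements_producer=producer,          # provide both or neither
    replacements_topic=replacements_topic,
)

# A fresh ProcessedMessageBatchWriter is built for each processed batch.
batch_writer = writer_factory()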
("max_threads", UInt(8)), ("num_days", UInt(32)), ("clickhouse_table", LowCardinality(String())), ("query_id", String()), ("is_duplicate", UInt(8)), ("consistent", UInt(8)), ]), ), ]) schema = MergeTreeSchema( columns=columns, local_table_name="querylog_local", dist_table_name="querylog_dist", order_by="(toStartOfDay(timestamp), request_id)", partition_by="(toMonday(timestamp))", sample_expr="request_id", ) storage = WritableTableStorage( schemas=StorageSchemas(read_schema=schema, write_schema=schema), table_writer=TableWriter( write_schema=schema, stream_loader=KafkaStreamLoader( processor=QuerylogProcessor(), default_topic=settings.QUERIES_TOPIC, ), ), query_processors=[], )
def __init__(self) -> None:
    write_columns = ColumnSet([
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("key_id", Nullable(UInt(64))),
        ("timestamp", DateTime()),
        ("outcome", UInt(8)),
        ("reason", LowCardinality(Nullable(String()))),
        ("event_id", Nullable(UUID())),
    ])

    write_schema = MergeTreeSchema(
        columns=write_columns,
        # TODO: change to outcomes.raw_local when we add multi DB support
        local_table_name=WRITE_LOCAL_TABLE_NAME,
        dist_table_name=WRITE_DIST_TABLE_NAME,
        order_by="(org_id, project_id, timestamp)",
        partition_by="(toMonday(timestamp))",
        settings={"index_granularity": 16384},
    )

    read_columns = ColumnSet([
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("key_id", UInt(64)),
        ("timestamp", DateTime()),
        ("outcome", UInt(8)),
        ("reason", LowCardinality(String())),
        ("times_seen", UInt(64)),
    ])

    read_schema = SummingMergeTreeSchema(
        columns=read_columns,
        local_table_name=READ_LOCAL_TABLE_NAME,
        dist_table_name=READ_DIST_TABLE_NAME,
        order_by="(org_id, project_id, key_id, outcome, reason, timestamp)",
        partition_by="(toMonday(timestamp))",
        settings={"index_granularity": 256},
    )

    materialized_view_columns = ColumnSet([
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("key_id", UInt(64)),
        ("timestamp", DateTime()),
        ("outcome", UInt(8)),
        ("reason", String()),
        ("times_seen", UInt(64)),
    ])

    # TODO: Find a better way to specify a query for a materialized view
    # The problem right now is that we have a way to define our columns in a
    # ColumnSet abstraction but the query doesn't use it.
    query = """
        SELECT
            org_id,
            project_id,
            ifNull(key_id, 0) AS key_id,
            toStartOfHour(timestamp) AS timestamp,
            outcome,
            ifNull(reason, 'none') AS reason,
            count() AS times_seen
        FROM %(source_table_name)s
        GROUP BY org_id, project_id, key_id, timestamp, outcome, reason
    """

    materialized_view = MaterializedViewSchema(
        local_materialized_view_name="outcomes_mv_hourly_local",
        dist_materialized_view_name="outcomes_mv_hourly_dist",
        prewhere_candidates=["project_id", "org_id"],
        columns=materialized_view_columns,
        query=query,
        local_source_table_name=WRITE_LOCAL_TABLE_NAME,
        local_destination_table_name=READ_LOCAL_TABLE_NAME,
        dist_source_table_name=WRITE_DIST_TABLE_NAME,
        dist_destination_table_name=READ_DIST_TABLE_NAME,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=read_schema,
        write_schema=write_schema,
        intermediary_schemas=[materialized_view],
    )

    table_writer = TableWriter(
        write_schema=write_schema,
        stream_loader=KafkaStreamLoader(
            processor=OutcomesProcessor(),
            default_topic="outcomes",
        ),
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={"time": "timestamp"},
        time_parse_columns=("timestamp",),
    )
    ]
    for col in get_promoted_columns()
}

storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=schema, write_schema=schema),
    table_writer=TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            replacement_topic="event-replacements",
            commit_log_topic="snuba-commit-log",
        ),
        replacer_processor=ErrorsReplacer(
            write_schema=schema,
            read_schema=schema,
            required_columns=[col.escaped for col in required_columns],
            tag_column_map=get_tag_column_map(),
            promoted_tags=get_promoted_tags(),
            state_name=ReplacerState.EVENTS,
        ),
    ),
    query_processors=[
        # TODO: This one should become an entirely separate storage and be
        # picked in the storage selector.
        ReadOnlyTableSelector("sentry_dist", "sentry_dist_ro"),
        EventsColumnProcessor(),
        PrewhereProcessor(),
    ],
def __init__(self):
    metadata_columns = ColumnSet([
        # optional stream related data
        ('offset', Nullable(UInt(64))),
        ('partition', Nullable(UInt(16))),
    ])

    promoted_tag_columns = ColumnSet([
        # These are the classic tags; they are saved in Snuba exactly as they
        # appear in the event body.
        ('level', Nullable(String())),
        ('logger', Nullable(String())),
        ('server_name', Nullable(String())),
        # future name: device_id?
        ('transaction', Nullable(String())),
        ('environment', Nullable(String())),
        ('sentry:release', Nullable(String())),
        ('sentry:dist', Nullable(String())),
        ('sentry:user', Nullable(String())),
        ('site', Nullable(String())),
        ('url', Nullable(String())),
    ])

    promoted_context_tag_columns = ColumnSet([
        # These are promoted tags that come in via `tags`, but are more closely
        # related to contexts. To avoid naming confusion with Clickhouse nested
        # columns, they are stored in the database with s/./_/
        # promoted tags
        ('app_device', Nullable(String())),
        ('device', Nullable(String())),
        ('device_family', Nullable(String())),
        ('runtime', Nullable(String())),
        ('runtime_name', Nullable(String())),
        ('browser', Nullable(String())),
        ('browser_name', Nullable(String())),
        ('os', Nullable(String())),
        ('os_name', Nullable(String())),
        ('os_rooted', Nullable(UInt(8))),
    ])

    promoted_context_columns = ColumnSet([
        ('os_build', Nullable(String())),
        ('os_kernel_version', Nullable(String())),
        ('device_name', Nullable(String())),
        ('device_brand', Nullable(String())),
        ('device_locale', Nullable(String())),
        ('device_uuid', Nullable(String())),
        ('device_model_id', Nullable(String())),
        ('device_arch', Nullable(String())),
        ('device_battery_level', Nullable(Float(32))),
        ('device_orientation', Nullable(String())),
        ('device_simulator', Nullable(UInt(8))),
        ('device_online', Nullable(UInt(8))),
        ('device_charging', Nullable(UInt(8))),
    ])

    required_columns = ColumnSet([
        ('event_id', FixedString(32)),
        ('project_id', UInt(64)),
        ('group_id', UInt(64)),
        ('timestamp', DateTime()),
        ('deleted', UInt(8)),
        ('retention_days', UInt(16)),
    ])

    all_columns = required_columns + [
        # required for non-deleted
        ('platform', Nullable(String())),
        ('message', Nullable(String())),
        ('primary_hash', Nullable(FixedString(32))),
        ('received', Nullable(DateTime())),
        ('search_message', Nullable(String())),
        ('title', Nullable(String())),
        ('location', Nullable(String())),

        # optional user
        ('user_id', Nullable(String())),
        ('username', Nullable(String())),
        ('email', Nullable(String())),
        ('ip_address', Nullable(String())),

        # optional geo
        ('geo_country_code', Nullable(String())),
        ('geo_region', Nullable(String())),
        ('geo_city', Nullable(String())),

        ('sdk_name', Nullable(String())),
        ('sdk_version', Nullable(String())),
        ('type', Nullable(String())),
        ('version', Nullable(String())),
    ] + metadata_columns \
        + promoted_context_columns \
        + promoted_tag_columns \
        + promoted_context_tag_columns \
        + [
        # other tags
        ('tags', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # other context
        ('contexts', Nested([
            ('key', String()),
            ('value', String()),
        ])),

        # http interface
        ('http_method', Nullable(String())),
        ('http_referer', Nullable(String())),

        # exception interface
        ('exception_stacks', Nested([
            ('type', Nullable(String())),
            ('value', Nullable(String())),
            ('mechanism_type', Nullable(String())),
            ('mechanism_handled', Nullable(UInt(8))),
        ])),
        ('exception_frames', Nested([
            ('abs_path', Nullable(String())),
            ('filename', Nullable(String())),
            ('package', Nullable(String())),
            ('module', Nullable(String())),
            ('function', Nullable(String())),
            ('in_app', Nullable(UInt(8))),
            ('colno', Nullable(UInt(32))),
            ('lineno', Nullable(UInt(32))),
            ('stack_level', UInt(16)),
        ])),

        # These are columns we added later in the life of the (current) production
        # database. They don't necessarily belong here in a logical/readability sense
        # but they are here to match the order of columns in production because
        # `insert_distributed_sync` is very sensitive to column existence and ordering.
        ('culprit', Nullable(String())),
        ('sdk_integrations', Array(String())),
        ('modules', Nested([
            ('name', String()),
            ('version', String()),
        ])),
    ]

    sample_expr = 'cityHash64(toString(event_id))'

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name='sentry_local',
        dist_table_name='sentry_dist',
        mandatory_conditions=[('deleted', '=', 0)],
        order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
        partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
        version_column='deleted',
        sample_expr=sample_expr,
        migration_function=events_migrations,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            replacement_topic="event-replacements",
            commit_log_topic="snuba-commit-log",
        ),
    )

    super(EventsDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={
            'time': 'timestamp',
            'rtime': 'received',
        },
        time_parse_columns=('timestamp', 'received'),
    )

    self.__metadata_columns = metadata_columns
    self.__promoted_tag_columns = promoted_tag_columns
    self.__promoted_context_tag_columns = promoted_context_tag_columns
    self.__promoted_context_columns = promoted_context_columns
    self.__required_columns = required_columns

    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )
    dist_materialized_view_name="outcomes_mv_hourly_dist",
    prewhere_candidates=["project_id", "org_id"],
    columns=materialized_view_columns,
    query=query,
    local_source_table_name=WRITE_LOCAL_TABLE_NAME,
    local_destination_table_name=READ_LOCAL_TABLE_NAME,
    dist_source_table_name=WRITE_DIST_TABLE_NAME,
    dist_destination_table_name=READ_DIST_TABLE_NAME,
)

raw_storage = WritableTableStorage(
    schemas=StorageSchemas(read_schema=raw_schema, write_schema=raw_schema),
    table_writer=TableWriter(
        write_schema=raw_schema,
        stream_loader=KafkaStreamLoader(
            processor=OutcomesProcessor(),
            default_topic="outcomes",
        ),
    ),
    query_processors=[],
)

materialized_storage = ReadableTableStorage(
    schemas=StorageSchemas(
        read_schema=read_schema,
        write_schema=None,
        intermediary_schemas=[materialized_view_schema],
    ),
    query_processors=[PrewhereProcessor()],
)
"deleted", "retention_days", ] storage = WritableTableStorage( schemas=StorageSchemas(read_schema=schema, write_schema=schema), table_writer=TableWriter( write_schema=schema, stream_loader=KafkaStreamLoader( processor=ErrorsProcessor(promoted_tag_columns), default_topic="events", replacement_topic="errors-replacements", ), replacer_processor=ErrorsReplacer( write_schema=schema, read_schema=schema, required_columns=required_columns, tag_column_map={ "tags": promoted_tag_columns, "contexts": {}, }, promoted_tags={ "tags": promoted_tag_columns.keys(), "contexts": {}, }, state_name=ReplacerState.ERRORS, ), ), query_processors=[PrewhereProcessor()], )
def __init__(self) -> None:
    metadata_columns = ColumnSet([
        # optional stream related data
        ("offset", Nullable(UInt(64))),
        ("partition", Nullable(UInt(16))),
    ])

    promoted_tag_columns = ColumnSet([
        # These are the classic tags; they are saved in Snuba exactly as they
        # appear in the event body.
        ("level", Nullable(String())),
        ("logger", Nullable(String())),
        ("server_name", Nullable(String())),
        # future name: device_id?
        ("transaction", Nullable(String())),
        ("environment", Nullable(String())),
        ("sentry:release", Nullable(String())),
        ("sentry:dist", Nullable(String())),
        ("sentry:user", Nullable(String())),
        ("site", Nullable(String())),
        ("url", Nullable(String())),
    ])

    promoted_context_tag_columns = ColumnSet([
        # These are promoted tags that come in via `tags`, but are more closely
        # related to contexts. To avoid naming confusion with Clickhouse nested
        # columns, they are stored in the database with s/./_/
        # promoted tags
        ("app_device", Nullable(String())),
        ("device", Nullable(String())),
        ("device_family", Nullable(String())),
        ("runtime", Nullable(String())),
        ("runtime_name", Nullable(String())),
        ("browser", Nullable(String())),
        ("browser_name", Nullable(String())),
        ("os", Nullable(String())),
        ("os_name", Nullable(String())),
        ("os_rooted", Nullable(UInt(8))),
    ])

    promoted_context_columns = ColumnSet([
        ("os_build", Nullable(String())),
        ("os_kernel_version", Nullable(String())),
        ("device_name", Nullable(String())),
        ("device_brand", Nullable(String())),
        ("device_locale", Nullable(String())),
        ("device_uuid", Nullable(String())),
        ("device_model_id", Nullable(String())),
        ("device_arch", Nullable(String())),
        ("device_battery_level", Nullable(Float(32))),
        ("device_orientation", Nullable(String())),
        ("device_simulator", Nullable(UInt(8))),
        ("device_online", Nullable(UInt(8))),
        ("device_charging", Nullable(UInt(8))),
    ])

    required_columns = ColumnSet([
        ("event_id", FixedString(32)),
        ("project_id", UInt(64)),
        ("group_id", UInt(64)),
        ("timestamp", DateTime()),
        ("deleted", UInt(8)),
        ("retention_days", UInt(16)),
    ])

    all_columns = (
        required_columns
        + [
            # required for non-deleted
            ("platform", Nullable(String())),
            ("message", Nullable(String())),
            ("primary_hash", Nullable(FixedString(32))),
            ("received", Nullable(DateTime())),
            ("search_message", Nullable(String())),
            ("title", Nullable(String())),
            ("location", Nullable(String())),
            # optional user
            ("user_id", Nullable(String())),
            ("username", Nullable(String())),
            ("email", Nullable(String())),
            ("ip_address", Nullable(String())),
            # optional geo
            ("geo_country_code", Nullable(String())),
            ("geo_region", Nullable(String())),
            ("geo_city", Nullable(String())),
            ("sdk_name", Nullable(String())),
            ("sdk_version", Nullable(String())),
            ("type", Nullable(String())),
            ("version", Nullable(String())),
        ]
        + metadata_columns
        + promoted_context_columns
        + promoted_tag_columns
        + promoted_context_tag_columns
        + [
            # other tags
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            # other context
            ("contexts", Nested([("key", String()), ("value", String())])),
            # http interface
            ("http_method", Nullable(String())),
            ("http_referer", Nullable(String())),
            # exception interface
            (
                "exception_stacks",
                Nested([
                    ("type", Nullable(String())),
                    ("value", Nullable(String())),
                    ("mechanism_type", Nullable(String())),
                    ("mechanism_handled", Nullable(UInt(8))),
                ]),
            ),
            (
                "exception_frames",
                Nested([
                    ("abs_path", Nullable(String())),
                    ("filename", Nullable(String())),
                    ("package", Nullable(String())),
                    ("module", Nullable(String())),
                    ("function", Nullable(String())),
                    ("in_app", Nullable(UInt(8))),
                    ("colno", Nullable(UInt(32))),
                    ("lineno", Nullable(UInt(32))),
                    ("stack_level", UInt(16)),
                ]),
            ),
            # These are columns we added later in the life of the (current) production
            # database. They don't necessarily belong here in a logical/readability sense
            # but they are here to match the order of columns in production because
            # `insert_distributed_sync` is very sensitive to column existence and ordering.
            ("culprit", Nullable(String())),
            ("sdk_integrations", Array(String())),
            ("modules", Nested([("name", String()), ("version", String())])),
        ]
    )

    sample_expr = "cityHash64(toString(event_id))"

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name="sentry_local",
        dist_table_name="sentry_dist",
        mandatory_conditions=[("deleted", "=", 0)],
        prewhere_candidates=[
            "event_id",
            "group_id",
            "tags[sentry:release]",
            "message",
            "environment",
            "project_id",
        ],
        order_by="(project_id, toStartOfDay(timestamp), %s)" % sample_expr,
        partition_by="(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))",
        version_column="deleted",
        sample_expr=sample_expr,
        migration_function=events_migrations,
    )

    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=EventsProcessor(promoted_tag_columns),
            default_topic="events",
            replacement_topic="event-replacements",
            commit_log_topic="snuba-commit-log",
        ),
    )

    super(EventsDataset, self).__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={"time": "timestamp", "rtime": "received"},
        time_parse_columns=("timestamp", "received"),
    )

    self.__metadata_columns = metadata_columns
    self.__promoted_tag_columns = promoted_tag_columns
    self.__promoted_context_tag_columns = promoted_context_tag_columns
    self.__promoted_context_columns = promoted_context_columns
    self.__required_columns = required_columns

    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )
def __init__(self) -> None:
    all_columns = ColumnSet([
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", WithCodecs(UUID(), ["NONE"])),
        (
            "event_hash",
            WithCodecs(
                Materialized(UInt(64), "cityHash64(toString(event_id))"),
                ["NONE"],
            ),
        ),
        ("platform", LowCardinality(String())),
        ("environment", LowCardinality(Nullable(String()))),
        ("release", LowCardinality(Nullable(String()))),
        ("dist", LowCardinality(Nullable(String()))),
        ("ip_address_v4", Nullable(IPv4())),
        ("ip_address_v6", Nullable(IPv6())),
        ("user", WithDefault(String(), "''")),
        ("user_hash", Materialized(UInt(64), "cityHash64(user)")),
        ("user_id", Nullable(String())),
        ("user_name", Nullable(String())),
        ("user_email", Nullable(String())),
        ("sdk_name", LowCardinality(Nullable(String()))),
        ("sdk_version", LowCardinality(Nullable(String()))),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_flattened", String()),
        ("contexts", Nested([("key", String()), ("value", String())])),
        ("_contexts_flattened", String()),
        ("transaction_name", WithDefault(LowCardinality(String()), "''")),
        ("transaction_hash", Materialized(UInt(64), "cityHash64(transaction_name)")),
        ("span_id", Nullable(UInt(64))),
        ("trace_id", Nullable(UUID())),
        ("partition", UInt(16)),
        ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
        ("group_id", UInt(64)),
        ("primary_hash", FixedString(32)),
        ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
        ("event_string", WithCodecs(String(), ["NONE"])),
        ("received", DateTime()),
        ("message", String()),
        ("title", String()),
        ("culprit", String()),
        ("level", LowCardinality(String())),
        ("location", Nullable(String())),
        ("version", LowCardinality(Nullable(String()))),
        ("type", LowCardinality(String())),
        (
            "exception_stacks",
            Nested([
                ("type", Nullable(String())),
                ("value", Nullable(String())),
                ("mechanism_type", Nullable(String())),
                ("mechanism_handled", Nullable(UInt(8))),
            ]),
        ),
        (
            "exception_frames",
            Nested([
                ("abs_path", Nullable(String())),
                ("colno", Nullable(UInt(32))),
                ("filename", Nullable(String())),
                ("function", Nullable(String())),
                ("lineno", Nullable(UInt(32))),
                ("in_app", Nullable(UInt(8))),
                ("package", Nullable(String())),
                ("module", Nullable(String())),
                ("stack_level", Nullable(UInt(16))),
            ]),
        ),
        ("sdk_integrations", Array(String())),
        ("modules", Nested([("name", String()), ("version", String())])),
    ])

    self.__promoted_tag_columns = {
        "environment": "environment",
        "sentry:release": "release",
        "sentry:dist": "dist",
        "sentry:user": "user",
        "transaction": "transaction_name",
        "level": "level",
    }

    schema = ReplacingMergeTreeSchema(
        columns=all_columns,
        local_table_name="errors_local",
        dist_table_name="errors_dist",
        mandatory_conditions=[("deleted", "=", 0)],
        prewhere_candidates=[
            "event_id",
            "group_id",
            "tags[sentry:release]",
            "message",
            "environment",
            "project_id",
        ],
        order_by="(org_id, project_id, toStartOfDay(timestamp), primary_hash_hex, event_hash)",
        partition_by="(toMonday(timestamp), if(retention_days = 30, 30, 90))",
        version_column="deleted",
        sample_expr="event_hash",
        ttl_expr="timestamp + toIntervalDay(retention_days)",
        settings={"index_granularity": "8192"},
    )

    dataset_schemas = DatasetSchemas(
        read_schema=schema,
        write_schema=schema,
    )

    table_writer = TableWriter(
        write_schema=schema,
        stream_loader=KafkaStreamLoader(
            processor=ErrorsProcessor(self.__promoted_tag_columns),
            default_topic="events",
        ),
    )

    super().__init__(
        dataset_schemas=dataset_schemas,
        table_writer=table_writer,
        time_group_columns={"time": "timestamp", "rtime": "received"},
        time_parse_columns=("timestamp", "received"),
    )

    self.__tags_processor = TagColumnProcessor(
        columns=all_columns,
        promoted_columns=self._get_promoted_columns(),
        column_tag_map=self._get_column_tag_map(),
    )