Example #1
    Column,
    DateTime,
    Nullable,
    UInt,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines

columns = [
    # Kafka topic offset
    Column("offset", UInt(64)),
    Column("record_deleted", UInt(8)),
    # PG columns
    Column("project_id", UInt(64)),
    Column("group_id", UInt(64)),
    Column("date_added", Nullable(DateTime())),
    Column("user_id", Nullable(UInt(64))),
    Column("team_id", Nullable(UInt(64))),
]


class Migration(migration.MultiStepMigration):
    blocking = False

    def forwards_local(self) -> Sequence[operations.Operation]:
        return [
            operations.CreateTable(
                storage_set=StorageSetKey.EVENTS,
                table_name="groupassignee_local",
                columns=columns,
                engine=table_engines.ReplacingMergeTree(
Example #2
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.expressions import Column, Literal
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer,
)
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.query.processors.prewhere import PrewhereProcessor

all_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("timestamp", DateTime()),
    ("event_id", UUID()),
    ("event_hash", ReadOnly(UInt(64))),
    ("platform", String()),
    ("environment", Nullable(String())),
    ("release", Nullable(String())),
    ("dist", Nullable(String())),
    ("ip_address_v4", Nullable(IPv4())),
    ("ip_address_v6", Nullable(IPv6())),
    ("user", String()),
    ("user_hash", ReadOnly(UInt(64))),
    ("user_id", Nullable(String())),
    ("user_name", Nullable(String())),
    ("user_email", Nullable(String())),
    ("sdk_name", Nullable(String())),
    ("sdk_version", Nullable(String())),
    ("http_method", Nullable(String())),
    ("http_referer", Nullable(String())),
    ("tags", Nested([("key", String()), ("value", String())])),
    ("_tags_flattened", String()),
Example #3
def forwards_local(self) -> Sequence[operations.Operation]:
    return [
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("duration", UInt(32)),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("sdk_name",
                          WithDefault(LowCardinality(String()), "''")),
            after="user_email",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("sdk_version",
                          WithDefault(LowCardinality(String()), "''")),
            after="sdk_name",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("transaction_status",
                          WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS))),
            after="transaction_op",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("_tags_flattened", String()),
            after="tags",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("_contexts_flattened", String()),
            after="contexts",
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("user_hash",
                          Materialized(UInt(64), "cityHash64(user)")),
            after="user",
        ),
        # The following columns were originally created as non low cardinality strings
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("transaction_name", LowCardinality(String())),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("release", LowCardinality(Nullable(String()))),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("dist", LowCardinality(Nullable(String()))),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("sdk_name",
                          WithDefault(LowCardinality(String()), "''")),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("sdk_version",
                          WithDefault(LowCardinality(String()), "''")),
        ),
        operations.ModifyColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("environment",
                          LowCardinality(Nullable(String()))),
        ),
        operations.AddColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column=Column("message_timestamp", DateTime()),
            after="offset",
        ),
        operations.DropColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column_name="_start_date",
        ),
        operations.DropColumn(
            storage_set=StorageSetKey.TRANSACTIONS,
            table_name="transactions_local",
            column_name="_finish_date",
        ),
    ]
Example #4
def visit_nullable(self, node: Node,
                   visited_children: Iterable[Any]) -> ColumnType:
    (_null, _paren, _sp, inner_type, _sp, _paren) = visited_children
    return Nullable(inner_type)
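
For context, a minimal self-contained sketch of how a visit_nullable hook like the one above slots into a parsimonious NodeVisitor. The grammar, rule names, and toy String/Nullable types are illustrative assumptions, not Snuba's real type parser:

from typing import Any, Sequence

from parsimonious.grammar import Grammar
from parsimonious.nodes import Node, NodeVisitor


class String:
    def __repr__(self) -> str:
        return "String()"


class Nullable:
    def __init__(self, inner: Any) -> None:
        self.inner = inner

    def __repr__(self) -> str:
        return f"Nullable({self.inner!r})"


class TypeVisitor(NodeVisitor):
    # The nullable rule has six parts, matching the six children
    # unpacked by visit_nullable: literal, "(", spaces, type, spaces, ")".
    grammar = Grammar(
        r"""
        type     = nullable / string
        nullable = "Nullable" "(" sp type sp ")"
        string   = "String"
        sp       = " "*
        """
    )

    def visit_string(self, node: Node, visited_children: Sequence[Any]) -> Any:
        return String()

    def visit_nullable(self, node: Node, visited_children: Sequence[Any]) -> Any:
        (_null, _paren, _sp, inner_type, _sp2, _paren2) = visited_children
        return Nullable(inner_type)

    def generic_visit(self, node: Node, visited_children: Sequence[Any]) -> Any:
        # Unwrap single-child wrapper rules (like `type`); keep leaves as nodes.
        return visited_children[0] if len(visited_children) == 1 else node


print(TypeVisitor().parse("Nullable( String )"))  # Nullable(String())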
Example #5
    WritableTableSchema,
)
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.processors.prewhere import PrewhereProcessor

WRITE_LOCAL_TABLE_NAME = "outcomes_raw_local"
WRITE_DIST_TABLE_NAME = "outcomes_raw_dist"
READ_LOCAL_TABLE_NAME = "outcomes_hourly_local"
READ_DIST_TABLE_NAME = "outcomes_hourly_dist"

write_columns = ColumnSet(
    [
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("key_id", Nullable(UInt(64))),
        ("timestamp", DateTime()),
        ("outcome", UInt(8)),
        ("reason", Nullable(String())),
        ("event_id", Nullable(UUID())),
    ]
)

raw_schema = WritableTableSchema(
    columns=write_columns,
    # TODO: change to outcomes.raw_local when we add multi DB support
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
)
Example #6
    Nullable,
    String,
    UInt,
)

from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines

columns = [
    Column("event_id", FixedString(32)),
    Column("project_id", UInt(64)),
    Column("group_id", UInt(64)),
    Column("timestamp", DateTime()),
    Column("deleted", UInt(8)),
    Column("retention_days", UInt(16)),
    Column("platform", Nullable(String())),
    Column("message", Nullable(String())),
    Column("primary_hash", Nullable(FixedString(32))),
    Column("received", Nullable(DateTime())),
    Column("search_message", Nullable(String())),
    Column("title", Nullable(String())),
    Column("location", Nullable(String())),
    Column("user_id", Nullable(String())),
    Column("username", Nullable(String())),
    Column("email", Nullable(String())),
    Column("ip_address", Nullable(String())),
    Column("geo_country_code", Nullable(String())),
    Column("geo_region", Nullable(String())),
    Column("geo_city", Nullable(String())),
    Column("sdk_name", Nullable(String())),
    Column("sdk_version", Nullable(String())),
Example #7
     ("AggregateFunction(uniq, UInt8)", "", "", ""),
     AggregateFunction("uniq", UInt(8)),
 ),
 (
     ("AggregateFunction(countIf, UUID, UInt8)", "", "", ""),
     AggregateFunction("countIf", UUID(), UInt(8)),
 ),
 (
     ("AggregateFunction(quantileIf(0.5, 0.9), UInt32, UInt8)", "", "", ""),
     AggregateFunction("quantileIf(0.5, 0.9)", UInt(32), UInt(8)),
 ),
 # Array
 (("Array(String)", "", "", ""), Array(String())),
 (("Array(DateTime)", "", "", ""), Array(DateTime())),
 (("Array(UInt64)", "", "", ""), Array(UInt(64))),
 (("Array(Nullable(UUID))", "", "", ""), Array(Nullable(UUID()))),
 (("Array(Array(Nullable(UUID)))", "", "", ""), Array(Array(Nullable(UUID())))),
 # Nullable
 (("Nullable(String)", "", "", ""), Nullable(String())),
 (("Nullable(FixedString(8))", "", "", ""), Nullable(FixedString(8))),
 (("Nullable(Date)", "", "", ""), Nullable(Date())),
 # Low cardinality
 (("LowCardinality(String)", "", "", ""), LowCardinality(String())),
 (
     ("LowCardinality(Nullable(String))", "", "", ""),
     LowCardinality(Nullable(String())),
 ),
 # Materialized
 (
     ("Date", "MATERIALIZED", "toDate(col1)", ""),
     Materialized(Date(), "toDate(col1)"),
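
Each left-hand tuple here has the shape of a ClickHouse system.columns row (type string, default kind, default expression, codec), and the right-hand side is the column object a parser is expected to build from it; presumably these pairs feed a parametrized test of the type-string parsing that Example #4's visitor implements.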
Example #8
    UInt,
    UUID,
)
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import LowCardinality

status_type = Enum([("success", 0), ("error", 1), ("rate-limited", 2)])

columns = [
    Column("request_id", UUID()),
    Column("request_body", String()),
    Column("referrer", LowCardinality(String())),
    Column("dataset", LowCardinality(String())),
    Column("projects", Array(UInt(64))),
    Column("organization", Nullable(UInt(64))),
    Column("timestamp", DateTime()),
    Column("duration_ms", UInt(32)),
    Column("status", status_type),
    Column(
        "clickhouse_queries",
        Nested([
            Column("sql", String()),
            Column("status", status_type),
            Column("trace_id", Nullable(UUID())),
            Column("duration_ms", UInt(32)),
            Column("stats", String()),
            Column("final", UInt(8)),
            Column("cache_hit", UInt(8)),
            Column("sample", Float(32)),
            Column("max_threads", UInt(8)),
Example #9
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.querylog_processor import QuerylogProcessor
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import KafkaStreamLoader

NESTED_ARRAY_DEFAULT = "arrayResize([['']], length(clickhouse_queries.sql))"

columns = ColumnSet([
    ("request_id", UUID()),
    ("request_body", String()),
    ("referrer", LowCardinality(String())),
    ("dataset", LowCardinality(String())),
    ("projects", Array(UInt(64))),
    ("organization", Nullable(UInt(64))),
    ("timestamp", DateTime()),
    ("duration_ms", UInt(32)),
    ("status", LowCardinality(String())),
    # clickhouse_queries Nested columns.
    # This is expanded into arrays instead of being expressed as a
    # Nested column because, when adding new columns to a nested field
    # we need to provide a default for the entire array (each new column
    # is an array).
    # The same schema cannot be achieved with the Nested construct (where
    # we can only provide default for individual values), so, if we
    # use the Nested construct, this schema cannot match the one generated
    # by the migration framework (or by any ALTER statement).
    ("clickhouse_queries.sql", Array(String())),
    ("clickhouse_queries.status", Array(LowCardinality(String()))),
    ("clickhouse_queries.trace_id", Array(Nullable(UUID()))),
Example #10
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.expressions import Column, Literal

columns = ColumnSet([
    # columns to maintain the dataset
    # Kafka topic offset
    ("offset", UInt(64)),
    # GroupStatus in Sentry does not have a 'DELETED' state that reflects the deletion
    # of the record. Having a dedicated clickhouse-only flag to identify this case seems
    # more consistent than adding an additional value into the status field below that
    # does not exist on the Sentry side.
    ("record_deleted", UInt(8)),
    # PG columns
    ("project_id", UInt(64)),
    ("id", UInt(64)),
    ("status", Nullable(UInt(8))),
    ("last_seen", Nullable(DateTime())),
    ("first_seen", Nullable(DateTime())),
    ("active_at", Nullable(DateTime())),
    ("first_release_id", Nullable(UInt(64))),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name="groupedmessage_local",
    dist_table_name="groupedmessage_dist",
    storage_set_key=StorageSetKey.EVENTS,
    mandatory_conditions=[
        binary_condition(
            None,
            ConditionFunctions.EQ,
Example #11
def test_events_boolean_context() -> None:
    columns = ColumnSet([
        ("device_charging", Nullable(UInt(8))),
        ("contexts", Nested([("key", String()), ("value", String())])),
    ])
    query = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[
                SelectedExpression(
                    "contexts[device.charging]",
                    FunctionCall(
                        "contexts[device.charging]",
                        "arrayElement",
                        (
                            Column(None, None, "contexts.value"),
                            FunctionCall(
                                None,
                                "indexOf",
                                (
                                    Column(None, None, "contexts.key"),
                                    Literal(None, "device.charging"),
                                ),
                            ),
                        ),
                    ),
                )
            ],
        ))

    expected = ClickhouseQuery(
        LogicalQuery(
            {},
            TableSource("events", columns),
            selected_columns=[
                SelectedExpression(
                    "contexts[device.charging]",
                    FunctionCall(
                        "contexts[device.charging]",
                        "multiIf",
                        (
                            binary_condition(
                                None,
                                ConditionFunctions.EQ,
                                FunctionCall(
                                    None,
                                    "toString",
                                    (Column(None, None, "device_charging"), ),
                                ),
                                Literal(None, ""),
                            ),
                            Literal(None, ""),
                            binary_condition(
                                None,
                                ConditionFunctions.IN,
                                FunctionCall(
                                    None,
                                    "toString",
                                    (Column(None, None, "device_charging"), ),
                                ),
                                literals_tuple(None, [
                                    Literal(None, "1"),
                                    Literal(None, "True")
                                ]),
                            ),
                            Literal(None, "True"),
                            Literal(None, "False"),
                        ),
                    ),
                )
            ],
        ))

    settings = HTTPRequestSettings()
    MappingColumnPromoter({
        "contexts": {
            "device.charging": "device_charging"
        }
    }).process_query(query, settings)
    EventsBooleanContextsProcessor().process_query(query, settings)

    assert (query.get_selected_columns_from_ast() ==
            expected.get_selected_columns_from_ast())
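
The processor order matters in this test: MappingColumnPromoter first rewrites the contexts[device.charging] lookup onto the promoted device_charging column, and only then does EventsBooleanContextsProcessor wrap that column in the multiIf shown in expected, rendering the Nullable(UInt(8)) flag back to "True"/"False" (with the empty string preserved for NULL).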
Example #12
    def __init__(self) -> None:
        metadata_columns = ColumnSet([
            # optional stream related data
            ("offset", Nullable(UInt(64))),
            ("partition", Nullable(UInt(16))),
        ])

        promoted_tag_columns = ColumnSet([
            # These are the classic tags, they are saved in Snuba exactly as they
            # appear in the event body.
            ("level", Nullable(String())),
            ("logger", Nullable(String())),
            ("server_name", Nullable(String())),  # future name: device_id?
            ("transaction", Nullable(String())),
            ("environment", Nullable(String())),
            ("sentry:release", Nullable(String())),
            ("sentry:dist", Nullable(String())),
            ("sentry:user", Nullable(String())),
            ("site", Nullable(String())),
            ("url", Nullable(String())),
        ])

        promoted_context_tag_columns = ColumnSet([
            # These are promoted tags that come in in `tags`, but are more closely
            # related to contexts.  To avoid naming confusion with Clickhouse nested
            # columns, they are stored in the database with s/./_/
            # promoted tags
            ("app_device", Nullable(String())),
            ("device", Nullable(String())),
            ("device_family", Nullable(String())),
            ("runtime", Nullable(String())),
            ("runtime_name", Nullable(String())),
            ("browser", Nullable(String())),
            ("browser_name", Nullable(String())),
            ("os", Nullable(String())),
            ("os_name", Nullable(String())),
            ("os_rooted", Nullable(UInt(8))),
        ])

        promoted_context_columns = ColumnSet([
            ("os_build", Nullable(String())),
            ("os_kernel_version", Nullable(String())),
            ("device_name", Nullable(String())),
            ("device_brand", Nullable(String())),
            ("device_locale", Nullable(String())),
            ("device_uuid", Nullable(String())),
            ("device_model_id", Nullable(String())),
            ("device_arch", Nullable(String())),
            ("device_battery_level", Nullable(Float(32))),
            ("device_orientation", Nullable(String())),
            ("device_simulator", Nullable(UInt(8))),
            ("device_online", Nullable(UInt(8))),
            ("device_charging", Nullable(UInt(8))),
        ])

        required_columns = ColumnSet([
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("group_id", UInt(64)),
            ("timestamp", DateTime()),
            ("deleted", UInt(8)),
            ("retention_days", UInt(16)),
        ])

        all_columns = (
            required_columns + [
                # required for non-deleted
                ("platform", Nullable(String())),
                ("message", Nullable(String())),
                ("primary_hash", Nullable(FixedString(32))),
                ("received", Nullable(DateTime())),
                ("search_message", Nullable(String())),
                ("title", Nullable(String())),
                ("location", Nullable(String())),
                # optional user
                ("user_id", Nullable(String())),
                ("username", Nullable(String())),
                ("email", Nullable(String())),
                ("ip_address", Nullable(String())),
                # optional geo
                ("geo_country_code", Nullable(String())),
                ("geo_region", Nullable(String())),
                ("geo_city", Nullable(String())),
                ("sdk_name", Nullable(String())),
                ("sdk_version", Nullable(String())),
                ("type", Nullable(String())),
                ("version", Nullable(String())),
            ] + metadata_columns + promoted_context_columns +
            promoted_tag_columns + promoted_context_tag_columns + [
                # other tags
                ("tags", Nested([("key", String()), ("value", String())])),
                ("_tags_flattened", String()),
                # other context
                ("contexts", Nested([("key", String()), ("value", String())])),
                # http interface
                ("http_method", Nullable(String())),
                ("http_referer", Nullable(String())),
                # exception interface
                (
                    "exception_stacks",
                    Nested([
                        ("type", Nullable(String())),
                        ("value", Nullable(String())),
                        ("mechanism_type", Nullable(String())),
                        ("mechanism_handled", Nullable(UInt(8))),
                    ]),
                ),
                (
                    "exception_frames",
                    Nested([
                        ("abs_path", Nullable(String())),
                        ("filename", Nullable(String())),
                        ("package", Nullable(String())),
                        ("module", Nullable(String())),
                        ("function", Nullable(String())),
                        ("in_app", Nullable(UInt(8))),
                        ("colno", Nullable(UInt(32))),
                        ("lineno", Nullable(UInt(32))),
                        ("stack_level", UInt(16)),
                    ]),
                ),
                # These are columns we added later in the life of the (current) production
                # database. They don't necessarily belong here in a logical/readability sense
                # but they are here to match the order of columns in production because
                # `insert_distributed_sync` is very sensitive to column existence and ordering.
                ("culprit", Nullable(String())),
                ("sdk_integrations", Array(String())),
                ("modules", Nested([("name", String()),
                                    ("version", String())])),
            ])

        sample_expr = "cityHash64(toString(event_id))"
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name="sentry_local",
            dist_table_name="sentry_dist",
            mandatory_conditions=[("deleted", "=", 0)],
            prewhere_candidates=[
                "event_id",
                "group_id",
                "tags[sentry:release]",
                "message",
                "environment",
                "project_id",
            ],
            order_by="(project_id, toStartOfDay(timestamp), %s)" % sample_expr,
            partition_by=
            "(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))",
            version_column="deleted",
            sample_expr=sample_expr,
            migration_function=events_migrations,
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=EventsProcessor(promoted_tag_columns),
                default_topic="events",
                replacement_topic="event-replacements",
                commit_log_topic="snuba-commit-log",
            ),
        )

        super(EventsDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                "time": "timestamp",
                "rtime": "received"
            },
            time_parse_columns=("timestamp", "received"),
        )

        self.__metadata_columns = metadata_columns
        self.__promoted_tag_columns = promoted_tag_columns
        self.__promoted_context_tag_columns = promoted_context_tag_columns
        self.__promoted_context_columns = promoted_context_columns
        self.__required_columns = required_columns

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )
Example #13
import pytest

from snuba.clickhouse.columns import ColumnSet, Nested, Nullable, String, UInt
from snuba.clickhouse.query import Query as ClickhouseQuery
from snuba.datasets.schemas.tables import TableSource
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.logical import Query as LogicalQuery
from snuba.query.logical import SelectedExpression
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.request.request_settings import HTTPRequestSettings

columns = ColumnSet(
    [
        ("promoted", Nullable(UInt(8))),
        ("tags", Nested([("key", String()), ("value", String())])),
    ]
)

test_cases = [
    (
        "not promoted",
        ClickhouseQuery(
            LogicalQuery(
                {},
                TableSource("events", columns),
                selected_columns=[
                    SelectedExpression(
                        "tags[foo]",
                        FunctionCall(
                            "tags[foo]",
                            "arrayValue",
Example #14
    SummingMergeTreeSchema,
    MaterializedViewSchema,
)
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import KafkaStreamLoader
from snuba.query.processors.prewhere import PrewhereProcessor

WRITE_LOCAL_TABLE_NAME = "outcomes_raw_local"
WRITE_DIST_TABLE_NAME = "outcomes_raw_dist"
READ_LOCAL_TABLE_NAME = "outcomes_hourly_local"
READ_DIST_TABLE_NAME = "outcomes_hourly_dist"

write_columns = ColumnSet([
    ("org_id", UInt(64)),
    ("project_id", UInt(64)),
    ("key_id", Nullable(UInt(64))),
    ("timestamp", DateTime()),
    ("outcome", UInt(8)),
    ("reason", LowCardinality(Nullable(String()))),
    ("event_id", Nullable(UUID())),
])

raw_schema = MergeTreeSchema(
    columns=write_columns,
    # TODO: change to outcomes.raw_local when we add multi DB support
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
    order_by="(org_id, project_id, timestamp)",
    partition_by="(toMonday(timestamp))",
    settings={"index_granularity": "16384"},
Example #15
 Column("event_id", UUID()),
 Column("trace_id", UUID()),
 Column("span_id", UInt(64)),
 Column("transaction_name", LowCardinality(String())),
 Column("transaction_hash",
        Materialized(UInt(64), "cityHash64(transaction_name)")),
 Column("transaction_op", LowCardinality(String())),
 Column("transaction_status", WithDefault(UInt(8),
                                          str(UNKNOWN_SPAN_STATUS))),
 Column("start_ts", DateTime()),
 Column("start_ms", UInt(16)),
 Column("finish_ts", DateTime()),
 Column("finish_ms", UInt(16)),
 Column("duration", UInt(32)),
 Column("platform", LowCardinality(String())),
 Column("environment", LowCardinality(Nullable(String()))),
 Column("release", LowCardinality(Nullable(String()))),
 Column("dist", LowCardinality(Nullable(String()))),
 Column("ip_address_v4", Nullable(IPv4())),
 Column("ip_address_v6", Nullable(IPv6())),
 Column("user", WithDefault(
     String(),
     "''",
 )),
 Column("user_hash", Materialized(UInt(64), "cityHash64(user)")),
 Column("user_id", Nullable(String())),
 Column("user_name", Nullable(String())),
 Column("user_email", Nullable(String())),
 Column("sdk_name", WithDefault(LowCardinality(String()), "''")),
 Column("sdk_version", WithDefault(LowCardinality(String()), "''")),
 Column("tags", Nested([("key", String()), ("value", String())])),
Example #16
    def __init__(self):
        metadata_columns = ColumnSet([
            # optional stream related data
            ('offset', Nullable(UInt(64))),
            ('partition', Nullable(UInt(16))),
        ])

        promoted_tag_columns = ColumnSet([
            # These are the classic tags, they are saved in Snuba exactly as they
            # appear in the event body.
            ('level', Nullable(String())),
            ('logger', Nullable(String())),
            ('server_name', Nullable(String())),  # future name: device_id?
            ('transaction', Nullable(String())),
            ('environment', Nullable(String())),
            ('sentry:release', Nullable(String())),
            ('sentry:dist', Nullable(String())),
            ('sentry:user', Nullable(String())),
            ('site', Nullable(String())),
            ('url', Nullable(String())),
        ])

        promoted_context_tag_columns = ColumnSet([
            # These are promoted tags that come in in `tags`, but are more closely
            # related to contexts.  To avoid naming confusion with Clickhouse nested
            # columns, they are stored in the database with s/./_/
            # promoted tags
            ('app_device', Nullable(String())),
            ('device', Nullable(String())),
            ('device_family', Nullable(String())),
            ('runtime', Nullable(String())),
            ('runtime_name', Nullable(String())),
            ('browser', Nullable(String())),
            ('browser_name', Nullable(String())),
            ('os', Nullable(String())),
            ('os_name', Nullable(String())),
            ('os_rooted', Nullable(UInt(8))),
        ])

        promoted_context_columns = ColumnSet([
            ('os_build', Nullable(String())),
            ('os_kernel_version', Nullable(String())),
            ('device_name', Nullable(String())),
            ('device_brand', Nullable(String())),
            ('device_locale', Nullable(String())),
            ('device_uuid', Nullable(String())),
            ('device_model_id', Nullable(String())),
            ('device_arch', Nullable(String())),
            ('device_battery_level', Nullable(Float(32))),
            ('device_orientation', Nullable(String())),
            ('device_simulator', Nullable(UInt(8))),
            ('device_online', Nullable(UInt(8))),
            ('device_charging', Nullable(UInt(8))),
        ])

        required_columns = ColumnSet([
            ('event_id', FixedString(32)),
            ('project_id', UInt(64)),
            ('group_id', UInt(64)),
            ('timestamp', DateTime()),
            ('deleted', UInt(8)),
            ('retention_days', UInt(16)),
        ])

        all_columns = required_columns + [
            # required for non-deleted
            ('platform', Nullable(String())),
            ('message', Nullable(String())),
            ('primary_hash', Nullable(FixedString(32))),
            ('received', Nullable(DateTime())),

            ('search_message', Nullable(String())),
            ('title', Nullable(String())),
            ('location', Nullable(String())),

            # optional user
            ('user_id', Nullable(String())),
            ('username', Nullable(String())),
            ('email', Nullable(String())),
            ('ip_address', Nullable(String())),

            # optional geo
            ('geo_country_code', Nullable(String())),
            ('geo_region', Nullable(String())),
            ('geo_city', Nullable(String())),

            ('sdk_name', Nullable(String())),
            ('sdk_version', Nullable(String())),
            ('type', Nullable(String())),
            ('version', Nullable(String())),
        ] + metadata_columns \
            + promoted_context_columns \
            + promoted_tag_columns \
            + promoted_context_tag_columns \
            + [
                # other tags
                ('tags', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # other context
                ('contexts', Nested([
                    ('key', String()),
                    ('value', String()),
                ])),

                # http interface
                ('http_method', Nullable(String())),
                ('http_referer', Nullable(String())),

                # exception interface
                ('exception_stacks', Nested([
                    ('type', Nullable(String())),
                    ('value', Nullable(String())),
                    ('mechanism_type', Nullable(String())),
                    ('mechanism_handled', Nullable(UInt(8))),
                ])),
                ('exception_frames', Nested([
                    ('abs_path', Nullable(String())),
                    ('filename', Nullable(String())),
                    ('package', Nullable(String())),
                    ('module', Nullable(String())),
                    ('function', Nullable(String())),
                    ('in_app', Nullable(UInt(8))),
                    ('colno', Nullable(UInt(32))),
                    ('lineno', Nullable(UInt(32))),
                    ('stack_level', UInt(16)),
                ])),

                # These are columns we added later in the life of the (current) production
                # database. They don't necessarily belong here in a logical/readability sense
                # but they are here to match the order of columns in production because
                # `insert_distributed_sync` is very sensitive to column existence and ordering.
                ('culprit', Nullable(String())),
                ('sdk_integrations', Array(String())),
                ('modules', Nested([
                    ('name', String()),
                    ('version', String()),
                ])),
        ]

        sample_expr = 'cityHash64(toString(event_id))'
        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name='sentry_local',
            dist_table_name='sentry_dist',
            mandatory_conditions=[('deleted', '=', 0)],
            order_by='(project_id, toStartOfDay(timestamp), %s)' % sample_expr,
            partition_by='(toMonday(timestamp), if(equals(retention_days, 30), 30, 90))',
            version_column='deleted',
            sample_expr=sample_expr,
            migration_function=events_migrations)

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=EventsProcessor(promoted_tag_columns),
                default_topic="events",
                replacement_topic="event-replacements",
                commit_log_topic="snuba-commit-log",
            )
        )

        super(EventsDataset, self).__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                'time': 'timestamp',
                'rtime': 'received'
            },
            time_parse_columns=('timestamp', 'received')
        )

        self.__metadata_columns = metadata_columns
        self.__promoted_tag_columns = promoted_tag_columns
        self.__promoted_context_tag_columns = promoted_context_tag_columns
        self.__promoted_context_columns = promoted_context_columns
        self.__required_columns = required_columns

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )
Example #17
    def __init__(self) -> None:
        self.__common_columns = ColumnSet(
            [
                ("event_id", FixedString(32)),
                ("project_id", UInt(64)),
                ("type", Nullable(String())),
                ("timestamp", DateTime()),
                ("platform", Nullable(String())),
                ("environment", Nullable(String())),
                ("release", Nullable(String())),
                ("dist", Nullable(String())),
                ("user", Nullable(String())),
                ("transaction", Nullable(String())),
                ("message", Nullable(String())),
                ("title", Nullable(String())),
                # User
                ("user_id", Nullable(String())),
                ("username", Nullable(String())),
                ("email", Nullable(String())),
                ("ip_address", Nullable(String())),
                # SDK
                ("sdk_name", Nullable(String())),
                ("sdk_version", Nullable(String())),
                # geo location context
                ("geo_country_code", Nullable(String())),
                ("geo_region", Nullable(String())),
                ("geo_city", Nullable(String())),
                ("http_method", Nullable(String())),
                ("http_referer", Nullable(String())),
                # Other tags and context
                ("tags", Nested([("key", String()), ("value", String())])),
                ("contexts", Nested([("key", String()), ("value", String())])),
            ]
        )

        self.__events_columns = ColumnSet(
            [
                ("group_id", Nullable(UInt(64))),
                ("primary_hash", Nullable(FixedString(32))),
                # Promoted tags
                ("level", Nullable(String())),
                ("logger", Nullable(String())),
                ("server_name", Nullable(String())),
                ("site", Nullable(String())),
                ("url", Nullable(String())),
                ("search_message", Nullable(String())),
                ("location", Nullable(String())),
                ("culprit", Nullable(String())),
                ("received", Nullable(DateTime())),
                ("sdk_integrations", Nullable(Array(String()))),
                ("version", Nullable(String())),
                # exception interface
                (
                    "exception_stacks",
                    Nested(
                        [
                            ("type", Nullable(String())),
                            ("value", Nullable(String())),
                            ("mechanism_type", Nullable(String())),
                            ("mechanism_handled", Nullable(UInt(8))),
                        ]
                    ),
                ),
                (
                    "exception_frames",
                    Nested(
                        [
                            ("abs_path", Nullable(String())),
                            ("filename", Nullable(String())),
                            ("package", Nullable(String())),
                            ("module", Nullable(String())),
                            ("function", Nullable(String())),
                            ("in_app", Nullable(UInt(8))),
                            ("colno", Nullable(UInt(32))),
                            ("lineno", Nullable(UInt(32))),
                            ("stack_level", UInt(16)),
                        ]
                    ),
                ),
                ("modules", Nested([("name", String()), ("version", String())])),
            ]
        )

        self.__transactions_columns = ColumnSet(
            [
                ("trace_id", Nullable(UUID())),
                ("span_id", Nullable(UInt(64))),
                ("transaction_hash", Nullable(UInt(64))),
                ("transaction_op", Nullable(String())),
                ("transaction_status", Nullable(UInt(8))),
                ("duration", Nullable(UInt(32))),
                (
                    "measurements",
                    Nested([("key", LowCardinality(String())), ("value", Float(64))]),
                ),
            ]
        )

        events_storage = get_storage(StorageKey.EVENTS)
        events_ro_storage = get_storage(StorageKey.EVENTS_RO)
        transactions_storage = get_storage(StorageKey.TRANSACTIONS)

        self.__time_group_columns: Mapping[str, str] = {}
        self.__time_parse_columns = ("timestamp",)

        super().__init__(
            storages=[events_storage, transactions_storage],
            query_plan_builder=SelectedStorageQueryPlanBuilder(
                selector=DiscoverQueryStorageSelector(
                    events_table=events_storage,
                    events_ro_table=events_ro_storage,
                    abstract_events_columns=self.__events_columns,
                    transactions_table=transactions_storage,
                    abstract_transactions_columns=self.__transactions_columns,
                ),
            ),
            abstract_column_set=(
                self.__common_columns
                + self.__events_columns
                + self.__transactions_columns
            ),
            writable_storage=None,
        )
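
Nothing in this Discover dataset writes to a table of its own: the SelectedStorageQueryPlanBuilder routes each query to the events or transactions storage, and the abstract events/transactions column sets tell the selector which storage owns any column the query references (events_ro presumably pointing at a read-only replica of the events storage).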
Example #18
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.storages.tags_hash_map import TAGS_HASH_MAP_COLUMN
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import LowCardinality, Materialized, WithDefault

UNKNOWN_SPAN_STATUS = SPAN_STATUS_NAME_TO_CODE["unknown"]

tags_col = Column("tags", Nested([("key", String()), ("value", String())]))

columns = [
    Column("project_id", UInt(64)),
    Column("transaction_id", UUID()),
    Column("trace_id", UUID()),
    Column("transaction_span_id", UInt(64)),
    Column("span_id", UInt(64)),
    Column("parent_span_id", Nullable(UInt(64))),
    Column("transaction_name", LowCardinality(String())),
    Column("description", String()),  # description in span
    Column("op", LowCardinality(String())),
    Column(
        "status",
        WithDefault(UInt(8), str(UNKNOWN_SPAN_STATUS)),
    ),
    Column("start_ts", DateTime()),
    Column("start_ns", UInt(32)),
    Column("finish_ts", DateTime()),
    Column("finish_ns", UInt(32)),
    Column("duration_ms", UInt(32)),
    tags_col,
    Column("retention_days", UInt(16)),
    Column("deleted", UInt(8)),
Example #19
    def __init__(self) -> None:
        all_columns = ColumnSet([
            ("org_id", UInt(64)),
            ("project_id", UInt(64)),
            ("timestamp", DateTime()),
            ("event_id", WithCodecs(UUID(), ["NONE"])),
            (
                "event_hash",
                WithCodecs(
                    Materialized(
                        UInt(64),
                        "cityHash64(toString(event_id))",
                    ),
                    ["NONE"],
                ),
            ),
            ("platform", LowCardinality(String())),
            ("environment", LowCardinality(Nullable(String()))),
            ("release", LowCardinality(Nullable(String()))),
            ("dist", LowCardinality(Nullable(String()))),
            ("ip_address_v4", Nullable(IPv4())),
            ("ip_address_v6", Nullable(IPv6())),
            ("user", WithDefault(String(), "''")),
            (
                "user_hash",
                Materialized(UInt(64), "cityHash64(user)"),
            ),
            ("user_id", Nullable(String())),
            ("user_name", Nullable(String())),
            ("user_email", Nullable(String())),
            ("sdk_name", LowCardinality(Nullable(String()))),
            ("sdk_version", LowCardinality(Nullable(String()))),
            ("tags", Nested([("key", String()), ("value", String())])),
            ("_tags_flattened", String()),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("_contexts_flattened", String()),
            ("transaction_name", WithDefault(LowCardinality(String()), "''")),
            (
                "transaction_hash",
                Materialized(UInt(64), "cityHash64(transaction_name)"),
            ),
            ("span_id", Nullable(UInt(64))),
            ("trace_id", Nullable(UUID())),
            ("partition", UInt(16)),
            ("offset", WithCodecs(UInt(64), ["DoubleDelta", "LZ4"])),
            ("retention_days", UInt(16)),
            ("deleted", UInt(8)),
            ("group_id", UInt(64)),
            ("primary_hash", FixedString(32)),
            ("primary_hash_hex", Materialized(UInt(64), "hex(primary_hash)")),
            ("event_string", WithCodecs(String(), ["NONE"])),
            ("received", DateTime()),
            ("message", String()),
            ("title", String()),
            ("culprit", String()),
            ("level", LowCardinality(String())),
            ("location", Nullable(String())),
            ("version", LowCardinality(Nullable(String()))),
            ("type", LowCardinality(String())),
            (
                "exception_stacks",
                Nested([
                    ("type", Nullable(String())),
                    ("value", Nullable(String())),
                    ("mechanism_type", Nullable(String())),
                    ("mechanism_handled", Nullable(UInt(8))),
                ]),
            ),
            (
                "exception_frames",
                Nested([
                    ("abs_path", Nullable(String())),
                    ("colno", Nullable(UInt(32))),
                    ("filename", Nullable(String())),
                    ("function", Nullable(String())),
                    ("lineno", Nullable(UInt(32))),
                    ("in_app", Nullable(UInt(8))),
                    ("package", Nullable(String())),
                    ("module", Nullable(String())),
                    ("stack_level", Nullable(UInt(16))),
                ]),
            ),
            ("sdk_integrations", Array(String())),
            ("modules", Nested([("name", String()), ("version", String())])),
        ])

        self.__promoted_tag_columns = {
            "environment": "environment",
            "sentry:release": "release",
            "sentry:dist": "dist",
            "sentry:user": "******",
            "transaction": "transaction_name",
            "level": "level",
        }

        schema = ReplacingMergeTreeSchema(
            columns=all_columns,
            local_table_name="errors_local",
            dist_table_name="errors_dist",
            mandatory_conditions=[("deleted", "=", 0)],
            prewhere_candidates=[
                "event_id",
                "group_id",
                "tags[sentry:release]",
                "message",
                "environment",
                "project_id",
            ],
            order_by=
            "(org_id, project_id, toStartOfDay(timestamp), primary_hash_hex, event_hash)",
            partition_by=
            "(toMonday(timestamp), if(retention_days = 30, 30, 90))",
            version_column="deleted",
            sample_expr="event_hash",
            ttl_expr="timestamp + toIntervalDay(retention_days)",
            settings={"index_granularity": "8192"},
        )

        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=schema,
        )

        table_writer = TableWriter(
            write_schema=schema,
            stream_loader=KafkaStreamLoader(
                processor=ErrorsProcessor(self.__promoted_tag_columns),
                default_topic="events",
            ),
        )

        super().__init__(
            dataset_schemas=dataset_schemas,
            table_writer=table_writer,
            time_group_columns={
                "time": "timestamp",
                "rtime": "received"
            },
            time_parse_columns=("timestamp", "received"),
        )

        self.__tags_processor = TagColumnProcessor(
            columns=all_columns,
            promoted_columns=self._get_promoted_columns(),
            column_tag_map=self._get_column_tag_map(),
        )
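
To make the role of that mapping concrete, a hedged, standalone sketch (not Snuba's actual TagColumnProcessor) of how a promoted tag map short-circuits tags[...] access, using the same indexOf fallback pattern visible in Example #11:

from typing import Mapping

PROMOTED: Mapping[str, str] = {
    "environment": "environment",
    "sentry:release": "release",
    "sentry:dist": "dist",
    "sentry:user": "user",
    "transaction": "transaction_name",
    "level": "level",
}


def tag_expression(tag_key: str) -> str:
    """Render a ClickHouse expression equivalent to tags[tag_key]."""
    column = PROMOTED.get(tag_key)
    if column is not None:
        # Promoted: read the dedicated physical column directly.
        return column
    # Not promoted: index into the parallel key/value arrays of the
    # Nested tags column (indexOf returning 0 yields an empty value).
    return f"tags.value[indexOf(tags.key, '{tag_key}')]"


assert tag_expression("sentry:release") == "release"
assert tag_expression("custom") == "tags.value[indexOf(tags.key, 'custom')]"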