("outcome", UInt(8)), ("reason", String()), ("times_seen", UInt(64)), ]) materialized_view_schema = TableSchema( local_table_name="outcomes_mv_hourly_local", dist_table_name="outcomes_mv_hourly_dist", storage_set_key=StorageSetKey.OUTCOMES, columns=materialized_view_columns, ) raw_storage = WritableTableStorage( storage_key=StorageKey.OUTCOMES_RAW, storage_set_key=StorageSetKey.OUTCOMES, schema=raw_schema, query_processors=[], stream_loader=build_kafka_stream_loader_from_settings( StorageKey.OUTCOMES_RAW, processor=OutcomesProcessor(), default_topic_name="outcomes", ), ) materialized_storage = ReadableTableStorage( storage_key=StorageKey.OUTCOMES_HOURLY, storage_set_key=StorageSetKey.OUTCOMES, schema=read_schema, query_processors=[PrewhereProcessor(["project_id", "org_id"])], )
dist_table_name="errors_dist", storage_set_key=StorageSetKey.EVENTS, mandatory_conditions=mandatory_conditions, part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE], ) storage = WritableTableStorage( storage_key=StorageKey.ERRORS, storage_set_key=StorageSetKey.EVENTS, schema=schema, query_processors=query_processors, query_splitters=query_splitters, stream_loader=build_kafka_stream_loader_from_settings( StorageKey.ERRORS, processor=ErrorsProcessor(promoted_tag_columns), default_topic_name="events", replacement_topic_name="event-replacements", commit_log_topic_name="snuba-commit-log", ), replacer_processor=ErrorsReplacer( schema=schema, required_columns=required_columns, tag_column_map={ "tags": promoted_tag_columns, "contexts": {} }, promoted_tags={ "tags": list(promoted_tag_columns.keys()), "contexts": [] }, state_name=ReplacerState.ERRORS,
("clickhouse_queries.is_duplicate", Array(UInt(8))), ("clickhouse_queries.consistent", Array(UInt(8))), ("clickhouse_queries.all_columns", Array(Array(String()))), ("clickhouse_queries.or_conditions", Array(UInt(8))), ("clickhouse_queries.where_columns", Array(Array(String()))), ("clickhouse_queries.where_mapping_columns", Array(Array(String()))), ("clickhouse_queries.groupby_columns", Array(Array(String()))), ("clickhouse_queries.array_join_columns", Array(Array(String()))), ]) # Note, we are using the simplified WritableTableSchema class here instead of # the MergeTreeSchema that corresponds to the actual table engine. This is because # the querylog table isn't generated by the old migration system. schema = WritableTableSchema( columns=columns, local_table_name="querylog_local", dist_table_name="querylog_dist", storage_set_key=StorageSetKey.QUERYLOG, ) storage = WritableTableStorage( storage_key=StorageKey.QUERYLOG, storage_set_key=StorageSetKey.QUERYLOG, schema=schema, query_processors=[], stream_loader=build_kafka_stream_loader_from_settings( processor=QuerylogProcessor(), default_topic=Topic.QUERYLOG, ), )
        UUIDColumnProcessor(set(["event_id", "trace_id"])),
        HexIntColumnProcessor({"span_id"}),
        EventsBooleanContextsProcessor(),
        MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"),
        EmptyTagConditionProcessor(),
        ArrayJoinKeyValueOptimizer("tags"),
        ArrayJoinKeyValueOptimizer("measurements"),
        ArrayJoinKeyValueOptimizer("span_op_breakdowns"),
        PrewhereProcessor(
            [
                "event_id",
                "trace_id",
                "span_id",
                "transaction_name",
                "transaction",
                "title",
            ]
        ),
        TableRateLimit(),
    ],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=TransactionsMessageProcessor(),
        default_topic=Topic.EVENTS,
        commit_log_topic=Topic.COMMIT_LOG,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_TRANSACTIONS,
    ),
    query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")],
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    writer_options={"insert_allow_materialized_columns": 1},
)
            *PRE_VALUE_COLUMNS,
            Column("count_value", Float(64)),
            Column("set_values", Array(UInt(64))),
            Column("distribution_values", Array(Float(64))),
            *POST_VALUE_COLUMNS,
        ]),
        local_table_name="metrics_raw_v2_local",
        dist_table_name="metrics_raw_v2_dist",
        storage_set_key=StorageSetKey.METRICS,
    ),
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=PolymorphicMetricsProcessor(),
        default_topic=Topic.METRICS,
        commit_log_topic=Topic.METRICS_COMMIT_LOG,
        subscription_scheduler_mode=SchedulingWatermarkMode.GLOBAL,
        subscription_scheduled_topic=Topic.SUBSCRIPTION_SCHEDULED_METRICS,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_METRICS,
        dead_letter_queue_policy_creator=produce_policy_creator,
    ),
)

aggregated_columns = [
    Column("org_id", UInt(64)),
    Column("project_id", UInt(64)),
    Column("metric_id", UInt(64)),
    Column("granularity", UInt(32)),
    Column("timestamp", DateTime()),
    Column("retention_days", UInt(16)),
    Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
    Column("_tags_hash", Array(UInt(64), SchemaModifiers(readonly=True))),
materialized_view_schema = TableSchema(
    local_table_name="outcomes_mv_hourly_local",
    dist_table_name="outcomes_mv_hourly_dist",
    storage_set_key=StorageSetKey.OUTCOMES,
    columns=materialized_view_columns,
)

raw_storage = WritableTableStorage(
    storage_key=StorageKey.OUTCOMES_RAW,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=raw_schema,
    query_processors=[TableRateLimit()],
    mandatory_condition_checkers=[OrgIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=OutcomesProcessor(),
        default_topic=Topic.OUTCOMES,
    ),
)

materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.OUTCOMES_HOURLY,
    storage_set_key=StorageSetKey.OUTCOMES,
    schema=read_schema,
    query_processors=[
        PrewhereProcessor(["project_id", "org_id"]),
        TableRateLimit(),
    ],
    mandatory_condition_checkers=[OrgIdEnforcer()],
)
    columns=columns,
    local_table_name="transactions_local",
    dist_table_name="transactions_dist",
    storage_set_key=StorageSetKey.TRANSACTIONS,
    mandatory_conditions=[],
    part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE],
)

storage = WritableTableStorage(
    storage_key=StorageKey.TRANSACTIONS,
    storage_set_key=StorageSetKey.TRANSACTIONS,
    schema=schema,
    query_processors=query_processors,
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=TransactionsMessageProcessor(),
        pre_filter=KafkaHeaderFilterWithBypass("transaction_forwarder", "0", 100),
        default_topic=Topic.TRANSACTIONS,
        commit_log_topic=Topic.TRANSACTIONS_COMMIT_LOG,
        subscription_scheduler_mode=SchedulingWatermarkMode.GLOBAL,
        subscription_scheduled_topic=Topic.SUBSCRIPTION_SCHEDULED_TRANSACTIONS,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_TRANSACTIONS,
    ),
    query_splitters=query_splitters,
    mandatory_condition_checkers=mandatory_condition_checkers,
    writer_options={
        "insert_allow_materialized_columns": 1,
        "input_format_skip_unknown_fields": 1,
    },
)
("user_id", UInt(64, Modifiers(nullable=True))), ("team_id", UInt(64, Modifiers(nullable=True))), ]) schema = WritableTableSchema( columns=columns, local_table_name="groupassignee_local", dist_table_name="groupassignee_dist", storage_set_key=StorageSetKey.CDC, ) POSTGRES_TABLE = "sentry_groupasignee" storage = CdcStorage( storage_key=StorageKey.GROUPASSIGNEES, storage_set_key=StorageSetKey.CDC, schema=schema, query_processors=[ PrewhereProcessor(["project_id"]), ConsistencyEnforcerProcessor(), ], stream_loader=build_kafka_stream_loader_from_settings( processor=GroupAssigneeProcessor(POSTGRES_TABLE), default_topic=Topic.CDC, pre_filter=CdcTableNameMessageFilter(POSTGRES_TABLE), ), default_control_topic="cdc_control", postgres_table=POSTGRES_TABLE, row_processor=lambda row: GroupAssigneeRow.from_bulk(row).to_clickhouse(), )
sets_buckets = WritableTableStorage(
    storage_key=StorageKey.METRICS_BUCKETS,
    storage_set_key=StorageSetKey.METRICS,
    schema=WritableTableSchema(
        columns=ColumnSet([
            *PRE_VALUE_COLUMNS,
            Column("set_values", Array(UInt(64))),
            *POST_VALUE_COLUMNS,
        ]),
        local_table_name="metrics_buckets_local",
        dist_table_name="metrics_buckets_dist",
        storage_set_key=StorageSetKey.METRICS,
    ),
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=SetsMetricsProcessor(),
        default_topic=Topic.METRICS,
    ),
)

counters_buckets = WritableTableStorage(
    storage_key=StorageKey.METRICS_COUNTERS_BUCKETS,
    storage_set_key=StorageSetKey.METRICS,
    schema=WritableTableSchema(
        columns=ColumnSet([
            *PRE_VALUE_COLUMNS,
            Column("value", Float(64)),
            *POST_VALUE_COLUMNS,
        ]),
        local_table_name="metrics_counters_buckets_local",
        dist_table_name="metrics_counters_buckets_dist",
        storage_set_key=StorageSetKey.METRICS,
    ),
dist_table_name="sentry_dist", storage_set_key=StorageSetKey.EVENTS, mandatory_conditions=mandatory_conditions, part_format=[util.PartSegment.DATE, util.PartSegment.RETENTION_DAYS], ) storage = WritableTableStorage( storage_key=StorageKey.EVENTS, storage_set_key=StorageSetKey.EVENTS, schema=schema, query_processors=query_processors, stream_loader=build_kafka_stream_loader_from_settings( processor=EventsProcessor(promoted_tag_columns), default_topic=Topic.EVENTS, replacement_topic=Topic.EVENT_REPLACEMENTS_LEGACY, commit_log_topic=Topic.COMMIT_LOG, subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_EVENTS, ), query_splitters=query_splitters, mandatory_condition_checkers=[ProjectIdEnforcer()], replacer_processor=ErrorsReplacer( schema=schema, required_columns=[col.escaped for col in required_columns], tag_column_map=get_tag_column_map(), promoted_tags=get_promoted_tags(), state_name=ReplacerState.EVENTS, use_promoted_prewhere=True, ), )
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    # NOTE: the product side is restricted to a 6h window; however, it rounds
    # outwards, which extends the window to 7h.
    from_date, to_date = get_time_range(query, "started")
    if not from_date or not to_date or (to_date - from_date) > timedelta(hours=7):
        raise ValidationException(
            "Minute-resolution queries are restricted to a 7-hour time window."
        )


# The raw table we write to, and which we could potentially query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[MinuteResolutionProcessor()],
    mandatory_condition_checkers=[OrgIdEnforcer(), ProjectIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=SessionsProcessor(),
        default_topic=Topic.SESSIONS,
    ),
)

# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor(["project_id", "org_id"])],
    mandatory_condition_checkers=[OrgIdEnforcer(), ProjectIdEnforcer()],
)
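# A minimal sketch (not from the Snuba source; the datetime values are made
# up) of why the process_query check above allows 7 hours for what the
# product exposes as a 6-hour window: rounding each endpoint outward to an
# hour boundary can add up to one extra hour in total.
from datetime import datetime, timedelta

from_date = datetime(2022, 1, 1, 1, 30)   # hypothetical window start, 01:30
to_date = from_date + timedelta(hours=6)  # 07:30 -- a 6h product window
rounded_from = from_date.replace(minute=0)                   # down to 01:00
rounded_to = to_date.replace(minute=0) + timedelta(hours=1)  # up to 08:00
assert rounded_to - rounded_from == timedelta(hours=7)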
    mandatory_conditions=mandatory_conditions,
    part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE],
)

storage = WritableTableStorage(
    storage_key=StorageKey.ERRORS_V2,
    storage_set_key=StorageSetKey.ERRORS_V2,
    schema=schema,
    query_processors=query_processors,
    query_splitters=query_splitters,
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=ErrorsProcessor(promoted_tag_columns),
        pre_filter=KafkaHeaderFilter("transaction_forwarder", "1"),
        default_topic=Topic.EVENTS,
        replacement_topic=Topic.EVENT_REPLACEMENTS,
        commit_log_topic=Topic.COMMIT_LOG,
        subscription_scheduler_mode=SchedulingWatermarkMode.PARTITION,
        subscription_scheduled_topic=Topic.SUBSCRIPTION_SCHEDULED_EVENTS,
        subscription_result_topic=Topic.SUBSCRIPTION_RESULTS_EVENTS,
    ),
    replacer_processor=ErrorsReplacer(
        schema=schema,
        required_columns=required_columns,
        tag_column_map={"tags": promoted_tag_columns, "contexts": {}},
        promoted_tags={"tags": list(promoted_tag_columns.keys()), "contexts": []},
("clickhouse_queries.all_columns", Array(Array(String()))), ("clickhouse_queries.or_conditions", Array(UInt(8))), ("clickhouse_queries.where_columns", Array(Array(String()))), ("clickhouse_queries.where_mapping_columns", Array(Array(String()))), ("clickhouse_queries.groupby_columns", Array(Array(String()))), ("clickhouse_queries.array_join_columns", Array(Array(String()))), ] ) # Note, we are using the simplified WritableTableSchema class here instead of # the MergeTreeSchema that corresponds to the actual table engine. This is because # the querylog table isn't generated by the old migration system. schema = WritableTableSchema( columns=columns, local_table_name="querylog_local", dist_table_name="querylog_dist", storage_set_key=StorageSetKey.QUERYLOG, ) storage = WritableTableStorage( storage_key=StorageKey.QUERYLOG, storage_set_key=StorageSetKey.QUERYLOG, schema=schema, query_processors=[], stream_loader=build_kafka_stream_loader_from_settings( StorageKey.QUERYLOG, processor=QuerylogProcessor(), default_topic_name=settings.QUERIES_TOPIC, ), )
dist_table_name="transactions_dist", storage_set_key=StorageSetKey.TRANSACTIONS, mandatory_conditions=[], prewhere_candidates=[ "event_id", "transaction_name", "transaction", "title" ], part_format=[util.PartSegment.RETENTION_DAYS, util.PartSegment.DATE], ) storage = WritableTableStorage( storage_key=StorageKey.TRANSACTIONS, storage_set_key=StorageSetKey.TRANSACTIONS, schema=schema, query_processors=[ MappingOptimizer("tags", "_tags_hash_map", "tags_hash_map_enabled"), EventIdColumnProcessor(), ArrayJoinKeyValueOptimizer("tags"), ArrayJoinKeyValueOptimizer("measurements"), UUIDColumnProcessor(set(["event_id", "trace_id"])), PrewhereProcessor(), ], stream_loader=build_kafka_stream_loader_from_settings( StorageKey.TRANSACTIONS, processor=TransactionsMessageProcessor(), default_topic_name="events", commit_log_topic_name="snuba-commit-log", ), query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")], writer_options={"insert_allow_materialized_columns": 1}, )
from snuba.query.processors.type_converters.uuid_column_processor import (
    UUIDColumnProcessor,
)
from snuba.utils.streams.topics import Topic

PROFILES_LOCAL_TABLE_NAME = "profiles_local"
PROFILES_DIST_TABLE_NAME = "profiles_dist"

processors = [
    UUIDColumnProcessor(set(["profile_id", "transaction_id", "trace_id"])),
    TableRateLimit(),
]

loader = build_kafka_stream_loader_from_settings(
    processor=ProfilesMessageProcessor(),
    default_topic=Topic.PROFILES,
)

readable_columns = ColumnSet(
    [
        ("organization_id", UInt(64)),
        ("project_id", UInt(64)),
        ("transaction_id", UUID()),
        ("profile_id", UUID()),
        ("received", DateTime()),
        ("profile", String()),
        ("android_api_level", UInt(32, Modifiers(nullable=True))),
        ("device_classification", String()),
        ("device_locale", String()),
        ("device_manufacturer", String()),
        ("device_model", String()),
local_table_name="groupedmessage_local", dist_table_name="groupedmessage_dist", storage_set_key=StorageSetKey.EVENTS, mandatory_conditions=[ binary_condition( ConditionFunctions.EQ, Column(None, None, "record_deleted"), Literal(None, 0), ), ], prewhere_candidates=["project_id", "id"], ) POSTGRES_TABLE = "sentry_groupedmessage" storage = CdcStorage( storage_key=StorageKey.GROUPEDMESSAGES, storage_set_key=StorageSetKey.EVENTS, schema=schema, query_processors=[], stream_loader=build_kafka_stream_loader_from_settings( StorageKey.GROUPEDMESSAGES, processor=GroupedMessageProcessor(POSTGRES_TABLE), default_topic_name="cdc", pre_filter=CdcTableNameMessageFilter(POSTGRES_TABLE), ), default_control_topic="cdc_control", postgres_table=POSTGRES_TABLE, row_processor=lambda row: GroupedMessageRow.from_bulk(row).to_clickhouse(), )
    # sdk info
    ("sdk_name", String()),
    ("sdk_version", String()),
    ("tags", Nested([("key", String()), ("value", String())])),
    # deletion info
    ("retention_days", UInt(16)),
    ("partition", UInt(16)),
    ("offset", UInt(64)),
])

schema = WritableTableSchema(
    columns=columns,
    local_table_name=LOCAL_TABLE_NAME,
    dist_table_name=DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.REPLAYS,
)

# TODO: set up a dead-letter queue for bad messages.
storage = WritableTableStorage(
    storage_key=StorageKey.REPLAYS,
    storage_set_key=StorageSetKey.REPLAYS,
    schema=schema,
    query_processors=[TableRateLimit()],
    mandatory_condition_checkers=[ProjectIdEnforcer()],
    stream_loader=build_kafka_stream_loader_from_settings(
        processor=ReplaysProcessor(),
        default_topic=Topic.REPLAYEVENTS,
    ),
)
    dist_table_name=READ_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
)

materialized_view_schema = TableSchema(
    local_table_name=READ_LOCAL_MV_NAME,
    dist_table_name=READ_DIST_MV_NAME,
    storage_set_key=StorageSetKey.SESSIONS,
    columns=read_columns,
)

# The raw table we write to, and which we could potentially query.
raw_storage = WritableTableStorage(
    storage_key=StorageKey.SESSIONS_RAW,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=raw_schema,
    query_processors=[],
    stream_loader=build_kafka_stream_loader_from_settings(
        StorageKey.SESSIONS_RAW,
        processor=SessionsProcessor(),
        default_topic_name="ingest-sessions",
    ),
)

# The materialized view we query aggregate data from.
materialized_storage = ReadableTableStorage(
    storage_key=StorageKey.SESSIONS_HOURLY,
    storage_set_key=StorageSetKey.SESSIONS,
    schema=read_schema,
    query_processors=[PrewhereProcessor(["project_id", "org_id"])],
)
("start_ts", DateTime()), ("start_ns", UInt(32)), ("finish_ts", DateTime()), ("finish_ns", UInt(32)), ("duration_ms", UInt(32)), ("tags", Nested([("key", String()), ("value", String())])), ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))), ("retention_days", UInt(16)), ("deleted", UInt(8)), ]) schema = WritableTableSchema( columns=columns, local_table_name="spans_experimental_local", dist_table_name="spans_experimental_dist", storage_set_key=StorageSetKey.TRANSACTIONS, ) storage = WritableTableStorage( storage_key=StorageKey.SPANS, storage_set_key=StorageSetKey.TRANSACTIONS, schema=schema, query_processors=[HexIntColumnProcessor({"transaction_span_id"})], stream_loader=build_kafka_stream_loader_from_settings( processor=SpansMessageProcessor(), default_topic=Topic.EVENTS, commit_log_topic=Topic.COMMIT_LOG, ), query_splitters=[TimeSplitQueryStrategy(timestamp_col="finish_ts")], )