def test_add_column() -> None:
    """An AddColumn operation with an AFTER clause renders the expected ALTER DDL."""
    operation = AddColumn(
        StorageSetKey.EVENTS,
        "test_table",
        Column("test", String(Modifiers(nullable=True))),
        after="id",
    )
    expected = (
        "ALTER TABLE test_table ADD COLUMN IF NOT EXISTS test "
        "Nullable(String) AFTER id;"
    )
    assert operation.format_sql() == expected
def test_like_validator(
    expressions: Sequence[Expression],
    expected_types: Sequence[ParamType],
    extra_param: bool,
    should_raise: bool,
) -> None:
    """SignatureValidator accepts or rejects the call per the expected signature."""
    schema = ColumnSet(
        [
            ("event_id", String()),
            ("level", String(Modifiers(nullable=True))),
            ("str_col", String()),
            ("timestamp", DateTime()),
            ("received", DateTime(Modifiers(nullable=True))),
        ]
    )
    validator = SignatureValidator(expected_types, extra_param)
    if not should_raise:
        validator.validate(expressions, schema)
        return
    with pytest.raises(InvalidFunctionCall):
        validator.validate(expressions, schema)
def __init__(self) -> None:
    """Configure the spans entity: one writable storage, a simple query
    pipeline, and an inner join relationship to the transactions entity."""
    storage = get_writable_storage(StorageKey.SPANS)
    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=storage,
                mappers=TranslationMappers(
                    # Map tags[...] subscript expressions onto the storage's
                    # nested `tags` column.
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags")
                    ],
                ),
            ),
        ),
        # Logical schema exposed to queries against this entity.
        abstract_column_set=ColumnSet(
            [
                ("project_id", UInt(64)),
                ("transaction_id", UUID()),
                ("trace_id", UUID()),
                ("transaction_span_id", UInt(64)),
                ("span_id", UInt(64)),
                # Nullable: root spans have no parent.  NOTE(review):
                # inferred from the schema; verify against the processor.
                ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                ("transaction_name", String()),
                ("op", String()),
                ("status", UInt(8)),
                ("start_ts", DateTime()),
                ("start_ns", UInt(32)),
                ("finish_ts", DateTime()),
                ("finish_ns", UInt(32)),
                ("duration_ms", UInt(32)),
                ("tags", Nested([("key", String()), ("value", String())])),
            ]
        ),
        join_relationships={
            # Spans join to their containing transaction on
            # (project_id, transaction_span_id) = (project_id, span_id).
            "contained": JoinRelationship(
                rhs_entity=EntityKey.TRANSACTIONS,
                columns=[
                    ("project_id", "project_id"),
                    ("transaction_span_id", "span_id"),
                ],
                join_type=JoinType.INNER,
                # Columns declared semantically equivalent across the two
                # joined entities.
                equivalences=[
                    ColumnEquivalence("transaction_id", "event_id"),
                    ColumnEquivalence("transaction_name", "transaction_name"),
                    ColumnEquivalence("trace_id", "trace_id"),
                ],
            )
        },
        writable_storage=storage,
        # Every query must filter on project_id.
        validators=[EntityRequiredColumnValidator({"project_id"})],
        # No mandatory time column is enforced for this entity.
        required_time_column=None,
    )
def test_like_validator(
    expressions: Sequence[Expression],
    expected_types: Sequence[ParamType],
    extra_param: bool,
    should_raise: bool,
) -> None:
    """SignatureValidator accepts or rejects a `like` call against the events entity."""
    entity = QueryEntity(
        EntityKey.EVENTS,
        ColumnSet(
            [
                ("event_id", String()),
                ("level", String(Modifiers(nullable=True))),
                ("str_col", String()),
                ("timestamp", DateTime()),
                ("received", DateTime(Modifiers(nullable=True))),
            ]
        ),
    )
    func_name = "like"
    validator = SignatureValidator(expected_types, extra_param)
    if not should_raise:
        validator.validate(func_name, expressions, entity)
        return
    with pytest.raises(InvalidFunctionCall):
        validator.validate(func_name, expressions, entity)
def test_create_table() -> None:
    """CreateTable with a ReplacingMergeTree engine renders the expected DDL."""
    schema = [
        Column("id", String()),
        Column("name", String(Modifiers(nullable=True))),
        Column("version", UInt(64)),
    ]
    engine = ReplacingMergeTree(
        storage_set=StorageSetKey.EVENTS,
        version_column="version",
        order_by="version",
        settings={"index_granularity": "256"},
    )
    statement = CreateTable(StorageSetKey.EVENTS, "test_table", schema, engine)
    expected = (
        "CREATE TABLE IF NOT EXISTS test_table "
        "(id String, name Nullable(String), version UInt64) "
        "ENGINE ReplacingMergeTree(version) ORDER BY version "
        "SETTINGS index_granularity=256;"
    )
    assert statement.format_sql() == expected
def test_create_table() -> None:
    """CreateTable renders either the local or the replicated DDL, depending
    on how the test cluster is configured."""
    database = os.environ.get("CLICKHOUSE_DATABASE", "default")
    schema = [
        Column("id", String()),
        Column("name", String(Modifiers(nullable=True))),
        Column("version", UInt(64)),
    ]
    engine = ReplacingMergeTree(
        storage_set=StorageSetKey.EVENTS,
        version_column="version",
        order_by="version",
        settings={"index_granularity": "256"},
    )
    statement = CreateTable(StorageSetKey.EVENTS, "test_table", schema, engine)
    local_sql = "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplacingMergeTree(version) ORDER BY version SETTINGS index_granularity=256;"
    replicated_sql = (
        "CREATE TABLE IF NOT EXISTS test_table (id String, name Nullable(String), version UInt64) ENGINE ReplicatedReplacingMergeTree('/clickhouse/tables/events/{shard}/"
        + f"{database}/test_table'"
        + ", '{replica}', version) ORDER BY version SETTINGS index_granularity=256;"
    )
    assert statement.format_sql() in [local_sql, replicated_sql]
from snuba.clickhouse.columns import String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.querylog_processor import QuerylogProcessor
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.utils.streams.topics import Topic

# Physical schema of the querylog storage: one row per Snuba API request.
# NOTE(review): ColumnSet/UUID/Array/DateTime/Modifiers are used below but not
# imported above — presumably imported elsewhere in this file; verify.
columns = ColumnSet(
    [
        ("request_id", UUID()),
        ("request_body", String()),
        ("referrer", String()),
        ("dataset", String()),
        ("projects", Array(UInt(64))),
        ("organization", UInt(64, Modifiers(nullable=True))),
        ("timestamp", DateTime()),
        ("duration_ms", UInt(32)),
        ("status", String()),
        # clickhouse_queries Nested columns.
        # This is expanded into arrays instead of being expressed as a
        # Nested column because, when adding new columns to a nested field
        # we need to provide a default for the entire array (each new column
        # is an array).
        # The same schema cannot be achieved with the Nested construct (where
        # we can only provide default for individual values), so, if we
        # use the Nested construct, this schema cannot match the one generated
        # by the migration framework (or by any ALTER statement).
        ("clickhouse_queries.sql", Array(String())),
        ("clickhouse_queries.status", Array(String())),
        ("clickhouse_queries.trace_id", Array(UUID(Modifiers(nullable=True)))),
from snuba.query.processors.table_rate_limit import TableRateLimit
from snuba.utils.schemas import Nested
from snuba.utils.streams.topics import Topic

# Table names for the replays storage (single-node vs distributed ClickHouse).
LOCAL_TABLE_NAME = "replays_local"
DIST_TABLE_NAME = "replays_dist"

# Physical schema of the replays storage.
columns = ColumnSet(
    [
        ("replay_id", UUID()),
        ("sequence_id", UInt(16)),
        ("timestamp", DateTime()),
        (
            "trace_ids",
            Array(UUID()),
        ),  # TODO: create bloom filter index / materialize column
        ("title", String(Modifiers(readonly=True))),
        ### common sentry event columns
        ("project_id", UInt(64)),
        # release/environment info
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        # user columns
        ("user", String()),
        ("user_hash", UInt(64, Modifiers(readonly=True))),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
OrganizationRateLimiterProcessor,
ProjectRateLimiterProcessor,
ProjectReferrerRateLimiter,
ReferrerRateLimiterProcessor,
)
from snuba.query.processors.quota_processor import ResourceQuotaProcessor
from snuba.query.validation.validators import EntityRequiredColumnValidator

# Logical column set exposed by the profiles entity.
profile_columns = EntityColumnSet(
    [
        Column("organization_id", UInt(64)),
        Column("project_id", UInt(64)),
        Column("transaction_id", UUID()),
        Column("profile_id", UUID()),
        Column("received", DateTime()),
        Column("profile", String()),
        # Nullable: presumably only populated for Android profiles — verify.
        Column("android_api_level", UInt(32, Modifiers(nullable=True))),
        Column("device_classification", String()),
        Column("device_locale", String()),
        Column("device_manufacturer", String()),
        Column("device_model", String()),
        Column("device_os_build_number", String(Modifiers(nullable=True))),
        Column("device_os_name", String()),
        Column("device_os_version", String()),
        Column("duration_ns", UInt(64)),
        Column("environment", String(Modifiers(nullable=True))),
        Column("platform", String()),
        Column("trace_id", UUID()),
        Column("transaction_name", String()),
        Column("version_name", String()),
        Column("version_code", String()),
    ]
)
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.datasets.transactions_processor import TransactionsMessageProcessor
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer,
)
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.query.processors.uuid_column_processor import UUIDColumnProcessor
from snuba.web.split import TimeSplitQueryStrategy

# Physical schema of the transactions storage.
columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("event_id", UUID()),
        ("trace_id", UUID()),
        ("span_id", UInt(64)),
        ("transaction_name", String()),
        # readonly: presumably materialized from transaction_name — verify.
        ("transaction_hash", UInt(64, Modifiers(readonly=True))),
        ("transaction_op", String()),
        ("transaction_status", UInt(8)),
        ("start_ts", DateTime()),
        ("start_ms", UInt(16)),
        ("finish_ts", DateTime()),
        ("finish_ms", UInt(16)),
        ("duration", UInt(32)),
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        ("user", String()),
        ("user_hash", UInt(64, Modifiers(readonly=True))),
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import WritableTableSchema
from snuba.datasets.spans_processor import SpansMessageProcessor
from snuba.datasets.storage import WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import TimeSplitQueryStrategy

# Physical schema of the spans storage.
columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("transaction_id", UUID()),
        ("trace_id", UUID()),
        ("transaction_span_id", UInt(64)),
        ("span_id", UInt(64)),
        ("parent_span_id", UInt(64, Modifiers(nullable=True))),
        ("transaction_name", String()),
        ("description", String()),  # description in span
        ("op", String()),
        ("status", UInt(8)),
        ("start_ts", DateTime()),
        ("start_ns", UInt(32)),
        ("finish_ts", DateTime()),
        ("finish_ns", UInt(32)),
        ("duration_ms", UInt(32)),
        ("tags", Nested([("key", String()), ("value", String())])),
        # readonly: presumably a materialized hash of the tags map — verify.
        ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))),
        ("retention_days", UInt(16)),
        ("deleted", UInt(8)),
    ]
)
def __init__(self) -> None:
    """Configure the discover entity, delegating every query to the events
    pipeline and mirroring a configured fraction to the discover pipeline."""
    # Columns shared by both events and transactions rows.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # Pipeline serving discover queries from the events storage.
    events_storage = get_storage(StorageKey.EVENTS)
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers
                )
                .concat(null_function_translation_mappers)
                .concat(
                    TranslationMappers(
                        # XXX: Remove once we are using errors
                        columns=[
                            ColumnToMapping(
                                None, "release", None, "tags", "sentry:release"
                            ),
                            ColumnToMapping(
                                None, "dist", None, "tags", "sentry:dist"
                            ),
                            ColumnToMapping(
                                None, "user", None, "tags", "sentry:user"
                            ),
                        ],
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None, "contexts"),
                        ],
                    )
                )
            )
        ),
    )

    # Pipeline serving the same queries from the dedicated discover storage.
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            mappers=events_translation_mappers.concat(
                transaction_translation_mappers
            )
            .concat(null_function_translation_mappers)
            .concat(
                TranslationMappers(
                    columns=[
                        # ip_address is stored as separate v4/v6 columns;
                        # coalesce them back into one logical column.
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"),),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"),),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None, "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        # geo columns live in the contexts map on this storage.
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                        ),
                        ColumnToMapping(None, "geo_region", None, "contexts", "geo.region"),
                        ColumnToMapping(None, "geo_city", None, "contexts", "geo.city"),
                        # Empty user strings are mapped to NULL.
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ]
                )
            )
            .concat(
                TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                        SubscriptableMapper(None, "contexts", None, "contexts"),
                    ],
                )
            ),
        )
    )

    def selector_func(_query: Query) -> Tuple[str, List[str]]:
        # Always serve from the events pipeline; additionally mirror a
        # runtime-config-controlled percentage of traffic to discover.
        if random.random() < float(
            state.get_config("discover_query_percentage", 0)
        ):
            return "events", ["discover"]
        return "events", []

    super().__init__(
        storages=[events_storage, discover_storage],
        query_pipeline_builder=PipelineDelegator(
            query_pipeline_builders={
                "events": events_pipeline_builder,
                "discover": discover_pipeline_builder,
            },
            selector_func=selector_func,
            # Tag the callback with the "discover" label, presumably so the
            # mirrored results can be compared against primary — verify.
            callback_func=partial(callback_func, "discover"),
        ),
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        writable_storage=None,
    )
def __init__(self) -> None:
    """Configure the discover entity backed solely by the events storage."""
    # Columns shared by both events and transactions rows.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    events_storage = get_storage(StorageKey.EVENTS)
    super().__init__(
        storages=[events_storage],
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers
                )
                .concat(null_function_translation_mappers)
                .concat(
                    TranslationMappers(
                        # XXX: Remove once we are using errors
                        columns=[
                            ColumnToMapping(
                                None, "release", None, "tags", "sentry:release"
                            ),
                            ColumnToMapping(
                                None, "dist", None, "tags", "sentry:dist"
                            ),
                            ColumnToMapping(
                                None, "user", None, "tags", "sentry:user"
                            ),
                        ],
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None, "contexts"),
                        ],
                    )
                )
            )
        ),
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        writable_storage=None,
    )
def __init__(self) -> None:
    """Configure the discover entity; the backing storage and pipeline depend
    on settings.ERRORS_ROLLOUT_ALL (discover storage with a sampled mirror
    pipeline when rolled out, otherwise the events storage)."""
    # Columns shared by both events and transactions rows.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # Pipeline serving discover queries from the events storage.
    events_storage = get_storage(StorageKey.EVENTS)
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers
                )
                .concat(null_function_translation_mappers)
                .concat(
                    TranslationMappers(
                        # XXX: Remove once we are using errors
                        columns=[
                            ColumnToMapping(
                                None, "release", None, "tags", "sentry:release"
                            ),
                            ColumnToMapping(
                                None, "dist", None, "tags", "sentry:dist"
                            ),
                            ColumnToMapping(
                                None, "user", None, "tags", "sentry:user"
                            ),
                        ],
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None, "contexts"),
                        ],
                    )
                )
            )
        ),
    )

    # Plan builder serving the same queries from the discover storage.
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=events_translation_mappers.concat(
            transaction_translation_mappers
        )
        .concat(null_function_translation_mappers)
        .concat(
            TranslationMappers(
                columns=[
                    # ip_address is stored as separate v4/v6 columns;
                    # coalesce them back into one logical column.
                    ColumnToFunction(
                        None,
                        "ip_address",
                        "coalesce",
                        (
                            FunctionCall(
                                None,
                                "IPv4NumToString",
                                (Column(None, None, "ip_address_v4"),),
                            ),
                            FunctionCall(
                                None,
                                "IPv6NumToString",
                                (Column(None, None, "ip_address_v6"),),
                            ),
                        ),
                    ),
                    ColumnToColumn(None, "transaction", None, "transaction_name"),
                    ColumnToColumn(None, "username", None, "user_name"),
                    ColumnToColumn(None, "email", None, "user_email"),
                    # geo columns live in the (nullable) contexts map.
                    ColumnToMapping(
                        None,
                        "geo_country_code",
                        None,
                        "contexts",
                        "geo.country_code",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_region",
                        None,
                        "contexts",
                        "geo.region",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_city",
                        None,
                        "contexts",
                        "geo.city",
                        nullable=True,
                    ),
                    # Empty user strings are mapped to NULL.
                    ColumnToFunction(
                        None,
                        "user",
                        "nullIf",
                        (Column(None, None, "user"), Literal(None, "")),
                    ),
                ]
            )
        )
        .concat(
            TranslationMappers(
                subscriptables=[
                    SubscriptableMapper(None, "tags", None, "tags"),
                    SubscriptableMapper(None, "contexts", None, "contexts"),
                ],
            )
        ),
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    pipeline_builder: Union[PipelineDelegator, SimplePipelineBuilder]
    if settings.ERRORS_ROLLOUT_ALL:
        # Rolled out: serve from discover storage, with a sampled copy of
        # the same plan builder running alongside the primary.
        storage = discover_storage
        sampled_pipeline_builder = SampledSimplePipelineBuilder(
            query_plan_builder=discover_storage_plan_builder
        )
        pipeline_builder = PipelineDelegator(
            query_pipeline_builders={
                "primary": discover_pipeline_builder,
                "sampler": sampled_pipeline_builder,
            },
            selector_func=sampling_selector_func,
            callback_func=sampling_callback_func,
        )
    else:
        storage = events_storage
        pipeline_builder = events_pipeline_builder

    super().__init__(
        storages=[storage],
        query_pipeline_builder=pipeline_builder,
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        writable_storage=None,
        # Every query must filter on project_id and the timestamp column.
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
from snuba.datasets.outcomes_processor import OutcomesProcessor
from snuba.datasets.schemas.tables import TableSchema, WritableTableSchema
from snuba.datasets.storage import ReadableTableStorage, WritableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.prewhere import PrewhereProcessor

# Per the table names: writes go to the raw tables, reads are served from
# the hourly rollup tables.
WRITE_LOCAL_TABLE_NAME = "outcomes_raw_local"
WRITE_DIST_TABLE_NAME = "outcomes_raw_dist"
READ_LOCAL_TABLE_NAME = "outcomes_hourly_local"
READ_DIST_TABLE_NAME = "outcomes_hourly_dist"

# Schema of the raw (write-side) table.
write_columns = ColumnSet(
    [
        ("org_id", UInt(64)),
        ("project_id", UInt(64)),
        ("key_id", UInt(64, Modifiers(nullable=True))),
        ("timestamp", DateTime()),
        ("outcome", UInt(8)),
        ("reason", String(Modifiers(nullable=True))),
        ("event_id", UUID(Modifiers(nullable=True))),
    ]
)

raw_schema = WritableTableSchema(
    columns=write_columns,
    # TODO: change to outcomes.raw_local when we add multi DB support
    local_table_name=WRITE_LOCAL_TABLE_NAME,
    dist_table_name=WRITE_DIST_TABLE_NAME,
    storage_set_key=StorageSetKey.OUTCOMES,
)

# Schema of the hourly rollup (read-side) table.
read_columns = ColumnSet([
subscript_names: Set[str]  # column names whose subscript access is nulled out

def attempt_map(
    self,
    expression: SubscriptableReference,
    children_translator: SnubaClickhouseStrictTranslator,
) -> Optional[FunctionCall]:
    """Map a subscript access on any column listed in `subscript_names` to
    identity(NULL) under the original alias; return None (no mapping) for
    all other columns."""
    if expression.column.column_name in self.subscript_names:
        return identity(Literal(None, None), expression.alias)
    else:
        return None

# Columns only present on events (error) rows; all nullable since they are
# absent for transactions.
EVENTS_COLUMNS = ColumnSet(
    [
        ("group_id", UInt(64, Modifiers(nullable=True))),
        ("primary_hash", FixedString(32, Modifiers(nullable=True))),
        # Promoted tags
        ("level", String(Modifiers(nullable=True))),
        ("logger", String(Modifiers(nullable=True))),
        ("server_name", String(Modifiers(nullable=True))),
        ("site", String(Modifiers(nullable=True))),
        ("url", String(Modifiers(nullable=True))),
        ("location", String(Modifiers(nullable=True))),
        ("culprit", String(Modifiers(nullable=True))),
        ("received", DateTime(Modifiers(nullable=True))),
        ("sdk_integrations", Array(String(), Modifiers(nullable=True))),
        ("version", String(Modifiers(nullable=True))),
        # exception interface
        (
            "exception_stacks",
import pytest

from snuba.clickhouse.columns import ColumnSet, Nested
from snuba.clickhouse.columns import SchemaModifiers as Modifiers
from snuba.clickhouse.columns import String, UInt
from snuba.clickhouse.query import Query as ClickhouseQuery
from snuba.query import SelectedExpression
from snuba.query.data_source.simple import Table
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.request.request_settings import HTTPRequestSettings

# Schema used by the test queries: one promotable column plus nested tags.
columns = ColumnSet(
    [
        ("promoted", UInt(8, Modifiers(nullable=True))),
        ("tags", Nested([("key", String()), ("value", String())])),
    ]
)

# (name, query, ...) cases for the mapping-promoter tests.
test_cases = [
    (
        "not promoted",
        ClickhouseQuery(
            Table("events", columns),
            selected_columns=[
                SelectedExpression(
                    "tags[foo]",
                    FunctionCall(
                        "tags[foo]",
                        "arrayValue",
                        (
                            Column(None, None, "tags.value"),
                            FunctionCall(
                                None,
def __init__(self) -> None:
    """Configure the discover entity backed solely by the discover storage."""
    # Columns shared by both events and transactions rows.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
            ("trace_id", String(Modifiers(nullable=True))),
            ("span_id", UInt(64, Modifiers(nullable=True))),
        ]
    )
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_storage_plan_builder = SingleStorageQueryPlanBuilder(
        storage=discover_storage,
        mappers=events_translation_mappers.concat(
            transaction_translation_mappers
        )
        .concat(null_function_translation_mappers)
        .concat(
            TranslationMappers(
                columns=[
                    # ip_address is stored as separate v4/v6 columns;
                    # coalesce them back into one logical column.
                    ColumnToFunction(
                        None,
                        "ip_address",
                        "coalesce",
                        (
                            FunctionCall(
                                None,
                                "IPv4NumToString",
                                (Column(None, None, "ip_address_v4"),),
                            ),
                            FunctionCall(
                                None,
                                "IPv6NumToString",
                                (Column(None, None, "ip_address_v6"),),
                            ),
                        ),
                    ),
                    ColumnToColumn(None, "transaction", None, "transaction_name"),
                    ColumnToColumn(None, "username", None, "user_name"),
                    ColumnToColumn(None, "email", None, "user_email"),
                    # geo columns live in the (nullable) contexts map.
                    ColumnToMapping(
                        None,
                        "geo_country_code",
                        None,
                        "contexts",
                        "geo.country_code",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_region",
                        None,
                        "contexts",
                        "geo.region",
                        nullable=True,
                    ),
                    ColumnToMapping(
                        None,
                        "geo_city",
                        None,
                        "contexts",
                        "geo.city",
                        nullable=True,
                    ),
                    # Empty user strings are mapped to NULL.
                    ColumnToFunction(
                        None,
                        "user",
                        "nullIf",
                        (Column(None, None, "user"), Literal(None, "")),
                    ),
                ]
            )
        )
        .concat(
            TranslationMappers(
                subscriptables=[
                    SubscriptableMapper(None, "tags", None, "tags"),
                    SubscriptableMapper(None, "contexts", None, "contexts"),
                ],
            )
        ),
    )
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=discover_storage_plan_builder
    )

    super().__init__(
        storages=[discover_storage],
        query_pipeline_builder=discover_pipeline_builder,
        abstract_column_set=(
            self.__common_columns
            + self.__events_columns
            + self.__transactions_columns
        ),
        join_relationships={},
        writable_storage=None,
        # Every query must filter on project_id and the timestamp column.
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column="timestamp",
    )
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.expressions import Column, Literal

# Schema of the grouped-message (CDC) storage.
columns = ColumnSet(
    [
        # columns to maintain the dataset
        # Kafka topic offset
        ("offset", UInt(64)),
        # GroupStatus in Sentry does not have a 'DELETED' state that reflects the deletion
        # of the record. Having a dedicated clickhouse-only flag to identify this case seems
        # more consistent than add an additional value into the status field below that does not
        # exists on the Sentry side.
        ("record_deleted", UInt(8)),
        # PG columns
        ("project_id", UInt(64)),
        ("id", UInt(64)),
        ("status", UInt(8, Modifiers(nullable=True))),
        ("last_seen", DateTime(Modifiers(nullable=True))),
        ("first_seen", DateTime(Modifiers(nullable=True))),
        ("active_at", DateTime(Modifiers(nullable=True))),
        ("first_release_id", UInt(64, Modifiers(nullable=True))),
    ]
)

schema = WritableTableSchema(
    columns=columns,
    local_table_name="groupedmessage_local",
    dist_table_name="groupedmessage_dist",
    storage_set_key=StorageSetKey.EVENTS,
    # Mandatory filter on record_deleted applied to every query (the
    # comparison value continues beyond this chunk).
    mandatory_conditions=[
        binary_condition(
            ConditionFunctions.EQ,
            Column(None, None, "record_deleted"),
from snuba.clickhouse.columns import SchemaModifiers as Modifiers
from snuba.clickhouse.columns import String, UInt
from snuba.clickhouse.query import Query
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.schemas.tables import TableSchema
from snuba.datasets.storage import ReadableTableStorage
from snuba.datasets.storages import StorageKey
from snuba.query import SelectedExpression
from snuba.query.data_source.simple import Table
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.processors.null_column_caster import NullColumnCaster
from snuba.query.query_settings import HTTPQuerySettings

# Two test schemas whose shared column names deliberately disagree on
# nullability (fixture input for NullColumnCaster).
columns1 = ColumnSet(
    [
        ("not_mismatched", DateTime()),
        ("mismatched1", String(Modifiers(nullable=True))),
        ("mismatched2", UInt(64, Modifiers(nullable=True))),
    ]
)

columns2 = ColumnSet(
    [
        ("timestamp", DateTime()),
        ("mismatched1", String()),  # non-nullable by default
        ("mismatched2", UInt(64, Modifiers(nullable=False))),
    ]
)

schema1 = TableSchema(
    columns=columns1,
    local_table_name="discover_local",
    dist_table_name="discover_dist",
    storage_set_key=StorageSetKey.DISCOVER,
    mandatory_conditions=[],
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.processors.consistency_enforcer import (
    ConsistencyEnforcerProcessor,
)
from snuba.datasets.table_storage import build_kafka_stream_loader_from_settings
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.utils.streams.topics import Topic

# Schema of the group-assignee (CDC) storage.
columns = ColumnSet(
    [
        # columns to maintain the dataset
        # Kafka topic offset
        ("offset", UInt(64)),
        # ClickHouse-only soft-delete flag for the mirrored PG row.
        ("record_deleted", UInt(8)),
        # PG columns
        ("project_id", UInt(64)),
        ("group_id", UInt(64)),
        ("date_added", DateTime(Modifiers(nullable=True))),
        ("user_id", UInt(64, Modifiers(nullable=True))),
        ("team_id", UInt(64, Modifiers(nullable=True))),
    ]
)

schema = WritableTableSchema(
    columns=columns,
    local_table_name="groupassignee_local",
    dist_table_name="groupassignee_dist",
    storage_set_key=StorageSetKey.CDC,
)

# NOTE(review): the spelling "groupasignee" looks intentional — presumably it
# matches the actual Postgres table name; verify before "fixing" it.
POSTGRES_TABLE = "sentry_groupasignee"

storage = CdcStorage(
    storage_key=StorageKey.GROUPASSIGNEES,
from snuba.query.composite import CompositeQuery
from snuba.query.conditions import binary_condition
from snuba.query.data_source.join import (
    IndividualNode,
    JoinClause,
    JoinCondition,
    JoinConditionExpression,
    JoinType,
)
from snuba.query.data_source.simple import Entity, Table
from snuba.query.expressions import Column, FunctionCall, Literal
from snuba.query.formatters.tracing import TExpression, format_query
from snuba.query.logical import Query as LogicalQuery
from tests.query.joins.equivalence_schema import EVENTS_SCHEMA, GROUPS_SCHEMA

# Minimal schema used by the join-formatting tests.
columns = ColumnSet([("some_int", UInt(8, Modifiers(nullable=True)))])

# Join of events ("ev") to grouped messages ("gr") on ev.group_id = gr.id.
BASIC_JOIN = JoinClause(
    left_node=IndividualNode(
        alias="ev",
        data_source=Entity(EntityKey.EVENTS, EVENTS_SCHEMA, None),
    ),
    right_node=IndividualNode(
        alias="gr",
        data_source=Entity(EntityKey.GROUPEDMESSAGES, GROUPS_SCHEMA, None),
    ),
    keys=[
        JoinCondition(
            left=JoinConditionExpression("ev", "group_id"),
            right=JoinConditionExpression("gr", "id"),
        )
# Columns every row must carry — presumably required by the replacement /
# deletion machinery for this storage; verify against its consumer.
required_columns = [
    "event_id",
    "primary_hash",
    "project_id",
    "group_id",
    "timestamp",
    "deleted",
    "retention_days",
]

# Full physical schema of the storage.
all_columns = ColumnSet(
    [
        ("project_id", UInt(64)),
        ("timestamp", DateTime()),
        ("event_id", UUID()),
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        ("user", String()),
        ("user_hash", UInt(64, Modifiers(readonly=True))),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
        ("http_referer", String(Modifiers(nullable=True))),
        ("tags", Nested([("key", String()), ("value", String())])),
        ("_tags_hash_map", Array(UInt(64), Modifiers(readonly=True))),
PostReplacementConsistencyEnforcer,
)
from snuba.query.conditions import ConditionFunctions, binary_condition
from snuba.query.expressions import Column, Literal
from snuba.query.processors.arrayjoin_keyvalue_optimizer import (
    ArrayJoinKeyValueOptimizer,
)
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.web.split import ColumnSplitQueryStrategy, TimeSplitQueryStrategy

# Kafka-stream bookkeeping columns; nullable since they are optional.
metadata_columns = ColumnSet(
    [
        # optional stream related data
        ("offset", UInt(64, Modifiers(nullable=True))),
        ("partition", UInt(16, Modifiers(nullable=True))),
        ("message_timestamp", DateTime()),
    ]
)

promoted_tag_columns = ColumnSet(
    [
        # These are the classic tags, they are saved in Snuba exactly as they
        # appear in the event body.
        ("level", String(Modifiers(nullable=True))),
        ("logger", String(Modifiers(nullable=True))),
        ("server_name", String(Modifiers(nullable=True))),  # future name: device_id?
        ("transaction", String(Modifiers(nullable=True))),
        ("environment", String(Modifiers(nullable=True))),
        ("sentry:release", String(Modifiers(nullable=True))),
from snuba.query.processors.mapping_optimizer import MappingOptimizer
from snuba.query.processors.mapping_promoter import MappingColumnPromoter
from snuba.query.processors.prewhere import PrewhereProcessor
from snuba.query.processors.type_converters.uuid_column_processor import (
    UUIDColumnProcessor,
)
from snuba.web.split import ColumnSplitQueryStrategy, TimeSplitQueryStrategy

# Schema of this storage's table. Nullable modifiers mark fields that may be
# absent from the event payload.
# NOTE(review): this ColumnSet literal continues past the end of this chunk.
columns = ColumnSet(
    [
        ("event_id", UUID()),
        ("project_id", UInt(64)),
        ("type", String()),
        ("timestamp", DateTime()),
        ("platform", String()),
        ("environment", String(Modifiers(nullable=True))),
        ("release", String(Modifiers(nullable=True))),
        ("dist", String(Modifiers(nullable=True))),
        ("transaction_name", String()),
        ("message", String()),
        ("title", String()),
        # User fields
        ("user", String()),
        ("user_hash", UInt(64)),
        ("user_id", String(Modifiers(nullable=True))),
        ("user_name", String(Modifiers(nullable=True))),
        ("user_email", String(Modifiers(nullable=True))),
        ("ip_address_v4", IPv4(Modifiers(nullable=True))),
        ("ip_address_v6", IPv6(Modifiers(nullable=True))),
        # SDK fields
        ("sdk_name", String(Modifiers(nullable=True))),
        ("sdk_version", String(Modifiers(nullable=True))),
        ("http_method", String(Modifiers(nullable=True))),
def __init__(self) -> None:
    """Build a merged, read-only entity over the events and transactions data.

    Two query pipelines are constructed — one backed by the events storage,
    one by the dedicated discover storage — and a ``PipelineDelegator``
    routes each request between them via ``selector_func`` below. The entity
    is read-only (``writable_storage=None``) and requires a ``project_id``
    filter and a ``timestamp`` time column on every query.
    """
    # Columns common to both underlying tables; queries against this entity
    # may reference any of these regardless of which storage serves them.
    self.__common_columns = ColumnSet(
        [
            ("event_id", FixedString(32)),
            ("project_id", UInt(64)),
            ("type", String(Modifiers(nullable=True))),
            ("timestamp", DateTime()),
            ("platform", String(Modifiers(nullable=True))),
            ("environment", String(Modifiers(nullable=True))),
            ("release", String(Modifiers(nullable=True))),
            ("dist", String(Modifiers(nullable=True))),
            ("user", String(Modifiers(nullable=True))),
            ("transaction", String(Modifiers(nullable=True))),
            ("message", String(Modifiers(nullable=True))),
            ("title", String(Modifiers(nullable=True))),
            # User
            ("user_id", String(Modifiers(nullable=True))),
            ("username", String(Modifiers(nullable=True))),
            ("email", String(Modifiers(nullable=True))),
            ("ip_address", String(Modifiers(nullable=True))),
            # SDK
            ("sdk_name", String(Modifiers(nullable=True))),
            ("sdk_version", String(Modifiers(nullable=True))),
            # geo location context
            ("geo_country_code", String(Modifiers(nullable=True))),
            ("geo_region", String(Modifiers(nullable=True))),
            ("geo_city", String(Modifiers(nullable=True))),
            ("http_method", String(Modifiers(nullable=True))),
            ("http_referer", String(Modifiers(nullable=True))),
            # Other tags and context
            ("tags", Nested([("key", String()), ("value", String())])),
            ("contexts", Nested([("key", String()), ("value", String())])),
        ]
    )
    # Dataset-specific column sets defined elsewhere in the project.
    self.__events_columns = EVENTS_COLUMNS
    self.__transactions_columns = TRANSACTIONS_COLUMNS

    # Pipeline 1: serve the query from the events storage. The extra
    # TranslationMappers below map promoted columns back onto tags.
    events_storage = get_storage(StorageKey.EVENTS)
    events_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SelectedStorageQueryPlanBuilder(
            selector=EventsQueryStorageSelector(
                # NOTE: concat order matters — translation rules are layered
                # in exactly this sequence.
                mappers=events_translation_mappers.concat(
                    transaction_translation_mappers
                )
                .concat(null_function_translation_mappers)
                .concat(
                    TranslationMappers(
                        # XXX: Remove once we are using errors
                        columns=[
                            ColumnToMapping(
                                None, "release", None, "tags", "sentry:release"
                            ),
                            ColumnToMapping(None, "dist", None, "tags", "sentry:dist"),
                            ColumnToMapping(None, "user", None, "tags", "sentry:user"),
                        ],
                        subscriptables=[
                            SubscriptableMapper(None, "tags", None, "tags"),
                            SubscriptableMapper(None, "contexts", None, "contexts"),
                        ],
                    )
                )
            )
        ),
    )

    # Pipeline 2: serve the query from the dedicated discover storage,
    # translating common-schema column names to that storage's columns
    # (e.g. transaction -> transaction_name, ip_address -> coalesce of the
    # v4/v6 columns rendered as strings, empty user -> NULL).
    discover_storage = get_storage(StorageKey.DISCOVER)
    discover_pipeline_builder = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=discover_storage,
            # NOTE: concat order matters here as well.
            mappers=events_translation_mappers.concat(
                transaction_translation_mappers
            )
            .concat(null_function_translation_mappers)
            .concat(
                TranslationMappers(
                    columns=[
                        ColumnToFunction(
                            None,
                            "ip_address",
                            "coalesce",
                            (
                                FunctionCall(
                                    None,
                                    "IPv4NumToString",
                                    (Column(None, None, "ip_address_v4"),),
                                ),
                                FunctionCall(
                                    None,
                                    "IPv6NumToString",
                                    (Column(None, None, "ip_address_v6"),),
                                ),
                            ),
                        ),
                        ColumnToColumn(None, "transaction", None, "transaction_name"),
                        ColumnToColumn(None, "username", None, "user_name"),
                        ColumnToColumn(None, "email", None, "user_email"),
                        # Geo fields live in the contexts map on this storage.
                        ColumnToMapping(
                            None,
                            "geo_country_code",
                            None,
                            "contexts",
                            "geo.country_code",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_region",
                            None,
                            "contexts",
                            "geo.region",
                            nullable=True,
                        ),
                        ColumnToMapping(
                            None,
                            "geo_city",
                            None,
                            "contexts",
                            "geo.city",
                            nullable=True,
                        ),
                        ColumnToFunction(
                            None,
                            "user",
                            "nullIf",
                            (Column(None, None, "user"), Literal(None, "")),
                        ),
                    ]
                )
            )
            .concat(
                TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                        SubscriptableMapper(None, "contexts", None, "contexts"),
                    ],
                )
            ),
        )
    )

    def selector_func(_query: Query, referrer: str) -> Tuple[str, List[str]]:
        """Pick the primary pipeline (and optional secondary) for a request.

        Returns ``(primary, [secondary, ...])``. The secondary list names
        pipelines run alongside the primary — presumably for shadow
        comparison via ``callback_func`` (confirm in PipelineDelegator).
        """
        # In case something goes wrong, set this to 1 to revert to the events storage.
        kill_rollout = state.get_config("errors_rollout_killswitch", 0)
        assert isinstance(kill_rollout, (int, str))
        if int(kill_rollout):
            return "events", []

        # Fully rolled-out referrers (or a global rollout flag) go straight
        # to the discover storage.
        if referrer in settings.ERRORS_ROLLOUT_BY_REFERRER:
            return "discover", []

        if settings.ERRORS_ROLLOUT_ALL:
            return "discover", []

        # Otherwise sample a fraction of traffic (per-referrer override,
        # else the global config value) to ALSO run the discover pipeline
        # as a secondary, while events remains the primary.
        default_threshold = state.get_config("discover_query_percentage", 0)
        assert isinstance(default_threshold, (float, int, str))
        threshold = settings.ERRORS_QUERY_PERCENTAGE_BY_REFERRER.get(
            referrer, default_threshold
        )
        if random.random() < float(threshold):
            return "events", ["discover"]

        return "events", []

    super().__init__(
        storages=[events_storage, discover_storage],
        query_pipeline_builder=PipelineDelegator(
            query_pipeline_builders={
                "events": events_pipeline_builder,
                "discover": discover_pipeline_builder,
            },
            selector_func=selector_func,
            # callback_func receives "discover" as its first argument —
            # presumably tagging which secondary's results to compare.
            callback_func=partial(callback_func, "discover"),
        ),
        # The entity exposes the union of common, events-only and
        # transactions-only columns.
        abstract_column_set=(
            self.__common_columns + self.__events_columns + self.__transactions_columns
        ),
        join_relationships={},
        # Read-only entity: no writable storage is attached.
        writable_storage=None,
        required_filter_columns=["project_id"],
        required_time_column="timestamp",
    )