def test() -> None:
    # Condition variable used to wait for the asynchronous callback to fire.
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": ""})
    mock_query_runner = Mock(return_value=query_result)

    def callback_func(args: List[Tuple[str, QueryResult]]) -> None:
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "selected_columns": ["type", "project_id"],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    events_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.EVENTS)
        ),
    )
    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)
        ),
    )

    # The selector runs the events pipeline as primary and the errors
    # pipeline in addition, so the runner should be invoked twice.
    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline,
            "errors": errors_pipeline,
        },
        selector_func=lambda query, referrer: ("events", ["errors"]),
        callback_func=mock_callback,
    )

    with cv:
        request_settings = HTTPRequestSettings()
        delegator.build_execution_pipeline(
            Request("", query_body, query, request_settings, "ref"),
            mock_query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert mock_query_runner.call_count == 2

    assert mock_callback.call_args == call(
        query,
        request_settings,
        "ref",
        [
            Result("events", query_result, ANY),
            Result("errors", query_result, ANY),
        ],
    )
def __init__(self) -> None:
    # The raw table we write onto, and that potentially we could
    # query.
    writable_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)
    # The materialized view we query aggregate data from.
    materialized_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
    read_schema = materialized_storage.get_schema()
    super().__init__(
        storages=[writable_storage, materialized_storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                # TODO: Once we are ready to expose the raw data model and
                # select whether to use the materialized storage or the raw
                # one here, replace this with a custom storage selector that
                # decides when to use the materialized data.
                storage=materialized_storage,
            ),
        ),
        abstract_column_set=read_schema.get_columns(),
        join_relationships={},
        writable_storage=writable_storage,
        validators=[EntityRequiredColumnValidator({"org_id"})],
        required_time_column="timestamp",
    )
def __init__(
    self,
    writable_storage_key: StorageKey,
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
) -> None:
    writable_storage = get_writable_storage(writable_storage_key)
    readable_storage = get_storage(readable_storage_key)

    super().__init__(
        storages=[writable_storage, readable_storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                readable_storage,
                mappers=TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ],
                ).concat(mappers),
            )
        ),
        abstract_column_set=ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
                *value_schema,
            ]
        ),
        join_relationships={},
        writable_storage=writable_storage,
        validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
        required_time_column="timestamp",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()

    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=storage,
                mappers=transaction_translator
                if custom_mappers is None
                else transaction_translator.concat(custom_mappers),
            ),
        ),
        abstract_column_set=schema.get_columns(),
        join_relationships={
            "contains": JoinRelationship(
                rhs_entity=EntityKey.SPANS,
                columns=[
                    ("project_id", "project_id"),
                    ("span_id", "transaction_span_id"),
                ],
                join_type=JoinType.INNER,
                equivalences=[
                    ColumnEquivalence("event_id", "transaction_id"),
                    ColumnEquivalence("transaction_name", "transaction_name"),
                    ColumnEquivalence("trace_id", "trace_id"),
                ],
            )
        },
        writable_storage=storage,
    )
def __init__(self) -> None:
    storage = get_storage(StorageKey.OUTCOMES_RAW)
    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        abstract_column_set=storage.get_schema().get_columns(),
        writable_storage=None,
    )
def __init__(self) -> None: storage = get_cdc_storage("groupassignees") schema = storage.get_table_writer().get_schema() super().__init__( storages=[storage], query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage), abstract_column_set=schema.get_columns(), writable_storage=storage, )
def __init__(self) -> None:
    storage = get_cdc_storage(StorageKey.GROUPEDMESSAGES)
    schema = storage.get_table_writer().get_schema()
    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        abstract_column_set=schema.get_columns(),
        writable_storage=storage,
    )
def __init__(self) -> None: storage = get_writable_storage("querylog") columns = storage.get_table_writer().get_schema().get_columns() super().__init__( storages=[storage], query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage), abstract_column_set=columns, writable_storage=storage, )
def __init__(self) -> None:
    storage = get_storage(StorageKey.OUTCOMES_RAW)
    self.__time_group_columns = {"time": "timestamp"}
    self.__time_parse_columns = ("timestamp",)
    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        abstract_column_set=storage.get_schema().get_columns(),
        writable_storage=None,
    )
def __init__(self) -> None:
    storage = get_writable_storage(StorageKey.SPANS)

    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=storage,
                mappers=TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags")
                    ],
                ),
            ),
        ),
        abstract_column_set=ColumnSet(
            [
                ("project_id", UInt(64)),
                ("transaction_id", UUID()),
                ("trace_id", UUID()),
                ("transaction_span_id", UInt(64)),
                ("span_id", UInt(64)),
                ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                ("transaction_name", String()),
                ("op", String()),
                ("status", UInt(8)),
                ("start_ts", DateTime()),
                ("start_ns", UInt(32)),
                ("finish_ts", DateTime()),
                ("finish_ns", UInt(32)),
                ("duration_ms", UInt(32)),
                ("tags", Nested([("key", String()), ("value", String())])),
            ]
        ),
        join_relationships={
            "contained": JoinRelationship(
                rhs_entity=EntityKey.TRANSACTIONS,
                columns=[
                    ("project_id", "project_id"),
                    ("transaction_span_id", "span_id"),
                ],
                join_type=JoinType.INNER,
                equivalences=[
                    ColumnEquivalence("transaction_id", "event_id"),
                    ColumnEquivalence("transaction_name", "transaction_name"),
                    ColumnEquivalence("trace_id", "trace_id"),
                ],
            )
        },
        writable_storage=storage,
        validators=[EntityRequiredColumnValidator({"project_id"})],
        required_time_column=None,
    )
def __init__(self) -> None:
    storage = get_storage(StorageKey.OUTCOMES_RAW)
    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        ),
        abstract_column_set=storage.get_schema().get_columns(),
        join_relationships={},
        writable_storage=None,
    )
def __init__(self) -> None:
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()

    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage, mappers=transaction_translator
        ),
        abstract_column_set=schema.get_columns(),
        writable_storage=storage,
    )
def __init__(self) -> None: storage = get_storage("outcomes_raw") read_schema = storage.get_schemas().get_read_schema() self.__time_group_columns = {"time": "timestamp"} super().__init__( storages=[storage], query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage), abstract_column_set=read_schema.get_columns(), writable_storage=None, time_group_columns=self.__time_group_columns, time_parse_columns=("timestamp",), )
def __init__(self) -> None:
    storage = get_writable_storage(StorageKey.ERRORS)
    schema = storage.get_table_writer().get_schema()
    columns = schema.get_columns()

    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage, mappers=errors_translators
        ),
        abstract_column_set=columns,
        writable_storage=storage,
    )
def __init__(self) -> None:
    storage = get_cdc_storage(StorageKey.GROUPEDMESSAGES)
    schema = storage.get_table_writer().get_schema()

    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        ),
        abstract_column_set=schema.get_columns(),
        join_relationships={},
        writable_storage=storage,
    )
def __init__(self) -> None:
    storage = get_writable_storage(StorageKey.ERRORS)
    schema = storage.get_table_writer().get_schema()
    columns = schema.get_columns()
    self.__time_group_columns = {"time": "timestamp", "rtime": "received"}
    self.__time_parse_columns = ("timestamp", "received")
    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage, mappers=errors_translators
        ),
        abstract_column_set=columns,
        writable_storage=storage,
    )
def __init__(self) -> None:
    storage = get_storage(StorageKey.OUTCOMES_RAW)
    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        ),
        abstract_column_set=storage.get_schema().get_columns(),
        join_relationships={},
        writable_storage=None,
        validators=[EntityRequiredColumnValidator({"org_id"})],
        required_time_column="timestamp",
    )
def __init__(
    self,
    writable_storage_key: Optional[StorageKey],
    readable_storage_key: StorageKey,
    value_schema: Sequence[Column[SchemaModifiers]],
    mappers: TranslationMappers,
    abstract_column_set: Optional[ColumnSet] = None,
    validators: Optional[Sequence[QueryValidator]] = None,
) -> None:
    writable_storage = (
        get_writable_storage(writable_storage_key) if writable_storage_key else None
    )
    readable_storage = get_storage(readable_storage_key)
    storages = [readable_storage]
    if writable_storage:
        storages.append(writable_storage)

    if abstract_column_set is None:
        abstract_column_set = ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                Column("bucketed_time", DateTime()),
                Column("tags", Nested([("key", UInt(64)), ("value", UInt(64))])),
                *value_schema,
            ]
        )

    if validators is None:
        validators = [
            EntityRequiredColumnValidator({"org_id", "project_id"}),
            GranularityValidator(minimum=10),
        ]

    super().__init__(
        storages=storages,
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                readable_storage,
                mappers=TranslationMappers(
                    subscriptables=[
                        SubscriptableMapper(None, "tags", None, "tags"),
                    ],
                ).concat(mappers),
            )
        ),
        abstract_column_set=abstract_column_set,
        join_relationships={},
        writable_storage=writable_storage,
        validators=validators,
        required_time_column="timestamp",
    )
def __init__(self, custom_mappers: Optional[TranslationMappers] = None) -> None:
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()

    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage,
            mappers=transaction_translator
            if custom_mappers is None
            else transaction_translator.concat(custom_mappers),
        ),
        abstract_column_set=schema.get_columns(),
        writable_storage=storage,
    )
def __init__(self) -> None:
    storage = get_writable_storage(StorageKey.TRANSACTIONS)
    schema = storage.get_table_writer().get_schema()
    self.__time_group_columns = {
        "time": "finish_ts",
    }
    self.__time_parse_columns = ("start_ts", "finish_ts")
    super().__init__(
        storages=[storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=storage, mappers=transaction_translator
        ),
        abstract_column_set=schema.get_columns(),
        writable_storage=storage,
    )
def __init__(self) -> None:
    storage = get_writable_storage(StorageKey.ERRORS)
    schema = storage.get_table_writer().get_schema()
    columns = schema.get_columns()

    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=storage, mappers=errors_translators
            ),
        ),
        abstract_column_set=columns,
        join_relationships={},
        writable_storage=storage,
        required_filter_columns=["project_id"],
        required_time_column="timestamp",
    )
def parse_and_process(query_body: MutableMapping[str, Any]) -> ClickhouseQuery:
    dataset = get_dataset("transactions")
    query = parse_query(query_body, dataset)
    request = Request("a", query_body, query, HTTPRequestSettings(), "r")
    entity = get_entity(query.get_from_clause().key)
    for p in entity.get_query_processors():
        p.process_query(query, request.settings)

    ArrayJoinKeyValueOptimizer("tags").process_query(query, request.settings)
    query_plan = SingleStorageQueryPlanBuilder(
        storage=entity.get_writable_storage(),
        mappers=transaction_translator,
    ).build_and_rank_plans(query, request.settings)[0]
    return query_plan.query
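# A minimal usage sketch, not from the original source: it shows how
# parse_and_process above turns a raw Snuba query body into the translated
# ClickHouse query. The query body below is a hypothetical example chosen
# for illustration; any valid transactions query body would do.
if __name__ == "__main__":
    clickhouse_query = parse_and_process(
        {"selected_columns": ["project_id", "duration"]}
    )
    print(clickhouse_query)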
def __init__(self) -> None:
    writable_storage = get_writable_storage(StorageKey.PROFILES)

    super().__init__(
        storages=[writable_storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(writable_storage)
        ),
        abstract_column_set=profile_columns,
        join_relationships={},
        writable_storage=writable_storage,
        validators=[
            EntityRequiredColumnValidator({"organization_id", "project_id"}),
        ],
        required_time_column="received",
    )
def __init__(self) -> None: writable_storage = get_writable_storage("sessions_raw") materialized_storage = get_storage("sessions_hourly") read_schema = materialized_storage.get_schemas().get_read_schema() self.__time_group_columns = {"bucketed_started": "started"} super().__init__( storages=[writable_storage, materialized_storage], # TODO: Once we are ready to expose the raw data model and select whether to use # materialized storage or the raw one here, replace this with a custom storage # selector that decides when to use the materialized data. query_plan_builder=SingleStorageQueryPlanBuilder( storage=materialized_storage, ), abstract_column_set=read_schema.get_columns(), writable_storage=writable_storage, time_group_columns=self.__time_group_columns, time_parse_columns=("started", "received"), )
def __init__(self) -> None:
    storage = get_storage(StorageKey.ORG_SESSIONS)

    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage)
        ),
        abstract_column_set=ColumnSet(
            [
                ("org_id", UInt(64)),
                ("project_id", UInt(64)),
                ("started", DateTime()),
            ]
        ),
        join_relationships={},
        writable_storage=None,
        validators=None,
        required_time_column="started",
    )
def __init__(self) -> None:
    writable_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
    materialized_storage = get_storage(StorageKey.SESSIONS_HOURLY)
    read_schema = materialized_storage.get_schema()

    super().__init__(
        storages=[writable_storage, materialized_storage],
        # TODO: Once we are ready to expose the raw data model and select
        # whether to use the materialized storage or the raw one here,
        # replace this with a custom storage selector that decides when to
        # use the materialized data.
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=materialized_storage,
                mappers=sessions_translators,
            ),
        ),
        abstract_column_set=read_schema.get_columns(),
        join_relationships={},
        writable_storage=writable_storage,
    )
def __init__(self) -> None: storage = get_writable_storage("errors") schema = storage.get_table_writer().get_schema() columns = schema.get_columns() self.__time_group_columns = {"time": "timestamp", "rtime": "received"} super().__init__( storages=[storage], query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage), abstract_column_set=columns, writable_storage=storage, time_group_columns=self.__time_group_columns, time_parse_columns=("timestamp", "received"), ) self.__tags_processor = TagColumnProcessor( columns=columns, promoted_columns=self._get_promoted_columns(), column_tag_map=self._get_column_tag_map(), )
def __init__(self) -> None:
    # The raw table we write onto, and that potentially we could
    # query.
    writable_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)
    # The materialized view we query aggregate data from.
    materialized_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
    read_schema = materialized_storage.get_schema()
    super().__init__(
        storages=[writable_storage, materialized_storage],
        query_plan_builder=SingleStorageQueryPlanBuilder(
            # TODO: Once we are ready to expose the raw data model and select
            # whether to use the materialized storage or the raw one here,
            # replace this with a custom storage selector that decides when
            # to use the materialized data.
            storage=materialized_storage,
        ),
        abstract_column_set=read_schema.get_columns(),
        writable_storage=writable_storage,
    )
def __init__(self) -> None: storage = get_writable_storage("transactions") schema = storage.get_table_writer().get_schema() columns = schema.get_columns() self.__tags_processor = TagColumnProcessor( columns=columns, promoted_columns=self._get_promoted_columns(), column_tag_map=self._get_column_tag_map(), ) self.__time_group_columns = { "bucketed_start": "start_ts", "bucketed_end": "finish_ts", } super().__init__( storages=[storage], query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage), abstract_column_set=schema.get_columns(), writable_storage=storage, time_group_columns=self.__time_group_columns, time_parse_columns=("start_ts", "finish_ts"), )
def __init__(self) -> None:
    storage = get_cdc_storage(StorageKey.GROUPASSIGNEES)
    schema = storage.get_table_writer().get_schema()

    super().__init__(
        storages=[storage],
        query_pipeline_builder=SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
        ),
        abstract_column_set=schema.get_columns(),
        join_relationships={
            "owns": JoinRelationship(
                rhs_entity=EntityKey.EVENTS,
                columns=[("project_id", "project_id"), ("group_id", "group_id")],
                join_type=JoinType.LEFT,
                equivalences=[],
            )
        },
        writable_storage=storage,
        required_filter_columns=None,
        required_time_column=None,
    )