Exemplo n.º 1
0
def test() -> None:
    """Run one query through a PipelineDelegator built from two pipelines.

    The selector function returns ``("events", ["errors"])``.  The test then
    checks that the query runner was invoked twice and that the callback was
    called with the query, the request settings, the referrer and one
    ``Result`` per pipeline.
    """
    cv = threading.Condition()
    query_result = QueryResult({}, {"stats": {}, "sql": ""})
    mock_query_runner = Mock(return_value=query_result)

    def callback_func(*args: object) -> None:
        # A Mock forwards the exact call arguments to its side_effect.  The
        # final assertion of this test shows the callback is called with four
        # positional arguments, so a one-parameter signature would raise
        # TypeError before notify() and the wait below could only end by
        # timing out.
        with cv:
            cv.notify()

    mock_callback = Mock(side_effect=callback_func)

    query_body = {
        "selected_columns": ["type", "project_id"],
    }

    events = get_dataset("events")
    query = parse_query(query_body, events)

    events_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.EVENTS)), )

    errors_pipeline = SimplePipelineBuilder(
        query_plan_builder=SingleStorageQueryPlanBuilder(
            storage=get_storage(StorageKey.ERRORS)), )

    delegator = PipelineDelegator(
        query_pipeline_builders={
            "events": events_pipeline,
            "errors": errors_pipeline
        },
        selector_func=lambda query, referrer: ("events", ["errors"]),
        callback_func=mock_callback,
    )

    # cv is held while the pipeline executes; wait() releases it and blocks
    # until the callback notifies or the 5 second timeout elapses.
    with cv:
        request_settings = HTTPRequestSettings()
        delegator.build_execution_pipeline(
            Request(
                "",
                query_body,
                query,
                request_settings,
                "ref",
            ),
            mock_query_runner,
        ).execute()
        cv.wait(timeout=5)

    assert mock_query_runner.call_count == 2

    assert mock_callback.call_args == call(
        query,
        request_settings,
        "ref",
        [
            Result("events", query_result, ANY),
            Result("errors", query_result, ANY)
        ],
    )
Exemplo n.º 2
0
    def __init__(self) -> None:
        """Pass the OUTCOMES_RAW and OUTCOMES_HOURLY storages to the parent.

        OUTCOMES_RAW is registered as the writable storage; queries are
        planned against OUTCOMES_HOURLY, whose schema supplies the column set.
        """
        # The raw table we write onto, and that potentially we could query.
        raw_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)

        # The materialized view we query aggregate data from.
        hourly_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
        hourly_schema = hourly_storage.get_schema()

        # TODO: Once we are ready to expose the raw data model and select whether to use
        # materialized storage or the raw one here, replace this with a custom storage
        # selector that decides when to use the materialized data.
        plan_builder = SingleStorageQueryPlanBuilder(storage=hourly_storage)
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)

        super().__init__(
            storages=[raw_storage, hourly_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=hourly_schema.get_columns(),
            join_relationships={},
            writable_storage=raw_storage,
            validators=[EntityRequiredColumnValidator({"org_id"})],
            required_time_column="timestamp",
        )
Exemplo n.º 3
0
    def __init__(
        self,
        writable_storage_key: StorageKey,
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
    ) -> None:
        """Resolve both storages by key and hand them to the parent constructor.

        Args:
            writable_storage_key: key passed to ``get_writable_storage``.
            readable_storage_key: key passed to ``get_storage``; queries are
                planned against this storage.
            value_schema: extra columns appended after the fixed columns.
            mappers: translation mappers concatenated after the "tags"
                subscriptable mapper.
        """
        write_target = get_writable_storage(writable_storage_key)
        read_source = get_storage(readable_storage_key)

        tags_mappers = TranslationMappers(
            subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
        )
        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                read_source,
                mappers=tags_mappers.concat(mappers),
            ),
        )

        tags_column = Column(
            "tags", Nested([("key", UInt(64)), ("value", UInt(64))])
        )
        column_set = ColumnSet(
            [
                Column("org_id", UInt(64)),
                Column("project_id", UInt(64)),
                Column("metric_id", UInt(64)),
                Column("timestamp", DateTime()),
                tags_column,
                *value_schema,
            ]
        )

        super().__init__(
            storages=[write_target, read_source],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={},
            writable_storage=write_target,
            validators=[EntityRequiredColumnValidator({"org_id", "project_id"})],
            required_time_column="timestamp",
        )
Exemplo n.º 4
0
    def __init__(
        self, custom_mappers: Optional[TranslationMappers] = None
    ) -> None:
        """Initialize on top of the writable TRANSACTIONS storage.

        Args:
            custom_mappers: optional mappers concatenated after
                ``transaction_translator``; when ``None`` the translator is
                used unchanged.
        """
        transactions_storage = get_writable_storage(StorageKey.TRANSACTIONS)
        writer_schema = transactions_storage.get_table_writer().get_schema()

        if custom_mappers is None:
            effective_mappers = transaction_translator
        else:
            effective_mappers = transaction_translator.concat(custom_mappers)

        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=transactions_storage,
                mappers=effective_mappers,
            ),
        )
        column_set = writer_schema.get_columns()

        # "contains" relationship joining this side to EntityKey.SPANS.
        spans_join = JoinRelationship(
            rhs_entity=EntityKey.SPANS,
            columns=[
                ("project_id", "project_id"),
                ("span_id", "transaction_span_id"),
            ],
            join_type=JoinType.INNER,
            equivalences=[
                ColumnEquivalence("event_id", "transaction_id"),
                ColumnEquivalence("transaction_name", "transaction_name"),
                ColumnEquivalence("trace_id", "trace_id"),
            ],
        )

        super().__init__(
            storages=[transactions_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={"contains": spans_join},
            writable_storage=transactions_storage,
        )
Exemplo n.º 5
0
    def __init__(self) -> None:
        """Initialize over the OUTCOMES_RAW storage with no writable storage.

        The column set is taken from the storage's schema.
        """
        raw_outcomes = get_storage(StorageKey.OUTCOMES_RAW)
        plan_builder = SingleStorageQueryPlanBuilder(storage=raw_outcomes)
        column_set = raw_outcomes.get_schema().get_columns()

        super().__init__(
            storages=[raw_outcomes],
            query_plan_builder=plan_builder,
            abstract_column_set=column_set,
            writable_storage=None,
        )
Exemplo n.º 6
0
    def __init__(self) -> None:
        """Initialize over the "groupassignees" CDC storage.

        The same storage is used for querying and writing; the column set
        comes from its table writer's schema.
        """
        assignees_storage = get_cdc_storage("groupassignees")
        writer_schema = assignees_storage.get_table_writer().get_schema()
        plan_builder = SingleStorageQueryPlanBuilder(storage=assignees_storage)

        super().__init__(
            storages=[assignees_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=writer_schema.get_columns(),
            writable_storage=assignees_storage,
        )
Exemplo n.º 7
0
    def __init__(self) -> None:
        """Initialize over the GROUPEDMESSAGES CDC storage.

        The same storage is used for querying and writing; the column set
        comes from its table writer's schema.
        """
        messages_storage = get_cdc_storage(StorageKey.GROUPEDMESSAGES)
        writer_schema = messages_storage.get_table_writer().get_schema()
        plan_builder = SingleStorageQueryPlanBuilder(storage=messages_storage)

        super().__init__(
            storages=[messages_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=writer_schema.get_columns(),
            writable_storage=messages_storage,
        )
Exemplo n.º 8
0
    def __init__(self) -> None:
        """Initialize over the writable "querylog" storage.

        The column set is read from the storage's table writer schema.
        """
        querylog_storage = get_writable_storage("querylog")
        writer_schema = querylog_storage.get_table_writer().get_schema()
        column_set = writer_schema.get_columns()
        plan_builder = SingleStorageQueryPlanBuilder(storage=querylog_storage)

        super().__init__(
            storages=[querylog_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=column_set,
            writable_storage=querylog_storage,
        )
Exemplo n.º 9
0
    def __init__(self) -> None:
        """Initialize over the OUTCOMES_RAW storage with no writable storage.

        The time group / time parse column settings are stored on the
        instance before the parent constructor runs.
        """
        raw_outcomes = get_storage(StorageKey.OUTCOMES_RAW)

        self.__time_group_columns = {"time": "timestamp"}
        self.__time_parse_columns = ("timestamp",)

        plan_builder = SingleStorageQueryPlanBuilder(storage=raw_outcomes)
        column_set = raw_outcomes.get_schema().get_columns()
        super().__init__(
            storages=[raw_outcomes],
            query_plan_builder=plan_builder,
            abstract_column_set=column_set,
            writable_storage=None,
        )
Exemplo n.º 10
0
    def __init__(self) -> None:
        """Initialize over the writable SPANS storage.

        Declares the column set explicitly, registers a "contained" inner
        join to EntityKey.TRANSACTIONS and requires the "project_id" column.
        """
        spans_storage = get_writable_storage(StorageKey.SPANS)

        tags_mappers = TranslationMappers(
            subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
        )
        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                storage=spans_storage,
                mappers=tags_mappers,
            ),
        )

        column_set = ColumnSet(
            [
                ("project_id", UInt(64)),
                ("transaction_id", UUID()),
                ("trace_id", UUID()),
                ("transaction_span_id", UInt(64)),
                ("span_id", UInt(64)),
                ("parent_span_id", UInt(64, Modifiers(nullable=True))),
                ("transaction_name", String()),
                ("op", String()),
                ("status", UInt(8)),
                ("start_ts", DateTime()),
                ("start_ns", UInt(32)),
                ("finish_ts", DateTime()),
                ("finish_ns", UInt(32)),
                ("duration_ms", UInt(32)),
                ("tags", Nested([("key", String()), ("value", String())])),
            ]
        )

        transactions_join = JoinRelationship(
            rhs_entity=EntityKey.TRANSACTIONS,
            columns=[
                ("project_id", "project_id"),
                ("transaction_span_id", "span_id"),
            ],
            join_type=JoinType.INNER,
            equivalences=[
                ColumnEquivalence("transaction_id", "event_id"),
                ColumnEquivalence("transaction_name", "transaction_name"),
                ColumnEquivalence("trace_id", "trace_id"),
            ],
        )

        super().__init__(
            storages=[spans_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={"contained": transactions_join},
            writable_storage=spans_storage,
            validators=[EntityRequiredColumnValidator({"project_id"})],
            required_time_column=None,
        )
Exemplo n.º 11
0
    def __init__(self) -> None:
        """Initialize over the OUTCOMES_RAW storage with no writable storage.

        Queries go through a SimplePipelineBuilder planned against that
        storage; no join relationships are declared.
        """
        raw_outcomes = get_storage(StorageKey.OUTCOMES_RAW)
        plan_builder = SingleStorageQueryPlanBuilder(storage=raw_outcomes)
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)
        column_set = raw_outcomes.get_schema().get_columns()

        super().__init__(
            storages=[raw_outcomes],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={},
            writable_storage=None,
        )
Exemplo n.º 12
0
    def __init__(self) -> None:
        """Initialize over the writable TRANSACTIONS storage.

        Query plans use ``transaction_translator`` as mappers; the column set
        comes from the table writer's schema.
        """
        transactions_storage = get_writable_storage(StorageKey.TRANSACTIONS)
        writer_schema = transactions_storage.get_table_writer().get_schema()
        plan_builder = SingleStorageQueryPlanBuilder(
            storage=transactions_storage,
            mappers=transaction_translator,
        )

        super().__init__(
            storages=[transactions_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=writer_schema.get_columns(),
            writable_storage=transactions_storage,
        )
Exemplo n.º 13
0
    def __init__(self) -> None:
        """Initialize over the "outcomes_raw" storage with no writable storage.

        The column set comes from the storage's read schema; "timestamp" is
        both the time group source column and the only time parse column.
        """
        raw_outcomes = get_storage("outcomes_raw")
        outcomes_read_schema = raw_outcomes.get_schemas().get_read_schema()

        self.__time_group_columns = {"time": "timestamp"}

        plan_builder = SingleStorageQueryPlanBuilder(storage=raw_outcomes)
        column_set = outcomes_read_schema.get_columns()
        super().__init__(
            storages=[raw_outcomes],
            query_plan_builder=plan_builder,
            abstract_column_set=column_set,
            writable_storage=None,
            time_group_columns=self.__time_group_columns,
            time_parse_columns=("timestamp",),
        )
Exemplo n.º 14
0
    def __init__(self) -> None:
        """Initialize over the writable ERRORS storage.

        Query plans use ``errors_translators`` as mappers; the column set
        comes from the table writer's schema.
        """
        errors_storage = get_writable_storage(StorageKey.ERRORS)
        writer_schema = errors_storage.get_table_writer().get_schema()
        column_set = writer_schema.get_columns()

        plan_builder = SingleStorageQueryPlanBuilder(
            storage=errors_storage,
            mappers=errors_translators,
        )

        super().__init__(
            storages=[errors_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=column_set,
            writable_storage=errors_storage,
        )
Exemplo n.º 15
0
    def __init__(self) -> None:
        """Initialize over the GROUPEDMESSAGES CDC storage.

        The same storage is used for querying (through a
        SimplePipelineBuilder) and writing; no join relationships are
        declared.
        """
        messages_storage = get_cdc_storage(StorageKey.GROUPEDMESSAGES)
        writer_schema = messages_storage.get_table_writer().get_schema()

        plan_builder = SingleStorageQueryPlanBuilder(storage=messages_storage)
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)

        super().__init__(
            storages=[messages_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=writer_schema.get_columns(),
            join_relationships={},
            writable_storage=messages_storage,
        )
Exemplo n.º 16
0
    def __init__(self) -> None:
        """Initialize over the writable ERRORS storage.

        The time group / time parse column settings are stored on the
        instance before the parent constructor runs; query plans use
        ``errors_translators`` as mappers.
        """
        errors_storage = get_writable_storage(StorageKey.ERRORS)
        writer_schema = errors_storage.get_table_writer().get_schema()
        column_set = writer_schema.get_columns()

        self.__time_group_columns = {"time": "timestamp", "rtime": "received"}
        self.__time_parse_columns = ("timestamp", "received")

        plan_builder = SingleStorageQueryPlanBuilder(
            storage=errors_storage,
            mappers=errors_translators,
        )
        super().__init__(
            storages=[errors_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=column_set,
            writable_storage=errors_storage,
        )
Exemplo n.º 17
0
    def __init__(self) -> None:
        """Initialize over the OUTCOMES_RAW storage with no writable storage.

        Requires the "org_id" column through EntityRequiredColumnValidator
        and names "timestamp" as the required time column.
        """
        raw_outcomes = get_storage(StorageKey.OUTCOMES_RAW)
        plan_builder = SingleStorageQueryPlanBuilder(storage=raw_outcomes)
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)
        column_set = raw_outcomes.get_schema().get_columns()

        super().__init__(
            storages=[raw_outcomes],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={},
            writable_storage=None,
            validators=[EntityRequiredColumnValidator({"org_id"})],
            required_time_column="timestamp",
        )
Exemplo n.º 18
0
    def __init__(
        self,
        writable_storage_key: Optional[StorageKey],
        readable_storage_key: StorageKey,
        value_schema: Sequence[Column[SchemaModifiers]],
        mappers: TranslationMappers,
        abstract_column_set: Optional[ColumnSet] = None,
        validators: Optional[Sequence[QueryValidator]] = None,
    ) -> None:
        """Resolve the storages by key and hand them to the parent constructor.

        Args:
            writable_storage_key: key passed to ``get_writable_storage``; a
                falsy value means no writable storage is registered.
            readable_storage_key: key passed to ``get_storage``; queries are
                planned against this storage.
            value_schema: extra columns appended to the default column set
                (only used when ``abstract_column_set`` is ``None``).
            mappers: translation mappers concatenated after the "tags"
                subscriptable mapper.
            abstract_column_set: column set to use instead of the default.
            validators: validators to use instead of the default pair
                (required org_id/project_id columns, granularity minimum 10).
        """
        if writable_storage_key:
            write_target = get_writable_storage(writable_storage_key)
        else:
            write_target = None
        read_source = get_storage(readable_storage_key)

        # The readable storage always comes first in the storage list.
        all_storages = [read_source]
        if write_target:
            all_storages.append(write_target)

        if abstract_column_set is None:
            tags_column = Column(
                "tags", Nested([("key", UInt(64)), ("value", UInt(64))])
            )
            abstract_column_set = ColumnSet(
                [
                    Column("org_id", UInt(64)),
                    Column("project_id", UInt(64)),
                    Column("metric_id", UInt(64)),
                    Column("timestamp", DateTime()),
                    Column("bucketed_time", DateTime()),
                    tags_column,
                    *value_schema,
                ]
            )

        if validators is None:
            validators = [
                EntityRequiredColumnValidator({"org_id", "project_id"}),
                GranularityValidator(minimum=10),
            ]

        tags_mappers = TranslationMappers(
            subscriptables=[SubscriptableMapper(None, "tags", None, "tags")],
        )
        pipeline_builder = SimplePipelineBuilder(
            query_plan_builder=SingleStorageQueryPlanBuilder(
                read_source,
                mappers=tags_mappers.concat(mappers),
            ),
        )

        super().__init__(
            storages=all_storages,
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=abstract_column_set,
            join_relationships={},
            writable_storage=write_target,
            validators=validators,
            required_time_column="timestamp",
        )
Exemplo n.º 19
0
    def __init__(
        self, custom_mappers: Optional[TranslationMappers] = None
    ) -> None:
        """Initialize over the writable TRANSACTIONS storage.

        Args:
            custom_mappers: optional mappers concatenated after
                ``transaction_translator``; when ``None`` the translator is
                used unchanged.
        """
        transactions_storage = get_writable_storage(StorageKey.TRANSACTIONS)
        writer_schema = transactions_storage.get_table_writer().get_schema()

        if custom_mappers is None:
            effective_mappers = transaction_translator
        else:
            effective_mappers = transaction_translator.concat(custom_mappers)

        plan_builder = SingleStorageQueryPlanBuilder(
            storage=transactions_storage,
            mappers=effective_mappers,
        )

        super().__init__(
            storages=[transactions_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=writer_schema.get_columns(),
            writable_storage=transactions_storage,
        )
Exemplo n.º 20
0
    def __init__(self) -> None:
        """Initialize over the writable TRANSACTIONS storage.

        The time group / time parse column settings are stored on the
        instance before the parent constructor runs; query plans use
        ``transaction_translator`` as mappers.
        """
        transactions_storage = get_writable_storage(StorageKey.TRANSACTIONS)
        writer_schema = transactions_storage.get_table_writer().get_schema()

        self.__time_group_columns = {"time": "finish_ts"}
        self.__time_parse_columns = ("start_ts", "finish_ts")

        plan_builder = SingleStorageQueryPlanBuilder(
            storage=transactions_storage,
            mappers=transaction_translator,
        )
        super().__init__(
            storages=[transactions_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=writer_schema.get_columns(),
            writable_storage=transactions_storage,
        )
Exemplo n.º 21
0
    def __init__(self) -> None:
        """Initialize over the writable ERRORS storage.

        Query plans use ``errors_translators`` as mappers.  "project_id" is
        passed as the required filter column and "timestamp" as the required
        time column.
        """
        errors_storage = get_writable_storage(StorageKey.ERRORS)
        writer_schema = errors_storage.get_table_writer().get_schema()
        column_set = writer_schema.get_columns()

        plan_builder = SingleStorageQueryPlanBuilder(
            storage=errors_storage,
            mappers=errors_translators,
        )
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)

        super().__init__(
            storages=[errors_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={},
            writable_storage=errors_storage,
            required_filter_columns=["project_id"],
            required_time_column="timestamp",
        )
Exemplo n.º 22
0
def parse_and_process(query_body: MutableMapping[str, Any]) -> ClickhouseQuery:
    """Parse ``query_body`` against the "transactions" dataset and plan it.

    The parsed query is run through every query processor of its entity and
    through an ``ArrayJoinKeyValueOptimizer`` for "tags".  A single-storage
    plan builder over the entity's writable storage then ranks the plans, and
    the query of the first plan is returned.
    """
    transactions_dataset = get_dataset("transactions")
    parsed_query = parse_query(query_body, transactions_dataset)
    request = Request("a", query_body, parsed_query, HTTPRequestSettings(), "r")
    settings = request.settings

    entity = get_entity(parsed_query.get_from_clause().key)
    for processor in entity.get_query_processors():
        processor.process_query(parsed_query, settings)

    tags_optimizer = ArrayJoinKeyValueOptimizer("tags")
    tags_optimizer.process_query(parsed_query, settings)

    plan_builder = SingleStorageQueryPlanBuilder(
        storage=entity.get_writable_storage(),
        mappers=transaction_translator,
    )
    ranked_plans = plan_builder.build_and_rank_plans(parsed_query, settings)

    return ranked_plans[0].query
Exemplo n.º 23
0
    def __init__(self, ) -> None:
        """Initialize over the writable PROFILES storage.

        Uses ``profile_columns`` as the column set, requires the
        "organization_id" and "project_id" columns, and names "received" as
        the required time column.
        """
        profiles_storage = get_writable_storage(StorageKey.PROFILES)

        plan_builder = SingleStorageQueryPlanBuilder(profiles_storage)
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)
        required_columns = EntityRequiredColumnValidator(
            {"organization_id", "project_id"}
        )

        super().__init__(
            storages=[profiles_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=profile_columns,
            join_relationships={},
            writable_storage=profiles_storage,
            validators=[required_columns],
            required_time_column="received",
        )
Exemplo n.º 24
0
    def __init__(self) -> None:
        """Pass the "sessions_raw" and "sessions_hourly" storages to the parent.

        "sessions_raw" is registered as the writable storage; queries are
        planned against "sessions_hourly", whose read schema supplies the
        column set.
        """
        raw_storage = get_writable_storage("sessions_raw")
        hourly_storage = get_storage("sessions_hourly")
        hourly_read_schema = hourly_storage.get_schemas().get_read_schema()

        self.__time_group_columns = {"bucketed_started": "started"}

        # TODO: Once we are ready to expose the raw data model and select whether to use
        # materialized storage or the raw one here, replace this with a custom storage
        # selector that decides when to use the materialized data.
        plan_builder = SingleStorageQueryPlanBuilder(storage=hourly_storage)

        super().__init__(
            storages=[raw_storage, hourly_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=hourly_read_schema.get_columns(),
            writable_storage=raw_storage,
            time_group_columns=self.__time_group_columns,
            time_parse_columns=("started", "received"),
        )
Exemplo n.º 25
0
    def __init__(self) -> None:
        """Initialize over the ORG_SESSIONS storage with no writable storage.

        Declares a three-column set (org_id, project_id, started), passes no
        validators and names "started" as the required time column.
        """
        org_sessions_storage = get_storage(StorageKey.ORG_SESSIONS)

        plan_builder = SingleStorageQueryPlanBuilder(storage=org_sessions_storage)
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)
        column_set = ColumnSet(
            [
                ("org_id", UInt(64)),
                ("project_id", UInt(64)),
                ("started", DateTime()),
            ]
        )

        super().__init__(
            storages=[org_sessions_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={},
            writable_storage=None,
            validators=None,
            required_time_column="started",
        )
Exemplo n.º 26
0
    def __init__(self) -> None:
        """Pass the SESSIONS_RAW and SESSIONS_HOURLY storages to the parent.

        SESSIONS_RAW is registered as the writable storage; queries are
        planned against SESSIONS_HOURLY with ``sessions_translators`` as
        mappers, and its schema supplies the column set.
        """
        raw_storage = get_writable_storage(StorageKey.SESSIONS_RAW)
        hourly_storage = get_storage(StorageKey.SESSIONS_HOURLY)
        hourly_schema = hourly_storage.get_schema()

        # TODO: Once we are ready to expose the raw data model and select whether to use
        # materialized storage or the raw one here, replace this with a custom storage
        # selector that decides when to use the materialized data.
        plan_builder = SingleStorageQueryPlanBuilder(
            storage=hourly_storage,
            mappers=sessions_translators,
        )
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)

        super().__init__(
            storages=[raw_storage, hourly_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=hourly_schema.get_columns(),
            join_relationships={},
            writable_storage=raw_storage,
        )
Exemplo n.º 27
0
    def __init__(self) -> None:
        """Initialize over the writable "errors" storage.

        After the parent constructor has run, a TagColumnProcessor is built
        from the column set and this instance's promoted columns and
        column/tag map.
        """
        errors_storage = get_writable_storage("errors")
        writer_schema = errors_storage.get_table_writer().get_schema()
        column_set = writer_schema.get_columns()

        self.__time_group_columns = {"time": "timestamp", "rtime": "received"}

        plan_builder = SingleStorageQueryPlanBuilder(storage=errors_storage)
        super().__init__(
            storages=[errors_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=column_set,
            writable_storage=errors_storage,
            time_group_columns=self.__time_group_columns,
            time_parse_columns=("timestamp", "received"),
        )

        promoted = self._get_promoted_columns()
        tag_map = self._get_column_tag_map()
        self.__tags_processor = TagColumnProcessor(
            columns=column_set,
            promoted_columns=promoted,
            column_tag_map=tag_map,
        )
Exemplo n.º 28
0
    def __init__(self) -> None:
        """Pass the OUTCOMES_RAW and OUTCOMES_HOURLY storages to the parent.

        OUTCOMES_RAW is registered as the writable storage; queries are
        planned against OUTCOMES_HOURLY, whose schema supplies the column set.
        """
        # The raw table we write onto, and that potentially we could query.
        raw_storage = get_writable_storage(StorageKey.OUTCOMES_RAW)

        # The materialized view we query aggregate data from.
        hourly_storage = get_storage(StorageKey.OUTCOMES_HOURLY)
        hourly_schema = hourly_storage.get_schema()

        # TODO: Once we are ready to expose the raw data model and select whether to use
        # materialized storage or the raw one here, replace this with a custom storage
        # selector that decides when to use the materialized data.
        plan_builder = SingleStorageQueryPlanBuilder(storage=hourly_storage)

        super().__init__(
            storages=[raw_storage, hourly_storage],
            query_plan_builder=plan_builder,
            abstract_column_set=hourly_schema.get_columns(),
            writable_storage=raw_storage,
        )
Exemplo n.º 29
0
    def __init__(self) -> None:
        """Initialize over the writable "transactions" storage.

        A TagColumnProcessor is built before the parent constructor runs,
        from the writer schema's columns and this instance's promoted columns
        and column/tag map.
        """
        transactions_storage = get_writable_storage("transactions")
        writer_schema = transactions_storage.get_table_writer().get_schema()
        column_set = writer_schema.get_columns()

        promoted = self._get_promoted_columns()
        tag_map = self._get_column_tag_map()
        self.__tags_processor = TagColumnProcessor(
            columns=column_set,
            promoted_columns=promoted,
            column_tag_map=tag_map,
        )
        self.__time_group_columns = {
            "bucketed_start": "start_ts",
            "bucketed_end": "finish_ts",
        }

        plan_builder = SingleStorageQueryPlanBuilder(storage=transactions_storage)
        super().__init__(
            storages=[transactions_storage],
            query_plan_builder=plan_builder,
            # The columns are fetched from the schema again here rather than
            # reusing the value handed to the tag processor.
            abstract_column_set=writer_schema.get_columns(),
            writable_storage=transactions_storage,
            time_group_columns=self.__time_group_columns,
            time_parse_columns=("start_ts", "finish_ts"),
        )
Exemplo n.º 30
0
    def __init__(self) -> None:
        """Initialize over the GROUPASSIGNEES CDC storage.

        Registers an "owns" left join to EntityKey.EVENTS on project_id and
        group_id; no required filter columns or required time column are set.
        """
        assignees_storage = get_cdc_storage(StorageKey.GROUPASSIGNEES)
        writer_schema = assignees_storage.get_table_writer().get_schema()

        plan_builder = SingleStorageQueryPlanBuilder(storage=assignees_storage)
        pipeline_builder = SimplePipelineBuilder(query_plan_builder=plan_builder)
        column_set = writer_schema.get_columns()

        events_join = JoinRelationship(
            rhs_entity=EntityKey.EVENTS,
            columns=[
                ("project_id", "project_id"),
                ("group_id", "group_id"),
            ],
            join_type=JoinType.LEFT,
            equivalences=[],
        )

        super().__init__(
            storages=[assignees_storage],
            query_pipeline_builder=pipeline_builder,
            abstract_column_set=column_set,
            join_relationships={"owns": events_join},
            writable_storage=assignees_storage,
            required_filter_columns=None,
            required_time_column=None,
        )