Example #1
    def select_storage(self, query: Query,
                       request_settings: RequestSettings) -> StorageAndMappers:
        # Use the read-only replica table only when the feature flag is on
        # and the caller has not requested consistent reads.
        use_readonly_storage = (state.get_config(
            "enable_events_readonly_table", False)
                                and not request_settings.get_consistent())

        storage = (self.__events_ro_table
                   if use_readonly_storage else self.__events_table)
        return StorageAndMappers(storage, event_translator)
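
A minimal, self-contained sketch of the routing rule above. Everything here is a hypothetical stand-in for illustration (the config dict and function names are not Snuba APIs); only the predicate mirrors select_storage:

# Hypothetical stand-ins; only the routing predicate mirrors select_storage.
_config = {"enable_events_readonly_table": True}


def get_config(key: str, default: object) -> object:
    return _config.get(key, default)


def pick_events_table(consistent: bool) -> str:
    # Prefer the read-only replica unless the flag is off or the caller
    # asked for consistent reads.
    use_readonly = get_config("enable_events_readonly_table", False) and not consistent
    return "events_ro" if use_readonly else "events"


assert pick_events_table(consistent=False) == "events_ro"
assert pick_events_table(consistent=True) == "events"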
Example #2
def execute_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Execute a query and return a result.
    """
    # Experiment: if the query is going to read more than X columns' worth of
    # data, don't use the uncompressed cache in ClickHouse.
    uc_max = state.get_config("uncompressed_cache_max_cols", 5)
    referenced_columns = {
        # Skip aliases when counting columns
        (c.table_name, c.column_name)
        for c in clickhouse_query.get_all_ast_referenced_columns()
    }
    if len(referenced_columns) > uc_max:
        query_settings["use_uncompressed_cache"] = 0

    # Force query to use the first shard replica, which
    # should have synchronously received any cluster writes
    # before this query is run.
    consistent = request_settings.get_consistent()
    stats["consistent"] = consistent
    if consistent:
        query_settings["load_balancing"] = "in_order"
        query_settings["max_threads"] = 1

    result = reader.execute(
        formatted_query,
        query_settings,
        with_totals=clickhouse_query.has_totals(),
    )

    timer.mark("execute")
    stats.update({
        "result_rows": len(result["data"]),
        "result_cols": len(result["meta"])
    })

    return result
Example #3
def execute_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
    robust: bool,
) -> Result:
    """
    Execute a query and return a result.
    """
    # Experiment: if the query is going to read more than X columns' worth of
    # data, don't use the uncompressed cache in ClickHouse.
    uc_max = state.get_config("uncompressed_cache_max_cols", 5)
    assert isinstance(uc_max, int)
    column_counter = ReferencedColumnsCounter()
    column_counter.visit(clickhouse_query.get_from_clause())
    if column_counter.count_columns() > uc_max:
        query_settings["use_uncompressed_cache"] = 0

    # Force query to use the first shard replica, which
    # should have synchronously received any cluster writes
    # before this query is run.
    consistent = request_settings.get_consistent()
    stats["consistent"] = consistent
    if consistent:
        query_settings["load_balancing"] = "in_order"
        query_settings["max_threads"] = 1

    result = reader.execute(
        formatted_query,
        query_settings,
        with_totals=clickhouse_query.has_totals(),
        robust=robust,
    )

    timer.mark("execute")
    stats.update({
        "result_rows": len(result["data"]),
        "result_cols": len(result["meta"])
    })

    return result
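
As a rough illustration of the consistency knobs set in both versions of execute_query above, here is a small sketch; the helper name is hypothetical, while the two settings values come straight from the example:

from typing import Any, MutableMapping


def apply_consistency_settings(
    query_settings: MutableMapping[str, Any], consistent: bool
) -> None:
    # Mirrors execute_query: pin the query to the first replica ("in_order"
    # load balancing) and keep it single-threaded for consistent reads.
    if consistent:
        query_settings["load_balancing"] = "in_order"
        query_settings["max_threads"] = 1


settings: MutableMapping[str, Any] = {}
apply_consistency_settings(settings, consistent=True)
assert settings == {"load_balancing": "in_order", "max_threads": 1}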
Example #4
    def select_storage(self, query: Query,
                       request_settings: RequestSettings) -> StorageAndMappers:
        table = detect_table(
            query,
            self.__abstract_events_columns,
            self.__abstract_transactions_columns,
            True,
        )

        if table == TRANSACTIONS:
            return StorageAndMappers(self.__transactions_table,
                                     self.__transaction_translator)

        # Use the read-only events table only when the feature flag is on
        # and the caller has not requested consistent reads.
        use_readonly_storage = (state.get_config(
            "enable_events_readonly_table", False)
                                and not request_settings.get_consistent())
        storage = (self.__events_ro_table
                   if use_readonly_storage else self.__events_table)
        return StorageAndMappers(storage, self.__event_translator)
Example #5
    def process_query(self, query: Query,
                      request_settings: RequestSettings) -> None:
        # Swap in the read-only events table only when the feature flag is on
        # and the caller has not requested consistent reads.
        readonly_enabled = state.get_config("enable_events_readonly_table",
                                            False)
        if not readonly_enabled:
            return

        if request_settings.get_consistent():
            return

        data_source = query.get_data_source()

        if data_source.format_from() != self.__table_to_replace:
            return

        new_source = TableSource(
            table_name=self.__read_only_table,
            columns=data_source.get_columns(),
            mandatory_conditions=data_source.get_mandatory_conditions(),
            prewhere_candidates=data_source.get_prewhere_candidates(),
        )
        query.set_data_source(new_source)
Example #6
def callback_func(
    storage: str,
    query: Query,
    request_settings: RequestSettings,
    referrer: str,
    results: List[Result[QueryResult]],
) -> None:
    cache_hit = False
    is_duplicate = False

    # Capture whether any of the queries involved was a cache hit or a duplicate,
    # as cache hits may be a cause of inconsistency between results. This does
    # not attempt to distinguish between the specific scenarios (one or both
    # queries, or splits of those queries, could have hit the cache).
    if any(result.result.extra["stats"].get("cache_hit", 0) for result in results):
        cache_hit = True
    elif any(
        result.result.extra["stats"].get("is_duplicate", 0) for result in results
    ):
        is_duplicate = True

    consistent = request_settings.get_consistent()

    if not results:
        metrics.increment(
            "query_result",
            tags={"storage": storage, "match": "empty", "referrer": referrer},
        )
        return

    primary_result = results.pop(0)
    primary_result_data = primary_result.result.result["data"]

    for result in results:
        result_data = result.result.result["data"]

        metrics.timing(
            "diff_ms",
            round((result.execution_time - primary_result.execution_time) * 1000),
            tags={
                "referrer": referrer,
                "cache_hit": str(cache_hit),
                "is_duplicate": str(is_duplicate),
                "consistent": str(consistent),
            },
        )

        # Do not bother diffing the actual results of sampled queries
        if request_settings.get_turbo() or query.get_sample() not in [None, 1.0]:
            return

        if result_data == primary_result_data:
            metrics.increment(
                "query_result",
                tags={
                    "storage": storage,
                    "match": "true",
                    "referrer": referrer,
                    "cache_hit": str(cache_hit),
                    "is_duplicate": str(is_duplicate),
                    "consistent": str(consistent),
                },
            )
        else:
            # Do not log cache hits to Sentry as it creates too much noise
            if cache_hit:
                continue

            reason = assign_reason_category(result_data, primary_result_data, referrer)

            metrics.increment(
                "query_result",
                tags={
                    "storage": storage,
                    "match": "false",
                    "referrer": referrer,
                    "reason": reason,
                    "cache_hit": str(cache_hit),
                    "is_duplicate": str(is_duplicate),
                    "consistent": str(consistent),
                },
            )

            if len(result_data) != len(primary_result_data):
                sentry_sdk.capture_message(
                    f"Non matching {storage} result - different length",
                    level="warning",
                    tags={
                        "referrer": referrer,
                        "storage": storage,
                        "reason": reason,
                        "cache_hit": str(cache_hit),
                        "is_duplicate": str(is_duplicate),
                        "consistent": str(consistent),
                    },
                    extras={
                        "query": format_query(query),
                        "primary_result": len(primary_result_data),
                        "other_result": len(result_data),
                    },
                )

                break

            # Avoid sending too much data to Sentry - just one row for now
            for primary_row, row in zip(primary_result_data, result_data):
                if row != primary_row:
                    sentry_sdk.capture_message(
                        "Non matching result - different result",
                        level="warning",
                        tags={
                            "referrer": referrer,
                            "storage": storage,
                            "reason": reason,
                            "cache_hit": str(cache_hit),
                            "is_duplicate": str(is_duplicate),
                            "consistent": str(consistent),
                        },
                        extras={
                            "query": format_query(query),
                            "primary_result": primary_row,
                            "other_result": row,
                        },
                    )

                    break
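
A small, self-contained sketch of the comparison order used by callback_func (length first, then the first differing row); the function name is hypothetical:

from typing import Any, Optional, Sequence, Tuple


def first_mismatch(
    primary: Sequence[Any], other: Sequence[Any]
) -> Optional[Tuple[str, Any, Any]]:
    # Same order of checks as callback_func: report a length difference
    # before inspecting rows, and stop at the first differing row.
    if len(primary) != len(other):
        return ("different length", len(primary), len(other))
    for primary_row, row in zip(primary, other):
        if primary_row != row:
            return ("different result", primary_row, row)
    return None


assert first_mismatch([1, 2], [1, 2]) is None
assert first_mismatch([1, 2], [1, 3]) == ("different result", 2, 3)
assert first_mismatch([1], [1, 2]) == ("different length", 1, 2)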