Exemplo n.º 1
0
    def process_query(self, query: Query, request_settings: RequestSettings,) -> None:
        """
        Replace a join data source with a single table when only one is used.

        When the data source of ``query`` is a ``JoinClause`` and all the
        qualified columns referenced by the query share one table alias, the
        data source is replaced in place with the entry that the join clause
        exposes under that alias. In every other case the query is left
        untouched.

        :param query: The query to inspect and, when possible, simplify.
        :param request_settings: Not used by this processor.
        :raises AssertionError: If the join query references no qualified
            column at all.
        """
        from_clause = query.get_data_source()
        if not isinstance(from_clause, JoinClause):
            return

        # This will be much better when we will represent columns
        # with a more structured data type than strings.
        matches = (
            QUALIFIED_COLUMN_REGEX.match(qualified_column)
            for qualified_column in query.get_all_referenced_columns()
        )
        # match[1] is the first parenthesized group in the regex, thus
        # the table alias. Columns that do not match are ignored.
        referenced_aliases = {match[1] for match in matches if match}

        assert referenced_aliases, "Trying to optimize a join query without aliases"
        if len(referenced_aliases) > 1:
            # More than one table is actually used: the join has to stay.
            return

        from_tables = from_clause.get_tables()
        # Exactly one alias is left at this point, so pop() returns it.
        table = from_tables[referenced_aliases.pop()]

        query.set_data_source(table)
Exemplo n.º 2
0
def execute_query_with_caching(
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Execute the query, going through the result cache when it is enabled.

    The cache is bypassed when the ``use_cache`` config is falsy or when the
    query references more columns than ``uncompressed_cache_max_cols``.
    When the cache is consulted, ``stats["cache_hit"]`` records the outcome
    and the timer is marked after the cache read and after the cache write.
    """
    # XXX: ``uncompressed_cache_max_cols`` drives both the result cache and
    # the uncompressed cache. The two should be controlled independently.
    cache_enabled, max_cols = state.get_configs(
        [("use_cache", settings.USE_RESULT_CACHE), ("uncompressed_cache_max_cols", 5)]
    )
    too_many_columns = len(clickhouse_query.get_all_referenced_columns()) > max_cols

    def run_query() -> Result:
        # Forward every argument, unchanged, to the rate limited execution.
        return execute_query_with_rate_limits(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )

    with sentry_sdk.start_span(description="execute", op="db") as span:
        if too_many_columns or not cache_enabled:
            return run_query()

        cache_key = get_query_cache_key(formatted_query)
        cached = cache.get(cache_key)
        timer.mark("cache_get")
        is_hit = cached is not None
        stats["cache_hit"] = is_hit
        if is_hit:
            span.set_tag("cache", "hit")
            return cached

        span.set_tag("cache", "miss")
        fresh = run_query()
        cache.set(cache_key, fresh)
        timer.mark("cache_set")
        return fresh
Exemplo n.º 3
0
def execute_query(
    # TODO: The whole clickhouse query has to be passed in for as long as
    # the execute method depends on it. Without that dependency this file
    # could rely either entirely on the clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Run the formatted query through the reader and return its result.

    ``query_settings`` is adjusted in place before the query is sent, and
    ``stats`` receives the consistency flag plus the number of rows and
    columns found in the result.
    """
    # Experiment: when the query pulls more than X columns worth of data,
    # turn off the uncompressed_cache in ClickHouse.
    max_cols = state.get_config("uncompressed_cache_max_cols", 5)
    referenced_columns = clickhouse_query.get_all_referenced_columns()
    if len(referenced_columns) > max_cols:
        query_settings["use_uncompressed_cache"] = 0

    is_consistent = request_settings.get_consistent()
    stats["consistent"] = is_consistent
    if is_consistent:
        # Pin the query to the first shard replica, which should have
        # synchronously received any cluster writes before this query runs.
        query_settings.update({"load_balancing": "in_order", "max_threads": 1})

    run = reader.execute
    result = run(
        formatted_query, query_settings, with_totals=clickhouse_query.has_totals()
    )
    timer.mark("execute")

    # Compute both sizes before touching stats, so that stats is only
    # written once the result has been fully inspected.
    row_count = len(result["data"])
    column_count = len(result["meta"])
    stats["result_rows"] = row_count
    stats["result_cols"] = column_count

    return result