def execute_query_with_caching(
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    # XXX: ``uncompressed_cache_max_cols`` is used to control both the result
    # cache and the uncompressed cache. These should be independent.
    use_cache, uc_max = state.get_configs(
        [
            ("use_cache", settings.USE_RESULT_CACHE),
            ("uncompressed_cache_max_cols", 5),
        ]
    )

    if (
        len(
            {
                # Skip aliases when counting columns
                (c.table_name, c.column_name)
                for c in clickhouse_query.get_all_ast_referenced_columns()
            }
        )
        > uc_max
    ):
        use_cache = False

    execute = partial(
        execute_query_with_rate_limits,
        clickhouse_query,
        request_settings,
        formatted_query,
        reader,
        timer,
        stats,
        query_settings,
    )

    with sentry_sdk.start_span(description="execute", op="db") as span:
        if use_cache:
            key = get_query_cache_key(formatted_query)
            result = cache.get(key)
            timer.mark("cache_get")
            stats["cache_hit"] = result is not None
            if result is not None:
                span.set_tag("cache", "hit")
                return result

            span.set_tag("cache", "miss")
            result = execute()
            cache.set(key, result)
            timer.mark("cache_set")
            return result
        else:
            return execute()
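# Illustrative sketch (not part of this module): the read-through pattern used
# above, reduced to plain Python. ``_demo_read_through`` and the dict-backed
# cache are hypothetical stand-ins; the point is the shape of the logic: derive
# a key from the formatted SQL, return a cached result on a hit, otherwise run
# the (rate limited) query and populate the cache before returning.
from hashlib import md5
from typing import Callable, Dict, TypeVar

T = TypeVar("T")


def _demo_read_through(
    cache: Dict[str, T], formatted_sql: str, execute: Callable[[], T]
) -> T:
    key = md5(formatted_sql.encode("utf-8")).hexdigest()
    if key in cache:
        # Cache hit: skip the database entirely.
        return cache[key]
    # Cache miss: run the query and store the result for later callers.
    result = execute()
    cache[key] = result
    return result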
def execute_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on the clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Execute a query and return a result.
    """
    # Experiment: if we are going to read more than X columns' worth of data,
    # do not use the uncompressed cache in ClickHouse.
    uc_max = state.get_config("uncompressed_cache_max_cols", 5)
    if (
        len(
            {
                # Skip aliases when counting columns
                (c.table_name, c.column_name)
                for c in clickhouse_query.get_all_ast_referenced_columns()
            }
        )
        > uc_max
    ):
        query_settings["use_uncompressed_cache"] = 0

    # Force the query to use the first shard replica, which should have
    # synchronously received any cluster writes before this query is run.
    consistent = request_settings.get_consistent()
    stats["consistent"] = consistent
    if consistent:
        query_settings["load_balancing"] = "in_order"
        query_settings["max_threads"] = 1

    result = reader.execute(
        formatted_query,
        query_settings,
        with_totals=clickhouse_query.has_totals(),
    )

    timer.mark("execute")
    stats.update(
        {"result_rows": len(result["data"]), "result_cols": len(result["meta"])}
    )

    return result
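# Illustrative sketch (not part of this module): how the two adjustments above
# shape the ClickHouse settings mapping. ``_demo_build_settings`` is a
# hypothetical helper; ``use_uncompressed_cache``, ``load_balancing`` and
# ``max_threads`` are the actual settings written above.
from typing import Any, Dict


def _demo_build_settings(
    n_referenced_columns: int, consistent: bool, uc_max: int = 5
) -> Dict[str, Any]:
    query_settings: Dict[str, Any] = {}
    if n_referenced_columns > uc_max:
        # Wide reads would churn the uncompressed block cache.
        query_settings["use_uncompressed_cache"] = 0
    if consistent:
        # Pin the query to the first replica and a single thread so it
        # observes writes acknowledged before the query started.
        query_settings["load_balancing"] = "in_order"
        query_settings["max_threads"] = 1
    return query_settings


# e.g. _demo_build_settings(8, True) returns
# {"use_uncompressed_cache": 0, "load_balancing": "in_order", "max_threads": 1}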
def _get_all_columns(query: Query) -> Columnset:
    return {c.column_name for c in query.get_all_ast_referenced_columns()}
def execute(
    self,
    query: Query,
    request_settings: RequestSettings,
    runner: SplitQueryRunner,
) -> Optional[QueryResult]:
    """
    Split the query in two steps if a large number of columns is being
    selected.
    - First query selects only event_id, project_id and timestamp.
    - Second query selects all fields for just those events.
    - Shrink the date range accordingly.
    """
    limit = query.get_limit()
    if (
        limit is None
        or limit == 0
        or query.get_groupby()
        or query.get_aggregations()
        or not query.get_selected_columns()
    ):
        return None

    if limit > settings.COLUMN_SPLIT_MAX_LIMIT:
        metrics.increment("column_splitter.query_above_limit")
        return None

    # Do not split if there is already an = or IN condition on the ID column.
    id_column_matcher = FunctionCall(
        Or([String(ConditionFunctions.EQ), String(ConditionFunctions.IN)]),
        (
            Column(None, String(self.__id_column)),
            AnyExpression(),
        ),
    )

    for expr in query.get_condition_from_ast() or []:
        match = id_column_matcher.match(expr)
        if match:
            return None

    # We need to count the number of table/column name pairs rather than the
    # number of distinct Column objects in the query, so as to avoid counting
    # aliased columns multiple times.
    total_columns = {
        (col.table_name, col.column_name)
        for col in query.get_all_ast_referenced_columns()
    }

    minimal_query = copy.deepcopy(query)
    minimal_query.set_selected_columns(
        [self.__id_column, self.__project_column, self.__timestamp_column]
    )
    # TODO: provide the table alias name to this splitter if we ever use it
    # in joins.
    minimal_query.set_ast_selected_columns(
        [
            SelectedExpression(
                self.__id_column, ColumnExpr(None, None, self.__id_column)
            ),
            SelectedExpression(
                self.__project_column,
                ColumnExpr(None, None, self.__project_column),
            ),
            SelectedExpression(
                self.__timestamp_column,
                ColumnExpr(None, None, self.__timestamp_column),
            ),
        ]
    )

    for exp in minimal_query.get_all_expressions():
        if exp.alias in (
            self.__id_column,
            self.__project_column,
            self.__timestamp_column,
        ) and not (isinstance(exp, ColumnExpr) and exp.column_name == exp.alias):
            logger.warning(
                "Potential alias shadowing due to column splitter",
                extra={"expression": exp},
                exc_info=True,
            )

    minimal_columns = {
        (col.table_name, col.column_name)
        for col in minimal_query.get_all_ast_referenced_columns()
    }
    if len(total_columns) <= len(minimal_columns):
        return None

    # Ensure the AST minimal query is actually runnable on its own.
    if not minimal_query.validate_aliases():
        return None

    legacy_references = set(minimal_query.get_all_referenced_columns())
    ast_column_names = {
        c.column_name for c in minimal_query.get_all_ast_referenced_columns()
    }
    # Ensure the legacy minimal query (which does not expand alias references)
    # does not contain alias references we removed when creating minimal_query.
    if legacy_references - ast_column_names:
        metrics.increment("columns.skip_invalid_legacy_query")
        return None

    result = runner(minimal_query, request_settings)
    del minimal_query

    if not result.result["data"]:
        return None

    # Make a copy before mutating, so that if this splitter ends up returning
    # None (which drives the execution strategy to ignore its result and try
    # the next splitter), the original query is left unmodified.
    query = copy.deepcopy(query)

    event_ids = list(
        {event[self.__id_column] for event in result.result["data"]}
    )
    if len(event_ids) > settings.COLUMN_SPLIT_MAX_RESULTS:
        # We may be running a query that is beyond the ClickHouse maximum
        # query size, so we cowardly abandon.
        metrics.increment("column_splitter.intermediate_results_beyond_limit")
        return None

    query.add_conditions([(self.__id_column, "IN", event_ids)])
    query.add_condition_to_ast(
        in_condition(
            None,
            ColumnExpr(None, None, self.__id_column),
            [LiteralExpr(None, e_id) for e_id in event_ids],
        )
    )
    query.set_offset(0)
    # TODO: This is technically wrong. Event ids are unique per project, not
    # globally. So if the minimal query returned the same event_id for two
    # different projects, we would be underestimating the limit here.
    query.set_limit(len(event_ids))

    project_ids = list(
        {event[self.__project_column] for event in result.result["data"]}
    )
    _replace_condition(
        query,
        self.__project_column,
        "IN",
        project_ids,
    )
    _replace_ast_condition(
        query,
        self.__project_column,
        "IN",
        literals_tuple(None, [LiteralExpr(None, p_id) for p_id in project_ids]),
    )

    timestamps = [
        event[self.__timestamp_column] for event in result.result["data"]
    ]
    _replace_condition(
        query,
        self.__timestamp_column,
        ">=",
        util.parse_datetime(min(timestamps)).isoformat(),
    )
    _replace_ast_condition(
        query,
        self.__timestamp_column,
        ">=",
        LiteralExpr(None, util.parse_datetime(min(timestamps))),
    )
    # We add 1 second since this gets translated to ('timestamp', '<', to_date)
    # and events are stored with a granularity of 1 second.
    _replace_condition(
        query,
        self.__timestamp_column,
        "<",
        (util.parse_datetime(max(timestamps)) + timedelta(seconds=1)).isoformat(),
    )
    _replace_ast_condition(
        query,
        self.__timestamp_column,
        "<",
        LiteralExpr(
            None,
            util.parse_datetime(max(timestamps)) + timedelta(seconds=1),
        ),
    )

    return runner(query, request_settings)
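# Illustrative sketch (not part of this module): deriving the second-phase
# filters from the minimal query's rows, as done above. The row shape and the
# ``_demo_second_phase_filters`` helper are hypothetical; the point is that the
# full-column query only has to scan the exact events, projects and
# one-second-padded time window that the cheap three-column query identified.
from datetime import datetime, timedelta
from typing import Any, List, Mapping, Tuple


def _demo_second_phase_filters(
    rows: List[Mapping[str, Any]],
) -> Tuple[List[str], List[int], datetime, datetime]:
    # sorted() is only for demo determinism; the splitter itself does not sort.
    event_ids = sorted({r["event_id"] for r in rows})
    project_ids = sorted({r["project_id"] for r in rows})
    timestamps = [datetime.fromisoformat(r["timestamp"]) for r in rows]
    # The upper bound is padded by one second because events are stored with
    # one-second granularity and the final condition is a strict "<".
    return (
        event_ids,
        project_ids,
        min(timestamps),
        max(timestamps) + timedelta(seconds=1),
    )


# e.g. rows from the minimal query:
# [{"event_id": "a", "project_id": 1, "timestamp": "2020-03-01T10:00:00"},
#  {"event_id": "b", "project_id": 1, "timestamp": "2020-03-01T10:05:00"}]
# -> (["a", "b"], [1], 2020-03-01 10:00:00, 2020-03-01 10:05:01)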