def test_project_extension_query_adds_rate_limits():
    """Processing the project extension appends exactly one project rate limit."""
    extension = ProjectExtension(
        processor=ProjectExtensionProcessor(project_column="project_id")
    )
    raw_data = {'project': [2, 3]}
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query(
        {"conditions": []},
        TableSource("my_table", ColumnSet([])),
    )
    request_settings = RequestSettings(turbo=False, consistent=False, debug=False)

    limits_before = len(request_settings.get_rate_limit_params())
    extension.get_processor().process_query(query, valid_data, request_settings)
    limits_after = request_settings.get_rate_limit_params()

    # The processor must have added a single new rate limit.
    assert len(limits_after) == limits_before + 1

    # It is appended last; the bucket is the (stringified) project id and the
    # limits fall back to the 1000/1000 defaults since no override is set.
    newest = limits_after[-1]
    assert newest.bucket == '2'
    assert newest.per_second_limit == 1000
    assert newest.concurrent_limit == 1000
def process_query(self, query: Query, request_settings: RequestSettings) -> None:
    """Attach a per-project rate limit to the request settings.

    No-op when a project rate limit is already registered or when no project
    ids can be extracted from the query.
    """
    # If the settings don't already have a project rate limit, add one.
    already_limited = any(
        params.rate_limit_name == PROJECT_RATE_LIMIT_NAME
        for params in request_settings.get_rate_limit_params()
    )
    if already_limited:
        return

    project_ids = get_project_ids_in_query_ast(query, self.project_column)
    if not project_ids:
        return

    # TODO: Use all the projects, not just one
    project_id = project_ids.pop()

    default_per_second, default_concurrent = get_configs(
        [("project_per_second_limit", 1000), ("project_concurrent_limit", 1000)]
    )
    # Specific projects can have their rate limits overridden via
    # project-suffixed config keys; fall back to the global defaults.
    per_second, concurrent = get_configs(
        [
            ("project_per_second_limit_{}".format(project_id), default_per_second),
            ("project_concurrent_limit_{}".format(project_id), default_concurrent),
        ]
    )
    request_settings.add_rate_limit(
        RateLimitParameters(
            rate_limit_name=PROJECT_RATE_LIMIT_NAME,
            bucket=str(project_id),
            per_second_limit=per_second,
            concurrent_limit=concurrent,
        )
    )
def execute_query_with_rate_limits(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """Execute the query while holding all applicable rate-limit slots.

    Records the rate-limit stats into ``stats`` and, when other queries for
    the same project are running concurrently, shrinks ``max_threads`` so the
    project's total thread usage stays bounded.
    """
    # XXX: We should consider moving this that it applies to the logical query,
    # not the physical query.
    with RateLimitAggregator(
        request_settings.get_rate_limit_params()
    ) as rate_limit_stats_container:
        stats.update(rate_limit_stats_container.to_dict())
        timer.mark("rate_limit")

        project_stats = rate_limit_stats_container.get_stats(
            PROJECT_RATE_LIMIT_NAME
        )
        if (
            project_stats is not None
            and "max_threads" in query_settings
            and project_stats.concurrent > 1
        ):
            # Give up one thread per other concurrent query for this project,
            # but never drop below a single thread.
            current_threads = query_settings["max_threads"]
            query_settings["max_threads"] = max(
                1, current_threads - project_stats.concurrent + 1
            )

        return execute_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )
def test_project_extension_project_rate_limits_are_overridden():
    """Per-project config keys take precedence over the default rate limits."""
    extension = ProjectExtension(processor=ProjectExtensionProcessor())
    raw_data = {'project': [2, 3]}
    valid_data = validate_jsonschema(raw_data, extension.get_schema())
    query = Query({'conditions': []})
    request_settings = RequestSettings(turbo=False, consistent=False, debug=False)

    # Override both limits for project 2 specifically.
    state.set_config('project_per_second_limit_2', 5)
    state.set_config('project_concurrent_limit_2', 10)

    extension.get_processor().process_query(query, valid_data, request_settings)

    # The most recently appended rate limit must carry the overridden values.
    newest = request_settings.get_rate_limit_params()[-1]
    assert newest.bucket == '2'
    assert newest.per_second_limit == 5
    assert newest.concurrent_limit == 10