def get_rates(bucket: str, rollup: int = 60) -> Sequence[Any]:
    """Return per-second rates for consecutive ``rollup``-second windows.

    One ZCOUNT per window is queued on a non-transactional pipeline; each
    window is the half-open score range ``[start, start + rollup)``.  The
    windows span the last ``rate_history_sec`` seconds (config value,
    default 3600) and the result is ordered oldest window first.
    """
    current_ts = int(time.time())
    key = "{}{}".format(ratelimit_prefix, bucket)
    pipeline = rds.pipeline(transaction=False)
    history_s = get_config("rate_history_sec", 3600)

    # The range walks backwards from the most recent full window; reversing
    # it queues (and therefore returns) the oldest window first.
    window_starts = range(current_ts - rollup, current_ts - history_s, -rollup)
    for window_start in reversed(window_starts):
        # "(" marks the upper bound as exclusive in Redis range syntax.
        exclusive_end = "({:f}".format(window_start + rollup)
        pipeline.zcount(key, window_start, exclusive_end)

    rates = []
    for count in pipeline.execute():
        rates.append(count / float(rollup))
    return rates
def get_rates(bucket, rollup=60):
    """Compute the per-second rate of a bucket for each rollup window.

    Every window is ``rollup`` seconds wide and is counted with ZCOUNT over
    ``[start, start + rollup)``.  Windows reach back ``rate_history_sec``
    seconds (config value, default 3600); rates come back oldest first.
    """
    started = int(time.time())
    redis_key = '{}{}'.format(ratelimit_prefix, bucket)
    batch = rds.pipeline(transaction=False)
    retention_s = get_config('rate_history_sec', 3600)

    newest_first = range(started - rollup, started - retention_s, -rollup)
    for lower in reversed(newest_first):
        # Leading "(" makes the upper score bound exclusive.
        upper = '({:f}'.format(lower + rollup)
        batch.zcount(redis_key, lower, upper)

    counts = batch.execute()
    return [hits / float(rollup) for hits in counts]
def rate_limit(bucket, per_second_limit=None, concurrent_limit=None):
    """
    Generator backing a rate-limiting context manager (the wrapping
    decorator is not visible in this block -- TODO confirm).

    Limits on a rolling-window per-second rate and on the number of
    requests running concurrently.  A single redis sorted set per bucket
    tracks both; the score is the query timestamp.  A query is pushed ahead
    in time when it starts, so it counts as concurrent, and pulled back to
    its start time when it finishes, so it counts towards the historical
    rate.

    time >>----->
    +-----------------------------+--------------------------------+
    | historical query window     | currently executing queries    |
    +-----------------------------+--------------------------------+
                                  ^
                                 now

    Yields a tuple ``(allowed, per_second, concurrent)``.  ``(True, 0, 0)``
    is yielded when the ``bypass_rate_limit`` config is 1 or when the redis
    pipeline fails.
    """
    key = '{}{}'.format(ratelimit_prefix, bucket)
    query_id = uuid.uuid4()
    started_at = time.time()
    bypass, history_s = get_configs([
        ('bypass_rate_limit', 0),
        ('rate_history_sec', 3600),
    ])

    if bypass == 1:
        yield (True, 0, 0)
        return

    commands = rds.pipeline(transaction=False)
    # 1) drop entries older than the retention window ("(" = exclusive bound)
    oldest_kept = '({:f}'.format(started_at - history_s)
    commands.zremrangebyscore(key, '-inf', oldest_kept)
    # 2) register this query at its deadline so it counts as running
    commands.zadd(key, started_at + max_query_duration_s, query_id)
    # 3) finished queries inside the lookback window
    commands.zcount(key, started_at - rate_lookback_s, started_at)
    # 4) queries scored strictly after now, i.e. still running
    commands.zcount(key, '({:f}'.format(started_at), '+inf')

    try:
        results = commands.execute()
        _, _, rate, concurrent = results
    except Exception as ex:
        logger.exception(ex)
        # fail open if redis is having issues
        yield (True, 0, 0)
        return

    per_second = rate / float(rate_lookback_s)
    within_rate = per_second_limit is None or per_second <= per_second_limit
    allowed = within_rate and (
        concurrent_limit is None or concurrent <= concurrent_limit
    )

    try:
        yield (allowed, per_second, concurrent)
    finally:
        try:
            if not allowed:
                # rejected queries are not counted at all
                rds.zrem(key, query_id)
            else:
                # move the query back to its start time
                rds.zincrby(key, query_id, -float(max_query_duration_s))
        except Exception as ex:
            logger.exception(ex)
def record_query(data: Mapping[str, Optional[Any]]) -> None:
    """Serialize a query record, keep it in redis and publish it to Kafka.

    The serialized record is pushed onto ``queries_list``, which is trimmed
    to the 200 most recent entries, and then produced to
    ``settings.QUERIES_TOPIC``.  The module-level producer ``kfk`` is created
    on first use.  Any failure is logged and swallowed.
    """
    global kfk
    max_redis_queries = 200
    try:
        data = safe_dumps(data)

        recent = rds.pipeline(transaction=False)
        recent.lpush(queries_list, data)
        recent.ltrim(queries_list, 0, max_redis_queries - 1)
        recent.execute()

        if kfk is None:
            brokers = ",".join(settings.DEFAULT_BROKERS)
            kfk = Producer({"bootstrap.servers": brokers})

        kfk.produce(settings.QUERIES_TOPIC, data.encode("utf-8"))
    except Exception as ex:
        logger.exception("Could not record query due to error: %r", ex)
def _add_to_redis(self, num_hits: int) -> None:
    """
    Add ``num_hits`` to the hash field for the current second, then pass the
    hash's field names to ``self._prune`` along with that timestamp.
    """
    current_second = int(time())

    pipeline = redis_client.pipeline()
    pipeline.hincrby(self.__name, str(current_second), num_hits)
    pipeline.hkeys(self.__name)
    replies = pipeline.execute()

    # replies[0] is the HINCRBY result; replies[1] is the HKEYS listing.
    self._prune(current_second, replies[1])
def record_query(query_metadata: Mapping[str, Any]) -> None:
    """Store query metadata in redis and produce it to the queries topic.

    The serialized metadata is pushed onto ``queries_list`` (trimmed to the
    200 newest entries) and produced to ``settings.QUERIES_TOPIC`` with
    ``_record_query_delivery_callback`` as the delivery callback.  The
    module-level producer ``kfk`` is built on first use.  Errors are logged,
    never raised.
    """
    global kfk
    max_redis_queries = 200
    try:
        data = safe_dumps(query_metadata)

        recent = rds.pipeline(transaction=False)
        recent.lpush(queries_list, data)
        recent.ltrim(queries_list, 0, max_redis_queries - 1)
        recent.execute()

        if kfk is None:
            kfk = Producer(build_default_kafka_producer_configuration())

        # trigger queued delivery callbacks
        kfk.poll(0)
        kfk.produce(
            settings.QUERIES_TOPIC,
            data.encode("utf-8"),
            on_delivery=_record_query_delivery_callback,
        )
    except Exception as ex:
        logger.exception("Could not record query due to error: %r", ex)
def record_query(data):
    """Persist a query record to redis and forward it to Kafka.

    Keeps at most 200 serialized records in ``queries_list`` and produces
    each record to ``settings.QUERIES_TOPIC``.  The shared producer ``kfk``
    is created lazily.  All exceptions are logged and suppressed.
    """
    global kfk
    max_redis_queries = 200
    try:
        data = safe_dumps(data)

        batch = rds.pipeline(transaction=False)
        batch.lpush(queries_list, data)
        batch.ltrim(queries_list, 0, max_redis_queries - 1)
        batch.execute()

        if kfk is None:
            producer_config = {
                'bootstrap.servers': ','.join(settings.DEFAULT_BROKERS),
            }
            kfk = Producer(producer_config)

        kfk.produce(settings.QUERIES_TOPIC, data.encode('utf-8'))
    except Exception as ex:
        logger.exception('Could not record query due to error: %r', ex)
def set_project_needs_final(
    project_id: int,
    state_name: Optional[ReplacerState],
    replacement_type: ReplacementType,
) -> None:
    """
    Store the current time under the project's needs-final key and the
    replacement type under the companion type key.  Both keys are written in
    one pipeline and expire after ``settings.REPLACER_KEY_TTL``.
    """
    keys = ProjectsQueryFlags._build_project_needs_final_key_and_type_key(
        project_id, state_name
    )
    needs_final_key, replacement_type_key = keys

    pipeline = redis_client.pipeline()
    pipeline.set(needs_final_key, time.time(), ex=settings.REPLACER_KEY_TTL)
    pipeline.set(
        replacement_type_key, replacement_type, ex=settings.REPLACER_KEY_TTL
    )
    pipeline.execute()
def set_project_exclude_groups(project_id, group_ids):
    """Record `group_ids` as excluded for a project.

    Adds {group_id: now, ...} to the project's ZSET, removes entries older
    than `settings.REPLACER_KEY_TTL`, and expires the whole ZSET in case it
    is rarely touched.
    """
    timestamp = time.time()
    zset_key = get_project_exclude_groups_key(project_id)

    # Every member gets the same score: the time of this call.
    scores = dict.fromkeys(map(str, group_ids), timestamp)

    pipeline = redis_client.pipeline()
    pipeline.zadd(zset_key, **scores)
    pipeline.zremrangebyscore(
        zset_key, -1, timestamp - settings.REPLACER_KEY_TTL
    )
    pipeline.expire(zset_key, int(settings.REPLACER_KEY_TTL))
    pipeline.execute()
def record_query(data):
    """Keep a query record in redis and optionally publish it to Kafka.

    The record is serialized before the guarded section, so serialization
    errors propagate to the caller.  The serialized record is pushed onto
    ``queries_list`` (trimmed to the 200 newest entries).  It is produced to
    ``settings.QUERIES_TOPIC`` only when ``settings.RECORD_QUERIES`` is
    truthy.  Redis and Kafka errors are logged and suppressed.
    """
    global kfk
    max_redis_queries = 200
    # NOTE(review): `for_json` is not a stdlib json.dumps argument;
    # presumably `json` is simplejson here -- confirm against the imports.
    data = json.dumps(data, for_json=True)
    try:
        batch = rds.pipeline(transaction=False)
        batch.lpush(queries_list, data)
        batch.ltrim(queries_list, 0, max_redis_queries - 1)
        batch.execute()

        if not settings.RECORD_QUERIES:
            return

        if kfk is None:
            brokers = ','.join(settings.DEFAULT_BROKERS)
            kfk = Producer({'bootstrap.servers': brokers})
        kfk.produce(settings.QUERIES_TOPIC, data.encode('utf-8'))
    except Exception as ex:
        logger.exception(ex)
def set_project_exclude_groups(
    project_id: int, group_ids: Sequence[int], state_name: Optional[ReplacerState]
) -> None:
    """Record `group_ids` as excluded for a project.

    Adds {group_id: now, ...} to the project's ZSET, removes entries older
    than `settings.REPLACER_KEY_TTL`, and expires the whole ZSET in case it
    is rarely touched.
    """
    timestamp = time.time()
    zset_key = get_project_exclude_groups_key(project_id, state_name)

    # Every member gets the same score: the time of this call.
    scores: Mapping[str, float] = dict.fromkeys(map(str, group_ids), timestamp)

    pipeline = redis_client.pipeline()
    pipeline.zadd(zset_key, **scores)
    pipeline.zremrangebyscore(
        zset_key, -1, timestamp - settings.REPLACER_KEY_TTL
    )
    pipeline.expire(zset_key, int(settings.REPLACER_KEY_TTL))
    pipeline.execute()
def get_projects_query_flags(
    project_ids: Sequence[int], state_name: Optional[ReplacerState]
) -> Tuple[bool, Sequence[int]]:
    """\
    Load the query flags for a set of projects in one pipeline round trip.

    1. Fetch `needs_final` for each Project
    2. Trim the groups-to-exclude ZSET for each Project
    3. Fetch the remaining groups to exclude for each Project

    Returns (needs_final, group_ids_to_exclude), where `needs_final` is True
    if any project has a truthy flag and the group ids are de-duplicated and
    sorted.
    """
    unique_projects = set(project_ids)
    project_count = len(unique_projects)
    now = time.time()
    pipeline = redis_client.pipeline()

    flag_keys = [
        get_project_needs_final_key(pid, state_name) for pid in unique_projects
    ]
    for flag_key in flag_keys:
        pipeline.get(flag_key)

    zset_keys = [
        get_project_exclude_groups_key(pid, state_name) for pid in unique_projects
    ]
    for zset_key in zset_keys:
        # Two commands per project: first the trim, then the read.
        pipeline.zremrangebyscore(
            zset_key, float("-inf"), now - settings.REPLACER_KEY_TTL
        )
        pipeline.zrevrangebyscore(
            zset_key, float("inf"), now - settings.REPLACER_KEY_TTL
        )

    replies = pipeline.execute()

    needs_final = any(replies[:project_count])
    # After the GET replies the layout is (trim, read, trim, read, ...), so
    # the reads sit at every second position starting at project_count + 1.
    read_replies = replies[(project_count + 1) :: 2]
    unique_group_ids = {
        int(group_id) for members in read_replies for group_id in members
    }
    return (needs_final, sorted(unique_group_ids))
def set_project_exclude_groups(
    project_id: int,
    group_ids: Sequence[int],
    state_name: Optional[ReplacerState],
    # replacement type is just for metrics, not necessary for functionality
    replacement_type: ReplacementType,
) -> None:
    """
    Called when a replacement comes in: for one project, record the group
    ids that were deleted as a result of the replacement.

    Adds {group_id: now, ...} to the project's ZSET and {replacement_type:
    now} to the companion type ZSET.  For both ZSETs, entries older than
    `settings.REPLACER_KEY_TTL` are removed and the whole key is expired in
    case it is rarely touched.
    """
    now = time.time()
    groups_key, type_key = (
        ProjectsQueryFlags._build_project_exclude_groups_key_and_type_key(
            project_id, state_name
        )
    )
    pipeline = redis_client.pipeline()

    excluded_groups: Mapping[str | bytes, bytes | float | int | str] = {
        str(group_id): now for group_id in group_ids
    }
    replacement_types: Mapping[str | bytes, bytes | float | int | str] = {
        replacement_type: now
    }

    # Same three steps for both ZSETs, group ids first: add the new members,
    # drop the ones past the TTL (group deletions that should have been
    # merged by now), and refresh the key expiry.
    for zset_key, members in (
        (groups_key, excluded_groups),
        (type_key, replacement_types),
    ):
        pipeline.zadd(zset_key, members)
        pipeline.zremrangebyscore(zset_key, -1, now - settings.REPLACER_KEY_TTL)
        pipeline.expire(zset_key, int(settings.REPLACER_KEY_TTL))

    pipeline.execute()
def get_projects_query_flags(project_ids):
    """\
    Load the query flags for a set of projects in one pipeline round trip.

    1. Fetch `needs_final` for each Project
    2. Trim the groups-to-exclude ZSET for each Project
    3. Fetch the remaining groups to exclude for each Project

    Returns (needs_final, group_ids_to_exclude), where `needs_final` is True
    if any project has a truthy flag and the group ids are de-duplicated and
    sorted.
    """
    project_ids = set(project_ids)
    project_count = len(project_ids)
    now = time.time()
    pipeline = redis_client.pipeline()

    flag_keys = [get_project_needs_final_key(pid) for pid in project_ids]
    for flag_key in flag_keys:
        pipeline.get(flag_key)

    zset_keys = [get_project_exclude_groups_key(pid) for pid in project_ids]
    for zset_key in zset_keys:
        # Two commands per project: first the trim, then the read.
        pipeline.zremrangebyscore(
            zset_key, float('-inf'), now - settings.REPLACER_KEY_TTL
        )
        pipeline.zrevrangebyscore(
            zset_key, float('inf'), now - settings.REPLACER_KEY_TTL
        )

    replies = pipeline.execute()

    needs_final = any(replies[:project_count])
    # After the GET replies the layout is (trim, read, trim, read, ...), so
    # the reads sit at every second position starting at project_count + 1.
    read_replies = replies[(project_count + 1)::2]
    unique_group_ids = {
        int(group_id) for members in read_replies for group_id in members
    }
    return (needs_final, sorted(unique_group_ids))
def rate_limit(
    rate_limit_params: RateLimitParameters,
) -> Iterator[Optional[RateLimitStats]]:
    """
    Generator backing a rate-limiting context manager (the wrapping
    decorator is not visible in this block -- TODO confirm).

    Limits on a rolling-window per-second rate and on the number of
    requests running concurrently.  A single redis sorted set per bucket
    tracks both; the score is the query timestamp.  A query is pushed ahead
    in time when it starts, so it counts as concurrent, and pulled back to
    its start time when it finishes, so it counts towards the historical
    rate.

    time >>----->
    +-----------------------------+--------------------------------+
    | historical query window     | currently executing queries    |
    +-----------------------------+--------------------------------+
                                  ^
                                 now

    Yields the measured ``RateLimitStats``, or ``None`` when the
    ``bypass_rate_limit`` config is 1 or the redis pipeline fails.  Raises
    ``RateLimitExceeded`` when a configured limit is exceeded.
    """
    key = "{}{}".format(state.ratelimit_prefix, rate_limit_params.bucket)
    query_id = uuid.uuid4()
    started_at = time.time()

    bypass, history_s = state.get_configs(
        [("bypass_rate_limit", 0), ("rate_history_sec", 3600)]
    )
    assert isinstance(history_s, (int, float))

    if bypass == 1:
        yield None
        return

    commands = rds.pipeline(transaction=False)
    # 1) drop entries older than the retention window ("(" = exclusive bound)
    oldest_kept = "({:f}".format(started_at - history_s)
    commands.zremrangebyscore(key, "-inf", oldest_kept)
    # 2) register this query at its deadline so it counts as running
    commands.zadd(key, started_at + state.max_query_duration_s, query_id)  # type: ignore
    # 3) historical count, or a placeholder so the reply keeps four slots
    if rate_limit_params.per_second_limit is not None:
        commands.zcount(key, started_at - state.rate_lookback_s, started_at)
    else:
        commands.exists("nosuchkey")  # no-op if we don't need per-second
    # 4) concurrent count, or the same placeholder
    if rate_limit_params.concurrent_limit is not None:
        commands.zcount(key, "({:f}".format(started_at), "+inf")
    else:
        commands.exists("nosuchkey")  # no-op if we don't need concurrent

    try:
        replies = commands.execute()
        _, _, historical, concurrent = replies
        historical = int(historical)
        concurrent = int(concurrent)
    except Exception as ex:
        logger.exception(ex)
        # fail open if redis is having issues
        yield None
        return

    per_second = historical / float(state.rate_lookback_s)
    stats = RateLimitStats(rate=per_second, concurrent=concurrent)

    scope = rate_limit_params.rate_limit_name
    Reason = namedtuple("Reason", "scope name val limit")
    candidates = [
        Reason(scope, "concurrent", concurrent, rate_limit_params.concurrent_limit),
        Reason(scope, "per-second", per_second, rate_limit_params.per_second_limit),
    ]

    # The first exceeded limit wins; concurrency is checked before rate.
    violated = None
    for candidate in candidates:
        if candidate.limit is not None and candidate.val > candidate.limit:
            violated = candidate
            break

    if violated is not None:
        try:
            # not allowed / not counted
            rds.zrem(key, query_id)
        except Exception as ex:
            logger.exception(ex)
        message = (
            "{r.scope} {r.name} of {r.val:.0f} exceeds limit of {r.limit:.0f}"
        ).format(r=violated)
        raise RateLimitExceeded(message)

    try:
        yield stats
    finally:
        try:
            # return the query to its start time
            rds.zincrby(key, query_id, -float(state.max_query_duration_s))
        except Exception as ex:
            logger.exception(ex)
def rate_limit(
    rate_limit_params: RateLimitParameters,
) -> Iterator[Optional[RateLimitStats]]:
    """
    A context manager for rate limiting that allows for limiting based on:
        * a rolling-window per-second rate
        * the number of queries concurrently running.

    It uses one redis sorted set to keep track of both of these limits.
    The following mapping is kept in redis:

        bucket: SortedSet([(timestamp1, query_id1), (timestamp2, query_id2) ...])

    Queries are thrown ahead in time when they start so we can count them
    as concurrent, and thrown back to their start time once they finish so
    we can count them towards the historical rate. See the comments for
    an example.

    time >>----->
    +-----------------------------+--------------------------------+
    | historical query window     | currently executing queries    |
    +-----------------------------+--------------------------------+
                                  ^
                                 now

    Yields the measured ``RateLimitStats``, or ``None`` when the
    ``bypass_rate_limit`` config is 1 or the redis pipeline fails (fail
    open).  Raises ``RateLimitExceeded`` when a configured limit is
    exceeded.  The number of stale entries removed during cleanup is
    reported as the ``rate_limit.stale`` metric after the pipeline has run.
    """
    bucket = "{}{}".format(state.ratelimit_prefix, rate_limit_params.bucket)
    query_id = str(uuid.uuid4())
    now = time.time()
    bypass_rate_limit, rate_history_s = state.get_configs(
        [("bypass_rate_limit", 0), ("rate_history_sec", 3600)]
        #                           ^ number of seconds the timestamps are kept
    )
    assert isinstance(rate_history_s, (int, float))

    if bypass_rate_limit == 1:
        yield None
        return

    pipe = rds.pipeline(transaction=False)
    # cleanup old query timestamps past our retention window.
    # A pipeline command is only queued here: it returns the pipeline, not
    # the number of removed entries. That count is the first element of
    # pipe.execute() below.
    pipe.zremrangebyscore(bucket, "-inf", "({:f}".format(now - rate_history_s))

    # Now for the tricky bit:
    # ======================
    # The query's *deadline* is added to the sorted set of timestamps, therefore
    # labeling its execution as in the future.
    # All queries with timestamps in the future are considered to be
    # executing *right now*
    #
    # Example:
    #   now = 100
    #   max_query_duration_s = 30
    #   rate_lookback_s = 10
    #   sorted_set (timestamps only for clarity) = [91, 94, 97, 103, 105, 130]
    #
    # EXPLANATION:
    # ===========
    #   [91, 94, 97]    queries that have finished running (3 queries in the
    #                   last 10 seconds, so the per second rate is 3/10 = 0.3)
    #   [103, 105, 130] queries currently running (3 concurrent queries);
    #                   130 is the current query, vaulted into the future
    #   The current time (100) falls between 97 and 103.
    pipe.zadd(bucket, {query_id: now + state.max_query_duration_s})
    if rate_limit_params.per_second_limit is None:
        pipe.exists("nosuchkey")  # no-op if we don't need per-second
    else:
        # count queries that have finished for the per-second rate
        pipe.zcount(bucket, now - state.rate_lookback_s, now)
    if rate_limit_params.concurrent_limit is None:
        pipe.exists("nosuchkey")  # no-op if we don't need concurrent
    else:
        # count the amount queries in the "future" which tells us the amount
        # of concurrent queries
        pipe.zcount(bucket, "({:f}".format(now), "+inf")

    try:
        stale_queries, _, historical, concurrent = pipe.execute()
        stale_queries = int(stale_queries)
        historical = int(historical)
        concurrent = int(concurrent)
    except Exception as ex:
        logger.exception(ex)
        yield None  # fail open if redis is having issues
        return

    # Emitted only once the real count is known.
    metrics.increment("rate_limit.stale", stale_queries, tags={"bucket": bucket})

    per_second = historical / float(state.rate_lookback_s)

    stats = RateLimitStats(rate=per_second, concurrent=concurrent)

    rate_limit_name = rate_limit_params.rate_limit_name

    Reason = namedtuple("Reason", "scope name val limit")
    reasons = [
        Reason(
            rate_limit_name,
            "concurrent",
            concurrent,
            rate_limit_params.concurrent_limit,
        ),
        Reason(
            rate_limit_name,
            "per-second",
            per_second,
            rate_limit_params.per_second_limit,
        ),
    ]

    # First exceeded limit wins; concurrency is checked before rate.
    reason = next(
        (r for r in reasons if r.limit is not None and r.val > r.limit), None
    )

    if reason:
        try:
            # Remove the query from the sorted set
            # because we rate limited it. It shouldn't count towards
            # rate limiting future queries in this bucket.
            rds.zrem(bucket, query_id)
        except Exception as ex:
            logger.exception(ex)

        raise RateLimitExceeded(
            "{r.scope} {r.name} of {r.val:.0f} exceeds limit of {r.limit:.0f}".format(
                r=reason
            ),
            scope=reason.scope,
            name=reason.name,
        )

    rate_limited = False
    try:
        yield stats
        _, err, _ = sys.exc_info()
        if isinstance(err, RateLimitExceeded):
            # If another rate limiter throws an exception, it won't be propagated
            # through this context. So check for the exception explicitly.
            # If another rate limit was hit, we don't want to count this query
            # against this limit.
            try:
                rds.zrem(bucket, query_id)  # not allowed / not counted
                rate_limited = True
            except Exception as ex:
                logger.exception(ex)
    finally:
        try:
            # return the query to its start time, if the query_id was actually added.
            if not rate_limited:
                rds.zincrby(bucket, -float(state.max_query_duration_s), query_id)
        except Exception as ex:
            logger.exception(ex)
def test_latest_replacement_time_by_projects(self) -> None:
    """
    ``latest_replacement_time`` loaded for several projects must be the
    newest timestamp stored under either the needs-final keys or the
    exclude-groups ZSETs, and ``None`` when nothing is stored.
    """
    replacer_state = ReplacerState.ERRORS
    project_ids = [1, 2, 3]
    pipeline = redis_client.pipeline()

    exclude_groups_keys = [
        errors_replacer.ProjectsQueryFlags._build_project_exclude_groups_key_and_type_key(
            project_id, replacer_state
        )
        for project_id in project_ids
    ]
    needs_final_keys = [
        errors_replacer.ProjectsQueryFlags._build_project_needs_final_key_and_type_key(
            project_id, replacer_state
        )
        for project_id in project_ids
    ]

    now = datetime.now()

    def assert_latest_replacement_near(expected: datetime) -> None:
        # Reload the flags and require the latest time within one second.
        loaded = ProjectsQueryFlags.load_from_redis(project_ids, replacer_state)
        latest = loaded.latest_replacement_time
        assert latest is not None
        assert abs((latest - expected).total_seconds()) < 1

    # No replacements or needs final
    empty_flags = ProjectsQueryFlags.load_from_redis(project_ids, replacer_state)
    assert empty_flags.latest_replacement_time is None

    # All projects need final: timestamps now, now + 10, now + 20
    for position, (needs_final_key, _) in enumerate(needs_final_keys):
        pipeline.set(needs_final_key, now.timestamp() + position * 10)
    pipeline.execute()
    assert_latest_replacement_near(now + timedelta(seconds=20))
    redis_client.flushdb()

    # Some projects need final: first project skipped, timestamps now, now + 10
    for position, (needs_final_key, _) in enumerate(needs_final_keys[1:]):
        pipeline.set(needs_final_key, now.timestamp() + position * 10)
    pipeline.execute()
    assert_latest_replacement_near(now + timedelta(seconds=10))
    redis_client.flushdb()

    # One exclude group per project: scores now + 10, now + 20, now + 30
    ascending_score = now.timestamp()
    for exclude_groups_key, _ in exclude_groups_keys:
        ascending_score += 10
        single_group: Mapping[str | bytes, bytes | int | float | str] = {
            "1": ascending_score,
        }
        pipeline.zadd(exclude_groups_key, single_group)
    pipeline.execute()
    assert_latest_replacement_near(now + timedelta(seconds=30))
    redis_client.flushdb()

    # Multiple exclude groups per project: both scores drop by 10 per
    # project, so the newest score overall is group "1" of the first project.
    first_group_score = (now + timedelta(seconds=10)).timestamp()
    second_group_score = now.timestamp()
    for exclude_groups_key, _ in exclude_groups_keys:
        first_group_score -= 10
        second_group_score -= 10
        two_groups: Mapping[str | bytes, bytes | int | float | str] = {
            "1": first_group_score,
            "2": second_group_score,
        }
        pipeline.zadd(exclude_groups_key, two_groups)
    pipeline.execute()
    assert_latest_replacement_near(now)
    redis_client.flushdb()