def backend() -> Iterator[Cache[bytes]]:
    """Yield a ``RedisCache`` of ``bytes`` values, cleaning up afterwards.

    The cache is built on the module-level ``redis_client`` with the
    ``"test"`` argument, a passthrough codec and a thread pool that lives
    only as long as this generator.

    Cleanup happens in two steps once the consumer is done with the cache:

    1. the thread pool is shut down (waiting for any in-flight work), so
       its worker threads are not leaked;
    2. the Redis database is flushed, so no state is carried over to the
       next user of ``redis_client``.

    Yields:
        The cache instance.
    """
    codec: PassthroughCodec[bytes] = PassthroughCodec()
    try:
        # The executor is owned by this generator, so it is released here
        # instead of being abandoned. Leaving the ``with`` block happens
        # before ``finally`` runs, which means pending tasks cannot write to
        # Redis after it has been flushed.
        with ThreadPoolExecutor() as executor:
            yield RedisCache(redis_client, "test", codec, executor)
    finally:
        redis_client.flushdb()
def __build_consumer(
    self, worker: ConsumerWorker
) -> BatchingConsumer[KafkaPayload]:
    """Assemble the batching consumer that feeds ``worker``.

    A Kafka consumer configuration is derived from the connection and
    queueing attributes stored on ``self``. When ``self.commit_log_topic``
    is ``None`` a plain ``KafkaConsumer`` is used; otherwise a
    ``KafkaConsumerWithCommitLog`` is created with ``self.producer`` and
    that commit log topic. Both variants receive a passthrough codec and
    the instance's commit retry policy.

    Args:
        worker: The worker handed to the ``BatchingConsumer``.

    Returns:
        A ``BatchingConsumer`` reading from ``self.raw_topic``, configured
        with the instance's batch size, batch time and metrics, and with
        ``TransportError`` as its only recoverable error.
    """
    kafka_config = build_kafka_consumer_configuration(
        bootstrap_servers=self.bootstrap_servers,
        group_id=self.group_id,
        auto_offset_reset=self.auto_offset_reset,
        queued_max_messages_kbytes=self.queued_max_messages_kbytes,
        queued_min_messages=self.queued_min_messages,
    )
    payload_codec: PassthroughCodec[KafkaPayload] = PassthroughCodec()

    if self.commit_log_topic is None:
        # No commit log configured: the basic consumer is enough.
        kafka_consumer = KafkaConsumer(
            kafka_config,
            codec=payload_codec,
            commit_retry_policy=self.__commit_retry_policy,
        )
    else:
        kafka_consumer = KafkaConsumerWithCommitLog(
            kafka_config,
            codec=payload_codec,
            producer=self.producer,
            commit_log_topic=self.commit_log_topic,
            commit_retry_policy=self.__commit_retry_policy,
        )

    return BatchingConsumer(
        kafka_consumer,
        self.raw_topic,
        worker=worker,
        max_batch_size=self.max_batch_size,
        # The attribute is named in milliseconds; it is passed through as-is.
        max_batch_time=self.max_batch_time_ms,
        metrics=self.metrics,
        recoverable_errors=[TransportError],
    )
def replacer(
    *,
    replacements_topic: Optional[str],
    consumer_group: str,
    bootstrap_server: Sequence[str],
    dataset_name: Optional[str],
    storage_name: str,
    max_batch_size: int,
    max_batch_time_ms: int,
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    log_level: Optional[str] = None,
) -> None:
    # Runs a batching Kafka consumer that hands messages from the
    # replacements topic of a writable storage to a ReplacerWorker, until a
    # SIGINT/SIGTERM asks it to shut down.
    #
    # NOTE(review): presumably this is registered as a CLI command (the
    # decorators are not visible from here), so the description is kept as a
    # comment rather than a docstring to avoid changing any generated help
    # text -- confirm.

    from snuba.clickhouse.native import ClickhousePool
    from snuba.replacer import ReplacerWorker
    from snuba.utils.codecs import PassthroughCodec
    from snuba.utils.streams.batching import BatchingConsumer
    from snuba.utils.streams.kafka import (
        KafkaConsumer,
        KafkaPayload,
        TransportError,
        build_kafka_consumer_configuration,
    )
    from snuba.utils.streams.types import Topic

    setup_logging(log_level)
    setup_sentry()

    # The storage is always resolved by name first, so an unknown
    # storage_name is rejected even when dataset_name is also given.
    storage = get_writable_storage(storage_name)

    if dataset_name:
        # A dataset name takes precedence over the storage name: the
        # dataset's writable storage is used and metrics are tagged by
        # dataset. This branch can go away once every caller passes
        # storage_name instead of dataset_name.
        storage = get_dataset(dataset_name).get_writable_storage()
        metrics_tags = {"group": consumer_group, "dataset": dataset_name}
    else:
        metrics_tags = {"group": consumer_group, "storage": storage_name}

    topic_spec = (
        storage.get_table_writer().get_stream_loader().get_replacement_topic_spec()
    )
    assert (
        topic_spec is not None
    ), f"Storage {type(storage)} does not have a replacement topic."

    # An explicitly requested topic wins over the storage's default one.
    topic_name = replacements_topic or topic_spec.topic_name

    metrics = MetricsWrapper(environment.metrics, "replacer", tags=metrics_tags)

    # Rewriting existing rows means rebuilding the full tuple of every event
    # through a SELECT, which is a Hard Thing (TM) for a columnstore. With
    # stock settings ClickHouse commonly exceeds the default max_memory_usage
    # of 10GB per query, so max_block_size is lowered to reduce memory usage
    # and max_memory_usage is raised to give the query more breathing room.
    client_settings = {
        "max_block_size": settings.REPLACER_MAX_BLOCK_SIZE,
        "max_memory_usage": settings.REPLACER_MAX_MEMORY_USAGE,
        # Keep the count() queries from using up the production cache.
        "use_uncompressed_cache": 0,
    }

    clickhouse_pool = ClickhousePool(
        settings.CLICKHOUSE_HOST,
        settings.CLICKHOUSE_PORT,
        client_settings=client_settings,
    )

    codec: PassthroughCodec[KafkaPayload] = PassthroughCodec()

    kafka_consumer = KafkaConsumer(
        build_kafka_consumer_configuration(
            bootstrap_servers=bootstrap_server,
            group_id=consumer_group,
            auto_offset_reset=auto_offset_reset,
            queued_max_messages_kbytes=queued_max_messages_kbytes,
            queued_min_messages=queued_min_messages,
        ),
        codec=codec,
    )
    topic = Topic(topic_name)
    worker = ReplacerWorker(clickhouse_pool, storage, metrics=metrics)

    batching_consumer = BatchingConsumer(
        kafka_consumer,
        topic,
        worker=worker,
        max_batch_size=max_batch_size,
        max_batch_time=max_batch_time_ms,
        metrics=metrics,
        recoverable_errors=[TransportError],
    )

    def shutdown_handler(signum, frame) -> None:
        # Only request the shutdown; run() below is what actually returns.
        batching_consumer.signal_shutdown()

    for signal_number in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signal_number, shutdown_handler)

    batching_consumer.run()
def subscriptions(
    *,
    dataset_name: str,
    topic: Optional[str],
    partitions: Optional[int],
    commit_log_topic: Optional[str],
    commit_log_groups: Sequence[str],
    consumer_group: str,
    auto_offset_reset: str,
    bootstrap_servers: Sequence[str],
    max_batch_size: int,
    max_batch_time_ms: int,
    schedule_ttl: int,
    result_topic: Optional[str],
    log_level: Optional[str],
) -> None:
    """Evaluates subscribed queries for a dataset."""

    assert result_topic is not None

    setup_logging(log_level)
    setup_sentry()

    dataset = get_dataset(dataset_name)

    if not bootstrap_servers:
        # No brokers given: use the dataset-specific default, or the global
        # default when the dataset has none configured.
        bootstrap_servers = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name, settings.DEFAULT_BROKERS
        )

    loader = enforce_table_writer(dataset).get_stream_loader()

    # Consumer for the data topic, in the caller-supplied consumer group.
    data_consumer = KafkaConsumer(
        build_kafka_consumer_configuration(
            bootstrap_servers, consumer_group, auto_offset_reset=auto_offset_reset,
        ),
        PassthroughCodec(),
    )

    # Consumer for the commit log. It gets a freshly generated group id on
    # every start and always begins from the earliest offset.
    commit_log_consumer = KafkaConsumer(
        build_kafka_consumer_configuration(
            bootstrap_servers,
            f"subscriptions-commit-log-{uuid.uuid1().hex}",
            auto_offset_reset="earliest",
        ),
        CommitCodec(),
    )

    # An explicit commit log topic overrides the one from the stream loader.
    if commit_log_topic is not None:
        commit_log = Topic(commit_log_topic)
    else:
        commit_log = Topic(loader.get_commit_log_topic_spec().topic_name)

    consumer = TickConsumer(
        SynchronizedConsumer(
            data_consumer, commit_log_consumer, commit_log, set(commit_log_groups),
        )
    )

    producer = KafkaProducer(
        {
            "bootstrap.servers": ",".join(bootstrap_servers),
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        },
        SubscriptionResultCodec(),
    )

    # Everything below is built inside the ``with`` block so that both the
    # consumer and the producer are closed even if construction fails.
    with closing(consumer), closing(producer):
        if topic is not None:
            input_topic = Topic(topic)
        else:
            input_topic = Topic(loader.get_default_topic_spec().topic_name)

        executor = SubscriptionExecutor(
            dataset,
            ThreadPoolExecutor(
                max_workers=settings.SUBSCRIPTIONS_MAX_CONCURRENT_QUERIES
            ),
        )

        # One scheduler per partition index; the partition count falls back
        # to the default topic spec when it is not given explicitly.
        partition_count = (
            partitions
            if partitions is not None
            else loader.get_default_topic_spec().partitions_number
        )
        schedulers = {
            index: SubscriptionScheduler(
                RedisSubscriptionDataStore(redis_client, dataset, PartitionId(index)),
                PartitionId(index),
                cache_ttl=timedelta(seconds=schedule_ttl),
            )
            for index in range(partition_count)
        }

        worker = SubscriptionWorker(
            executor, schedulers, producer, Topic(result_topic),
        )

        metrics = create_metrics(
            "snuba.subscriptions",
            tags={"group": consumer_group, "dataset": dataset_name},
        )

        batching_consumer = BatchingConsumer(
            consumer,
            input_topic,
            worker,
            max_batch_size,
            max_batch_time_ms,
            metrics,
        )

        def shutdown_handler(signum, frame) -> None:
            # Only request the shutdown; run() below is what actually returns.
            batching_consumer.signal_shutdown()

        for signal_number in (signal.SIGINT, signal.SIGTERM):
            signal.signal(signal_number, shutdown_handler)

        batching_consumer.run()
def test_passthrough_codec() -> None:
    """Check that ``PassthroughCodec`` returns its argument unchanged.

    Identity (``is``), not equality, is asserted: both ``decode`` and
    ``encode`` must hand back the very same object they were given.
    """
    sentinel = object()
    codec: Codec[object, object] = PassthroughCodec()

    # decode is exercised first, then encode.
    for transform in (codec.decode, codec.encode):
        assert transform(sentinel) is sentinel