def __init__(
    self,
    commit_function: Callable[[Mapping[Partition, Position]], None],
    commit_max_batch_size: int,
    commit_max_batch_time: float,
) -> None:
    snuba_metrics = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS]
    snuba_metrics_producer = Producer(
        kafka_config.get_kafka_producer_cluster_options(snuba_metrics["cluster"]),
    )
    producer = snuba_metrics_producer
    self.__producer = producer
    self.__producer_topic = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS].get(
        "topic", "snuba-metrics"
    )
    self.__commit_function = commit_function
    self.__closed = False
    self.__metrics = get_metrics()
    self.__produced_message_offsets: MutableMapping[Partition, Position] = {}
    self.__callbacks = 0
    self.__started = time.time()

    # TODO: Need to make these flags
    self.__commit_max_batch_size = commit_max_batch_size
    self.__commit_max_batch_time = commit_max_batch_time
    self.__producer_queue_max_size = 80000
    self.__producer_long_poll_timeout = 3.0

    # poll duration metrics
    self.__poll_start_time = time.time()
    self.__poll_duration_sum = 0.0
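# A minimal sketch (assumed, not from this codebase) of how the two batch
# thresholds above could drive the commit policy: flush the offsets collected
# from delivery callbacks once either the message count or the batch age
# exceeds its limit. `__maybe_commit` is a hypothetical name, and
# commit_max_batch_time is assumed to be in seconds.
def __maybe_commit(self) -> None:
    batch_age = time.time() - self.__started
    if (
        self.__callbacks >= self.__commit_max_batch_size
        or batch_age >= self.__commit_max_batch_time
    ):
        self.__commit_function(self.__produced_message_offsets)
        self.__produced_message_offsets = {}
        self.__callbacks = 0
        self.__started = time.time()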
def get(self, key):
    cluster_name = settings.KAFKA_TOPICS[key]["cluster"]

    producer = self.__producers.get(cluster_name)
    if producer:
        return producer

    from confluent_kafka import Producer

    cluster_options = get_kafka_producer_cluster_options(cluster_name)
    producer = self.__producers[cluster_name] = Producer(cluster_options)

    @atexit.register
    def exit_handler():
        pending_count = len(producer)
        if pending_count == 0:
            return
        logger.debug(
            "Waiting for %d messages to be flushed from %s before exiting...",
            pending_count,
            cluster_name,
        )
        producer.flush()

    return producer
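# Usage sketch: producers are cached per *cluster*, not per topic, so two
# topic keys configured on the same cluster share a single
# confluent_kafka.Producer. `ProducerManager` is an assumed name for the
# class that owns get() above.
producers = ProducerManager()
outcomes_producer = producers.get(settings.KAFKA_OUTCOMES)
metrics_producer = producers.get(settings.KAFKA_SNUBA_METRICS)
# The same instance is returned iff both topics map to the same "cluster".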
def __init__(
    self,
    commit_function: Callable[[Mapping[Partition, Position]], None],
    producer: Optional[AbstractProducer] = None,
) -> None:
    if not producer:
        snuba_metrics = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS]
        snuba_metrics_producer = KafkaProducer(
            kafka_config.get_kafka_producer_cluster_options(snuba_metrics["cluster"]),
        )
        producer = snuba_metrics_producer
    self.__producer = producer
    self.__producer_topic = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS].get(
        "topic", "snuba-metrics"
    )
    self.__commit_function = commit_function
    self.__futures: Deque[ProducerResultFuture] = deque()
    self.__closed = False

    # TODO(meredith): make this an option to pass in
    self.__max_buffer_size = 10000

    # XXX(meredith): This is only temporary to record how much
    # time we spend committing when we commit once per message
    # instead of batching commits
    self.__commit_start = time.time()
    self.__commit_duration_sum = 0.0
    self.__metrics = get_metrics()
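# Sketch (assumed) of how the XXX fields above might be used: time each
# per-message commit, accumulate the duration, and periodically flush the sum
# to metrics. The method name, the metric key, and the 10-second flush
# interval are all illustrative.
def __commit(self, offsets: Mapping[Partition, Position]) -> None:
    start = time.time()
    self.__commit_function(offsets)
    self.__commit_duration_sum += time.time() - start
    if time.time() - self.__commit_start >= 10.0:
        self.__metrics.timing("produce_step.commit_duration", self.__commit_duration_sum)
        self.__commit_duration_sum = 0.0
        self.__commit_start = time.time()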
def get_metrics_consumer(
    topic: Optional[str] = None, **options: Dict[str, str]
) -> BatchingKafkaConsumer:
    snuba_metrics = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS]
    snuba_metrics_producer = Producer(
        kafka_config.get_kafka_producer_cluster_options(snuba_metrics["cluster"]),
    )
    return create_batching_kafka_consumer(
        {topic},
        worker=MetricsIndexerWorker(producer=snuba_metrics_producer),
        **options,
    )
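# Usage sketch: topic, group_id, and the assumption that extra options are
# forwarded to create_batching_kafka_consumer are illustrative here;
# BatchingKafkaConsumer.run() is assumed to block until shutdown.
consumer = get_metrics_consumer(topic="ingest-metrics", group_id="metrics-indexer")
consumer.run()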
def process_profile(profile: MutableMapping[str, Any], **kwargs: Any) -> None:
    if profile["platform"] == "cocoa":
        if not _validate_ios_profile(profile=profile):
            return None
        profile = _symbolicate(profile=profile)

    profile = _normalize(profile=profile)

    global processed_profiles_publisher

    if processed_profiles_publisher is None:
        config = settings.KAFKA_TOPICS[settings.KAFKA_PROFILES]
        processed_profiles_publisher = KafkaPublisher(
            kafka_config.get_kafka_producer_cluster_options(config["cluster"]),
        )

    processed_profiles_publisher.publish(
        "processed-profiles",
        json.dumps(profile),
    )
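# Illustrative call: non-cocoa profiles skip validation/symbolication and go
# straight to normalization before being published. The payload is a
# stand-in; real profiles carry the full schema expected by _normalize.
process_profile({"platform": "android", "profile": "..."})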
def test_bootstrap_format():
    with override_settings(
        KAFKA_CLUSTERS={
            "default": {"common": {"bootstrap.servers": ["I", "am", "a", "list"]}}
        }
    ):
        with pytest.raises(ValueError):
            get_kafka_consumer_cluster_options("default")

    # legacy should not raise an error
    with override_settings(
        KAFKA_CLUSTERS={"default": {"bootstrap.servers": ["I", "am", "a", "list"]}}
    ):
        cluster_options = get_kafka_producer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "I,am,a,list"

        cluster_options = get_kafka_consumer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "I,am,a,list"
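# For contrast with the test above, a sketch of new-style config that should
# pass validation: under "common", "bootstrap.servers" is already a single
# comma-separated string rather than a list. Broker addresses are illustrative.
with override_settings(
    KAFKA_CLUSTERS={
        "default": {"common": {"bootstrap.servers": "broker-1:9092,broker-2:9092"}}
    }
):
    options = get_kafka_consumer_cluster_options("default")
    assert options["bootstrap.servers"] == "broker-1:9092,broker-2:9092"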
def track_outcome(
    org_id,
    project_id,
    key_id,
    outcome,
    reason=None,
    timestamp=None,
    event_id=None,
    category=None,
    quantity=None,
):
    """
    This is a central point to track org/project counters per incoming event.

    NB: This should only ever be called once per incoming event, which means
    it should only be called at the point we know the final outcome for the
    event (invalid, rate_limited, accepted, discarded, etc.)

    This sends the "outcome" message to Kafka which is used by Snuba to serve
    data for SnubaTSDB and RedisSnubaTSDB, such as # of rate-limited/filtered
    events.
    """
    global outcomes_publisher

    # Resolve the outcomes topic configuration before first use (the original
    # referenced `outcomes` without defining it in this snippet).
    outcomes = settings.KAFKA_TOPICS[settings.KAFKA_OUTCOMES]

    if outcomes_publisher is None:
        cluster_name = outcomes["cluster"]
        outcomes_publisher = KafkaPublisher(
            kafka_config.get_kafka_producer_cluster_options(cluster_name)
        )

    if quantity is None:
        quantity = 1

    assert isinstance(org_id, int)
    assert isinstance(project_id, int)
    assert isinstance(key_id, (type(None), int))
    assert isinstance(outcome, Outcome)
    assert isinstance(timestamp, (type(None), datetime))
    assert isinstance(category, (type(None), DataCategory))
    assert isinstance(quantity, int)

    timestamp = timestamp or to_datetime(time.time())

    # Send the outcome payload to Kafka.
    outcomes_publisher.publish(
        outcomes["topic"],
        json.dumps(
            {
                "timestamp": timestamp,
                "org_id": org_id,
                "project_id": project_id,
                "key_id": key_id,
                "outcome": outcome.value,
                "reason": reason,
                "event_id": event_id,
                "category": category,
                "quantity": quantity,
            }
        ),
    )

    metrics.incr(
        "events.outcomes",
        skip_internal=True,
        tags={
            "outcome": outcome.name.lower(),
            "reason": reason,
            "category": category.api_name() if category is not None else "null",
        },
    )
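# Example call with illustrative values: record a single accepted error event.
# Outcome.ACCEPTED and DataCategory.ERROR are assumed enum members.
track_outcome(
    org_id=1,
    project_id=42,
    key_id=None,
    outcome=Outcome.ACCEPTED,
    category=DataCategory.ERROR,
)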
def track_outcome(
    org_id: int,
    project_id: int,
    key_id: Optional[int],
    outcome: Outcome,
    reason: Optional[str] = None,
    timestamp: Optional[datetime] = None,
    event_id: Optional[str] = None,
    category: Optional[DataCategory] = None,
    quantity: Optional[int] = None,
) -> None:
    """
    This is a central point to track org/project counters per incoming event.

    NB: This should only ever be called once per incoming event, which means
    it should only be called at the point we know the final outcome for the
    event (invalid, rate_limited, accepted, discarded, etc.)

    This sends the "outcome" message to Kafka which is used by Snuba to serve
    data for SnubaTSDB and RedisSnubaTSDB, such as # of rate-limited/filtered
    events.
    """
    global outcomes_publisher
    global billing_publisher

    if quantity is None:
        quantity = 1

    assert isinstance(org_id, int)
    assert isinstance(project_id, int)
    assert isinstance(key_id, (type(None), int))
    assert isinstance(outcome, Outcome)
    assert isinstance(timestamp, (type(None), datetime))
    assert isinstance(category, (type(None), DataCategory))
    assert isinstance(quantity, int)

    outcomes_config = settings.KAFKA_TOPICS[settings.KAFKA_OUTCOMES]
    billing_config = settings.KAFKA_TOPICS.get(settings.KAFKA_OUTCOMES_BILLING) or outcomes_config

    # Create a second producer instance only if the cluster differs. Otherwise,
    # reuse the same producer and just send to the other topic.
    if outcome.is_billing() and billing_config["cluster"] != outcomes_config["cluster"]:
        if billing_publisher is None:
            cluster_name = billing_config["cluster"]
            billing_publisher = KafkaPublisher(
                kafka_config.get_kafka_producer_cluster_options(cluster_name)
            )
        publisher = billing_publisher
    else:
        if outcomes_publisher is None:
            cluster_name = outcomes_config["cluster"]
            outcomes_publisher = KafkaPublisher(
                kafka_config.get_kafka_producer_cluster_options(cluster_name)
            )
        publisher = outcomes_publisher

    timestamp = timestamp or to_datetime(time.time())

    # Send billing outcomes to a dedicated topic if there is a separate
    # configuration for it. Otherwise, fall back to the regular outcomes topic.
    # This does NOT switch the producer if both topics are on the same cluster.
    #
    # In Sentry, there is no significant difference between the classes of
    # outcome. In Sentry SaaS, they have elevated stability requirements as
    # they are used for spike protection and quota enforcement.
    topic_name = billing_config["topic"] if outcome.is_billing() else outcomes_config["topic"]

    # Send the outcome payload to Kafka.
    publisher.publish(
        topic_name,
        json.dumps(
            {
                "timestamp": timestamp,
                "org_id": org_id,
                "project_id": project_id,
                "key_id": key_id,
                "outcome": outcome.value,
                "reason": reason,
                "event_id": event_id,
                "category": category,
                "quantity": quantity,
            }
        ),
    )

    metrics.incr(
        "events.outcomes",
        skip_internal=True,
        tags={
            "outcome": outcome.name.lower(),
            "reason": reason,
            "category": category.api_name() if category is not None else "null",
            "topic": topic_name,
        },
    )
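# Sketch of the settings shape that exercises the billing branch above: the
# billing topic lives on a different cluster, so a dedicated KafkaPublisher is
# created for it. Cluster and topic names are illustrative.
KAFKA_TOPICS = {
    KAFKA_OUTCOMES: {"cluster": "default", "topic": "outcomes"},
    KAFKA_OUTCOMES_BILLING: {"cluster": "billing", "topic": "outcomes-billing"},
}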