Example #1
    def __init__(
        self,
        commit_function: Callable[[Mapping[Partition, Position]], None],
        commit_max_batch_size: int,
        commit_max_batch_time: float,
    ) -> None:
        snuba_metrics = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS]
        snuba_metrics_producer = Producer(
            kafka_config.get_kafka_producer_cluster_options(snuba_metrics["cluster"]),
        )
        producer = snuba_metrics_producer
        self.__producer = producer
        self.__producer_topic = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS].get(
            "topic", "snuba-metrics"
        )
        self.__commit_function = commit_function

        self.__closed = False
        self.__metrics = get_metrics()
        self.__produced_message_offsets: MutableMapping[Partition, Position] = {}
        self.__callbacks = 0
        self.__started = time.time()
        # TODO: Need to make these flags
        self.__commit_max_batch_size = commit_max_batch_size
        self.__commit_max_batch_time = commit_max_batch_time
        self.__producer_queue_max_size = 80000
        self.__producer_long_poll_timeout = 3.0

        # poll duration metrics
        self.__poll_start_time = time.time()
        self.__poll_duration_sum = 0.0
Example #2
    def get(self, key):
        cluster_name = settings.KAFKA_TOPICS[key]["cluster"]
        producer = self.__producers.get(cluster_name)

        if producer:
            return producer

        from confluent_kafka import Producer

        cluster_options = get_kafka_producer_cluster_options(cluster_name)
        producer = self.__producers[cluster_name] = Producer(cluster_options)

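        # Flush any messages still buffered for this cluster before the process exits.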
        @atexit.register
        def exit_handler():
            pending_count = len(producer)
            if pending_count == 0:
                return

            logger.debug(
                "Waiting for %d messages to be flushed from %s before exiting...",
                pending_count,
                cluster_name,
            )
            producer.flush()

        return producer
Example #3
    def __init__(
        self,
        commit_function: Callable[[Mapping[Partition, Position]], None],
        producer: Optional[AbstractProducer] = None,
    ) -> None:
        if not producer:
            snuba_metrics = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS]
            snuba_metrics_producer = KafkaProducer(
                kafka_config.get_kafka_producer_cluster_options(snuba_metrics["cluster"]),
            )
            producer = snuba_metrics_producer
        self.__producer = producer
        self.__producer_topic = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS].get(
            "topic", "snuba-metrics"
        )
        self.__commit_function = commit_function

        self.__futures: Deque[ProducerResultFuture] = deque()
        self.__closed = False

        # TODO(meredith): make this an option to pass in
        self.__max_buffer_size = 10000

        # XXX(meredith): This is only temporary to record how much
        # time we spend committing when we commit once per message
        # instead of batching commits
        self.__commit_start = time.time()
        self.__commit_duration_sum = 0.0
        self.__metrics = get_metrics()
Example #4
def get_metrics_consumer(topic: Optional[str] = None,
                         **options: Dict[str, str]) -> BatchingKafkaConsumer:
    snuba_metrics = settings.KAFKA_TOPICS[settings.KAFKA_SNUBA_METRICS]
    snuba_metrics_producer = Producer(
        kafka_config.get_kafka_producer_cluster_options(snuba_metrics["cluster"]),
    )
    return create_batching_kafka_consumer(
        {topic},
        worker=MetricsIndexerWorker(producer=snuba_metrics_producer),
        **options,
    )
Example #5
def process_profile(profile: MutableMapping[str, Any], **kwargs: Any) -> None:
    if profile["platform"] == "cocoa":
        if not _validate_ios_profile(profile=profile):
            return None
        profile = _symbolicate(profile=profile)

    profile = _normalize(profile=profile)

    global processed_profiles_publisher

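    # Create the Kafka publisher lazily on first use and reuse it for subsequent profiles.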
    if processed_profiles_publisher is None:
        config = settings.KAFKA_TOPICS[settings.KAFKA_PROFILES]
        processed_profiles_publisher = KafkaPublisher(
            kafka_config.get_kafka_producer_cluster_options(config["cluster"]),
        )

    processed_profiles_publisher.publish(
        "processed-profiles",
        json.dumps(profile),
    )
Example #6
def test_bootstrap_format():
    with override_settings(
        KAFKA_CLUSTERS={
            "default": {"common": {"bootstrap.servers": ["I", "am", "a", "list"]}}
        }
    ):
        with pytest.raises(ValueError):
            get_kafka_consumer_cluster_options("default")

    # The legacy (flat) config format should not raise an error.
    with override_settings(
        KAFKA_CLUSTERS={"default": {"bootstrap.servers": ["I", "am", "a", "list"]}}
    ):
        cluster_options = get_kafka_producer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "I,am,a,list"

        cluster_options = get_kafka_consumer_cluster_options("default")
        assert cluster_options["bootstrap.servers"] == "I,am,a,list"
Example #7
def track_outcome(
    org_id,
    project_id,
    key_id,
    outcome,
    reason=None,
    timestamp=None,
    event_id=None,
    category=None,
    quantity=None,
):
    """
    This is a central point to track org/project counters per incoming event.
    NB: This should only ever be called once per incoming event, which means
    it should only be called at the point we know the final outcome for the
    event (invalid, rate_limited, accepted, discarded, etc.)

    This sends the "outcome" message to Kafka which is used by Snuba to serve
    data for SnubaTSDB and RedisSnubaTSDB, such as # of rate-limited/filtered
    events.
    """
    global outcomes_publisher
    if outcomes_publisher is None:
        cluster_name = outcomes["cluster"]
        outcomes_publisher = KafkaPublisher(
            kafka_config.get_kafka_producer_cluster_options(cluster_name))

    if quantity is None:
        quantity = 1

    assert isinstance(org_id, int)
    assert isinstance(project_id, int)
    assert isinstance(key_id, (type(None), int))
    assert isinstance(outcome, Outcome)
    assert isinstance(timestamp, (type(None), datetime))
    assert isinstance(category, (type(None), DataCategory))
    assert isinstance(quantity, int)

    timestamp = timestamp or to_datetime(time.time())

    # Publish the outcome payload to Kafka.
    outcomes_publisher.publish(
        outcomes["topic"],
        json.dumps({
            "timestamp": timestamp,
            "org_id": org_id,
            "project_id": project_id,
            "key_id": key_id,
            "outcome": outcome.value,
            "reason": reason,
            "event_id": event_id,
            "category": category,
            "quantity": quantity,
        }),
    )

    metrics.incr(
        "events.outcomes",
        skip_internal=True,
        tags={
            "outcome": outcome.name.lower(),
            "reason": reason,
            "category":
            category.api_name() if category is not None else "null",
        },
    )
Example #8
def track_outcome(
    org_id: int,
    project_id: int,
    key_id: Optional[int],
    outcome: Outcome,
    reason: Optional[str] = None,
    timestamp: Optional[datetime] = None,
    event_id: Optional[str] = None,
    category: Optional[DataCategory] = None,
    quantity: Optional[int] = None,
) -> None:
    """
    This is a central point to track org/project counters per incoming event.
    NB: This should only ever be called once per incoming event, which means
    it should only be called at the point we know the final outcome for the
    event (invalid, rate_limited, accepted, discarded, etc.)

    This sends the "outcome" message to Kafka which is used by Snuba to serve
    data for SnubaTSDB and RedisSnubaTSDB, such as # of rate-limited/filtered
    events.
    """
    global outcomes_publisher
    global billing_publisher

    if quantity is None:
        quantity = 1

    assert isinstance(org_id, int)
    assert isinstance(project_id, int)
    assert isinstance(key_id, (type(None), int))
    assert isinstance(outcome, Outcome)
    assert isinstance(timestamp, (type(None), datetime))
    assert isinstance(category, (type(None), DataCategory))
    assert isinstance(quantity, int)

    outcomes_config = settings.KAFKA_TOPICS[settings.KAFKA_OUTCOMES]
    billing_config = settings.KAFKA_TOPICS.get(settings.KAFKA_OUTCOMES_BILLING) or outcomes_config

    # Create a second producer instance only if the cluster differs. Otherwise,
    # reuse the same producer and just send to the other topic.
    if outcome.is_billing() and billing_config["cluster"] != outcomes_config["cluster"]:
        if billing_publisher is None:
            cluster_name = billing_config["cluster"]
            billing_publisher = KafkaPublisher(
                kafka_config.get_kafka_producer_cluster_options(cluster_name)
            )
        publisher = billing_publisher

    else:
        if outcomes_publisher is None:
            cluster_name = outcomes_config["cluster"]
            outcomes_publisher = KafkaPublisher(
                kafka_config.get_kafka_producer_cluster_options(cluster_name)
            )
        publisher = outcomes_publisher

    timestamp = timestamp or to_datetime(time.time())

    # Send billing outcomes to a dedicated topic if there is a separate
    # configuration for it. Otherwise, fall back to the regular outcomes topic.
    # This does NOT switch the producer if both topics are on the same cluster.
    #
    # In Sentry, there is no significant difference between the classes of
    # outcome. In Sentry SaaS, they have elevated stability requirements as they
    # are used for spike protection and quota enforcement.
    topic_name = billing_config["topic"] if outcome.is_billing() else outcomes_config["topic"]

    # Publish the outcome payload to Kafka.
    publisher.publish(
        topic_name,
        json.dumps(
            {
                "timestamp": timestamp,
                "org_id": org_id,
                "project_id": project_id,
                "key_id": key_id,
                "outcome": outcome.value,
                "reason": reason,
                "event_id": event_id,
                "category": category,
                "quantity": quantity,
            }
        ),
    )

    metrics.incr(
        "events.outcomes",
        skip_internal=True,
        tags={
            "outcome": outcome.name.lower(),
            "reason": reason,
            "category": category.api_name() if category is not None else "null",
            "topic": topic_name,
        },
    )
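
A minimal usage sketch for track_outcome from Example #8, assuming the imports available in that module (Outcome, DataCategory). The ids, reason string, and chosen outcome/category values below are hypothetical placeholders, not values taken from the examples.

# Hypothetical call site: record a rate-limited error event once its final outcome is known.
track_outcome(
    org_id=1,
    project_id=42,
    key_id=None,
    outcome=Outcome.RATE_LIMITED,
    reason="project_quota",  # hypothetical reason label
    category=DataCategory.ERROR,
    quantity=1,
)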