Пример #1
0
    def eventstream(*, dataset: Dataset):
        """Process the event carried in the body of the current HTTP request.

        The body is parsed as JSON: element 0 is the protocol version and
        element 1 is the event type. ``"insert"`` events are pushed through a
        streaming consumer strategy built from the writable storage of the
        dataset's default entity; every other event type is handed to a
        ``ReplacerWorker`` created for that same storage.

        Args:
            dataset: Dataset whose default entity supplies the writable storage.

        Returns:
            The tuple ``("ok", 200, {"Content-Type": "text/plain"})``.

        Raises:
            RuntimeError: If the protocol version is not ``2``.
            AssertionError: If the default entity has no writable storage.
        """
        record = json.loads(http_request.data)

        if record[0] != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        # Wrap the raw request body in a message using a fixed topic name,
        # partition 0 and offset 0.
        partition = Partition(Topic("topic"), 0)
        payload = KafkaPayload(None, http_request.data, [])
        message: Message[KafkaPayload] = Message(
            partition, 0, payload, datetime.now()
        )

        event_type = record[1]

        entity = dataset.get_default_entity()
        storage = entity.get_writable_storage()
        assert storage is not None

        if event_type != "insert":
            from snuba.replacer import ReplacerWorker

            replacer = ReplacerWorker(storage, metrics=metrics)
            replacement = replacer.process_message(message)
            # Only flush when the worker actually produced something.
            if replacement is not None:
                replacer.flush_batch([replacement])
        else:
            from snuba.consumer import StreamingConsumerStrategyFactory

            writer = storage.get_table_writer()
            loader = writer.get_stream_loader()
            factory = StreamingConsumerStrategyFactory(
                loader.get_pre_filter(),
                loader.get_processor(),
                writer.get_batch_writer(metrics),
                metrics,
                max_batch_size=1,
                max_batch_time=1.0,
                processes=None,
                input_block_size=None,
                output_block_size=None,
            )
            # The commit callback ignores the offsets it is given.
            strategy = factory.create(lambda offsets: None)
            strategy.submit(message)
            strategy.close()
            strategy.join()

        return ("ok", 200, {"Content-Type": "text/plain"})
Пример #2
0
    def __build_streaming_strategy_factory(
        self,
        processor_wrapper: Optional[Callable[[MessageProcessor],
                                             MessageProcessor]] = None,
    ) -> ProcessingStrategyFactory[KafkaPayload]:
        """Build the processing strategy factory for this object's storage.

        Args:
            processor_wrapper: Optional callable applied to the stream
                loader's processor; its return value is used in place of the
                original processor.

        Returns:
            A ``StreamingConsumerStrategyFactory`` configured from this
            object's settings, wrapped in a
            ``ProcessingStrategyProfilerWrapperFactory`` when a profile path
            is set.
        """
        writer = self.storage.get_table_writer()
        loader = writer.get_stream_loader()

        message_processor = loader.get_processor()
        if processor_wrapper is not None:
            message_processor = processor_wrapper(message_processor)

        pre_filter = loader.get_pre_filter()
        # NOTE(review): the option dict looks like ClickHouse insert
        # settings -- confirm against get_batch_writer.
        batch_writer = writer.get_batch_writer(
            self.metrics,
            {"load_balancing": "in_order", "insert_distributed_sync": 1},
        )

        # The producer is only passed along when a replacements topic is set.
        if self.replacements_topic is not None:
            replacements_producer = self.producer
        else:
            replacements_producer = None

        factory: ProcessingStrategyFactory[
            KafkaPayload] = StreamingConsumerStrategyFactory(
                pre_filter,
                message_processor,
                batch_writer,
                self.metrics,
                max_batch_size=self.max_batch_size,
                # Stored in milliseconds; the factory is given seconds.
                max_batch_time=self.max_batch_time_ms / 1000.0,
                processes=self.processes,
                input_block_size=self.input_block_size,
                output_block_size=self.output_block_size,
                replacements_producer=replacements_producer,
                replacements_topic=self.replacements_topic,
            )

        profile_path = self.__profile_path
        if profile_path is None:
            return factory

        return ProcessingStrategyProfilerWrapperFactory(factory, profile_path)
Пример #3
0
 def __build_streaming_strategy_factory(self) -> StreamingConsumerStrategyFactory:
     """Build a ``StreamingConsumerStrategyFactory`` for this object's storage.

     The pre-filter and processor come from the storage's stream loader and
     the batch writer from its table writer; batching, multiprocessing and
     replacement settings are read from attributes on ``self``.
     """
     writer = self.storage.get_table_writer()
     loader = writer.get_stream_loader()

     pre_filter = loader.get_pre_filter()
     message_processor = loader.get_processor()
     # NOTE(review): the option dict looks like ClickHouse insert settings --
     # confirm against get_batch_writer.
     writer_options = {"load_balancing": "in_order", "insert_distributed_sync": 1}
     batch_writer = writer.get_batch_writer(self.metrics, writer_options)

     # The producer is only passed along when a replacements topic is set.
     if self.replacements_topic is not None:
         replacements_producer = self.producer
     else:
         replacements_producer = None

     return StreamingConsumerStrategyFactory(
         pre_filter,
         message_processor,
         batch_writer,
         self.metrics,
         max_batch_size=self.max_batch_size,
         # Stored in milliseconds; the factory is given seconds.
         max_batch_time=self.max_batch_time_ms / 1000.0,
         processes=self.processes,
         input_block_size=self.input_block_size,
         output_block_size=self.output_block_size,
         replacements_producer=replacements_producer,
         replacements_topic=self.replacements_topic,
     )
Пример #4
0
def test_streaming_consumer_strategy() -> None:
    """Exercise a strategy created by ``StreamingConsumerStrategyFactory``.

    Three messages are submitted while the mocked processor returns ``None``,
    an ``InsertBatch`` and a ``ReplacementBatch`` in turn; no metrics may be
    recorded at that point. A processor result that is a bare ``dict`` must
    raise ``TypeError``. Closing and joining the strategy must then produce
    exactly one write, one ``insertions.latency_ms`` timing and one
    replacement message.
    """
    # Endless lazy stream of messages with increasing offsets.
    messages = (
        Message(
            Partition(Topic("events"), 0),
            offset,
            KafkaPayload(None, b"{}", None),
            datetime.now(),
        )
        for offset in itertools.count()
    )

    replacements_producer = FakeConfluentKafkaProducer()

    processor = Mock()
    processor.process_message.side_effect = [
        None,
        InsertBatch([{}]),
        ReplacementBatch("key", [{}]),
    ]

    writer = Mock()

    metrics = TestingMetricsBackend()

    factory = StreamingConsumerStrategyFactory(
        None,
        processor,
        writer,
        metrics,
        max_batch_size=10,
        max_batch_time=60,
        processes=None,
        input_block_size=None,
        output_block_size=None,
        replacements_producer=replacements_producer,
        replacements_topic=Topic("replacements"),
    )

    commit_function = Mock()
    strategy = factory.create(commit_function)

    # One message per configured processor result.
    for _ in range(3):
        strategy.poll()
        strategy.submit(next(messages))

    assert metrics.calls == []

    # A plain dict is not an accepted processor result.
    processor.process_message.side_effect = [{}]

    with pytest.raises(TypeError):
        strategy.poll()
        strategy.submit(next(messages))

    def get_number_of_insertion_metrics() -> int:
        return sum(
            1
            for call in metrics.calls
            if isinstance(call, Timing) and call.name == "insertions.latency_ms"
        )

    def get_write_count() -> int:
        return writer.write.call_count

    def get_replacement_count() -> int:
        return len(replacements_producer.messages)

    expected_write_count = 1

    with assert_changes(get_number_of_insertion_metrics, 0, expected_write_count):
        with assert_changes(get_write_count, 0, expected_write_count):
            with assert_changes(get_replacement_count, 0, 1):
                strategy.close()
                strategy.join()