Example #1
    def test_send_message(
        self,
        value: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        storage = get_storage("groupedmessages")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100,
                                        xmax=200,
                                        xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            storage=storage,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            1,
            KafkaPayload(
                None,
                value.encode("utf-8"),
                [("table", "sentry_groupedmessage".encode())],
            ),
            datetime.now(),
        )

        ret = worker.process_message(message)
        assert ret == expected
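
A pytest.mark.parametrize decorator supplying value/expected was evidently trimmed from this excerpt. A minimal sketch of how the cases could be wired up (the payloads are illustrative wal2json-style stand-ins, not the project's actual fixtures):

import pytest

test_data = [
    # A transaction BEGIN event carries no row, so nothing should be produced.
    ('{"event": "begin", "xid": 2380836}', None),
    # Events for tables other than sentry_groupedmessage should be skipped.
    ('{"event": "change", "kind": "insert", "table": "other_table"}', None),
]

@pytest.mark.parametrize("value, expected", test_data)
def test_send_message(self, value, expected):
    ...  # body as above
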
Example #2
    def test_send_message(
        self,
        message: bytes,
        expected: Optional[Tuple[int, Mapping[str, Any]]],
    ) -> None:
        dataset = get_dataset("groupedmessage")
        snapshot_id = uuid1()
        transact_data = TransactionData(
            xmin=100,
            xmax=200,
            xip_list=[120, 130]
        )

        worker = SnapshotAwareWorker(
            dataset=dataset,
            producer=FakeKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=None
        )

        ret = worker.process_message(
            build_msg(1, 0, message)
        )
        assert ret == expected
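
build_msg(1, 0, message) calls a helper this excerpt does not show; judging from Example #3, it most likely packs the payload together with offset and partition bookkeeping. A hypothetical stand-in consistent with the call site (the real helper may well differ):

def build_msg(offset: int, partition: int, payload: bytes) -> KafkaMessage:
    # "topic" is a placeholder name, mirroring Example #3; purely an assumption.
    return KafkaMessage(TopicPartition("topic", partition), offset, payload)
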
Example #3
    def test_send_message(
        self,
        message: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        dataset = get_dataset("groupedmessage")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100,
                                        xmax=200,
                                        xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            dataset=dataset,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        ret = worker.process_message(
            KafkaMessage(
                TopicPartition('topic', 0),
                1,
                message.encode('utf-8'),
            ))
        assert ret == expected
Example #4
def test_send_message(xid: int, expected: Optional[ProcessedMessage]) -> None:
    processor = (
        get_writable_storage(StorageKey.GROUPEDMESSAGES)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    worker = SnapshotProcessor(
        processor=processor,
        snapshot_id=SnapshotId(str(uuid1())),
        transaction_data=TransactionData(xmin=Xid(100),
                                         xmax=Xid(200),
                                         xip_list=[Xid(120),
                                                   Xid(130)]),
    )

    ret = worker.process_message(
        get_insert_event(xid),
        KafkaMessageMetadata(offset=1, partition=0, timestamp=datetime.now()),
    )

    assert ret == expected
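
The xmin/xmax/xip_list triple mirrors PostgreSQL's exported-snapshot semantics, which is presumably what the worker consults to decide whether a transaction's changes are already contained in the snapshot dump. A sketch of the standard PostgreSQL visibility rule (illustrative only; the actual check inside SnapshotProcessor may differ):

from typing import Sequence

def xid_in_snapshot(xid: int, xmin: int, xmax: int, xip_list: Sequence[int]) -> bool:
    if xid < xmin:
        return True   # committed before the snapshot window opened
    if xid >= xmax:
        return False  # started after the snapshot was taken
    return xid not in xip_list  # in-window, unless it was still in progress

Under the window used above (xmin=100, xmax=200, xip_list=[120, 130]), xid=150 would already be covered by the snapshot, while xid=120 was in flight when the snapshot was taken and would still need to be replayed.
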
Example #5
def confirm_load(
    *,
    control_topic: Optional[str],
    bootstrap_server: Sequence[str],
    storage_name: str,
    source: str,
    log_level: Optional[str] = None,
) -> None:
    """
    Confirms the snapshot has been loaded by sending the
    snapshot-loaded message on the control topic.
    """

    setup_logging(log_level)
    setup_sentry()

    logger = logging.getLogger("snuba.loaded-snapshot")
    logger.info(
        "Sending load completion message for storage %s, from source %s",
        storage_name,
        source,
    )

    storage_key = StorageKey(storage_name)
    storage = get_cdc_storage(storage_key)

    stream_loader = storage.get_table_writer().get_stream_loader()

    control_topic = control_topic or storage.get_default_control_topic()

    snapshot_source = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT, path=source,
    )

    descriptor = snapshot_source.get_descriptor()

    producer = Producer(
        build_kafka_producer_configuration(
            stream_loader.get_default_topic_spec().topic,
            bootstrap_servers=bootstrap_server,
            override_params={
                "partitioner": "consistent",
                "message.max.bytes": 50000000,  # 50MB, default is 1MB
            },
        )
    )

    msg = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=TransactionData(
            xmin=descriptor.xmin, xmax=descriptor.xmax, xip_list=descriptor.xip_list,
        ),
    )
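    # The serialized control message pairs the snapshot id with its transaction
    # window; consumers presumably use xmin/xmax/xip_list to work out which WAL
    # transactions the snapshot already contains.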
    json_string = json.dumps(msg.to_dict())

    def delivery_callback(error: KafkaError, message: Message) -> None:
        if error is not None:
            raise error
        else:
            logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic, value=json_string, on_delivery=delivery_callback,
    )

    producer.flush()
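
The keyword-only signature and log_level handling suggest confirm_load is the body of a CLI command. A rough sketch of how a click wrapper might invoke it (option names and defaults are assumptions, not the project's actual interface):

import click

@click.command()
@click.option("--control-topic", default=None)
@click.option("--bootstrap-server", multiple=True)  # collected as a tuple of str
@click.option("--storage", "storage_name", required=True)
@click.option("--source", required=True)
@click.option("--log-level", default=None)
def confirm_load_command(control_topic, bootstrap_server, storage_name, source, log_level):
    confirm_load(
        control_topic=control_topic,
        bootstrap_server=bootstrap_server,
        storage_name=storage_name,
        source=source,
        log_level=log_level,
    )
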
Example #6
def confirm_load(
    *,
    control_topic: Optional[str],
    bootstrap_server: Sequence[str],
    dataset_name: str,
    source: Optional[str],
    log_level: Optional[str] = None,
) -> None:
    """
    Confirms the snapshot has been loaded by sending the
    snapshot-loaded message on the control topic.
    """

    setup_logging(log_level)
    setup_sentry()

    logger = logging.getLogger("snuba.loaded-snapshot")
    logger.info(
        "Sending load completion message for dataset %s, from source %s",
        dataset_name,
        source,
    )

    dataset = get_dataset(dataset_name)

    storage = dataset.get_writable_storage()

    assert isinstance(
        storage, CdcStorage
    ), "Only CDC storages have a control topic and are thus supported."

    control_topic = control_topic or storage.get_default_control_topic()

    snapshot_source = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT,
        path=source,
    )

    descriptor = snapshot_source.get_descriptor()

    if not bootstrap_server:
        bootstrap_server = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name,
            settings.DEFAULT_BROKERS,
        )

    producer = Producer({
        "bootstrap.servers": ",".join(bootstrap_server),
        "partitioner": "consistent",
        "message.max.bytes": 50000000,  # 50MB, default is 1MB
    })

    msg = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=TransactionData(
            xmin=descriptor.xmin,
            xmax=descriptor.xmax,
            xip_list=descriptor.xip_list,
        ),
    )
    json_string = json.dumps(msg.to_dict())

    def delivery_callback(error, message) -> None:
        if error is not None:
            raise error
        else:
            logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic,
        value=json_string,
        on_delivery=delivery_callback,
    )

    producer.flush()
Example #7
class TestRecoveryState:
    transaction_data = TransactionData(xmin=Xid(1), xmax=Xid(2), xip_list=[])
    snapshot_id = SnapshotId("123asd")
    test_data = [
        (
            # Empty topic.
            [],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # One snapshot started for a table I am not interested into
            [(
                SnapshotInit(id=snapshot_id,
                             product="snuba",
                             tables=["some_table"]),
                CommitDecision.COMMIT_THIS,
            )],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # One snapshot started
            [(
                SnapshotInit(
                    id=snapshot_id,
                    product="snuba",
                    tables=["sentry_groupedmessage"],
                ),
                CommitDecision.COMMIT_PREV,
            )],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        (
            # initialized and aborted snapshot
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (SnapshotAbort(id=snapshot_id), CommitDecision.COMMIT_THIS),
            ],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # Initialized and ready
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(id=snapshot_id,
                                   transaction_info=transaction_data),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED,
            "123asd",
        ),
        (
            # Initialized and multiple overlapping snapshots that are ignored
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("234asd"),
                        product="someoneelse",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (SnapshotAbort(id=SnapshotId("234asd")),
                 CommitDecision.DO_NOT_COMMIT),
                (
                    SnapshotInit(
                        id=SnapshotId("345asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        (
            # Multiple successful consecutive snapshots
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id=snapshot_id,
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("234asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id=SnapshotId("234asd"),
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("345asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "345asd",
        ),
    ]

    @pytest.mark.parametrize("events, outcome, expected_id", test_data)
    def test_recovery(
        self,
        events: Sequence[Tuple[ControlMessage, CommitDecision]],
        outcome: ConsumerStateCompletionEvent,
        expected_id: Optional[str],
    ) -> None:
        recovery = RecoveryState("sentry_groupedmessage")
        for message, expected_commit_decision in events:
            if isinstance(message, SnapshotInit):
                decision = recovery.process_init(message)
            elif isinstance(message, SnapshotAbort):
                decision = recovery.process_abort(message)
            elif isinstance(message, SnapshotLoaded):
                decision = recovery.process_snapshot_loaded(message)
            assert decision == expected_commit_decision

        assert recovery.get_completion_event() == outcome
        active_snapshot = recovery.get_active_snapshot()
        if expected_id:
            assert active_snapshot is not None
            assert active_snapshot[0] == expected_id
        else:
            assert active_snapshot is None
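
Read together, the cases encode the consumer's commit protocol during recovery: a relevant SnapshotInit apparently yields COMMIT_PREV (commit what came before the init, not the init itself), an abort that closes the active snapshot yields COMMIT_THIS, and messages arriving while a snapshot is active yield DO_NOT_COMMIT. A minimal sketch driving the state machine directly, using only behavior this test already asserts:

snapshot_id = SnapshotId("123asd")
recovery = RecoveryState("sentry_groupedmessage")
init = SnapshotInit(id=snapshot_id, product="snuba", tables=["sentry_groupedmessage"])
assert recovery.process_init(init) == CommitDecision.COMMIT_PREV
assert recovery.process_abort(SnapshotAbort(id=snapshot_id)) == CommitDecision.COMMIT_THIS
assert recovery.get_completion_event() == ConsumerStateCompletionEvent.NO_SNAPSHOT
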
Example #8
class TestRecoveryState:
    transaction_data = TransactionData(
        xmin=1,
        xmax=2,
        xip_list=[],
    )
    test_data = [
        (
            # Empty topic.
            [],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # One snapshot started
            [(SnapshotInit(id="123asd", product="snuba",
                           tables=None), CommitDecision.COMMIT_PREV)],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        (
            # initialized and aborted snapshot
            [
                (SnapshotInit(id="123asd", product="snuba",
                              tables=None), CommitDecision.COMMIT_PREV),
                (SnapshotAbort(id="123asd"), CommitDecision.COMMIT_THIS),
            ],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # Initialized and ready
            [(SnapshotInit(id="123asd", product="snuba",
                           tables=None), CommitDecision.COMMIT_PREV),
             (SnapshotLoaded(
                 id="123asd",
                 transaction_info=transaction_data,
             ), CommitDecision.DO_NOT_COMMIT)],
            ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED,
            "123asd"),
        (
            # Initialized and multiple overlapping snapshots that are ignored
            [
                (SnapshotInit(id="123asd", product="snuba",
                              tables=None), CommitDecision.COMMIT_PREV),
                (SnapshotInit(id="234asd", product="someoneelse",
                              tables=None), CommitDecision.DO_NOT_COMMIT),
                (SnapshotAbort(id="234asd"), CommitDecision.DO_NOT_COMMIT),
                (SnapshotInit(id="345asd", product="snuba",
                              tables=None), CommitDecision.DO_NOT_COMMIT),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd"),
        (
            # Multiple successful consecutive snapshots
            [
                (SnapshotInit(id="123asd", product="snuba",
                              tables=None), CommitDecision.COMMIT_PREV),
                (SnapshotLoaded(
                    id="123asd",
                    transaction_info=transaction_data,
                ), CommitDecision.DO_NOT_COMMIT),
                (SnapshotInit(id="234asd", product="snuba",
                              tables=None), CommitDecision.COMMIT_PREV),
                (SnapshotLoaded(
                    id="234asd",
                    transaction_info=transaction_data,
                ), CommitDecision.DO_NOT_COMMIT),
                (SnapshotInit(id="345asd", product="snuba",
                              tables=None), CommitDecision.COMMIT_PREV),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "345asd")
    ]

    @pytest.mark.parametrize("events, outcome, expected_id", test_data)
    def test_recovery(self, events, outcome, expected_id) -> None:
        recovery = RecoveryState()
        for message, expected_commit_decision in events:
            if isinstance(message, SnapshotInit):
                decision = recovery.process_init(message)
            elif isinstance(message, SnapshotAbort):
                decision = recovery.process_abort(message)
            elif isinstance(message, SnapshotLoaded):
                decision = recovery.process_snapshot_loaded(message)
            assert decision == expected_commit_decision

        assert recovery.get_completion_event() == outcome
        if expected_id:
            assert recovery.get_active_snapshot()[0] == expected_id
        else:
            assert recovery.get_active_snapshot() is None
Example #9
def confirm_load(control_topic, bootstrap_server, dataset, source, log_level):
    """
    Confirms the snapshot has been loaded by sending the
    snapshot-loaded message on the control topic.
    """
    import sentry_sdk

    sentry_sdk.init(dsn=settings.SENTRY_DSN)
    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    logger = logging.getLogger('snuba.loaded-snapshot')
    logger.info(
        "Sending load completion message for dataset %s, from source %s",
        dataset, source)

    dataset_name = dataset
    dataset = get_dataset(dataset_name)
    assert isinstance(dataset, CdcDataset), \
        "Only CDC datasets have a control topic and are thus supported."

    control_topic = control_topic or dataset.get_default_control_topic()

    snapshot_source = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT,
        path=source,
    )

    descriptor = snapshot_source.get_descriptor()

    if not bootstrap_server:
        bootstrap_server = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name,
            settings.DEFAULT_BROKERS,
        )

    producer = Producer({
        'bootstrap.servers': ','.join(bootstrap_server),
        'partitioner': 'consistent',
        'message.max.bytes': 50000000,  # 50MB, default is 1MB
    })

    msg = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=TransactionData(
            xmin=descriptor.xmin,
            xmax=descriptor.xmax,
            xip_list=descriptor.xip_list,
        ),
    )
    json_string = json.dumps(msg.to_dict())

    def delivery_callback(error, message):
        if error is not None:
            raise error
        else:
            logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic,
        value=json_string,
        on_delivery=delivery_callback,
    )

    producer.flush()