Пример #1
0
def confirm_load(
    *,
    control_topic: Optional[str],
    bootstrap_server: Sequence[str],
    storage_name: str,
    source: str,
    log_level: Optional[str] = None,
) -> None:
    """
    Announce that a snapshot has been loaded.

    Builds a snapshot-loaded message from the descriptor of the snapshot
    found at ``source`` and produces it on the control topic.

    :param control_topic: Topic to publish on. When falsy, the default
        control topic of the storage is used instead.
    :param bootstrap_server: Passed as ``bootstrap_servers`` when building
        the Kafka producer configuration.
    :param storage_name: Name of the CDC storage, resolved via ``StorageKey``.
    :param source: Path the snapshot is loaded from.
    :param log_level: Forwarded to ``setup_logging``.
    """

    setup_logging(log_level)
    setup_sentry()

    log = logging.getLogger("snuba.loaded-snapshot")
    log.info(
        "Sending load completion message for storage %s, from source %s",
        storage_name,
        source,
    )

    cdc_storage = get_cdc_storage(StorageKey(storage_name))
    loader = cdc_storage.get_table_writer().get_stream_loader()

    # An explicitly provided topic wins over the storage default.
    target_topic = control_topic or cdc_storage.get_default_control_topic()

    snapshot_descriptor = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT, path=source,
    ).get_descriptor()

    producer_config = build_kafka_producer_configuration(
        loader.get_default_topic_spec().topic,
        bootstrap_servers=bootstrap_server,
        override_params={
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        },
    )
    kafka_producer = Producer(producer_config)

    loaded_message = SnapshotLoaded(
        id=snapshot_descriptor.id,
        transaction_info=TransactionData(
            xmin=snapshot_descriptor.xmin,
            xmax=snapshot_descriptor.xmax,
            xip_list=snapshot_descriptor.xip_list,
        ),
    )
    payload = json.dumps(loaded_message.to_dict())

    def on_delivery(error: KafkaError, message: Message) -> None:
        # Log on success; otherwise surface the reported error to the caller.
        if error is None:
            log.info("Message sent %r", message.value())
            return
        raise error

    kafka_producer.produce(target_topic, value=payload, on_delivery=on_delivery)
    kafka_producer.flush()
Пример #2
0
def confirm_load(
    *,
    control_topic: Optional[str],
    bootstrap_server: Sequence[str],
    dataset_name: str,
    source: Optional[str],
    log_level: Optional[str] = None,
) -> None:
    """
    Confirms the snapshot has been loaded by sending the
    snapshot-loaded message on the control topic.

    :param control_topic: Topic to publish on. When falsy, the default
        control topic of the dataset's writable storage is used.
    :param bootstrap_server: Kafka brokers to connect to. When empty, the
        brokers configured for ``dataset_name`` in
        ``settings.DEFAULT_DATASET_BROKERS`` are used, falling back to
        ``settings.DEFAULT_BROKERS``.
    :param dataset_name: Name of the dataset whose writable storage must be
        a ``CdcStorage``.
    :param source: Path the snapshot is loaded from.
    :param log_level: Forwarded to ``setup_logging``.
    :raises AssertionError: If the writable storage is not a ``CdcStorage``.
    """

    setup_logging(log_level)
    setup_sentry()

    logger = logging.getLogger("snuba.loaded-snapshot")
    logger.info(
        "Sending load completion message for dataset %s, from source %s",
        dataset_name,
        source,
    )

    dataset = get_dataset(dataset_name)

    storage = dataset.get_writable_storage()

    assert isinstance(
        storage, CdcStorage
    ), "Only CDC storages have a control topic thus are supported."

    control_topic = control_topic or storage.get_default_control_topic()

    snapshot_source = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT,
        path=source,
    )

    descriptor = snapshot_source.get_descriptor()

    if not bootstrap_server:
        # Look the override up by dataset *name*: using the resolved dataset
        # object as the key would not match a name-keyed mapping and would
        # always fall through to the default brokers.
        # NOTE(review): assumes DEFAULT_DATASET_BROKERS is keyed by dataset
        # name - confirm against the settings module.
        bootstrap_server = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name,
            settings.DEFAULT_BROKERS,
        )

    producer = Producer({
        "bootstrap.servers": ",".join(bootstrap_server),
        "partitioner": "consistent",
        "message.max.bytes": 50000000,  # 50MB, default is 1MB
    })

    msg = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=TransactionData(
            xmin=descriptor.xmin,
            xmax=descriptor.xmax,
            xip_list=descriptor.xip_list,
        ),
    )
    json_string = json.dumps(msg.to_dict())

    def delivery_callback(error, message) -> None:
        # Surface a reported delivery error; otherwise log what was sent.
        if error is not None:
            raise error
        else:
            logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic,
        value=json_string,
        on_delivery=delivery_callback,
    )

    producer.flush()
Пример #3
0
class TestRecoveryState:
    """
    Replays sequences of control messages through ``RecoveryState`` and
    checks the commit decision returned for each message, the final
    completion event and the id of the active snapshot (if any).
    """

    transaction_data = TransactionData(
        xmin=1,
        xmax=2,
        xip_list=[],
    )
    # Each entry is: (list of (message, expected commit decision),
    #                 expected completion event,
    #                 expected active snapshot id or None).
    test_data = [
        (
            # Empty topic.
            [],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # One snapshot started
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        (
            # initialized and aborted snapshot
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (SnapshotAbort(id="123asd"), CommitDecision.COMMIT_THIS),
            ],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # Initialized and ready
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id="123asd",
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED,
            "123asd",
        ),
        (
            # Initialized and multiple overlapping snapshots that are ignored
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotInit(
                        id="234asd", product="someoneelse", tables=None
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (SnapshotAbort(id="234asd"), CommitDecision.DO_NOT_COMMIT),
                (
                    SnapshotInit(id="345asd", product="snuba", tables=None),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        (
            # Multiple successful consecutive snapshots
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id="123asd",
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(id="234asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id="234asd",
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(id="345asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "345asd",
        ),
    ]

    @pytest.mark.parametrize("events, outcome, expected_id", test_data)
    def test_recovery(self, events, outcome, expected_id) -> None:
        """
        Feed every message of ``events`` to a fresh ``RecoveryState`` and
        verify the per-message commit decision, then the final completion
        event and active snapshot id.
        """
        recovery = RecoveryState()
        for message, expected_commit_decision in events:
            if isinstance(message, SnapshotInit):
                decision = recovery.process_init(message)
            elif isinstance(message, SnapshotAbort):
                decision = recovery.process_abort(message)
            elif isinstance(message, SnapshotLoaded):
                decision = recovery.process_snapshot_loaded(message)
            else:
                # Without this branch an unknown message type would be
                # checked against the decision of the previous message.
                raise AssertionError(
                    f"Unexpected control message in test data: {message!r}"
                )
            assert decision == expected_commit_decision

        assert recovery.get_completion_event() == outcome
        active_snapshot = recovery.get_active_snapshot()
        if expected_id:
            # Fail with a clear assertion instead of a TypeError when no
            # snapshot is active.
            assert active_snapshot is not None
            assert active_snapshot[0] == expected_id
        else:
            assert active_snapshot is None
Пример #4
0
class TestRecoveryState:
    """
    Replays sequences of control messages through a ``RecoveryState`` built
    for the ``sentry_groupedmessage`` table and checks the commit decision
    returned for each message, the final completion event and the id of the
    active snapshot (if any).
    """

    transaction_data = TransactionData(xmin=Xid(1), xmax=Xid(2), xip_list=[])
    snapshot_id = SnapshotId("123asd")
    # Each entry is: (list of (message, expected commit decision),
    #                 expected completion event,
    #                 expected active snapshot id or None).
    test_data = [
        (
            # Empty topic.
            [],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # One snapshot started for a table I am not interested into
            [(
                SnapshotInit(id=snapshot_id,
                             product="snuba",
                             tables=["some_table"]),
                CommitDecision.COMMIT_THIS,
            )],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # One snapshot started
            [(
                SnapshotInit(
                    id=snapshot_id,
                    product="snuba",
                    tables=["sentry_groupedmessage"],
                ),
                CommitDecision.COMMIT_PREV,
            )],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        (
            # initialized and aborted snapshot
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (SnapshotAbort(id=snapshot_id), CommitDecision.COMMIT_THIS),
            ],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        (
            # Initialized and ready
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(id=snapshot_id,
                                   transaction_info=transaction_data),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED,
            "123asd",
        ),
        (
            # Initialized and multiple overlapping snapshots that are ignored
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("234asd"),
                        product="someoneelse",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (SnapshotAbort(id=SnapshotId("234asd")),
                 CommitDecision.DO_NOT_COMMIT),
                (
                    SnapshotInit(
                        id=SnapshotId("345asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        (
            # Multiple successful consecutive snapshots
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id=snapshot_id,
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("234asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id=SnapshotId("234asd"),
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("345asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "345asd",
        ),
    ]

    @pytest.mark.parametrize("events, outcome, expected_id", test_data)
    def test_recovery(
        self,
        events: Sequence[Tuple[ControlMessage, CommitDecision]],
        outcome: ConsumerStateCompletionEvent,
        expected_id: str,
    ) -> None:
        """
        Feed every message of ``events`` to a fresh ``RecoveryState`` and
        verify the per-message commit decision, then the final completion
        event and active snapshot id (``expected_id`` is None in the test
        data when no snapshot is expected to be active).
        """
        recovery = RecoveryState("sentry_groupedmessage")
        for message, expected_commit_decision in events:
            if isinstance(message, SnapshotInit):
                decision = recovery.process_init(message)
            elif isinstance(message, SnapshotAbort):
                decision = recovery.process_abort(message)
            elif isinstance(message, SnapshotLoaded):
                decision = recovery.process_snapshot_loaded(message)
            else:
                # Without this branch an unknown message type would be
                # checked against the decision of the previous message.
                raise AssertionError(
                    f"Unexpected control message in test data: {message!r}"
                )
            assert decision == expected_commit_decision

        assert recovery.get_completion_event() == outcome
        active_snapshot = recovery.get_active_snapshot()
        if expected_id:
            assert active_snapshot is not None
            assert active_snapshot[0] == expected_id
        else:
            assert active_snapshot is None
Пример #5
0
def confirm_load(control_topic, bootstrap_server, dataset, source, log_level):
    """
    Confirms the snapshot has been loaded by sending the
    snapshot-loaded message on the control topic.

    :param control_topic: Topic to publish on. When falsy, the default
        control topic of the dataset is used.
    :param bootstrap_server: Kafka brokers to connect to. When empty, the
        brokers configured for the dataset name in
        ``settings.DEFAULT_DATASET_BROKERS`` are used, falling back to
        ``settings.DEFAULT_BROKERS``.
    :param dataset: Name of the dataset; it must resolve to a ``CdcDataset``.
    :param source: Path the snapshot is loaded from.
    :param log_level: Name of a ``logging`` level; it is upper-cased and
        looked up as an attribute of the ``logging`` module.
    :raises AssertionError: If the dataset is not a ``CdcDataset``.
    """
    import sentry_sdk

    sentry_sdk.init(dsn=settings.SENTRY_DSN)
    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    logger = logging.getLogger('snuba.loaded-snapshot')
    logger.info(
        "Sending load completion message for dataset %s, from source %s",
        dataset, source)

    # Keep ``dataset`` bound to the name: it is still needed below as the
    # key for the per-dataset broker lookup.
    cdc_dataset = get_dataset(dataset)
    assert isinstance(cdc_dataset, CdcDataset), \
        "Only CDC dataset have a control topic thus are supported."

    control_topic = control_topic or cdc_dataset.get_default_control_topic()

    snapshot_source = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT,
        path=source,
    )

    descriptor = snapshot_source.get_descriptor()

    if not bootstrap_server:
        # Look the override up by dataset *name*: using the resolved dataset
        # object as the key would not match a name-keyed mapping and would
        # always fall through to the default brokers.
        # NOTE(review): assumes DEFAULT_DATASET_BROKERS is keyed by dataset
        # name - confirm against the settings module.
        bootstrap_server = settings.DEFAULT_DATASET_BROKERS.get(
            dataset,
            settings.DEFAULT_BROKERS,
        )

    producer = Producer({
        'bootstrap.servers': ','.join(bootstrap_server),
        'partitioner': 'consistent',
        'message.max.bytes': 50000000,  # 50MB, default is 1MB
    })

    msg = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=TransactionData(
            xmin=descriptor.xmin,
            xmax=descriptor.xmax,
            xip_list=descriptor.xip_list,
        ),
    )
    json_string = json.dumps(msg.to_dict())

    def delivery_callback(error, message):
        # Surface a reported delivery error; otherwise log what was sent.
        if error is not None:
            raise error
        else:
            logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic,
        value=json_string,
        on_delivery=delivery_callback,
    )

    producer.flush()