def test_send_message(
    self,
    value: str,
    expected: Optional[ProcessedMessage],
) -> None:
    """
    Feeds a single message built from `value` to a SnapshotAwareWorker
    backed by the "groupedmessages" storage and checks that the result
    of process_message equals `expected`.
    """
    worker = SnapshotAwareWorker(
        storage=get_storage("groupedmessages"),
        producer=FakeConfluentKafkaProducer(),
        snapshot_id=str(uuid1()),
        transaction_data=TransactionData(
            xmin=100,
            xmax=200,
            xip_list=[120, 130],
        ),
        replacements_topic=None,
        metrics=DummyMetricsBackend(strict=True),
    )

    # Third KafkaPayload argument: a single ("table", <encoded name>) pair.
    table_entries = [("table", "sentry_groupedmessage".encode())]
    payload = KafkaPayload(None, value.encode("utf-8"), table_entries)
    message: Message[KafkaPayload] = Message(
        Partition(Topic("topic"), 0),
        1,
        payload,
        datetime.now(),
    )

    assert worker.process_message(message) == expected
def test_send_message(
    self,
    message: bytes,
    expected: Optional[Tuple[int, Mapping[str, Any]]],
) -> None:
    """
    Wraps `message` with build_msg, hands it to a SnapshotAwareWorker
    created for the "groupedmessage" dataset, and checks that
    process_message returns `expected`.
    """
    worker = SnapshotAwareWorker(
        dataset=get_dataset("groupedmessage"),
        producer=FakeKafkaProducer(),
        snapshot_id=str(uuid1()),
        transaction_data=TransactionData(
            xmin=100,
            xmax=200,
            xip_list=[120, 130],
        ),
        replacements_topic=None,
        metrics=None,
    )

    kafka_message = build_msg(1, 0, message)
    assert worker.process_message(kafka_message) == expected
def test_send_message(
    self,
    message: str,
    expected: Optional[ProcessedMessage],
) -> None:
    """
    Encodes `message` as UTF-8, wraps it in a KafkaMessage and checks
    that a SnapshotAwareWorker for the "groupedmessage" dataset
    processes it into `expected`.
    """
    worker = SnapshotAwareWorker(
        dataset=get_dataset("groupedmessage"),
        producer=FakeConfluentKafkaProducer(),
        snapshot_id=str(uuid1()),
        transaction_data=TransactionData(
            xmin=100,
            xmax=200,
            xip_list=[120, 130],
        ),
        replacements_topic=None,
        metrics=DummyMetricsBackend(strict=True),
    )

    kafka_message = KafkaMessage(
        TopicPartition("topic", 0),
        1,
        message.encode("utf-8"),
    )
    assert worker.process_message(kafka_message) == expected
def test_send_message(xid: int, expected: Optional[ProcessedMessage]) -> None:
    """
    Builds an insert event for `xid`, runs it through a SnapshotProcessor
    wrapping the GROUPEDMESSAGES stream processor, and checks that the
    result equals `expected`.
    """
    writable_storage = get_writable_storage(StorageKey.GROUPEDMESSAGES)
    stream_loader = writable_storage.get_table_writer().get_stream_loader()

    worker = SnapshotProcessor(
        processor=stream_loader.get_processor(),
        snapshot_id=SnapshotId(str(uuid1())),
        transaction_data=TransactionData(
            xmin=Xid(100),
            xmax=Xid(200),
            xip_list=[Xid(120), Xid(130)],
        ),
    )

    event = get_insert_event(xid)
    metadata = KafkaMessageMetadata(
        offset=1,
        partition=0,
        timestamp=datetime.now(),
    )
    result = worker.process_message(event, metadata)
    assert result == expected
def confirm_load(
    *,
    control_topic: Optional[str],
    bootstrap_server: Sequence[str],
    storage_name: str,
    source: str,
    log_level: Optional[str] = None,
) -> None:
    """
    Publishes a snapshot-loaded message on the control topic to signal
    that the snapshot found at `source` has been loaded.

    When `control_topic` is not provided, the default control topic of the
    CDC storage identified by `storage_name` is used. The message carries
    the snapshot id and the transaction data (xmin, xmax, xip_list) read
    from the snapshot descriptor.
    """
    setup_logging(log_level)
    setup_sentry()

    logger = logging.getLogger("snuba.loaded-snapshot")
    logger.info(
        "Sending load completion message for storage %s, from source %s",
        storage_name,
        source,
    )

    storage = get_cdc_storage(StorageKey(storage_name))
    stream_loader = storage.get_table_writer().get_stream_loader()

    # An explicitly provided topic wins over the storage default.
    control_topic = control_topic or storage.get_default_control_topic()

    descriptor = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT,
        path=source,
    ).get_descriptor()

    producer_configuration = build_kafka_producer_configuration(
        stream_loader.get_default_topic_spec().topic,
        bootstrap_servers=bootstrap_server,
        override_params={
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        },
    )
    producer = Producer(producer_configuration)

    transaction_info = TransactionData(
        xmin=descriptor.xmin,
        xmax=descriptor.xmax,
        xip_list=descriptor.xip_list,
    )
    loaded_message = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=transaction_info,
    )
    payload = json.dumps(loaded_message.to_dict())

    def delivery_callback(error: KafkaError, message: Message) -> None:
        # Re-raise whatever delivery error the producer hands over;
        # otherwise just log the delivered payload.
        if error is not None:
            raise error
        logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic,
        value=payload,
        on_delivery=delivery_callback,
    )
    producer.flush()
def confirm_load(
    *,
    control_topic: Optional[str],
    bootstrap_server: Sequence[str],
    dataset_name: str,
    source: Optional[str],
    log_level: Optional[str] = None,
) -> None:
    """
    Confirms the snapshot has been loaded by sending the
    snapshot-loaded message on the control topic.

    :param control_topic: Topic to publish on. When falsy, the default
        control topic of the dataset's writable storage is used.
    :param bootstrap_server: Kafka bootstrap servers. When empty, the
        brokers configured in settings for this dataset (or the global
        default brokers) are used.
    :param dataset_name: Name of the dataset the snapshot belongs to. Its
        writable storage must be a CdcStorage.
    :param source: Path handed to PostgresSnapshot.load.
    :param log_level: Passed to setup_logging.
    """
    setup_logging(log_level)
    setup_sentry()

    logger = logging.getLogger("snuba.loaded-snapshot")
    logger.info(
        "Sending load completion message for dataset %s, from source %s",
        dataset_name,
        source,
    )

    dataset = get_dataset(dataset_name)

    storage = dataset.get_writable_storage()

    assert isinstance(
        storage, CdcStorage
    ), "Only CDC storages have a control topic thus are supported."

    control_topic = control_topic or storage.get_default_control_topic()

    snapshot_source = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT, path=source,
    )

    descriptor = snapshot_source.get_descriptor()

    if not bootstrap_server:
        # Look the override up by dataset *name*. The previous code used the
        # resolved Dataset object as the key, which would never match a
        # name-keyed mapping and always fell back to DEFAULT_BROKERS.
        # NOTE(review): assumes DEFAULT_DATASET_BROKERS is keyed by dataset
        # name - confirm against settings.
        bootstrap_server = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name, settings.DEFAULT_BROKERS,
        )

    producer = Producer(
        {
            "bootstrap.servers": ",".join(bootstrap_server),
            "partitioner": "consistent",
            "message.max.bytes": 50000000,  # 50MB, default is 1MB
        }
    )

    msg = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=TransactionData(
            xmin=descriptor.xmin,
            xmax=descriptor.xmax,
            xip_list=descriptor.xip_list,
        ),
    )
    json_string = json.dumps(msg.to_dict())

    def delivery_callback(error, message) -> None:
        # Re-raise the delivery error handed over by the producer, if any;
        # otherwise log the delivered payload.
        if error is not None:
            raise error
        else:
            logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic, value=json_string, on_delivery=delivery_callback,
    )

    producer.flush()
class TestRecoveryState:
    """
    Parametrized checks of RecoveryState for the "sentry_groupedmessage"
    table: each case replays a sequence of control messages, verifies the
    commit decision returned for every message, then verifies the final
    completion event and the id of the active snapshot (if any).
    """

    transaction_data = TransactionData(xmin=Xid(1), xmax=Xid(2), xip_list=[])
    snapshot_id = SnapshotId("123asd")

    # Each entry: (events, expected completion event, expected snapshot id),
    # where events is a list of (control message, expected commit decision).
    test_data = [
        # Empty topic.
        ([], ConsumerStateCompletionEvent.NO_SNAPSHOT, None),
        # One snapshot started for a table other than the one tracked here.
        (
            [
                (
                    SnapshotInit(
                        id=snapshot_id, product="snuba", tables=["some_table"]
                    ),
                    CommitDecision.COMMIT_THIS,
                ),
            ],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        # One snapshot started.
        (
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        # Initialized and aborted snapshot.
        (
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (SnapshotAbort(id=snapshot_id), CommitDecision.COMMIT_THIS),
            ],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        # Initialized and ready.
        (
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id=snapshot_id, transaction_info=transaction_data
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED,
            "123asd",
        ),
        # Initialized and multiple overlapping snapshots that are ignored.
        (
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("234asd"),
                        product="someoneelse",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotAbort(id=SnapshotId("234asd")),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("345asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        # Multiple successful consecutive snapshots.
        (
            [
                (
                    SnapshotInit(
                        id=snapshot_id,
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id=snapshot_id, transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("234asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id=SnapshotId("234asd"),
                        transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(
                        id=SnapshotId("345asd"),
                        product="snuba",
                        tables=["sentry_groupedmessage"],
                    ),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "345asd",
        ),
    ]

    @pytest.mark.parametrize("events, outcome, expected_id", test_data)
    def test_recovery(
        self,
        events: Sequence[Tuple[ControlMessage, CommitDecision]],
        outcome: ConsumerStateCompletionEvent,
        expected_id: str,
    ) -> None:
        """
        Replays `events` on a fresh RecoveryState, checking each commit
        decision, then checks the completion event and active snapshot.
        """
        state = RecoveryState("sentry_groupedmessage")

        for control_message, expected_decision in events:
            # Route each control message to its matching handler.
            if isinstance(control_message, SnapshotInit):
                decision = state.process_init(control_message)
            elif isinstance(control_message, SnapshotAbort):
                decision = state.process_abort(control_message)
            elif isinstance(control_message, SnapshotLoaded):
                decision = state.process_snapshot_loaded(control_message)
            assert decision == expected_decision

        assert state.get_completion_event() == outcome

        active = state.get_active_snapshot()
        if not expected_id:
            assert active is None
            return
        assert active is not None
        assert active[0] == expected_id
class TestRecoveryState:
    """
    Parametrized checks of RecoveryState: each case replays a sequence of
    control messages, verifies the commit decision returned for every
    message, then verifies the final completion event and the id of the
    active snapshot (if any).
    """

    transaction_data = TransactionData(xmin=1, xmax=2, xip_list=[])

    # Each entry: (events, expected completion event, expected snapshot id),
    # where events is a list of (control message, expected commit decision).
    test_data = [
        # Empty topic.
        ([], ConsumerStateCompletionEvent.NO_SNAPSHOT, None),
        # One snapshot started.
        (
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        # Initialized and aborted snapshot.
        (
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (SnapshotAbort(id="123asd"), CommitDecision.COMMIT_THIS),
            ],
            ConsumerStateCompletionEvent.NO_SNAPSHOT,
            None,
        ),
        # Initialized and ready.
        (
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id="123asd", transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED,
            "123asd",
        ),
        # Initialized and multiple overlapping snapshots that are ignored.
        (
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotInit(
                        id="234asd", product="someoneelse", tables=None
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (SnapshotAbort(id="234asd"), CommitDecision.DO_NOT_COMMIT),
                (
                    SnapshotInit(id="345asd", product="snuba", tables=None),
                    CommitDecision.DO_NOT_COMMIT,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "123asd",
        ),
        # Multiple successful consecutive snapshots.
        (
            [
                (
                    SnapshotInit(id="123asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id="123asd", transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(id="234asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
                (
                    SnapshotLoaded(
                        id="234asd", transaction_info=transaction_data,
                    ),
                    CommitDecision.DO_NOT_COMMIT,
                ),
                (
                    SnapshotInit(id="345asd", product="snuba", tables=None),
                    CommitDecision.COMMIT_PREV,
                ),
            ],
            ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED,
            "345asd",
        ),
    ]

    @pytest.mark.parametrize("events, outcome, expected_id", test_data)
    def test_recovery(self, events, outcome, expected_id) -> None:
        """
        Replays `events` on a fresh RecoveryState, checking each commit
        decision, then checks the completion event and active snapshot.
        """
        state = RecoveryState()

        for control_message, expected_decision in events:
            # Route each control message to its matching handler.
            if isinstance(control_message, SnapshotInit):
                decision = state.process_init(control_message)
            elif isinstance(control_message, SnapshotAbort):
                decision = state.process_abort(control_message)
            elif isinstance(control_message, SnapshotLoaded):
                decision = state.process_snapshot_loaded(control_message)
            assert decision == expected_decision

        assert state.get_completion_event() == outcome

        if not expected_id:
            assert state.get_active_snapshot() is None
            return
        assert state.get_active_snapshot()[0] == expected_id
def confirm_load(control_topic, bootstrap_server, dataset, source, log_level):
    """
    Confirms the snapshot has been loaded by sending the
    snapshot-loaded message on the control topic.

    :param control_topic: Topic to publish on. When falsy, the default
        control topic of the dataset is used.
    :param bootstrap_server: Kafka bootstrap servers. When empty, the
        brokers configured in settings for this dataset (or the global
        default brokers) are used.
    :param dataset: Name of the dataset; it must resolve to a CdcDataset.
    :param source: Path handed to PostgresSnapshot.load.
    :param log_level: Name of a logging level (upper-cased and looked up on
        the logging module).
    """
    import sentry_sdk

    sentry_sdk.init(dsn=settings.SENTRY_DSN)

    logging.basicConfig(
        level=getattr(logging, log_level.upper()),
        format='%(asctime)s %(message)s',
    )

    logger = logging.getLogger('snuba.loaded-snapshot')
    logger.info(
        "Sending load completion message for dataset %s, from source %s",
        dataset,
        source,
    )

    # Keep the name around: `dataset` is rebound to the resolved object below,
    # but the per-dataset broker lookup needs the name.
    dataset_name = dataset
    dataset = get_dataset(dataset_name)
    assert isinstance(dataset, CdcDataset), \
        "Only CDC dataset have a control topic thus are supported."

    control_topic = control_topic or dataset.get_default_control_topic()

    snapshot_source = PostgresSnapshot.load(
        product=settings.SNAPSHOT_LOAD_PRODUCT,
        path=source,
    )

    descriptor = snapshot_source.get_descriptor()

    if not bootstrap_server:
        # The previous code used the resolved dataset object as the key,
        # which would never match a name-keyed mapping and always fell back
        # to DEFAULT_BROKERS.
        # NOTE(review): assumes DEFAULT_DATASET_BROKERS is keyed by dataset
        # name - confirm against settings.
        bootstrap_server = settings.DEFAULT_DATASET_BROKERS.get(
            dataset_name,
            settings.DEFAULT_BROKERS,
        )

    producer = Producer({
        'bootstrap.servers': ','.join(bootstrap_server),
        'partitioner': 'consistent',
        'message.max.bytes': 50000000,  # 50MB, default is 1MB
    })

    msg = SnapshotLoaded(
        id=descriptor.id,
        transaction_info=TransactionData(
            xmin=descriptor.xmin,
            xmax=descriptor.xmax,
            xip_list=descriptor.xip_list,
        ),
    )
    json_string = json.dumps(msg.to_dict())

    def delivery_callback(error, message):
        # Re-raise the delivery error handed over by the producer, if any;
        # otherwise log the delivered payload.
        if error is not None:
            raise error
        else:
            logger.info("Message sent %r", message.value())

    producer.produce(
        control_topic,
        value=json_string,
        on_delivery=delivery_callback,
    )

    producer.flush()