def test_setup_contains_pii_from_schematizer_once(self, message):
    schematizer_client = get_schematizer()
    with attach_spy_on_func(schematizer_client, 'get_schema_by_id') as spy:
        message.contains_pii
        assert spy.call_count == 1
    # The second access is served from the schematizer client's cache,
    # so no further call to get_schema_by_id is made.
    with attach_spy_on_func(schematizer_client, 'get_schema_by_id') as spy:
        message.contains_pii
        assert spy.call_count == 0

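# These examples all rely on the `attach_spy_on_func` helper, whose
# implementation is not shown here. A minimal sketch, assuming it simply
# wraps `mock.patch.object` with `wraps=` so the real function still
# executes while the spy records calls, might look like this:
from contextlib import contextmanager

import mock


@contextmanager
def attach_spy_on_func(obj, func_name):
    # Patch obj.func_name with a MagicMock that delegates to the real
    # function; the yielded spy exposes call_count, reset_mock(), etc.
    with mock.patch.object(
        obj, func_name, wraps=getattr(obj, func_name)
    ) as spy:
        yield spy
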
def test_producer_registration_message_on_exit(self, producer_instance):
    producer = producer_instance.__enter__()
    with attach_spy_on_func(producer.registrar, 'stop') as func_spy:
        producer.publish(
            CreateMessage(schema_id=1, payload=bytes("Test message"))
        )
        producer.__exit__(None, None, None)
        assert func_spy.call_count == 1

def test_skip_commit_offset_if_offset_unchanged(
    self, publish_messages, message, consumer_instance
):
    asserter = ConsumerAsserter(
        consumer=consumer_instance, expected_message=message
    )
    with consumer_instance as consumer:
        publish_messages(message, 4)
        with attach_spy_on_func(
            consumer.kafka_client, 'send_offset_commit_request'
        ) as func_spy:
            msgs_r1 = consumer.get_messages(
                count=2, blocking=True, timeout=TIMEOUT
            )
            asserter.assert_messages(msgs_r1, 2)
            consumer.commit_messages(msgs_r1)
            assert func_spy.call_count == 1

            func_spy.reset_mock()
            # call_count does not increase when no new msgs are committed
            consumer.commit_messages(msgs_r1)
            assert func_spy.call_count == 0

            # assert that the next call to get_messages picks up from
            # the next offset
            msgs_r2 = consumer.get_messages(
                count=2, blocking=True, timeout=TIMEOUT
            )
            asserter.assert_messages(msgs_r2, 2)

def test_consumer_periodic_registration_messages(
    self, publish_messages, input_compatible_message, consumer_instance
):
    """Test that a Consumer periodically creates and sends registration
    messages once it has received messages from a topic it is consuming.

    Note: The test fails when the threshold is set significantly below
    1 second.
    """
    TIMEOUT = 1.8
    consumer_instance.registrar.threshold = 1
    with consumer_instance as consumer:
        with attach_spy_on_func(
            consumer.registrar.clog_writer, 'publish'
        ) as func_spy:
            publish_messages(input_compatible_message, count=1)
            consumer.get_message(blocking=True, timeout=TIMEOUT)
            consumer.registrar.threshold = 1
            consumer.registrar.start()
            time.sleep(2.5)
            assert func_spy.call_count == 2
            consumer.registrar.stop()

def test_publish_to_new_topic(self, create_new_schema, producer):
    new_schema = create_new_schema(source='retry_source')
    message = CreateMessage(new_schema.schema_id, payload=str('1'))
    with attach_spy_on_func(
        producer._kafka_producer.kafka_client, 'send_produce_request'
    ) as send_request_spy:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        send_request_spy.reset_mock()
        producer.publish(message)
        producer.flush()
        # It should fail at least the first time because the topic doesn't
        # exist yet. Depending on how quickly the topic is created, it
        # could retry more than twice.
        assert send_request_spy.call_count >= 2
        messages = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )

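# The `call_count >= 2` assertion above assumes the producer retries
# failed produce requests while the broker auto-creates the missing
# topic. A hypothetical sketch of such a retry loop (illustrative names,
# not the actual data_pipeline implementation):
import time


def send_produce_request_with_retry(kafka_client, requests,
                                    max_retries=5, backoff=0.1):
    for attempt in range(max_retries):
        try:
            # The first attempt typically fails until the auto-created
            # topic's metadata has propagated to the brokers.
            return kafka_client.send_produce_request(requests)
        except Exception:
            if attempt == max_retries - 1:
                raise
            time.sleep(backoff * (attempt + 1))
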
def test_producer_initial_registration_messages(self, use_work_pool):
    with attach_spy_on_func(clog, 'log_line') as func_spy:
        with Producer(
            producer_name='producer_1',
            team_name='bam',
            expected_frequency_seconds=ExpectedFrequency.constantly,
            use_work_pool=use_work_pool,
            schema_id_list=[1, 2, 3]
        ):
            assert func_spy.call_count == 3

def test_consumer_registration_message_on_exit(
    self, publish_messages, input_compatible_message, consumer_instance
):
    TIMEOUT = 1.8
    consumer = consumer_instance.__enter__()
    with attach_spy_on_func(consumer.registrar, 'stop') as func_spy:
        publish_messages(input_compatible_message, count=1)
        consumer.get_message(blocking=True, timeout=TIMEOUT)
        consumer.__exit__(None, None, None)
        assert func_spy.call_count == 1

def skip_test_offset_cache_cleared_at_rebalance(
    self,
    topic,
    pii_topic,
    publish_messages,
    consumer_instance,
    consumer_two_instance,
    message,
    pii_message
):
    # TODO [DATAPIPE-249] The previous version of this test had an issue
    # where consumer one sometimes didn't get any message right after
    # consumer two started. The cause is unclear and may be related to
    # how the tests are set up. Rewriting the test to bypass it and
    # deferring the fix to DATAPIPE-249.
    consumer_one_rebalanced_event = Event()
    with consumer_instance as consumer_one:
        publish_messages(message, count=10)
        publish_messages(pii_message, count=10)

        consumer_one_message = consumer_one.get_message(
            blocking=True, timeout=TIMEOUT
        )
        consumer_one.commit_message(consumer_one_message)

        # trigger rebalancing by starting another consumer with the same name
        consumer_two_process = Process(
            target=self._run_consumer_two,
            args=(consumer_two_instance, consumer_one_rebalanced_event)
        )
        consumer_two_process.start()

        # consumer one is rebalanced during `get_message`
        consumer_one.get_message(blocking=True, timeout=TIMEOUT)
        consumer_one_rebalanced_event.set()
        consumer_two_process.join(timeout=1)
        assert not consumer_two_process.exitcode

        # Force consumer rebalance again; the rebalance occurs when
        # get_message is called. Use a short timeout because we don't
        # care whether any messages are left.
        consumer_one.get_message(blocking=True, timeout=0.1)

        # The same offset should be committed again because rebalancing
        # clears the internal offset cache.
        with attach_spy_on_func(
            consumer_one.kafka_client, 'send_offset_commit_request'
        ) as func_spy:
            consumer_one.commit_message(consumer_one_message)
            assert func_spy.call_count == 1

def test_consumer_initial_registration_message(self, topic):
    """Assert that an initial RegistrationMessage is sent upon starting
    the Consumer with a non-empty topic_to_consumer_topic_state_map.
    """
    with attach_spy_on_func(clog, 'log_line') as func_spy:
        fake_topic = ConsumerTopicState({}, 23)
        with Consumer(
            consumer_name='test_consumer',
            team_name='bam',
            expected_frequency_seconds=ExpectedFrequency.constantly,
            topic_to_consumer_topic_state_map={topic: fake_topic}
        ):
            assert func_spy.call_count == 1

def test_periodic_wake_calls(self, registrar):
    """Test that calling start() periodically publishes messages at the
    expected rate until stop() is called.
    """
    with attach_spy_on_func(
        registrar, 'publish_registration_messages'
    ) as func_spy:
        registrar.threshold = 1
        registrar.start()
        time.sleep(3.5)
        registrar.stop()
        time.sleep(.5)
        # One call to publish_registration_messages happens on stop()
        assert func_spy.call_count == 4

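# The expected call_count of 4 above works out as: sleeping 3.5 seconds
# with a 1-second threshold yields 3 periodic publishes, plus 1 final
# publish on stop(). A minimal sketch of such a wake loop, assuming the
# registrar runs a daemon thread internally (its real internals are not
# shown here):
import threading


class PeriodicRegistrarSketch(object):
    def __init__(self, threshold=10):
        self.threshold = threshold
        self._stop_event = threading.Event()

    def start(self):
        self._thread = threading.Thread(target=self._wake_loop)
        self._thread.daemon = True
        self._thread.start()

    def _wake_loop(self):
        # Publish every `threshold` seconds; Event.wait returns False on
        # timeout (publish again) and True once stop() sets the event.
        while not self._stop_event.wait(self.threshold):
            self.publish_registration_messages()

    def stop(self):
        self._stop_event.set()
        self._thread.join()
        # One final publish on stop(), matching the test's accounting.
        self.publish_registration_messages()

    def publish_registration_messages(self):
        pass  # placeholder; the real registrar writes to clog
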
def test_call_kafka_commit_offsets_when_offset_change(
    self, publish_messages, message, consumer_instance
):
    asserter = ConsumerAsserter(
        consumer=consumer_instance, expected_message=message
    )
    with consumer_instance as consumer:
        publish_messages(message, 4)
        with attach_spy_on_func(
            consumer.kafka_client, 'send_offset_commit_request'
        ) as func_spy:
            msgs_r1 = consumer.get_messages(
                count=3, blocking=True, timeout=TIMEOUT
            )
            asserter.assert_messages(msgs_r1, 3)
            consumer.commit_messages(msgs_r1)
            assert func_spy.call_count == 1

            func_spy.reset_mock()
            # call_count increases when the offset differs from the last
            # committed offset
            consumer.commit_message(msgs_r1[0])
            assert func_spy.call_count == 1

            func_spy.reset_mock()
            consumer.commit_message(msgs_r1[2])
            assert func_spy.call_count == 1

            # assert that the next call to get_messages fetches the
            # message from the next offset
            msgs_r2 = consumer.get_messages(
                count=1, blocking=True, timeout=TIMEOUT
            )
            assert len(msgs_r2) == 1
            asserter.assert_messages(msgs_r2, 1)

def test_producer_periodic_registration_messages(self, producer_instance):
    """
    Note: The test fails when the threshold is set significantly below
    1 second, presumably because of the nature of threading. This should
    be irrelevant if the registrar's threshold is set significantly
    higher.
    """
    producer_instance.registrar.threshold = 1
    with producer_instance as producer:
        with attach_spy_on_func(
            producer.registrar.clog_writer, 'publish'
        ) as func_spy:
            producer.publish(
                CreateMessage(schema_id=1, payload=bytes("FAKE MESSAGE"))
            )
            producer.publish(
                CreateMessage(
                    schema_id=2, payload=bytes("DIFFERENT FAKE MESSAGE")
                )
            )
            time.sleep(2.5)
            assert func_spy.call_count == 4

def test_ensure_messages_published_on_new_topic(
    self, create_new_schema, producer
):
    """When a topic doesn't exist, all of the messages on that topic
    should be published.
    """
    new_schema = create_new_schema(source='ensure_published_source_two')
    message = CreateMessage(new_schema.schema_id, payload=str('1'))
    topic = str(new_schema.topic.name)

    with attach_spy_on_func(producer, 'publish') as func_spy:
        producer.ensure_messages_published([message], {})
        assert func_spy.call_count == 1

    with setup_capture_new_messages_consumer(topic) as consumer:
        kafka_offset = 0
        consumer.seek(kafka_offset, 0)  # kafka_offset from head
        self._assert_all_messages_published(consumer, expected_payloads=[1])

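# ensure_messages_published is asserted above to publish exactly once
# when the topic is new. A hypothetical sketch of what that check could
# look like (the real method presumably also inspects existing topic
# offsets; that logic is omitted here):
def ensure_messages_published(producer, messages, topic_offsets):
    for msg in messages:
        # A topic absent from topic_offsets is treated as new, so all
        # of its messages are (re)published.
        if msg.topic not in topic_offsets:
            producer.publish(msg)
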
def test_publish_succeeds_without_retry(self, topic, message, producer):
    with attach_spy_on_func(
        producer._kafka_producer.kafka_client, 'send_produce_request'
    ) as send_request_spy, capture_new_messages(topic) as get_messages:
        orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
        producer.publish(message)
        producer.flush()
        messages = get_messages()
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        assert send_request_spy.call_count == 1
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )

def test_offset_cache_reset_on_topic_reset(
    self, publish_messages, message, consumer_instance
):
    asserter = ConsumerAsserter(
        consumer=consumer_instance, expected_message=message
    )
    with consumer_instance as consumer:
        publish_messages(message, 4)
        with attach_spy_on_func(
            consumer.kafka_client, 'send_offset_commit_request'
        ) as func_spy:
            msgs = consumer.get_messages(
                count=4, blocking=True, timeout=TIMEOUT
            )
            assert len(msgs) == 4
            asserter.assert_messages(msgs, 4)
            consumer.commit_messages(msgs)
            assert func_spy.call_count == 1

            topic_map = {
                topic: None for topic in consumer.topic_to_partition_map
            }
            with mock.patch.object(
                consumer,
                '_get_topics_in_region_from_topic_name',
                side_effect=[[x] for x in topic_map.keys()]
            ):
                consumer.reset_topics(
                    topic_to_consumer_topic_state_map=topic_map
                )

            func_spy.reset_mock()
            # When committing messages with the same offset,
            # send_offset_commit_request should still be called because
            # the cache is reset by consumer.reset_topics
            consumer.commit_messages(msgs)
            assert func_spy.call_count == 1

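# The commit tests above (skip-if-unchanged, commit-on-change, and
# reset-on-topic-reset) all assume a consumer-side offset cache along
# these lines; a hypothetical sketch, not the actual Consumer
# implementation:
class OffsetCacheSketch(object):
    def __init__(self, kafka_client, group):
        self.kafka_client = kafka_client
        self.group = group
        self._committed = {}  # (topic, partition) -> last committed offset

    def commit(self, offset_requests):
        # Only send offsets that differ from the last committed value,
        # so re-committing the same messages is a no-op.
        changed = [
            r for r in offset_requests
            if self._committed.get((r.topic, r.partition)) != r.offset
        ]
        if changed:
            self.kafka_client.send_offset_commit_request(self.group, changed)
            for r in changed:
                self._committed[(r.topic, r.partition)] = r.offset

    def reset(self):
        # Called on reset_topics() or rebalance: forget cached offsets so
        # the next commit is always sent to Kafka.
        self._committed = {}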