def test_forced_recovery_when_overpublished(
    self, topic, messages, producer, topic_offsets
):
    """Forced recovery republishes when more messages exist than ensured.

    All messages are published first; ensuring only the first two with
    forced recovery enabled should log the discrepancy, restore position
    info, and leave exactly two messages visible to a new consumer.
    """
    for msg in messages:
        producer.publish(msg)
    producer.flush()

    ensured_count = len(messages[:2])
    with reconfigure(
        force_recovery_from_publication_unensurable_error=True
    ), setup_capture_new_messages_consumer(
        topic
    ) as consumer, mock.patch.object(
        data_pipeline.producer, 'logger'
    ) as mock_logger:
        producer.ensure_messages_published(messages[:2], topic_offsets)
        self._assert_logged_info_correct(
            mock_logger,
            len(messages),
            topic,
            topic_offsets,
            message_count=ensured_count
        )
        self._verify_position_and_highwatermarks(
            topics=[topic],
            producer=producer,
            message_count=ensured_count
        )
        assert len(consumer.get_messages(10)) == 2
def test_encrypted_message(self, pii_schema, payload, example_payload_data):
    """PII messages round-trip through encryption whether built from a raw
    payload or from payload data; both payload and previous_payload are
    checked against their decrypted avro representations.
    """
    # TODO [clin|DATAPIPE-851] let's see if this can be refactored
    with reconfigure(encryption_type='AES_MODE_CBC-1'):
        for raw_payload, data in [(payload, None), (None, example_payload_data)]:
            message = self.message_class(
                schema_id=pii_schema.schema_id,
                payload=raw_payload,
                previous_payload=raw_payload,
                payload_data=data,
                previous_payload_data=data
            )
            assert message.payload == payload
            assert message.previous_payload == payload
            assert message.payload_data == example_payload_data
            assert message.previous_payload_data == example_payload_data
            self.assert_equal_decrypted_payload(
                message,
                actual_encrypted_payload=message.avro_repr['payload'],
                expected_decrypted_payload=payload
            )
            self.assert_equal_decrypted_payload(
                message,
                actual_encrypted_payload=message.avro_repr['previous_payload'],
                expected_decrypted_payload=payload
            )
def test_skip_publish_pii_message(self, pii_schema, payload, producer_instance):
    """With PII skipping enabled, a PII message is dropped and the skip is
    logged instead of published.
    """
    with reconfigure(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=True
    ), producer_instance as producer, mock.patch.object(
        data_pipeline._kafka_producer, 'logger'
    ) as mock_logger:
        pii_message = CreateMessage(
            schema_id=pii_schema.schema_id,
            payload=payload
        )
        published = self._publish_message(pii_message, producer)

        # Nothing published and no stray worker processes left behind.
        assert len(published) == 0
        assert len(multiprocessing.active_children()) == 0

        expected_log = (
            "Skipping a PII message - uuid hex: {}, schema_id: {}, "
            "timestamp: {}, type: {}"
        ).format(
            pii_message.uuid_hex,
            pii_message.schema_id,
            pii_message.timestamp,
            pii_message.message_type.name
        )
        assert mock_logger.info.call_args_list[0] == mock.call(expected_log)
def configure_teams():
    """Point the clientlib at the repo's teams.yaml for the duration of
    the fixture.
    """
    config_path = os.path.join(os.path.dirname(__file__), '../teams.yaml')
    with reconfigure(data_pipeline_teams_config_file_path=config_path):
        yield
def test_kafka_discovery(self, config, cluster_name, cluster_type):
    """Cluster type/name overrides discover the matching cluster config."""
    overrides = dict(
        kafka_cluster_type=cluster_type,
        kafka_cluster_name=cluster_name
    )
    with reconfigure(**overrides):
        assert config.cluster_config.name == cluster_name
def test_encrypted_message(self, pii_schema, payload, example_payload_data):
    """Encryption round-trips for messages built from either a raw payload
    or payload data, covering both payload and previous_payload fields.
    """
    # TODO [clin|DATAPIPE-851] let's see if this can be refactored
    with reconfigure(encryption_type='AES_MODE_CBC-1'):
        for raw_payload, data in [(payload, None), (None, example_payload_data)]:
            message = self.message_class(
                schema_id=pii_schema.schema_id,
                payload=raw_payload,
                previous_payload=raw_payload,
                payload_data=data,
                previous_payload_data=data
            )
            assert message.payload == payload
            assert message.previous_payload == payload
            assert message.payload_data == example_payload_data
            assert message.previous_payload_data == example_payload_data
            for field in ('payload', 'previous_payload'):
                self.assert_equal_decrypted_payload(
                    message,
                    actual_encrypted_payload=message.avro_repr[field],
                    expected_decrypted_payload=payload
                )
def test_publish_pii_message(self, pii_schema, payload, producer_instance):
    """With PII skipping disabled, a PII message is encrypted and
    published normally, leaving no stray worker processes.
    """
    with reconfigure(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=False
    ), producer_instance as producer:
        pii_message = CreateMessage(
            schema_id=pii_schema.schema_id,
            payload=payload
        )
        self._publish_and_assert_pii_message(pii_message, producer)
    assert len(multiprocessing.active_children()) == 0
def test_publishing_message_when_skipping_unset_position_info(
    self, tracker, position_info
):
    """A message without upstream position info does not clobber the last
    recorded position when skip-on-unset is configured.
    """
    with reconfigure(skip_position_info_update_when_not_set=True):
        batch = [
            self._create_message(upstream_position_info=position_info),
            self._create_message(upstream_position_info=None),
        ]
        self._publish_messages(tracker, batch)
    position_data = tracker.get_position_data()
    assert position_data.last_published_message_position_info == position_info
    assert position_data.topic_to_last_position_info_map == {
        self.topic: position_info
    }
def config_benchmark_containers_connections():
    """Reconfigures the clientlib to talk to benchmark containers, when
    both the clientlib and benchmarks are run inside docker containers.
    """
    with reconfigure(
        schematizer_host_and_port='schematizer:8888',
        kafka_zookeeper='zk:2181',
        kafka_broker_list=['kafka:9092'],
        should_use_testing_containers=True
    ):
        yield
def test_kafka_discovery_precedence(self, config, addr, cluster_name, cluster_type):
    """Cluster type/name discovery wins over explicit broker/zookeeper
    overrides when both are supplied.
    """
    with reconfigure(
        kafka_cluster_type=cluster_type,
        kafka_cluster_name=cluster_name,
        kafka_broker_list=[addr],
        kafka_zookeeper=addr
    ):
        resolved = config.cluster_config
        assert resolved.name == cluster_name
        assert resolved.broker_list != [addr]
        assert resolved.zookeeper != addr
def reconfigure_config():
    """Reconfigures the clientlib to pick up config files from the repo
    instead of the default file-system locations.
    """
    with reconfigure(
        zookeeper_discovery_path='zookeeper_discovery{ecosystem}.yaml',
        key_location='./',
        data_pipeline_teams_config_file_path='teams.yaml',
        ecosystem_file_path='ecosystem'
    ):
        yield
def test_kafka_discovery_precedence(self, config, addr, cluster_name, cluster_type):
    """Discovery by cluster type/name takes precedence over explicit
    broker list and zookeeper overrides.
    """
    overrides = dict(
        kafka_cluster_type=cluster_type,
        kafka_cluster_name=cluster_name,
        kafka_broker_list=[addr],
        kafka_zookeeper=addr
    )
    with reconfigure(**overrides):
        cluster_config = config.cluster_config
        assert cluster_config.name == cluster_name
        assert cluster_config.broker_list != [addr]
        assert cluster_config.zookeeper != addr
def test_publish_pii_message(self, pii_schema, payload, producer_instance):
    """PII messages publish successfully when skipping is disabled and
    encryption is configured; no child processes are leaked.
    """
    with reconfigure(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=False
    ), producer_instance as producer:
        message = CreateMessage(schema_id=pii_schema.schema_id, payload=payload)
        self._publish_and_assert_pii_message(message, producer)
    assert len(multiprocessing.active_children()) == 0
def test_message_str_with_pii(self, pii_message):
    """str() of a PII message redacts payload values down to their types."""
    with reconfigure(encryption_type='AES_MODE_CBC-1'):
        actual = str(pii_message)

    expected = {
        'message_type': self.expected_message_type.name,
        'schema_id': pii_message.schema_id,
        'timestamp': pii_message.timestamp,
        'meta': [pii_message.meta[0]._asdict()],
        'encryption_type': pii_message.encryption_type,
        'uuid': pii_message.uuid_hex,
        'payload_data': {u'good_field': u"<type 'int'>"},
    }
    # only use eval to get the original dict when the string is trusted
    assert eval(actual) == expected
def test_message_str_with_pii(self, pii_message):
    """The string form of a PII message shows field types, not values."""
    with reconfigure(encryption_type='AES_MODE_CBC-1'):
        rendered = str(pii_message)

    redacted_payload = {u'good_field': u"<type 'int'>"}
    expected = {
        'message_type': self.expected_message_type.name,
        'schema_id': pii_message.schema_id,
        'timestamp': pii_message.timestamp,
        'meta': [pii_message.meta[0]._asdict()],
        'encryption_type': pii_message.encryption_type,
        'uuid': pii_message.uuid_hex,
        'payload_data': redacted_payload,
    }
    # only use eval to get the original dict when the string is trusted
    assert eval(rendered) == expected
def test_encrypted_message(self, pii_schema, payload, example_payload_data):
    """Encryption round-trips for messages constructed from either a raw
    payload or payload data.
    """
    with reconfigure(encryption_type='AES_MODE_CBC-1'):
        for raw_payload, data in [(payload, None), (None, example_payload_data)]:
            message = self.message_class(
                schema_id=pii_schema.schema_id,
                payload=raw_payload,
                payload_data=data,
            )
            assert message.payload == payload
            assert message.payload_data == example_payload_data
            self.assert_equal_decrypted_payload(
                message,
                actual_encrypted_payload=message.avro_repr['payload'],
                expected_decrypted_payload=payload
            )
def test_encrypted_message(self, pii_schema, payload, example_payload_data):
    """Both payload-based and payload-data-based construction yield the
    same decrypted payload under the configured encryption type.
    """
    construction_params = [(payload, None), (None, example_payload_data)]
    with reconfigure(encryption_type='AES_MODE_CBC-1'):
        for raw_payload, data in construction_params:
            message = self.message_class(
                schema_id=pii_schema.schema_id,
                payload=raw_payload,
                payload_data=data,
            )
            assert message.payload == payload
            assert message.payload_data == example_payload_data
            self.assert_equal_decrypted_payload(
                message,
                actual_encrypted_payload=message.avro_repr['payload'],
                expected_decrypted_payload=payload
            )
def test_skip_publish_pii_message(self, pii_schema, payload, producer_instance):
    """PII messages are dropped (not published) and the skip is logged
    when skip_messages_with_pii is enabled.
    """
    with reconfigure(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=True
    ), producer_instance as producer, mock.patch.object(
        data_pipeline._kafka_producer, 'logger'
    ) as mock_logger:
        pii_message = CreateMessage(
            schema_id=pii_schema.schema_id,
            payload=payload
        )
        messages = self._publish_message(pii_message, producer)
        assert len(messages) == 0
        assert len(multiprocessing.active_children()) == 0

        expected_call_args = (
            "Skipping a PII message - uuid hex: {}, schema_id: {}, "
            "timestamp: {}, type: {}"
        ).format(
            pii_message.uuid_hex,
            pii_message.schema_id,
            pii_message.timestamp,
            pii_message.message_type.name
        )
        assert mock_logger.info.call_args_list[0] == mock.call(expected_call_args)
def test_forced_recovery_when_overpublished(
    self, topic, messages, producer, topic_offsets
):
    """When more messages are on the topic than were ensured, forced
    recovery logs the discrepancy and leaves exactly two messages visible.
    """
    for msg in messages:
        producer.publish(msg)
    producer.flush()

    with reconfigure(
        force_recovery_from_publication_unensurable_error=True
    ), setup_capture_new_messages_consumer(
        topic
    ) as consumer, mock.patch.object(
        data_pipeline.producer, 'logger'
    ) as mock_logger:
        producer.ensure_messages_published(messages[:2], topic_offsets)
        self._assert_logged_info_correct(
            mock_logger,
            len(messages),
            topic,
            topic_offsets,
            message_count=len(messages[:2])
        )
        assert len(consumer.get_messages(10)) == 2
def test_kafka_zookeeper(self, config, addr):
    """An explicit zookeeper override propagates to the cluster config."""
    with reconfigure(kafka_zookeeper=addr):
        assert config.cluster_config.zookeeper == addr
def test_setup_encryption_type_from_config_once(self, pii_message):
    """A message captures its encryption type on first read; later config
    changes do not alter an already-built message.
    """
    with reconfigure(encryption_type='Algorithm_one-1'):
        assert pii_message.encryption_type == 'Algorithm_one-1'
        with reconfigure(encryption_type='Algorithm_two-1'):
            # Still the first type: the value was latched once.
            assert pii_message.encryption_type == 'Algorithm_one-1'
def test_consumer_partitioner_cooldown_default(self, config):
    """The partitioner cooldown default is overridable via reconfigure."""
    with reconfigure(consumer_partitioner_cooldown_default=10.0):
        assert config.consumer_partitioner_cooldown_default == 10.0
def test_consumer_get_messages_timeout_default(self, config):
    """The get_messages timeout default is overridable via reconfigure."""
    with reconfigure(consumer_get_messages_timeout_default=10.0):
        assert config.consumer_get_messages_timeout_default == 10.0
def test_consumer_use_group_sha_default(self, config):
    """The group-sha default flag is overridable via reconfigure."""
    with reconfigure(consumer_use_group_sha_default=False):
        assert config.consumer_use_group_sha_default is False
def setup_flush_time_limit(self):
    """Raise the producer flush time limit so all messages in a test are
    flushed together.

    NOTE(review): the original yielded the ``reconfigure(...)`` context
    manager without entering it, so the override never took effect; every
    other fixture in this file applies overrides via ``with`` before
    yielding, so this now does the same.
    """
    # publish all msgs together
    with reconfigure(kafka_producer_flush_time_limit_seconds=10):
        yield
def test_force_recovery_from_publication_unensurable_error(self, config):
    """The forced-recovery flag is overridable via reconfigure."""
    with reconfigure(force_recovery_from_publication_unensurable_error=True):
        assert config.force_recovery_from_publication_unensurable_error
def test_kafka_client_ack_count(self, config):
    """The kafka ack count is overridable via reconfigure."""
    with reconfigure(kafka_client_ack_count=1):
        assert config.kafka_client_ack_count == 1
def test_topic_creation_wait_timeout(self, config):
    """The topic-creation wait timeout is overridable via reconfigure."""
    with reconfigure(topic_creation_wait_timeout=10):
        assert config.topic_creation_wait_timeout == 10
def test_schematizer_host_and_port(self, config, addr):
    """The schematizer address is overridable via reconfigure."""
    with reconfigure(schematizer_host_and_port=addr):
        assert config.schematizer_host_and_port == addr
def test_skip_messages_with_pii(self, config):
    """The PII-skipping flag is overridable via reconfigure."""
    with reconfigure(skip_messages_with_pii=False):
        assert not config.skip_messages_with_pii
def configure_teams():
    """Use the repo-local teams.yaml for the duration of the fixture."""
    teams_yaml = os.path.join(os.path.dirname(__file__), '../teams.yaml')
    with reconfigure(data_pipeline_teams_config_file_path=teams_yaml):
        yield
def setup_encryption_config(self):
    """Enable encryption (and disable PII skipping) for the fixture's
    duration.
    """
    with reconfigure(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=False
    ):
        yield
def test_kafka_broker_list(self, config, addr):
    """An explicit broker list override propagates to the cluster config."""
    with reconfigure(kafka_broker_list=[addr]):
        assert config.cluster_config.broker_list == [addr]
def test_monitoring_window_in_sec(self, config):
    """The monitoring window is overridable via reconfigure."""
    with reconfigure(monitoring_window_in_sec=10):
        assert config.monitoring_window_in_sec == 10
def test_kafka_producer_flush_time_limit_seconds(self, config):
    """The producer flush time limit is overridable via reconfigure."""
    with reconfigure(kafka_producer_flush_time_limit_seconds=3.2):
        assert config.kafka_producer_flush_time_limit_seconds == 3.2
def test_data_pipeline_teams_config_file_path(self, config):
    """The teams config file path is overridable via reconfigure."""
    with reconfigure(data_pipeline_teams_config_file_path='/some/path'):
        assert config.data_pipeline_teams_config_file_path == '/some/path'
def setup_flush_time_limit(self):
    """Raise the producer flush time limit so a test's messages flush in
    a single batch.

    NOTE(review): the original yielded the ``reconfigure(...)`` context
    manager without entering it, so the override never applied; sibling
    fixtures consistently enter it with ``with`` before yielding.
    """
    # publish all msgs together
    with reconfigure(kafka_producer_flush_time_limit_seconds=10):
        yield
def test_skip_position_info_update_when_not_set(self, config):
    """The skip-on-unset-position flag is overridable via reconfigure."""
    with reconfigure(skip_position_info_update_when_not_set=True):
        assert config.skip_position_info_update_when_not_set
def test_setup_encryption_type_from_config_once(self, pii_message):
    """Once read, a message's encryption type is fixed; a nested
    reconfigure cannot change it.
    """
    with reconfigure(encryption_type='Algorithm_one-1'):
        assert pii_message.encryption_type == 'Algorithm_one-1'
        with reconfigure(encryption_type='Algorithm_two-1'):
            # Unchanged: the type was captured from config exactly once.
            assert pii_message.encryption_type == 'Algorithm_one-1'
def test_merge_position_info_update(self, config):
    """The merge-position-info flag is overridable via reconfigure."""
    with reconfigure(merge_position_info_update=True):
        assert config.merge_position_info_update
def test_producer_max_publish_retry_count(self, config):
    """The publish retry count is overridable via reconfigure."""
    with reconfigure(producer_max_publish_retry_count=3):
        assert config.producer_max_publish_retry_count == 3
def test_load_schematizer_host_and_port_from_smartstack(self, config, yocalhost):
    """With smartstack loading enabled, the schematizer address resolves
    to the local smartstack port.
    """
    with reconfigure(load_schematizer_host_and_port_from_smartstack=True):
        expected = '{0}:20912'.format(yocalhost)
        assert config.schematizer_host_and_port == expected
def setup_encryption_config(self):
    """Turn on encryption and turn off PII skipping for the fixture's
    duration.
    """
    overrides = dict(
        encryption_type='AES_MODE_CBC-1',
        skip_messages_with_pii=False
    )
    with reconfigure(**overrides):
        yield
def test_kafka_producer_buffer_size(self, config):
    """The producer buffer size is overridable via reconfigure."""
    with reconfigure(kafka_producer_buffer_size=10):
        assert config.kafka_producer_buffer_size == 10