Exemplo n.º 1
0
    def test_forced_recovery_when_overpublished(
        self, topic, messages, producer, topic_offsets
    ):
        """Publish every message, then ensure only the first two while forced
        recovery is switched on.

        Checks the logged info, the position/high-watermark state, and that
        the capturing consumer hands back exactly two messages.
        """
        # Publish the full list up front; only a prefix is "ensured" below.
        for msg in messages:
            producer.publish(msg)
        producer.flush()

        with reconfigure(
            force_recovery_from_publication_unensurable_error=True
        ):
            with setup_capture_new_messages_consumer(topic) as consumer:
                with mock.patch.object(
                    data_pipeline.producer, 'logger'
                ) as mock_logger:
                    producer.ensure_messages_published(
                        messages[:2],
                        topic_offsets
                    )

                    ensured_count = len(messages[:2])
                    self._assert_logged_info_correct(
                        mock_logger,
                        len(messages),
                        topic,
                        topic_offsets,
                        message_count=ensured_count
                    )
                    self._verify_position_and_highwatermarks(
                        topics=[topic],
                        producer=producer,
                        message_count=ensured_count
                    )

                    received = consumer.get_messages(10)
                    assert len(received) == 2
Exemplo n.º 2
0
 def test_encrypted_message(self, pii_schema, payload,
                            example_payload_data):
     """Build a message from either the raw payload or the payload data
     (for both the current and previous payload) with AES encryption
     configured, then check all four payload views and the decrypted
     avro fields.
     """
     # TODO [clin|DATAPIPE-851] let's see if this can be refactored
     with reconfigure(encryption_type='AES_MODE_CBC-1'):
         # Each case supplies exactly one of payload / payload_data.
         cases = ((payload, None), (None, example_payload_data))
         for raw_payload, payload_dict in cases:
             message = self.message_class(
                 schema_id=pii_schema.schema_id,
                 payload=raw_payload,
                 previous_payload=raw_payload,
                 payload_data=payload_dict,
                 previous_payload_data=payload_dict,
             )
             assert message.payload == payload
             assert message.previous_payload == payload
             assert message.payload_data == example_payload_data
             assert message.previous_payload_data == example_payload_data
             # Both avro fields must decrypt back to the same raw payload.
             for avro_key in ('payload', 'previous_payload'):
                 self.assert_equal_decrypted_payload(
                     message,
                     actual_encrypted_payload=message.avro_repr[avro_key],
                     expected_decrypted_payload=payload,
                 )
Exemplo n.º 3
0
    def test_skip_publish_pii_message(self, pii_schema, payload, producer_instance):
        """With ``skip_messages_with_pii`` enabled, publishing a PII message
        returns no messages, leaves no active child processes, and the first
        ``logger.info`` call carries the "Skipping a PII message" text.
        """
        with reconfigure(
            encryption_type='AES_MODE_CBC-1',
            skip_messages_with_pii=True
        ):
            with producer_instance as producer:
                with mock.patch.object(
                    data_pipeline._kafka_producer, 'logger'
                ) as mock_logger:
                    pii_message = CreateMessage(
                        schema_id=pii_schema.schema_id,
                        payload=payload
                    )
                    published = self._publish_message(pii_message, producer)

        assert len(published) == 0
        assert len(multiprocessing.active_children()) == 0

        log_template = (
            "Skipping a PII message - uuid hex: {}, schema_id: {}, "
            "timestamp: {}, type: {}"
        )
        expected_log = log_template.format(
            pii_message.uuid_hex,
            pii_message.schema_id,
            pii_message.timestamp,
            pii_message.message_type.name
        )
        first_info_call = mock_logger.info.call_args_list[0]
        assert first_info_call == mock.call(expected_log)
Exemplo n.º 4
0
def configure_teams():
    """Yield once while the teams config path points at ``../teams.yaml``,
    resolved relative to this file's directory.
    """
    this_dir = os.path.dirname(__file__)
    teams_yaml = os.path.join(this_dir, '../teams.yaml')
    with reconfigure(data_pipeline_teams_config_file_path=teams_yaml):
        yield
Exemplo n.º 5
0
 def test_kafka_discovery(self, config, cluster_name, cluster_type):
     """With cluster type and name overridden, the cluster config reports
     the overridden cluster name.
     """
     with reconfigure(kafka_cluster_type=cluster_type,
                      kafka_cluster_name=cluster_name):
         discovered = config.cluster_config
         assert discovered.name == cluster_name
Exemplo n.º 6
0
 def test_encrypted_message(self, pii_schema, payload, example_payload_data):
     """Build a message from either the raw payload or the payload data
     (for both the current and previous payload) with AES encryption
     configured, then check all four payload views and the decrypted
     avro fields.
     """
     # TODO [clin|DATAPIPE-851] let's see if this can be refactored
     with reconfigure(encryption_type='AES_MODE_CBC-1'):
         # Each case supplies exactly one of payload / payload_data.
         cases = ((payload, None), (None, example_payload_data))
         for raw_payload, payload_dict in cases:
             message = self.message_class(
                 schema_id=pii_schema.schema_id,
                 payload=raw_payload,
                 previous_payload=raw_payload,
                 payload_data=payload_dict,
                 previous_payload_data=payload_dict,
             )
             assert message.payload == payload
             assert message.previous_payload == payload
             assert message.payload_data == example_payload_data
             assert message.previous_payload_data == example_payload_data
             # Both avro fields must decrypt back to the same raw payload.
             for avro_key in ('payload', 'previous_payload'):
                 self.assert_equal_decrypted_payload(
                     message,
                     actual_encrypted_payload=message.avro_repr[avro_key],
                     expected_decrypted_payload=payload,
                 )
Exemplo n.º 7
0
 def test_publish_pii_message(self, pii_schema, payload, producer_instance):
     """With PII skipping disabled, publish a PII message through the
     producer and check that no child processes remain afterwards.
     """
     with reconfigure(
         encryption_type='AES_MODE_CBC-1',
         skip_messages_with_pii=False
     ):
         with producer_instance as producer:
             message = CreateMessage(
                 schema_id=pii_schema.schema_id,
                 payload=payload,
             )
             self._publish_and_assert_pii_message(message, producer)
     leftover_children = multiprocessing.active_children()
     assert len(leftover_children) == 0
Exemplo n.º 8
0
 def test_publishing_message_when_skipping_unset_position_info(self, tracker, position_info):
     """Publish a message with position info followed by one without while
     ``skip_position_info_update_when_not_set`` is on; the tracker must
     still report the first message's position info.
     """
     with reconfigure(skip_position_info_update_when_not_set=True):
         # The second message deliberately carries no upstream position.
         with_info = self._create_message(upstream_position_info=position_info)
         without_info = self._create_message(upstream_position_info=None)
         self._publish_messages(tracker, [with_info, without_info])
         position_data = tracker.get_position_data()

     last_info = position_data.last_published_message_position_info
     assert last_info == position_info
     per_topic_info = position_data.topic_to_last_position_info_map
     assert per_topic_info == {self.topic: position_info}
Exemplo n.º 9
0
def config_benchmark_containers_connections():
    """Reconfigure the clientlib to talk to the benchmark containers.

    Meant for runs where both the clientlib and the benchmarks execute
    inside docker containers. Yields once while the overrides are in place.
    """
    container_settings = dict(
        schematizer_host_and_port='schematizer:8888',
        kafka_zookeeper='zk:2181',
        kafka_broker_list=['kafka:9092'],
        should_use_testing_containers=True,
    )
    with reconfigure(**container_settings):
        yield
Exemplo n.º 10
0
 def test_kafka_discovery_precedence(self, config, addr, cluster_name,
                                     cluster_type):
     """When cluster type/name are set alongside an explicit broker list
     and zookeeper address, the cluster config carries the cluster name and
     does not use the explicit broker list or zookeeper values.
     """
     with reconfigure(
         kafka_cluster_type=cluster_type,
         kafka_cluster_name=cluster_name,
         kafka_broker_list=[addr],
         kafka_zookeeper=addr,
     ):
         discovered = config.cluster_config
         assert discovered.name == cluster_name
         assert discovered.broker_list != [addr]
         assert discovered.zookeeper != addr
Exemplo n.º 11
0
def reconfigure_config():
    """Reconfigure the clientlib so that, by default, config files are
    picked up from the repo rather than from the file system.

    Yields once while the overrides are in place.
    """
    repo_local_settings = dict(
        zookeeper_discovery_path='zookeeper_discovery{ecosystem}.yaml',
        key_location='./',
        data_pipeline_teams_config_file_path='teams.yaml',
        ecosystem_file_path='ecosystem',
    )
    with reconfigure(**repo_local_settings):
        yield
Exemplo n.º 12
0
 def test_kafka_discovery_precedence(self, config, addr, cluster_name, cluster_type):
     """When cluster type/name are set alongside an explicit broker list
     and zookeeper address, the cluster config carries the cluster name and
     does not use the explicit broker list or zookeeper values.
     """
     with reconfigure(kafka_cluster_type=cluster_type,
                      kafka_cluster_name=cluster_name,
                      kafka_broker_list=[addr],
                      kafka_zookeeper=addr):
         discovered = config.cluster_config
         assert discovered.name == cluster_name
         assert discovered.broker_list != [addr]
         assert discovered.zookeeper != addr
Exemplo n.º 13
0
 def test_publish_pii_message(self, pii_schema, payload, producer_instance):
     """With PII skipping disabled, publish a PII message through the
     producer and check that no child processes remain afterwards.
     """
     with reconfigure(encryption_type='AES_MODE_CBC-1',
                      skip_messages_with_pii=False):
         with producer_instance as producer:
             message = CreateMessage(schema_id=pii_schema.schema_id,
                                     payload=payload)
             self._publish_and_assert_pii_message(message, producer)
     leftover_children = multiprocessing.active_children()
     assert len(leftover_children) == 0
Exemplo n.º 14
0
 def test_message_str_with_pii(self, pii_message):
     """``str()`` of a PII message, evaluated back into a dict, must match
     the expected fields; its payload data maps ``good_field`` to the
     string ``<type 'int'>``.
     """
     with reconfigure(encryption_type='AES_MODE_CBC-1'):
         rendered = str(pii_message)
         expected = {
             'message_type': self.expected_message_type.name,
             'schema_id': pii_message.schema_id,
             'timestamp': pii_message.timestamp,
             'meta': [pii_message.meta[0]._asdict()],
             'encryption_type': pii_message.encryption_type,
             'uuid': pii_message.uuid_hex,
             'payload_data': {u'good_field': u"<type 'int'>"},
         }
         # only use eval to get the original dict when the string is trusted
         parsed = eval(rendered)
         assert parsed == expected
Exemplo n.º 15
0
 def test_message_str_with_pii(self, pii_message):
     """``str()`` of a PII message, evaluated back into a dict, must match
     the expected fields; its payload data maps ``good_field`` to the
     string ``<type 'int'>``.
     """
     with reconfigure(encryption_type='AES_MODE_CBC-1'):
         rendered = str(pii_message)
         expected = {
             'message_type': self.expected_message_type.name,
             'schema_id': pii_message.schema_id,
             'timestamp': pii_message.timestamp,
             'meta': [pii_message.meta[0]._asdict()],
             'encryption_type': pii_message.encryption_type,
             'uuid': pii_message.uuid_hex,
             'payload_data': {u'good_field': u"<type 'int'>"},
         }
         # only use eval to get the original dict when the string is trusted
         parsed = eval(rendered)
         assert parsed == expected
Exemplo n.º 16
0
 def test_encrypted_message(self, pii_schema, payload, example_payload_data):
     """Build a message from either the raw payload or the payload data
     with AES encryption configured, then check both payload views and the
     decrypted avro payload.
     """
     with reconfigure(encryption_type='AES_MODE_CBC-1'):
         # Each case supplies exactly one of payload / payload_data.
         cases = ((payload, None), (None, example_payload_data))
         for raw_payload, payload_dict in cases:
             message = self.message_class(
                 schema_id=pii_schema.schema_id,
                 payload=raw_payload,
                 payload_data=payload_dict,
             )
             assert message.payload == payload
             assert message.payload_data == example_payload_data
             encrypted = message.avro_repr['payload']
             self.assert_equal_decrypted_payload(
                 message,
                 actual_encrypted_payload=encrypted,
                 expected_decrypted_payload=payload,
             )
Exemplo n.º 17
0
 def test_encrypted_message(self, pii_schema, payload,
                            example_payload_data):
     """Build a message from either the raw payload or the payload data
     with AES encryption configured, then check both payload views and the
     decrypted avro payload.
     """
     with reconfigure(encryption_type='AES_MODE_CBC-1'):
         # Each case supplies exactly one of payload / payload_data.
         cases = ((payload, None), (None, example_payload_data))
         for raw_payload, payload_dict in cases:
             message = self.message_class(
                 schema_id=pii_schema.schema_id,
                 payload=raw_payload,
                 payload_data=payload_dict,
             )
             assert message.payload == payload
             assert message.payload_data == example_payload_data
             encrypted = message.avro_repr['payload']
             self.assert_equal_decrypted_payload(
                 message,
                 actual_encrypted_payload=encrypted,
                 expected_decrypted_payload=payload,
             )
Exemplo n.º 18
0
    def test_skip_publish_pii_message(self, pii_schema, payload,
                                      producer_instance):
        """With ``skip_messages_with_pii`` enabled, publishing a PII message
        returns no messages, leaves no active child processes, and the first
        ``logger.info`` call carries the "Skipping a PII message" text.
        """
        with reconfigure(
            encryption_type='AES_MODE_CBC-1',
            skip_messages_with_pii=True
        ):
            with producer_instance as producer:
                with mock.patch.object(
                    data_pipeline._kafka_producer, 'logger'
                ) as mock_logger:
                    pii_message = CreateMessage(
                        schema_id=pii_schema.schema_id,
                        payload=payload
                    )
                    published = self._publish_message(pii_message, producer)

        assert len(published) == 0
        assert len(multiprocessing.active_children()) == 0

        log_template = (
            "Skipping a PII message - uuid hex: {}, schema_id: {}, "
            "timestamp: {}, type: {}"
        )
        expected_log = log_template.format(
            pii_message.uuid_hex,
            pii_message.schema_id,
            pii_message.timestamp,
            pii_message.message_type.name
        )
        first_info_call = mock_logger.info.call_args_list[0]
        assert first_info_call == mock.call(expected_log)
Exemplo n.º 19
0
    def test_forced_recovery_when_overpublished(self, topic, messages,
                                                producer, topic_offsets):
        """Publish every message, then ensure only the first two while forced
        recovery is switched on.

        Checks the logged info and that the capturing consumer hands back
        exactly two messages.
        """
        # Publish the full list up front; only a prefix is "ensured" below.
        for msg in messages:
            producer.publish(msg)
        producer.flush()

        with reconfigure(
            force_recovery_from_publication_unensurable_error=True
        ):
            with setup_capture_new_messages_consumer(topic) as consumer:
                with mock.patch.object(
                    data_pipeline.producer, 'logger'
                ) as mock_logger:
                    producer.ensure_messages_published(
                        messages[:2],
                        topic_offsets
                    )

                    ensured_count = len(messages[:2])
                    self._assert_logged_info_correct(
                        mock_logger,
                        len(messages),
                        topic,
                        topic_offsets,
                        message_count=ensured_count
                    )

                    received = consumer.get_messages(10)
                    assert len(received) == 2
Exemplo n.º 20
0
 def test_kafka_zookeeper(self, config, addr):
     """An overridden ``kafka_zookeeper`` shows up as the cluster config's
     zookeeper value.
     """
     with reconfigure(kafka_zookeeper=addr):
         zookeeper = config.cluster_config.zookeeper
         assert zookeeper == addr
Exemplo n.º 21
0
 def test_setup_encryption_type_from_config_once(self, pii_message):
     """The message keeps reporting 'Algorithm_one-1' as its encryption
     type even after the config is switched to 'Algorithm_two-1'.
     """
     # Same expectation under both configurations, applied in this order.
     for configured_type in ('Algorithm_one-1', 'Algorithm_two-1'):
         with reconfigure(encryption_type=configured_type):
             assert pii_message.encryption_type == 'Algorithm_one-1'
Exemplo n.º 22
0
 def test_consumer_partitioner_cooldown_default(self, config):
     """An overridden ``consumer_partitioner_cooldown_default`` is readable
     from the config object.
     """
     with reconfigure(consumer_partitioner_cooldown_default=10.0):
         cooldown = config.consumer_partitioner_cooldown_default
         assert cooldown == 10.0
Exemplo n.º 23
0
 def test_consumer_get_messages_timeout_default(self, config):
     """An overridden ``consumer_get_messages_timeout_default`` is readable
     from the config object.
     """
     with reconfigure(consumer_get_messages_timeout_default=10.0):
         timeout = config.consumer_get_messages_timeout_default
         assert timeout == 10.0
Exemplo n.º 24
0
 def test_consumer_use_group_sha_default(self, config):
     """An overridden ``consumer_use_group_sha_default`` of False is read
     back from the config object as exactly ``False``.
     """
     with reconfigure(consumer_use_group_sha_default=False):
         use_group_sha = config.consumer_use_group_sha_default
         assert use_group_sha is False
Exemplo n.º 25
0
 def setup_flush_time_limit(self):
     """Hold ``kafka_producer_flush_time_limit_seconds`` at 10 while this
     generator is suspended at its ``yield``.

     The override is entered with ``with`` so that it is in effect during
     the yield and released afterwards. Yielding the bare
     ``reconfigure(...)`` object would only hand the context manager back
     without entering it.
     NOTE(review): assumes ``reconfigure`` takes effect only when entered
     as a context manager -- confirm against its definition.

     Yields nothing (``None``).
     """
     # publish all msgs together
     with reconfigure(kafka_producer_flush_time_limit_seconds=10):
         yield
Exemplo n.º 26
0
 def test_force_recovery_from_publication_unensurable_error(self, config):
     """An overridden ``force_recovery_from_publication_unensurable_error``
     of True reads back as truthy from the config object.
     """
     with reconfigure(force_recovery_from_publication_unensurable_error=True):
         forced = config.force_recovery_from_publication_unensurable_error
         assert forced
Exemplo n.º 27
0
 def test_kafka_client_ack_count(self, config):
     """An overridden ``kafka_client_ack_count`` is readable from the
     config object.
     """
     with reconfigure(kafka_client_ack_count=1):
         ack_count = config.kafka_client_ack_count
         assert ack_count == 1
Exemplo n.º 28
0
 def test_topic_creation_wait_timeout(self, config):
     """An overridden ``topic_creation_wait_timeout`` is readable from the
     config object.
     """
     with reconfigure(topic_creation_wait_timeout=10):
         wait_timeout = config.topic_creation_wait_timeout
         assert wait_timeout == 10
Exemplo n.º 29
0
 def test_schematizer_host_and_port(self, config, addr):
     """An overridden ``schematizer_host_and_port`` is readable from the
     config object.
     """
     with reconfigure(schematizer_host_and_port=addr):
         host_and_port = config.schematizer_host_and_port
         assert host_and_port == addr
Exemplo n.º 30
0
 def test_skip_messages_with_pii(self, config):
     """An overridden ``skip_messages_with_pii`` of False reads back as
     falsy from the config object.
     """
     with reconfigure(skip_messages_with_pii=False):
         skip_pii = config.skip_messages_with_pii
         assert not skip_pii
Exemplo n.º 31
0
def configure_teams():
    """Yield once while the teams config path points at ``../teams.yaml``,
    resolved relative to this file's directory.
    """
    this_dir = os.path.dirname(__file__)
    teams_yaml = os.path.join(this_dir, '../teams.yaml')
    with reconfigure(data_pipeline_teams_config_file_path=teams_yaml):
        yield
Exemplo n.º 32
0
 def setup_encryption_config(self):
     """Yield once while ``encryption_type`` is 'AES_MODE_CBC-1' and
     ``skip_messages_with_pii`` is False.
     """
     encryption_overrides = dict(
         encryption_type='AES_MODE_CBC-1',
         skip_messages_with_pii=False,
     )
     with reconfigure(**encryption_overrides):
         yield
Exemplo n.º 33
0
 def test_kafka_broker_list(self, config, addr):
     """An overridden ``kafka_broker_list`` shows up as the cluster
     config's broker list.
     """
     with reconfigure(kafka_broker_list=[addr]):
         brokers = config.cluster_config.broker_list
         assert brokers == [addr]
Exemplo n.º 34
0
 def test_monitoring_window_in_sec(self, config):
     """An overridden ``monitoring_window_in_sec`` is readable from the
     config object.
     """
     with reconfigure(monitoring_window_in_sec=10):
         window = config.monitoring_window_in_sec
         assert window == 10
Exemplo n.º 35
0
 def test_kafka_producer_flush_time_limit_seconds(self, config):
     """An overridden ``kafka_producer_flush_time_limit_seconds`` is
     readable from the config object.
     """
     with reconfigure(kafka_producer_flush_time_limit_seconds=3.2):
         flush_limit = config.kafka_producer_flush_time_limit_seconds
         assert flush_limit == 3.2
Exemplo n.º 36
0
 def test_data_pipeline_teams_config_file_path(self, config):
     """An overridden ``data_pipeline_teams_config_file_path`` is readable
     from the config object.
     """
     with reconfigure(data_pipeline_teams_config_file_path='/some/path'):
         teams_path = config.data_pipeline_teams_config_file_path
         assert teams_path == '/some/path'
Exemplo n.º 37
0
 def setup_flush_time_limit(self):
     """Hold ``kafka_producer_flush_time_limit_seconds`` at 10 while this
     generator is suspended at its ``yield``.

     The override is entered with ``with`` so that it is in effect during
     the yield and released afterwards. Yielding the bare
     ``reconfigure(...)`` object would only hand the context manager back
     without entering it.
     NOTE(review): assumes ``reconfigure`` takes effect only when entered
     as a context manager -- confirm against its definition.

     Yields nothing (``None``).
     """
     # publish all msgs together
     with reconfigure(kafka_producer_flush_time_limit_seconds=10):
         yield
Exemplo n.º 38
0
 def test_skip_position_info_update_when_not_set(self, config):
     """An overridden ``skip_position_info_update_when_not_set`` of True
     reads back as truthy from the config object.
     """
     with reconfigure(skip_position_info_update_when_not_set=True):
         skip_update = config.skip_position_info_update_when_not_set
         assert skip_update
Exemplo n.º 39
0
 def test_setup_encryption_type_from_config_once(self, pii_message):
     """The message keeps reporting 'Algorithm_one-1' as its encryption
     type even after the config is switched to 'Algorithm_two-1'.
     """
     # Same expectation under both configurations, applied in this order.
     for configured_type in ('Algorithm_one-1', 'Algorithm_two-1'):
         with reconfigure(encryption_type=configured_type):
             assert pii_message.encryption_type == 'Algorithm_one-1'
Exemplo n.º 40
0
 def test_merge_position_info_update(self, config):
     """An overridden ``merge_position_info_update`` of True reads back as
     truthy from the config object.
     """
     with reconfigure(merge_position_info_update=True):
         merge_update = config.merge_position_info_update
         assert merge_update
Exemplo n.º 41
0
 def test_producer_max_publish_retry_count(self, config):
     """An overridden ``producer_max_publish_retry_count`` is readable from
     the config object.
     """
     with reconfigure(producer_max_publish_retry_count=3):
         retry_count = config.producer_max_publish_retry_count
         assert retry_count == 3
Exemplo n.º 42
0
 def test_load_schematizer_host_and_port_from_smartstack(self, config, yocalhost):
     """With ``load_schematizer_host_and_port_from_smartstack`` enabled, the
     schematizer host and port read back as ``<yocalhost>:20912``.
     """
     with reconfigure(load_schematizer_host_and_port_from_smartstack=True):
         host_and_port = config.schematizer_host_and_port
         expected = '{0}:20912'.format(yocalhost)
         assert host_and_port == expected
Exemplo n.º 43
0
 def setup_encryption_config(self):
     """Yield once while ``encryption_type`` is 'AES_MODE_CBC-1' and
     ``skip_messages_with_pii`` is False.
     """
     encryption_overrides = dict(
         encryption_type='AES_MODE_CBC-1',
         skip_messages_with_pii=False,
     )
     with reconfigure(**encryption_overrides):
         yield
Exemplo n.º 44
0
 def test_kafka_producer_buffer_size(self, config):
     """An overridden ``kafka_producer_buffer_size`` is readable from the
     config object.
     """
     with reconfigure(kafka_producer_buffer_size=10):
         buffer_size = config.kafka_producer_buffer_size
         assert buffer_size == 10