示例#1
0
def _fetch_messages(
    containers,
    schematizer,
    namespace,
    source,
    message_count
):
    """Consume and validate ``message_count`` messages for one source.

    Waits for the schematizer topic, looks up the topics registered for
    ``namespace``/``source`` (exactly one is expected), waits for the
    matching kafka topic and then reads ``message_count`` messages from it
    with a blocking consumer call (``timeout=60``).

    Each raw kafka message is converted with
    ``create_from_offset_and_message``; the resulting list is checked by the
    ``_assert_*`` helpers against the topic's name, ``contains_pii`` flag and
    ``primary_keys`` before being returned.
    """
    _wait_for_schematizer_topic(schematizer, namespace, source)

    matching_topics = schematizer.get_topics_by_criteria(
        namespace_name=namespace,
        source_name=source
    )
    assert len(matching_topics) == 1
    topic = matching_topics[0]

    _wait_for_kafka_topic(containers, topic.name)

    consumer = get_consumer(containers, topic.name)
    kafka_messages = consumer.get_messages(
        count=message_count,
        block=True,
        timeout=60
    )
    messages = []
    for kafka_message in kafka_messages:
        messages.append(create_from_offset_and_message(kafka_message))

    # Fewer messages than requested means the consumer call came up short.
    assert len(messages) == message_count
    _assert_topic_set_in_messages(messages, topic.name)
    _assert_contains_pii_set_in_messages(messages, topic.contains_pii)
    _assert_keys_set_in_messages(messages, topic.primary_keys)
    _assert_meta_in_messages(messages)
    return messages
示例#2
0
    def _publish_and_assert_pii_message(self, message, producer):
        """Publish ``message`` and verify its decoded and encrypted payloads.

        Exactly one message must be captured on ``message.topic``. The
        message rebuilt from it must match the original's payload,
        payload_data and schema_id, and the raw envelope payload must equal
        the original payload encrypted with an ``EncryptionHelper`` built
        from the first meta attribute of the envelope.
        """
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            captured = get_messages()

        assert len(captured) == 1
        kafka_message = captured[0]

        dp_message = create_from_offset_and_message(kafka_message)
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id

        envelope_data = Envelope().unpack(kafka_message.message.value)
        first_meta = envelope_data['meta'][0]
        encryption_type = dp_message.encryption_type
        meta_attribute = MetaAttribute(
            first_meta['schema_id'],
            first_meta['payload']
        )
        helper = EncryptionHelper(encryption_type, meta_attribute)
        expected_payload = helper.encrypt_payload(message.payload)
        assert envelope_data['payload'] == expected_payload
示例#3
0
    def test_publish_message_with_keys(self, message_with_pkeys, producer):
        """Check the keys carried by a published message with primary keys.

        Publishes ``message_with_pkeys`` and expects exactly one captured
        message. The keys on the rebuilt message must equal the payload's
        field2/field1/field3 values, and the raw kafka message key must equal
        those keys encoded with ``AvroStringWriter`` (and decode back to them
        with ``AvroStringReader``) under the expected key schema.
        """
        keys_schema = {
            "type": "record",
            "namespace": "yelp.data_pipeline",
            "name": "primary_keys",
            "doc": "Represents primary keys present in Message payload.",
            "fields": [
                {"type": "string", "name": "field2", "doc": "test", "pkey": 1},
                {"type": "int", "name": "field1", "doc": "test", "pkey": 2},
                {"type": "int", "name": "field3", "doc": "test", "pkey": 3},
            ]
        }
        expected_keys = {
            key_name: message_with_pkeys.payload_data[key_name]
            for key_name in ("field2", "field1", "field3")
        }

        with capture_new_messages(message_with_pkeys.topic) as get_messages:
            producer.publish(message_with_pkeys)
            producer.flush()
            captured = get_messages()
        assert len(captured) == 1
        kafka_message = captured[0]

        dp_message = create_from_offset_and_message(kafka_message)
        assert dp_message.keys == expected_keys

        writer = AvroStringWriter(schema=keys_schema)
        expected_encoded_keys = writer.encode(
            message_avro_representation=expected_keys
        )
        assert kafka_message.message.key == expected_encoded_keys

        reader = AvroStringReader(
            reader_schema=keys_schema,
            writer_schema=keys_schema
        )
        decoded_keys = reader.decode(
            encoded_message=kafka_message.message.key
        )
        assert decoded_keys == expected_keys
示例#4
0
    def test_publish_message_with_no_keys(self, message, producer):
        """Check that a published ``message`` comes back with empty keys.

        Exactly one message must be captured on ``message.topic``; the
        message rebuilt from it must have ``keys`` equal to ``{}``.
        """
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            captured = get_messages()
        assert len(captured) == 1

        rebuilt = create_from_offset_and_message(captured[0])
        assert rebuilt.keys == {}
示例#5
0
 def test_create_from_offset_and_message(self, offset_and_message, message):
     """Check that the rebuilt message matches ``message`` field by field.

     Compares message_type, payload, payload_data, schema_id, timestamp,
     topic and uuid, in that order.
     """
     rebuilt = create_from_offset_and_message(
         offset_and_message=offset_and_message
     )
     assert rebuilt.message_type == message.message_type
     assert rebuilt.payload == message.payload
     assert rebuilt.payload_data == message.payload_data
     assert rebuilt.schema_id == message.schema_id
     assert rebuilt.timestamp == message.timestamp
     assert rebuilt.topic == message.topic
     assert rebuilt.uuid == message.uuid
示例#6
0
 def test_create_from_offset_and_message(self, offset_and_message, message):
     """Verify a message built from ``offset_and_message`` equals ``message``.

     The comparison covers message_type, payload, payload_data, schema_id,
     timestamp, topic and uuid.
     """
     decoded = create_from_offset_and_message(offset_and_message=offset_and_message)

     # Type and payload contents.
     assert decoded.message_type == message.message_type
     assert decoded.payload == message.payload
     assert decoded.payload_data == message.payload_data

     # Remaining attributes of the message.
     assert decoded.schema_id == message.schema_id
     assert decoded.timestamp == message.timestamp
     assert decoded.topic == message.topic
     assert decoded.uuid == message.uuid
示例#7
0
    def test_publish_message_with_keys(self, message_with_pkeys, producer):
        """Verify the primary keys attached to a published message.

        After publishing ``message_with_pkeys`` exactly one message must be
        captured. Its rebuilt ``keys`` must equal the payload values of
        field2, field1 and field3; the raw kafka key must equal the
        ``AvroStringWriter`` encoding of those keys and must decode back to
        them through ``AvroStringReader`` using the same schema.
        """
        # (avro type, field name) pairs in primary-key order; pkey is 1-based.
        typed_key_fields = (
            ("string", "field2"),
            ("int", "field1"),
            ("int", "field3"),
        )
        expected_keys_avro_json = {
            "type": "record",
            "namespace": "yelp.data_pipeline",
            "name": "primary_keys",
            "doc": "Represents primary keys present in Message payload.",
            "fields": [
                {
                    "type": avro_type,
                    "name": field_name,
                    "doc": "test",
                    "pkey": position
                }
                for position, (avro_type, field_name)
                in enumerate(typed_key_fields, 1)
            ]
        }
        expected_keys = {
            field_name: message_with_pkeys.payload_data[field_name]
            for _, field_name in typed_key_fields
        }

        with capture_new_messages(message_with_pkeys.topic) as get_messages:
            producer.publish(message_with_pkeys)
            producer.flush()
            received = get_messages()
        assert len(received) == 1
        first_received = received[0]

        dp_message = create_from_offset_and_message(first_received)
        assert dp_message.keys == expected_keys

        expected_encoded_keys = AvroStringWriter(
            schema=expected_keys_avro_json
        ).encode(message_avro_representation=expected_keys)
        assert first_received.message.key == expected_encoded_keys

        decoded_keys = AvroStringReader(
            reader_schema=expected_keys_avro_json,
            writer_schema=expected_keys_avro_json
        ).decode(encoded_message=first_received.message.key)
        assert decoded_keys == expected_keys
示例#8
0
    def test_publish_message_with_no_keys(self, message, producer):
        """Verify that publishing ``message`` yields a message with no keys.

        One message is expected on ``message.topic``; once rebuilt with
        ``create_from_offset_and_message`` its ``keys`` must be ``{}``.
        """
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            received = get_messages()
        assert len(received) == 1

        only_message = received[0]
        dp_message = create_from_offset_and_message(only_message)
        assert dp_message.keys == {}
示例#9
0
    def test_create_from_offset_and_message_with_no_reader_schema_specified(
        self,
        registered_schema,
        payload,
        example_payload_data
    ):
        """Check message creation when ``reader_schema_id`` is ``None``.

        Packs a ``CreateMessage`` for ``registered_schema`` into an envelope,
        wraps it as an ``OffsetAndMessage`` at offset 0 and rebuilds it. The
        result must report the registered schema id as both ``schema_id`` and
        ``reader_schema_id``, the registered schema's topic name, and
        ``example_payload_data`` as its payload data.
        """
        source_message = CreateMessage(
            schema_id=registered_schema.schema_id,
            payload=payload,
            timestamp=1500,
        )
        packed = Envelope().pack(source_message)
        kafka_message = create_message(packed)
        offset_and_message = OffsetAndMessage(0, kafka_message)

        rebuilt = create_from_offset_and_message(
            offset_and_message=offset_and_message,
            reader_schema_id=None
        )
        assert rebuilt.schema_id == registered_schema.schema_id
        assert rebuilt.topic == registered_schema.topic.name
        assert rebuilt.reader_schema_id == registered_schema.schema_id
        assert rebuilt.payload_data == example_payload_data
示例#10
0
    def _publish_and_assert_pii_message(self, message, producer):
        """Publish ``message`` and assert on what is captured from its topic.

        A single captured message is expected. Rebuilt through
        ``create_from_offset_and_message`` it must carry the same payload,
        payload_data and schema_id as ``message``. The payload stored in the
        unpacked envelope must equal ``message.payload`` encrypted by an
        ``EncryptionHelper`` configured from the envelope's first meta entry.
        """
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            received = get_messages()

        assert len(received) == 1
        raw_message = received[0]

        dp_message = create_from_offset_and_message(raw_message)
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id

        unpacked = Envelope().unpack(raw_message.message.value)
        meta_entry = unpacked['meta'][0]
        # Read the encryption type before building the meta attribute.
        encryption_type = dp_message.encryption_type
        encryption_meta = MetaAttribute(
            meta_entry['schema_id'],
            meta_entry['payload']
        )
        encrypted_payload = EncryptionHelper(
            encryption_type,
            encryption_meta
        ).encrypt_payload(message.payload)
        assert unpacked['payload'] == encrypted_payload
示例#11
0
    def test_create_from_offset_and_message_with_no_reader_schema_specified(
            self, registered_schema, payload, example_payload_data):
        """Verify rebuilding a message with ``reader_schema_id=None``.

        A ``CreateMessage`` using ``registered_schema.schema_id`` is packed
        into an envelope and wrapped in an ``OffsetAndMessage`` with offset 0.
        The rebuilt message must have schema_id and reader_schema_id equal to
        the registered schema id, topic equal to the registered schema's
        topic name, and payload_data equal to ``example_payload_data``.
        """
        expected_schema_id = registered_schema.schema_id
        original = CreateMessage(
            schema_id=expected_schema_id,
            payload=payload,
            timestamp=1500,
        )
        envelope_bytes = Envelope().pack(original)
        wrapped = OffsetAndMessage(0, create_message(envelope_bytes))

        extracted = create_from_offset_and_message(
            offset_and_message=wrapped, reader_schema_id=None)
        assert extracted.schema_id == registered_schema.schema_id
        assert extracted.topic == registered_schema.topic.name
        assert extracted.reader_schema_id == registered_schema.schema_id
        assert extracted.payload_data == example_payload_data
示例#12
0
 def get_data_pipeline_messages(count=100):
     """Return the messages built from ``get_kafka_messages(count)``.

     Each kafka message is converted with
     ``create_from_offset_and_message``; the results are returned as a list
     in the same order.
     """
     converted = []
     for raw_message in get_kafka_messages(count):
         converted.append(create_from_offset_and_message(raw_message))
     return converted
示例#13
0
 def get_data_pipeline_messages(count=100):
     """Fetch kafka messages and convert each one, preserving order.

     ``count`` is passed straight to ``get_kafka_messages``; every message it
     yields goes through ``create_from_offset_and_message``.
     """
     return [
         create_from_offset_and_message(raw)
         for raw in get_kafka_messages(count)
     ]