def _fetch_messages(containers, schematizer, namespace, source, message_count):
    """Consume ``message_count`` messages for the (namespace, source) topic.

    Waits for the topic to appear in both the schematizer and Kafka, reads
    the expected number of messages, runs the shared per-message assertions,
    and returns the decoded messages.
    """
    _wait_for_schematizer_topic(schematizer, namespace, source)
    found_topics = schematizer.get_topics_by_criteria(
        namespace_name=namespace,
        source_name=source
    )
    # Exactly one topic must match the namespace/source pair.
    assert len(found_topics) == 1
    topic = found_topics[0]
    _wait_for_kafka_topic(containers, topic.name)

    consumer = get_consumer(containers, topic.name)
    decoded = []
    for raw_message in consumer.get_messages(
        count=message_count, block=True, timeout=60
    ):
        decoded.append(create_from_offset_and_message(raw_message))
    assert len(decoded) == message_count

    _assert_topic_set_in_messages(decoded, topic.name)
    _assert_contains_pii_set_in_messages(decoded, topic.contains_pii)
    _assert_keys_set_in_messages(decoded, topic.primary_keys)
    _assert_meta_in_messages(decoded)
    return decoded
def _publish_and_assert_pii_message(self, message, producer):
    """Publish a PII message and verify its on-wire payload is encrypted.

    Confirms the round-tripped message matches the original, then unpacks
    the raw envelope and checks that the stored payload equals the payload
    encrypted with the key material carried in the message's meta attribute.
    """
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        captured = get_messages()
    assert len(captured) == 1

    roundtripped = create_from_offset_and_message(captured[0])
    assert roundtripped.payload == message.payload
    assert roundtripped.payload_data == message.payload_data
    assert roundtripped.schema_id == message.schema_id

    envelope_contents = Envelope().unpack(captured[0].message.value)
    # The first meta attribute carries the encryption key material.
    key_meta = envelope_contents['meta'][0]
    helper = EncryptionHelper(
        roundtripped.encryption_type,
        MetaAttribute(key_meta['schema_id'], key_meta['payload'])
    )
    assert envelope_contents['payload'] == helper.encrypt_payload(
        message.payload
    )
def test_publish_message_with_keys(self, message_with_pkeys, producer):
    """Published messages carry their primary keys, Avro-encoded as the Kafka key."""
    # Schema describing the primary-key record, in pkey order.
    key_field_specs = (
        ("string", "field2", 1),
        ("int", "field1", 2),
        ("int", "field3", 3),
    )
    expected_keys_avro_json = {
        "type": "record",
        "namespace": "yelp.data_pipeline",
        "name": "primary_keys",
        "doc": "Represents primary keys present in Message payload.",
        "fields": [
            {"type": ftype, "name": fname, "doc": "test", "pkey": order}
            for ftype, fname, order in key_field_specs
        ]
    }
    expected_keys = {
        fname: message_with_pkeys.payload_data[fname]
        for fname in ("field2", "field1", "field3")
    }

    with capture_new_messages(message_with_pkeys.topic) as get_messages:
        producer.publish(message_with_pkeys)
        producer.flush()
        captured = get_messages()
    assert len(captured) == 1

    roundtripped = create_from_offset_and_message(captured[0])
    assert roundtripped.keys == expected_keys

    # The raw Kafka key must be the Avro encoding of the primary keys.
    writer = AvroStringWriter(schema=expected_keys_avro_json)
    expected_encoded_keys = writer.encode(
        message_avro_representation=expected_keys)
    assert captured[0].message.key == expected_encoded_keys

    # And it must decode back to the original key dict.
    reader = AvroStringReader(
        reader_schema=expected_keys_avro_json,
        writer_schema=expected_keys_avro_json)
    assert reader.decode(
        encoded_message=captured[0].message.key
    ) == expected_keys
def test_publish_message_with_no_keys(self, message, producer):
    """A message without primary keys round-trips with an empty key dict."""
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        captured = get_messages()
    assert len(captured) == 1
    assert create_from_offset_and_message(captured[0]).keys == {}
def test_create_from_offset_and_message(self, offset_and_message, message):
    """Extracting a message from an OffsetAndMessage preserves every attribute."""
    extracted_message = create_from_offset_and_message(
        offset_and_message=offset_and_message)
    # Compare the full set of message attributes field by field.
    for attr in (
        "message_type",
        "payload",
        "payload_data",
        "schema_id",
        "timestamp",
        "topic",
        "uuid",
    ):
        assert getattr(extracted_message, attr) == getattr(message, attr)
def test_create_from_offset_and_message(self, offset_and_message, message):
    """Extracting a message from an OffsetAndMessage preserves every attribute."""
    result = create_from_offset_and_message(
        offset_and_message=offset_and_message
    )
    assert result.message_type == message.message_type
    assert result.schema_id == message.schema_id
    assert result.topic == message.topic
    assert result.timestamp == message.timestamp
    assert result.uuid == message.uuid
    assert result.payload == message.payload
    assert result.payload_data == message.payload_data
def test_publish_message_with_keys(
    self,
    message_with_pkeys,
    producer
):
    """Published messages carry their primary keys, Avro-encoded as the Kafka key."""
    expected_keys_avro_json = {
        "type": "record",
        "namespace": "yelp.data_pipeline",
        "name": "primary_keys",
        "doc": "Represents primary keys present in Message payload.",
        "fields": [
            {"type": "string", "name": "field2", "doc": "test", "pkey": 1},
            {"type": "int", "name": "field1", "doc": "test", "pkey": 2},
            {"type": "int", "name": "field3", "doc": "test", "pkey": 3},
        ]
    }
    payload_data = message_with_pkeys.payload_data
    expected_keys = {
        name: payload_data[name] for name in ("field2", "field1", "field3")
    }

    with capture_new_messages(message_with_pkeys.topic) as get_messages:
        producer.publish(message_with_pkeys)
        producer.flush()
        captured = get_messages()
    assert len(captured) == 1

    decoded_message = create_from_offset_and_message(captured[0])
    assert decoded_message.keys == expected_keys

    # The raw Kafka key is the Avro encoding of the primary-key record.
    writer = AvroStringWriter(schema=expected_keys_avro_json)
    assert captured[0].message.key == writer.encode(
        message_avro_representation=expected_keys
    )

    # Decoding the Kafka key recovers the original key dict.
    reader = AvroStringReader(
        reader_schema=expected_keys_avro_json,
        writer_schema=expected_keys_avro_json
    )
    decoded_keys = reader.decode(encoded_message=captured[0].message.key)
    assert decoded_keys == expected_keys
def test_publish_message_with_no_keys(
    self,
    message,
    producer
):
    """A message without primary keys round-trips with an empty key dict."""
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        published = get_messages()
    assert len(published) == 1
    decoded = create_from_offset_and_message(published[0])
    assert decoded.keys == {}
def test_create_from_offset_and_message_with_no_reader_schema_specified(
        self, registered_schema, payload, example_payload_data):
    """With reader_schema_id=None, the writer schema is used for reading."""
    source_message = CreateMessage(
        schema_id=registered_schema.schema_id,
        payload=payload,
        timestamp=1500,
    )
    packed = Envelope().pack(source_message)
    offset_and_message = OffsetAndMessage(0, create_message(packed))

    extracted = create_from_offset_and_message(
        offset_and_message=offset_and_message,
        reader_schema_id=None)

    assert extracted.schema_id == registered_schema.schema_id
    assert extracted.topic == registered_schema.topic.name
    # The reader schema defaults to the writer's schema id.
    assert extracted.reader_schema_id == registered_schema.schema_id
    assert extracted.payload_data == example_payload_data
def _publish_and_assert_pii_message(self, message, producer):
    """Publish a PII message and verify its on-wire payload is encrypted.

    The decoded message must match the original, while the raw envelope's
    payload must equal the original payload after encryption with the key
    material taken from the envelope's first meta attribute.
    """
    with capture_new_messages(message.topic) as get_messages:
        producer.publish(message)
        producer.flush()
        published = get_messages()
    assert len(published) == 1

    decoded = create_from_offset_and_message(published[0])
    assert decoded.payload == message.payload
    assert decoded.payload_data == message.payload_data
    assert decoded.schema_id == message.schema_id

    raw_envelope = Envelope().unpack(published[0].message.value)
    # First meta attribute holds the encryption key material.
    meta_attr = raw_envelope['meta'][0]
    helper = EncryptionHelper(
        decoded.encryption_type,
        MetaAttribute(meta_attr['schema_id'], meta_attr['payload']))
    expected_ciphertext = helper.encrypt_payload(message.payload)
    assert raw_envelope['payload'] == expected_ciphertext
def test_create_from_offset_and_message_with_no_reader_schema_specified(
    self,
    registered_schema,
    payload,
    example_payload_data
):
    """With reader_schema_id=None, the writer schema is used for reading."""
    original = CreateMessage(
        schema_id=registered_schema.schema_id,
        payload=payload,
        timestamp=1500,
    )
    wrapped = OffsetAndMessage(
        0,
        create_message(Envelope().pack(original))
    )

    result = create_from_offset_and_message(
        offset_and_message=wrapped,
        reader_schema_id=None
    )

    # Reader schema falls back to the writer's registered schema.
    assert result.reader_schema_id == registered_schema.schema_id
    assert result.schema_id == registered_schema.schema_id
    assert result.topic == registered_schema.topic.name
    assert result.payload_data == example_payload_data
def get_data_pipeline_messages(count=100):
    """Fetch up to ``count`` Kafka messages and decode each into a
    data-pipeline message."""
    return list(map(create_from_offset_and_message, get_kafka_messages(count)))