Example #1
    def test_retry_failed_publish_without_highwatermark(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
            producer._kafka_producer.kafka_client,
            'send_produce_request',
            side_effect=[FailedPayloadsError]
        ) as mock_send_request, mock.patch(
            'data_pipeline._kafka_util.get_topics_watermarks',
            side_effect=Exception
        ), capture_new_messages(
            message.topic
        ) as get_messages, pytest.raises(
            MaxRetryError
        ) as e:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)

            producer.publish(message)
            producer.flush()

        assert mock_send_request.call_count == 1  # should be no retry
        self.assert_last_retry_result(
            e.value.last_result,
            message,
            expected_published_msgs_count=0
        )

        messages = get_messages()
        assert len(messages) == 0
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=0
        )
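
Every example on this page enters capture_new_messages before publishing and then calls the function it yields to read back what was produced. The helper itself is never shown; the sketch below illustrates only its contract, using an in-memory dict as a stand-in for a real Kafka consumer, so everything in it is an assumption rather than the project's implementation.

from contextlib import contextmanager

# In-memory stand-in for Kafka: topic name -> list of published messages.
_FAKE_BROKER = {}

@contextmanager
def capture_new_messages(topic):
    log = _FAKE_BROKER.setdefault(topic, [])
    start = len(log)  # comparable to seeking a consumer to the topic's tail

    def get_messages(count=100):
        # Return only what was published after the context was entered.
        return log[start:start + count]

    yield get_messages

Note that Examples #6 to #8 call get_messages() after their with block has exited, so the real helper evidently keeps captured messages readable past that point; the sketch above preserves that property.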
Example #2
    def test_retry_failed_publish_without_highwatermark(
            self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
                producer._kafka_producer.kafka_client,
                'send_produce_request',
                side_effect=[FailedPayloadsError]
        ) as mock_send_request, mock.patch(
                'data_pipeline._kafka_util.get_topics_watermarks',
                side_effect=Exception
        ), capture_new_messages(
                message.topic
        ) as get_messages, pytest.raises(MaxRetryError) as e:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)

            producer.publish(message)
            producer.flush()

        assert mock_send_request.call_count == 1  # should be no retry
        self.assert_last_retry_result(e.value.last_result,
                                      message,
                                      expected_published_msgs_count=0)

        messages = get_messages()
        assert len(messages) == 0
        self.assert_new_topic_to_offset_map(producer,
                                            message.topic,
                                            orig_topic_to_offset_map,
                                            published_message_count=0)
Example #3
    def _publish_and_assert_pii_message(self, message, producer):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()

        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(
            offsets_and_messages[0]
        )
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id

        unpacked_message = Envelope().unpack(offsets_and_messages[0].message.value)
        unpacked_meta_attr = unpacked_message['meta'][0]
        encryption_helper = EncryptionHelper(
            dp_message.encryption_type,
            MetaAttribute(
                unpacked_meta_attr['schema_id'],
                unpacked_meta_attr['payload']
            )
        )
        encrypted_payload = encryption_helper.encrypt_payload(message.payload)
        assert unpacked_message['payload'] == encrypted_payload
Example #4
    def test_retry_false_failed_publish(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        orig_func = producer._kafka_producer.kafka_client.send_produce_request

        def run_original_func_but_throw_exception(*args, **kwargs):
            orig_func(*args, **kwargs)
            raise RandomException()

        with mock.patch.object(
            producer._kafka_producer.kafka_client,
            'send_produce_request',
            side_effect=run_original_func_but_throw_exception
        ) as mock_send_request, capture_new_messages(
            message.topic
        ) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
            mock_send_request.reset_mock()
            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
            assert mock_send_request.call_count == 1  # should be no retry
            self.assert_new_topic_to_offset_map(
                producer,
                message.topic,
                orig_topic_to_offset_map,
                published_message_count=1
            )
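
The wrapper above lets the real send_produce_request complete before raising, simulating a request that reached Kafka but whose response was lost; the producer should then recognize the messages as already published and not resend them, which is what the call_count and offset-map assertions pin down. The same pattern in isolation, with every name hypothetical:

import mock

class FakeClient(object):
    """Hypothetical stand-in for the Kafka client."""
    def send(self, payload):
        return 'ack'

client = FakeClient()
orig_send = client.send

def succeed_then_raise(*args, **kwargs):
    # Let the real call go through, then pretend the response was lost.
    orig_send(*args, **kwargs)
    raise RuntimeError('simulated lost response')

with mock.patch.object(client, 'send', side_effect=succeed_then_raise) as spy:
    try:
        client.send('payload')
    except RuntimeError:
        pass
assert spy.call_count == 1  # the request went out exactly once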
Example #5
    def test_retry_false_failed_publish(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        orig_func = producer._kafka_producer.kafka_client.send_produce_request

        def run_original_func_but_throw_exception(*args, **kwargs):
            orig_func(*args, **kwargs)
            raise RandomException()

        with mock.patch.object(
                producer._kafka_producer.kafka_client,
                'send_produce_request',
                side_effect=run_original_func_but_throw_exception
        ) as mock_send_request, capture_new_messages(
                message.topic) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)
            mock_send_request.reset_mock()
            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message],
                                   actual_msgs=messages)
            assert mock_send_request.call_count == 1  # should be no retry
            self.assert_new_topic_to_offset_map(producer,
                                                message.topic,
                                                orig_topic_to_offset_map,
                                                published_message_count=1)
Example #6
    def test_messages_published_without_flush(self, message,
                                              producer_instance):
        with capture_new_messages(
                message.topic) as get_messages, producer_instance as producer:
            producer.publish(message)

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #7
    def test_messages_not_duplicated(self, message, producer_instance):
        with capture_new_messages(
                message.topic) as get_messages, producer_instance as producer:
            producer.publish(message)
            producer.flush()

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #8
    def test_messages_published_without_flush(self, message, producer_instance):
        with capture_new_messages(
            message.topic
        ) as get_messages, producer_instance as producer:
            producer.publish(message)

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #9
    def test_publish_message_with_keys(self, message_with_pkeys, producer):
        expected_keys_avro_json = {
            "type":
            "record",
            "namespace":
            "yelp.data_pipeline",
            "name":
            "primary_keys",
            "doc":
            "Represents primary keys present in Message payload.",
            "fields": [
                {
                    "type": "string",
                    "name": "field2",
                    "doc": "test",
                    "pkey": 1
                },
                {
                    "type": "int",
                    "name": "field1",
                    "doc": "test",
                    "pkey": 2
                },
                {
                    "type": "int",
                    "name": "field3",
                    "doc": "test",
                    "pkey": 3
                },
            ]
        }
        expected_keys = {
            "field2": message_with_pkeys.payload_data["field2"],
            "field1": message_with_pkeys.payload_data["field1"],
            "field3": message_with_pkeys.payload_data["field3"]
        }

        with capture_new_messages(message_with_pkeys.topic) as get_messages:
            producer.publish(message_with_pkeys)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == expected_keys

        avro_string_writer = AvroStringWriter(schema=expected_keys_avro_json)
        expected_encoded_keys = avro_string_writer.encode(
            message_avro_representation=expected_keys)
        assert offsets_and_messages[0].message.key == expected_encoded_keys

        avro_string_reader = AvroStringReader(
            reader_schema=expected_keys_avro_json,
            writer_schema=expected_keys_avro_json)
        decoded_keys = avro_string_reader.decode(
            encoded_message=offsets_and_messages[0].message.key)
        assert decoded_keys == expected_keys
Example #10
    def test_messages_not_duplicated(self, message, producer_instance):
        with capture_new_messages(
            message.topic
        ) as get_messages, producer_instance as producer:
            producer.publish(message)
            producer.flush()

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #11
    def test_publish_message_with_no_keys(self, message, producer):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == {}
Example #12
    def test_publish_message_with_keys(
        self,
        message_with_pkeys,
        producer
    ):
        expected_keys_avro_json = {
            "type": "record",
            "namespace": "yelp.data_pipeline",
            "name": "primary_keys",
            "doc": "Represents primary keys present in Message payload.",
            "fields": [
                {"type": "string", "name": "field2", "doc": "test", "pkey": 1},
                {"type": "int", "name": "field1", "doc": "test", "pkey": 2},
                {"type": "int", "name": "field3", "doc": "test", "pkey": 3},
            ]
        }
        expected_keys = {
            "field2": message_with_pkeys.payload_data["field2"],
            "field1": message_with_pkeys.payload_data["field1"],
            "field3": message_with_pkeys.payload_data["field3"]
        }

        with capture_new_messages(message_with_pkeys.topic) as get_messages:
            producer.publish(message_with_pkeys)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(
            offsets_and_messages[0]
        )
        assert dp_message.keys == expected_keys

        avro_string_writer = AvroStringWriter(
            schema=expected_keys_avro_json
        )
        expected_encoded_keys = avro_string_writer.encode(
            message_avro_representation=expected_keys
        )
        assert offsets_and_messages[0].message.key == expected_encoded_keys

        avro_string_reader = AvroStringReader(
            reader_schema=expected_keys_avro_json,
            writer_schema=expected_keys_avro_json
        )
        decoded_keys = avro_string_reader.decode(
            encoded_message=offsets_and_messages[0].message.key
        )
        assert decoded_keys == expected_keys
Example #13
    def test_publish_message_with_no_keys(
        self,
        message,
        producer
    ):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(
            offsets_and_messages[0]
        )
        assert dp_message.keys == {}
Example #14
    def test_publish_succeeds_without_retry(self, topic, message, producer):
        with attach_spy_on_func(
                producer._kafka_producer.kafka_client, 'send_produce_request'
        ) as send_request_spy, capture_new_messages(topic) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)

            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message],
                                   actual_msgs=messages)
            assert send_request_spy.call_count == 1
            self.assert_new_topic_to_offset_map(producer,
                                                message.topic,
                                                orig_topic_to_offset_map,
                                                published_message_count=1)
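
attach_spy_on_func is not defined in these examples. A plausible sketch, assuming it is a thin wrapper around mock.patch.object that delegates to the real method, so the publish still goes through while the calls are recorded (the body below is a guess at the helper, not the project's code):

from contextlib import contextmanager
import mock

@contextmanager
def attach_spy_on_func(obj, func_name):
    real_func = getattr(obj, func_name)
    # side_effect forwards each call to the real function and returns its
    # result, so behavior is unchanged while the mock counts the calls.
    with mock.patch.object(obj, func_name, side_effect=real_func) as spy:
        yield spy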
Example #15
    def test_publish_fails_after_retry(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
                producer._kafka_producer.kafka_client,
                'send_produce_request',
                side_effect=[FailedPayloadsError]
        ) as mock_send_request, capture_new_messages(
                message.topic
        ) as get_messages, pytest.raises(MaxRetryError):
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)
            producer.publish(message)
            producer.flush()

        messages = get_messages()
        assert len(messages) == 0
        assert mock_send_request.call_count == self.max_retry_count
        self.assert_new_topic_to_offset_map(producer,
                                            message.topic,
                                            orig_topic_to_offset_map,
                                            published_message_count=0)
Example #16
    def test_publish_succeeds_without_retry(self, topic, message, producer):
        with attach_spy_on_func(
            producer._kafka_producer.kafka_client,
            'send_produce_request'
        ) as send_request_spy, capture_new_messages(
            topic
        ) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)

            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
            assert send_request_spy.call_count == 1
            self.assert_new_topic_to_offset_map(
                producer,
                message.topic,
                orig_topic_to_offset_map,
                published_message_count=1
            )
Example #17
    def _publish_and_assert_pii_message(self, message, producer):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()

        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id

        unpacked_message = Envelope().unpack(
            offsets_and_messages[0].message.value)
        unpacked_meta_attr = unpacked_message['meta'][0]
        encryption_helper = EncryptionHelper(
            dp_message.encryption_type,
            MetaAttribute(unpacked_meta_attr['schema_id'],
                          unpacked_meta_attr['payload']))
        encrypted_payload = encryption_helper.encrypt_payload(message.payload)
        assert unpacked_message['payload'] == encrypted_payload
Example #18
    def test_publish_fails_after_retry(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
            producer._kafka_producer.kafka_client,
            'send_produce_request',
            side_effect=[FailedPayloadsError]
        ) as mock_send_request, capture_new_messages(
            message.topic
        ) as get_messages, pytest.raises(
            MaxRetryError
        ):
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
            producer.publish(message)
            producer.flush()

        messages = get_messages()
        assert len(messages) == 0
        assert mock_send_request.call_count == self.max_retry_count
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=0
        )
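
get_orig_topic_to_offset_map and assert_new_topic_to_offset_map are methods on the test classes and are likewise not shown. The contract the retry tests lean on is that publishing N messages advances the topic's recorded offset by exactly N; a sketch of that check, in which the producer's position-data attributes are assumptions:

def get_orig_topic_to_offset_map(self, producer):
    # Snapshot the per-topic offsets before publishing anything.
    return producer.get_checkpoint_position_data().topic_to_kafka_offset_map

def assert_new_topic_to_offset_map(self, producer, topic, original_map,
                                   published_message_count):
    # Publishing N messages should advance the topic's offset by exactly N.
    new_map = producer.get_checkpoint_position_data().topic_to_kafka_offset_map
    expected = original_map.get(topic, 0) + published_message_count
    assert new_map.get(topic, 0) == expected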