Example #1
 def test_producer_registration_message_on_exit(self, producer_instance):
     producer = producer_instance.__enter__()
     with attach_spy_on_func(producer.registrar, 'stop') as func_spy:
         producer.publish(
             CreateMessage(schema_id=1, payload=bytes("Test message")))
         producer.__exit__(None, None, None)
         assert func_spy.call_count == 1
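
The spy attached above records how often producer.registrar.stop is invoked; a spy in this sense typically also calls through to the real method. As a point of reference, such a helper can be built on mock.patch.object(..., wraps=...). This is a hedged sketch, not the actual attach_spy_on_func implementation:

from contextlib import contextmanager

import mock


@contextmanager
def attach_spy_on_func_sketch(obj, func_name):
    # Hypothetical stand-in for attach_spy_on_func: wraps=... keeps the
    # original method running while the yielded mock records every call.
    original = getattr(obj, func_name)
    with mock.patch.object(obj, func_name, wraps=original) as spy:
        yield spy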
Example #2
    def _publish_and_assert_pii_message(self, message, producer):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()

        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(
            offsets_and_messages[0]
        )
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id

        unpacked_message = Envelope().unpack(offsets_and_messages[0].message.value)
        unpacked_meta_attr = unpacked_message['meta'][0]
        encryption_helper = EncryptionHelper(
            dp_message.encryption_type,
            MetaAttribute(
                unpacked_meta_attr['schema_id'],
                unpacked_meta_attr['payload']
            )
        )
        encrypted_payload = encryption_helper.encrypt_payload(message.payload)
        assert unpacked_message['payload'] == encrypted_payload
Example #3
    def test_retry_false_failed_publish(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        orig_func = producer._kafka_producer.kafka_client.send_produce_request

        def run_original_func_but_throw_exception(*args, **kwargs):
            orig_func(*args, **kwargs)
            raise RandomException()

        with mock.patch.object(
                producer._kafka_producer.kafka_client,
                'send_produce_request',
                side_effect=run_original_func_but_throw_exception
        ) as mock_send_request, capture_new_messages(
                message.topic) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)
            mock_send_request.reset_mock()
            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message],
                                   actual_msgs=messages)
            assert mock_send_request.call_count == 1  # should be no retry
            self.assert_new_topic_to_offset_map(producer,
                                                message.topic,
                                                orig_topic_to_offset_map,
                                                published_message_count=1)
Example #4
    def test_retry_failed_publish_without_highwatermark(
            self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
                producer._kafka_producer.kafka_client,
                'send_produce_request',
                side_effect=[
                    FailedPayloadsError
                ]) as mock_send_request, mock.patch(
                    'data_pipeline._kafka_util.get_topics_watermarks',
                    side_effect=Exception), capture_new_messages(
                        message.topic) as get_messages, pytest.raises(
                            MaxRetryError) as e:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)

            producer.publish(message)
            producer.flush()

            assert mock_send_request.call_count == 1  # should be no retry
            self.assert_last_retry_result(e.value.last_result,
                                          message,
                                          expected_published_msgs_count=0)

            messages = get_messages()
            assert len(messages) == 0
            self.assert_new_topic_to_offset_map(producer,
                                                message.topic,
                                                orig_topic_to_offset_map,
                                                published_message_count=0)
Example #5
 def test_child_processes_do_not_survive_an_exception(self, producer_instance, message):
     with pytest.raises(RandomException), producer_instance as producer:
         producer.publish(message)
         producer.flush()
         producer.publish(message)
         raise RandomException()
     assert len(multiprocessing.active_children()) == 0
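
The assertion on multiprocessing.active_children() only holds if the producer's __exit__ tears down its worker processes even when the block raises. A minimal sketch of that contract, using an illustrative class that is not part of data_pipeline:

import multiprocessing


def _idle_worker():
    # Placeholder child-process target for the sketch.
    pass


class ProducerLikeSketch(object):
    # Hypothetical object showing the cleanup contract the test relies on:
    # no child process may outlive the with-block, even if the body raises.

    def __enter__(self):
        self._worker = multiprocessing.Process(target=_idle_worker)
        self._worker.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._worker.terminate()
        self._worker.join()
        return False  # do not swallow the exception raised inside the block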
Example #6
    def test_forced_recovery_when_overpublished(
        self, topic, messages, producer, topic_offsets
    ):
        for message in messages:
            producer.publish(message)
        producer.flush()

        with reconfigure(
            force_recovery_from_publication_unensurable_error=True
        ), setup_capture_new_messages_consumer(
            topic
        ) as consumer, mock.patch.object(
            data_pipeline.producer,
            'logger'
        ) as mock_logger:
            producer.ensure_messages_published(messages[:2], topic_offsets)

            self._assert_logged_info_correct(
                mock_logger,
                len(messages),
                topic,
                topic_offsets,
                message_count=len(messages[:2])
            )
            self._verify_position_and_highwatermarks(
                topics=[topic],
                producer=producer,
                message_count=len(messages[:2])
            )

            assert len(consumer.get_messages(10)) == 2
Example #7
    def _test_success_ensure_messages_published(self, topic, messages,
                                                producer, topic_offsets,
                                                unpublished_count):
        messages_to_publish = len(messages) - unpublished_count
        messages_published_first = messages[:messages_to_publish]

        with setup_capture_new_messages_consumer(
                topic) as consumer, mock.patch.object(data_pipeline.producer,
                                                      'logger') as mock_logger:
            for message in messages_published_first:
                producer.publish(message)
            producer.flush()
            producer.position_data_callback = mock.Mock()

            producer.ensure_messages_published(messages, topic_offsets)

            if unpublished_count > 0:
                assert producer.position_data_callback.call_count == 1

            self._assert_all_messages_published(consumer)

            self._verify_position_and_highwatermarks(
                topics=[topic],
                producer=producer,
                message_count=self.number_of_messages)

            self._assert_logged_info_correct(
                mock_logger,
                messages_already_published=len(messages_published_first),
                topic=topic,
                topic_offsets=topic_offsets,
                message_count=len(messages))
Example #8
    def test_retry_false_failed_publish(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        orig_func = producer._kafka_producer.kafka_client.send_produce_request

        def run_original_func_but_throw_exception(*args, **kwargs):
            orig_func(*args, **kwargs)
            raise RandomException()

        with mock.patch.object(
            producer._kafka_producer.kafka_client,
            'send_produce_request',
            side_effect=run_original_func_but_throw_exception
        ) as mock_send_request, capture_new_messages(
            message.topic
        ) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
            mock_send_request.reset_mock()
            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
            assert mock_send_request.call_count == 1  # should be no retry
            self.assert_new_topic_to_offset_map(
                producer,
                message.topic,
                orig_topic_to_offset_map,
                published_message_count=1
            )
Example #9
    def test_retry_failed_publish_without_highwatermark(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
            producer._kafka_producer.kafka_client,
            'send_produce_request',
            side_effect=[FailedPayloadsError]
        ) as mock_send_request, mock.patch(
            'data_pipeline._kafka_util.get_topics_watermarks',
            side_effect=Exception
        ), capture_new_messages(
            message.topic
        ) as get_messages, pytest.raises(
            MaxRetryError
        ) as e:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)

            producer.publish(message)
            producer.flush()

            assert mock_send_request.call_count == 1  # should be no retry
            self.assert_last_retry_result(
                e.value.last_result,
                message,
                expected_published_msgs_count=0
            )

            messages = get_messages()
            assert len(messages) == 0
            self.assert_new_topic_to_offset_map(
                producer,
                message.topic,
                orig_topic_to_offset_map,
                published_message_count=0
            )
Example #10
    def test_publish_one_msg_succeeds_one_fails_after_retry(
        self,
        message,
        another_message,
        topic,
        producer
    ):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        mock_response = ProduceResponse(topic, partition=0, error=0, offset=1)
        fail_response = FailedPayloadsError(payload=mock.Mock())
        side_effect = ([[mock_response, fail_response]] +
                       [[fail_response]] * self.max_retry_count)
        with mock.patch.object(
            producer._kafka_producer.kafka_client,
            'send_produce_request',
            side_effect=side_effect
        ), pytest.raises(
            MaxRetryError
        ) as e:
            producer.publish(message)
            producer.publish(another_message)
            producer.flush()

            self.assert_last_retry_result(
                e.value.last_result,
                another_message,
                expected_published_msgs_count=1
            )
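
Each element of side_effect is consumed by one call to the mocked send_produce_request: the first attempt returns one success and one failure, and every retry after that fails, so the producer eventually raises MaxRetryError. Purely for illustration, with max_retry_count assumed to be 3 the list expands to:

# Illustration only; the real elements are ProduceResponse / FailedPayloadsError
# instances and max_retry_count comes from the test class.
mock_response = 'success'
fail_response = 'failure'
side_effect = ([[mock_response, fail_response]] +
               [[fail_response]] * 3)
assert side_effect == [
    ['success', 'failure'],  # first attempt: one message lands, one fails
    ['failure'],             # retry 1
    ['failure'],             # retry 2
    ['failure'],             # retry 3
]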
Example #11
    def test_publish_to_new_topic(self, create_new_schema, producer):
        new_schema = create_new_schema(source='retry_source')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))

        with attach_spy_on_func(
            producer._kafka_producer.kafka_client,
            'send_produce_request'
        ) as send_request_spy:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
            send_request_spy.reset_mock()

            producer.publish(message)
            producer.flush()

            # it should fail at least the 1st time because the topic doesn't
            # exist. Depending on how fast the topic is created, it could retry
            # more than 2 times.
            assert send_request_spy.call_count >= 2

        messages = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        self.assert_new_topic_to_offset_map(
            producer,
            message.topic,
            orig_topic_to_offset_map,
            published_message_count=1
        )
Example #12
    def test_multitopic_offsets(self, topic, messages, secondary_topic,
                                secondary_messages, producer, topic_offsets,
                                containers):
        """Publishes a single message on the secondary_topic, and all
        messages on the primary topic, simulating the case where publishes for
        one topic fail, while the other succeeds, and the one that succeeds
        comes later in time.  The goal is that the position data still reflects
        the original message ordering, irrespective of failure.
        """
        containers.create_kafka_topic(secondary_topic)
        with setup_capture_new_messages_consumer(secondary_topic) as consumer:
            producer.publish(secondary_messages[0])
            for message in messages:
                producer.publish(message)
            producer.flush()

            producer.ensure_messages_published(secondary_messages + messages,
                                               topic_offsets)

            self._verify_position_and_highwatermarks(
                topics=[topic, secondary_topic],
                producer=producer,
                message_count=self.number_of_messages)

            assert len(consumer.get_messages(10)) == len(secondary_messages)
Example #13
    def _test_success_ensure_messages_published(self, topic, messages,
                                                producer, topic_offsets,
                                                unpublished_count):
        messages_to_publish = len(messages) - unpublished_count
        messages_published_first = messages[:messages_to_publish]

        with setup_capture_new_messages_consumer(
                topic) as consumer, mock.patch.object(data_pipeline.producer,
                                                      'logger') as mock_logger:
            for message in messages_published_first:
                producer.publish(message)
            producer.flush()
            producer.position_data_callback = mock.Mock()

            producer.ensure_messages_published(messages, topic_offsets)

            if unpublished_count > 0:
                assert producer.position_data_callback.call_count == 1

            self._assert_all_messages_published(consumer)

            position_info = producer.get_checkpoint_position_data()
            last_position = position_info.last_published_message_position_info
            assert last_position['position'] == self.number_of_messages

            self._assert_logged_info_correct(
                mock_logger,
                messages_already_published=len(messages_published_first),
                topic=topic,
                topic_offsets=topic_offsets,
                message_count=len(messages))
Example #14
    def test_messages_published_without_flush(self, message, producer_instance):
        with capture_new_messages(
            message.topic
        ) as get_messages, producer_instance as producer:
            producer.publish(message)

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #15
    def test_messages_published_without_flush(self, message,
                                              producer_instance):
        with capture_new_messages(
                message.topic) as get_messages, producer_instance as producer:
            producer.publish(message)

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #16
    def test_messages_not_duplicated(self, message, producer_instance):
        with capture_new_messages(
                message.topic) as get_messages, producer_instance as producer:
            producer.publish(message)
            producer.flush()

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #17
 def test_child_processes_do_not_survive_an_exception(
         self, producer_instance, message):
     with pytest.raises(RandomException), producer_instance as producer:
         producer.publish(message)
         producer.flush()
         producer.publish(message)
         raise RandomException()
     assert len(multiprocessing.active_children()) == 0
Example #18
 def test_producer_registration_message_on_exit(self, producer_instance):
     producer = producer_instance.__enter__()
     with attach_spy_on_func(
         producer.registrar,
         'stop'
     ) as func_spy:
         producer.publish(CreateMessage(schema_id=1, payload=bytes("Test message")))
         producer.__exit__(None, None, None)
         assert func_spy.call_count == 1
Example #19
    def test_messages_not_duplicated(self, message, producer_instance):
        with capture_new_messages(
            message.topic
        ) as get_messages, producer_instance as producer:
            producer.publish(message)
            producer.flush()

        assert len(multiprocessing.active_children()) == 0
        assert len(get_messages()) == 1
Example #20
    def test_publish_message_with_keys(self, message_with_pkeys, producer):
        expected_keys_avro_json = {
            "type":
            "record",
            "namespace":
            "yelp.data_pipeline",
            "name":
            "primary_keys",
            "doc":
            "Represents primary keys present in Message payload.",
            "fields": [
                {
                    "type": "string",
                    "name": "field2",
                    "doc": "test",
                    "pkey": 1
                },
                {
                    "type": "int",
                    "name": "field1",
                    "doc": "test",
                    "pkey": 2
                },
                {
                    "type": "int",
                    "name": "field3",
                    "doc": "test",
                    "pkey": 3
                },
            ]
        }
        expected_keys = {
            "field2": message_with_pkeys.payload_data["field2"],
            "field1": message_with_pkeys.payload_data["field1"],
            "field3": message_with_pkeys.payload_data["field3"]
        }

        with capture_new_messages(message_with_pkeys.topic) as get_messages:
            producer.publish(message_with_pkeys)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == expected_keys

        avro_string_writer = AvroStringWriter(schema=expected_keys_avro_json)
        expected_encoded_keys = avro_string_writer.encode(
            message_avro_representation=expected_keys)
        assert offsets_and_messages[0].message.key == expected_encoded_keys

        avro_string_reader = AvroStringReader(
            reader_schema=expected_keys_avro_json,
            writer_schema=expected_keys_avro_json)
        decoded_keys = avro_string_reader.decode(
            encoded_message=offsets_and_messages[0].message.key)
        assert decoded_keys == expected_keys
Example #21
    def test_publish_message_with_no_keys(self, message, producer):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.keys == {}
Example #22
    def topic_offsets(self, request, producer, random_schema, containers):
        is_fresh_topic = request.param
        if is_fresh_topic:
            containers.create_kafka_topic(str(random_schema.topic.name))
            return {}

        message = CreateMessage(random_schema.schema_id, payload=str('-1'))
        producer.publish(message)
        producer.flush()
        return producer.get_checkpoint_position_data().topic_to_kafka_offset_map
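
This fixture branches on request.param, which implies it is declared with @pytest.fixture(params=...) so dependent tests run once per value. A minimal sketch of that pattern; the names and parameter values here are illustrative, not taken from the real suite:

import pytest


@pytest.fixture(params=[True, False], ids=['fresh_topic', 'existing_topic'])
def is_fresh_topic(request):
    # pytest re-runs every test that uses this fixture once per value in
    # params, exposing the current value as request.param.
    return request.param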
Example #23
 def test_meteorite_on_off(self, create_message, registered_schema,
                           producer, enable_meteorite, expected_call_count):
     with mock.patch.object(
             data_pipeline.tools.meteorite_wrappers.StatsCounter,
             'process',
             autospec=True) as mock_stats_counter:
         producer.enable_meteorite = enable_meteorite
         m = create_message(registered_schema, timeslot=1.0)
         producer.publish(m)
         assert mock_stats_counter.call_count == expected_call_count
Example #24
    def topic_offsets(self, request, producer, random_schema, containers):
        is_fresh_topic = request.param
        if is_fresh_topic:
            containers.create_kafka_topic(str(random_schema.topic.name))
            return {}

        message = CreateMessage(random_schema.schema_id, payload=str('-1'))
        producer.publish(message)
        producer.flush()
        return producer.get_checkpoint_position_data(
        ).topic_to_kafka_offset_map
Example #25
 def test_sensu_on_off(self, create_message, registered_schema, producer,
                       enable_sensu, expected_call_count):
     with mock.patch.object(
             data_pipeline.tools.sensu_ttl_alerter.SensuTTLAlerter,
             'process',
             autospec=True,
             return_value=None) as mock_sensu_ttl_process:
         producer.enable_sensu = enable_sensu
         m = create_message(registered_schema, timeslot=1.0)
         producer.publish(m)
         assert mock_sensu_ttl_process.call_count == expected_call_count
Example #26
    def test_get_position_data(self, create_message, producer):
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            position_data = producer.get_checkpoint_position_data()

            self._verify_position_data(position_data, upstream_info,
                                       message.topic)
            self._verify_topic_kafka_offset(position_data, message.topic,
                                            consumer, producer, create_message)
Example #27
 def test_sensu_process_called_once_inside_window(self, create_message,
                                                  registered_schema,
                                                  producer, message_count):
     with mock.patch.object(
             data_pipeline.tools.sensu_ttl_alerter.SensuTTLAlerter,
             'process',
             autospec=True,
             return_value=None) as mock_sensu_ttl_process:
         producer.enable_sensu = True
         m1 = create_message(registered_schema, timeslot=1.0)
         for i in range(message_count):
             producer.publish(m1)
         assert mock_sensu_ttl_process.call_count == 1
Example #28
    def test_publish_message_with_keys(
        self,
        message_with_pkeys,
        producer
    ):
        expected_keys_avro_json = {
            "type": "record",
            "namespace": "yelp.data_pipeline",
            "name": "primary_keys",
            "doc": "Represents primary keys present in Message payload.",
            "fields": [
                {"type": "string", "name": "field2", "doc": "test", "pkey": 1},
                {"type": "int", "name": "field1", "doc": "test", "pkey": 2},
                {"type": "int", "name": "field3", "doc": "test", "pkey": 3},
            ]
        }
        expected_keys = {
            "field2": message_with_pkeys.payload_data["field2"],
            "field1": message_with_pkeys.payload_data["field1"],
            "field3": message_with_pkeys.payload_data["field3"]
        }

        with capture_new_messages(message_with_pkeys.topic) as get_messages:
            producer.publish(message_with_pkeys)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(
            offsets_and_messages[0]
        )
        assert dp_message.keys == expected_keys

        avro_string_writer = AvroStringWriter(
            schema=expected_keys_avro_json
        )
        expected_encoded_keys = avro_string_writer.encode(
            message_avro_representation=expected_keys
        )
        assert offsets_and_messages[0].message.key == expected_encoded_keys

        avro_string_reader = AvroStringReader(
            reader_schema=expected_keys_avro_json,
            writer_schema=expected_keys_avro_json
        )
        decoded_keys = avro_string_reader.decode(
            encoded_message=offsets_and_messages[0].message.key
        )
        assert decoded_keys == expected_keys
Example #29
    def test_ensure_messages_published_fails_when_overpublished(
            self, topic, messages, producer, topic_offsets):
        for message in messages:
            producer.publish(message)
        producer.flush()

        with pytest.raises(PublicationUnensurableError), mock.patch.object(
                data_pipeline.producer, 'logger') as mock_logger:
            producer.ensure_messages_published(messages[:2], topic_offsets)

            self._assert_logged_info_correct(mock_logger,
                                             len(messages),
                                             topic,
                                             topic_offsets,
                                             message_count=len(messages[:2]))
Example #30
    def test_publish_message_with_no_keys(
        self,
        message,
        producer
    ):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()
        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(
            offsets_and_messages[0]
        )
        assert dp_message.keys == {}
Example #31
    def _verify_topic_kafka_offset(self, position_data, topic, consumer,
                                   producer, create_message):
        # The pointer is to the next offset where messages will be published.
        # There shouldn't be any messages there yet.
        kafka_offset = position_data.topic_to_kafka_offset_map[topic]
        consumer.seek(kafka_offset, 0)  # kafka_offset from head
        assert len(consumer.get_messages(count=10)) == 0

        # publish another message, so we can seek to it
        message = create_message(upstream_position_info={'offset': 'fake2'})
        producer.publish(message)
        producer.flush()

        # There should be a message now that we've published one
        consumer.seek(kafka_offset, 0)  # kafka_offset from head
        assert len(consumer.get_messages(count=10)) == 1
Example #32
    def test_get_position_data(self, create_message, producer):
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            position_data = producer.get_checkpoint_position_data()

            self._verify_position_data(position_data, upstream_info, message.topic)
            self._verify_topic_kafka_offset(
                position_data,
                message.topic,
                consumer,
                producer,
                create_message
            )
Example #33
 def test_meteorite_on_off(
     self,
     create_message,
     registered_schema,
     producer,
     enable_meteorite,
     expected_call_count
 ):
     with mock.patch.object(
         data_pipeline.tools.meteorite_wrappers.StatsCounter,
         'process',
         autospec=True
     ) as mock_stats_counter:
         producer.enable_meteorite = enable_meteorite
         m = create_message(registered_schema, timeslot=1.0)
         producer.publish(m)
         assert mock_stats_counter.call_count == expected_call_count
Example #34
 def test_producer_periodic_registration_messages(self, producer_instance):
     """
     Note: This test fails when the threshold is set significantly below 1 second,
           presumably because of the nature of threading. It should be irrelevant
           if the threshold in the registrar is set significantly higher.
     """
     producer_instance.registrar.threshold = 1
     with producer_instance as producer:
         with attach_spy_on_func(producer.registrar.clog_writer,
                                 'publish') as func_spy:
             producer.publish(
                 CreateMessage(schema_id=1, payload=bytes("FAKE MESSAGE")))
             producer.publish(
                 CreateMessage(schema_id=2,
                               payload=bytes("DIFFERENT FAKE MESSAGE")))
             time.sleep(2.5)
             assert func_spy.call_count == 4
Example #35
    def _verify_topic_kafka_offset(
        self, position_data, topic, consumer, producer, create_message
    ):
        # The pointer is to the next offset where messages will be published.
        # There shouldn't be any messages there yet.
        kafka_offset = position_data.topic_to_kafka_offset_map[topic]
        consumer.seek(kafka_offset, 0)  # kafka_offset from head
        assert len(consumer.get_messages(count=10)) == 0

        # publish another message, so we can seek to it
        message = create_message(upstream_position_info={'offset': 'fake2'})
        producer.publish(message)
        producer.flush()

        # There should be a message now that we've published one
        consumer.seek(kafka_offset, 0)  # kafka_offset from head
        assert len(consumer.get_messages(count=10)) == 1
Example #36
 def test_sensu_on_off(
     self,
     create_message,
     registered_schema,
     producer,
     enable_sensu,
     expected_call_count
 ):
     with mock.patch.object(
         data_pipeline.tools.sensu_ttl_alerter.SensuTTLAlerter,
         'process',
         autospec=True,
         return_value=None
     ) as mock_sensu_ttl_process:
         producer.enable_sensu = enable_sensu
         m = create_message(registered_schema, timeslot=1.0)
         producer.publish(m)
         assert mock_sensu_ttl_process.call_count == expected_call_count
Example #37
 def test_sensu_process_called_once_inside_window(
     self,
     create_message,
     registered_schema,
     producer,
     message_count
 ):
     with mock.patch.object(
         data_pipeline.tools.sensu_ttl_alerter.SensuTTLAlerter,
         'process',
         autospec=True,
         return_value=None
     ) as mock_sensu_ttl_process:
         producer.enable_sensu = True
         m1 = create_message(registered_schema, timeslot=1.0)
         for i in range(message_count):
             producer.publish(m1)
         assert mock_sensu_ttl_process.call_count == 1
Example #38
    def test_publish_succeeds_without_retry(self, topic, message, producer):
        with attach_spy_on_func(
                producer._kafka_producer.kafka_client, 'send_produce_request'
        ) as send_request_spy, capture_new_messages(topic) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)

            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message],
                                   actual_msgs=messages)
            assert send_request_spy.call_count == 1
            self.assert_new_topic_to_offset_map(producer,
                                                message.topic,
                                                orig_topic_to_offset_map,
                                                published_message_count=1)
Example #39
    def test_publish_one_msg_succeeds_one_fails_after_retry(
            self, message, another_message, topic, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        mock_response = ProduceResponse(topic, partition=0, error=0, offset=1)
        fail_response = FailedPayloadsError(payload=mock.Mock())
        side_effect = ([[mock_response, fail_response]] +
                       [[fail_response]] * self.max_retry_count)
        with mock.patch.object(
                producer._kafka_producer.kafka_client,
                'send_produce_request',
                side_effect=side_effect), pytest.raises(MaxRetryError) as e:
            producer.publish(message)
            producer.publish(another_message)
            producer.flush()

            self.assert_last_retry_result(e.value.last_result,
                                          another_message,
                                          expected_published_msgs_count=1)
Example #40
    def test_position_data_callback(self, create_message, producer_name,
                                    team_name):
        callback = mock.Mock()
        producer = Producer(
            producer_name=producer_name,
            team_name=team_name,
            expected_frequency_seconds=ExpectedFrequency.constantly,
            position_data_callback=callback)
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            (position_data, ), _ = callback.call_args

            self._verify_position_data(position_data, upstream_info,
                                       message.topic)
            self._verify_topic_kafka_offset(position_data, message.topic,
                                            consumer, producer, create_message)
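
The destructuring (position_data, ), _ = callback.call_args above works because mock.call_args is an (args, kwargs) pair describing the most recent call. A small self-contained illustration of that shape:

import mock

callback = mock.Mock()
callback('position_data_sentinel')

# call_args unpacks into the positional-args tuple and the kwargs dict.
args, kwargs = callback.call_args
assert args == ('position_data_sentinel',)
assert kwargs == {}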
Example #41
 def test_producer_periodic_registration_messages(self, producer_instance):
     """
     Note: This test fails when the threshold is set significantly below 1 second,
           presumably because of the nature of threading. It should be irrelevant
           if the threshold in the registrar is set significantly higher.
     """
     producer_instance.registrar.threshold = 1
     with producer_instance as producer:
         with attach_spy_on_func(
             producer.registrar.clog_writer,
             'publish'
         ) as func_spy:
             producer.publish(CreateMessage(schema_id=1, payload=bytes("FAKE MESSAGE")))
             producer.publish(CreateMessage(
                 schema_id=2,
                 payload=bytes("DIFFERENT FAKE MESSAGE")
             ))
             time.sleep(2.5)
             assert func_spy.call_count == 4
Example #42
    def test_forced_recovery_when_overpublished(self, topic, messages,
                                                producer, topic_offsets):
        for message in messages:
            producer.publish(message)
        producer.flush()

        with reconfigure(force_recovery_from_publication_unensurable_error=True
                         ), setup_capture_new_messages_consumer(
                             topic) as consumer, mock.patch.object(
                                 data_pipeline.producer,
                                 'logger') as mock_logger:
            producer.ensure_messages_published(messages[:2], topic_offsets)

            self._assert_logged_info_correct(mock_logger,
                                             len(messages),
                                             topic,
                                             topic_offsets,
                                             message_count=len(messages[:2]))

            assert len(consumer.get_messages(10)) == 2
Example #43
    def test_publish_fails_after_retry(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
                producer._kafka_producer.kafka_client,
                'send_produce_request',
                side_effect=[FailedPayloadsError
                             ]) as mock_send_request, capture_new_messages(
                                 message.topic) as get_messages, pytest.raises(
                                     MaxRetryError):
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)
            producer.publish(message)
            producer.flush()

            messages = get_messages()
            assert len(messages) == 0
            assert mock_send_request.call_count == self.max_retry_count
            self.assert_new_topic_to_offset_map(producer,
                                                message.topic,
                                                orig_topic_to_offset_map,
                                                published_message_count=0)
Example #44
    def test_publish_succeeds_without_retry(self, topic, message, producer):
        with attach_spy_on_func(
            producer._kafka_producer.kafka_client,
            'send_produce_request'
        ) as send_request_spy, capture_new_messages(
            topic
        ) as get_messages:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)

            producer.publish(message)
            producer.flush()

            messages = get_messages()
            self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
            assert send_request_spy.call_count == 1
            self.assert_new_topic_to_offset_map(
                producer,
                message.topic,
                orig_topic_to_offset_map,
                published_message_count=1
            )
Example #45
    def _publish_and_assert_pii_message(self, message, producer):
        with capture_new_messages(message.topic) as get_messages:
            producer.publish(message)
            producer.flush()
            offsets_and_messages = get_messages()

        assert len(offsets_and_messages) == 1

        dp_message = create_from_offset_and_message(offsets_and_messages[0])
        assert dp_message.payload == message.payload
        assert dp_message.payload_data == message.payload_data
        assert dp_message.schema_id == message.schema_id

        unpacked_message = Envelope().unpack(
            offsets_and_messages[0].message.value)
        unpacked_meta_attr = unpacked_message['meta'][0]
        encryption_helper = EncryptionHelper(
            dp_message.encryption_type,
            MetaAttribute(unpacked_meta_attr['schema_id'],
                          unpacked_meta_attr['payload']))
        encrypted_payload = encryption_helper.encrypt_payload(message.payload)
        assert unpacked_message['payload'] == encrypted_payload
Example #46
    def test_ensure_messages_published_fails_when_overpublished(
        self, topic, messages, producer, topic_offsets
    ):
        for message in messages:
            producer.publish(message)
        producer.flush()

        with pytest.raises(
            PublicationUnensurableError
        ), mock.patch.object(
            data_pipeline.producer,
            'logger'
        ) as mock_logger:
            producer.ensure_messages_published(messages[:2], topic_offsets)

            self._assert_logged_info_correct(
                mock_logger,
                len(messages),
                topic,
                topic_offsets,
                message_count=len(messages[:2])
            )
Example #47
    def test_position_data_callback(self, create_message, producer_name, team_name):
        callback = mock.Mock()
        producer = Producer(
            producer_name=producer_name,
            team_name=team_name,
            expected_frequency_seconds=ExpectedFrequency.constantly,
            position_data_callback=callback
        )
        upstream_info = {'offset': 'fake'}
        message = create_message(upstream_position_info=upstream_info)
        with setup_capture_new_messages_consumer(message.topic) as consumer:
            producer.publish(message)
            producer.flush()
            (position_data,), _ = callback.call_args

            self._verify_position_data(position_data, upstream_info, message.topic)
            self._verify_topic_kafka_offset(
                position_data,
                message.topic,
                consumer,
                producer,
                create_message
            )
Example #48
    def _test_success_ensure_messages_published(
        self, topic, messages, producer, topic_offsets, unpublished_count
    ):
        messages_to_publish = len(messages) - unpublished_count
        messages_published_first = messages[:messages_to_publish]

        with setup_capture_new_messages_consumer(
            topic
        ) as consumer, mock.patch.object(
            data_pipeline.producer,
            'logger'
        ) as mock_logger:
            for message in messages_published_first:
                producer.publish(message)
            producer.flush()
            producer.position_data_callback = mock.Mock()

            producer.ensure_messages_published(messages, topic_offsets)

            if unpublished_count > 0:
                assert producer.position_data_callback.call_count == 1

            self._assert_all_messages_published(consumer)

            self._verify_position_and_highwatermarks(
                topics=[topic],
                producer=producer,
                message_count=self.number_of_messages
            )

            self._assert_logged_info_correct(
                mock_logger,
                messages_already_published=len(messages_published_first),
                topic=topic,
                topic_offsets=topic_offsets,
                message_count=len(messages)
            )
Example #49
    def test_multitopic_offsets(
        self,
        topic,
        messages,
        secondary_topic,
        secondary_messages,
        producer,
        topic_offsets,
        containers
    ):
        """Publishes a single message on the secondary_topic, and all
        messages on the primary topic, simulating the case where publishes for
        one topic fail, while the other succeeds, and the one that succeeds
        comes later in time.  The goal is that the position data still reflects
        the original message ordering, irrespective of failure.
        """
        containers.create_kafka_topic(secondary_topic)
        with setup_capture_new_messages_consumer(
            secondary_topic
        ) as consumer:
            producer.publish(secondary_messages[0])
            for message in messages:
                producer.publish(message)
            producer.flush()

            producer.ensure_messages_published(
                secondary_messages + messages,
                topic_offsets
            )

            self._verify_position_and_highwatermarks(
                topics=[topic, secondary_topic],
                producer=producer,
                message_count=self.number_of_messages
            )

            assert len(consumer.get_messages(10)) == len(secondary_messages)
Example #50
    def test_publish_to_new_topic(self, create_new_schema, producer):
        new_schema = create_new_schema(source='retry_source')
        message = CreateMessage(new_schema.schema_id, payload=str('1'))

        with attach_spy_on_func(producer._kafka_producer.kafka_client,
                                'send_produce_request') as send_request_spy:
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(
                producer)
            send_request_spy.reset_mock()

            producer.publish(message)
            producer.flush()

            # it should fail at least the 1st time because the topic doesn't
            # exist. Depending on how fast the topic is created, it could retry
            # more than 2 times.
            assert send_request_spy.call_count >= 2

        messages = self.get_messages_from_start(message.topic)
        self.assert_equal_msgs(expected_msgs=[message], actual_msgs=messages)
        self.assert_new_topic_to_offset_map(producer,
                                            message.topic,
                                            orig_topic_to_offset_map,
                                            published_message_count=1)
Example #51
    def test_publish_fails_after_retry(self, message, producer):
        # TODO(DATAPIPE-606|clin) investigate better way than mocking response
        with mock.patch.object(
            producer._kafka_producer.kafka_client,
            'send_produce_request',
            side_effect=[FailedPayloadsError]
        ) as mock_send_request, capture_new_messages(
            message.topic
        ) as get_messages, pytest.raises(
            MaxRetryError
        ):
            orig_topic_to_offset_map = self.get_orig_topic_to_offset_map(producer)
            producer.publish(message)
            producer.flush()

            messages = get_messages()
            assert len(messages) == 0
            assert mock_send_request.call_count == self.max_retry_count
            self.assert_new_topic_to_offset_map(
                producer,
                message.topic,
                orig_topic_to_offset_map,
                published_message_count=0
            )
Example #52
 def set_topic_offsets_to_latest(self, producer, message, another_message):
     producer.publish(message)
     producer.publish(another_message)
     producer.flush()
Example #53
 def set_topic_offsets_to_latest(self, producer, message, another_message):
     producer.publish(message)
     producer.publish(another_message)
     producer.flush()
Example #54
 def _publish_message(self, message, producer):
     with capture_new_data_pipeline_messages(message.topic) as get_messages:
         producer.publish(message)
         producer.flush()
         return get_messages()
Example #55
 def publish_messages(self, messages, producer):
     for message in messages:
         producer.publish(message)
     producer.flush()
     producer.monitor.flush_buffered_info()
Example #56
 def _publish_message(self, message, producer):
     with capture_new_data_pipeline_messages(message.topic) as get_messages:
         producer.publish(message)
         producer.flush()
         return get_messages()
Example #57
 def publish_messages(self, messages, producer):
     for message in messages:
         producer.publish(message)
     producer.flush()
     producer.monitor.flush_buffered_info()