def test_message_matcher_success(self, mock_get_sub, unsued_mock):
    """Check the matcher against two pulls that each return one payload.

    The matcher is initialised expecting ``b'a'`` and ``b'b'``; the mocked
    subscriber returns them one per pull, so two pulls and two acknowledge
    calls are expected.
    """
    self.init_matcher(expected_msg=[b'a', b'b'])
    subscriber = mock_get_sub.return_value
    # One pull response per expected payload, each with empty attributes.
    subscriber.pull.side_effect = [
        create_pull_response([PullResponseMessage(payload, {})])
        for payload in (b'a', b'b')
    ]

    hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 2)
    self.assertEqual(subscriber.acknowledge.call_count, 2)
def test_message_matcher_success(self, mock_get_sub, unsued_mock):
    """Check the matcher when expected messages are set after construction.

    ``expected_msg`` is assigned directly on the matcher; the mocked
    subscriber then returns ``b'a'`` and ``b'b'`` over two separate pulls.
    """
    self.init_matcher()
    self.pubsub_matcher.expected_msg = ['a', 'b']

    subscriber = mock_get_sub.return_value
    first_response = create_pull_response([PullResponseMessage(b'a', {})])
    second_response = create_pull_response([PullResponseMessage(b'b', {})])
    subscriber.pull.side_effect = [first_response, second_response]

    hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 2)
    self.assertEqual(subscriber.acknowledge.call_count, 2)
def test_read_messages_timestamp_attribute_rfc3339_success(
    mocker, patch_sub_client, patch_msg_manager,
):
    """Read one message whose ``time`` attribute is an RFC 3339 string.

    The expected element timestamp is built from the ``time`` attribute,
    not from the publish-time values passed to the fake pull response.
    After the pipeline finishes, the mocked subscriber client and message
    manager are inspected.
    """
    ack_id = "ack_id"
    attributes = {"time": "2018-03-12T13:37:01.234567Z"}

    # Build a serialized KlioMessage to use as the Pub/Sub payload.
    klio_message = klio_pb2.KlioMessage()
    klio_message.data.element = bytes("entity_id", "utf-8")
    payload = klio_message.SerializeToString()

    response_message = beam_test_utils.PullResponseMessage(
        payload, attributes, 1337000000, 133700000, ack_id
    )
    patch_sub_client.pull.return_value = (
        beam_test_utils.create_pull_response([response_message])
    )

    pubsub_message = b_pubsub.PubsubMessage(payload, attributes)
    expected_timestamp = beam_utils.timestamp.Timestamp.from_rfc3339(
        attributes["time"]
    )
    expected_elements = [
        beam_testing_util.TestWindowedValue(
            pubsub_message,
            expected_timestamp,
            [beam_transforms.window.GlobalWindow()],
        ),
    ]

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True

    with beam_test_pipeline.TestPipeline(options=options) as pipeline:
        source = b_pubsub.ReadFromPubSub(
            "projects/fakeprj/topics/a_topic",
            None,
            None,
            with_attributes=True,
            timestamp_attribute="time",
        )
        pcoll = pipeline | source
        # Check original functionality that was kept the same
        beam_testing_util.assert_that(
            pcoll,
            beam_testing_util.equal_to(expected_elements),
            reify_windows=True,
        )

    # Check overridden functionality:
    # 1. Check that auto-acking is skipped
    patch_sub_client.acknowledge.assert_not_called()
    # 2. Check that MessageManager daemon threads were started
    patch_msg_manager.assert_called_once_with(
        patch_sub_client.subscription_path()
    )
    # 3. Check that messages were added to the MessageManager
    patch_msg_manager.return_value.add.assert_called_once_with(
        ack_id, pubsub_message
    )
    # 4. Check that one message is handled at a time, instead of the
    #    original 10
    patch_sub_client.pull.assert_called_once_with(
        mocker.ANY, max_messages=1, return_immediately=True
    )
    patch_sub_client.api.transport.channel.close.assert_called_once_with()
def test_read_messages_success(self, mock_pubsub):
    """Read one message with attributes and check its windowed value.

    The expected timestamp is ``1520861821.234567``, matching the publish
    seconds/nanos given to the fake pull response. After the run, the
    mocked client must have been asked to acknowledge ``ack_id``.
    """
    ack_id = 'ack_id'
    data = 'data'
    attributes = {'key': 'value'}

    response_message = test_utils.PullResponseMessage(
        data, attributes, 1520861821, 234567000, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp(1520861821.234567),
            [window.GlobalWindow()]),
    ]

    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    pipeline = TestPipeline(options=options)
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic', None, None, with_attributes=True)
    pcoll = pipeline | source
    assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
    pipeline.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_read_messages_timestamp_attribute_fail_parse(patch_sub_client):
    """Running the pipeline raises ``ValueError`` for a bad ``time`` value.

    The ``time`` attribute is ``"1337 unparseable"``. The run is expected to
    raise a ``ValueError`` whose message matches ``parse``, with no
    acknowledge call and the client channel closed.
    """
    attributes = {"time": "1337 unparseable"}

    # Serialized KlioMessage used as the Pub/Sub payload.
    klio_message = klio_pb2.KlioMessage()
    klio_message.data.element = bytes("entity_id", "utf-8")
    payload = klio_message.SerializeToString()

    response_message = beam_test_utils.PullResponseMessage(
        payload, attributes, 1520861821, 234567000, "ack_id"
    )
    patch_sub_client.pull.return_value = (
        beam_test_utils.create_pull_response([response_message])
    )

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True
    pipeline = beam_test_pipeline.TestPipeline(options=options)
    source = b_pubsub.ReadFromPubSub(
        "projects/fakeprj/topics/a_topic",
        None,
        None,
        with_attributes=True,
        timestamp_attribute="time",
    )
    _ = pipeline | source

    with pytest.raises(ValueError, match=r"parse"):
        pipeline.run()

    patch_sub_client.acknowledge.assert_not_called()
    patch_sub_client.api.transport.channel.close.assert_called_with()
def test_read_messages_timestamp_attribute_fail_parse(self, mock_pubsub):
    """Running the pipeline raises ``ValueError`` for a bad ``time`` value.

    The message carries ``time='1337 unparseable'``. The run must raise a
    ``ValueError`` matching ``parse``, acknowledge nothing, and close the
    client's transport channel.
    """
    attributes = {'time': '1337 unparseable'}
    response_message = test_utils.PullResponseMessage(
        b'data', attributes, 1520861821, 234567000, 'ack_id')
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    pipeline = TestPipeline(options=options)
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        None,
        with_attributes=True,
        timestamp_attribute='time')
    _ = pipeline | source

    with self.assertRaisesRegex(ValueError, r'parse'):
        pipeline.run()

    client.acknowledge.assert_not_called()
    client.api.transport.channel.close.assert_has_calls([mock.call()])
def test_read_messages_timestamp_attribute_missing(self, mock_pubsub):
    """Read a message that lacks the configured timestamp attribute.

    The source is configured with ``timestamp_attribute='nonexistent'`` and
    the message has no attributes; the expected element timestamp is the
    RFC 3339 string ``publish_time`` defined below.
    """
    ack_id = 'ack_id'
    data = b'data'
    attributes = {}
    publish_time = '2018-03-12T13:37:01.234567Z'

    response_message = test_utils.PullResponseMessage(
        data, attributes, 1520861821, 234567000, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp.from_rfc3339(publish_time),
            [window.GlobalWindow()]),
    ]

    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    with TestPipeline(options=options) as pipeline:
        source = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            None,
            with_attributes=True,
            timestamp_attribute='nonexistent')
        pcoll = pipeline | source
        assert_that(pcoll, equal_to(expected_elements), reify_windows=True)

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
    client.api.transport.channel.close.assert_has_calls([mock.call()])
def test_read_messages_timestamp_attribute_rfc3339_success(
        self, mock_pubsub):
    """Read a message whose ``time`` attribute is an RFC 3339 string.

    The expected element timestamp is parsed from the ``time`` attribute;
    the publish seconds/nanos given to the fake response are different
    values.
    """
    ack_id = 'ack_id'
    data = 'data'
    attributes = {'time': '2018-03-12T13:37:01.234567Z'}

    response_message = test_utils.PullResponseMessage(
        data, attributes, 1337000000, 133700000, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp.from_rfc3339(attributes['time']),
            [window.GlobalWindow()]),
    ]

    pipeline = TestPipeline()
    pipeline.options.view_as(StandardOptions).streaming = True
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        None,
        with_attributes=True,
        timestamp_attribute='time')
    pcoll = pipeline | source
    assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
    pipeline.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_read_messages_timestamp_attribute_missing(self, mock_pubsub):
    """Read a message that lacks the configured timestamp attribute.

    The source looks for a ``'nonexistent'`` attribute on a message with no
    attributes; the expected element timestamp is the RFC 3339 string
    ``publish_time`` defined below.
    """
    ack_id = 'ack_id'
    data = 'data'
    attributes = {}
    publish_time = '2018-03-12T13:37:01.234567Z'

    response_message = test_utils.PullResponseMessage(
        data, attributes, 1520861821, 234567000, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp.from_rfc3339(publish_time),
            [window.GlobalWindow()]),
    ]

    pipeline = TestPipeline()
    pipeline.options.view_as(StandardOptions).streaming = True
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        None,
        with_attributes=True,
        timestamp_attribute='nonexistent')
    pcoll = pipeline | source
    assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
    pipeline.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_read_from_pubsub_flaky(self):
    """Read ten elements from a client whose ``pull`` fails two calls in three.

    The fake ``pull`` raises ``RetryError``, then ``DeadlineExceeded``, then
    returns the canned response, repeating that cycle. The output must
    contain ``number_of_elements`` copies of the payload, and the ack ids
    are checked through ``_assert_ack_ids_equal``.
    """
    number_of_elements = 10
    subscription_path = "project/fakeproj/subscriptions/fakesub"
    data = b'data'
    ack_id = 'ack_id'
    pull_response = test_utils.create_pull_response(
        [test_utils.PullResponseMessage(data, ack_id=ack_id)])

    class FlakyPullResponse(object):
        """Callable cycling through two errors and one successful response."""

        def __init__(self, pull_response):
            self.pull_response = pull_response
            self._state = -1

        def __call__(self, *args, **kwargs):
            self._state += 1
            phase = self._state % 3
            if phase == 0:
                raise gexc.RetryError("", "")
            if phase == 1:
                raise gexc.DeadlineExceeded("")
            # phase == 2: the only remaining case, so the call succeeds.
            return self.pull_response

    mock_pubsub = mock.Mock()
    mock_pubsub.pull.side_effect = FlakyPullResponse(pull_response)

    output = utils.read_from_pubsub(
        mock_pubsub,
        subscription_path,
        number_of_elements=number_of_elements)

    self.assertEqual([data] * number_of_elements, output)
    self._assert_ack_ids_equal(mock_pubsub, [ack_id] * number_of_elements)
def test_read_messages_timestamp_attribute_milli_success(
        self, mock_pubsub):
    """Read a message whose ``time`` attribute is the integer string '1337'.

    The expected element timestamp is ``int(attributes['time']) * 1000``
    microseconds. After the pipeline exits, the client must have been asked
    to acknowledge ``ack_id`` (by keyword) and must have been closed.
    """
    ack_id = 'ack_id'
    data = b'data'
    attributes = {'time': '1337'}

    response_message = test_utils.PullResponseMessage(
        data, attributes, 1520861821, 234567000, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    expected_micros = int(attributes['time']) * 1000
    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp(micros=expected_micros),
            [window.GlobalWindow()]),
    ]

    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    with TestPipeline(options=options) as pipeline:
        source = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            None,
            with_attributes=True,
            timestamp_attribute='time')
        pcoll = pipeline | source
        assert_that(pcoll, equal_to(expected_elements), reify_windows=True)

    client.acknowledge.assert_has_calls(
        [mock.call(subscription=mock.ANY, ack_ids=[ack_id])])
    client.close.assert_has_calls([mock.call()])
def test_read_messages_timestamp_attribute_milli_success(self, mock_pubsub):
    """Read a message whose ``time`` attribute is the integer string '1337'.

    The expected element timestamp is ``int(attributes['time']) * 1000``
    microseconds. After the run, the client must have been asked to
    acknowledge ``ack_id``.
    """
    ack_id = 'ack_id'
    data = b'data'
    attributes = {'time': '1337'}

    response_message = test_utils.PullResponseMessage(
        data, attributes, 1520861821, 234567000, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    expected_micros = int(attributes['time']) * 1000
    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp(micros=expected_micros),
            [window.GlobalWindow()]),
    ]

    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    pipeline = TestPipeline(options=options)
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        None,
        with_attributes=True,
        timestamp_attribute='time')
    pcoll = pipeline | source
    assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
    pipeline.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_message_matcher_attributes_success(self, mock_get_sub, unsued_mock):
    """Check the matcher with attributes enabled and one matching message.

    The expected ``PubsubMessage`` and the pulled message carry the same
    payload and the same ``{'k': 'v'}`` attributes; one pull and one
    acknowledge call are expected.
    """
    expected = [PubsubMessage(b'a', {'k': 'v'})]
    self.init_matcher(expected_msg=expected, with_attributes=True)

    subscriber = mock_get_sub.return_value
    pulled = create_pull_response([PullResponseMessage(b'a', {'k': 'v'})])
    subscriber.pull.side_effect = [pulled]

    hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 1)
    self.assertEqual(subscriber.acknowledge.call_count, 1)
def test_message_count_matcher_success(self, mock_get_sub, unused_mock):
    """Check the count matcher when one pull returns all 15 messages.

    The matcher is initialised with ``expected_msg_len=15`` and the single
    pull response contains 15 identical messages.
    """
    self.init_counter_matcher(expected_msg_len=15)

    batch = [PullResponseMessage(b'a', {'foo': 'bar'}) for _ in range(15)]
    subscriber = mock_get_sub.return_value
    subscriber.pull.side_effect = [create_pull_response(batch)]

    hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 1)
    self.assertEqual(subscriber.acknowledge.call_count, 1)
def test_message_matcher_attributes_success(self, mock_get_sub, unsued_mock):
    """Check the matcher with attributes enabled and one matching message.

    ``expected_msg`` is assigned on the matcher after ``init_matcher``; the
    pulled message has the same payload and ``{'k': 'v'}`` attributes.
    """
    self.init_matcher(with_attributes=True)
    self.pubsub_matcher.expected_msg = [PubsubMessage(b'a', {'k': 'v'})]

    subscriber = mock_get_sub.return_value
    pulled = create_pull_response([PullResponseMessage(b'a', {'k': 'v'})])
    subscriber.pull.side_effect = [pulled]

    hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 1)
    self.assertEqual(subscriber.acknowledge.call_count, 1)
def test_message_matcher_attributes_fail(self, mock_get_sub, unsued_mock):
    """Matching fails when the pulled message has an unexpected attribute.

    The expected message has no attributes, but the pulled one carries
    ``{'k': 'v'}``. ``hc_assert_that`` must raise an ``AssertionError``
    whose message matches ``Unexpected``; one pull and one acknowledge
    call are still expected.
    """
    self.init_matcher(with_attributes=True)
    self.pubsub_matcher.expected_msg = [PubsubMessage(b'a', {})]
    mock_sub = mock_get_sub.return_value
    # Unexpected attribute 'k'.
    mock_sub.pull.side_effect = [
        create_pull_response([PullResponseMessage(b'a', {'k': 'v'})])
    ]
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed in Python 3.12.
    with self.assertRaisesRegex(AssertionError, r'Unexpected'):
        hc_assert_that(self.mock_presult, self.pubsub_matcher)
    self.assertEqual(mock_sub.pull.call_count, 1)
    self.assertEqual(mock_sub.acknowledge.call_count, 1)
def test_read_from_pubsub(self):
    """Read a single element through ``utils.read_from_pubsub``.

    The mocked client returns one message; the output must be its payload
    and ``acknowledge`` must be called once with the subscription path and
    the message's ack id, both positionally.
    """
    subscription_path = "project/fakeproj/subscriptions/fakesub"
    data = b'data'
    ack_id = 'ack_id'

    response_message = test_utils.PullResponseMessage(data, ack_id=ack_id)
    mock_pubsub = mock.Mock()
    mock_pubsub.pull.return_value = test_utils.create_pull_response(
        [response_message])

    output = utils.read_from_pubsub(
        mock_pubsub, subscription_path, number_of_elements=1)

    self.assertEqual([data], output)
    mock_pubsub.acknowledge.assert_called_once_with(
        subscription_path, [ack_id])
def test_message_matcher_attributes_fail(self, mock_get_sub, unsued_mock):
    """Matching fails when the pulled message has an unexpected attribute.

    The expected message has empty attributes while the pulled one carries
    ``{'k': 'v'}``; an ``AssertionError`` matching ``Unexpected`` must be
    raised.
    """
    expected = [PubsubMessage(b'a', {})]
    self.init_matcher(expected_msg=expected, with_attributes=True)

    subscriber = mock_get_sub.return_value
    # Unexpected attribute 'k'.
    pulled = create_pull_response([PullResponseMessage(b'a', {'k': 'v'})])
    subscriber.pull.side_effect = [pulled]

    with self.assertRaisesRegex(AssertionError, r'Unexpected'):
        hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 1)
    self.assertEqual(subscriber.acknowledge.call_count, 1)
def test_message_matcher_strip_fail(self, mock_get_sub, unsued_mock):
    """Matching fails when a to-be-stripped attribute is absent.

    The matcher is configured with ``strip_attributes=['id', 'timestamp']``
    but the pulled message has no ``timestamp`` attribute. An
    ``AssertionError`` matching ``Stripped attributes`` must be raised;
    one pull and one acknowledge call are still expected.
    """
    self.init_matcher(with_attributes=True,
                      strip_attributes=['id', 'timestamp'])
    self.pubsub_matcher.expected_msg = [PubsubMessage(b'a', {'k': 'v'})]
    mock_sub = mock_get_sub.return_value
    # Message is missing attribute 'timestamp'.
    mock_sub.pull.side_effect = [create_pull_response([
        PullResponseMessage(b'a', {'id': 'foo', 'k': 'v'})
    ])]
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed in Python 3.12.
    with self.assertRaisesRegex(AssertionError, r'Stripped attributes'):
        hc_assert_that(self.mock_presult, self.pubsub_matcher)
    self.assertEqual(mock_sub.pull.call_count, 1)
    self.assertEqual(mock_sub.acknowledge.call_count, 1)
def test_message_matcher_strip_fail(self, mock_get_sub, unsued_mock):
    """Matching fails when a to-be-stripped attribute is absent.

    ``strip_attributes`` lists ``'id'`` and ``'timestamp'``, but the pulled
    message only has ``'id'`` and ``'k'``; an ``AssertionError`` matching
    ``Stripped attributes`` must be raised.
    """
    expected = [PubsubMessage(b'a', {'k': 'v'})]
    self.init_matcher(
        expected_msg=expected,
        with_attributes=True,
        strip_attributes=['id', 'timestamp'])

    subscriber = mock_get_sub.return_value
    # Message is missing attribute 'timestamp'.
    pulled = create_pull_response(
        [PullResponseMessage(b'a', {'id': 'foo', 'k': 'v'})])
    subscriber.pull.side_effect = [pulled]

    with self.assertRaisesRegex(AssertionError, r'Stripped attributes'):
        hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 1)
    self.assertEqual(subscriber.acknowledge.call_count, 1)
def test_message_count_matcher_below_fail(self, mock_get_sub, unused_mock):
    """Count matcher fails when two messages arrive but one is expected.

    The raised ``AssertionError`` must contain the expected/got summary
    text, and the subscriber must have been pulled exactly once.
    """
    self.init_counter_matcher(expected_msg_len=1)

    subscriber = mock_get_sub.return_value
    pulled = create_pull_response(
        [PullResponseMessage(b'c', {}), PullResponseMessage(b'd', {})])
    subscriber.pull.side_effect = [pulled]

    with self.assertRaises(AssertionError) as error:
        hc_assert_that(self.mock_presult, self.pubsub_matcher)

    self.assertEqual(subscriber.pull.call_count, 1)
    failure_text = str(error.exception.args[0])
    self.assertIn(
        '\nExpected: Expected 1 messages.\n but: Got 2 messages.',
        failure_text)
def test_read_data_success(self, mock_pubsub):
    """Read raw payload bytes from a mocked subscription.

    The payload is a UTF-8 encoded non-ASCII string; the pipeline output is
    expected to be those same encoded bytes, and the client must have been
    asked to acknowledge ``ack_id``.
    """
    ack_id = 'ack_id'
    data_encoded = u'🤷 ¯\\_(ツ)_/¯'.encode('utf-8')

    response_message = test_utils.PullResponseMessage(
        data_encoded, ack_id=ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    expected_elements = [data_encoded]

    pipeline = TestPipeline()
    pipeline.options.view_as(StandardOptions).streaming = True
    source = ReadFromPubSub('projects/fakeprj/topics/a_topic', None, None)
    pcoll = pipeline | source
    assert_that(pcoll, equal_to(expected_elements))
    pipeline.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_message_matcher_mismatch(self, mock_get_sub, unused_mock):
    """Matching fails when two messages arrive but one is expected.

    The matcher expects a single message, while the pull response carries
    two. The raised ``AssertionError`` must contain the expected/got
    summary text, the matcher must have recorded both pulled messages, and
    one pull plus one acknowledge call are expected.
    """
    self.init_matcher()
    self.pubsub_matcher.expected_msg = ['a']
    mock_sub = mock_get_sub.return_value
    mock_sub.pull.side_effect = [
        create_pull_response([PullResponseMessage(b'c', {}),
                              PullResponseMessage(b'd', {})]),
    ]
    with self.assertRaises(AssertionError) as error:
        hc_assert_that(self.mock_presult, self.pubsub_matcher)
    self.assertEqual(mock_sub.pull.call_count, 1)
    self.assertCountEqual(['c', 'd'], self.pubsub_matcher.messages)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(
        '\nExpected: Expected 1 messages.\n but: Got 2 messages.',
        str(error.exception.args[0]))
    self.assertEqual(mock_sub.acknowledge.call_count, 1)
def test_read_from_pubsub_with_attributes(self):
    """Read a single element with attributes via ``utils.read_from_pubsub``.

    With ``with_attributes=True`` the output must equal a ``PubsubMessage``
    built from the payload and attributes, and ``acknowledge`` must be
    called once using the ``subscription`` and ``ack_ids`` keywords.
    """
    subscription_path = "project/fakeproj/subscriptions/fakesub"
    data = b'data'
    ack_id = 'ack_id'
    attributes = {'key': 'value'}
    expected_message = PubsubMessage(data, attributes)

    response_message = test_utils.PullResponseMessage(
        data, attributes, ack_id=ack_id)
    mock_pubsub = mock.Mock()
    mock_pubsub.pull.return_value = test_utils.create_pull_response(
        [response_message])

    output = utils.read_from_pubsub(
        mock_pubsub,
        subscription_path,
        with_attributes=True,
        number_of_elements=1)

    self.assertEqual([expected_message], output)
    mock_pubsub.acknowledge.assert_called_once_with(
        subscription=subscription_path, ack_ids=[ack_id])
def test_message_matcher_mismatch(self, mock_get_sub, unused_mock):
    """Matching fails when two messages arrive but one is expected.

    The matcher expects only ``b'a'``, while the pull response carries
    ``b'c'`` and ``b'd'``. The raised ``AssertionError`` must contain the
    expected/got summary text, the matcher must have recorded both pulled
    payloads, and one pull plus one acknowledge call are expected.
    """
    self.init_matcher(expected_msg=[b'a'])
    mock_sub = mock_get_sub.return_value
    mock_sub.pull.side_effect = [
        create_pull_response(
            [PullResponseMessage(b'c', {}), PullResponseMessage(b'd', {})]),
    ]
    with self.assertRaises(AssertionError) as error:
        hc_assert_that(self.mock_presult, self.pubsub_matcher)
    self.assertEqual(mock_sub.pull.call_count, 1)
    self.assertCountEqual([b'c', b'd'], self.pubsub_matcher.messages)
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(
        '\nExpected: Expected 1 messages.\n but: Got 2 messages.',
        str(error.exception.args[0]))
    self.assertEqual(mock_sub.acknowledge.call_count, 1)
def test_read_data_success(self, mock_pubsub):
    """Read raw payload bytes from a mocked subscription.

    The pulled payload is a UTF-8 encoded non-ASCII string and the pipeline
    output is expected to be those same bytes; the client must then have
    been asked to acknowledge ``ack_id``.
    """
    ack_id = 'ack_id'
    data_encoded = u'🤷 ¯\\_(ツ)_/¯'.encode('utf-8')
    expected_elements = [data_encoded]

    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [test_utils.PullResponseMessage(data_encoded, ack_id=ack_id)])

    pipeline = TestPipeline()
    pipeline.options.view_as(StandardOptions).streaming = True
    pcoll = pipeline | ReadFromPubSub(
        'projects/fakeprj/topics/a_topic', None, None)
    assert_that(pcoll, equal_to(expected_elements))
    pipeline.run()

    expected_ack_call = mock.call(mock.ANY, [ack_id])
    client.acknowledge.assert_has_calls([expected_ack_call])
def test_read_strings_success(self, mock_pubsub):
    """Read a payload as text through ``ReadStringsFromPubSub``.

    The pulled payload is the UTF-8 encoding of a non-ASCII string and the
    pipeline output is expected to be the original (unencoded) string.
    After the pipeline exits, the client must have been asked to
    acknowledge ``ack_id`` (by keyword) and must have been closed.
    """
    ack_id = 'ack_id'
    data = u'🤷 ¯\\_(ツ)_/¯'
    data_encoded = data.encode('utf-8')
    expected_elements = [data]

    response_message = test_utils.PullResponseMessage(
        data_encoded, ack_id=ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [response_message])

    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    with TestPipeline(options=options) as pipeline:
        source = ReadStringsFromPubSub(
            'projects/fakeprj/topics/a_topic', None, None)
        pcoll = pipeline | source
        assert_that(pcoll, equal_to(expected_elements))

    client.acknowledge.assert_has_calls(
        [mock.call(subscription=mock.ANY, ack_ids=[ack_id])])
    client.close.assert_has_calls([mock.call()])
def test_read_messages_timestamp_attribute_fail_parse(self, mock_pubsub):
    """Running the pipeline raises ``ValueError`` for a bad ``time`` value.

    The message carries ``time='1337 unparseable'``. The run must raise a
    ``ValueError`` whose message matches ``parse``, and the mocked client
    must not have been asked to acknowledge anything.
    """
    data = 'data'
    attributes = {'time': '1337 unparseable'}
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000
    ack_id = 'ack_id'
    pull_response = test_utils.create_pull_response([
        test_utils.PullResponseMessage(
            data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    ])
    mock_pubsub.return_value.pull.return_value = pull_response

    p = TestPipeline()
    p.options.view_as(StandardOptions).streaming = True
    _ = (p
         | ReadFromPubSub(
             'projects/fakeprj/topics/a_topic', None, None,
             with_attributes=True, timestamp_attribute='time'))
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed in Python 3.12.
    with self.assertRaisesRegex(ValueError, r'parse'):
        p.run()
    mock_pubsub.return_value.acknowledge.assert_not_called()
def __call__(self, *args, **kwargs):
    """Return a pull response holding the next slice of response messages.

    Each call takes ``self.response_size`` messages from
    ``self.response_messages`` starting at ``self._index`` and advances
    ``self._index`` by ``self.response_size``. Positional and keyword
    arguments are accepted but ignored.
    """
    begin = self._index
    end = begin + self.response_size
    self._index += self.response_size
    batch = self.response_messages[begin:end]
    return test_utils.create_pull_response(batch)