def test_file_checksum_matcher_read_failed(self, mock_match):
  """FileChecksumMatcher retries on IOError, then re-raises it.

  The patched read keeps raising, so the matcher is expected to try
  MAX_RETRIES + 1 times in total before letting the IOError escape.
  """
  mock_match.side_effect = IOError('No file found.')
  matcher = verifiers.FileChecksumMatcher('dummy/path', Mock())
  with self.assertRaises(IOError):
    hc_assert_that(self._mock_result, matcher)
  self.assertTrue(mock_match.called)
  # One initial attempt plus MAX_RETRIES retries.
  self.assertEqual(verifiers.MAX_RETRIES + 1, mock_match.call_count)
def run_pipeline(self, pipeline):
  """Execute test pipeline and verify test matcher.

  Runs the pipeline, waits for completion (bounded by
  wait_until_finish_duration when set), then applies the pickled
  on_success_matcher. A job that is still non-terminal on exit is
  cancelled so test runs do not leak resources.
  """
  options = pipeline._options.view_as(TestOptions)
  on_success_matcher = options.on_success_matcher
  wait_duration = options.wait_until_finish_duration
  is_streaming = options.view_as(StandardOptions).streaming

  # [BEAM-1889] Do not send this to remote workers also, there is no need to
  # send this option to remote executors.
  options.on_success_matcher = None

  self.result = super(TestDataflowRunner, self).run_pipeline(pipeline)
  if self.result.has_job:
    # TODO(markflyhigh)(BEAM-1890): Use print since Nose doesn't show logs
    # in some cases.
    print('Found: %s.' % self.build_console_url(pipeline.options))

  try:
    self.wait_until_in_state(PipelineState.RUNNING)

    # Streaming jobs never finish by themselves; warn when no bound is set.
    if is_streaming and not wait_duration:
      logging.warning('Waiting indefinitely for streaming job.')
    self.result.wait_until_finish(duration=wait_duration)

    if on_success_matcher:
      # Imported lazily so the dependency is only needed when a matcher runs.
      from hamcrest import assert_that as hc_assert_that
      hc_assert_that(self.result, pickler.loads(on_success_matcher))
  finally:
    if not self.result.is_in_terminal_state():
      self.result.cancel()
      self.wait_until_in_state(PipelineState.CANCELLED, timeout=300)

  return self.result
def run(self, pipeline):
  """Execute test pipeline and verify test matcher.

  Runs the pipeline, prints the Dataflow console URL when a job was
  created, waits (unbounded) for completion, then applies the pickled
  on_success_matcher if one was supplied.
  """
  options = pipeline.options.view_as(TestOptions)
  on_success_matcher = options.on_success_matcher

  # [BEAM-1889] Do not send this to remote workers also, there is no need to
  # send this option to remote executors.
  options.on_success_matcher = None

  self.result = super(TestDataflowRunner, self).run(pipeline)
  if self.result.has_job:
    project = pipeline.options.view_as(GoogleCloudOptions).project
    job_id = self.result.job_id()
    # TODO(markflyhigh)(BEAM-1890): Use print since Nose doesn't show logs
    # in some cases.
    print (
        'Found: https://console.cloud.google.com/dataflow/job/%s?project=%s' %
        (job_id, project))
  self.result.wait_until_finish()

  if on_success_matcher:
    # Imported lazily so the dependency is only needed when a matcher runs.
    from hamcrest import assert_that as hc_assert_that
    hc_assert_that(self.result, pickler.loads(on_success_matcher))

  return self.result
def test_matches_none_for_distribution(self):
  """All-negated criteria still match the everything-distribution result."""
  metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)

  def wrong_distribution():
    # Deliberately does not describe the real distribution, then negated.
    return is_not(DistributionMatcher(
        sum_value=120, count_value=50, min_value=100, max_value=60))

  label_criteria = {
      is_not(equal_to('invalidPcollection')): anything(),
      is_not(equal_to('invalidCustomKey')):
          is_not(equal_to('invalidCustomValue')),
  }
  matcher = MetricResultMatcher(
      namespace=is_not(equal_to('invalidNamespace')),
      name=is_not(equal_to('invalidName')),
      step=is_not(equal_to('invalidStep')),
      labels=label_criteria,
      committed=wrong_distribution(),
      attempted=wrong_distribution())
  hc_assert_that(metric_result, matcher)
def run_pipeline(self, pipeline):
  """Execute test pipeline and verify test matcher.

  Batch pipelines are waited on before the matcher runs; streaming
  pipelines are matched without waiting. Any pipeline still non-terminal
  on exit is cancelled and then waited on.
  """
  options = pipeline._options.view_as(TestOptions)
  on_success_matcher = options.on_success_matcher
  is_streaming = options.view_as(StandardOptions).streaming

  # [BEAM-1889] Do not send this to remote workers also, there is no need to
  # send this option to remote executors.
  options.on_success_matcher = None

  self.result = super(TestDirectRunner, self).run_pipeline(pipeline)

  try:
    if not is_streaming:
      self.result.wait_until_finish()

    if on_success_matcher:
      # Imported lazily so the dependency is only needed when a matcher runs.
      from hamcrest import assert_that as hc_assert_that
      hc_assert_that(self.result, pickler.loads(on_success_matcher))
  finally:
    if not PipelineState.is_terminal(self.result.state):
      self.result.cancel()
      self.result.wait_until_finish()

  return self.result
def test_matches_key_but_not_value(self):
  """A label key that exists but maps to a different value must not match."""
  metric_result = _create_metric_result(EVERYTHING_COUNTER)
  mismatched_labels = {'pcollection': 'invalidCollection'}
  hc_assert_that(
      metric_result, is_not(MetricResultMatcher(labels=mismatched_labels)))
def test_matches_distribution_with_custom_matchers(self):
  """Custom hamcrest matchers compose inside MetricResultMatcher.

  The composite is built so that it must NOT match the result; the outer
  is_not makes the assertion pass.
  """
  metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)

  def negated_distribution():
    return is_not(DistributionMatcher(
        sum_value=greater_than(-1),
        count_value=greater_than(-1),
        min_value=greater_than(-1),
        max_value=greater_than(-1)))

  label_criteria = {
      equal_to_ignoring_case('PCOLLECTION'):
          equal_to_ignoring_case('MYCUSTOMVALUE'),
      'myCustomKey': equal_to_ignoring_case('MYCUSTOMVALUE'),
  }
  composite = MetricResultMatcher(
      namespace=equal_to_ignoring_case('MYNAMESPACE'),
      name=equal_to_ignoring_case('MYNAME'),
      step=equal_to_ignoring_case('MYSTEP'),
      labels=label_criteria,
      committed=negated_distribution(),
      attempted=negated_distribution())
  hc_assert_that(metric_result, is_not(composite))
def test_distribution_does_not_match_counter_and_doesnt_crash(self):
  """Counter-style scalar criteria against a distribution: no match, no error."""
  distribution_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
  counter_style_matcher = MetricResultMatcher(attempted=42, committed=42)
  hc_assert_that(distribution_result, is_not(counter_style_matcher))
def test_file_checksum_matcher_success(self):
  """FileChecksumMatcher matches when files hash to the expected checksum.

  Each case writes its content num_files times into a fresh temp dir and
  matches over the glob of that dir.
  """
  for case in self.test_cases:
    # NOTE(review): temp dirs created here are never removed; acceptable for
    # tests but worth cleaning up with addCleanup/shutil.rmtree.
    temp_dir = tempfile.mkdtemp()
    for _ in range(case['num_files']):
      self.create_temp_file(case['content'], temp_dir)
    matcher = verifiers.FileChecksumMatcher(temp_dir + '/*',
                                            case['expected_checksum'])
    hc_assert_that(self._mock_result, matcher)
def test_message_matcher_timeout(self, mock_get_sub, unused_mock):
  """Matcher fails with the expected/got diff once the poll timeout expires.

  No messages are ever returned by the mocked subscription, so after the
  (very short) timeout the matcher reports 1 expected vs 0 received.
  """
  self.init_matcher()
  mock_sub = mock_get_sub.return_value
  mock_sub.return_value.full_name.return_value = 'mock_sub'
  # Keep the poll window tiny so the test stays fast.
  self.pubsub_matcher.timeout = 0.1
  with self.assertRaisesRegexp(AssertionError, r'Expected 1.*\n.*Got 0'):
    hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertTrue(mock_sub.pull.called)
def test_message_matcher_success(self, mock_get_sub, unused_mock):
  """Matcher succeeds once both expected messages have been pulled.

  Two pulls each return one message, so the matcher must poll exactly
  twice. (Fixes misspelled parameter 'unsued_mock'; args are injected
  positionally by the @patch decorators, so the rename is safe.)
  """
  self.pubsub_matcher.expected_msg = ['a', 'b']
  mock_sub = mock_get_sub.return_value
  mock_sub.pull.side_effect = [
      [(1, pubsub.message.Message(b'a', 'unused_id'))],
      [(2, pubsub.message.Message(b'b', 'unused_id'))],
  ]
  hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 2)
def test_file_checksum_matcher_service_error(self, mock_match):
  """HttpError from the storage service is retried, then re-raised.

  The matcher should attempt MAX_RETRIES + 1 reads before giving up.
  """
  mock_match.side_effect = HttpError(
      response={'status': '404'}, url='', content='Not Found',
  )
  matcher = verifiers.FileChecksumMatcher('gs://dummy/path', Mock())
  with self.assertRaises(HttpError):
    hc_assert_that(self._mock_result, matcher)
  self.assertTrue(mock_match.called)
  # One initial attempt plus MAX_RETRIES retries.
  self.assertEqual(verifiers.MAX_RETRIES + 1, mock_match.call_count)
def test_file_checksum_matcher_sleep_before_verify(self, mocked_sleep):
  """A non-zero sleep argument makes the matcher sleep before verifying."""
  temp_dir = tempfile.mkdtemp()
  case = self.test_cases[0]
  self.create_temp_file(case['content'], temp_dir)
  # Third argument (10) requests a pre-verification sleep; time.sleep is
  # patched so the test does not actually wait.
  matcher = verifiers.FileChecksumMatcher(temp_dir + '/*',
                                          case['expected_checksum'],
                                          10)
  hc_assert_that(self._mock_result, matcher)
  self.assertTrue(mocked_sleep.called)
def test_message_matcher_attributes_success(self, mock_get_sub, unused_mock):
  """A pulled message whose payload and attributes match is accepted.

  (Fixes misspelled parameter 'unsued_mock'; args are injected positionally
  by the @patch decorators, so the rename is safe.)
  """
  self.init_matcher(with_attributes=True)
  self.pubsub_matcher.expected_msg = [PubsubMessage('a', {'k': 'v'})]
  mock_sub = mock_get_sub.return_value
  msg_a = pubsub.message.Message(b'a', 'unused_id')
  msg_a.attributes['k'] = 'v'
  mock_sub.pull.side_effect = [[(1, msg_a)]]
  hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 1)
def test_message_matcher_attributes_success(self, mock_get_sub, unused_mock):
  """A pulled message whose payload and attributes match is accepted and acked.

  (Fixes misspelled parameter 'unsued_mock'; args are injected positionally
  by the @patch decorators, so the rename is safe.)
  """
  self.init_matcher(with_attributes=True)
  self.pubsub_matcher.expected_msg = [PubsubMessage(b'a', {'k': 'v'})]
  mock_sub = mock_get_sub.return_value
  mock_sub.pull.side_effect = [
      create_pull_response([PullResponseMessage(b'a', {'k': 'v'})])
  ]
  hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 1)
  # Matched messages must be acknowledged exactly once.
  self.assertEqual(mock_sub.acknowledge.call_count, 1)
def test_message_matcher_attributes_fail(self, mock_get_sub, unused_mock):
  """An unexpected attribute on a pulled message fails the match.

  (Fixes misspelled parameter 'unsued_mock'; args are injected positionally
  by the @patch decorators, so the rename is safe.)
  """
  self.init_matcher(with_attributes=True)
  self.pubsub_matcher.expected_msg = [PubsubMessage('a', {})]
  mock_sub = mock_get_sub.return_value
  msg_a = pubsub.message.Message(b'a', 'unused_id')
  msg_a.attributes['k'] = 'v'  # Unexpected.
  mock_sub.pull.side_effect = [[(1, msg_a)]]
  with self.assertRaisesRegexp(AssertionError, r'Unexpected'):
    hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 1)
def test_message_metcher_timeout(self, mock_get_sub, unused_mock):
  """Matcher times out with the exact expected/got diff message.

  NOTE(review): 'metcher' in the method name is a typo for 'matcher'; left
  as-is since renaming could collide with a sibling test of that name.
  """
  mock_sub = mock_get_sub.return_value
  mock_sub.return_value.full_name.return_value = 'mock_sub'
  # Keep the poll window tiny so the test stays fast.
  self.pubsub_matcher.timeout = 0.1
  with self.assertRaises(AssertionError) as error:
    hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertTrue(mock_sub.pull.called)
  # Pin the full mismatch description, including the diff list.
  self.assertEqual(
      '\nExpected: Expected %d messages.\n but: Got %d messages. Diffs: '
      '%s.\n' % (1, 0, ['mock_expected_msg']),
      str(error.exception.args[0]))
def test_message_matcher_success(self, mock_get_sub, unused_mock):
  """Matcher succeeds after pulling both expected messages and acks each pull.

  (Fixes misspelled parameter 'unsued_mock'; args are injected positionally
  by the @patch decorators, so the rename is safe.)
  """
  self.init_matcher()
  self.pubsub_matcher.expected_msg = ['a', 'b']
  mock_sub = mock_get_sub.return_value
  mock_sub.pull.side_effect = [
      create_pull_response([PullResponseMessage(b'a', {})]),
      create_pull_response([PullResponseMessage(b'b', {})]),
  ]
  hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 2)
  self.assertEqual(mock_sub.acknowledge.call_count, 2)
def test_bigquery_matcher_success(self, mock_bigquery):
  """BigqueryMatcher matches when the query result hashes to the checksum.

  fetch_data returns no rows; the expected checksum here is the SHA-1 of
  empty input (da39a3ee...), so the matcher should succeed.
  """
  mock_query = Mock()
  mock_client = mock_bigquery.return_value
  mock_client.run_sync_query.return_value = mock_query
  mock_query.fetch_data.return_value = ([], None, None)
  matcher = bq_verifier.BigqueryMatcher(
      'mock_project',
      'mock_query',
      'da39a3ee5e6b4b0d3255bfef95601890afd80709')
  hc_assert_that(self._mock_result, matcher)
def test_bigquery_matcher_success(self, mock_bigquery):
  """BigqueryMatcher matches when the query result hashes to the checksum.

  fetch_data returns no rows; the expected checksum is presumably what this
  verifier computes for an empty result set — TODO confirm against
  bq_verifier's hashing scheme.
  """
  mock_query = Mock()
  mock_client = mock_bigquery.return_value
  mock_client.run_sync_query.return_value = mock_query
  mock_query.fetch_data.return_value = ([], None, None)
  matcher = bq_verifier.BigqueryMatcher(
      'mock_project',
      'mock_query',
      '59f9d6bdee30d67ea73b8aded121c3a0280f9cd8')
  hc_assert_that(self._mock_result, matcher)
def run(self, pipeline):
  """Execute test pipeline and verify test matcher.

  Runs the pipeline, waits (unbounded) for it to finish, then applies the
  pickled on_success_matcher if one was supplied via TestOptions.
  """
  self.result = super(TestDataflowRunner, self).run(pipeline)
  self.result.wait_until_finish()

  options = pipeline.options.view_as(TestOptions)
  if options.on_success_matcher:
    # Imported lazily so the dependency is only needed when a matcher runs.
    from hamcrest import assert_that as hc_assert_that
    hc_assert_that(self.result, pickler.loads(options.on_success_matcher))

  return self.result
def test_message_matcher_attributes_fail(self, mock_get_sub, unused_mock):
  """An unexpected attribute on a pulled message fails the match; pull is acked.

  (Fixes misspelled parameter 'unsued_mock'; args are injected positionally
  by the @patch decorators, so the rename is safe.)
  """
  self.init_matcher(with_attributes=True)
  self.pubsub_matcher.expected_msg = [PubsubMessage(b'a', {})]
  mock_sub = mock_get_sub.return_value
  # Unexpected attribute 'k'.
  mock_sub.pull.side_effect = [
      create_pull_response([PullResponseMessage(b'a', {'k': 'v'})])
  ]
  with self.assertRaisesRegexp(AssertionError, r'Unexpected'):
    hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 1)
  self.assertEqual(mock_sub.acknowledge.call_count, 1)
def test_message_matcher_strip_fail(self, mock_get_sub, unused_mock):
  """Missing strippable attribute on a pulled message fails the match.

  (Fixes misspelled parameter 'unsued_mock'; args are injected positionally
  by the @patch decorators, so the rename is safe.)
  """
  self.init_matcher(with_attributes=True,
                    strip_attributes=['id', 'timestamp'])
  self.pubsub_matcher.expected_msg = [PubsubMessage(b'a', {'k': 'v'})]
  mock_sub = mock_get_sub.return_value
  # Message is missing attribute 'timestamp'.
  mock_sub.pull.side_effect = [create_pull_response([
      PullResponseMessage(b'a', {'id': 'foo', 'k': 'v'})
  ])]
  with self.assertRaisesRegexp(AssertionError, r'Stripped attributes'):
    hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 1)
  self.assertEqual(mock_sub.acknowledge.call_count, 1)
def test_bigquery_matcher_query_responds_error_code(self, mock_bigquery):
  """A NotFound error from query.run is retried, then re-raised.

  The matcher should attempt MAX_RETRIES + 1 runs before giving up.
  """
  mock_query = Mock()
  mock_client = mock_bigquery.return_value
  mock_client.run_sync_query.return_value = mock_query
  mock_query.run.side_effect = NotFound('table is not found')
  matcher = bq_verifier.BigqueryMatcher('mock_project',
                                        'mock_query',
                                        'mock_checksum')
  with self.assertRaises(NotFound):
    hc_assert_that(self._mock_result, matcher)
  self.assertTrue(mock_query.run.called)
  # One initial attempt plus MAX_RETRIES retries.
  self.assertEqual(bq_verifier.MAX_RETRIES + 1, mock_query.run.call_count)
def test_bigquery_matcher_query_run_error(self, mock_bigquery):
  """A ValueError from query.run is retried, then re-raised.

  The matcher should attempt MAX_RETRIES + 1 runs before giving up.
  """
  mock_query = Mock()
  mock_client = mock_bigquery.return_value
  mock_client.run_sync_query.return_value = mock_query
  mock_query.run.side_effect = ValueError('job is already running')
  matcher = bq_verifier.BigqueryMatcher('mock_project',
                                        'mock_query',
                                        'mock_checksum')
  with self.assertRaises(ValueError):
    hc_assert_that(self._mock_result, matcher)
  self.assertTrue(mock_query.run.called)
  # One initial attempt plus MAX_RETRIES retries.
  self.assertEqual(bq_verifier.MAX_RETRIES + 1, mock_query.run.call_count)
def test_message_matcher_strip_fail(self, mock_get_sub, unused_mock):
  """Missing strippable attribute on a pulled message fails the match.

  (Fixes misspelled parameter 'unsued_mock'; args are injected positionally
  by the @patch decorators, so the rename is safe.)
  """
  self.init_matcher(with_attributes=True,
                    strip_attributes=['id', 'timestamp'])
  self.pubsub_matcher.expected_msg = [PubsubMessage('a', {'k': 'v'})]
  mock_sub = mock_get_sub.return_value
  # msg_a is missing attribute 'timestamp'.
  msg_a = pubsub.message.Message(b'a', 'unused_id')
  msg_a.attributes['id'] = 'foo'
  msg_a.attributes['k'] = 'v'
  mock_sub.pull.side_effect = [[(1, msg_a)]]
  with self.assertRaisesRegexp(AssertionError, r'Stripped attributes'):
    hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 1)
def test_pipeline_state_matcher_fails(self):
  """PipelineStateMatcher with its default expectation rejects every
  non-successful terminal state (CANCELLED/DRAINED/FAILED/STOPPED/UNKNOWN).
  """
  non_success_states = (
      PipelineState.CANCELLED,
      PipelineState.DRAINED,
      PipelineState.FAILED,
      PipelineState.STOPPED,
      PipelineState.UNKNOWN,
  )
  for end_state in non_success_states:
    result = PipelineResult(end_state)
    with self.assertRaises(AssertionError):
      hc_assert_that(result, verifiers.PipelineStateMatcher())
def test_matches_none_for_counter(self):
  """All-negated criteria still match the everything-counter result."""
  counter_result = _create_metric_result(EVERYTHING_COUNTER)
  label_criteria = {
      is_not(equal_to('invalidPcollection')): anything(),
      is_not(equal_to('invalidCustomKey')):
          is_not(equal_to('invalidCustomValue')),
  }
  matcher = MetricResultMatcher(
      namespace=is_not(equal_to('invalidNamespace')),
      name=is_not(equal_to('invalidName')),
      step=is_not(equal_to('invalidStep')),
      labels=label_criteria,
      attempted=is_not(equal_to(1000)),
      committed=is_not(equal_to(1000)))
  hc_assert_that(counter_result, matcher)
def test_message_matcher_mismatch(self, mock_get_sub, unused_mock):
  """Wrong messages produce a count mismatch and are still recorded.

  NOTE(review): assertItemsEqual is Python 2 only (assertCountEqual in
  Python 3); left as-is to match the file's current target runtime.
  """
  self.pubsub_matcher.expected_msg = ['a']
  mock_sub = mock_get_sub.return_value
  # Every pull returns two wrong messages.
  mock_sub.pull.return_value = [
      (1, pubsub.message.Message(b'c', 'unused_id')),
      (1, pubsub.message.Message(b'd', 'unused_id')),
  ]
  with self.assertRaises(AssertionError) as error:
    hc_assert_that(self.mock_presult, self.pubsub_matcher)
  self.assertEqual(mock_sub.pull.call_count, 1)
  # The matcher keeps what it actually received for diagnostics.
  self.assertItemsEqual(['c', 'd'], self.pubsub_matcher.messages)
  self.assertTrue(
      '\nExpected: Expected 1 messages.\n but: Got 2 messages.'
      in str(error.exception.args[0]))
def test_matches_all_for_counter(self):
  """Matcher with every exact counter attribute specified matches."""
  counter_result = _create_metric_result(EVERYTHING_COUNTER)
  expected_labels = {
      'pcollection': 'myCollection',
      'myCustomKey': 'myCustomValue',
  }
  full_matcher = MetricResultMatcher(
      namespace='myNamespace',
      name='myName',
      step='myStep',
      labels=expected_labels,
      attempted=42,
      committed=42)
  hc_assert_that(counter_result, full_matcher)
def test_pipeline_state_matcher_given_state(self):
  """Matcher succeeds when the job ends in the explicitly requested state."""
  result = PipelineResult(PipelineState.FAILED)
  state_matcher = verifiers.PipelineStateMatcher(PipelineState.FAILED)
  hc_assert_that(result, state_matcher)
def test_pipeline_state_matcher_success(self):
  """Default matcher (expects DONE) succeeds for a job that finished DONE."""
  result = PipelineResult(PipelineState.DONE)
  hc_assert_that(result, verifiers.PipelineStateMatcher())
def test_distribution_does_not_match_counter_and_doesnt_crash(self):
  """Scalar (counter) expectations never match a distribution result."""
  result = _create_metric_result(EVERYTHING_DISTRIBUTION)
  hc_assert_that(
      result, is_not(MetricResultMatcher(committed=42, attempted=42)))
def test_matches_key_but_not_value(self):
  """Label key present but value different: matcher must not match."""
  counter_result = _create_metric_result(EVERYTHING_COUNTER)
  mismatched = MetricResultMatcher(labels={'pcollection': 'invalidCollection'})
  hc_assert_that(counter_result, is_not(mismatched))