def test_matches_distribution_with_custom_matchers(self):
  metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
  matcher = is_not(
      MetricResultMatcher(
          namespace=equal_to_ignoring_case('MYNAMESPACE'),
          name=equal_to_ignoring_case('MYNAME'),
          step=equal_to_ignoring_case('MYSTEP'),
          labels={
              equal_to_ignoring_case('PCOLLECTION'):
                  equal_to_ignoring_case('MYCUSTOMVALUE'),
              'myCustomKey': equal_to_ignoring_case('MYCUSTOMVALUE')
          },
          committed=is_not(
              DistributionMatcher(
                  sum_value=greater_than(-1),
                  count_value=greater_than(-1),
                  min_value=greater_than(-1),
                  max_value=greater_than(-1))),
          attempted=is_not(
              DistributionMatcher(
                  sum_value=greater_than(-1),
                  count_value=greater_than(-1),
                  min_value=greater_than(-1),
                  max_value=greater_than(-1))),
      ))
  hc_assert_that(metric_result, matcher)
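# For context: the matcher tests in this file build a MetricResult from dict
# fixtures via _create_metric_result (defined elsewhere in this file). The
# dict below is a hypothetical illustration of that fixture shape, with values
# inferred from the assertions in test_matches_all_for_distribution; it is not
# the canonical EVERYTHING_DISTRIBUTION definition.
_EXAMPLE_DISTRIBUTION_FIXTURE = {
    'namespace': 'myNamespace',
    'name': 'myName',
    'step': 'myStep',
    'labels': {
        'pcollection': 'myCollection',
        'myCustomKey': 'myCustomValue'
    },
    # Mirrors DistributionMatcher(sum_value=12, count_value=5, min_value=0,
    # max_value=6) as asserted in test_matches_all_for_distribution.
    'attempted': {'sum': 12, 'count': 5, 'min': 0, 'max': 6},
    'committed': {'sum': 12, 'count': 5, 'min': 0, 'max': 6},
}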
def test_streaming_pipeline_returns_expected_user_metrics_fnapi_it(self):
  """Runs a streaming Dataflow job and verifies that user metrics are
  reported correctly."""
  self._inject_words(self.input_topic, MESSAGES_TO_PUBLISH)
  result = self.run_pipeline()

  METRIC_NAMESPACE = (
      'apache_beam.runners.dataflow.'
      'dataflow_exercise_streaming_metrics_pipeline.StreamingUserMetricsDoFn')
  matchers = [
      # System metrics.
      MetricResultMatcher(
          name='ElementCount',
          labels={
              'output_user_name': 'generate_metrics-out0',
              'original_name': 'generate_metrics-out0-ElementCount'
          },
          attempted=len(MESSAGES_TO_PUBLISH),
          committed=len(MESSAGES_TO_PUBLISH),
      ),
      MetricResultMatcher(
          name='ElementCount',
          labels={
              'output_user_name': 'ReadFromPubSub/Read-out0',
              'original_name': 'ReadFromPubSub/Read-out0-ElementCount'
          },
          attempted=len(MESSAGES_TO_PUBLISH),
          committed=len(MESSAGES_TO_PUBLISH),
      ),
      # User counter metrics.
      MetricResultMatcher(
          name='double_msg_counter_name',
          namespace=METRIC_NAMESPACE,
          step='generate_metrics',
          attempted=len(MESSAGES_TO_PUBLISH) * 2,
          committed=len(MESSAGES_TO_PUBLISH) * 2),
      MetricResultMatcher(
          name='msg_len_dist_metric_name',
          namespace=METRIC_NAMESPACE,
          step='generate_metrics',
          attempted=DistributionMatcher(
              sum_value=len(''.join(MESSAGES_TO_PUBLISH)),
              count_value=len(MESSAGES_TO_PUBLISH),
              min_value=len(MESSAGES_TO_PUBLISH[0]),
              max_value=len(MESSAGES_TO_PUBLISH[1])),
          committed=DistributionMatcher(
              sum_value=len(''.join(MESSAGES_TO_PUBLISH)),
              count_value=len(MESSAGES_TO_PUBLISH),
              min_value=len(MESSAGES_TO_PUBLISH[0]),
              max_value=len(MESSAGES_TO_PUBLISH[1]))),
  ]

  metrics = result.metrics().all_metrics()
  errors = metric_result_matchers.verify_all(metrics, matchers)
  self.assertFalse(errors, str(errors))
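# Note: the min/max assertions above rely on the MESSAGES_TO_PUBLISH fixture
# being ordered shortest message first, so that the first element's length is
# the distribution minimum and the second element's length is the maximum.
# A hypothetical fixture consistent with those assertions:
#
#   MESSAGES_TO_PUBLISH = ['message a', 'message b b']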
def test_counter_does_not_match_distribution_and_doesnt_crash(self):
  metric_result = _create_metric_result(EVERYTHING_COUNTER)
  matcher = is_not(
      MetricResultMatcher(
          committed=DistributionMatcher(
              sum_value=120, count_value=50, min_value=100, max_value=60),
          attempted=DistributionMatcher(
              sum_value=120, count_value=50, min_value=100, max_value=60),
      ))
  hc_assert_that(metric_result, matcher)
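# The test above passes because a well-behaved DistributionMatcher treats a
# counter result (a plain int) as a mismatch instead of raising when the
# distribution fields are absent. A minimal hypothetical sketch of that guard
# pattern (illustrative only, not Beam's actual DistributionMatcher):

from hamcrest.core.base_matcher import BaseMatcher


class _SafeDistributionSumMatcher(BaseMatcher):
  """Matches only objects that expose a distribution-style `sum` field."""

  def __init__(self, sum_value):
    self.sum_value = sum_value

  def _matches(self, item):
    # Mismatch, rather than crash, on ints and other non-distribution values.
    if not hasattr(item, 'sum'):
      return False
    return item.sum == self.sum_value

  def describe_to(self, description):
    description.append_text('distribution with sum %s' % self.sum_value)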
def legacy_metric_matchers():
  """MetricResult matchers with adjusted step names for the legacy DF test."""
  # TODO(ajamato): Move these to the common_metric_matchers once implemented
  # in the FN API.
  matchers = common_metric_matchers()
  matchers.extend([
      # User distribution metric, legacy DF only.
      MetricResultMatcher(
          name='distribution_values',
          namespace=METRIC_NAMESPACE,
          step='metrics',
          attempted=DistributionMatcher(
              sum_value=sum(INPUT),
              count_value=len(INPUT),
              min_value=min(INPUT),
              max_value=max(INPUT)),
          committed=DistributionMatcher(
              sum_value=sum(INPUT),
              count_value=len(INPUT),
              min_value=min(INPUT),
              max_value=max(INPUT)),
      ),
      # ElementCount and MeanByteCount for a user ParDo.
      MetricResultMatcher(
          name='ElementCount',
          labels={
              'output_user_name': 'metrics-out0',
              'original_name': 'metrics-out0-ElementCount'
          },
          attempted=greater_than(0),
          committed=greater_than(0)),
      MetricResultMatcher(
          name='MeanByteCount',
          labels={
              'output_user_name': 'metrics-out0',
              'original_name': 'metrics-out0-MeanByteCount'
          },
          attempted=greater_than(0),
          committed=greater_than(0)),
  ])
  return matchers
def test_user_counter_using_pardo(self):
  class SomeDoFn(beam.DoFn):
    """A custom dummy DoFn using yield."""
    static_counter_elements = metrics.Metrics.counter(
        'SomeDoFn', 'metrics_static_counter_element')

    def __init__(self):
      self.user_counter_elements = metrics.Metrics.counter(
          self.__class__, 'metrics_user_counter_element')

    def process(self, element):
      self.static_counter_elements.inc(2)
      self.user_counter_elements.inc()
      distro = Metrics.distribution(self.__class__, 'element_dist')
      distro.update(element)
      yield element

  pipeline = TestPipeline()
  nums = pipeline | 'Input' >> beam.Create([1, 2, 3, 4])
  results = nums | 'ApplyPardo' >> beam.ParDo(SomeDoFn())
  assert_that(results, equal_to([1, 2, 3, 4]))

  res = pipeline.run()
  res.wait_until_finish()

  # Verify static counter.
  metric_results = (
      res.metrics().query(
          MetricsFilter().with_metric(SomeDoFn.static_counter_elements)))
  outputs_static_counter = metric_results['counters'][0]
  self.assertEqual(
      outputs_static_counter.key.metric.name,
      'metrics_static_counter_element')
  self.assertEqual(outputs_static_counter.committed, 8)

  # Verify user counter.
  metric_results = (
      res.metrics().query(
          MetricsFilter().with_name('metrics_user_counter_element')))
  outputs_user_counter = metric_results['counters'][0]
  self.assertEqual(
      outputs_user_counter.key.metric.name, 'metrics_user_counter_element')
  self.assertEqual(outputs_user_counter.committed, 4)

  # Verify user distribution counter.
  metric_results = res.metrics().query()
  matcher = MetricResultMatcher(
      step='ApplyPardo',
      namespace=hc.contains_string('SomeDoFn'),
      name='element_dist',
      committed=DistributionMatcher(
          sum_value=hc.greater_than_or_equal_to(0),
          count_value=hc.greater_than_or_equal_to(0),
          min_value=hc.greater_than_or_equal_to(0),
          max_value=hc.greater_than_or_equal_to(0)))
  hc.assert_that(
      metric_results['distributions'], hc.contains_inanyorder(matcher))
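# MetricsFilter predicates chain, with each with_* call narrowing the query.
# A hypothetical tighter query for the distribution verified above, reusing
# the same pipeline result `res` from inside the test:
#
#   filtered = res.metrics().query(
#       MetricsFilter().with_step('ApplyPardo').with_name('element_dist'))
#   element_dist_results = filtered['distributions']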
def test_matches_all_for_distribution(self):
  metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
  matcher = MetricResultMatcher(
      namespace='myNamespace',
      name='myName',
      step='myStep',
      labels={
          'pcollection': 'myCollection',
          'myCustomKey': 'myCustomValue'
      },
      committed=DistributionMatcher(
          sum_value=12, count_value=5, min_value=0, max_value=6),
      attempted=DistributionMatcher(
          sum_value=12, count_value=5, min_value=0, max_value=6),
  )
  hc_assert_that(metric_result, matcher)
def test_matches_none_for_distribution(self):
  metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
  matcher = MetricResultMatcher(
      namespace=is_not(equal_to('invalidNamespace')),
      name=is_not(equal_to('invalidName')),
      step=is_not(equal_to('invalidStep')),
      labels={
          is_not(equal_to('invalidPcollection')): anything(),
          is_not(equal_to('invalidCustomKey')):
              is_not(equal_to('invalidCustomValue'))
      },
      committed=is_not(
          DistributionMatcher(
              sum_value=120, count_value=50, min_value=100, max_value=60)),
      attempted=is_not(
          DistributionMatcher(
              sum_value=120, count_value=50, min_value=100, max_value=60)),
  )
  hc_assert_that(metric_result, matcher)
def common_metric_matchers():
  """MetricResult matchers common to all tests."""
  # TODO(ajamato): Matcher for the 'metrics' step's ElementCount.
  # TODO(ajamato): Matcher for the 'metrics' step's MeanByteCount.
  # TODO(ajamato): Matcher for the start and finish exec times.
  # TODO(ajamato): Matcher for a gauge metric once implemented in dataflow.
  matchers = [
      # User counter metrics.
      MetricResultMatcher(
          name='total_values',
          namespace=METRIC_NAMESPACE,
          step='metrics',
          attempted=sum(INPUT),
          committed=sum(INPUT)),
      MetricResultMatcher(
          name='ExecutionTime_StartBundle',
          step='metrics',
          attempted=greater_than(0),
          committed=greater_than(0)),
      MetricResultMatcher(
          name='ExecutionTime_ProcessElement',
          step='metrics',
          attempted=greater_than(0),
          committed=greater_than(0)),
      MetricResultMatcher(
          name='ExecutionTime_FinishBundle',
          step='metrics',
          attempted=greater_than(0),
          committed=greater_than(0)),
      MetricResultMatcher(
          name='distribution_values',
          namespace=METRIC_NAMESPACE,
          step='metrics',
          attempted=DistributionMatcher(
              sum_value=sum(INPUT),
              count_value=len(INPUT),
              min_value=min(INPUT),
              max_value=max(INPUT)),
          committed=DistributionMatcher(
              sum_value=sum(INPUT),
              count_value=len(INPUT),
              min_value=min(INPUT),
              max_value=max(INPUT)),
      ),
      # ElementCount and MeanByteCount for a user ParDo.
      MetricResultMatcher(
          name='ElementCount',
          labels={
              'output_user_name': 'metrics-out0',
              'original_name': 'metrics-out0-ElementCount'
          },
          attempted=greater_than(0),
          committed=greater_than(0)),
      MetricResultMatcher(
          name='MeanByteCount',
          labels={
              'output_user_name': 'metrics-out0',
              'original_name': 'metrics-out0-MeanByteCount'
          },
          attempted=greater_than(0),
          committed=greater_than(0)),
  ]
  pcoll_names = [
      'GroupByKey/Reify-out0',
      'GroupByKey/Read-out0',
      'map_to_common_key-out0',
      'GroupByKey/GroupByWindow-out0',
      'GroupByKey/Read-out0',
      'GroupByKey/Reify-out0',
  ]
  for name in pcoll_names:
    matchers.extend([
        MetricResultMatcher(
            name='ElementCount',
            labels={
                'output_user_name': name,
                'original_name': '%s-ElementCount' % name
            },
            attempted=greater_than(0),
            committed=greater_than(0)),
        MetricResultMatcher(
            name='MeanByteCount',
            labels={
                'output_user_name': name,
                'original_name': '%s-MeanByteCount' % name
            },
            attempted=greater_than(0),
            committed=greater_than(0)),
    ])
  return matchers
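# How these matcher lists are consumed (mirroring the streaming FnAPI test
# above): gather every MetricResult from the finished pipeline, then let
# metric_result_matchers.verify_all report any matcher that found no matching
# result. A hypothetical harness:
#
#   result = pipeline.run()
#   result.wait_until_finish()
#   errors = metric_result_matchers.verify_all(
#       result.metrics().all_metrics(), common_metric_matchers())
#   assert not errors, str(errors)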