Example #1

    def test_system_counters_set_labels_and_step_name(self):
        mock_client, mock_job_result = self.setup_mock_client_result(
            self.SYSTEM_COUNTERS_LIST)
        test_object = dataflow_metrics.DataflowMetrics(mock_client,
                                                       mock_job_result)
        all_metrics = test_object.all_metrics()

        matchers = [
            MetricResultMatcher(name='ElementCount',
                                labels={
                                    'original_name':
                                    'ToIsmRecordForMultimap-out0-ElementCount',
                                    'output_user_name':
                                    'ToIsmRecordForMultimap-out0'
                                },
                                attempted=42,
                                committed=42),
            MetricResultMatcher(name='MeanByteCount',
                                labels={
                                    'original_name': 'Read-out0-MeanByteCount',
                                    'output_user_name': 'GroupByKey/Read-out0'
                                },
                                attempted=31,
                                committed=31),
            MetricResultMatcher(name='ExecutionTime_ProcessElement',
                                step='write/Write/Write',
                                attempted=1000,
                                committed=1000)
        ]
        errors = metric_result_matchers.verify_all(all_metrics, matchers)
        self.assertFalse(errors, errors)

    def test_streaming_pipeline_returns_expected_user_metrics_fnapi_it(self):
        """Runs a streaming Dataflow job and verifies that user metrics are
        reported correctly.
        """
        self._inject_words(self.input_topic, MESSAGES_TO_PUBLISH)
        result = self.run_pipeline()

        METRIC_NAMESPACE = (
            'apache_beam.runners.dataflow.'
            'dataflow_exercise_streaming_metrics_pipeline.StreamingUserMetricsDoFn')
        matchers = [
            # System metrics
            MetricResultMatcher(
                name='ElementCount',
                labels={
                    'output_user_name': 'generate_metrics-out0',
                    'original_name': 'generate_metrics-out0-ElementCount'
                },
                attempted=len(MESSAGES_TO_PUBLISH),
                committed=len(MESSAGES_TO_PUBLISH),
            ),
            MetricResultMatcher(
                name='ElementCount',
                labels={
                    'output_user_name': 'ReadFromPubSub/Read-out0',
                    'original_name': 'ReadFromPubSub/Read-out0-ElementCount'
                },
                attempted=len(MESSAGES_TO_PUBLISH),
                committed=len(MESSAGES_TO_PUBLISH),
            ),
            # User Counter Metrics.
            MetricResultMatcher(name='double_msg_counter_name',
                                namespace=METRIC_NAMESPACE,
                                step='generate_metrics',
                                attempted=len(MESSAGES_TO_PUBLISH) * 2,
                                committed=len(MESSAGES_TO_PUBLISH) * 2),
            MetricResultMatcher(
                name='msg_len_dist_metric_name',
                namespace=METRIC_NAMESPACE,
                step='generate_metrics',
                # Assumes MESSAGES_TO_PUBLISH[0] is the shortest message and
                # MESSAGES_TO_PUBLISH[1] the longest.
                attempted=DistributionMatcher(
                    sum_value=len(''.join(MESSAGES_TO_PUBLISH)),
                    count_value=len(MESSAGES_TO_PUBLISH),
                    min_value=len(MESSAGES_TO_PUBLISH[0]),
                    max_value=len(MESSAGES_TO_PUBLISH[1])),
                committed=DistributionMatcher(
                    sum_value=len(''.join(MESSAGES_TO_PUBLISH)),
                    count_value=len(MESSAGES_TO_PUBLISH),
                    min_value=len(MESSAGES_TO_PUBLISH[0]),
                    max_value=len(MESSAGES_TO_PUBLISH[1]))),
        ]

        metrics = result.metrics().all_metrics()
        errors = metric_result_matchers.verify_all(metrics, matchers)
        self.assertFalse(errors, str(errors))
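
    # The tests above delegate to metric_result_matchers.verify_all, which
    # reports every matcher that no MetricResult satisfied. A minimal sketch of
    # such a helper, assuming PyHamcrest matchers (an illustration, not
    # necessarily Beam's exact implementation):
    #
    #     from hamcrest.core.string_description import StringDescription
    #
    #     def verify_all(all_metrics, matchers):
    #         """Returns an error string; empty if every matcher matched."""
    #         errors = []
    #         for matcher in matchers:
    #             if not any(matcher.matches(m) for m in all_metrics):
    #                 description = StringDescription()
    #                 matcher.describe_to(description)
    #                 errors.append('Unable to match: %s\n' % description)
    #         return ''.join(errors)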

    def test_matches_distribution_with_custom_matchers(self):
        metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
        matcher = is_not(
            MetricResultMatcher(
                namespace=equal_to_ignoring_case('MYNAMESPACE'),
                name=equal_to_ignoring_case('MYNAME'),
                step=equal_to_ignoring_case('MYSTEP'),
                labels={
                    equal_to_ignoring_case('PCOLLECTION'):
                    equal_to_ignoring_case('MYCUSTOMVALUE'),
                    'myCustomKey':
                    equal_to_ignoring_case('MYCUSTOMVALUE')
                },
                committed=is_not(
                    DistributionMatcher(sum_value=greater_than(-1),
                                        count_value=greater_than(-1),
                                        min_value=greater_than(-1),
                                        max_value=greater_than(-1))),
                attempted=is_not(
                    DistributionMatcher(sum_value=greater_than(-1),
                                        count_value=greater_than(-1),
                                        min_value=greater_than(-1),
                                        max_value=greater_than(-1))),
            ))
        hc_assert_that(metric_result, matcher)

def legacy_metric_matchers():
  """MetricResult matchers with adjusted step names for the legacy DF test."""
  # TODO(ajamato): Move these to the common_metric_matchers once implemented
  # in the FN API.
  matchers = common_metric_matchers()
  matchers.extend([
      # User distribution metric, legacy DF only.
      MetricResultMatcher(
          name='distribution_values',
          namespace=METRIC_NAMESPACE,
          step='metrics',
          attempted=DistributionMatcher(
              sum_value=sum(INPUT),
              count_value=len(INPUT),
              min_value=min(INPUT),
              max_value=max(INPUT)
          ),
          committed=DistributionMatcher(
              sum_value=sum(INPUT),
              count_value=len(INPUT),
              min_value=min(INPUT),
              max_value=max(INPUT)
          ),
      ),
      # Element count and MeanByteCount for a User ParDo.
      MetricResultMatcher(
          name='ElementCount',
          labels={
              'output_user_name': 'metrics-out0',
              'original_name': 'metrics-out0-ElementCount'
          },
          attempted=greater_than(0),
          committed=greater_than(0)
      ),
      MetricResultMatcher(
          name='MeanByteCount',
          labels={
              'output_user_name': 'metrics-out0',
              'original_name': 'metrics-out0-MeanByteCount'
          },
          attempted=greater_than(0),
          committed=greater_than(0)
      ),
  ])
  return matchers
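
# DistributionMatcher, used above, is a custom hamcrest matcher over a
# DistributionResult. A minimal sketch of how such a matcher can be written,
# assuming PyHamcrest's BaseMatcher and that DistributionResult exposes
# sum/count/min/max properties; DistributionMatcherSketch is a hypothetical
# name for illustration, not Beam's actual implementation.
from hamcrest import equal_to
from hamcrest.core.base_matcher import BaseMatcher


class DistributionMatcherSketch(BaseMatcher):
  """Matches a DistributionResult attribute by attribute."""

  def __init__(
      self, sum_value=None, count_value=None, min_value=None, max_value=None):
    # Wrap plain values so that every configured attribute is a matcher.
    def as_matcher(value):
      return value if isinstance(value, BaseMatcher) else equal_to(value)

    self._matchers = {
        attr: as_matcher(value)
        for attr, value in [('sum', sum_value), ('count', count_value),
                            ('min', min_value), ('max', max_value)]
        if value is not None
    }

  def _matches(self, distribution_result):
    return all(
        matcher.matches(getattr(distribution_result, attr))
        for attr, matcher in self._matchers.items())

  def describe_to(self, description):
    description.append_text('DistributionResult with')
    for attr, matcher in self._matchers.items():
      description.append_text(' %s: ' % attr).append_description_of(matcher)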

Example #5

    def test_user_counter_using_pardo(self):
        class SomeDoFn(beam.DoFn):
            """A custom dummy DoFn using yield."""
            static_counter_elements = metrics.Metrics.counter(
                "SomeDoFn", 'metrics_static_counter_element')

            def __init__(self):
                self.user_counter_elements = metrics.Metrics.counter(
                    self.__class__, 'metrics_user_counter_element')

            def process(self, element):
                self.static_counter_elements.inc(2)
                self.user_counter_elements.inc()
                distro = Metrics.distribution(self.__class__, 'element_dist')
                distro.update(element)
                yield element

        pipeline = TestPipeline()
        nums = pipeline | 'Input' >> beam.Create([1, 2, 3, 4])
        results = nums | 'ApplyPardo' >> beam.ParDo(SomeDoFn())
        assert_that(results, equal_to([1, 2, 3, 4]))

        res = pipeline.run()
        res.wait_until_finish()

        # Verify static counter.
        metric_results = (res.metrics().query(MetricsFilter().with_metric(
            SomeDoFn.static_counter_elements)))
        outputs_static_counter = metric_results['counters'][0]

        self.assertEqual(outputs_static_counter.key.metric.name,
                         'metrics_static_counter_element')
        self.assertEqual(outputs_static_counter.committed, 8)

        # Verify user counter.
        metric_results = (res.metrics().query(
            MetricsFilter().with_name('metrics_user_counter_element')))
        outputs_user_counter = metric_results['counters'][0]

        self.assertEqual(outputs_user_counter.key.metric.name,
                         'metrics_user_counter_element')
        self.assertEqual(outputs_user_counter.committed, 4)

        # Verify user distribution counter.
        metric_results = res.metrics().query()
        matcher = MetricResultMatcher(
            step='ApplyPardo',
            namespace=hc.contains_string('SomeDoFn'),
            name='element_dist',
            committed=DistributionMatcher(
                sum_value=hc.greater_than_or_equal_to(0),
                count_value=hc.greater_than_or_equal_to(0),
                min_value=hc.greater_than_or_equal_to(0),
                max_value=hc.greater_than_or_equal_to(0)))
        hc.assert_that(metric_results['distributions'],
                       hc.contains_inanyorder(matcher))
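
    # Instead of matchers, the distribution could also be checked directly. A
    # hedged sketch, assuming DistributionResult exposes sum/count/min/max
    # properties as in the Beam SDK (illustration only):
    #
    #     dist_results = res.metrics().query(
    #         MetricsFilter().with_name('element_dist'))
    #     dist = dist_results['distributions'][0].committed
    #     assert dist.sum == 1 + 2 + 3 + 4
    #     assert dist.count == 4
    #     assert dist.min == 1 and dist.max == 4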

    def test_matches_all_for_counter(self):
        metric_result = _create_metric_result(EVERYTHING_COUNTER)
        matcher = MetricResultMatcher(namespace='myNamespace',
                                      name='myName',
                                      step='myStep',
                                      labels={
                                          'pcollection': 'myCollection',
                                          'myCustomKey': 'myCustomValue'
                                      },
                                      attempted=42,
                                      committed=42)
        hc_assert_that(metric_result, matcher)

    def test_counter_does_not_match_distribution_and_doesnt_crash(self):
        metric_result = _create_metric_result(EVERYTHING_COUNTER)
        matcher = is_not(
            MetricResultMatcher(
                committed=DistributionMatcher(sum_value=120,
                                              count_value=50,
                                              min_value=100,
                                              max_value=60),
                attempted=DistributionMatcher(sum_value=120,
                                              count_value=50,
                                              min_value=100,
                                              max_value=60),
            ))
        hc_assert_that(metric_result, matcher)

    def test_matches_none_for_counter(self):
        metric_result = _create_metric_result(EVERYTHING_COUNTER)
        matcher = MetricResultMatcher(
            namespace=is_not(equal_to('invalidNamespace')),
            name=is_not(equal_to('invalidName')),
            step=is_not(equal_to('invalidStep')),
            labels={
                is_not(equal_to('invalidPcollection')): anything(),
                is_not(equal_to('invalidCustomKey')):
                is_not(equal_to('invalidCustomValue'))
            },
            attempted=is_not(equal_to(1000)),
            committed=is_not(equal_to(1000)))
        hc_assert_that(metric_result, matcher)

    def test_matches_counter_with_custom_matchers(self):
        metric_result = _create_metric_result(EVERYTHING_COUNTER)
        matcher = is_not(
            MetricResultMatcher(
                namespace=equal_to_ignoring_case('MYNAMESPACE'),
                name=equal_to_ignoring_case('MYNAME'),
                step=equal_to_ignoring_case('MYSTEP'),
                labels={
                    equal_to_ignoring_case('PCOLLECTION'):
                    equal_to_ignoring_case('MYCUSTOMVALUE'),
                    'myCustomKey':
                    equal_to_ignoring_case('MYCUSTOMVALUE')
                },
                committed=greater_than(0),
                attempted=greater_than(0)))
        hc_assert_that(metric_result, matcher)

    def test_matches_all_for_distribution(self):
        metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
        matcher = MetricResultMatcher(
            namespace='myNamespace',
            name='myName',
            step='myStep',
            labels={
                'pcollection': 'myCollection',
                'myCustomKey': 'myCustomValue'
            },
            committed=DistributionMatcher(sum_value=12,
                                          count_value=5,
                                          min_value=0,
                                          max_value=6),
            attempted=DistributionMatcher(sum_value=12,
                                          count_value=5,
                                          min_value=0,
                                          max_value=6),
        )
        hc_assert_that(metric_result, matcher)

    def test_matches_none_for_distribution(self):
        metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
        matcher = MetricResultMatcher(
            namespace=is_not(equal_to('invalidNamespace')),
            name=is_not(equal_to('invalidName')),
            step=is_not(equal_to('invalidStep')),
            labels={
                is_not(equal_to('invalidPcollection')): anything(),
                is_not(equal_to('invalidCustomKey')):
                is_not(equal_to('invalidCustomValue'))
            },
            committed=is_not(
                DistributionMatcher(sum_value=120,
                                    count_value=50,
                                    min_value=100,
                                    max_value=60)),
            attempted=is_not(
                DistributionMatcher(sum_value=120,
                                    count_value=50,
                                    min_value=100,
                                    max_value=60)),
        )
        hc_assert_that(metric_result, matcher)

def metric_matchers():
    """MetricResult matchers common to all tests."""
    # TODO(ajamato): Matcher for the 'metrics' step's ElementCount.
    # TODO(ajamato): Matcher for the 'metrics' step's MeanByteCount.
    # TODO(ajamato): Matcher for the start and finish exec times.
    # TODO(ajamato): Matcher for a gauge metric once implemented in dataflow.
    matchers = [
        # User Counter Metrics.
        MetricResultMatcher(name='total_values',
                            namespace=METRIC_NAMESPACE,
                            step='metrics',
                            attempted=sum(INPUT),
                            committed=sum(INPUT)),
        MetricResultMatcher(name='ExecutionTime_StartBundle',
                            step='metrics',
                            attempted=greater_than(0),
                            committed=greater_than(0)),
        MetricResultMatcher(name='ExecutionTime_ProcessElement',
                            step='metrics',
                            attempted=greater_than(0),
                            committed=greater_than(0)),
        MetricResultMatcher(name='ExecutionTime_FinishBundle',
                            step='metrics',
                            attempted=greater_than(0),
                            committed=greater_than(0)),
        MetricResultMatcher(
            name='distribution_values',
            namespace=METRIC_NAMESPACE,
            step='metrics',
            attempted=DistributionMatcher(sum_value=sum(INPUT),
                                          count_value=len(INPUT),
                                          min_value=min(INPUT),
                                          max_value=max(INPUT)),
            committed=DistributionMatcher(sum_value=sum(INPUT),
                                          count_value=len(INPUT),
                                          min_value=min(INPUT),
                                          max_value=max(INPUT)),
        ),
        # Element count and MeanByteCount for a User ParDo.
        MetricResultMatcher(name='ElementCount',
                            labels={
                                'output_user_name': 'metrics-out0',
                                'original_name': 'metrics-out0-ElementCount'
                            },
                            attempted=greater_than(0),
                            committed=greater_than(0)),
        MetricResultMatcher(name='MeanByteCount',
                            labels={
                                'output_user_name': 'metrics-out0',
                                'original_name': 'metrics-out0-MeanByteCount'
                            },
                            attempted=greater_than(0),
                            committed=greater_than(0))
    ]

    pcoll_names = [
        'GroupByKey/Reify-out0',
        'GroupByKey/Read-out0',
        'map_to_common_key-out0',
        'GroupByKey/GroupByWindow-out0',
    ]
    for name in pcoll_names:
        matchers.extend([
            MetricResultMatcher(name='ElementCount',
                                labels={
                                    'output_user_name': name,
                                    'original_name': '%s-ElementCount' % name
                                },
                                attempted=greater_than(0),
                                committed=greater_than(0)),
            MetricResultMatcher(name='MeanByteCount',
                                labels={
                                    'output_user_name': name,
                                    'original_name': '%s-MeanByteCount' % name
                                },
                                attempted=greater_than(0),
                                committed=greater_than(0)),
        ])
    return matchers
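
# A typical consumer of metric_matchers() follows the same pattern as the
# streaming test above (a sketch; run_pipeline and the surrounding test class
# are assumed to exist, as in the earlier examples):
#
#     result = self.run_pipeline()
#     result.wait_until_finish()
#     all_metrics = result.metrics().all_metrics()
#     errors = metric_result_matchers.verify_all(all_metrics, metric_matchers())
#     self.assertFalse(errors, str(errors))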

    def test_distribution_does_not_match_counter_and_doesnt_crash(self):
        metric_result = _create_metric_result(EVERYTHING_DISTRIBUTION)
        matcher = is_not(MetricResultMatcher(attempted=42, committed=42))
        hc_assert_that(metric_result, matcher)

    def test_matches_key_but_not_value(self):
        metric_result = _create_metric_result(EVERYTHING_COUNTER)
        matcher = is_not(
            MetricResultMatcher(labels={'pcollection': 'invalidCollection'}))
        hc_assert_that(metric_result, matcher)