示例#1
0
    def test_query_structured_metrics(self):
        """Query structured metrics and compare counters and distributions.

        The step-name translation is stubbed to always return 'split', so
        every expected key is built with that step name.
        """
        client, job_result = self.setup_mock_client_result(
            self.STRUCTURED_COUNTER_LIST)
        dm = dataflow_metrics.DataflowMetrics(client, job_result)
        # Replace the translation method with one that always yields 'split'.
        dm._translate_step_name = types.MethodType(lambda self, x: 'split', dm)
        results = dm.query()

        namespace = '__main__.WordExtractingDoFn'

        # Counters: a single result whose two values are both 109475.
        counter_key = MetricKey('split', MetricName(namespace, 'word_lengths'))
        self.assertEqual(
            results['counters'],
            [MetricResult(counter_key, 109475, 109475)])

        # Distributions: a single result with identical data for both values.
        dist_key = MetricKey(
            'split', MetricName(namespace, 'word_length_dist'))
        self.assertEqual(
            results['distributions'],
            [
                MetricResult(
                    dist_key,
                    DistributionResult(DistributionData(18, 2, 2, 16)),
                    DistributionResult(DistributionData(18, 2, 2, 16))),
            ])
示例#2
0
  def test_direct_runner_metrics(self):
    """Run a five-element pipeline on DirectRunner and verify its metrics.

    The DoFn updates counters, a gauge and a distribution; the queried
    results are then checked for each metric kind.
    """

    # Records one metric update from each method the runner invokes.
    class MyDoFn(beam.DoFn):
      def start_bundle(self):
        Metrics.counter(self.__class__, 'bundles').inc()

      def finish_bundle(self):
        Metrics.counter(self.__class__, 'finished_bundles').inc()

      def process(self, element):
        Metrics.gauge(self.__class__, 'latest_element').set(element)
        Metrics.counter(self.__class__, 'elements').inc()
        Metrics.distribution(self.__class__, 'element_dist').update(element)
        return [element]

    pipeline = Pipeline(DirectRunner())
    created = pipeline | beam.Create([1, 2, 3, 4, 5])
    pcoll = created | 'Do' >> beam.ParDo(MyDoFn())
    assert_that(pcoll, equal_to([1, 2, 3, 4, 5]))

    result = pipeline.run()
    result.wait_until_finish()
    metrics = result.metrics().query()
    namespace = '{}.{}'.format(MyDoFn.__module__, MyDoFn.__name__)

    def do_key(metric_name):
      # Key of a metric reported by the 'Do' step in MyDoFn's namespace.
      return MetricKey('Do', MetricName(namespace, metric_name))

    expected_counters = [
        MetricResult(do_key('elements'), 5, 5),
        MetricResult(do_key('bundles'), 1, 1),
        MetricResult(do_key('finished_bundles'), 1, 1),
    ]
    hc.assert_that(
        metrics['counters'], hc.contains_inanyorder(*expected_counters))

    expected_distribution = MetricResult(
        do_key('element_dist'),
        DistributionResult(DistributionData(15, 5, 1, 5)),
        DistributionResult(DistributionData(15, 5, 1, 5)))
    hc.assert_that(
        metrics['distributions'],
        hc.contains_inanyorder(expected_distribution))

    # Only the first gauge result is inspected.
    gauge_result = metrics['gauges'][0]
    hc.assert_that(gauge_result.key, hc.equal_to(do_key('latest_element')))
    hc.assert_that(gauge_result.committed.value, hc.equal_to(5))
    hc.assert_that(gauge_result.attempted.value, hc.equal_to(5))
示例#3
0
  def test_direct_runner_metrics(self):
    """Run a five-element pipeline on DirectRunner and verify its metrics.

    The DoFn updates three counters and one distribution; the queried
    results are checked against the expected values.
    """
    from apache_beam.metrics.metric import Metrics

    # Records one metric update from each method the runner invokes.
    class MyDoFn(beam.DoFn):
      def start_bundle(self):
        Metrics.counter(self.__class__, 'bundles').inc()

      def finish_bundle(self):
        Metrics.counter(self.__class__, 'finished_bundles').inc()

      def process(self, element):
        Metrics.counter(self.__class__, 'elements').inc()
        Metrics.distribution(self.__class__, 'element_dist').update(element)
        return [element]

    pipeline = Pipeline(
        DirectRunner(), options=PipelineOptions(self.default_properties))
    created = pipeline | ptransform.Create([1, 2, 3, 4, 5])
    # pylint: disable=expression-not-assigned
    created | 'Do' >> beam.ParDo(MyDoFn())

    result = pipeline.run()
    result.wait_until_finish()
    metrics = result.metrics().query()
    namespace = '{}.{}'.format(MyDoFn.__module__, MyDoFn.__name__)

    def do_key(metric_name):
      # Key of a metric reported by the 'Do' step in MyDoFn's namespace.
      return MetricKey('Do', MetricName(namespace, metric_name))

    expected_counters = [
        MetricResult(do_key('elements'), 5, 5),
        MetricResult(do_key('bundles'), 1, 1),
        MetricResult(do_key('finished_bundles'), 1, 1),
    ]
    hc.assert_that(
        metrics['counters'], hc.contains_inanyorder(*expected_counters))

    expected_distribution = MetricResult(
        do_key('element_dist'),
        DistributionResult(DistributionData(15, 5, 1, 5)),
        DistributionResult(DistributionData(15, 5, 1, 5)))
    hc.assert_that(
        metrics['distributions'],
        hc.contains_inanyorder(expected_distribution))
示例#4
0
    def _get_metric_value(self, metric):
        """Get a metric result object from a MetricUpdate from Dataflow API.

        Args:
            metric: an object exposing ``scalar`` and ``distribution``
                attributes, or None.

        Returns:
            ``metric.scalar.integer_value`` when a scalar is present, a
            DistributionResult built from the 'sum', 'count', 'min' and
            'max' properties when a distribution is present, and None when
            ``metric`` is None or carries neither.
        """
        if metric is None:
            return None

        if metric.scalar is not None:
            return metric.scalar.integer_value
        if metric.distribution is None:
            return None

        properties = metric.distribution.object_value.properties

        def _property_value(name):
            # Value object stored under the property whose key is `name`.
            return _get_match(properties, lambda x: x.key == name).value

        dist_count = _property_value('count').integer_value
        dist_min = _property_value('min').integer_value
        dist_max = _property_value('max').integer_value
        sum_value = _property_value('sum')
        dist_sum = sum_value.integer_value
        double_sum = sum_value.double_value

        # Fall back to double_value when no integer sum was reported, or when
        # the integer reads as zero but a double is present. A genuine zero
        # sum with no double_value is kept as-is; previously it was treated
        # as an overflow and failed on int(None).
        if dist_sum is None or (not dist_sum and double_sum is not None):
            # distribution metric is not meant to use on large values, but in
            # case it is, the value can overflow and become double_value, the
            # correctness of the value may not be guaranteed.
            _LOGGER.info(
                "Distribution metric sum value seems to have "
                "overflowed integer_value range, the correctness of sum or mean "
                "value may not be guaranteed: %s", metric.distribution)
            dist_sum = int(double_sum)

        return DistributionResult(
            DistributionData(dist_sum, dist_count, dist_min, dist_max))
    def _get_metric_value(self, metric):
        """Convert a Dataflow API MetricUpdate into a metric result value.

        Returns ``metric.scalar.integer_value`` for a scalar, a
        DistributionResult for a distribution, and None when ``metric`` is
        None or has neither a scalar nor a distribution.
        """
        if metric is None:
            return None
        if metric.scalar is not None:
            return metric.scalar.integer_value
        if metric.distribution is None:
            return None

        properties = metric.distribution.object_value.properties

        def _int_property(name):
            # integer_value of the property whose key equals `name`.
            return _get_match(
                properties, lambda prop: prop.key == name).value.integer_value

        count = _int_property('count')
        minimum = _int_property('min')
        maximum = _int_property('max')
        mean = _int_property('mean')

        # Calculating the sum with a hack, as distribution sum is not yet
        # available in the Dataflow API.
        # TODO(pabloem) Switch to "sum" field once it's available in the API
        total = count * mean
        return DistributionResult(
            DistributionData(total, count, minimum, maximum))
示例#6
0
  def test_commit_logical_no_filter(self):
    """Commit two logical updates, then check an unfiltered query.

    The expected results carry the combined values of both commits in the
    first value slot, and zero / an empty distribution in the second.
    """
    metrics = DirectMetrics()

    first_update = MetricUpdates(
        counters={
            MetricKey('step1', self.name1): 5,
            MetricKey('step1', self.name2): 8,
        },
        distributions={
            MetricKey('step1', self.name1): DistributionData(8, 2, 3, 5),
        })
    metrics.commit_logical(self.bundle1, first_update)

    second_update = MetricUpdates(
        counters={
            MetricKey('step2', self.name1): 7,
            MetricKey('step1', self.name2): 4,
        },
        distributions={
            MetricKey('step1', self.name1): DistributionData(4, 1, 4, 4),
        })
    metrics.commit_logical(self.bundle1, second_update)

    results = metrics.query()

    expected_counters = [
        MetricResult(MetricKey('step1', self.name2), 12, 0),
        MetricResult(MetricKey('step2', self.name1), 7, 0),
        MetricResult(MetricKey('step1', self.name1), 5, 0),
    ]
    hc.assert_that(
        results['counters'], hc.contains_inanyorder(*expected_counters))

    expected_distribution = MetricResult(
        MetricKey('step1', self.name1),
        DistributionResult(DistributionData(12, 3, 3, 5)),
        DistributionResult(DistributionData(0, 0, None, None)))
    hc.assert_that(
        results['distributions'],
        hc.contains_inanyorder(expected_distribution))
示例#7
0
    def test_combiner_functions(self):
        """Check commit_logical / extract_committed on DirectMetrics entries.

        Covers one counter entry and one distribution entry, including the
        exception each is expected to raise when None is committed.
        """
        metrics = DirectMetrics()

        counter_entry = metrics._counters['anykey']
        counter_entry.commit_logical(self.bundle1, 5)
        self.assertEqual(counter_entry.extract_committed(), 5)
        # Committing None to the counter entry must raise TypeError.
        self.assertRaises(
            TypeError, counter_entry.commit_logical, self.bundle1, None)

        dist_entry = metrics._distributions['anykey']
        dist_entry.commit_logical(self.bundle1, DistributionData(4, 1, 4, 4))
        expected = DistributionResult(DistributionData(4, 1, 4, 4))
        self.assertEqual(dist_entry.extract_committed(), expected)

        # Committing None to the distribution entry must raise AttributeError.
        self.assertRaises(
            AttributeError, dist_entry.commit_logical, self.bundle1, None)
def extract_metric_result_map_value(monitoring_info_proto):
  """Returns the relevant GaugeResult, DistributionResult or int value.

  These are the proper format for use in the MetricResult.query() result.

  Args:
    monitoring_info_proto: the MonitoringInfo to convert.

  Returns:
    The result of extract_counter_value for a counter, a DistributionResult
    for a distribution, a GaugeResult for a gauge, and None when the
    MonitoringInfo matches none of the three checks.
  """
  # Returns a metric result (AKA the legacy format).
  # from the MonitoringInfo
  if is_counter(monitoring_info_proto):
    return extract_counter_value(monitoring_info_proto)
  if is_distribution(monitoring_info_proto):
    # Local names avoid shadowing the sum/min/max builtins.
    (dist_count, dist_sum, dist_min,
     dist_max) = extract_distribution(monitoring_info_proto)
    # DistributionData takes the sum first, then the count.
    return DistributionResult(
        DistributionData(dist_sum, dist_count, dist_min, dist_max))
  if is_gauge(monitoring_info_proto):
    (gauge_timestamp, gauge_value) = extract_gauge_value(
        monitoring_info_proto)
    return GaugeResult(GaugeData(gauge_value, gauge_timestamp))
  # Neither counter, distribution nor gauge.
  return None
示例#9
0
def extract_metric_result_map_value(monitoring_info_proto):
  """Returns the relevant GaugeResult, DistributionResult or int value.

  These are the proper format for use in the MetricResult.query() result.
  None is returned when the MonitoringInfo is not recognised as a counter,
  distribution or gauge.
  """
  # Converts the MonitoringInfo into a metric result (AKA the legacy format).
  info = monitoring_info_proto

  if is_counter(info):
    return extract_counter_value(info)

  if is_distribution(info):
    dist = extract_distribution(info)
    data = DistributionData(dist.sum, dist.count, dist.min, dist.max)
    return DistributionResult(data)

  if is_gauge(info):
    # The gauge value is read with the same extractor used for counters.
    timestamp_secs = to_timestamp_secs(info.timestamp)
    gauge_value = extract_counter_value(info)
    return GaugeResult(GaugeData(gauge_value, timestamp_secs))

  return None
示例#10
0
    def _get_metric_value(self, metric):
        """Convert a Dataflow API MetricUpdate into a metric result value.

        Returns ``metric.scalar.integer_value`` for a scalar, a
        DistributionResult built from the 'sum', 'count', 'min' and 'max'
        properties for a distribution, and None otherwise (including when
        ``metric`` itself is None).
        """
        if metric is None:
            return None
        if metric.scalar is not None:
            return metric.scalar.integer_value
        if metric.distribution is None:
            return None

        properties = metric.distribution.object_value.properties

        def _int_property(name):
            # integer_value of the property whose key equals `name`.
            matched = _get_match(properties, lambda prop: prop.key == name)
            return matched.value.integer_value

        count = _int_property('count')
        minimum = _int_property('min')
        maximum = _int_property('max')
        total = _int_property('sum')
        return DistributionResult(
            DistributionData(total, count, minimum, maximum))
def _create_metric_result(data_dict):
    """Build a MetricResult from a dictionary description of a metric.

    Reads 'namespace' and 'name' (required), 'step' (default '') and
    'labels' (default empty dict). The optional 'attempted' and 'committed'
    entries may each hold either a 'counter' value or a 'distribution'
    mapping with 'sum', 'count', 'min' and 'max'; anything else yields None
    for that slot.
    """
    step = data_dict.get('step', '')
    labels = data_dict.get('labels', dict())

    def _parse_value(kind):
        # Value for the 'attempted' / 'committed' entry, or None if absent
        # or of an unrecognised shape.
        if kind not in data_dict:
            return None
        payload = data_dict[kind]
        if 'counter' in payload:
            return payload['counter']
        if 'distribution' in payload:
            dist = payload['distribution']
            return DistributionResult(
                DistributionData(
                    dist['sum'], dist['count'], dist['min'], dist['max']))
        return None

    attempted = _parse_value('attempted')
    committed = _parse_value('committed')

    key = MetricKey(
        step, MetricName(data_dict['namespace'], data_dict['name']), labels)
    return MetricResult(key, committed, attempted)
示例#12
0
    def test_apply_physical_logical(self):
        """Apply update_physical, commit_physical and commit_logical in turn.

        After each call the queried counters and distributions are compared
        (in any order) against the expected MetricResult values.
        """
        metrics = DirectMetrics()
        dist_zero = DistributionData(0, 0, None, None)

        step1_name1 = MetricKey('step1', self.name1)
        step1_name2 = MetricKey('step1', self.name2)
        step2_name1 = MetricKey('step2', self.name1)
        step2_name2 = MetricKey('step2', self.name2)
        step2_name3 = MetricKey('step2', self.name3)

        def dist(total, count, low, high):
            # Shorthand for an expected distribution value.
            return DistributionResult(
                DistributionData(total, count, low, high))

        def assert_query(expected_counters, expected_distributions):
            # Query once and compare both result lists, ignoring order.
            results = metrics.query()
            hc.assert_that(
                results['counters'],
                hc.contains_inanyorder(*expected_counters))
            hc.assert_that(
                results['distributions'],
                hc.contains_inanyorder(*expected_distributions))

        # Stage 1: update_physical.
        metrics.update_physical(
            object(),
            MetricUpdates(
                counters={
                    step1_name1: 7,
                    step1_name2: 5,
                    step2_name1: 1,
                },
                distributions={
                    step1_name1: DistributionData(3, 1, 3, 3),
                    step2_name3: DistributionData(8, 2, 4, 4),
                }))
        assert_query(
            [
                MetricResult(step1_name1, 0, 7),
                MetricResult(step1_name2, 0, 5),
                MetricResult(step2_name1, 0, 1),
            ],
            [
                MetricResult(
                    step1_name1, DistributionResult(dist_zero),
                    dist(3, 1, 3, 3)),
                MetricResult(
                    step2_name3, DistributionResult(dist_zero),
                    dist(8, 2, 4, 4)),
            ])

        # Stage 2: commit_physical.
        metrics.commit_physical(
            object(),
            MetricUpdates(
                counters={
                    step1_name1: -3,
                    step2_name1: -5,
                },
                distributions={
                    step1_name1: DistributionData(8, 4, 1, 5),
                    step2_name2: DistributionData(8, 8, 1, 1),
                }))
        assert_query(
            [
                MetricResult(step1_name1, 0, 4),
                MetricResult(step1_name2, 0, 5),
                MetricResult(step2_name1, 0, -4),
            ],
            [
                MetricResult(
                    step1_name1, DistributionResult(dist_zero),
                    dist(11, 5, 1, 5)),
                MetricResult(
                    step2_name3, DistributionResult(dist_zero),
                    dist(8, 2, 4, 4)),
                MetricResult(
                    step2_name2, DistributionResult(dist_zero),
                    dist(8, 8, 1, 1)),
            ])

        # Stage 3: commit_logical.
        metrics.commit_logical(
            object(),
            MetricUpdates(
                counters={
                    step1_name1: 3,
                    step1_name2: 5,
                    step2_name1: -3,
                },
                distributions={
                    step1_name1: DistributionData(11, 5, 1, 5),
                    step2_name2: DistributionData(8, 8, 1, 1),
                    step2_name3: DistributionData(4, 1, 4, 4),
                }))
        assert_query(
            [
                MetricResult(step1_name1, 3, 4),
                MetricResult(step1_name2, 5, 5),
                MetricResult(step2_name1, -3, -4),
            ],
            [
                MetricResult(
                    step1_name1, dist(11, 5, 1, 5), dist(11, 5, 1, 5)),
                MetricResult(
                    step2_name3, dist(4, 1, 4, 4), dist(8, 2, 4, 4)),
                MetricResult(
                    step2_name2, dist(8, 8, 1, 1), dist(8, 8, 1, 1)),
            ])