Example #1
    def test_compute_bounds_stddevs_from_mean(self,
                                              comparison,
                                              std_devs,
                                              expected_bounds,
                                              inclusive=False):
        # mean = 3, stddev = ~1.414
        value_history = range(1, 6)

        class _FakeMetricStore:
            def get_metric_history(*args, **kwargs):
                return [
                    bigquery_client.MetricHistoryRow('', '',
                                                     datetime.datetime.now(),
                                                     '', v)
                    for v in value_history
                ]

        assertion = metrics_pb2.Assertion(
            std_devs_from_mean=metrics_pb2.Assertion.StdDevsFromMean(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                std_devs=std_devs,
            ),
            inclusive_bounds=inclusive,
        )
        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
            None, _FakeMetricStore())
        bounds = collector.compute_bounds("metric_key", assertion)
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(bounds),
            # EQUAL is always inclusive
            dataclasses.astuple(
                utils.Bounds(*expected_bounds, inclusive
                             or comparison == 'EQUAL')),
            places=3)
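
For reference, the expected bounds in this test can be reproduced directly from the fake metric history. A minimal sketch of that arithmetic, assuming the collector uses NumPy's population standard deviation (as compute_bounds in Example #9 below does) and a hypothetical std_devs=1 with a WITHIN comparison:

import numpy as np

# Same history the fake metric store returns: values 1 through 5.
values = list(range(1, 6))
mean = np.mean(values)     # 3.0
stddev = np.std(values)    # population standard deviation, ~1.414

# For a WITHIN comparison with std_devs=1 (hypothetical), compute_bounds
# sets the bounds to mean -/+ std_devs * stddev.
lower = mean - 1 * stddev  # ~1.586
upper = mean + 1 * stddev  # ~4.414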
Example #2
    def test_compute_bounds_percent_difference_with_mean_value(
            self, comparison, pct_diff, expected_bounds, inclusive=False):
        # mean = 3
        value_history = range(1, 6)

        class _FakeMetricStore:
            def get_metric_history(*args, **kwargs):
                return [
                    bigquery_client.MetricHistoryRow('', '',
                                                     datetime.datetime.now(),
                                                     '', v)
                    for v in value_history
                ]

        assertion = metrics_pb2.Assertion(
            percent_difference=metrics_pb2.Assertion.PercentDifference(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                use_historical_mean=True,
                percent=pct_diff,
            ),
            inclusive_bounds=inclusive,
        )
        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
            None, _FakeMetricStore())
        bounds = collector.compute_bounds("metric_key", assertion)
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(bounds),
            dataclasses.astuple(utils.Bounds(*expected_bounds, inclusive)),
            places=3)
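
Similarly, the bounds here follow from the historical mean of 3. A minimal sketch, assuming a hypothetical pct_diff of 0.2 and a WITHIN comparison (compute_bounds in Example #9 below sets both bounds in that case):

import numpy as np

values = list(range(1, 6))
target = np.mean(values)             # historical mean = 3.0
pct_diff = 0.2                       # hypothetical percent difference

lower = target - pct_diff * target   # 2.4
upper = target + pct_diff * target   # 3.6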
Example #3
 def test_assert_duration(self):
     metric_source = metrics_pb2.MetricSource(
         literals=metrics_pb2.LiteralSource(
             assertions={
                 "duration":
                 metrics_pb2.Assertion(
                     within_bounds=metrics_pb2.Assertion.WithinBounds(
                         lower_bound=100,
                         upper_bound=200,
                     ),
                     inclusive_bounds=False,
                 )
             }))
     event = metrics_pb2.TestCompletedEvent(
         benchmark_id="test_benchmark",
         duration=duration_pb2.Duration(seconds=150),
         metric_collection_config=metrics_pb2.MetricCollectionConfig(
             sources=[metric_source]))
     collector = literal_collector.LiteralCollector(
         event=event, raw_source=metric_source)
     points = collector.metric_points()
     self.assertLen(points, 1)
     self.assertEqual(points[0].metric_key, 'duration')
     self.assertEqual(points[0].metric_value, 150)
     self.assertEqual(points[0].bounds, utils.Bounds(100, 200, False))
Example #4
 def test_compute_bounds_within_bounds(self,
                                       lower,
                                       upper,
                                       expected_bounds,
                                       inclusive=False):
     assertion = metrics_pb2.Assertion(
         within_bounds=metrics_pb2.Assertion.WithinBounds(
             lower_bound=lower,
             upper_bound=upper,
         ),
         inclusive_bounds=inclusive,
     )
     collector = base.BaseCollector(
         metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
         None)
     bounds = collector.compute_bounds("metric_key", assertion)
     self.assertEqual(bounds, utils.Bounds(*expected_bounds, inclusive))
Example #5
    def test_aggregate_metrics_with_assertion(self):
        metric_source = metrics_pb2.MetricSource(
            tensorboard=metrics_pb2.TensorBoardSource(
                include_tags=[
                    metrics_pb2.TensorBoardSource.TagStrategy(
                        tag_pattern="eval/*",
                        strategies=[
                            metrics_pb2.TensorBoardSource.FINAL,
                            metrics_pb2.TensorBoardSource.MAX,
                            metrics_pb2.TensorBoardSource.MIN,
                        ])
                ],
                aggregate_assertions=[
                    metrics_pb2.TensorBoardSource.AggregateAssertion(
                        tag='eval/accuracy',
                        strategy=metrics_pb2.TensorBoardSource.MAX,
                        assertion=metrics_pb2.Assertion(
                            within_bounds=metrics_pb2.Assertion.WithinBounds(
                                lower_bound=.4,
                                upper_bound=1.0,
                            ),
                            inclusive_bounds=True,
                        ))
                ]))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=self.temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))
        collector = tensorboard_collector.TensorBoardCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())

        self.assertCountEqual(
            points,
            [
                utils.MetricPoint('eval/accuracy_max', .5,
                                  utils.Bounds(.4, 1.0, True)),
                utils.MetricPoint('eval/accuracy_min', .125, utils.NO_BOUNDS),
                utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
            ],
        )
Example #6
 def test_compute_bounds_percent_difference_with_target_value(
         self,
         comparison,
         target,
         pct_diff,
         expected_bounds,
         inclusive=False):
     assertion = metrics_pb2.Assertion(
         percent_difference=metrics_pb2.Assertion.PercentDifference(
             comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
             value=target,
             percent=pct_diff,
         ),
         inclusive_bounds=inclusive,
     )
     collector = base.BaseCollector(
         metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
         None)
     bounds = collector.compute_bounds("metric_key", assertion)
     self.assertEqual(bounds, utils.Bounds(*expected_bounds, inclusive))
Example #7
    def test_include_and_exclude(self):
        metric_source = metrics_pb2.MetricSource(
            tensorboard=metrics_pb2.TensorBoardSource(
                include_tags=[
                    metrics_pb2.TensorBoardSource.TagStrategy(
                        tag_pattern="*",
                        strategies=[
                            metrics_pb2.TensorBoardSource.FINAL,
                        ])
                ],
                exclude_tags=[
                    'foo',
                    'train/*',
                ],
                aggregate_assertions=[
                    metrics_pb2.TensorBoardSource.AggregateAssertion(
                        tag='foo',
                        strategy=metrics_pb2.TensorBoardSource.MIN,
                        assertion=metrics_pb2.Assertion(
                            within_bounds=metrics_pb2.Assertion.WithinBounds(
                                lower_bound=0.,
                                upper_bound=2.,
                            )))
                ]))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=self.temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))
        collector = tensorboard_collector.TensorBoardCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())

        self.assertCountEqual(
            points,
            [
                utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
                utils.MetricPoint('foo_min', 1, utils.Bounds(0., 2., False)),
            ],
        )
Example #8
    def test_compute_bounds_fixed_value(self,
                                        comparison,
                                        threshold_value,
                                        expected_bounds,
                                        inclusive=False):

        assertion = metrics_pb2.Assertion(
            fixed_value=metrics_pb2.Assertion.FixedValue(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                value=threshold_value,
            ),
            inclusive_bounds=inclusive,
        )
        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
            None)
        bounds = collector.compute_bounds("metric_key", assertion)
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(bounds),
            # EQUAL is always inclusive
            dataclasses.astuple(
                utils.Bounds(*expected_bounds, inclusive
                             or comparison == 'EQUAL')),
            places=3)
Example #9
    def compute_bounds(self, metric_key: str,
                       assertion: metrics_pb2.Assertion) -> utils.Bounds:
        """Returns the bounds for a given metric, based on the given assertion.

        This method may result in database calls to gather historical data for
        some types of assertions.

        Args:
          metric_key: Unique string identifying the name of the metric.
          assertion: The assertion that will be used to define the bounds.

        Returns:
          An instance of utils.Bounds representing the metric bounds.
        """
        if assertion is None:
            return utils.NO_BOUNDS

        lower_bound = -math.inf
        upper_bound = math.inf
        inclusive = assertion.inclusive_bounds

        assertion_type = assertion.WhichOneof('assertion_type')
        if assertion_type == 'fixed_value':
            c = assertion.fixed_value.comparison
            if c == metrics_pb2.Assertion.LESS:
                upper_bound = assertion.fixed_value.value
            elif c == metrics_pb2.Assertion.GREATER:
                lower_bound = assertion.fixed_value.value
            elif c == metrics_pb2.Assertion.EQUAL:
                lower_bound = assertion.fixed_value.value
                upper_bound = assertion.fixed_value.value
                inclusive = True
        elif assertion_type == 'within_bounds':
            lower_bound = assertion.within_bounds.lower_bound
            upper_bound = assertion.within_bounds.upper_bound
        elif assertion_type == 'std_devs_from_mean':
            values = self.get_metric_history(metric_key, assertion.time_window,
                                             assertion.min_timestamp)

            # Standard deviation not defined for n < 2
            min_num_points = max(assertion.wait_for_n_data_points, 2)
            if len(values) < min_num_points:
                logging.info(
                    'Not enough data points to compute bounds for %s. '
                    'Need %d points, have %d.', metric_key, min_num_points,
                    len(values))
                return utils.NO_BOUNDS

            mean = np.mean(values)
            stddev = np.std(values)
            c = assertion.std_devs_from_mean.comparison
            if c in (metrics_pb2.Assertion.LESS, metrics_pb2.Assertion.WITHIN):
                upper_bound = mean + (stddev *
                                      assertion.std_devs_from_mean.std_devs)
            if c in (metrics_pb2.Assertion.GREATER,
                     metrics_pb2.Assertion.WITHIN):
                lower_bound = mean - (stddev *
                                      assertion.std_devs_from_mean.std_devs)

            if upper_bound == math.inf and lower_bound == -math.inf:
                logging.error(
                    '%s: comparison %s is not implemented for assertion type `%s`',
                    metric_key, metrics_pb2.Assertion.Comparison.Name(c),
                    assertion_type)
                return utils.NO_BOUNDS
        elif assertion_type == 'percent_difference':
            target_type = assertion.percent_difference.WhichOneof(
                'target_type')
            if target_type == 'use_historical_mean':
                values = self.get_metric_history(metric_key,
                                                 assertion.time_window,
                                                 assertion.min_timestamp)

                # Mean not defined for n < 1.
                min_num_points = max(assertion.wait_for_n_data_points, 1)
                if len(values) < min_num_points:
                    logging.info(
                        'Not enough data points to compute bounds for %s. '
                        'Need %d points, have %d.', metric_key, min_num_points,
                        len(values))
                    return utils.NO_BOUNDS
                target = np.mean(values)
            elif target_type == 'value':
                target = assertion.percent_difference.value
            else:
                logging.error(
                    '%s: No `target_type` defined for assertion type `%s`.',
                    metric_key, assertion_type)
                return utils.NO_BOUNDS

            c = assertion.percent_difference.comparison
            if c in (metrics_pb2.Assertion.LESS, metrics_pb2.Assertion.WITHIN):
                upper_bound = target + (assertion.percent_difference.percent *
                                        target)
            if c in (metrics_pb2.Assertion.GREATER,
                     metrics_pb2.Assertion.WITHIN):
                lower_bound = target - (assertion.percent_difference.percent *
                                        target)

            if upper_bound == math.inf and lower_bound == -math.inf:
                logging.error(
                    '%s: comparison %s is not implemented for assertion type `%s`',
                    metric_key, metrics_pb2.Assertion.Comparison.Name(c),
                    assertion_type)
                return utils.NO_BOUNDS

        return utils.Bounds(lower_bound, upper_bound, inclusive)
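
A minimal usage sketch for the fixed_value path, which needs no metric history (Example #8 above constructs BaseCollector without a metric store for exactly this case); the comparison and numbers are illustrative only:

assertion = metrics_pb2.Assertion(
    fixed_value=metrics_pb2.Assertion.FixedValue(
        comparison=metrics_pb2.Assertion.Comparison.Value('GREATER'),
        value=100.,
    ),
    inclusive_bounds=False,
)
collector = base.BaseCollector(
    metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"), None)
# GREATER sets only the lower bound; the upper bound stays at math.inf.
bounds = collector.compute_bounds("metric_key", assertion)
# bounds == utils.Bounds(100.0, math.inf, False)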
Example #10
    def test_get_metrics_from_perfzero_summary(self):
        temp_dir = self.create_tempdir().full_path
        summary_dir = os.path.join(temp_dir, 'date_and_time')
        pathlib.Path(summary_dir).mkdir(parents=True, exist_ok=True)
        summary_path = os.path.join(summary_dir, 'perfzero_summary.json')
        with open(summary_path, 'w') as f:
            json.dump(
                {
                    "execution_id": "execution_id",
                    "execution_timestamp": 1234567890.1,
                    "benchmark_result": {
                        "wall_time":
                        1234,
                        "metrics": [{
                            "name": "exp_per_second",
                            "value": 1.1,
                        }, {
                            "name": "avg_exp_per_second",
                            "value": 2.2,
                        }, {
                            "name": "startup_time",
                            "value": 3.3
                        }],
                    },
                    "benchmark_info": {
                        "not": "important",
                    },
                    "setup_info": {},
                    "ml_framework_info": {
                        "not": "important",
                    },
                    "system_info": {
                        "not": "important"
                    },
                    "process_info": {
                        "max_rss": 4.4,
                        "max_vms": 5.5,
                        "max_cpu_percent": 6.6,
                    }
                }, f)

        metric_source = metrics_pb2.MetricSource(
            perfzero=metrics_pb2.PerfZeroSource(
                assertions={
                    'total_wall_time':
                    metrics_pb2.Assertion(
                        within_bounds=metrics_pb2.Assertion.WithinBounds(
                            lower_bound=1230,
                            upper_bound=1240,
                        )),
                    'exp_per_second':
                    metrics_pb2.Assertion(
                        within_bounds=metrics_pb2.Assertion.WithinBounds(
                            lower_bound=1,
                            upper_bound=100,
                        ), )
                }))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))

        collector = perfzero_collector.PerfZeroCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())
        self.assertCountEqual(
            points,
            {
                utils.MetricPoint("total_wall_time", 1234,
                                  utils.Bounds(1230., 1240., False)),
                utils.MetricPoint("exp_per_second", 1.1,
                                  utils.Bounds(1., 100., False)),
                utils.MetricPoint("avg_exp_per_second", 2.2, utils.NO_BOUNDS),
                utils.MetricPoint("startup_time", 3.3, utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_rss", 4.4,
                                  utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_vms", 5.5,
                                  utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_cpu_percent", 6.6,
                                  utils.NO_BOUNDS),
            },
        )