Example #1
    def test_compare_metrics_reports_no_difference(self):
        data_points = mcu.get_data_points_from_metrics_reports(
            [_REPORT_3, _REPORT_3, _REPORT_3])
        metrics_difference_report = mcu.compare_metrics(
            data_points, _REPORT_3, config={'base_expression': 'v == v_mean'})

        # The latest metrics match the previous ones exactly, so the difference
        # report should be empty.
        self.assertEqual(metrics_difference_report, '')
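In these tests, the config's 'base_expression' is a boolean check evaluated once per metric: v is the latest value, while v_mean and v_stddev come from the historical data points. As a rough illustration only (not the actual metrics_compare_utils code), the check could be evaluated like this; the use of a population standard deviation is an assumption, chosen because it matches the Stddev values printed in the later examples:

import statistics

def check_expression(expression, history, latest_value):
    # 'history' holds one metric's values across the previous reports.
    # The expression sees the latest value as 'v', plus 'v_mean' and
    # 'v_stddev' computed from that history.
    v_mean = statistics.mean(history)
    v_stddev = statistics.pstdev(history)
    return eval(expression, {'v': latest_value,
                             'v_mean': v_mean,
                             'v_stddev': v_stddev})

# With three identical reports, 'v == v_mean' holds, so no report line is emitted.
assert check_expression('v == v_mean', [1.54, 1.54, 1.54], 1.54)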
Example #2
    def test_get_data_points_from_metrics_reports(self):
        correct_dict = {
            'InboundData__TotalSamples': [1728.0, 73216.0, 73216.0],
            'InboundData__Accumulator_mb': [10000.0, 64750000.0, 64750.0],
            'InboundData__Percentile_1_mb': [0.393, 0.393, 0.393],
            'InboundData__Percentile_5_mb': [0.393, 0.393, 0.393],
            'InboundData__Percentile_10_mb': [0.786, 0.786, 0.786],
            'InboundData__Percentile_20_mb': [1.54, 1.54, 1.54],
            'InboundData__Percentile_50_mb': [1.54, 1.54, 1.54],
            'InboundData__Percentile_80_mb': [1.54, 1.54, 1.54],
            'InboundData__Percentile_90_mb': [1.54, 1.54, 1.54],
            'InboundData__Percentile_95_mb': [1.54, 1.54, 1.54],
            'InboundData__Percentile_99_mb': [1.54, 1.54, 1.54],
            'TransferToServerTime__TotalSamples': [2616.0, 247016.0, 247016.0],
            'TransferToServerTime__Accumulator_sec':
            [89.615, 407467.495546299, 1.0],
            'TransferToServerTime__Percentile_1_sec':
            [300.003, 300.003, 300.003],
            'TransferToServerTime__Percentile_5_sec':
            [300.004, 300.004, 300.004],
            'TransferToServerTime__Percentile_10_sec':
            [300.01, 300.01, 300.01],
            'TransferToServerTime__Percentile_20_sec':
            [300.015, 300.015, 300.015],
            'TransferToServerTime__Percentile_50_sec':
            [300.026, 300.026, 300.026],
            'TransferToServerTime__Percentile_80_sec':
            [300.035, 300.035, 300.035],
            'TransferToServerTime__Percentile_90_sec':
            [300.082, 300.082, 300.082],
            'TransferToServerTime__Percentile_95_sec':
            [300.108, 300.108, 300.108],
            'TransferToServerTime__Percentile_99_sec':
            [300.129, 300.129, 300.129],
            'UniqueMetric__TotalSamples': [None, None, 9000.0],
            'UniqueMetric__Accumulator': [None, None, 9000.0],
            'UniqueMetric__Percentile_1': [None, None, 8902.0],
            'UniqueMetric__Percentile_5': [None, None, 89010.0],
            'UniqueMetric__Percentile_10': [None, None, 8920.0],
            'UniqueMetric__Percentile_20': [None, None, 8940.0],
            'UniqueMetric__Percentile_50': [None, None, 9000.0],
            'UniqueMetric__Percentile_80': [None, None, 9060.0],
            'UniqueMetric__Percentile_90': [None, None, 9080.0],
            'UniqueMetric__Percentile_95': [None, None, 9090.0],
            'UniqueMetric__Percentile_99': [None, None, 9098.0],
            'CachedSyncTensors__Value': [11336, 1022168, 1022168],
            'CreateCompileHandles__Value': [40, 40, 40],
            'CreateDataHandles__Value': [407992, 576462152, 576462152],
            'UniqueCounter__Value': [None, None, 9000]
        }

        self.assertTrue(
            self._dict_almost_equal(
                mcu.get_data_points_from_metrics_reports(
                    [_REPORT_1, _REPORT_2, _REPORT_3]), correct_dict))
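The dictionary above also shows the key convention: each key is '<MetricName>__<Field>', and each value is a list aligned with the input reports, with None filling the slots where a metric was absent (see UniqueMetric and UniqueCounter). A minimal sketch of that merge step, assuming each report has already been parsed into a flat per-report dict (the parsing itself is not shown, and this is not the actual mcu implementation):

def merge_report_dicts(report_dicts):
    # Each element is one parsed report: {'<MetricName>__<Field>': value}.
    # Metrics missing from a given report get None in that report's slot.
    all_keys = set()
    for report in report_dicts:
        all_keys.update(report)
    return {key: [report.get(key) for report in report_dicts]
            for key in sorted(all_keys)}

merged = merge_report_dicts([
    {'CreateCompileHandles__Value': 40},
    {'CreateCompileHandles__Value': 40, 'UniqueCounter__Value': 9000},
])
assert merged['UniqueCounter__Value'] == [None, 9000]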
Example #3
    def test_compare_metrics_reports_value_difference_tolerance_loose(self):
        data_points = mcu.get_data_points_from_metrics_reports(
            [_REPORT_3, _REPORT_3, _REPORT_3_SLIGHTLY_DIFFERENT_VALUES])
        metrics_difference_report = mcu.compare_metrics(
            data_points,
            _REPORT_3_SLIGHTLY_DIFFERENT_VALUES,
            config={'base_expression': 'v <= v_mean + (v_stddev * 2.0)'})

        # The expression allows values up to two standard deviations above the
        # mean, so the small differences in values are not big enough to
        # trigger lines in the difference report.
        self.assertEqual(metrics_difference_report, '')
Example #4
    def test_compare_metrics_reports_new_counters(self):
        data_points = mcu.get_data_points_from_metrics_reports(
            [_REPORT_3, _REPORT_3, _REPORT_3_SLIGHTLY_DIFFERENT_VALUES])
        metrics_difference_report = mcu.compare_metrics(
            data_points,
            _REPORT_3_WITH_NEW_COUNTERS,
            config={'base_expression': 'v <= v_mean + (v_stddev * 2.0)'})

        # The small value differences stay within the two-standard-deviation
        # tolerance, but the counter that only appears in
        # _REPORT_3_WITH_NEW_COUNTERS should still be reported.
        expected_report = 'Found new aten counter: aten::_local_scalar_dense__Value: 73216\n'
        self.assertEqual(metrics_difference_report, expected_report)
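This test expects counters that are new in the latest report to be called out even though the numeric differences stay within tolerance. A sketch of how such a check might look, assuming new counters are detected by name against the historical keys (illustrative only, not the actual mcu code):

def find_new_aten_counters(historical_keys, latest_counters):
    # Counters prefixed with 'aten::' that never appeared in the historical
    # data points are surfaced as 'Found new aten counter' lines.
    lines = []
    for name, value in sorted(latest_counters.items()):
        if name.startswith('aten::') and name not in historical_keys:
            lines.append('Found new aten counter: {}: {}\n'.format(name, value))
    return ''.join(lines)

report = find_new_aten_counters(
    historical_keys={'CreateCompileHandles__Value'},
    latest_counters={'aten::_local_scalar_dense__Value': 73216})
assert report == 'Found new aten counter: aten::_local_scalar_dense__Value: 73216\n'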
Example #5
    def test_compare_metrics_reports_value_difference_tolerance_strict(self):
        data_points = mcu.get_data_points_from_metrics_reports(
            [_REPORT_3, _REPORT_3, _REPORT_3_SLIGHTLY_DIFFERENT_VALUES])
        metrics_difference_report = mcu.compare_metrics(
            data_points,
            _REPORT_3_SLIGHTLY_DIFFERENT_VALUES,
            config={'base_expression': 'v == v_mean'})

        # The expression requires an exact match with the mean, so even a tiny
        # difference leads to a line in the metrics_difference_report.
        expected_report = (
            'InboundData__Accumulator_mb failed its expression check. '
            'Expression: v == v_mean.  Mean: 68083.33333333333.  '
            'Stddev: 4714.045207910317.  Actual Value: 74750.0\n'
            'InboundData__TotalSamples failed its expression check. '
            'Expression: v == v_mean.  Mean: 72144.0.  '
            'Stddev: 1516.0369388639579.  Actual Value: 70000.0\n'
            'UniqueCounter__Value failed its expression check. '
            'Expression: v == v_mean.  Mean: 9333.0.  '
            'Stddev: 470.93311627024065.  Actual Value: 9999\n')
        self.assertEqual(metrics_difference_report, expected_report)
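The expected_report strings in these tests follow one line format per failing metric. The helper below reproduces that format for illustration; it is inferred from the expected strings, not taken from metrics_compare_utils itself:

def failure_line(name, expression, mean, stddev, value):
    # One line per metric that fails its expression check, matching the
    # expected_report strings above.
    return ('{} failed its expression check. Expression: {}.  '
            'Mean: {}.  Stddev: {}.  Actual Value: {}\n').format(
                name, expression, mean, stddev, value)

assert failure_line(
    'UniqueCounter__Value', 'v == v_mean', 9333.0, 470.93311627024065,
    9999) == ('UniqueCounter__Value failed its expression check. '
              'Expression: v == v_mean.  Mean: 9333.0.  '
              'Stddev: 470.93311627024065.  Actual Value: 9999\n')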
Example #6
    def test_parse_real_metrics(self):
        print(
            "Testing against TPU. If this hangs, check that $XRT_TPU_CONFIG is set"
        )
        x = torch.rand(3, 5, device=xm.xla_device())
        x = torch.flatten(x, 1)
        x = torch.roll(x, 1, 0)
        x = torch.flip(x, [0, 1])
        self.assertEqual(x.device.type, 'xla')
        metrics = met.metrics_report()
        self.assertTrue(metrics)
        data_points = mcu.get_data_points_from_metrics_reports([metrics])
        self.assertIn('CompileTime__Percentile_99_sec', data_points.keys())
        self.assertIn('CompileTime__TotalSamples', data_points.keys())
Example #7
    def test_compare_metrics_reports_value_difference_tolerance_custom(self):
        data_points = mcu.get_data_points_from_metrics_reports(
            [_REPORT_3, _REPORT_3, _REPORT_3_SLIGHTLY_DIFFERENT_VALUES])
        metrics_difference_report = mcu.compare_metrics(
            data_points,
            _REPORT_3_SLIGHTLY_DIFFERENT_VALUES,
            config={
                'base_expression': 'True',
                'InboundData__Accumulator_mb_expression': 'v < v_mean',
                'UniqueCounter__Value_expression': 'v < v_mean',
            })

        # The base expression 'True' passes every metric by default. The two
        # metrics with custom 'v < v_mean' expressions fail because their
        # latest values rose above the mean, so only those two generate lines.
        expected_report = (
            'InboundData__Accumulator_mb failed its expression check. '
            'Expression: v < v_mean.  Mean: 68083.33333333333.  '
            'Stddev: 4714.045207910317.  Actual Value: 74750.0\n'
            'UniqueCounter__Value failed its expression check. '
            'Expression: v < v_mean.  Mean: 9333.0.  '
            'Stddev: 470.93311627024065.  Actual Value: 9999\n')
        self.assertEqual(metrics_difference_report, expected_report)
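The config here combines a permissive base expression with per-metric overrides whose keys end in '_expression'. One way such a lookup could work, assuming the override-key convention implied by the config above (a sketch, not the actual mcu logic):

def expression_for_metric(metric_name, config):
    # A '<metric name>_expression' entry overrides 'base_expression'
    # for that metric; everything else uses the base expression.
    return config.get(metric_name + '_expression', config['base_expression'])

config = {
    'base_expression': 'True',
    'UniqueCounter__Value_expression': 'v < v_mean',
}
assert expression_for_metric('UniqueCounter__Value', config) == 'v < v_mean'
assert expression_for_metric('CachedSyncTensors__Value', config) == 'True'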
Example #8
    config.update(c)

  # Collect historical metrics for this test and check for any regressions in
  # the current run vs. the averages from previous runs.
  metrics_storage_dir = os.path.join(
      FLAGS.root, FLAGS.test_folder_name, _METRICS_HISTORY_DIR_NAME)
  metrics_storage_dir += '/'
  regression_test_config = config.get('regression_test_config', None)
  if regression_test_config:
    metrics_file_pattern = re.compile(_METRICS_FILE_PATTERN)
    prev_metrics_files = [f for f in gcsfs.list(
        metrics_storage_dir) if metrics_file_pattern.match(f.path)]
    prev_metrics_strings = [gcsfs.open(
        os.path.join(FLAGS.root, f.path), mode='rt').read() for f in
        prev_metrics_files]
    data_points = mcu.get_data_points_from_metrics_reports(
        prev_metrics_strings)
    regression_report = mcu.compare_metrics(
        data_points, metrics, config=regression_test_config)
  else:
    print('Unable to check for metrics regressions. Config should contain '
          '"regression_test_config" key -- see example at the top of '
          'metrics_test_wrapper.py.', file=sys.stderr)
    regression_report = ''

  # Write the metrics from the current run to disk unless disabled by config.
  if config.get('write_metrics_to_disk', True):
    # Include the params for this invocation when saving metrics.
    output_string = '{}\n\n{}'.format(FLAGS, metrics)
    output_filename = os.path.join(
        metrics_storage_dir, datetime.datetime.utcnow().strftime('%Y_%m_%d'))
    _write_to_disk(output_string, output_filename)
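The excerpt filters previous metrics files with _METRICS_FILE_PATTERN and reads them via gcsfs, both defined elsewhere in metrics_test_wrapper.py. Below is a hypothetical stand-in for that pattern, chosen only to be consistent with the date-named files written above (the output filename uses strftime('%Y_%m_%d')); the real constant may differ:

import re

# Hypothetical stand-in for _METRICS_FILE_PATTERN; matches paths that end
# in a YYYY_MM_DD suffix like the filenames written by the code above.
_METRICS_FILE_PATTERN = r'.*\d{4}_\d{2}_\d{2}$'

assert re.compile(_METRICS_FILE_PATTERN).match('metrics_history/2020_01_31')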