Example #1
def test_add_raw_data():
    """Test interface BenchmarkResult.add_raw_data()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', 'raw log 1', False)
    result.add_raw_data('metric1', 'raw log 2', False)
    assert (result.raw_data['metric1'][0] == 'raw log 1')
    assert (result.raw_data['metric1'][1] == 'raw log 2')
    assert (result.type == BenchmarkType.MICRO)
    assert (result.return_code == ReturnCode.SUCCESS)

    result = BenchmarkResult('model', BenchmarkType.MODEL, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', [1, 2, 3], False)
    result.add_raw_data('metric1', [4, 5, 6], False)
    assert (result.raw_data['metric1'][0] == [1, 2, 3])
    assert (result.raw_data['metric1'][1] == [4, 5, 6])
    assert (result.type == BenchmarkType.MODEL)
    assert (result.return_code == ReturnCode.SUCCESS)

    # Test log_raw_data = True.
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', 'raw log 1', True)
    result.add_raw_data('metric1', 'raw log 2', True)
    assert (result.type == BenchmarkType.MICRO)
    assert (result.return_code == ReturnCode.SUCCESS)
    raw_data_file = os.path.join(os.getcwd(), 'rawdata.log')
    assert (os.path.isfile(raw_data_file))
    os.remove(raw_data_file)
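
A related note: when log_raw_data is True, add_raw_data() also writes the raw data to rawdata.log in the current working directory, which is why the test checks for that file and then removes it. A minimal sketch of the same check isolated to a temporary directory, assuming pytest's built-in tmp_path and monkeypatch fixtures and the same imports as the snippet above:

def test_add_raw_data_logging(tmp_path, monkeypatch):
    """Variant of the check above that keeps rawdata.log out of the working tree."""
    monkeypatch.chdir(tmp_path)  # add_raw_data(..., True) writes relative to the current working directory
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_raw_data('metric1', 'raw log 1', True)
    assert (tmp_path / 'rawdata.log').is_file()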
Example #2
    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Return:
            True if _preprocess() succeeds.
        """
        self.add_parser_arguments()
        ret, self._args, unknown = self.parse_args()

        if not ret:
            self._result = BenchmarkResult(self._name, self._benchmark_type,
                                           ReturnCode.INVALID_ARGUMENT)
            return False

        self._result = BenchmarkResult(self._name,
                                       self._benchmark_type,
                                       ReturnCode.SUCCESS,
                                       run_count=self._args.run_count)

        if not isinstance(self._benchmark_type, BenchmarkType):
            logger.error(
                'Invalid benchmark type - benchmark: {}, type: {}'.format(
                    self._name, type(self._benchmark_type)))
            self._result.set_return_code(ReturnCode.INVALID_BENCHMARK_TYPE)
            return False

        return True
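
Both failure paths above still leave a BenchmarkResult behind, so the caller can read a return code even when preprocessing aborts early. A minimal sketch of what each path ends up with, constructing the result objects directly outside any benchmark class (the 'demo' name is purely illustrative):

failed_parse = BenchmarkResult('demo', BenchmarkType.MICRO, ReturnCode.INVALID_ARGUMENT)
assert failed_parse.return_code == ReturnCode.INVALID_ARGUMENT

bad_type = BenchmarkResult('demo', BenchmarkType.MICRO, ReturnCode.SUCCESS)
bad_type.set_return_code(ReturnCode.INVALID_BENCHMARK_TYPE)
assert bad_type.return_code == ReturnCode.INVALID_BENCHMARK_TYPE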
Example #3
def test_fambench():
    """Test FAMBench benchmarks."""
    benchmark_name = 'fambench'
    (benchmark_class,
     predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CUDA)
    assert (benchmark_class)
    benchmark = benchmark_class(benchmark_name)
    assert (benchmark._benchmark_type == BenchmarkType.DOCKER)
    assert (benchmark._image_uri == 'superbench/benchmark:cuda11.1.1-fambench')
    assert (benchmark._container_name == 'fambench-benchmarks')
    assert (benchmark._entrypoint == '/workspace/FAMBench/benchmarks/run_all_benchmarks.sh')
    assert (benchmark._cmd is None)
    benchmark._result = BenchmarkResult(benchmark._name, benchmark._benchmark_type, ReturnCode.SUCCESS)
    benchmark._args = SimpleNamespace(log_raw_data=False)

    raw_output = """
benchmark implementation mode config score units batch_latency_95_sec
DLRM OOTB eval tiny 152.800399 ex/s 0.515052
DLRM OOTB train tiny 35.483686 ex/s None
DLRM UBENCH train linear_[(2,2,2,2,2)] 3.679281e-07 TF/s None
XLMR OOTB eval default-config 1.015586 ex/s 16.463461
"""
    assert (benchmark._process_raw_result(0, raw_output))
    assert (benchmark.result['dlrm_ootb_eval_tiny_ex_s'][0] == 152.800399)
    assert (benchmark.result['dlrm_ootb_train_tiny_ex_s'][0] == 35.483686)
    assert (benchmark.result['dlrm_ubench_train_linear_[(2,2,2,2,2)]_tf_s'][0] == 3.679281e-07)
    assert (benchmark.result['xlmr_ootb_eval_default_config_ex_s'][0] == 1.015586)
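
The metric names asserted above follow directly from the columns of the raw table: the benchmark, implementation, mode, config, and units fields are joined with underscores, lower-cased, and '-' and '/' are replaced with underscores. A rough sketch of that naming rule as a hypothetical helper (not the benchmark's actual parser):

def fambench_metric_name(benchmark, implementation, mode, config, units):
    """Hypothetical helper reproducing the metric names asserted above."""
    name = '_'.join([benchmark, implementation, mode, config, units])
    return name.lower().replace('-', '_').replace('/', '_')

assert fambench_metric_name('DLRM', 'OOTB', 'eval', 'tiny', 'ex/s') == 'dlrm_ootb_eval_tiny_ex_s'
assert fambench_metric_name('XLMR', 'OOTB', 'eval', 'default-config', 'ex/s') == 'xlmr_ootb_eval_default_config_ex_s'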
Example #4
def test_add_result():
    """Test interface BenchmarkResult.add_result()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.add_result('metric1', 300)
    result.add_result('metric1', 200)
    assert (result.result['metric1'][0] == 300)
    assert (result.result['metric1'][1] == 200)
Example #5
    def test_tensorrt_inference_result_parsing(self, test_raw_log):
        """Test tensorrt-inference benchmark result parsing."""
        (benchmark_cls,
         _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
             self.benchmark_name, Platform.CUDA)
        benchmark = benchmark_cls(self.benchmark_name, parameters='')
        benchmark._args = SimpleNamespace(
            pytorch_models=['model_0', 'model_1'], log_raw_data=False)
        benchmark._result = BenchmarkResult(self.benchmark_name,
                                            BenchmarkType.MICRO,
                                            ReturnCode.SUCCESS,
                                            run_count=1)

        # Positive case - valid raw output
        self.assertTrue(benchmark._process_raw_result(0, test_raw_log))
        self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)

        self.assertEqual(6 + benchmark.default_metric_count,
                         len(benchmark.result))
        for tag in ['mean', '99']:
            self.assertEqual(0.5,
                             benchmark.result[f'model_0_gpu_time_{tag}'][0])
            self.assertEqual(0.6,
                             benchmark.result[f'model_0_host_time_{tag}'][0])
            self.assertEqual(
                1.0, benchmark.result[f'model_0_end_to_end_time_{tag}'][0])

        # Negative case - invalid raw output
        self.assertFalse(benchmark._process_raw_result(1,
                                                       'Invalid raw output'))
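
SimpleNamespace here stands in for the parsed arguments that _preprocess() would normally attach to benchmark._args, which lets the test exercise _process_raw_result() without running argument parsing. A minimal illustration (types.SimpleNamespace is standard library):

from types import SimpleNamespace

args = SimpleNamespace(pytorch_models=['model_0', 'model_1'], log_raw_data=False)
assert args.pytorch_models == ['model_0', 'model_1']
assert args.log_raw_data is False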
Example #6
def test_set_timestamp():
    """Test interface BenchmarkResult.set_timestamp()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    start_time = '2021-02-03 16:59:49'
    end_time = '2021-02-03 17:00:08'
    result.set_timestamp(start_time, end_time)
    assert (result.start_time == start_time)
    assert (result.end_time == end_time)
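
The timestamps are plain strings, so values in the same format can be produced with the standard library; a minimal sketch using datetime.strftime (the format string is inferred from the values above):

from datetime import datetime

start_time = datetime(2021, 2, 3, 16, 59, 49).strftime('%Y-%m-%d %H:%M:%S')
assert start_time == '2021-02-03 16:59:49'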
Example #7
def test_set_return_code():
    """Test interface BenchmarkResult.set_return_code()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    assert (result.return_code == ReturnCode.SUCCESS)
    assert (result.result['return_code'] == [ReturnCode.SUCCESS.value])
    result.set_return_code(ReturnCode.INVALID_ARGUMENT)
    assert (result.return_code == ReturnCode.INVALID_ARGUMENT)
    assert (result.result['return_code'] == [ReturnCode.INVALID_ARGUMENT.value])
    result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
    assert (result.return_code == ReturnCode.INVALID_BENCHMARK_RESULT)
    assert (result.result['return_code'] == [ReturnCode.INVALID_BENCHMARK_RESULT.value])
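
Unlike add_result(), which appends each value (see Example #4), set_return_code() keeps 'return_code' as a single-element list and overwrites it, as the assertions above show. A minimal sketch of that contrast, using only the interfaces already shown in these examples:

result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
result.add_result('metric1', 100)
result.add_result('metric1', 200)
result.set_return_code(ReturnCode.INVALID_ARGUMENT)
result.set_return_code(ReturnCode.INVALID_BENCHMARK_RESULT)
assert len(result.result['metric1']) == 2  # add_result() appends
assert result.result['return_code'] == [ReturnCode.INVALID_BENCHMARK_RESULT.value]  # set_return_code() overwrites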
Example #8
def test_serialize_deserialize():
    """Test serialization/deserialization and compare the results."""
    # Result with one metric.
    result = BenchmarkResult('pytorch-bert-base1', BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=2)
    result.add_result('metric1', 300, ReduceType.MAX)
    result.add_result('metric1', 200, ReduceType.MAX)
    result.add_result('metric2', 100, ReduceType.AVG)
    result.add_raw_data('metric1', [1, 2, 3], False)
    result.add_raw_data('metric1', [4, 5, 6], False)
    result.add_raw_data('metric1', [7, 8, 9], False)
    start_time = '2021-02-03 16:59:49'
    end_time = '2021-02-03 17:00:08'
    result.set_timestamp(start_time, end_time)
    result.set_benchmark_type(BenchmarkType.MICRO)

    expected = (
        '{"name": "pytorch-bert-base1", "type": "micro", "run_count": 2, "return_code": 0, '
        '"start_time": "2021-02-03 16:59:49", "end_time": "2021-02-03 17:00:08", '
        '"raw_data": {"metric1": [[1, 2, 3], [4, 5, 6], [7, 8, 9]]}, '
        '"result": {"return_code": [0], "metric1": [300, 200], "metric2": [100]}, '
        '"reduce_op": {"return_code": null, "metric1": "max", "metric2": "avg"}}'
    )
    assert (result.to_string() == expected)
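
The expected string is plain JSON, so the round trip can be checked with the standard library alone; a minimal sketch using json.loads (no SuperBench deserialization API is assumed here):

import json

data = json.loads(result.to_string())
assert data['name'] == 'pytorch-bert-base1'
assert data['result']['metric1'] == [300, 200]
assert data['reduce_op']['metric2'] == 'avg'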
Example #9
def test_set_benchmark_type():
    """Test interface BenchmarkResult.set_benchmark_type()."""
    result = BenchmarkResult('micro', BenchmarkType.MICRO, ReturnCode.SUCCESS)
    result.set_benchmark_type(BenchmarkType.MICRO)
    assert (result.type == BenchmarkType.MICRO)
Example #10
def test_rocm_onnxruntime_performance():
    """Test onnxruntime model benchmark."""
    benchmark_name = 'onnxruntime-ort-models'
    (benchmark_class, predefine_params
     ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
         benchmark_name, Platform.ROCM)
    assert (benchmark_class)
    benchmark = benchmark_class(benchmark_name)
    assert (benchmark._benchmark_type == BenchmarkType.DOCKER)
    assert (benchmark._image_uri ==
            'superbench/benchmark:rocm4.3.1-onnxruntime1.9.0')
    assert (benchmark._container_name == 'rocm-onnxruntime-model-benchmarks')
    assert (
        benchmark._entrypoint ==
        '/stage/onnxruntime-training-examples/huggingface/azureml/run_benchmark.sh'
    )
    assert (benchmark._cmd is None)
    benchmark._result = BenchmarkResult(benchmark._name,
                                        benchmark._benchmark_type,
                                        ReturnCode.SUCCESS)
    benchmark._args = SimpleNamespace(log_raw_data=False)

    raw_output = """
__superbench__ begin bert-large-uncased ngpu=1
    "samples_per_second": 21.829
__superbench__ begin bert-large-uncased ngpu=8
    "samples_per_second": 147.181
__superbench__ begin distilbert-base-uncased ngpu=1
    "samples_per_second": 126.827
__superbench__ begin distilbert-base-uncased ngpu=8
    "samples_per_second": 966.796
__superbench__ begin gpt2 ngpu=1
    "samples_per_second": 20.46
__superbench__ begin gpt2 ngpu=8
    "samples_per_second": 151.089
__superbench__ begin facebook/bart-large ngpu=1
    "samples_per_second": 66.171
__superbench__ begin facebook/bart-large ngpu=8
    "samples_per_second": 370.343
__superbench__ begin roberta-large ngpu=1
    "samples_per_second": 37.103
__superbench__ begin roberta-large ngpu=8
    "samples_per_second": 274.455
"""
    assert (benchmark._process_raw_result(0, raw_output))
    assert (
        benchmark.result['bert_large_uncased_ngpu_1_throughput'][0] == 21.829)
    assert (
        benchmark.result['bert_large_uncased_ngpu_8_throughput'][0] == 147.181)
    assert (benchmark.result['distilbert_base_uncased_ngpu_1_throughput'][0] ==
            126.827)
    assert (benchmark.result['distilbert_base_uncased_ngpu_8_throughput'][0] ==
            966.796)
    assert (benchmark.result['gpt2_ngpu_1_throughput'][0] == 20.46)
    assert (benchmark.result['gpt2_ngpu_8_throughput'][0] == 151.089)
    assert (
        benchmark.result['facebook_bart_large_ngpu_1_throughput'][0] == 66.171)
    assert (benchmark.result['facebook_bart_large_ngpu_8_throughput'][0] ==
            370.343)
    assert (benchmark.result['roberta_large_ngpu_1_throughput'][0] == 37.103)
    assert (benchmark.result['roberta_large_ngpu_8_throughput'][0] == 274.455)
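
As in the FAMBench example, the metric names in the assertions come from the raw output: the model name and GPU count on each '__superbench__ begin <model> ngpu=<n>' line are normalized and suffixed with '_throughput'. A rough sketch of that naming rule as a hypothetical helper (not the benchmark's real parser):

def ort_metric_name(model, ngpu):
    """Hypothetical helper reproducing the metric names asserted above."""
    return '{}_ngpu_{}_throughput'.format(model.replace('-', '_').replace('/', '_'), ngpu)

assert ort_metric_name('bert-large-uncased', 1) == 'bert_large_uncased_ngpu_1_throughput'
assert ort_metric_name('facebook/bart-large', 8) == 'facebook_bart_large_ngpu_8_throughput'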