Example #1
def test_kernel_launch_overhead():
    """Test kernel-launch benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'kernel-launch',
        parameters='--num_warmup 200 --num_steps 20000 --interval 100')

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'kernel-launch')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_warmup == 200)
    assert (benchmark._args.num_steps == 20000)
    assert (benchmark._args.interval == 100)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert ('raw_output_0' in benchmark.raw_data)
    assert (len(benchmark.raw_data['raw_output_0']) == 1)
    assert (isinstance(benchmark.raw_data['raw_output_0'][0], str))
    for metric in ['event_time', 'wall_time']:
        assert (metric in benchmark.result)
        assert (len(benchmark.result[metric]) == 1)
        assert (isinstance(benchmark.result[metric][0], numbers.Number))
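For reference, the create/validate/launch flow that these tests exercise can also be used directly outside a test; a minimal sketch, assuming the standard superbench.benchmarks imports used throughout this listing:

# Minimal sketch of the create/validate/launch flow (import path assumed from
# the project layout; the tests in this listing rely on the same symbols).
from superbench.benchmarks import BenchmarkRegistry, ReturnCode

context = BenchmarkRegistry.create_benchmark_context(
    'kernel-launch', parameters='--num_warmup 200 --num_steps 20000 --interval 100')
if BenchmarkRegistry.is_benchmark_context_valid(context):
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    if benchmark and benchmark.return_code == ReturnCode.SUCCESS:
        print(benchmark.result)    # contains the 'event_time' and 'wall_time' metrics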
Example #2
def test_pytorch_computation_communication_overlap_normal():
    """Test pytorch-computation-communication-overlap benchmark on distributed normal case."""
    context = BenchmarkRegistry.create_benchmark_context(
        'computation-communication-overlap',
        parameters='--num_warmup 5 --num_steps 10 --ratio 5',
        framework=Framework.PYTORCH
    )
    world_size = 2
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))
    results = utils.simulated_ddp_distributed_benchmark(context, world_size)
    assert (results)
    for benchmark in results:
        # Check basic information.
        assert (benchmark)
        assert (isinstance(benchmark, ComputationCommunicationOverlap))
        assert (benchmark.name == 'pytorch-computation-communication-overlap')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check predefined parameters of the computation-communication-overlap benchmark.
        assert (benchmark._args.kernel == [ComputationKernelType.MUL, ComputationKernelType.MATMUL])

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.num_steps == 10)

        # Check results and metrics.
        assert (benchmark.run_count == 1)
        assert (benchmark.return_code == ReturnCode.SUCCESS)

        assert (len(benchmark.raw_data) == len(benchmark._args.kernel))
        assert (len(benchmark.result) == len(benchmark._args.kernel) + benchmark.default_metric_count)
Example #3
def test_pytorch_matmul():
    """Test pytorch-matmul benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'matmul',
        platform=Platform.CUDA,
        parameters='--run_count 2 --num_steps 20',
        framework=Framework.PYTORCH)

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (benchmark.name == 'pytorch-matmul')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check predefined parameters of sharding-matmul benchmark.
    assert (benchmark._args.mode == [ShardingMode.NOSHARDING])

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.run_count == 2)
    assert (benchmark._args.num_steps == 20)

    # Check results and metrics.
    assert (benchmark.run_count == 2)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert (len(benchmark.raw_data['nosharding_time']) == benchmark.run_count)
    assert (len(
        benchmark.raw_data['nosharding_time'][0]) == benchmark._args.num_steps)
    assert (len(benchmark.result['nosharding_time']) == benchmark.run_count)
Example #4
def test_pytorch_computation_communication_overlap_fake_distributed():
    """Test pytorch-computation-communication-overlap benchmark on single gpu."""
    context = BenchmarkRegistry.create_benchmark_context(
        'computation-communication-overlap',
        parameters='--num_warmup 5 --num_steps 10 --ratio 5',
        framework=Framework.PYTORCH
    )
    port = network.get_free_port()
    assert (port)
    utils.setup_simulated_ddp_distributed_env(1, 0, port)
    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, ComputationCommunicationOverlap))
    assert (benchmark.name == 'pytorch-computation-communication-overlap')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check predefined parameters of the computation-communication-overlap benchmark.
    assert (benchmark._args.kernel == [ComputationKernelType.MUL, ComputationKernelType.MATMUL])

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_steps == 10)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)

    assert (len(benchmark.raw_data) == len(benchmark._args.kernel))
    assert (len(benchmark.result) == len(benchmark._args.kernel) + benchmark.default_metric_count)
    utils.clean_simulated_ddp_distributed_env()
Example #5
    def exec(self):
        """Run the SuperBench benchmarks locally."""
        for benchmark_name in self._sb_benchmarks:
            if benchmark_name not in self._sb_enabled:
                continue
            benchmark_config = self._sb_benchmarks[benchmark_name]
            benchmark_results = list()
            self.__create_benchmark_dir(benchmark_name)
            cwd = os.getcwd()
            os.chdir(self.__get_benchmark_dir(benchmark_name))

            monitor = None
            if self.__get_rank_id(
            ) == 0 and self._sb_monitor_config and self._sb_monitor_config.enable:
                if self.__get_platform() == Platform.CUDA:
                    monitor = Monitor(
                        None, int(self._sb_monitor_config.sample_duration
                                  or 10),
                        int(self._sb_monitor_config.sample_interval or 1),
                        self.__get_monitor_path(benchmark_name))
                    monitor.start()
                else:
                    logger.warning(
                        'Monitor can not support ROCM/CPU platform.')

            benchmark_real_name = benchmark_name.split(':')[0]
            for framework in benchmark_config.frameworks or [
                    Framework.NONE.value
            ]:
                if benchmark_real_name == 'model-benchmarks' or (
                        ':' not in benchmark_name
                        and benchmark_name.endswith('_models')):
                    for model in benchmark_config.models:
                        full_name = f'{benchmark_name}/{framework}-{model}'
                        logger.info('Executor is going to execute %s.',
                                    full_name)
                        context = BenchmarkRegistry.create_benchmark_context(
                            model,
                            platform=self.__get_platform(),
                            framework=Framework(framework.lower()),
                            parameters=self.__get_arguments(
                                benchmark_config.parameters))
                        result = self.__exec_benchmark(full_name, context)
                        benchmark_results.append(result)
                else:
                    full_name = benchmark_name
                    logger.info('Executor is going to execute %s.', full_name)
                    context = BenchmarkRegistry.create_benchmark_context(
                        benchmark_real_name,
                        platform=self.__get_platform(),
                        framework=Framework(framework.lower()),
                        parameters=self.__get_arguments(
                            benchmark_config.parameters))
                    result = self.__exec_benchmark(full_name, context)
                    benchmark_results.append(result)

            if monitor:
                monitor.stop()
            self.__write_benchmark_results(benchmark_name, benchmark_results)
            os.chdir(cwd)
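Note that exec() restores the working directory with os.chdir(cwd) only after the per-benchmark loop body completes; a more defensive pattern (a sketch with hypothetical helper names, not the SuperBench implementation) restores it in a finally block so an exception inside a benchmark cannot leave the process in the benchmark directory:

# Sketch only: defensive cwd handling around a per-benchmark working
# directory; run_one_benchmark and benchmark_dir are hypothetical stand-ins.
import os
import tempfile


def run_one_benchmark():
    """Placeholder for the per-benchmark loop body (hypothetical)."""
    pass


cwd = os.getcwd()
benchmark_dir = tempfile.mkdtemp()    # stands in for the per-benchmark directory
os.chdir(benchmark_dir)
try:
    run_one_benchmark()
finally:
    os.chdir(cwd)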
Example #6
def benchmark_list_params_command_handler(name=None):
    """List parameters for benchmarks which match the regular expression.

    Args:
        name (str, optional): Benchmark name or regular expression. Defaults to None.

    Raises:
        CLIError: If no matching benchmark can be found.
    """
    for benchmark_name in benchmark_list_command_handler(name):
        format_help = ''
        for platform in Platform:
            if platform in BenchmarkRegistry.benchmarks[benchmark_name]:
                format_help = BenchmarkRegistry.get_benchmark_configurable_settings(
                    BenchmarkRegistry.create_benchmark_context(benchmark_name, platform=platform)
                )
                break
        print(
            (
                f'=== {benchmark_name} ===\n\n'
                f'{format_help}\n\n'
                f'default values:\n'
                f'{pformat(BenchmarkRegistry.benchmarks[benchmark_name]["predefine_param"])}\n'
            )
        )
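The handler is driven with a benchmark name or regular expression (matched via re.compile(name).match by benchmark_list_command_handler, shown later in this listing); a usage sketch:

# Usage sketch: print configurable settings for all benchmarks whose names
# start with 'pytorch-'; with no argument, every registered benchmark is listed.
benchmark_list_params_command_handler('pytorch-.*')
benchmark_list_params_command_handler()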
Example #7
def test_is_benchmark_context_valid():
    """Test interface BenchmarkRegistry.is_benchmark_context_valid()."""
    # Positive case.
    context = BenchmarkRegistry.create_benchmark_context('accumulation',
                                                         platform=Platform.CPU)
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    # Negative case.
    context = 'context'
    assert (BenchmarkRegistry.is_benchmark_context_valid(context) is False)
    context = None
    assert (BenchmarkRegistry.is_benchmark_context_valid(context) is False)
Example #8
def test_pytorch_bert_base():
    """Test pytorch-bert-base benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'bert-base',
        platform=Platform.CUDA,
        parameters=
        '--batch_size 1 --num_classes 5 --seq_len 8 --num_warmup 2 --num_steps 4 \
            --model_action train inference',
        framework=Framework.PYTORCH)

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, PytorchBERT))
    assert (benchmark.name == 'pytorch-bert-base')
    assert (benchmark.type == BenchmarkType.MODEL)

    # Check predefined parameters of the bert-base model.
    assert (benchmark._args.hidden_size == 768)
    assert (benchmark._args.num_hidden_layers == 12)
    assert (benchmark._args.num_attention_heads == 12)
    assert (benchmark._args.intermediate_size == 3072)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.batch_size == 1)
    assert (benchmark._args.num_classes == 5)
    assert (benchmark._args.seq_len == 8)
    assert (benchmark._args.num_warmup == 2)
    assert (benchmark._args.num_steps == 4)

    # Check dataset scale.
    assert (len(benchmark._dataset) == benchmark._args.sample_count *
            benchmark._world_size)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for metric in [
            'fp32_train_step_time', 'fp32_train_throughput',
            'fp16_train_step_time', 'fp16_train_throughput',
            'fp32_inference_step_time', 'fp32_inference_throughput',
            'fp16_inference_step_time', 'fp16_inference_throughput'
    ]:
        assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
        assert (len(
            benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
        assert (len(benchmark.result[metric]) == benchmark.run_count)
Example #9
    def test_tensorrt_inference_result_parsing(self, test_raw_log):
        """Test tensorrt-inference benchmark result parsing."""
        (benchmark_cls,
         _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
             self.benchmark_name, Platform.CUDA)
        benchmark = benchmark_cls(self.benchmark_name, parameters='')
        benchmark._args = SimpleNamespace(
            pytorch_models=['model_0', 'model_1'], log_raw_data=False)
        benchmark._result = BenchmarkResult(self.benchmark_name,
                                            BenchmarkType.MICRO,
                                            ReturnCode.SUCCESS,
                                            run_count=1)

        # Positive case - valid raw output
        self.assertTrue(benchmark._process_raw_result(0, test_raw_log))
        self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)

        self.assertEqual(6 + benchmark.default_metric_count,
                         len(benchmark.result))
        for tag in ['mean', '99']:
            self.assertEqual(0.5,
                             benchmark.result[f'model_0_gpu_time_{tag}'][0])
            self.assertEqual(0.6,
                             benchmark.result[f'model_0_host_time_{tag}'][0])
            self.assertEqual(
                1.0, benchmark.result[f'model_0_end_to_end_time_{tag}'][0])

        # Negative case - invalid raw output
        self.assertFalse(benchmark._process_raw_result(1,
                                                       'Invalid raw output'))
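The BenchmarkRegistry._BenchmarkRegistry__select_benchmark(...) calls used throughout these tests reach a private method through Python name mangling: an attribute spelled __name inside class C is stored as _C__name. A toy illustration (Registry here is an illustrative class, not part of superbench):

class Registry:
    @classmethod
    def __select(cls, name):    # stored on the class as _Registry__select
        return name.upper()


assert Registry._Registry__select('matmul') == 'MATMUL'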
Example #10
def test_fambench():
    """Test FAMBench benchmarks."""
    benchmark_name = 'fambench'
    (benchmark_class,
     predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CUDA)
    assert (benchmark_class)
    benchmark = benchmark_class(benchmark_name)
    assert (benchmark._benchmark_type == BenchmarkType.DOCKER)
    assert (benchmark._image_uri == 'superbench/benchmark:cuda11.1.1-fambench')
    assert (benchmark._container_name == 'fambench-benchmarks')
    assert (benchmark._entrypoint == '/workspace/FAMBench/benchmarks/run_all_benchmarks.sh')
    assert (benchmark._cmd is None)
    benchmark._result = BenchmarkResult(benchmark._name, benchmark._benchmark_type, ReturnCode.SUCCESS)
    benchmark._args = SimpleNamespace(log_raw_data=False)

    raw_output = """
benchmark implementation mode config score units batch_latency_95_sec
DLRM OOTB eval tiny 152.800399 ex/s 0.515052
DLRM OOTB train tiny 35.483686 ex/s None
DLRM UBENCH train linear_[(2,2,2,2,2)] 3.679281e-07 TF/s None
XLMR OOTB eval default-config 1.015586 ex/s 16.463461
"""
    assert (benchmark._process_raw_result(0, raw_output))
    assert (benchmark.result['dlrm_ootb_eval_tiny_ex_s'][0] == 152.800399)
    assert (benchmark.result['dlrm_ootb_train_tiny_ex_s'][0] == 35.483686)
    assert (benchmark.result['dlrm_ubench_train_linear_[(2,2,2,2,2)]_tf_s'][0] == 3.679281e-07)
    assert (benchmark.result['xlmr_ootb_eval_default_config_ex_s'][0] == 1.015586)
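Judging from the assertions, the metric names are formed from the first five columns of each row (benchmark, implementation, mode, config, units), lower-cased and joined with underscores, with '-' and '/' also mapped to '_'. A hypothetical helper reproducing that mapping (not the benchmark's actual parsing code):

def fambench_metric_name(bench, impl, mode, config, units):
    """Hypothetical helper mirroring the metric naming seen in the assertions above."""
    return '_'.join([bench, impl, mode, config, units]).lower().replace('-', '_').replace('/', '_')


assert fambench_metric_name('DLRM', 'OOTB', 'eval', 'tiny', 'ex/s') == 'dlrm_ootb_eval_tiny_ex_s'
assert fambench_metric_name('XLMR', 'OOTB', 'eval', 'default-config', 'ex/s') == 'xlmr_ootb_eval_default_config_ex_s'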
Example #11
    def _test_gpu_copy_bw_performance_result_parsing(self, platform, test_raw_output):
        """Test gpu-copy benchmark result parsing."""
        benchmark_name = 'gpu-copy-bw'
        (benchmark_class,
         predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, platform)
        assert (benchmark_class)
        benchmark = benchmark_class(benchmark_name, parameters='')
        assert (benchmark)
        ret = benchmark._preprocess()
        assert (ret is True)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert (benchmark.name == 'gpu-copy-bw')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Positive case - valid raw output.
        assert (benchmark._process_raw_result(0, test_raw_output))
        assert (benchmark.return_code == ReturnCode.SUCCESS)

        assert (1 == len(benchmark.raw_data))
        print(test_raw_output.splitlines())
        test_raw_output_dict = {x.split()[0]: float(x.split()[1]) for x in test_raw_output.strip().splitlines()}
        assert (len(test_raw_output_dict) + benchmark.default_metric_count == len(benchmark.result))
        for output_key in benchmark.result:
            if output_key == 'return_code':
                assert (benchmark.result[output_key] == [0])
            else:
                assert (len(benchmark.result[output_key]) == 1)
                assert (isinstance(benchmark.result[output_key][0], numbers.Number))
                assert (output_key.strip('_bw') in test_raw_output_dict)
                assert (test_raw_output_dict[output_key.strip('_bw')] == benchmark.result[output_key][0])

        # Negative case - invalid raw output.
        assert (benchmark._process_raw_result(1, 'Invalid raw output') is False)
        assert (benchmark.return_code == ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
Example #12
    def test_disk_performance_benchmark_disabled(self, mock_is_block_device):
        """Test disk-performance benchmark command generation with all benchmarks disabled."""
        mock_is_block_device.return_value = True

        benchmark_name = 'disk-benchmark'
        (benchmark_class, predefine_params
         ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
             benchmark_name, Platform.CPU)
        assert (benchmark_class)

        block_devices = ['/dev/nvme0n1', '/dev/nvme1n1']
        block_device_option = '--block_devices ' + ' '.join(block_devices)

        param_str = block_device_option
        param_str += ' --rand_precond_time=0'
        param_str += ' --seq_read_runtime=0'
        param_str += ' --rand_read_runtime=0'
        benchmark = benchmark_class(benchmark_name, parameters=param_str)

        # Check basic information
        assert (benchmark)
        ret = benchmark._preprocess()
        assert (ret is True)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert (benchmark.name == 'disk-benchmark')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Command list should be empty
        assert (0 == len(benchmark._commands))
Example #13
    def __exec_benchmark(self, benchmark_full_name, context):
        """Launch benchmark for context.

        Args:
            benchmark_full_name (str): Benchmark full name.
            context (BenchmarkContext): Benchmark context to launch.

        Returns:
            dict: Benchmark result.
        """
        try:
            benchmark = BenchmarkRegistry.launch_benchmark(context)
            if benchmark:
                logger.info('benchmark: %s, return code: %s, result: %s.',
                            benchmark.name, benchmark.return_code,
                            benchmark.result)
                if benchmark.return_code.value == 0:
                    logger.info('Executor succeeded in %s.',
                                benchmark_full_name)
                else:
                    logger.error('Executor failed in %s.', benchmark_full_name)
                result = json.loads(benchmark.serialized_result)
                result['name'] = benchmark_full_name
                return result
            else:
                logger.error('Executor failed in %s, invalid context.',
                             benchmark_full_name)
        except Exception as e:
            logger.error(e)
            logger.error('Executor failed in %s.', benchmark_full_name)
        return None
Example #14
    def test_tcp_connectivity(self):
        """Test tcp-connectivity benchmark."""
        context = BenchmarkRegistry.create_benchmark_context(
            'tcp-connectivity',
            parameters=
            '--hostfile /tmp/superbench/hostfile.test --port 80 --parallel 2',
        )
        assert (BenchmarkRegistry.is_benchmark_context_valid(context))
        benchmark = BenchmarkRegistry.launch_benchmark(context)

        # Check basic information.
        assert (benchmark)
        assert (isinstance(benchmark, TCPConnectivityBenchmark))
        assert (benchmark.name == 'tcp-connectivity')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.hostfile == '/tmp/superbench/hostfile.test')
        assert (benchmark._args.port == 80)
        assert (benchmark._args.count == 10)
        assert (benchmark._args.timeout == 1)
        assert (benchmark._args.parallel == 2)

        print(benchmark.result)
        assert (benchmark.result)

        # Check results and metrics.
        assert (benchmark.result['api.github.com_successed_count'][0] == 10)
        assert (benchmark.result['api.github.com_failed_count'][0] == 0)
        assert (benchmark.result['api.github.com_success_rate'][0] == 100.0)
        assert (isinstance(benchmark.result['api.github.com_time_min'][0],
                           numbers.Number))
        assert (isinstance(benchmark.result['api.github.com_time_max'][0],
                           numbers.Number))
        assert (isinstance(benchmark.result['api.github.com_time_avg'][0],
                           numbers.Number))
        assert (isinstance(benchmark.result['localhost_successed_count'][0],
                           numbers.Number))
        assert (isinstance(benchmark.result['localhost_failed_count'][0],
                           numbers.Number))
        assert (isinstance(benchmark.result['localhost_time_max'][0],
                           numbers.Number))
        assert (isinstance(benchmark.result['localhost_time_min'][0],
                           numbers.Number))
        assert (isinstance(benchmark.result['localhost_time_avg'][0],
                           numbers.Number))
        assert (benchmark.return_code == ReturnCode.SUCCESS)
Example #15
def benchmark_in_one_process(context, world_size, local_rank, port, queue):
    """Function to setup env for DDP initialization and run the benchmark in each single process."""
    setup_simulated_ddp_distributed_env(world_size, local_rank, port)
    benchmark = BenchmarkRegistry.launch_benchmark(context)
    # The parser object must be removed because it cannot be serialized.
    benchmark._parser = None
    queue.put(benchmark)
    clean_simulated_ddp_distributed_env()
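A driver for this worker, roughly what the utils.simulated_ddp_distributed_benchmark helper used in earlier examples does (a sketch only; the real helper's implementation may differ), spawns one process per rank, shares a free port, and drains the queue:

# Sketch of a driver for benchmark_in_one_process; the import path for the
# network helper is assumed, matching network.get_free_port() used above.
import multiprocessing as mp

from superbench.common.utils import network    # import path assumed


def simulated_ddp_benchmark_sketch(context, world_size):
    """Spawn one process per rank and collect the benchmark objects."""
    port = network.get_free_port()
    queue = mp.Queue()
    processes = [
        mp.Process(target=benchmark_in_one_process, args=(context, world_size, rank, port, queue))
        for rank in range(world_size)
    ]
    for p in processes:
        p.start()
    results = [queue.get() for _ in range(world_size)]
    for p in processes:
        p.join()
    return results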
Example #16
    def test_tensorrt_inference_cls(self):
        """Test tensorrt-inference benchmark class."""
        for platform in Platform:
            (benchmark_cls,
             _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
                 self.benchmark_name, platform)
            if platform is Platform.CUDA:
                self.assertIsNotNone(benchmark_cls)
            else:
                self.assertIsNone(benchmark_cls)
Example #17
def test_register_benchmark():
    """Test interface BenchmarkRegistry.register_benchmark()."""
    # Register the benchmark for all platforms when the default platform is used.
    BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)
    for platform in Platform:
        context = BenchmarkRegistry.create_benchmark_context('accumulation',
                                                             platform=platform)
        assert (BenchmarkRegistry.is_benchmark_registered(context))

    # Register the benchmark for the CUDA platform when platform=Platform.CUDA is used.
    BenchmarkRegistry.register_benchmark('accumulation-cuda',
                                         AccumulationBenchmark,
                                         platform=Platform.CUDA)
    context = BenchmarkRegistry.create_benchmark_context(
        'accumulation-cuda', platform=Platform.CUDA)
    assert (BenchmarkRegistry.is_benchmark_registered(context))
    context = BenchmarkRegistry.create_benchmark_context(
        'accumulation-cuda', platform=Platform.ROCM)
    assert (BenchmarkRegistry.is_benchmark_registered(context) is False)
Example #18
def run_pytorch_lstm(parameters='', check_metrics=[]):
    """Test pytorch-lstm benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'lstm',
        platform=Platform.CUDA,
        parameters=parameters,
        framework=Framework.PYTORCH)

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, PytorchLSTM))
    assert (benchmark.name == 'pytorch-lstm')
    assert (benchmark.type == BenchmarkType.MODEL)

    # Check predefined parameters of lstm model.
    assert (benchmark._args.input_size == 256)
    assert (benchmark._args.hidden_size == 1024)
    assert (benchmark._args.num_layers == 8)

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.batch_size == 1)
    assert (benchmark._args.num_classes == 5)
    assert (benchmark._args.seq_len == 8)
    assert (benchmark._args.num_warmup == 2)
    assert (benchmark._args.num_steps == 4)

    # Check dataset scale.
    assert (len(benchmark._dataset) == benchmark._args.sample_count *
            benchmark._world_size)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for metric in check_metrics:
        assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
        assert (len(
            benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
        assert (len(benchmark.result[metric]) == benchmark.run_count)
Example #19
def test_get_benchmark_configurable_settings():
    """Test BenchmarkRegistry interface.

    BenchmarkRegistry.get_benchmark_configurable_settings().
    """
    # Register benchmarks for testing.
    BenchmarkRegistry.register_benchmark('accumulation', AccumulationBenchmark)

    context = BenchmarkRegistry.create_benchmark_context('accumulation',
                                                         platform=Platform.CPU)
    settings = BenchmarkRegistry.get_benchmark_configurable_settings(context)

    expected = """optional arguments:
  --duration int     The elapsed time of benchmark in seconds.
  --log_raw_data     Log raw data into file instead of saving it into result
                     object.
  --lower_bound int  The lower bound for accumulation.
  --run_count int    The run count of benchmark.
  --upper_bound int  The upper bound for accumulation."""
    assert (settings == expected)
Example #20
def run_pytorch_cnn(models=[], parameters='', check_metrics=[]):
    """Run pytorch cnn benchmarks."""
    for model in models:
        context = BenchmarkRegistry.create_benchmark_context(
            model,
            platform=Platform.CUDA,
            parameters=parameters,
            framework=Framework.PYTORCH)

        assert (BenchmarkRegistry.is_benchmark_context_valid(context))

        benchmark = BenchmarkRegistry.launch_benchmark(context)

        # Check basic information.
        assert (benchmark)
        assert (isinstance(benchmark, PytorchCNN))
        assert (benchmark.name == 'pytorch-' + model)
        assert (benchmark.type == BenchmarkType.MODEL)

        # Check predefined parameters of the CNN model.
        assert (benchmark._args.model_type == model)

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.batch_size == 1)
        assert (benchmark._args.image_size == 224)
        assert (benchmark._args.num_classes == 5)
        assert (benchmark._args.num_warmup == 2)
        assert (benchmark._args.num_steps == 4)

        # Check Dataset.
        assert (len(benchmark._dataset) == benchmark._args.sample_count *
                benchmark._world_size)

        # Check results and metrics.
        assert (benchmark.run_count == 1)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        for metric in check_metrics:
            assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
            assert (len(
                benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
            assert (len(benchmark.result[metric]) == benchmark.run_count)
Example #21
def create_benchmark(params='--num_steps 8'):
    """Register and create benchmark."""
    # Register the FakeModelBenchmark benchmark.
    BenchmarkRegistry.register_benchmark(
        'pytorch-fake-model',
        FakeModelBenchmark,
        parameters='--hidden_size 2',
        platform=Platform.CUDA,
    )
    context = BenchmarkRegistry.create_benchmark_context(
        'fake-model',
        platform=Platform.CUDA,
        parameters=params,
        framework=Framework.PYTORCH)
    name = BenchmarkRegistry._BenchmarkRegistry__get_benchmark_name(context)
    assert (name)
    (benchmark_class, predefine_params
     ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
         name, context.platform)
    assert (benchmark_class)
    return benchmark_class(name, predefine_params + ' ' + context.parameters)
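A short usage sketch for this fixture, driving the returned benchmark object the same way other tests in this listing do:

# Usage sketch: the fixture merges the registered predefine parameters
# ('--hidden_size 2') with the caller's parameters before construction.
benchmark = create_benchmark('--num_steps 8')
benchmark._preprocess()
assert benchmark._args.hidden_size == 2
assert benchmark._args.num_steps == 8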
Example #22
def test_pytorch_sharding_matmul():
    """Test pytorch-sharding-matmul benchmark."""
    context = BenchmarkRegistry.create_benchmark_context(
        'sharding-matmul',
        platform=Platform.CUDA,
        parameters='--run_count 2 --num_steps 20',
        framework=Framework.PYTORCH)

    assert (BenchmarkRegistry.is_benchmark_context_valid(context))

    port = network.get_free_port()
    assert (port)
    utils.setup_simulated_ddp_distributed_env(1, 0, port)
    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, ShardingMatmul))
    assert (benchmark.name == 'pytorch-sharding-matmul')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check predefined parameters of sharding-matmul benchmark.
    assert (benchmark._args.mode == [
        ShardingMode.ALLREDUCE, ShardingMode.ALLGATHER
    ])

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.run_count == 2)
    assert (benchmark._args.num_steps == 20)

    # Check results and metrics.
    assert (benchmark.run_count == 2)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    for metric in ['allreduce_time', 'allgather_time']:
        assert (len(benchmark.raw_data[metric]) == benchmark.run_count)
        assert (len(
            benchmark.raw_data[metric][0]) == benchmark._args.num_steps)
        assert (len(benchmark.result[metric]) == benchmark.run_count)

    utils.clean_simulated_ddp_distributed_env()
Example #23
def test_ort_inference_performance(mock_ort_session_run, mock_get_dir):
    """Test ort-inference benchmark."""
    benchmark_name = 'ort-inference'
    (benchmark_class, predefine_params
     ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
         benchmark_name, Platform.CUDA)
    assert (benchmark_class)

    mock_get_dir.return_value = '/tmp/superbench/'
    benchmark = benchmark_class(
        benchmark_name,
        parameters=
        '--pytorch_models resnet50 --graph_opt_level 1 --precision float16'
        ' --batch_size 16 --num_warmup 128 --num_steps 512')

    assert (isinstance(benchmark, ORTInferenceBenchmark))
    assert (benchmark._preprocess())

    # Check basic information.
    assert (benchmark.name == 'ort-inference')
    assert (benchmark.type == BenchmarkType.MICRO)
    assert (benchmark._ORTInferenceBenchmark__model_cache_path == Path(
        torch.hub.get_dir()) / 'checkpoints')
    for model in benchmark._args.pytorch_models:
        assert (hasattr(torchvision.models, model))
        file_name = '{model}.{precision}.onnx'.format(
            model=model, precision=benchmark._args.precision)
        assert ((benchmark._ORTInferenceBenchmark__model_cache_path /
                 file_name).is_file())

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.pytorch_models == ['resnet50'])
    assert (benchmark._args.graph_opt_level == 1)
    assert (benchmark._args.precision == Precision.FLOAT16)
    assert (benchmark._args.batch_size == 16)
    assert (benchmark._args.num_warmup == 128)
    assert (benchmark._args.num_steps == 512)

    # Check results and metrics.
    assert (benchmark._benchmark())
    shutil.rmtree(benchmark._ORTInferenceBenchmark__model_cache_path)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    precision_metric = {'float16': 'fp16', 'float32': 'fp32', 'int8': 'int8'}
    for model in benchmark._args.pytorch_models:
        if benchmark._args.precision.value in precision_metric:
            precision = precision_metric[benchmark._args.precision.value]
        else:
            precision = benchmark._args.precision.value
        metric = '{}_{}_time'.format(precision, model)
        assert (metric in benchmark.result)
        assert (metric in benchmark.raw_data)
Example #24
def test_get_benchmark_name():
    """Test interface BenchmarkRegistry.get_benchmark_name()."""
    # Register benchmarks for testing.
    benchmark_names = [
        'accumulation', 'pytorch-accumulation', 'tf1-accumulation',
        'onnxruntime-accumulation'
    ]
    for name in benchmark_names:
        BenchmarkRegistry.register_benchmark(name, AccumulationBenchmark)

    # Test benchmark name for different Frameworks.
    benchmark_frameworks = [
        Framework.NONE, Framework.PYTORCH, Framework.TENSORFLOW1,
        Framework.ONNXRUNTIME
    ]
    for i in range(len(benchmark_names)):
        context = BenchmarkRegistry.create_benchmark_context(
            'accumulation',
            platform=Platform.CPU,
            framework=benchmark_frameworks[i])
        name = BenchmarkRegistry._BenchmarkRegistry__get_benchmark_name(
            context)
        assert (name == benchmark_names[i])
Example #25
def test_pytorch_empty_cache():
    """Test PytorchBase class."""
    # Register mnist benchmark.
    BenchmarkRegistry.register_benchmark('pytorch-mnist', PytorchMNIST)

    # Test emptying the cache by manually calling torch.cuda.empty_cache().
    parameters = '--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
    benchmark = PytorchMNIST('pytorch-mnist', parameters=parameters)
    assert (benchmark)
    assert (benchmark._preprocess())
    assert (benchmark._benchmark())
    del benchmark
    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] > 0)
    torch.cuda.empty_cache()
    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)

    # Test automatic cache emptying.
    context = BenchmarkRegistry.create_benchmark_context(
        'pytorch-mnist', parameters='--batch_size 32 --num_warmup 8 --num_steps 64 --model_action train'
    )

    benchmark = BenchmarkRegistry.launch_benchmark(context)
    assert (benchmark)
    assert (torch.cuda.memory_stats()['reserved_bytes.all.current'] == 0)
Example #26
    def _test_gpu_copy_bw_performance_command_generation(self, platform):
        """Test gpu-copy benchmark command generation."""
        benchmark_name = 'gpu-copy-bw'
        (benchmark_class,
         predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, platform)
        assert (benchmark_class)

        size = 1048576
        num_warm_up = 20
        num_loops = 10000
        mem_types = ['htod', 'dtoh', 'dtod']
        copy_types = ['sm', 'dma']

        parameters = '--mem_type %s --copy_type %s --size %d ' \
            '--num_warm_up %d --num_loops %d --bidirectional --check_data' % \
            (' '.join(mem_types), ' '.join(copy_types), size, num_warm_up, num_loops)
        benchmark = benchmark_class(benchmark_name, parameters=parameters)

        # Check basic information
        assert (benchmark)
        ret = benchmark._preprocess()
        assert (ret is True)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert (benchmark.name == benchmark_name)
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.mem_type == mem_types)
        assert (benchmark._args.copy_type == copy_types)
        assert (benchmark._args.size == size)
        assert (benchmark._args.num_warm_up == num_warm_up)
        assert (benchmark._args.num_loops == num_loops)
        assert (benchmark._args.bidirectional)
        assert (benchmark._args.check_data)

        # Check command
        assert (1 == len(benchmark._commands))
        assert (benchmark._commands[0].startswith(benchmark._GpuCopyBwBenchmark__bin_path))
        for mem_type in mem_types:
            assert ('--%s' % mem_type in benchmark._commands[0])
        for copy_type in copy_types:
            assert ('--%s_copy' % copy_type in benchmark._commands[0])
        assert ('--size %d' % size in benchmark._commands[0])
        assert ('--num_warm_up %d' % num_warm_up in benchmark._commands[0])
        assert ('--num_loops %d' % num_loops in benchmark._commands[0])
        assert ('--bidirectional' in benchmark._commands[0])
        assert ('--check_data' in benchmark._commands[0])
Example #27
    def test_gpcnet_network_test(self, raw_output, raw_output_no_execution):
        """Test gpcnet-network-test benchmark."""
        # Check registry.
        benchmark_name = 'gpcnet-network-test'
        (benchmark_class,
         predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU)
        assert (benchmark_class)

        # Check preprocess
        benchmark = benchmark_class(benchmark_name)
        ret = benchmark._preprocess()
        assert (ret)

        expect_command = 'network_test'
        command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
        assert (command == expect_command)

        assert (benchmark._process_raw_result(0, raw_output_no_execution))
        assert (len(benchmark.result) == benchmark.default_metric_count)

        # Check the _process_raw_result function.
        # Positive case - valid raw output.
        assert (benchmark._process_raw_result(0, raw_output))
        metric_list = [
            'rr_two-sided_lat',
            'rr_get_lat',
            'rr_two-sided_bw',
            'rr_put_bw',
            'rr_two-sided+sync_bw',
            'nat_two-sided_bw',
            'multiple_allreduce_time',
            'multiple_alltoall_bw',
        ]
        for metric_medium in metric_list:
            for suffix in ['avg', '99%']:
                metric = metric_medium + '_' + suffix
                assert (metric in benchmark.result)
                assert (len(benchmark.result[metric]) == 1)
                assert (isinstance(benchmark.result[metric][0], numbers.Number))

        # Negative case - Add invalid raw output.
        assert (benchmark._process_raw_result(0, 'ERROR') is False)

        # Check basic information.
        assert (benchmark.name == 'gpcnet-network-test')
        assert (benchmark.type == BenchmarkType.MICRO)
        assert (benchmark._bin_name == 'network_test')
Example #28
def test_get_all_benchmark_predefine_settings():
    """Test interface BenchmarkRegistry.get_all_benchmark_predefine_settings()."""
    benchmark_params = BenchmarkRegistry.get_all_benchmark_predefine_settings()

    # Choose benchmark 'pytorch-sharding-matmul' for testing.
    benchmark_name = 'pytorch-sharding-matmul'
    assert (benchmark_name in benchmark_params)
    assert (benchmark_params[benchmark_name]['run_count'] == 1)
    assert (benchmark_params[benchmark_name]['duration'] == 0)
    assert (benchmark_params[benchmark_name]['n'] == 12288)
    assert (benchmark_params[benchmark_name]['k'] == 12288)
    assert (benchmark_params[benchmark_name]['m'] == 16000)
    assert (benchmark_params[benchmark_name]['mode'] == [
        ShardingMode.ALLREDUCE, ShardingMode.ALLGATHER
    ])
    assert (benchmark_params[benchmark_name]['num_warmup'] == 10)
    assert (benchmark_params[benchmark_name]['num_steps'] == 500)
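Since the returned structure maps benchmark names to their default parameter values, it can also be used to dump the defaults of every registered benchmark; a sketch, assuming the nested values are plain dicts as the indexing above suggests:

# Sketch: print selected predefined defaults of every registered benchmark.
all_defaults = BenchmarkRegistry.get_all_benchmark_predefine_settings()
for benchmark_name, params in all_defaults.items():
    print(benchmark_name, params.get('run_count'), params.get('num_steps'))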
Example #29
def benchmark_list_command_handler(name=None):
    """List benchmarks which match the regular expression.

    Args:
        name (str, optional): Benchmark name or regular expression. Defaults to None.

    Raises:
        CLIError: If no matching benchmark can be found.

    Returns:
        list: Benchmark list.
    """
    benchmark_list = list(BenchmarkRegistry.get_all_benchmark_predefine_settings().keys())
    if name is None:
        return benchmark_list
    filter_list = list(filter(re.compile(name).match, benchmark_list))
    if not filter_list:
        raise CLIError('Benchmark {} does not exist.'.format(name))
    return filter_list
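The filter is a plain re.compile(name).match over the registered benchmark names, so anchored prefixes work directly; a usage sketch:

# Usage sketch: list everything, then only pytorch benchmarks; a pattern with
# no match raises CLIError (imported by this module).
all_names = benchmark_list_command_handler()
pytorch_names = benchmark_list_command_handler('pytorch-.*')

try:
    benchmark_list_command_handler('no-such-benchmark')
except CLIError:
    pass    # expected: no registered benchmark matches the pattern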
Example #30
    def test_gpu_burn(self, results):
        """Test gpu-burn benchmark command generation."""
        benchmark_name = 'gpu-burn'
        (benchmark_class, predefine_params
         ) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(
             benchmark_name, Platform.CUDA)
        assert (benchmark_class)

        time = 10

        parameters = '--doubles --tensor_core --time ' + str(time)
        benchmark = benchmark_class(benchmark_name, parameters=parameters)

        # Check basic information
        assert (benchmark)
        ret = benchmark._preprocess()
        assert (ret is True)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert (benchmark.name == benchmark_name)
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.time == time)
        assert (benchmark._args.doubles)
        assert (benchmark._args.tensor_core)

        # Check command
        compare_copy = 'cp ' + benchmark._args.bin_dir + '/compare.ptx ./'
        compare_rm = 'rm ' + 'compare.ptx'
        assert (1 == len(benchmark._commands))
        assert (benchmark._commands[0].startswith(compare_copy))
        assert ('-d' in benchmark._commands[0])
        assert ('-tc' in benchmark._commands[0])
        assert (str(time) in benchmark._commands[0])
        assert (compare_rm in benchmark._commands[0])

        # Check results
        assert (benchmark._process_raw_result(0, results))
        assert (benchmark.result['return_code'][0] == 0)
        assert (benchmark.result['time'][0] == time)
        for device in range(8):
            assert (benchmark.result['gpu_' + str(device) + '_pass'][0] == 1)
        assert (benchmark.result['abort'][0] == 0)