def Run(benchmark_spec):
  """Run MXNet on the cluster for each model specified.

  For every model in FLAGS.mx_models this builds a train_imagenet.py command
  line, runs it on the first VM from the example directory, and converts the
  command output into a result entry. The per-model settings (model, batch
  size, number of layers, image shape) are stored on benchmark_spec, which is
  then passed to _MakeSamplesFromOutput.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    A list of sample.Sample objects, one per model in FLAGS.mx_models.
  """
  _UpdateBenchmarkSpecWithFlags(benchmark_spec)
  vm = benchmark_spec.vms[0]
  mx_benchmark_dir = 'incubator-mxnet/example/image-classification'
  results = []
  for model in FLAGS.mx_models:
    num_layers = _GetNumLayers(model)
    batch_size = _GetBatchSize(model, num_layers)
    # Record the per-model settings on the spec for the current iteration.
    benchmark_spec.model = model
    benchmark_spec.batch_size = batch_size
    benchmark_spec.num_layers = num_layers
    benchmark_spec.image_shape = _GetImageShape(model)
    mx_benchmark_cmd = (
        'python train_imagenet.py '
        '--benchmark=1 '
        '--network={network} '
        '--batch-size={batch_size} '
        '--image-shape={image_shape} '
        '--num-epochs={num_epochs} '
        '--dtype={precision} '
        '--kv-store={key_value_store}').format(
            network=model,
            batch_size=batch_size,
            image_shape=benchmark_spec.image_shape,
            num_epochs=benchmark_spec.num_epochs,
            precision=benchmark_spec.precision,
            key_value_store=benchmark_spec.key_value_store)
    if benchmark_spec.device == GPU:
      # Use every GPU reported by the VM: "--gpus 0,1,...,N-1".
      num_gpus = cuda_toolkit.QueryNumberOfGpus(vm)
      mx_benchmark_cmd = '{env} {cmd} --gpus {gpus}'.format(
          env=mxnet.GetEnvironmentVars(vm),
          cmd=mx_benchmark_cmd,
          gpus=','.join(str(n) for n in range(num_gpus)))
    elif benchmark_spec.device == CPU:
      # Specifies the number of threads to use in CPU test.
      # https://mxnet.incubator.apache.org/faq/perf.html
      # Clamp to at least one thread: on a single-CPU VM the halved count
      # would otherwise be 0, which is not a valid OMP_NUM_THREADS value.
      omp_num_threads = max(1, vm.NumCpusForBenchmark() // 2)
      mx_benchmark_cmd = 'OMP_NUM_THREADS={omp_num_threads} {cmd}'.format(
          omp_num_threads=omp_num_threads,
          cmd=mx_benchmark_cmd)

    # Only models with a layer count get the --num-layers argument.
    if num_layers:
      mx_benchmark_cmd = '%s --num-layers %s' % (mx_benchmark_cmd, num_layers)
    run_command = 'cd %s && %s' % (mx_benchmark_dir,
                                   mx_benchmark_cmd)
    stdout, stderr = vm.RobustRemoteCommand(run_command, should_log=True)

    # Fall back to stderr when the command wrote nothing to stdout.
    results.append(_MakeSamplesFromOutput(benchmark_spec, stdout or stderr))

  return results
Пример #2
0
def Run(benchmark_spec):
    """Run MXNet on the cluster for each model specified.

    For every model in FLAGS.mx_models this builds a train_imagenet.py
    command line, runs it on the first VM from the example directory, and
    converts the command output into a result entry. The per-model settings
    (model, batch size, number of layers) are stored on benchmark_spec, which
    is then passed to _MakeSamplesFromOutput.

    Args:
      benchmark_spec: The benchmark specification. Contains all data that is
        required to run the benchmark.

    Returns:
      A list of sample.Sample objects, one per model in FLAGS.mx_models.
    """
    _UpdateBenchmarkSpecWithFlags(benchmark_spec)
    vm = benchmark_spec.vms[0]
    mx_benchmark_dir = 'incubator-mxnet/example/image-classification'
    results = []
    for model in FLAGS.mx_models:
        num_layers = _GetNumLayers(model)
        batch_size = _GetBatchSize(model, num_layers)
        benchmark_spec.model = model
        benchmark_spec.batch_size = batch_size
        # Always overwrite num_layers so a model without a layer count does
        # not inherit the value left on the spec by a previous model.
        benchmark_spec.num_layers = num_layers
        mx_benchmark_cmd = (
            'python train_imagenet.py --benchmark 1 --network %s --batch-size %s '
            '--image-shape %s --num-epochs %s --kv-store device') % (
                model, batch_size, IMAGENET_SHAPE, benchmark_spec.num_epochs)
        if benchmark_spec.device == GPU:
            # Use every GPU reported by the VM: "--gpus 0,1,...,N-1".
            gpus = cuda_toolkit_8.QueryNumberOfGpus(vm)
            mx_benchmark_cmd = '%s %s --gpus %s' % (mxnet.GetEnvironmentVars(
                vm), mx_benchmark_cmd, ','.join(str(n) for n in range(gpus)))
        # Only models with a layer count get the --num-layers argument.
        if num_layers:
            mx_benchmark_cmd = '%s --num-layers %s' % (mx_benchmark_cmd,
                                                       num_layers)
        run_command = 'cd %s && %s' % (mx_benchmark_dir, mx_benchmark_cmd)
        stdout, stderr = vm.RobustRemoteCommand(run_command, should_log=True)

        # Fall back to stderr when the command wrote nothing to stdout.
        results.append(_MakeSamplesFromOutput(benchmark_spec, stdout
                                              or stderr))

    return results