Exemplo n.º 1
0
def main(positional_arguments):
    """Run the CNN benchmark described by the command-line flags.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored (presumably the program name); any
            further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # '--distortions False' parses as '--distortions=True' followed by the
    # positional argument 'False'. Refusing stray positional arguments keeps
    # such a command line from silently running with distortions enabled.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        unexpected = positional_arguments[1:]
        raise ValueError(
            'Received unknown positional arguments: %s' % unexpected)

    params = benchmark_cnn.make_params_from_flags()

    # Dump every environment variable at debug verbosity.
    rule = '=' * 20
    tf.logging.debug(rule + ' Environment Variables ' + rule)
    for name, value in os.environ.items():
        tf.logging.debug('{}: {}'.format(name, value))

    compliance_logging = absl_flags.FLAGS.ml_perf_compliance_logging
    with mlperf.mlperf_logger(compliance_logging, params.model):
        params = benchmark_cnn.setup(params)
        bench = benchmark_cnn.BenchmarkCNN(params)

        version = cnn_util.tensorflow_version_tuple()
        major, minor = version[0], version[1]
        log_fn('TensorFlow:  %i.%i' % (major, minor))

        bench.print_info()
        bench.run()
Exemplo n.º 2
0
def main(positional_arguments):
    """Run the benchmark with behaviour supplied by a benchmark_handler.

    A ``benchmark_handler.Handler`` provides the params, dataset and model,
    and several of its methods are installed on the ``BenchmarkCNN``
    instance before it is run.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored; any further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # '--distortions False' parses as '--distortions=True' followed by the
    # positional argument 'False'. Refusing stray positional arguments keeps
    # such a command line from silently running with distortions enabled.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        unexpected = positional_arguments[1:]
        raise ValueError(
            'Received unknown positional arguments: %s' % unexpected)

    handler = benchmark_handler.Handler(
        benchmark_cnn.make_params_from_flags())
    params = benchmark_cnn.setup(handler.params)
    bench = benchmark_cnn.BenchmarkCNN(
        params, dataset=handler.dataset, model=handler.model)
    handler.set_bench(bench)

    # Only preprocessors that expose set_aug_list receive the list.
    preprocessor = bench.input_preprocessor
    if getattr(preprocessor, 'set_aug_list', None):
        preprocessor.set_aug_list(params.aug_list)

    # Replace these BenchmarkCNN attributes with the handler's versions.
    handler_hooks = (
        'benchmark_one_step',
        'print_eval_results',
        'check_early_stop',
        'accum_grads',
        'build_fetches_forward',
    )
    for hook in handler_hooks:
        setattr(bench, hook, getattr(handler, hook))

    if params.memory_saving_method == 'recomputing':
        bench.memory_saving = ms.Memory_Saving(benchmark_cnn=bench)

    # (TensorFlow version logging is disabled in this variant.)
    bench.print_info()
    bench.run()
Exemplo n.º 3
0
def main(positional_arguments):
    """Run the CNN benchmark and print its threading/prefetch settings.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored; any further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # '--distortions False' parses as '--distortions=True' followed by the
    # positional argument 'False'. Refusing stray positional arguments keeps
    # such a command line from silently running with distortions enabled.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        unexpected = positional_arguments[1:]
        raise ValueError(
            'Received unknown positional arguments: %s' % unexpected)

    params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
    bench = benchmark_cnn.BenchmarkCNN(params)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))

    bench.print_info()

    # Echo the thread and dataset-prefetch parameters, one per line.
    reported_fields = (
        'num_inter_threads',
        'num_intra_threads',
        'datasets_num_private_threads',
        'datasets_use_prefetch',
        'datasets_prefetch_buffer_size',
    )
    for field in reported_fields:
        print(field + ': ' + str(getattr(params, field)))

    bench.run()
Exemplo n.º 4
0
def main(positional_arguments):
    """Run the benchmark_cnn harness on a GTSRB dataset with a custom model.

    Flag-derived params are overridden with values from ``options`` and a
    few fixed settings, then a dataset and a ``Model_Builder`` model are
    handed to ``BenchmarkCNN``. The default TensorFlow graph is reset after
    the run.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored; any further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # '--distortions False' parses as '--distortions=True' followed by the
    # positional argument 'False'. Refusing stray positional arguments keeps
    # such a command line from silently running with distortions enabled.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        unexpected = positional_arguments[1:]
        raise ValueError(
            'Received unknown positional arguments: %s' % unexpected)

    options = make_options_from_flags(FLAGS)

    # Currently disabled overrides: all_reduce_spec='nccl', bottom_file,
    # affine_files, affine_classes, print_training_accuracy=True,
    # summary_verbosity=1, save_summaries_steps=10, save_model_secs=3600.
    overrides = {
        'batch_size': options.batch_size,
        'model': 'MY_GTSRB',
        'num_epochs': options.num_epochs,
        'num_gpus': options.num_gpus,
        'data_format': 'NHWC',
        'train_dir': options.checkpoint_folder,
        'allow_growth': True,
        'variable_update': 'replicated',
        'local_parameter_device': 'gpu',
        'use_tf_layers': False,
        'optimizer': options.optimizer,
        'weight_decay': options.weight_decay,
        'backbone_model_path': options.backbone_model_path,
        # Going by the field name the value is in seconds, i.e. one minute.
        'save_model_secs': 60,
    }
    params = benchmark_cnn.make_params_from_flags()._replace(**overrides)
    params = benchmark_cnn.setup(params)

    # A data_dir containing 'test' selects the test dataset class.
    if 'test' in options.data_dir:
        dataset_cls = GTSRBTestDataset
    else:
        dataset_cls = GTSRBDataset
    dataset = dataset_cls(options)
    model = Model_Builder(
        options.model_name, dataset.num_classes, options, params)

    bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))

    bench.print_info()
    bench.run()

    tf.reset_default_graph()
Exemplo n.º 5
0
def main(positional_arguments):
    """Run the CNN benchmark inside a ``log_context`` block.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored; any further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # '--distortions False' parses as '--distortions=True' followed by the
    # positional argument 'False'. Refusing stray positional arguments keeps
    # such a command line from silently running with distortions enabled.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        unexpected = positional_arguments[1:]
        raise ValueError(
            'Received unknown positional arguments: %s' % unexpected)

    params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
    bench = benchmark_cnn.BenchmarkCNN(params)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))

    bench.print_info()

    # Only the run itself executes within the logging context.
    logger_settings = (
        LOGGER_URL,
        LOGGER_USRENAME,
        LOGGER_PASSWORD,
        LOGGER_DB,
        LOGGER_SERIES,
    )
    with log_context(*logger_settings, machine=LOGGER_VM):
        bench.run()
Exemplo n.º 6
0
def main(positional_arguments):
  """Run the CNN benchmark, loading the DMO file system first if requested.

  When ``params.enable_dmo`` equals True, ``LoadFileSystem()`` is called;
  the process exits with status -1 if that call returns a value equal to
  False.

  Args:
    positional_arguments: Arguments left over after flag parsing. The first
      element is ignored; any further element is rejected.

  Raises:
    ValueError: If more than one positional argument is present.
  """
  # '--distortions False' parses as '--distortions=True' followed by the
  # positional argument 'False'. Refusing stray positional arguments keeps
  # such a command line from silently running with distortions enabled.
  assert len(positional_arguments) >= 1
  if len(positional_arguments) > 1:
    unexpected = positional_arguments[1:]
    raise ValueError(
        'Received unknown positional arguments: %s' % unexpected)

  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())

  import sys
  # The explicit '==' comparisons are kept on purpose: only values equal to
  # True / False take these branches, which plain truthiness would change.
  dmo_requested = params.enable_dmo == True
  if dmo_requested:
    load_result = LoadFileSystem()
    if load_result == False:
      sys.exit(-1)
    print("\n*******DMO enabled********\n")

  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))

  bench.print_info()
  bench.run()
def main(_):
  """Build params from flags and dispatch on the configured model name.

  ``run_with_test_model`` handles the model named 'test_model'; every other
  model goes to ``run_with_real_model``.

  Args:
    _: Unused positional arguments.
  """
  params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
  runner = (run_with_test_model if params.model == 'test_model'
            else run_with_real_model)
  runner(params)
Exemplo n.º 8
0
def main(_):
    """Run the CNN benchmark described by the command-line flags.

    Args:
        _: Unused positional arguments.
    """
    params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
    bench = benchmark_cnn.BenchmarkCNN(params)

    version = cnn_util.tensorflow_version_tuple()
    major, minor = version[0], version[1]
    log_fn('TensorFlow:  %i.%i' % (major, minor))

    bench.print_info()
    bench.run()
def main(_):
    """Build a single-GPU benchmark graph and train it via Parallax.

    The model is built once inside its own ``tf.Graph``; that graph and the
    nested ``run`` training loop are then handed to
    ``parallax.parallel_run``.

    Args:
        _: Unused positional arguments.
    """
    # Build benchmark_cnn model
    params = benchmark_cnn.make_params_from_flags()
    params, sess_config = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)

    # Print information
    tfversion = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))
    bench.print_info()

    # Build single-GPU benchmark_cnn model
    with tf.Graph().as_default() as single_gpu_graph:
        bench.build_model()

    def run(sess, num_iters, tensor_or_op_name_to_replica_names, num_workers,
            worker_id, num_replicas_per_worker):
        """Run ``num_iters`` training steps, logging progress periodically.

        Only the first replica of each fetched tensor/op is used.
        """

        def first_replica(tensor_or_op):
            # Map a single-GPU graph element to its first replica's name.
            return tensor_or_op_name_to_replica_names[tensor_or_op.name][0]

        fetches = {
            'global_step': first_replica(bench.global_step),
            'cost': first_replica(bench.cost),
            'train_op': first_replica(bench.train_op),
        }
        # The learning rate is fetched only when it is a tensor; otherwise
        # bench.lr itself is logged.
        if isinstance(bench.lr, tf.Tensor):
            fetches['lr'] = first_replica(bench.lr)

        start = time.time()
        steps_since_log = 0
        for i in range(num_iters):
            results = sess.run(fetches)
            steps_since_log += 1
            if i % FLAGS.log_frequency == 0:
                elapsed = time.time() - start
                # Divide by the steps actually executed since the last log.
                # The first log fires after a single step, so assuming a
                # full log_frequency window there would overstate it.
                if elapsed > 0:
                    throughput = float(steps_since_log) / float(elapsed)
                else:
                    throughput = float('inf')
                lr = results['lr'] if 'lr' in results else bench.lr
                parallax.log.info(
                    "global step: %d, lr: %f, loss: %f, "
                    "throughput: %f steps/sec" %
                    (results['global_step'], lr, results['cost'],
                     throughput))
                start = time.time()
                steps_since_log = 0

    config = parallax_config.build_config()
    config.sess_config = sess_config

    parallax.parallel_run(single_gpu_graph,
                          run,
                          FLAGS.resource_info_file,
                          FLAGS.max_steps,
                          sync=FLAGS.sync,
                          parallax_config=config)
Exemplo n.º 10
0
def main(positional_arguments):
  """Run the benchmark_cnn harness on a CIFAR dataset with a custom model.

  Flag-derived params are overridden with values from ``options`` and a few
  fixed settings, then a ``CifarDataset`` and a ``Model_Builder`` model are
  handed to ``BenchmarkCNN``.

  Args:
    positional_arguments: Arguments left over after flag parsing. The first
      element is ignored; any further element is rejected.

  Raises:
    ValueError: If more than one positional argument is present.
  """
  assert len(positional_arguments) >= 1
  if len(positional_arguments) > 1:
    unexpected = positional_arguments[1:]
    raise ValueError(
        'Received unknown positional arguments: %s' % unexpected)

  options = make_options_from_flags(FLAGS)

  # Currently disabled overrides: all_reduce_spec='nccl', bottom_file,
  # affine_files, affine_classes, summary_verbosity=1,
  # save_summaries_steps=10, save_model_secs=300.
  overrides = {
      'batch_size': options.batch_size,
      'model': 'MY_GTSRB',
      'num_epochs': options.num_epochs,
      'num_gpus': options.num_gpus,
      'data_format': 'NHWC',
      'train_dir': options.checkpoint_folder,
      'allow_growth': True,
      'variable_update': 'replicated',
      'local_parameter_device': 'gpu',
      'use_tf_layers': False,
      'optimizer': options.optimizer,
      'weight_decay': options.weight_decay,
      'print_training_accuracy': True,
      'backbone_model_path': options.backbone_model_path,
      # Going by the field name the value is in seconds, i.e. one hour.
      'save_model_secs': 3600,
  }
  params = benchmark_cnn.make_params_from_flags()._replace(**overrides)
  params = benchmark_cnn.setup(params)

  dataset = CifarDataset(options)
  model = Model_Builder(
      options.model_name, dataset.num_classes, options, params)

  bench = benchmark_cnn.BenchmarkCNN(params, dataset=dataset, model=model)

  version = cnn_util.tensorflow_version_tuple()
  log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))

  bench.print_info()
  bench.run()
Exemplo n.º 11
0
def main(positional_arguments):
    """Build the benchmark and hand it to ``run_benchmark``.

    ``run_benchmark`` receives the ``BenchmarkCNN`` instance together with
    the ``iters_per_step`` flag value.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored; any further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # '--distortions False' parses as '--distortions=True' followed by the
    # positional argument 'False'. Refusing stray positional arguments keeps
    # such a command line from silently running with distortions enabled.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        unexpected = positional_arguments[1:]
        raise ValueError(
            'Received unknown positional arguments: %s' % unexpected)

    params = benchmark_cnn.setup(benchmark_cnn.make_params_from_flags())
    bench = benchmark_cnn.BenchmarkCNN(params)

    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))

    iters_per_step = absl_flags.FLAGS.iters_per_step
    run_benchmark(bench, iters_per_step)
Exemplo n.º 12
0
def main(extra_flags):
  """Run the CNN benchmark described by the command-line flags.

  Args:
    extra_flags: Command-line arguments that are not defined in
      tf.flags.FLAGS. Element 0 is always the program name.

  Raises:
    ValueError: If any flag beyond the program name is present.
  """
  # Supplying a flag that tf.flags.FLAGS does not define is an error, so
  # anything after the program name is rejected.
  assert len(extra_flags) >= 1
  if len(extra_flags) > 1:
    unknown = extra_flags[1:]
    raise ValueError('Received unknown flags: %s' % unknown)

  params = benchmark_cnn.make_params_from_flags()
  # The return value of setup() is not rebound here, so the benchmark is
  # built from the params exactly as produced from the flags.
  benchmark_cnn.setup(params)
  bench = benchmark_cnn.BenchmarkCNN(params)

  version = cnn_util.tensorflow_version_tuple()
  major, minor = version[0], version[1]
  log_fn('TensorFlow:  %i.%i' % (major, minor))

  bench.print_info()
  bench.run()
def main(positional_arguments):
    """Run the CNN benchmark with the MLPerf logger context held open.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored; any further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # Command-line arguments like '--distortions False' are equivalent to
    # '--distortions=True False', where False is a positional argument. To prevent
    # this from silently running with distortions, we do not allow positional
    # arguments.
    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        raise ValueError("Received unknown positional arguments: %s" % positional_arguments[1:])

    params = benchmark_cnn.make_params_from_flags()
    # Keep the whole benchmark, including bench.run(), inside the logger
    # context. Previously the context exited right after BenchmarkCNN was
    # constructed, so it was no longer active while the benchmark ran.
    with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging, params.model):
        params = benchmark_cnn.setup(params)
        bench = benchmark_cnn.BenchmarkCNN(params)

        tfversion = cnn_util.tensorflow_version_tuple()
        log_fn("TensorFlow:  %i.%i" % (tfversion[0], tfversion[1]))

        bench.print_info()
        bench.run()
def main(_):
    """Build a single-GPU benchmark graph and train it in a Parallax session.

    ``parallax.parallel_run`` returns a session plus worker information; the
    training loop then runs here for ``FLAGS.max_steps`` steps and logs
    throughput every ``FLAGS.log_frequency`` steps.

    Args:
        _: Unused positional arguments.
    """
    # Build the benchmark_cnn model object.
    params, sess_config = benchmark_cnn.setup(
        benchmark_cnn.make_params_from_flags())
    bench = benchmark_cnn.BenchmarkCNN(params)

    # Print information.
    version = cnn_util.tensorflow_version_tuple()
    log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))
    bench.print_info()

    # Build the single-GPU model inside its own graph.
    single_gpu_graph = tf.Graph()
    with single_gpu_graph.as_default():
        bench.build_model()

    config = parallax_config.build_config()
    config.sess_config = sess_config

    parallel_state = parallax.parallel_run(single_gpu_graph,
                                           FLAGS.resource_info_file,
                                           sync=FLAGS.sync,
                                           parallax_config=config)
    sess, num_workers, worker_id, num_replicas_per_worker = parallel_state

    fetches = dict(global_step=bench.global_step,
                   cost=bench.cost,
                   train_op=bench.train_op)

    window_start = time.time()
    for step in range(1, FLAGS.max_steps + 1):
        results = sess.run(fetches)
        if step % FLAGS.log_frequency != 0:
            continue
        window_end = time.time()
        throughput = (float(FLAGS.log_frequency) /
                      float(window_end - window_start))
        # Each fetched value is indexed with [0] before it is reported.
        parallax.log.info(
            "global step: %d, loss: %f, throughput: %f steps/sec" %
            (results['global_step'][0] + 1, results['cost'][0],
             throughput))
        window_start = time.time()
Exemplo n.º 15
0
def main(positional_arguments):
    """Benchmark a fixed set of models over several GPU counts.

    Every model configuration is run with 1, 2, 4 and 8 GPUs. The collected
    results are pretty-printed, followed by a comma-separated table of
    ``images_per_sec`` with one row per GPU count and one column per model.

    Args:
        positional_arguments: Arguments left over after flag parsing. The
            first element is ignored; any further element is rejected.

    Raises:
        ValueError: If more than one positional argument is present.
    """
    # '--distortions False' parses as '--distortions=True' followed by the
    # positional argument 'False'. Refusing stray positional arguments keeps
    # such a command line from silently running with distortions enabled.

    # For DGX servers use hierarchical_copy=True argument

    assert len(positional_arguments) >= 1
    if len(positional_arguments) > 1:
        unexpected = positional_arguments[1:]
        raise ValueError(
            'Received unknown positional arguments: %s' % unexpected)

    # 'num_gpus' is a placeholder that is filled in for each run.
    tests_models = [
        {'num_gpus': None, 'batch_size': 64,
         'variable_update': 'parameter_server', 'model': 'inception3'},
        {'num_gpus': None, 'batch_size': 64,
         'variable_update': 'parameter_server', 'model': 'resnet50'},
        # batch=64 crashes
        {'num_gpus': None, 'batch_size': 32,
         'variable_update': 'parameter_server', 'model': 'resnet152'},
        {'num_gpus': None, 'batch_size': 64,
         'variable_update': 'replicated', 'model': 'vgg16'},
        {'num_gpus': None, 'batch_size': 512,
         'variable_update': 'replicated', 'model': 'alexnet'},
    ]
    test_gpus = [1, 2, 4, 8]

    stats = []
    for model_config in tests_models:
        for num_gpus in test_gpus:
            run_config = dict(model_config, num_gpus=num_gpus)

            params = benchmark_cnn.make_params_from_flags()
            params = benchmark_cnn.setup(params)

            overrides = dict(run_config)
            # force --hierarchical_copy to False when using 1 GPU
            if num_gpus == 1:
                overrides['hierarchical_copy'] = False
            params = params._replace(**overrides)

            bench = benchmark_cnn.BenchmarkCNN(params)

            version = cnn_util.tensorflow_version_tuple()
            log_fn('TensorFlow:  %i.%i' % (version[0], version[1]))

            bench.print_info()
            # Example of a returned result:
            # {
            #     'average_wall_time': 0.6646941304206848,
            #     'images_per_sec': 385.1395525908701,
            #     'last_average_loss': 7.256145,
            #     'num_steps': 100,
            #     'num_workers': 1
            # }
            results = bench.run()
            stats.append({'test': run_config, 'result': results})

    # summary
    print('summary:')
    print('==========')
    pprint.pprint(stats)

    print('==========')
    # stats is ordered model-major, so a stride of len(test_gpus) starting
    # at a GPU index visits that GPU count's entry for every model.
    stride = len(test_gpus)
    rows = []
    for gpu_index in range(stride):
        cells = [str(entry['result']['images_per_sec']) + ', '
                 for entry in stats[gpu_index::stride]]
        rows.append(''.join(cells) + '\n')
    print(''.join(rows))
    print('==========')
Exemplo n.º 16
0
def main(_):
  """Force evaluation mode and run ``BenchmarkCNN.evaluate``.

  Args:
    _: Unused positional arguments.
  """
  # The eval flag is set before the params are derived from the flags.
  FLAGS.eval = True
  params, _unused_config = benchmark_cnn.setup(
      benchmark_cnn.make_params_from_flags())
  benchmark_cnn.BenchmarkCNN(params).evaluate()
Exemplo n.º 17
0
def main(_):
    params = benchmark_cnn.make_params_from_flags()

    models = [
        'alexnet',
    ]