Пример #1
0
def benchmark_inference(model, opts):
    """Build an inference net fed by synthetic inputs and time its batches.

    The model operators are added inside the device scope selected by
    ``opts['device']`` (GPU 0 when the device is ``gpu``), followed by
    synthetic inputs without labels. This variant never reads real data.

    :param model: Caffe2's model helper class instance.
    :type model: :py:class:`caffe2.python.model_helper.ModelHelper`
    :param dict opts: Benchmark options. Keys read here: `device`, `num_gpus`
                      (only when device is gpu), `enable_tensor_core`,
                      `num_warmup_batches` and `num_batches`. The dictionary is
                      also handed to `ModelFactory.get_model`, which may read
                      further keys.
    :return: Tuple of model title and the batch times produced by `run_n_times`.
    :rtype: (string, numpy array)
    """
    run_on_gpu = opts['device'] == 'gpu'
    if run_on_gpu:
        assert opts['num_gpus'] == 1, (
            "When inference is performed on a GPU, only one GPU (--num_gpus=1) must be specified."
        )

    # GPU inference always targets device 0; None is passed for any other device.
    gpu_id = 0 if run_on_gpu else None
    device_option = Model.get_device_option(gpu_id)
    model_builder = ModelFactory.get_model(opts)

    with core.DeviceScope(device_option):
        create_model(model_builder, model, opts['enable_tensor_core'])
        model_builder.add_synthetic_inputs(model, add_labels=False)

    # Run parameter initialisation once, then instantiate the main net.
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    title = model_builder.name
    batch_times = run_n_times(
        model,
        opts['num_warmup_batches'],
        opts['num_batches'],
    )
    return (title, batch_times)
Пример #2
0
def benchmark_inference(model, opts):
    """Build an inference net on real or synthetic inputs and time its batches.

    When ``opts['data_dir']`` is present and non-empty, a DB reader is created
    and real data inputs are attached; otherwise synthetic inputs without
    labels are used. Inputs are added before the model operators, all inside
    the device scope selected by ``opts['device']`` (GPU 0 when it is ``gpu``).

    :param model: Caffe2's model helper class instance.
    :type model: :py:class:`caffe2.python.model_helper.ModelHelper`
    :param dict opts: Benchmark options. Keys read here: `device`, `num_gpus`
                      (only when device is gpu), `enable_tensor_core`,
                      `float16_compute`, `num_warmup_batches` and `num_batches`.
                      `data_dir` is optional; when it is set, `data_backend`
                      and `num_decode_threads` are read as well. The dictionary
                      is also handed to `ModelFactory.get_model`, which may
                      read further keys.
    :return: Tuple of model title and the batch times produced by `run_n_times`.
    :rtype: (string, numpy array)
    """
    run_on_gpu = opts['device'] == 'gpu'
    if run_on_gpu:
        assert opts['num_gpus'] == 1, (
            "When inference is performed on a GPU, only one GPU (--num_gpus=1) must be specified."
        )

    # GPU inference always targets device 0; None is passed for any other device.
    device_option = Model.get_device_option(0 if run_on_gpu else None)
    model_builder = ModelFactory.get_model(opts)

    # A single DB reader is created up front (shared by all GPUs on one machine).
    data_dir = opts.get('data_dir')
    reader = None
    if data_dir:
        reader = model.CreateDB(
            "reader",
            db=data_dir,                    # path to the dataset
            db_type=opts['data_backend'],   # 'lmdb' or 'leveldb'
            num_shards=1,                   # number of machines
            shard_id=0,                     # id of this machine
        )

    with core.DeviceScope(device_option):
        if reader is not None:
            message = (
                "[INFO] Adding real data inputs (%s) for Caffe2 inference benchmarks"
                % data_dir
            )
            print(message)
            model_builder.add_data_inputs(
                model,
                reader,
                use_gpu_transform=(opts['device'] == 'gpu'),
                num_decode_threads=opts['num_decode_threads'],
            )
        else:
            message = "[INFO] Adding synthetic data input for Caffe2 inference benchmarks"
            print(message)
            model_builder.add_synthetic_inputs(model, add_labels=False)

        create_model(
            model_builder,
            model,
            opts['enable_tensor_core'],
            opts['float16_compute'],
        )

    # Run parameter initialisation once, then instantiate the main net.
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    title = model_builder.name
    batch_times = run_n_times(
        model,
        opts['num_warmup_batches'],
        opts['num_batches'],
    )
    return (title, batch_times)
Пример #3
0
def benchmark_training(model, opts):
    """Build a training net and return the measured batch times.

    For some impl details see https://caffe2.ai/docs/SynchronousSGD.html.

    With ``opts['device'] == 'gpu'`` the net is assembled through
    ``dpm.Parallelize`` over ``range(opts['num_gpus'])`` devices using the
    builder callbacks defined below. For any other device the inputs, forward
    pass, gradient operators and parameter updates are added directly inside a
    single device scope, with a loss scale of 1.0.

    :param model: Caffe2's model helper class instance.
    :type model: :py:class:`caffe2.python.model_helper.ModelHelper`
    :param dict opts: Options for the training benchmark. Keys read here:
                      `device`, `num_gpus` (only when device is gpu),
                      `enable_tensor_core`, `float16_compute`,
                      `num_warmup_batches` and `num_batches`. `data_dir` is
                      optional; when it is set, `data_backend` and
                      `num_decode_threads` are read as well. The dictionary is
                      also handed to `ModelFactory.get_model` and, on GPU, to
                      `setup_rendezvous`, which may read further keys.
    :return: Tuple of model title and the batch times produced by `run_n_times`.
    :rtype: (string, numpy array)
    """
    model_builder = ModelFactory.get_model(opts)
    assert model_builder.phase == 'training', (
        "Internal error, invalid phase was set. "
        "Expecting 'training' but found %s" % (model_builder.phase)
    )

    # A single DB reader is created up front (shared by all GPUs on one machine).
    reader = None
    if opts.get('data_dir'):
        reader = model.CreateDB(
            "reader",
            db=opts['data_dir'],            # path to training data
            db_type=opts['data_backend'],   # 'lmdb' or 'leveldb'
            num_shards=1,                   # number of machines
            shard_id=0,                     # id of this machine
        )

    def build_inputs(model):
        """Attach real data inputs when a reader exists, else synthetic ones."""
        if reader is not None:
            print("[INFO] Adding real data inputs (%s) for Caffe2 training benchmarks" %
                  (opts['data_dir']))
            model_builder.add_data_inputs(
                model,
                reader,
                use_gpu_transform=(opts['device'] == 'gpu'),
                num_decode_threads=opts['num_decode_threads'],
            )
            return
        print("[INFO] Adding synthetic data input for Caffe2 training benchmarks")
        model_builder.add_synthetic_inputs(model, add_labels=True)

    def build_forward_pass(model, loss_scale):
        """Add the model operators and return whatever `create_model` returns."""
        return create_model(
            model_builder,
            model,
            opts['enable_tensor_core'],
            opts['float16_compute'],
            loss_scale,
        )

    def build_post_sync_ops(model):
        """Add ops applied after initial parameter sync."""
        for info in model.GetOptimizationParamInfo(model.GetParams()):
            if info.blob_copy is None:
                continue
            # Parameters that carry a blob copy get a HalfToFloat op from the
            # parameter blob into its FLOAT copy.
            model.param_init_net.HalfToFloat(
                info.blob,
                info.blob_copy[core.DataType.FLOAT],
            )

    def build_optimizer(model_):
        """Delegate to the module-level `build_optimizer` helper."""
        return _module_build_optimizer(model_, float16_compute=opts['float16_compute'])

    # Keep a handle on the module-level helper, which the closure above shadows.
    _module_build_optimizer = globals()['build_optimizer']

    if opts['device'] != 'gpu':
        with core.DeviceScope(Model.get_device_option(gpu=None)):
            build_inputs(model)
            loss_blobs = build_forward_pass(model, 1.0)
            grad_map = model.AddGradientOperators(loss_blobs)
            Model.add_parameter_update_ops(model)
        Model.optimize_gradient_memory(model, [grad_map[loss_blobs[0]]])
    else:
        rendezvous = setup_rendezvous(opts)
        print("rendezvous: %s" % str(rendezvous))
        # NOTE: param_update_builder_fun=Model.add_parameter_update_ops is left
        # disabled here; optimizer_builder_fun is passed instead.
        dpm.Parallelize(
            model,
            input_builder_fun=build_inputs,
            forward_pass_builder_fun=build_forward_pass,
            optimizer_builder_fun=build_optimizer,
            post_sync_builder_fun=build_post_sync_ops,
            devices=range(opts['num_gpus']),
            rendezvous=rendezvous,
            optimize_gradient_memory=True,
            cpu_device=(opts['device'] == 'cpu'),
            shared_model=(opts['device'] == 'cpu'),
        )

    # Run parameter initialisation once, then instantiate the main net.
    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    title = model_builder.name
    batch_times = run_n_times(
        model,
        opts['num_warmup_batches'],
        opts['num_batches'],
    )
    return (title, batch_times)