Example #1
def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the
    rearrange operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all array rearrange operators
    mx_rearrange_ops = get_all_rearrange_operators()

    # Run benchmarks
    mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
    return mx_rearrange_op_results
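# Illustrative usage sketch (hypothetical helper, assuming `mx` and the opperf utilities
# used above are importable in this module): run a short pass and print the per-operator
# entries of the returned results dictionary.
def _print_rearrange_benchmark_results():
    results = run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
                                                 warmup=5, runs=10)
    for op_name, op_result in results.items():
        print(op_name, op_result)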
Example #2
def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(),
                                               dtype='float32',
                                               profiler='native',
                                               warmup=25,
                                               runs=100):
    """Runs benchmarks with the given context and precision (dtype)for all the sorting and searching
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all sorting and searching operators
    mx_sort_search_ops = get_all_sorting_searching_operators()
    # Run benchmarks
    mx_sort_search_op_results = run_op_benchmarks(mx_sort_search_ops, dtype,
                                                  ctx, profiler, warmup, runs)
    return mx_sort_search_op_results
Example #3
def run_optimizer_operators_benchmarks(ctx=mx.cpu(),
                                       dtype='float32',
                                       profiler='native',
                                       warmup=25,
                                       runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the neural network
    optimizer update operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all optimizer operators
    mx_optimizer_ops = get_all_optimizer_operators()

    # Run benchmarks
    mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx,
                                                profiler, warmup, runs)
    return mx_optimizer_op_results
Example #4
def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(),
                                                    dtype='float32',
                                                    warmup=25,
                                                    runs=100):
    """Runs benchmarks with the given context and precision (dtype)for all the binary
    element_wise operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all Binary Element_wise Operators
    mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()
    # Run benchmarks
    mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype,
                                             ctx, warmup, runs)
    return mx_binary_op_results
Example #5
def run_activation_operators_benchmarks(ctx=mx.cpu(),
                                        dtype='float32',
                                        profiler='native',
                                        warmup=25,
                                        runs=100):
    """Runs benchmarks with the given context and precision (dtype)for all the activation
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Module to use for tracking benchmark execution time
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """

    # Fetch all NN Activation Operators
    mx_activation_ops = get_all_nn_activation_operators()

    # Run benchmarks
    mx_activation_op_results = run_op_benchmarks(mx_activation_ops, dtype, ctx,
                                                 profiler, warmup, runs)
    return mx_activation_op_results
Example #6
def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the unary
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """

    standard_inputs = [{"args": [(1024, 1024)],
                        "num_outputs":1},
                       {"args": [(10000, 1)],
                        "num_outputs":1}]
    int64_tensor_inputs = [{"args": [(2**32, 1)],
                            "num_outputs":1}]

    if int64_tensor == 'on':
        inputs = int64_tensor_inputs
    else:
        inputs = standard_inputs

    # Run amp_multicast as it needs data as positional argument
    amp_multicast_benchmark = run_performance_test([getattr(MX_OP_MODULE, "amp_multicast")],
                                                   run_backward=True,
                                                   dtype=dtype,
                                                   ctx=ctx,
                                                   profiler=profiler,
                                                   inputs=inputs,
                                                   warmup=warmup,
                                                   runs=runs)

    # Fetch all Unary Operators
    mx_unary_broadcast_ops = get_all_unary_operators()

    # Run benchmarks
    mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
    return merge_map_list(amp_multicast_benchmark + [mx_unary_op_results])
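# For context, a minimal sketch of the dictionary merge performed above. The opperf
# utility merge_map_list folds a list of per-operator result maps into one map; this
# stand-in (hypothetical name) is illustrative only, not the library implementation.
def _merge_result_maps(map_list):
    merged = {}
    for result_map in map_list:
        merged.update(result_map)
    return merged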
Example #7
def run_linalg_operators_benchmarks(ctx=mx.cpu(),
                                    dtype='float32',
                                    profiler='native',
                                    warmup=25,
                                    runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the linear algebra
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Individual tests for ops with specific requirements on input data
    # linalg_potrf requires a positive definite matrix as input
    linalg_potrf_benchmark = run_performance_test(getattr(MX_OP_MODULE, "linalg_potrf"),
                                                  run_backward=False,
                                                  dtype=dtype,
                                                  ctx=ctx,
                                                  profiler=profiler,
                                                  inputs=[{"A": [[1, 0], [0, 1]]},
                                                          {"A": [[2, -1, 0],
                                                                 [-1, 2, -1],
                                                                 [0, -1, 2]]}],
                                                  warmup=warmup,
                                                  runs=runs)

    # Fetch all Linear Algebra Operators
    mx_linalg_ops = get_all_linalg_operators()
    # Run benchmarks
    mx_linalg_op_results = run_op_benchmarks(mx_linalg_ops, dtype, ctx,
                                             profiler, warmup, runs)
    return merge_map_list(linalg_potrf_benchmark + [mx_linalg_op_results])
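# Sanity-check sketch (illustrative, assumes numpy is importable; not part of the
# benchmark run): linalg_potrf needs a symmetric positive definite input, so both
# matrices passed above should admit a Cholesky factorization.
def _check_potrf_inputs_are_spd():
    import numpy as np
    for a in ([[1, 0], [0, 1]],
              [[2, -1, 0], [-1, 2, -1], [0, -1, 2]]):
        np.linalg.cholesky(np.array(a, dtype='float64'))  # raises LinAlgError if not SPD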
Example #8
def run_activation_operators_benchmarks(ctx=mx.cpu(),
                                        dtype='float32',
                                        profiler='native',
                                        int64_tensor='off',
                                        warmup=25,
                                        runs=100):
    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor) for all the activation
    operators (relu, sigmoid, softmax) in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Module to use for tracking benchmark execution time
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """

    # Fetch all NN Activation Operators
    mx_activation_ops = get_all_nn_activation_operators()

    # Run benchmarks
    mx_activation_op_results = run_op_benchmarks(mx_activation_ops, dtype, ctx,
                                                 profiler, int64_tensor,
                                                 warmup, runs)
    return mx_activation_op_results
Example #9
def run_mx_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the miscellaneous
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Individual tests for ops with positional args
    array_ops_benchmark = run_performance_test([getattr(MX_OP_MODULE, "reset_arrays"),
                                                getattr(MX_OP_MODULE, "multi_all_finite"),
                                                getattr(MX_OP_MODULE, "multi_sum_sq")],
                                               run_backward=False,
                                               dtype=dtype,
                                               ctx=ctx,
                                               profiler=profiler,
                                               inputs=[{"args": [(1024, 1024)],
                                                        "num_arrays": 1},
                                                       {"args": [(10000, 1)],
                                                        "num_arrays": 1},
                                                       {"args": [(10000, 10)],
                                                        "num_arrays": 1}],
                                               warmup=warmup,
                                               runs=runs)
    add_n_benchmark = run_performance_test([getattr(MX_OP_MODULE, "add_n")],
                                           run_backward=True,
                                           dtype=dtype,
                                           ctx=ctx,
                                           profiler=profiler,
                                           inputs=[{"args": [(1024, 1024)]},
                                                   {"args": [(10000, 1)]},
                                                   {"args": [(10000, 10)]}],
                                           warmup=warmup,
                                           runs=runs)
    # There are currently issues with UpSampling with bilinear interpolation.
    # track issue here: https://github.com/apache/incubator-mxnet/issues/9138
    upsampling_benchmark = run_performance_test([getattr(MX_OP_MODULE, "UpSampling")],
                                                run_backward=True,
                                                dtype=dtype,
                                                ctx=ctx,
                                                profiler=profiler,
                                                inputs=[{"args": (32, 3, 256, 256),
                                                         "scale": 2,
                                                         "sample_type": "nearest"},
                                                        {"args": (32, 3, 10000, 1),
                                                         "scale": 4,
                                                         "sample_type": "nearest"}],
                                                warmup=warmup,
                                                runs=runs)
    # Create and register CustomAddOne operator for use in Custom op testing
    c = CustomAddOneProp()
    c.create_operator(ctx, [(1024,1024)], [dtype])
    custom_benchmark = run_performance_test([getattr(MX_OP_MODULE, "Custom")],
                                            run_backward=True,
                                            dtype=dtype,
                                            ctx=ctx,
                                            profiler=profiler,
                                            inputs=[{"args": [(1024, 1024)],
                                                     "op_type": "CustomAddOne"},
                                                    {"args": [(10000, 1)],
                                                     "op_type": "CustomAddOne"},
                                                    {"args": [(10000, 10)],
                                                     "op_type": "CustomAddOne"}],
                                            warmup=warmup,
                                            runs=runs)

    # Fetch remaining Miscellaneous Operators
    mx_misc_ops = get_remaining_miscellaneous_operators()
    # Run benchmarks
    mx_misc_op_results = run_op_benchmarks(mx_misc_ops, dtype, ctx, profiler, warmup, runs)
    return merge_map_list(array_ops_benchmark + add_n_benchmark + upsampling_benchmark + custom_benchmark + [mx_misc_op_results])
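# For context, a minimal sketch (hypothetical names, illustrative only) of the kind of
# custom operator that "op_type": "CustomAddOne" refers to above, following the standard
# mx.operator.CustomOp / CustomOpProp pattern. The benchmark suite supplies its own
# CustomAddOneProp (used a few lines up), typically registered via
# mx.operator.register("CustomAddOne"); this sketch is not that implementation.
class _AddOneSketch(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        # Element-wise add one to the single input tensor.
        self.assign(out_data[0], req[0], in_data[0] + 1)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # The gradient of x + 1 w.r.t. x is the incoming gradient unchanged.
        self.assign(in_grad[0], req[0], out_grad[0])

class _AddOneSketchProp(mx.operator.CustomOpProp):
    def __init__(self):
        super(_AddOneSketchProp, self).__init__(need_top_grad=True)

    def list_arguments(self):
        return ['data']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # Output shape matches the input shape; no auxiliary states.
        return [in_shape[0]], [in_shape[0]], []

    def create_operator(self, ctx, shapes, dtypes):
        return _AddOneSketch()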
Example #10
def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the neural network
    optimizer update operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Run independent tests for ops that need specific input data
    multi_mp_sgd_mom_res = run_performance_test([getattr(MX_OP_MODULE, "multi_mp_sgd_mom_update")],
                                                inputs=[{"args0": nd.random_normal(shape=(5,5)),
                                                "args1": nd.random_normal(shape=(5,5)), "args2": nd.random_normal(shape=(5,5)),
                                                "args3": nd.random_normal(shape=(5,5)), "lrs": 0.1, "wds": 0.2,
                                                "out": nd.random_normal(shape=(5,5))}],run_backward=False)

    multi_sgd_mom_res = run_performance_test([getattr(MX_OP_MODULE, "multi_sgd_mom_update")],
                                             inputs=[{"args0": nd.random_normal(shape=(5,5)),
                                             "args1": nd.random_normal(shape=(5,5)),"args2": nd.random_normal(shape=(5,5)),
                                             "lrs": 0.1, "wds": 0.2, "out": nd.random_normal(shape=(5,5))}], run_backward=False)

    multi_sgd_res = run_performance_test([getattr(MX_OP_MODULE, "multi_sgd_update")],
                                         inputs=[{"args0": nd.random_normal(shape=(5,5)),
                                         "args1": nd.random_normal(shape=(5,5)), "lrs": 0.1, "wds": 0.2,
                                         "out": nd.random_normal(shape=(5,5))}], run_backward=False)

    multi_mp_sgd_res = run_performance_test([getattr(MX_OP_MODULE, "multi_mp_sgd_update")],
                                            inputs=[{"args0": nd.random_normal(shape=(5,5)),
                                            "args1": nd.random_normal(shape=(5,5)),"args2": nd.random_normal(shape=(5,5)),
                                            "lrs": 0.1, "wds": 0.2, "out": nd.random_normal(shape=(5,5))}], run_backward=False)

    preloaded_multi_mp_sgd_res = run_performance_test(
                                 [getattr(MX_OP_MODULE, "preloaded_multi_mp_sgd_update")],
                                 inputs=[{"args0": nd.random_normal(shape=(5,5)),
                                          "args1": nd.random_normal(shape=(5,5)), "args2": nd.random_normal(shape=(5,5)),
                                          "args3": nd.random_normal(shape=(1)), "args4": nd.random_normal(shape=(1)),
                                          "out": nd.random_normal(shape=(5,5))}], run_backward=False)

    preloaded_multi_sgd_mom_res = run_performance_test(
                                  [getattr(MX_OP_MODULE, "preloaded_multi_sgd_mom_update")],
                                  inputs=[{"args0": nd.random_normal(shape=(5,5)),
                                           "args1": nd.random_normal(shape=(5,5)), "args2": nd.random_normal(shape=(5,5)),
                                           "args3": nd.random_normal(shape=(1)), "args4": nd.random_normal(shape=(1)),
                                           "out": nd.random_normal(shape=(5,5))}], run_backward=False)

    preloaded_multi_sgd_res = run_performance_test(
                              [getattr(MX_OP_MODULE, "preloaded_multi_sgd_update")],
                              inputs=[{"args0": nd.random_normal(shape=(5,5)), "args1": nd.random_normal(shape=(5,5)),
                                       "args4": nd.random_normal(shape=(1)), "args5": nd.random_normal(shape=(1)),
                                       "out": nd.random_normal(shape=(5,5))}], run_backward=False)

    preloaded_multi_mp_sgd_mom_res = run_performance_test(
                                     [getattr(MX_OP_MODULE, "preloaded_multi_mp_sgd_mom_update")],
                                     inputs=[{"args0": nd.random_normal(shape=(5,5)), "args1": nd.random_normal(shape=(5,5)),
                                              "args2": nd.random_normal(shape=(5,5)), "args3": nd.random_normal(shape=(5,5)),
                                              "args4": nd.random_normal(shape=(1)), "args5": nd.random_normal(shape=(1)),
                                              "out": nd.random_normal(shape=(5,5))}], run_backward=False)

    # Fetch remaining optimizer operators
    mx_optimizer_ops = get_all_optimizer_operators()

    # Run benchmarks
    mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx, profiler, warmup, runs)
    return merge_map_list(multi_mp_sgd_mom_res + multi_sgd_mom_res + multi_sgd_res + multi_mp_sgd_res +\
                          preloaded_multi_mp_sgd_res + preloaded_multi_sgd_mom_res + preloaded_multi_sgd_res +\
                          preloaded_multi_mp_sgd_mom_res + [mx_optimizer_op_results])
Example #11
def run_nn_basic_operators_benchmarks(ctx=mx.cpu(),
                                      dtype='float32',
                                      profiler='native',
                                      int64_tensor='off',
                                      warmup=25,
                                      runs=100):
    """Runs benchmarks with the given context, precision (dtype), and data size (int64_tensor) for all the basic neural network
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """

    standard_data_list = [(1024, 4, 4)]
    int64_tensor_data_list = [(2**28, 4, 4)]

    if int64_tensor == 'on':
        data_list = int64_tensor_data_list
    else:
        data_list = standard_data_list

    for data in data_list:
        rnn_relu_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype,
            ctx=ctx,
            profiler=profiler,
            inputs=[{
                "data": data,
                "parameters": (7, ),
                "state": (1, 4, 1),
                "mode": "rnn_relu",
                "state_size": 1,
                "num_layers": 1
            }],
            warmup=warmup,
            runs=runs)
        rnn_tanh_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype,
            ctx=ctx,
            profiler=profiler,
            inputs=[{
                "data": data,
                "parameters": (7, ),
                "state": (1, 4, 1),
                "mode": "rnn_tanh",
                "state_size": 1,
                "num_layers": 1
            }],
            warmup=warmup,
            runs=runs)
        rnn_lstm_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype,
            ctx=ctx,
            profiler=profiler,
            inputs=[{
                "data": data,
                "parameters": (28, ),
                "state": (1, 4, 1),
                "state_cell": (1, 4, 1),
                "mode": "lstm",
                "state_size": 1,
                "num_layers": 1
            }],
            warmup=warmup,
            runs=runs)
        rnn_gru_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype,
            ctx=ctx,
            profiler=profiler,
            inputs=[{
                "data": data,
                "parameters": (21, ),
                "state": (1, 4, 1),
                "mode": "gru",
                "state_size": 1,
                "num_layers": 1
            }],
            warmup=warmup,
            runs=runs)
    # Fetch all NN Basic Operators
    mx_nn_basic_ops = get_all_nn_basic_operators()

    # Run benchmarks
    mx_nn_basic_op_results = run_op_benchmarks(mx_nn_basic_ops, dtype, ctx,
                                               profiler, int64_tensor, warmup,
                                               runs)
    return merge_map_list(rnn_relu_benchmark + rnn_tanh_benchmark +
                          rnn_lstm_benchmark + rnn_gru_benchmark +
                          [mx_nn_basic_op_results])
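# The flat "parameters" sizes used above follow the fused RNN weight layout: with input
# size I, hidden size H and a single unidirectional layer, each gate needs an
# input-to-hidden matrix (H*I), a hidden-to-hidden matrix (H*H) and two bias vectors
# (2*H). A small sanity-check sketch of those counts (hypothetical helper, assuming the
# usual gate counts of 1 for the vanilla RNN modes, 4 for LSTM and 3 for GRU):
def _fused_rnn_param_count(input_size, state_size, gates):
    per_gate = state_size * input_size + state_size * state_size + 2 * state_size
    return gates * per_gate

assert _fused_rnn_param_count(4, 1, gates=1) == 7    # rnn_relu / rnn_tanh
assert _fused_rnn_param_count(4, 1, gates=4) == 28   # lstm
assert _fused_rnn_param_count(4, 1, gates=3) == 21   # gru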