def run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the rearrange
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all array rearrange operators
    mx_rearrange_ops = get_all_rearrange_operators()

    # Run benchmarks
    mx_rearrange_op_results = run_op_benchmarks(mx_rearrange_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
    return mx_rearrange_op_results

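
# A minimal usage sketch (an assumption, not part of the original module): how one of the
# category runners above can be invoked directly. The printing below is illustrative only;
# a full benchmark driver would collect and serialize the results instead.
def _example_run_rearrange_benchmarks_on_cpu():
    """Illustrative only: run the rearrange-operator benchmarks on CPU and print each result."""
    results = run_rearrange_operators_benchmarks(ctx=mx.cpu(), dtype='float32',
                                                 profiler='native', warmup=10, runs=50)
    for op_name, op_result in results.items():
        print(op_name, op_result)
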
def run_sorting_searching_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the sorting and
    searching operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all sorting and searching operators
    mx_sort_search_ops = get_all_sorting_searching_operators()

    # Run benchmarks
    mx_sort_search_op_results = run_op_benchmarks(mx_sort_search_ops, dtype, ctx, profiler, warmup, runs)
    return mx_sort_search_op_results

def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the neural network
    optimizer update operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all optimizer operators
    mx_optimizer_ops = get_all_optimizer_operators()

    # Run benchmarks
    mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx, profiler, warmup, runs)
    return mx_optimizer_op_results

def run_mx_binary_element_wise_operators_benchmarks(ctx=mx.cpu(), dtype='float32', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the binary
    element_wise operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all Binary Element_wise Operators
    mx_binary_element_wise_ops = get_all_elemen_wise_binary_operators()

    # Run benchmarks
    mx_binary_op_results = run_op_benchmarks(mx_binary_element_wise_ops, dtype, ctx, warmup, runs)
    return mx_binary_op_results

def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the activation
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Module to use for tracking benchmark execution time
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all NN Activation Operators
    mx_activation_ops = get_all_nn_activation_operators()

    # Run benchmarks
    mx_activation_op_results = run_op_benchmarks(mx_activation_ops, dtype, ctx, profiler, warmup, runs)
    return mx_activation_op_results

def run_mx_unary_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor)
    for all the unary operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    standard_inputs = [{"args": [(1024, 1024)], "num_outputs": 1},
                       {"args": [(10000, 1)], "num_outputs": 1}]
    int64_tensor_inputs = [{"args": [(2**32, 1)], "num_outputs": 1}]

    if int64_tensor == 'on':
        inputs = int64_tensor_inputs
    else:
        inputs = standard_inputs

    # Run amp_multicast as it needs data as positional argument
    amp_multicast_benchmark = run_performance_test([getattr(MX_OP_MODULE, "amp_multicast")],
                                                   run_backward=True,
                                                   dtype=dtype,
                                                   ctx=ctx,
                                                   profiler=profiler,
                                                   inputs=inputs,
                                                   warmup=warmup,
                                                   runs=runs)

    # Fetch all Unary Operators
    mx_unary_broadcast_ops = get_all_unary_operators()

    # Run benchmarks
    mx_unary_op_results = run_op_benchmarks(mx_unary_broadcast_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
    return merge_map_list(amp_multicast_benchmark + [mx_unary_op_results])

def run_linalg_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the linear algebra
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Individual tests for ops with specific requirements on input data
    # linalg_potrf requires a positive definite matrix as input
    linalg_potrf_benchmark = run_performance_test(getattr(MX_OP_MODULE, "linalg_potrf"),
                                                  run_backward=False,
                                                  dtype=dtype,
                                                  ctx=ctx,
                                                  profiler=profiler,
                                                  inputs=[{"A": [[1, 0], [0, 1]]},
                                                          {"A": [[2, -1, 0], [-1, 2, -1], [0, -1, 2]]}],
                                                  warmup=warmup,
                                                  runs=runs)

    # Fetch all Linear Algebra Operators
    mx_linalg_ops = get_all_linalg_operators()

    # Run benchmarks
    mx_linalg_op_results = run_op_benchmarks(mx_linalg_ops, dtype, ctx, profiler, warmup, runs)
    return merge_map_list(linalg_potrf_benchmark + [mx_linalg_op_results])

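
# A small side check (illustrative, not part of the original module): the two matrices passed to
# linalg_potrf above are positive definite, which can be verified with NumPy's Cholesky
# factorization (np.linalg.cholesky raises LinAlgError for inputs that are not positive definite).
def _example_check_potrf_inputs_are_positive_definite():
    """Illustrative only: confirm the linalg_potrf sample inputs admit a Cholesky factorization."""
    import numpy as np
    for matrix in ([[1, 0], [0, 1]],
                   [[2, -1, 0], [-1, 2, -1], [0, -1, 2]]):
        np.linalg.cholesky(np.array(matrix, dtype='float64'))  # raises LinAlgError if not PD
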
def run_activation_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
    """Runs benchmarks with the given context, precision (dtype), and input data size (int64_tensor)
    for all the activation operators (relu, sigmoid, softmax) in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Module to use for tracking benchmark execution time
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Fetch all NN Activation Operators
    mx_activation_ops = get_all_nn_activation_operators()

    # Run benchmarks
    mx_activation_op_results = run_op_benchmarks(mx_activation_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
    return mx_activation_op_results

def run_mx_misc_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the miscellaneous
    operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Individual tests for ops with positional args
    array_ops_benchmark = run_performance_test([getattr(MX_OP_MODULE, "reset_arrays"),
                                                getattr(MX_OP_MODULE, "multi_all_finite"),
                                                getattr(MX_OP_MODULE, "multi_sum_sq")],
                                               run_backward=False,
                                               dtype=dtype,
                                               ctx=ctx,
                                               profiler=profiler,
                                               inputs=[{"args": [(1024, 1024)], "num_arrays": 1},
                                                       {"args": [(10000, 1)], "num_arrays": 1},
                                                       {"args": [(10000, 10)], "num_arrays": 1}],
                                               warmup=warmup,
                                               runs=runs)
    add_n_benchmark = run_performance_test([getattr(MX_OP_MODULE, "add_n")],
                                           run_backward=True,
                                           dtype=dtype,
                                           ctx=ctx,
                                           profiler=profiler,
                                           inputs=[{"args": [(1024, 1024)]},
                                                   {"args": [(10000, 1)]},
                                                   {"args": [(10000, 10)]}],
                                           warmup=warmup,
                                           runs=runs)
    # There are currently issues with UpSampling with bilinear interpolation.
    # track issue here: https://github.com/apache/incubator-mxnet/issues/9138
    upsampling_benchmark = run_performance_test([getattr(MX_OP_MODULE, "UpSampling")],
                                                run_backward=True,
                                                dtype=dtype,
                                                ctx=ctx,
                                                profiler=profiler,
                                                inputs=[{"args": (32, 3, 256, 256),
                                                         "scale": 2,
                                                         "sample_type": "nearest"},
                                                        {"args": (32, 3, 10000, 1),
                                                         "scale": 4,
                                                         "sample_type": "nearest"}],
                                                warmup=warmup,
                                                runs=runs)
    # Create and register CustomAddOne operator for use in Custom op testing
    c = CustomAddOneProp()
    c.create_operator(ctx, [(1024, 1024)], [dtype])
    custom_benchmark = run_performance_test([getattr(MX_OP_MODULE, "Custom")],
                                            run_backward=True,
                                            dtype=dtype,
                                            ctx=ctx,
                                            profiler=profiler,
                                            inputs=[{"args": [(1024, 1024)], "op_type": "CustomAddOne"},
                                                    {"args": [(10000, 1)], "op_type": "CustomAddOne"},
                                                    {"args": [(10000, 10)], "op_type": "CustomAddOne"}],
                                            warmup=warmup,
                                            runs=runs)

    # Fetch remaining Miscellaneous Operators
    mx_misc_ops = get_remaining_miscellaneous_operators()

    # Run benchmarks
    mx_misc_op_results = run_op_benchmarks(mx_misc_ops, dtype, ctx, profiler, warmup, runs)
    return merge_map_list(array_ops_benchmark + add_n_benchmark + upsampling_benchmark +
                          custom_benchmark + [mx_misc_op_results])

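
# For context, a minimal sketch (an assumption; the actual CustomAddOneProp used above is imported
# from elsewhere in this package) of a CustomOp/CustomOpProp pair that adds one to its input,
# following MXNet's mx.operator custom-operator conventions. It is left commented out here to
# avoid re-registering the "CustomAddOne" name on top of the real definition.
#
# class CustomAddOne(mx.operator.CustomOp):
#     def forward(self, is_train, req, in_data, out_data, aux):
#         self.assign(out_data[0], req[0], in_data[0] + 1)
#
#     def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
#         self.assign(in_grad[0], req[0], out_grad[0])
#
#
# @mx.operator.register("CustomAddOne")
# class CustomAddOneProp(mx.operator.CustomOpProp):
#     def __init__(self):
#         super(CustomAddOneProp, self).__init__(need_top_grad=True)
#
#     def list_arguments(self):
#         return ['data']
#
#     def list_outputs(self):
#         return ['output']
#
#     def infer_shape(self, in_shape):
#         # Output shape matches the input shape; no auxiliary states.
#         return [in_shape[0]], [in_shape[0]], []
#
#     def create_operator(self, ctx, shapes, dtypes):
#         return CustomAddOne()
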
def run_optimizer_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', warmup=25, runs=100):
    """Runs benchmarks with the given context and precision (dtype) for all the neural network
    optimizer update operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    # Run independent tests for ops that need specific input data
    multi_mp_sgd_mom_res = run_performance_test(
        [getattr(MX_OP_MODULE, "multi_mp_sgd_mom_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "args2": nd.random_normal(shape=(5, 5)), "args3": nd.random_normal(shape=(5, 5)),
                 "lrs": 0.1, "wds": 0.2, "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)
    multi_sgd_mom_res = run_performance_test(
        [getattr(MX_OP_MODULE, "multi_sgd_mom_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "args2": nd.random_normal(shape=(5, 5)),
                 "lrs": 0.1, "wds": 0.2, "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)
    multi_sgd_res = run_performance_test(
        [getattr(MX_OP_MODULE, "multi_sgd_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "lrs": 0.1, "wds": 0.2, "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)
    multi_mp_sgd_res = run_performance_test(
        [getattr(MX_OP_MODULE, "multi_mp_sgd_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "args2": nd.random_normal(shape=(5, 5)),
                 "lrs": 0.1, "wds": 0.2, "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)
    preloaded_multi_mp_sgd_res = run_performance_test(
        [getattr(MX_OP_MODULE, "preloaded_multi_mp_sgd_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "args2": nd.random_normal(shape=(5, 5)), "args3": nd.random_normal(shape=(1,)),
                 "args4": nd.random_normal(shape=(1,)), "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)
    preloaded_multi_sgd_mom_res = run_performance_test(
        [getattr(MX_OP_MODULE, "preloaded_multi_sgd_mom_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "args2": nd.random_normal(shape=(5, 5)), "args3": nd.random_normal(shape=(1,)),
                 "args4": nd.random_normal(shape=(1,)), "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)
    preloaded_multi_sgd_res = run_performance_test(
        [getattr(MX_OP_MODULE, "preloaded_multi_sgd_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "args4": nd.random_normal(shape=(1,)), "args5": nd.random_normal(shape=(1,)),
                 "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)
    preloaded_multi_mp_sgd_mom_res = run_performance_test(
        [getattr(MX_OP_MODULE, "preloaded_multi_mp_sgd_mom_update")],
        inputs=[{"args0": nd.random_normal(shape=(5, 5)), "args1": nd.random_normal(shape=(5, 5)),
                 "args2": nd.random_normal(shape=(5, 5)), "args3": nd.random_normal(shape=(5, 5)),
                 "args4": nd.random_normal(shape=(1,)), "args5": nd.random_normal(shape=(1,)),
                 "out": nd.random_normal(shape=(5, 5))}],
        run_backward=False)

    # Fetch remaining optimizer operators
    mx_optimizer_ops = get_all_optimizer_operators()

    # Run benchmarks
    mx_optimizer_op_results = run_op_benchmarks(mx_optimizer_ops, dtype, ctx, profiler, warmup, runs)
    return merge_map_list(multi_mp_sgd_mom_res + multi_sgd_mom_res + multi_sgd_res + multi_mp_sgd_res +
                          preloaded_multi_mp_sgd_res + preloaded_multi_sgd_mom_res +
                          preloaded_multi_sgd_res + preloaded_multi_mp_sgd_mom_res +
                          [mx_optimizer_op_results])

def run_nn_basic_operators_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native', int64_tensor='off', warmup=25, runs=100):
    """Runs benchmarks with the given context, precision (dtype), and data size (int64_tensor)
    for all the basic neural network operators in MXNet.

    Parameters
    ----------
    ctx: mx.ctx
        Context to run benchmarks
    dtype: str, default 'float32'
        Precision to use for benchmarks
    profiler: str, default 'native'
        Type of Profiler to use (native/python)
    int64_tensor: str, default 'off'
        Input tensor size to use for tests (if on, dimensions >= 2**32)
    warmup: int, default 25
        Number of times to run for warmup
    runs: int, default 100
        Number of runs to capture benchmark results

    Returns
    -------
    Dictionary of results. Key -> Name of the operator, Value -> Benchmark results.

    """
    standard_data_list = [(1024, 4, 4)]
    int64_tensor_data_list = [(2**28, 4, 4)]

    if int64_tensor == 'on':
        data_list = int64_tensor_data_list
    else:
        data_list = standard_data_list

    for data in data_list:
        rnn_relu_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype, ctx=ctx,
            profiler=profiler,
            inputs=[{"data": data,
                     "parameters": (7,),
                     "state": (1, 4, 1),
                     "mode": "rnn_relu",
                     "state_size": 1,
                     "num_layers": 1}],
            warmup=warmup, runs=runs)
        rnn_tanh_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype, ctx=ctx,
            profiler=profiler,
            inputs=[{"data": data,
                     "parameters": (7,),
                     "state": (1, 4, 1),
                     "mode": "rnn_tanh",
                     "state_size": 1,
                     "num_layers": 1}],
            warmup=warmup, runs=runs)
        rnn_lstm_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype, ctx=ctx,
            profiler=profiler,
            inputs=[{"data": data,
                     "parameters": (28,),
                     "state": (1, 4, 1),
                     "state_cell": (1, 4, 1),
                     "mode": "lstm",
                     "state_size": 1,
                     "num_layers": 1}],
            warmup=warmup, runs=runs)
        rnn_gru_benchmark = run_performance_test(
            [getattr(MX_OP_MODULE, "RNN")],
            run_backward=True,
            dtype=dtype, ctx=ctx,
            profiler=profiler,
            inputs=[{"data": data,
                     "parameters": (21,),
                     "state": (1, 4, 1),
                     "mode": "gru",
                     "state_size": 1,
                     "num_layers": 1}],
            warmup=warmup, runs=runs)

    # Fetch all NN Basic Operators
    mx_nn_basic_ops = get_all_nn_basic_operators()

    # Run benchmarks
    mx_nn_basic_op_results = run_op_benchmarks(mx_nn_basic_ops, dtype, ctx, profiler, int64_tensor, warmup, runs)
    return merge_map_list(rnn_relu_benchmark + rnn_tanh_benchmark + rnn_lstm_benchmark +
                          rnn_gru_benchmark + [mx_nn_basic_op_results])

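
# A minimal driver sketch (an assumption, not part of the original module): running a couple of
# the category benchmarks above back to back and collecting everything into a single dictionary
# via merge_map_list. A full driver would also handle argument parsing and result serialization.
def _example_run_selected_benchmarks(ctx=mx.cpu(), dtype='float32', profiler='native'):
    """Illustrative only: run two benchmark categories and merge their result dictionaries."""
    results = merge_map_list([
        run_rearrange_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler),
        run_nn_basic_operators_benchmarks(ctx=ctx, dtype=dtype, profiler=profiler),
    ])
    return results
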